/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <asm/page.h>

#include "user_exp_rcv.h"
#include "trace.h"
#include "mmu_rb.h"

struct tid_group {
	struct list_head list;
	unsigned base;
	u8 size;
	u8 used;
	u8 map;
};

struct tid_rb_node {
	struct mmu_rb_node mmu;
	unsigned long phys;
	struct tid_group *grp;
	u32 rcventry;
	dma_addr_t dma_addr;
	bool freed;
	unsigned npages;
	struct page *pages[0];
};

struct tid_pageset {
	u16 idx;
	u16 count;
};

#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))

#define num_user_pages(vaddr, len)				\
	(1 + (((((unsigned long)(vaddr) +			\
		 (unsigned long)(len) - 1) & PAGE_MASK) -	\
	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))

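/*
 * Worked example (illustrative, assuming 4 KiB pages): a buffer at
 * vaddr 0x1ff8 with len 16 ends at 0x2007, so it touches the pages
 * starting at 0x1000 and 0x2000, and num_user_pages() returns
 * ((0x2000 - 0x1000) >> PAGE_SHIFT) + 1 = 2 even though len is far
 * smaller than a page.
 */
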
static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
			    struct rb_root *);
static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
			      struct tid_group *, struct page **, unsigned);
static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
			    struct tid_pageset *, unsigned, u16, struct page **,
			    u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);

static struct mmu_rb_ops tid_rb_ops = {
	.insert = mmu_rb_insert,
	.remove = mmu_rb_remove,
	.invalidate = mmu_rb_invalidate
};

static inline u32 rcventry2tidinfo(u32 rcventry)
{
	u32 pair = rcventry & ~0x1;

	return EXP_TID_SET(IDX, pair >> 1) |
		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
}

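/*
 * Encoding example (illustrative): RcvArray entries are encoded in
 * pairs. For rcventry 5, pair = 4, so IDX = 4 >> 1 = 2 and
 * CTRL = 1 << (5 - 4) = 2, i.e. the odd entry of pair 2; an even
 * rcventry would yield CTRL = 1.
 */
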
static inline void exp_tid_group_init(struct exp_tid_set *set)
{
	INIT_LIST_HEAD(&set->list);
	set->count = 0;
}

static inline void tid_group_remove(struct tid_group *grp,
				    struct exp_tid_set *set)
{
	list_del_init(&grp->list);
	set->count--;
}

static inline void tid_group_add_tail(struct tid_group *grp,
				      struct exp_tid_set *set)
{
	list_add_tail(&grp->list, &set->list);
	set->count++;
}

static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
{
	struct tid_group *grp =
		list_first_entry(&set->list, struct tid_group, list);
	list_del_init(&grp->list);
	set->count--;
	return grp;
}

static inline void tid_group_move(struct tid_group *group,
				  struct exp_tid_set *s1,
				  struct exp_tid_set *s2)
{
	tid_group_remove(group, s1);
	tid_group_add_tail(group, s2);
}

/*
 * Initialize context and file private data needed for Expected
 * receive caching. This needs to be done after the context has
 * been configured with the eager/expected RcvEntry counts.
 */
int hfi1_user_exp_rcv_init(struct file *fp)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned tidbase;
	int i, ret = 0;

	spin_lock_init(&fd->tid_lock);
	spin_lock_init(&fd->invalid_lock);
	fd->tid_rb_root = RB_ROOT;

	if (!uctxt->subctxt_cnt || !fd->subctxt) {
		exp_tid_group_init(&uctxt->tid_group_list);
		exp_tid_group_init(&uctxt->tid_used_list);
		exp_tid_group_init(&uctxt->tid_full_list);

		tidbase = uctxt->expected_base;
		for (i = 0; i < uctxt->expected_count /
			     dd->rcv_entries.group_size; i++) {
			struct tid_group *grp;

			grp = kzalloc(sizeof(*grp), GFP_KERNEL);
			if (!grp) {
				/*
				 * If we fail here, the groups already
				 * allocated will be freed by the close
				 * call.
				 */
				ret = -ENOMEM;
				goto done;
			}
			grp->size = dd->rcv_entries.group_size;
			grp->base = tidbase;
			tid_group_add_tail(grp, &uctxt->tid_group_list);
			tidbase += dd->rcv_entries.group_size;
		}
	}

	fd->entry_to_rb = kcalloc(uctxt->expected_count,
				  sizeof(struct rb_node *),
				  GFP_KERNEL);
	if (!fd->entry_to_rb)
		return -ENOMEM;

	if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
		fd->invalid_tid_idx = 0;
		fd->invalid_tids = kzalloc(uctxt->expected_count *
					   sizeof(u32), GFP_KERNEL);
		if (!fd->invalid_tids) {
			ret = -ENOMEM;
			goto done;
		}

		/*
		 * Register MMU notifier callbacks. If the registration
		 * fails, continue but turn off the TID caching for
		 * all user contexts.
		 */
		ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
		if (ret) {
			dd_dev_info(dd,
				    "Failed MMU notifier registration %d\n",
				    ret);
			HFI1_CAP_USET(TID_UNMAP);
			ret = 0;
		}
	}

	/*
	 * PSM does not have a good way to separate, count, and
	 * effectively enforce a limit on RcvArray entries used by
	 * subctxts (when context sharing is used) when TID caching
	 * is enabled. To help with that, we calculate a per-process
	 * RcvArray entry share and enforce that.
	 * If TID caching is not in use, PSM deals with usage on its
	 * own. In that case, we allow any subctxt to take all of the
	 * entries.
	 *
	 * Make sure that we set the tid counts only after successful
	 * init.
	 */
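	/*
	 * Example of the split (illustrative): with expected_count = 2048
	 * and subctxt_cnt = 3, the base share is 682 with a remainder of
	 * 2, so subctxts 0 and 1 get a limit of 683 and subctxt 2 gets
	 * 682.
	 */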
	spin_lock(&fd->tid_lock);
	if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
		u16 remainder;

		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
		if (remainder && fd->subctxt < remainder)
			fd->tid_limit++;
	} else {
		fd->tid_limit = uctxt->expected_count;
	}
	spin_unlock(&fd->tid_lock);
done:
	return ret;
}

int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct tid_group *grp, *gptr;

	/*
	 * The notifier would have been removed when the process's mm
	 * was freed.
	 */
	if (!HFI1_CAP_IS_USET(TID_UNMAP))
		hfi1_mmu_rb_unregister(&fd->tid_rb_root);

	kfree(fd->invalid_tids);

	if (!uctxt->cnt) {
		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
			unlock_exp_tids(uctxt, &uctxt->tid_full_list,
					&fd->tid_rb_root);
		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
			unlock_exp_tids(uctxt, &uctxt->tid_used_list,
					&fd->tid_rb_root);
		list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
					 list) {
			list_del_init(&grp->list);
			kfree(grp);
		}
		hfi1_clear_tids(uctxt);
	}

	kfree(fd->entry_to_rb);
	return 0;
}

/*
 * Write an "empty" RcvArray entry.
 * This function exists so the TID registration code can use it
 * to write to unused/unneeded entries and still take advantage
 * of the WC performance improvements. The HFI will ignore this
 * write to the RcvArray entry.
 */
static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
{
	/*
	 * Doing the WC fill writes only makes sense if the device is
	 * present and the RcvArray has been mapped as WC memory.
	 */
	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
		writeq(0, dd->rcvarray_wc + (index * 8));
}

/*
 * RcvArray entry allocation for Expected Receives is done by the
 * following algorithm:
 *
 * The context keeps 3 lists of groups of RcvArray entries:
 *	1. List of empty groups - tid_group_list
 *	   This list is created during user context creation and
 *	   contains elements which describe sets (of 8) of empty
 *	   RcvArray entries.
 *	2. List of partially used groups - tid_used_list
 *	   This list contains sets of RcvArray entries which are
 *	   not completely used up. Another mapping request could
 *	   use some or all of the remaining entries.
 *	3. List of full groups - tid_full_list
 *	   This is the list where sets that are completely used
 *	   up go.
 *
 * An attempt to optimize the usage of RcvArray entries is
 * made by finding all sets of physically contiguous pages in a
 * user's buffer.
 * These physically contiguous sets are further split into
 * sizes supported by the receive engine of the HFI. The
 * resulting sets of pages are stored in struct tid_pageset,
 * which describes the sets as:
 *	* .count - number of pages in this set
 *	* .idx - starting index into struct page ** array
 *		 of this set
 *
 * From this point on, the algorithm deals with the page sets
 * described above. The number of pagesets is divided by the
 * RcvArray group size to produce the number of full groups
 * needed.
 *
 * Groups from the 3 lists are manipulated using the following
 * rules:
 *	1. For each set of 8 pagesets, a complete group from
 *	   tid_group_list is taken, programmed, and moved to
 *	   the tid_full_list list.
 *	2. For all remaining pagesets:
 *	   2.1 If the tid_used_list is empty and the tid_group_list
 *	       is empty, stop processing pagesets and return only
 *	       what has been programmed up to this point.
 *	   2.2 If the tid_used_list is empty and the tid_group_list
 *	       is not empty, move a group from tid_group_list to
 *	       tid_used_list.
 *	   2.3 For each group in tid_used_list, program as many
 *	       pagesets as can fit into the group. If the group
 *	       becomes fully used, move it to tid_full_list.
 */
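/*
 * Illustrative walk-through of the rules above, assuming the usual
 * group size of 8: a request that yields 20 pagesets programs two
 * complete groups from tid_group_list (16 pagesets, rule 1) and then
 * places the remaining 4 pagesets into a partially used group per
 * rules 2.1-2.3.
 */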
int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
{
	int ret = 0, need_group = 0, pinned;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
		tididx = 0, mapped, mapped_pages = 0;
	unsigned long vaddr = tinfo->vaddr;
	struct page **pages = NULL;
	u32 *tidlist = NULL;
	struct tid_pageset *pagesets = NULL;

	/* Get the number of pages the user buffer spans */
	npages = num_user_pages(vaddr, tinfo->length);
	if (!npages)
		return -EINVAL;

	if (npages > uctxt->expected_count) {
		dd_dev_err(dd, "Expected buffer too big\n");
		return -EINVAL;
	}

	/* Verify that access is OK for the user buffer */
	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
		       npages * PAGE_SIZE)) {
		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
			   (void *)vaddr, npages);
		return -EFAULT;
	}

	pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
			   GFP_KERNEL);
	if (!pagesets)
		return -ENOMEM;

	/* Allocate the array of struct page pointers needed for pinning */
	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		goto bail;
	}

	/*
	 * Pin all the pages of the user buffer. If we can't pin all the
	 * pages, accept the amount pinned so far and program only that.
	 * User space knows how to deal with partially programmed buffers.
	 */
	if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
		ret = -ENOMEM;
		goto bail;
	}
	pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
	if (pinned <= 0) {
		ret = pinned;
		goto bail;
	}
	/* Account only for the pages that were actually pinned */
	fd->tid_n_pinned += pinned;

	/* Find sets of physically contiguous pages */
	npagesets = find_phys_blocks(pages, pinned, pagesets);

	/*
	 * We don't need to access this under a lock since tid_used is per
	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
	 * and hfi1_user_exp_rcv_setup() at the same time.
	 */
	spin_lock(&fd->tid_lock);
	if (fd->tid_used + npagesets > fd->tid_limit)
		pageset_count = fd->tid_limit - fd->tid_used;
	else
		pageset_count = npagesets;
	spin_unlock(&fd->tid_lock);

	if (!pageset_count)
		goto bail;

	ngroups = pageset_count / dd->rcv_entries.group_size;
	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
	if (!tidlist) {
		ret = -ENOMEM;
		goto nomem;
	}

	tididx = 0;

	/*
	 * From this point on, we are going to be using shared (between master
	 * and subcontexts) context resources. We need to take the lock.
	 */
	mutex_lock(&uctxt->exp_lock);
	/*
	 * The first step is to program the RcvArray entries which are complete
	 * groups.
	 */
	while (ngroups && uctxt->tid_group_list.count) {
		struct tid_group *grp =
			tid_group_pop(&uctxt->tid_group_list);

		ret = program_rcvarray(fp, vaddr, grp, pagesets,
				       pageidx, dd->rcv_entries.group_size,
				       pages, tidlist, &tididx, &mapped);
		/*
		 * If there was a failure to program the RcvArray
		 * entries for the entire group, reset the grp fields
		 * and add the grp back to the free group list.
		 */
		if (ret <= 0) {
			tid_group_add_tail(grp, &uctxt->tid_group_list);
			hfi1_cdbg(TID,
				  "Failed to program RcvArray group %d", ret);
			goto unlock;
		}

		tid_group_add_tail(grp, &uctxt->tid_full_list);
		ngroups--;
		pageidx += ret;
		mapped_pages += mapped;
	}

	while (pageidx < pageset_count) {
		struct tid_group *grp, *ptr;
		/*
		 * If we don't have any partially used tid groups, check
		 * if we have empty groups. If so, take one from there and
		 * put in the partially used list.
		 */
		if (!uctxt->tid_used_list.count || need_group) {
			if (!uctxt->tid_group_list.count)
				goto unlock;

			grp = tid_group_pop(&uctxt->tid_group_list);
			tid_group_add_tail(grp, &uctxt->tid_used_list);
			need_group = 0;
		}
		/*
		 * There is an optimization opportunity here - instead of
		 * fitting as many page sets as we can, check for a group
		 * later on in the list that could fit all of them.
		 */
		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
					 list) {
			unsigned use = min_t(unsigned, pageset_count - pageidx,
					     grp->size - grp->used);

			ret = program_rcvarray(fp, vaddr, grp, pagesets,
					       pageidx, use, pages, tidlist,
					       &tididx, &mapped);
			if (ret < 0) {
				hfi1_cdbg(TID,
					  "Failed to program RcvArray entries %d",
					  ret);
				ret = -EFAULT;
				goto unlock;
			} else if (ret > 0) {
				if (grp->used == grp->size)
					tid_group_move(grp,
						       &uctxt->tid_used_list,
						       &uctxt->tid_full_list);
				pageidx += ret;
				mapped_pages += mapped;
				need_group = 0;
				/* Check if we are done so we break out early */
				if (pageidx >= pageset_count)
					break;
			} else if (WARN_ON(ret == 0)) {
				/*
				 * If ret is 0, we did not program any entries
				 * into this group, which can only happen if
				 * we've screwed up the accounting somewhere.
				 * Warn and try to continue.
				 */
				need_group = 1;
			}
		}
	}
unlock:
	mutex_unlock(&uctxt->exp_lock);
nomem:
	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
		  mapped_pages, ret);
	if (tididx) {
		spin_lock(&fd->tid_lock);
		fd->tid_used += tididx;
		spin_unlock(&fd->tid_lock);
		tinfo->tidcnt = tididx;
		tinfo->length = mapped_pages * PAGE_SIZE;

		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
				 tidlist, sizeof(tidlist[0]) * tididx)) {
			/*
			 * On failure to copy to the user level, we need to undo
			 * everything done so far so we don't leak resources.
			 */
			tinfo->tidlist = (unsigned long)&tidlist;
			hfi1_user_exp_rcv_clear(fp, tinfo);
			tinfo->tidlist = 0;
			ret = -EFAULT;
			goto bail;
		}
	}

	/*
	 * If not everything was mapped (due to insufficient RcvArray entries,
	 * for example), unpin all unmapped pages so we can pin them next time.
	 */
	if (mapped_pages != pinned) {
		hfi1_release_user_pages(current->mm, &pages[mapped_pages],
					pinned - mapped_pages,
					false);
		fd->tid_n_pinned -= pinned - mapped_pages;
	}
bail:
	kfree(pagesets);
	kfree(pages);
	kfree(tidlist);
	return ret > 0 ? 0 : ret;
}

int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
{
	int ret = 0;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	u32 *tidinfo;
	unsigned tididx;

	tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
	if (!tidinfo)
		return -ENOMEM;

	if (copy_from_user(tidinfo, (void __user *)(unsigned long)
			   tinfo->tidlist, sizeof(tidinfo[0]) *
			   tinfo->tidcnt)) {
		ret = -EFAULT;
		goto done;
	}

	mutex_lock(&uctxt->exp_lock);
	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
		ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
		if (ret) {
			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
				  ret);
			break;
		}
	}
	spin_lock(&fd->tid_lock);
	fd->tid_used -= tididx;
	spin_unlock(&fd->tid_lock);
	tinfo->tidcnt = tididx;
	mutex_unlock(&uctxt->exp_lock);
done:
	kfree(tidinfo);
	return ret;
}

int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	unsigned long *ev = uctxt->dd->events +
		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
		  HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
	u32 *array;
	int ret = 0;

	if (!fd->invalid_tids)
		return -EINVAL;

	/*
	 * copy_to_user() can sleep, which will leave the invalid_lock
	 * locked and cause the MMU notifier to be blocked on the lock
	 * for a long time.
	 * Copy the data to a local buffer so we can release the lock.
	 */
	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
	if (!array)
		return -EFAULT;

	spin_lock(&fd->invalid_lock);
	if (fd->invalid_tid_idx) {
		memcpy(array, fd->invalid_tids, sizeof(*array) *
		       fd->invalid_tid_idx);
		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
		       fd->invalid_tid_idx);
		tinfo->tidcnt = fd->invalid_tid_idx;
		fd->invalid_tid_idx = 0;
		/*
		 * Reset the user flag while still holding the lock.
		 * Otherwise, PSM can miss events.
		 */
		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
	} else {
		tinfo->tidcnt = 0;
	}
	spin_unlock(&fd->invalid_lock);

	if (tinfo->tidcnt) {
		if (copy_to_user((void __user *)tinfo->tidlist,
				 array, sizeof(*array) * tinfo->tidcnt))
			ret = -EFAULT;
	}
	kfree(array);

	return ret;
}

static u32 find_phys_blocks(struct page **pages, unsigned npages,
			    struct tid_pageset *list)
{
	unsigned pagecount, pageidx, setcount = 0, i;
	unsigned long pfn, this_pfn;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This will allow us to optimize Expected RcvArray entry usage by
	 * using the bigger supported sizes.
	 */
	pfn = page_to_pfn(pages[0]);
	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;

		/*
		 * If the PFNs are not sequential, pages are not physically
		 * contiguous.
		 */
		if (this_pfn != ++pfn) {
			/*
			 * At this point we have to loop over the set of
			 * physically contiguous pages and break them down
			 * into sizes supported by the HW.
			 * There are two main constraints:
			 * 1. The max buffer size is MAX_EXPECTED_BUFFER.
			 *    If the total set size is bigger than that
			 *    program only a MAX_EXPECTED_BUFFER chunk.
			 * 2. The buffer size has to be a power of two. If
			 *    it is not, round down to the closest power of
			 *    2 and program that size.
			 */
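			/*
			 * Illustrative breakdown (4 KiB pages assumed): a run
			 * of 7 contiguous pages (28 KiB) is split into
			 * power-of-two chunks of 4, 2, and 1 pages, producing
			 * three pagesets of 16 KiB, 8 KiB, and 4 KiB.
			 */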
			while (pagecount) {
				int maxpages = pagecount;
				u32 bufsize = pagecount * PAGE_SIZE;

				if (bufsize > MAX_EXPECTED_BUFFER)
					maxpages =
						MAX_EXPECTED_BUFFER >>
						PAGE_SHIFT;
				else if (!is_power_of_2(bufsize))
					maxpages =
						rounddown_pow_of_two(bufsize) >>
						PAGE_SHIFT;

				list[setcount].idx = pageidx;
				list[setcount].count = maxpages;
				pagecount -= maxpages;
				pageidx += maxpages;
				setcount++;
			}
			pageidx = i;
			pagecount = 1;
			pfn = this_pfn;
		} else {
			pagecount++;
		}
	}
	return setcount;
}

/**
 * program_rcvarray() - program an RcvArray group with receive buffers
 * @fp: file pointer
 * @vaddr: starting user virtual address
 * @grp: RcvArray group
 * @sets: array of struct tid_pageset holding information on physically
 *        contiguous chunks from the user buffer
 * @start: starting index into sets array
 * @count: number of struct tid_pageset's to program
 * @pages: an array of struct page * for the user buffer
 * @tidlist: the array of u32 elements where the information about the
 *           programmed RcvArray entries is to be encoded.
 * @tididx: starting offset into tidlist
 * @pmapped: (output parameter) number of pages programmed into the RcvArray
 *           entries.
 *
 * This function will program up to 'count' number of RcvArray entries from the
 * group 'grp'. To make best use of write-combining writes, the function will
 * perform writes to the unused RcvArray entries which will be ignored by the
 * HW. Each RcvArray entry will be programmed with a physically contiguous
 * buffer chunk from the user's virtual buffer.
 *
 * Return:
 * -EINVAL if the requested count is larger than the size of the group,
 * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
 * number of RcvArray entries programmed.
 */
static int program_rcvarray(struct file *fp, unsigned long vaddr,
			    struct tid_group *grp,
			    struct tid_pageset *sets,
			    unsigned start, u16 count, struct page **pages,
			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 idx;
	u32 tidinfo = 0, rcventry, useidx = 0;
	int mapped = 0;

	/* Count should never be larger than the group size */
	if (count > grp->size)
		return -EINVAL;

	/* Find the first unused entry in the group */
	for (idx = 0; idx < grp->size; idx++) {
		if (!(grp->map & (1 << idx))) {
			useidx = idx;
			break;
		}
		rcv_array_wc_fill(dd, grp->base + idx);
	}

	idx = 0;
	while (idx < count) {
		u16 npages, pageidx, setidx = start + idx;
		int ret = 0;

		/*
		 * If this entry in the group is used, move to the next one.
		 * If we go past the end of the group, exit the loop.
		 */
		if (useidx >= grp->size) {
			break;
		} else if (grp->map & (1 << useidx)) {
			rcv_array_wc_fill(dd, grp->base + useidx);
			useidx++;
			continue;
		}

		rcventry = grp->base + useidx;
		npages = sets[setidx].count;
		pageidx = sets[setidx].idx;

		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
					 rcventry, grp, pages + pageidx,
					 npages);
		if (ret)
			return ret;
		mapped += npages;

		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
			EXP_TID_SET(LEN, npages);
		tidlist[(*tididx)++] = tidinfo;
		grp->used++;
		grp->map |= 1 << useidx++;
		idx++;
	}

	/* Fill the rest of the group with "blank" writes */
	for (; useidx < grp->size; useidx++)
		rcv_array_wc_fill(dd, grp->base + useidx);
	*pmapped = mapped;
	return idx;
}

static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
			      u32 rcventry, struct tid_group *grp,
			      struct page **pages, unsigned npages)
{
	int ret;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct tid_rb_node *node;
	struct hfi1_devdata *dd = uctxt->dd;
	struct rb_root *root = &fd->tid_rb_root;
	dma_addr_t phys;

	/*
	 * Allocate the node first so we can handle a potential
	 * failure before we've programmed anything.
	 */
	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
		       GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	phys = pci_map_single(dd->pcidev,
			      __va(page_to_phys(pages[0])),
			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
			   phys);
		kfree(node);
		return -EFAULT;
	}

	node->mmu.addr = vaddr;
	node->mmu.len = npages * PAGE_SIZE;
	node->phys = page_to_phys(pages[0]);
	node->npages = npages;
	node->rcventry = rcventry;
	node->dma_addr = phys;
	node->grp = grp;
	node->freed = false;
	memcpy(node->pages, pages, sizeof(struct page *) * npages);

	if (HFI1_CAP_IS_USET(TID_UNMAP))
		ret = mmu_rb_insert(root, &node->mmu);
	else
		ret = hfi1_mmu_rb_insert(root, &node->mmu);

	if (ret) {
		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
			  node->rcventry, node->mmu.addr, node->phys, ret);
		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
				 PCI_DMA_FROMDEVICE);
		kfree(node);
		return -EFAULT;
	}
	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
			       node->mmu.addr, node->phys, phys);
	return 0;
}

static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
			      struct tid_group **grp)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	struct tid_rb_node *node;
	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
	u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;

	if (tididx >= uctxt->expected_count) {
		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
			   tididx, uctxt->ctxt);
		return -EINVAL;
	}

	if (tidctrl == 0x3)
		return -EINVAL;

	rcventry = tididx + (tidctrl - 1);

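	/*
	 * Decoding example (mirrors rcventry2tidinfo() above): IDX = 2 and
	 * CTRL = 2 give tididx = 4 and rcventry = 4 + (2 - 1) = 5, the odd
	 * entry of the pair.
	 */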
	node = fd->entry_to_rb[rcventry];
	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
		return -EBADF;
	if (HFI1_CAP_IS_USET(TID_UNMAP))
		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
	else
		hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);

	if (grp)
		*grp = node->grp;
	clear_tid_node(fd, fd->subctxt, node);
	return 0;
}

static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
			   struct tid_rb_node *node)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;

	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
				 node->npages, node->mmu.addr, node->phys,
				 node->dma_addr);

	hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
	/*
	 * Make sure device has seen the write before we unpin the
	 * pages.
	 */
	flush_wc();

	pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
			 PCI_DMA_FROMDEVICE);
	hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
	fd->tid_n_pinned -= node->npages;

	node->grp->used--;
	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));

	if (node->grp->used == node->grp->size - 1)
		tid_group_move(node->grp, &uctxt->tid_full_list,
			       &uctxt->tid_used_list);
	else if (!node->grp->used)
		tid_group_move(node->grp, &uctxt->tid_used_list,
			       &uctxt->tid_group_list);
	kfree(node);
}

static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
			    struct exp_tid_set *set, struct rb_root *root)
{
	struct tid_group *grp, *ptr;
	struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
						tid_rb_root);
	int i;

	list_for_each_entry_safe(grp, ptr, &set->list, list) {
		list_del_init(&grp->list);

		for (i = 0; i < grp->size; i++) {
			if (grp->map & (1 << i)) {
				u16 rcventry = grp->base + i;
				struct tid_rb_node *node;

				node = fd->entry_to_rb[rcventry -
						       uctxt->expected_base];
				if (!node || node->rcventry != rcventry)
					continue;
				if (HFI1_CAP_IS_USET(TID_UNMAP))
					mmu_rb_remove(&fd->tid_rb_root,
						      &node->mmu, false);
				else
					hfi1_mmu_rb_remove(&fd->tid_rb_root,
							   &node->mmu);
				clear_tid_node(fd, -1, node);
			}
		}
	}
}

static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
{
	struct hfi1_filedata *fdata =
		container_of(root, struct hfi1_filedata, tid_rb_root);
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct tid_rb_node *node =
		container_of(mnode, struct tid_rb_node, mmu);

	if (node->freed)
		return 0;

	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
				 node->rcventry, node->npages, node->dma_addr);
	node->freed = true;

	spin_lock(&fdata->invalid_lock);
	if (fdata->invalid_tid_idx < uctxt->expected_count) {
		fdata->invalid_tids[fdata->invalid_tid_idx] =
			rcventry2tidinfo(node->rcventry - uctxt->expected_base);
		fdata->invalid_tids[fdata->invalid_tid_idx] |=
			EXP_TID_SET(LEN, node->npages);
		if (!fdata->invalid_tid_idx) {
			unsigned long *ev;

			/*
			 * hfi1_set_uevent_bits() sets a user event flag
			 * for all processes. Because calling into the
			 * driver to process TID cache invalidations is
			 * expensive and TID cache invalidations are
			 * handled on a per-process basis, we can
			 * optimize this to set the flag only for the
			 * process in question.
			 */
			ev = uctxt->dd->events +
				(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
				  HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
			set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
		}
		fdata->invalid_tid_idx++;
	}
	spin_unlock(&fdata->invalid_lock);
	return 0;
}

static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
{
	struct hfi1_filedata *fdata =
		container_of(root, struct hfi1_filedata, tid_rb_root);
	struct tid_rb_node *tnode =
		container_of(node, struct tid_rb_node, mmu);
	u32 base = fdata->uctxt->expected_base;

	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
	return 0;
}

static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
			  bool notifier)
{
	struct hfi1_filedata *fdata =
		container_of(root, struct hfi1_filedata, tid_rb_root);
	struct tid_rb_node *tnode =
		container_of(node, struct tid_rb_node, mmu);
	u32 base = fdata->uctxt->expected_base;

	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
}