/*
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <asm/page.h>

#include "user_exp_rcv.h"
#include "trace.h"

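/*
 * A tid_group tracks one group of contiguous RcvArray entries:
 * @list:  membership in one of the context's group lists
 * @base:  index of the first RcvArray entry in the group
 * @size:  number of RcvArray entries in the group
 * @used:  how many entries are currently programmed
 * @map:   bitmask of which entries in the group are programmed
 */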
struct tid_group {
	struct list_head list;
	unsigned base;
	u8 size;
	u8 used;
	u8 map;
};

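/*
 * Each mmu_rb_node tracks one pinned and DMA-mapped, physically
 * contiguous chunk of a user buffer that has been programmed into a
 * single RcvArray entry. Nodes live in the per-file RB tree
 * (fd->tid_rb_root) so they can be found again by virtual address or
 * by RcvArray entry.
 */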
struct mmu_rb_node {
	struct rb_node rbnode;
	unsigned long virt;
	unsigned long phys;
	unsigned long len;
	struct tid_group *grp;
	u32 rcventry;
	dma_addr_t dma_addr;
	bool freed;
	unsigned npages;
	struct page *pages[0];
};

enum mmu_call_types {
	MMU_INVALIDATE_PAGE = 0,
	MMU_INVALIDATE_RANGE = 1
};

static const char * const mmu_types[] = {
	"PAGE",
	"RANGE"
};

struct tid_pageset {
	u16 idx;
	u16 count;
};

#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))

static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
			    struct rb_root *) __maybe_unused;
static u32 find_phys_blocks(struct page **, unsigned,
			    struct tid_pageset *) __maybe_unused;
static int set_rcvarray_entry(struct file *, unsigned long, u32,
			      struct tid_group *, struct page **,
			      unsigned) __maybe_unused;
static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long,
			       unsigned long);
static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *,
						 unsigned long) __maybe_unused;
static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *,
							 u32);
static int mmu_rb_insert_by_addr(struct rb_root *,
				 struct mmu_rb_node *) __maybe_unused;
static int mmu_rb_insert_by_entry(struct rb_root *,
				  struct mmu_rb_node *) __maybe_unused;
static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
					unsigned long, unsigned long,
					enum mmu_call_types);
static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
				     unsigned long);
static inline void mmu_notifier_range_start(struct mmu_notifier *,
					    struct mm_struct *,
					    unsigned long, unsigned long);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
			    struct tid_pageset *, unsigned, u16, struct page **,
			    u32 *, unsigned *, unsigned *) __maybe_unused;
static int unprogram_rcvarray(struct file *, u32,
			      struct tid_group **) __maybe_unused;
static void clear_tid_node(struct hfi1_filedata *, u16,
			   struct mmu_rb_node *) __maybe_unused;

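/*
 * RcvArray entries are managed in pairs. The TID info word encodes the
 * pair index in IDX and, in CTRL, a two-bit mask selecting the even
 * (0x1) and/or odd (0x2) entry of the pair.
 */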
static inline u32 rcventry2tidinfo(u32 rcventry)
{
	u32 pair = rcventry & ~0x1;

	return EXP_TID_SET(IDX, pair >> 1) |
		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
}

static inline void exp_tid_group_init(struct exp_tid_set *set)
{
	INIT_LIST_HEAD(&set->list);
	set->count = 0;
}

static inline void tid_group_remove(struct tid_group *grp,
				    struct exp_tid_set *set)
{
	list_del_init(&grp->list);
	set->count--;
}

static inline void tid_group_add_tail(struct tid_group *grp,
				      struct exp_tid_set *set)
{
	list_add_tail(&grp->list, &set->list);
	set->count++;
}

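/*
 * Remove and return the first group in the set. The caller must ensure
 * the set is not empty (e.g. via EXP_TID_SET_EMPTY()), since
 * list_first_entry() is not valid on an empty list.
 */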
static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
{
	struct tid_group *grp =
		list_first_entry(&set->list, struct tid_group, list);
	list_del_init(&grp->list);
	set->count--;
	return grp;
}

static inline void tid_group_move(struct tid_group *group,
				  struct exp_tid_set *s1,
				  struct exp_tid_set *s2)
{
	tid_group_remove(group, s1);
	tid_group_add_tail(group, s2);
}

static struct mmu_notifier_ops __maybe_unused mn_opts = {
	.invalidate_page = mmu_notifier_page,
	.invalidate_range_start = mmu_notifier_range_start,
};

/*
 * Initialize context and file private data needed for Expected
 * receive caching. This needs to be done after the context has
 * been configured with the eager/expected RcvEntry counts.
 */
int hfi1_user_exp_rcv_init(struct file *fp)
{
	return -EINVAL;
}

int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
	return -EINVAL;
}

/*
 * Write an "empty" RcvArray entry.
 * This function exists so the TID registration code can use it
 * to write to unused/unneeded entries and still take advantage
 * of the WC performance improvements. The HFI will ignore this
 * write to the RcvArray entry.
 */
static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
{
	/*
	 * Doing the WC fill writes only makes sense if the device is
	 * present and the RcvArray has been mapped as WC memory.
	 */
	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
		writeq(0, dd->rcvarray_wc + (index * 8));
}

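/*
 * Like hfi1_user_exp_rcv_init()/free() above, the three entry points
 * below are stubs for now; presumably later patches in this series
 * fill them in, which is also why the static helpers in this file are
 * marked __maybe_unused.
 */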
int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
{
	return -EINVAL;
}

int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
{
	return -EINVAL;
}

int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
{
	return -EINVAL;
}

static u32 find_phys_blocks(struct page **pages, unsigned npages,
			    struct tid_pageset *list)
{
	unsigned pagecount, pageidx, setcount = 0, i;
	unsigned long pfn, this_pfn;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This will allow us to optimize Expected RcvArray entry usage by
	 * using the bigger supported sizes.
	 */
	pfn = page_to_pfn(pages[0]);
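	/*
	 * Note: the loop below deliberately runs one iteration past the
	 * last page. On that final pass this_pfn is 0, which forces the
	 * "not contiguous" branch and flushes the final run of
	 * contiguous pages into 'list'.
	 */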
	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;

		/*
		 * If the PFNs are not sequential, pages are not physically
		 * contiguous.
		 */
		if (this_pfn != ++pfn) {
			/*
			 * At this point we have to loop over the set of
			 * physically contiguous pages and break them down
			 * into sizes supported by the HW.
			 * There are two main constraints:
			 * 1. The max buffer size is MAX_EXPECTED_BUFFER.
			 *    If the total set size is bigger than that
			 *    program only a MAX_EXPECTED_BUFFER chunk.
			 * 2. The buffer size has to be a power of two. If
			 *    it is not, round down to the closest power of
			 *    two and program that size.
			 */
			while (pagecount) {
				int maxpages = pagecount;
				u32 bufsize = pagecount * PAGE_SIZE;

				if (bufsize > MAX_EXPECTED_BUFFER)
					maxpages =
						MAX_EXPECTED_BUFFER >>
						PAGE_SHIFT;
				else if (!is_power_of_2(bufsize))
					maxpages =
						rounddown_pow_of_two(bufsize) >>
						PAGE_SHIFT;

				list[setcount].idx = pageidx;
				list[setcount].count = maxpages;
				pagecount -= maxpages;
				pageidx += maxpages;
				setcount++;
			}
			pageidx = i;
			pagecount = 1;
			pfn = this_pfn;
		} else {
			pagecount++;
		}
	}
	return setcount;
}

/**
 * program_rcvarray() - program an RcvArray group with receive buffers
 * @fp: file pointer
 * @vaddr: starting user virtual address
 * @grp: RcvArray group
 * @sets: array of struct tid_pageset holding information on physically
 *        contiguous chunks from the user buffer
 * @start: starting index into sets array
 * @count: number of struct tid_pageset's to program
 * @pages: an array of struct page * for the user buffer
 * @tidlist: the array of u32 elements where the information about the
 *           programmed RcvArray entries is to be encoded.
 * @tididx: starting offset into tidlist
 * @pmapped: (output parameter) number of pages programmed into the RcvArray
 *           entries.
 *
 * This function will program up to 'count' RcvArray entries from the
 * group 'grp'. To make best use of write-combining writes, the function will
 * perform writes to the unused RcvArray entries which will be ignored by the
 * HW. Each RcvArray entry will be programmed with a physically contiguous
 * buffer chunk from the user's virtual buffer.
 *
 * Return:
 * -EINVAL if the requested count is larger than the size of the group,
 * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
 * number of RcvArray entries programmed.
 */
static int program_rcvarray(struct file *fp, unsigned long vaddr,
			    struct tid_group *grp,
			    struct tid_pageset *sets,
			    unsigned start, u16 count, struct page **pages,
			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	u16 idx;
	u32 tidinfo = 0, rcventry, useidx = 0;
	int mapped = 0;

	/* Count should never be larger than the group size */
	if (count > grp->size)
		return -EINVAL;

	/* Find the first unused entry in the group */
	for (idx = 0; idx < grp->size; idx++) {
		if (!(grp->map & (1 << idx))) {
			useidx = idx;
			break;
		}
		rcv_array_wc_fill(dd, grp->base + idx);
	}

	idx = 0;
	while (idx < count) {
		u16 npages, pageidx, setidx = start + idx;
		int ret = 0;

		/*
		 * If this entry in the group is used, move to the next one.
		 * If we go past the end of the group, exit the loop.
		 */
		if (useidx >= grp->size) {
			break;
		} else if (grp->map & (1 << useidx)) {
			rcv_array_wc_fill(dd, grp->base + useidx);
			useidx++;
			continue;
		}

		rcventry = grp->base + useidx;
		npages = sets[setidx].count;
		pageidx = sets[setidx].idx;

		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
					 rcventry, grp, pages + pageidx,
					 npages);
		if (ret)
			return ret;
		mapped += npages;

		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
			EXP_TID_SET(LEN, npages);
		tidlist[(*tididx)++] = tidinfo;
		grp->used++;
		grp->map |= 1 << useidx++;
		idx++;
	}

	/* Fill the rest of the group with "blank" writes */
	for (; useidx < grp->size; useidx++)
		rcv_array_wc_fill(dd, grp->base + useidx);
	*pmapped = mapped;
	return idx;
}

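/*
 * Program a single RcvArray entry: allocate a tracking node, DMA map
 * the physically contiguous buffer chunk, insert the node into the
 * per-file RB tree, and finally write the entry to the HW.
 */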
static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
			      u32 rcventry, struct tid_group *grp,
			      struct page **pages, unsigned npages)
{
	int ret;
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct mmu_rb_node *node;
	struct hfi1_devdata *dd = uctxt->dd;
	struct rb_root *root = &fd->tid_rb_root;
	dma_addr_t phys;

	/*
	 * Allocate the node first so we can handle a potential
	 * failure before we've programmed anything.
	 */
	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
		       GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	phys = pci_map_single(dd->pcidev,
			      __va(page_to_phys(pages[0])),
			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
			   phys);
		kfree(node);
		return -EFAULT;
	}

	node->virt = vaddr;
	node->phys = page_to_phys(pages[0]);
	node->len = npages * PAGE_SIZE;
	node->npages = npages;
	node->rcventry = rcventry;
	node->dma_addr = phys;
	node->grp = grp;
	node->freed = false;
	memcpy(node->pages, pages, sizeof(struct page *) * npages);

	spin_lock(&fd->rb_lock);
	ret = fd->mmu_rb_insert(root, node);
	spin_unlock(&fd->rb_lock);

	if (ret) {
		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
			  node->rcventry, node->virt, node->phys, ret);
		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
				 PCI_DMA_FROMDEVICE);
		kfree(node);
		return -EFAULT;
	}
	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
	return 0;
}

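/*
 * Tear down the RcvArray entry described by tidinfo: IDX selects the
 * entry pair and CTRL the entry within the pair (0x1 = even,
 * 0x2 = odd). CTRL == 0x3 (both entries) is rejected as invalid here.
 */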
static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
			      struct tid_group **grp)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	struct mmu_rb_node *node;
	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
	u32 tidbase = uctxt->expected_base,
		tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;

	if (tididx >= uctxt->expected_count) {
		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
			   tididx, uctxt->ctxt);
		return -EINVAL;
	}

	if (tidctrl == 0x3)
		return -EINVAL;

	rcventry = tidbase + tididx + (tidctrl - 1);

	spin_lock(&fd->rb_lock);
	node = mmu_rb_search_by_entry(&fd->tid_rb_root, rcventry);
	if (!node) {
		spin_unlock(&fd->rb_lock);
		return -EBADF;
	}
	rb_erase(&node->rbnode, &fd->tid_rb_root);
	spin_unlock(&fd->rb_lock);
	if (grp)
		*grp = node->grp;
	clear_tid_node(fd, fd->subctxt, node);
	return 0;
}

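/*
 * Release everything associated with a programmed entry: invalidate
 * the HW entry, unmap and unpin the pages, and return the entry to its
 * group, moving the group between the context's lists as its usage
 * count changes.
 */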
static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
			   struct mmu_rb_node *node)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;

	hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
	/*
	 * Make sure device has seen the write before we unpin the
	 * pages.
	 */
	flush_wc();

	pci_unmap_single(dd->pcidev, node->dma_addr, node->len,
			 PCI_DMA_FROMDEVICE);
	hfi1_release_user_pages(node->pages, node->npages, true);

	node->grp->used--;
	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));

	if (node->grp->used == node->grp->size - 1)
		tid_group_move(node->grp, &uctxt->tid_full_list,
			       &uctxt->tid_used_list);
	else if (!node->grp->used)
		tid_group_move(node->grp, &uctxt->tid_used_list,
			       &uctxt->tid_group_list);
	kfree(node);
}

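/*
 * Tear down every programmed entry in every group of 'set',
 * presumably for bulk release at context shutdown.
 */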
static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
			    struct exp_tid_set *set, struct rb_root *root)
{
	struct tid_group *grp, *ptr;
	struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
						tid_rb_root);
	int i;

	list_for_each_entry_safe(grp, ptr, &set->list, list) {
		list_del_init(&grp->list);

		spin_lock(&fd->rb_lock);
		for (i = 0; i < grp->size; i++) {
			if (grp->map & (1 << i)) {
				u16 rcventry = grp->base + i;
				struct mmu_rb_node *node;

				node = mmu_rb_search_by_entry(root, rcventry);
				if (!node)
					continue;
				rb_erase(&node->rbnode, root);
				clear_tid_node(fd, -1, node);
			}
		}
		spin_unlock(&fd->rb_lock);
	}
}

static inline void mmu_notifier_page(struct mmu_notifier *mn,
				     struct mm_struct *mm, unsigned long addr)
{
	mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE,
				    MMU_INVALIDATE_PAGE);
}

static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
					    struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end)
{
	mmu_notifier_mem_invalidate(mn, start, end, MMU_INVALIDATE_RANGE);
}

static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
					unsigned long start, unsigned long end,
					enum mmu_call_types type)
{
	/* Stub for now */
}

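/*
 * Interval comparison for the address-keyed tree: returns -1 if
 * [addr, addr + len) lies entirely below the node's buffer, 0 if addr
 * falls inside the node's buffer, and 1 otherwise.
 */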
static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr,
			       unsigned long len)
{
	if ((addr + len) <= node->virt)
		return -1;
	else if (addr >= node->virt && addr < (node->virt + node->len))
		return 0;
	else
		return 1;
}

static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry)
{
	if (entry < node->rcventry)
		return -1;
	else if (entry > node->rcventry)
		return 1;
	else
		return 0;
}

static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *root,
						 unsigned long addr)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct mmu_rb_node *mnode =
			container_of(node, struct mmu_rb_node, rbnode);
		/*
		 * When searching, use at least one page length for size. The
		 * MMU notifier will not give us anything less than that. We
		 * also don't need anything more than a page because we are
		 * guaranteed to have non-overlapping buffers in the tree.
		 */
		int result = mmu_addr_cmp(mnode, addr, PAGE_SIZE);

		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
			return mnode;
	}
	return NULL;
}

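/*
 * Unlike the address search above, this walks the whole tree linearly
 * rather than binary-searching, which stays correct whether the tree
 * is keyed by address or by entry.
 */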
static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *root,
							 u32 index)
{
	struct mmu_rb_node *rbnode;
	struct rb_node *node;

	if (root && !RB_EMPTY_ROOT(root))
		for (node = rb_first(root); node; node = rb_next(node)) {
			rbnode = rb_entry(node, struct mmu_rb_node, rbnode);
			if (rbnode->rcventry == index)
				return rbnode;
		}
	return NULL;
}

static int mmu_rb_insert_by_entry(struct rb_root *root,
				  struct mmu_rb_node *node)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;

	while (*new) {
		struct mmu_rb_node *this =
			container_of(*new, struct mmu_rb_node, rbnode);
		int result = mmu_entry_cmp(this, node->rcventry);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else
			return 1;
	}

	rb_link_node(&node->rbnode, parent, new);
	rb_insert_color(&node->rbnode, root);
	return 0;
}

static int mmu_rb_insert_by_addr(struct rb_root *root, struct mmu_rb_node *node)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;

	/* Figure out where to put new node */
	while (*new) {
		struct mmu_rb_node *this =
			container_of(*new, struct mmu_rb_node, rbnode);
		int result = mmu_addr_cmp(this, node->virt, node->len);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else
			return 1;
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&node->rbnode, parent, new);
	rb_insert_color(&node->rbnode, root);

	return 0;
}