Commit | Line | Data |
---|---|---|
f1a2d865 SD |
1 | /* |
2 | * Intel MIC Platform Software Stack (MPSS) | |
3 | * | |
4 | * Copyright(c) 2015 Intel Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License, version 2, as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License for more details. | |
14 | * | |
15 | * Intel SCIF driver. | |
16 | * | |
17 | */ | |
18 | #include "scif_main.h" | |
19 | ||
/*
 * struct scif_vma_info - Information about a remote memory mapping
 * created via scif_mmap(..)
 * @vma: VM area struct
 * @list: link to list of active vmas; entries are added and removed
 *        under the owning endpoint's ep->lock
 */
struct scif_vma_info {
	struct vm_area_struct *vma;
	struct list_head list;
};
30 | ||
/*
 * scif_recv_munmap - Handle a SCIF_MUNMAP message from the peer
 * @scifdev: SCIF device the message arrived from
 * @msg: interrupt message; payload[0] carries the pointer to our local
 *       registered window backing the mapping the peer tore down
 *
 * Looks up the registered window matching the received one, drops the
 * references the peer's mapping held and, once the window becomes
 * unused, detaches it and queues it for asynchronous destruction.
 */
void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	/* payload[0] is our own window pointer, echoed back by the peer */
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;

	ep = (struct scif_endpt *)recv_window->ep;
	/* Build a full-window query matching the received window exactly */
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = recv_window->prot;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		/*
		 * NOTE(review): the uop is rewritten here; presumably the
		 * message is replied to by the caller — confirm against
		 * the node queue dispatch code.
		 */
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	/* Drop every reference the peer's mapping held on this window */
	scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		/* Window unused: detach it and release its offset ... */
		atomic_inc(&ep->rma_info.tw_refcount);
		ep->rma_info.async_list_del = 1;
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	/* ... and queue its destruction outside rma_lock */
	if (window && !window->ref_count)
		scif_queue_for_cleanup(window, &scif_info.rma);
}
70 | ||
71 | /* | |
72 | * Remove valid remote memory mappings created via scif_mmap(..) from the | |
73 | * process address space since the remote node is lost | |
74 | */ | |
75 | static void __scif_zap_mmaps(struct scif_endpt *ep) | |
76 | { | |
77 | struct list_head *item; | |
78 | struct scif_vma_info *info; | |
79 | struct vm_area_struct *vma; | |
80 | unsigned long size; | |
81 | ||
82 | spin_lock(&ep->lock); | |
83 | list_for_each(item, &ep->rma_info.vma_list) { | |
84 | info = list_entry(item, struct scif_vma_info, list); | |
85 | vma = info->vma; | |
86 | size = vma->vm_end - vma->vm_start; | |
87 | zap_vma_ptes(vma, vma->vm_start, size); | |
88 | dev_dbg(scif_info.mdev.this_device, | |
89 | "%s ep %p zap vma %p size 0x%lx\n", | |
90 | __func__, ep, info->vma, size); | |
91 | } | |
92 | spin_unlock(&ep->lock); | |
93 | } | |
94 | ||
95 | /* | |
96 | * Traverse the list of endpoints for a particular remote node and | |
97 | * zap valid remote memory mappings since the remote node is lost | |
98 | */ | |
99 | static void _scif_zap_mmaps(int node, struct list_head *head) | |
100 | { | |
101 | struct scif_endpt *ep; | |
102 | struct list_head *item; | |
103 | ||
104 | mutex_lock(&scif_info.connlock); | |
105 | list_for_each(item, head) { | |
106 | ep = list_entry(item, struct scif_endpt, list); | |
107 | if (ep->remote_dev->node == node) | |
108 | __scif_zap_mmaps(ep); | |
109 | } | |
110 | mutex_unlock(&scif_info.connlock); | |
111 | } | |
112 | ||
113 | /* | |
114 | * Wrapper for removing remote memory mappings for a particular node. This API | |
115 | * is called by peer nodes as part of handling a lost node. | |
116 | */ | |
117 | void scif_zap_mmaps(int node) | |
118 | { | |
119 | _scif_zap_mmaps(node, &scif_info.connected); | |
120 | _scif_zap_mmaps(node, &scif_info.disconnected); | |
121 | } | |
122 | ||
/*
 * This API is only called while handling a lost node:
 * a) Remote node is dead.
 * b) Remote memory mappings have been zapped
 * So we can traverse the remote_reg_list without any locks. Since
 * the window has not yet been unregistered we can drop the ref count
 * and queue it to the cleanup thread.
 */
static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
{
	struct list_head *pos, *tmp;
	struct scif_window *window;

	/* _safe variant: windows are unlinked while walking the list */
	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
		window = list_entry(pos, struct scif_window, list);
		/* A window still on this list is expected to hold refs */
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(scif_info.mdev.this_device,
				"%s %d unexpected\n",
				__func__, __LINE__);
		if (!window->ref_count) {
			/* Unlink and hand the window to the cleanup thread */
			atomic_inc(&ep->rma_info.tw_refcount);
			list_del_init(&window->list);
			scif_queue_for_cleanup(window, &scif_info.rma);
		}
	}
}
151 | ||
152 | /* Cleanup remote registration lists for zombie endpoints */ | |
153 | void scif_cleanup_rma_for_zombies(int node) | |
154 | { | |
155 | struct scif_endpt *ep; | |
156 | struct list_head *item; | |
157 | ||
158 | mutex_lock(&scif_info.eplock); | |
159 | list_for_each(item, &scif_info.zombie) { | |
160 | ep = list_entry(item, struct scif_endpt, list); | |
161 | if (ep->remote_dev && ep->remote_dev->node == node) | |
162 | __scif_cleanup_rma_for_zombies(ep); | |
163 | } | |
164 | mutex_unlock(&scif_info.eplock); | |
165 | flush_work(&scif_info.misc_work); | |
166 | } | |
167 | ||
168 | /* Insert the VMA into the per endpoint VMA list */ | |
169 | static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma) | |
170 | { | |
171 | struct scif_vma_info *info; | |
172 | int err = 0; | |
173 | ||
174 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
175 | if (!info) { | |
176 | err = -ENOMEM; | |
177 | goto done; | |
178 | } | |
179 | info->vma = vma; | |
180 | spin_lock(&ep->lock); | |
181 | list_add_tail(&info->list, &ep->rma_info.vma_list); | |
182 | spin_unlock(&ep->lock); | |
183 | done: | |
184 | return err; | |
185 | } | |
186 | ||
187 | /* Delete the VMA from the per endpoint VMA list */ | |
188 | static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma) | |
189 | { | |
190 | struct list_head *item; | |
191 | struct scif_vma_info *info; | |
192 | ||
193 | spin_lock(&ep->lock); | |
194 | list_for_each(item, &ep->rma_info.vma_list) { | |
195 | info = list_entry(item, struct scif_vma_info, list); | |
196 | if (info->vma == vma) { | |
197 | list_del(&info->list); | |
198 | kfree(info); | |
199 | break; | |
200 | } | |
201 | } | |
202 | spin_unlock(&ep->lock); | |
203 | } | |
204 | ||
205 | static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep) | |
206 | { | |
207 | struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev; | |
208 | struct scif_hw_dev *sdev = scifdev->sdev; | |
209 | phys_addr_t out_phys, apt_base = 0; | |
210 | ||
211 | /* | |
212 | * If the DMA address is card relative then we need to add the | |
213 | * aperture base for mmap to work correctly | |
214 | */ | |
215 | if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da) | |
216 | apt_base = sdev->aper->pa; | |
217 | out_phys = apt_base + phys; | |
218 | return out_phys; | |
219 | } | |
220 | ||
221 | int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, | |
222 | struct scif_range **pages) | |
223 | { | |
224 | struct scif_endpt *ep = (struct scif_endpt *)epd; | |
225 | struct scif_rma_req req; | |
226 | struct scif_window *window = NULL; | |
227 | int nr_pages, err, i; | |
228 | ||
229 | dev_dbg(scif_info.mdev.this_device, | |
230 | "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n", | |
231 | ep, offset, len); | |
232 | err = scif_verify_epd(ep); | |
233 | if (err) | |
234 | return err; | |
235 | ||
236 | if (!len || (offset < 0) || | |
237 | (offset + len < offset) || | |
238 | (ALIGN(offset, PAGE_SIZE) != offset) || | |
239 | (ALIGN(len, PAGE_SIZE) != len)) | |
240 | return -EINVAL; | |
241 | ||
242 | nr_pages = len >> PAGE_SHIFT; | |
243 | ||
244 | req.out_window = &window; | |
245 | req.offset = offset; | |
246 | req.prot = 0; | |
247 | req.nr_bytes = len; | |
248 | req.type = SCIF_WINDOW_SINGLE; | |
249 | req.head = &ep->rma_info.remote_reg_list; | |
250 | ||
251 | mutex_lock(&ep->rma_info.rma_lock); | |
252 | /* Does a valid window exist? */ | |
253 | err = scif_query_window(&req); | |
254 | if (err) { | |
255 | dev_err(&ep->remote_dev->sdev->dev, | |
256 | "%s %d err %d\n", __func__, __LINE__, err); | |
257 | goto error; | |
258 | } | |
259 | ||
260 | /* Allocate scif_range */ | |
261 | *pages = kzalloc(sizeof(**pages), GFP_KERNEL); | |
262 | if (!*pages) { | |
263 | err = -ENOMEM; | |
264 | goto error; | |
265 | } | |
266 | ||
267 | /* Allocate phys addr array */ | |
268 | (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)); | |
269 | if (!((*pages)->phys_addr)) { | |
270 | err = -ENOMEM; | |
271 | goto error; | |
272 | } | |
273 | ||
274 | if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) { | |
275 | /* Allocate virtual address array */ | |
276 | ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *))); | |
277 | if (!(*pages)->va) { | |
278 | err = -ENOMEM; | |
279 | goto error; | |
280 | } | |
281 | } | |
282 | /* Populate the values */ | |
283 | (*pages)->cookie = window; | |
284 | (*pages)->nr_pages = nr_pages; | |
285 | (*pages)->prot_flags = window->prot; | |
286 | ||
287 | for (i = 0; i < nr_pages; i++) { | |
288 | (*pages)->phys_addr[i] = | |
289 | __scif_off_to_dma_addr(window, offset + | |
290 | (i * PAGE_SIZE)); | |
291 | (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i], | |
292 | ep); | |
293 | if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) | |
294 | (*pages)->va[i] = | |
295 | ep->remote_dev->sdev->aper->va + | |
296 | (*pages)->phys_addr[i] - | |
297 | ep->remote_dev->sdev->aper->pa; | |
298 | } | |
299 | ||
300 | scif_get_window(window, nr_pages); | |
301 | error: | |
302 | mutex_unlock(&ep->rma_info.rma_lock); | |
303 | if (err) { | |
304 | if (*pages) { | |
305 | scif_free((*pages)->phys_addr, | |
306 | nr_pages * sizeof(dma_addr_t)); | |
307 | scif_free((*pages)->va, | |
308 | nr_pages * sizeof(void *)); | |
309 | kfree(*pages); | |
310 | *pages = NULL; | |
311 | } | |
312 | dev_err(&ep->remote_dev->sdev->dev, | |
313 | "%s %d err %d\n", __func__, __LINE__, err); | |
314 | } | |
315 | return err; | |
316 | } | |
317 | EXPORT_SYMBOL_GPL(scif_get_pages); | |
318 | ||
/**
 * scif_put_pages - Release pages previously pinned with scif_get_pages()
 * @pages: the scif_range returned by scif_get_pages(); freed here on
 *	   success along with its phys_addr and va arrays
 *
 * Return: 0 on success, -EINVAL for a bad or unstamped cookie,
 * -ENOTCONN when the endpoint was never connected.
 */
int scif_put_pages(struct scif_range *pages)
{
	struct scif_endpt *ep;
	struct scif_window *window;
	struct scifmsg msg;

	if (!pages || !pages->cookie)
		return -EINVAL;

	window = pages->cookie;

	/* The cookie must be a window stamped with the endpoint magic */
	if (!window || window->magic != SCIFEP_MAGIC)
		return -EINVAL;

	ep = (struct scif_endpt *)window->ep;
	/*
	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
	 * callee should be allowed to release references to the pages,
	 * else the endpoint was not connected in the first place,
	 * hence the ENOTCONN.
	 */
	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
		return -ENOTCONN;

	mutex_lock(&ep->rma_info.rma_lock);

	/* Drop the references taken by scif_get_pages() */
	scif_put_window(window, pages->nr_pages);

	/* Initiate window destruction if ref count is zero */
	if (!window->ref_count) {
		list_del(&window->list);
		/* Drop the lock before draining DMA and messaging the peer */
		mutex_unlock(&ep->rma_info.rma_lock);
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
		/* Inform the peer about this window being destroyed. */
		msg.uop = SCIF_MUNMAP;
		msg.src = ep->port;
		msg.payload[0] = window->peer_window;
		/* No error handling for notification messages */
		scif_nodeqp_send(ep->remote_dev, &msg);
		/* Destroy this window from the peer's registered AS */
		scif_destroy_remote_window(window);
	} else {
		mutex_unlock(&ep->rma_info.rma_lock);
	}

	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
	scif_free(pages->va, pages->nr_pages * sizeof(void *));
	kfree(pages);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_put_pages);
371 | ||
/*
 * scif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Create VtoP mappings via remap_pfn_range(..)
 * 2) Once step 1) completes successfully, traverse the range of
 *    windows again and bump the reference count.
 * RMA lock must be held.
 */
static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
			      int nr_pages, struct vm_area_struct *vma)
{
	s64 end_offset, loop_offset = offset;
	struct scif_window *window = start_window;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	int i, err = 0;
	dma_addr_t phys_addr;
	struct scif_window_iter src_win_iter;
	size_t contig_bytes = 0;

	might_sleep();
	/* Pass 1: map each page of each window into the VMA */
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/* Pages of the request that fall inside this window */
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_init_window_iter(window, &src_win_iter);
		for (i = 0; i < loop_nr_pages; i++) {
			phys_addr = scif_off_to_dma_addr(window, loop_offset,
							 &contig_bytes,
							 &src_win_iter);
			phys_addr = scif_get_phys(phys_addr, ep);
			/* Map one page at a time */
			err = remap_pfn_range(vma,
					      vma->vm_start +
					      loop_offset - offset,
					      phys_addr >> PAGE_SHIFT,
					      PAGE_SIZE,
					      vma->vm_page_prot);
			if (err)
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	/*
	 * No more failures expected. Bump up the ref count for all
	 * the windows. Another traversal from start_window required
	 * for handling errors encountered across windows during
	 * remap_pfn_range(..).
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	window = start_window;
	head = &ep->rma_info.remote_reg_list;
	/* Pass 2: take references only after every mapping succeeded */
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_get_window(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
error:
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
449 | ||
/*
 * scif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from window:
 * 1) Decrement ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
 * RMA lock must be held.
 */
static void scif_rma_list_munmap(struct scif_window *start_window,
				 s64 offset, int nr_pages)
{
	struct scifmsg msg;
	s64 loop_offset = offset, end_offset;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	struct scif_window *window = start_window, *_window;

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	loop_offset = offset;
	nr_pages_left = nr_pages;
	/* _safe variant: windows may be unlinked while walking the list */
	list_for_each_entry_safe_from(window, _window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/* Pages of the request that fall inside this window */
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_put_window(window, loop_nr_pages);
		if (!window->ref_count) {
			struct scif_dev *rdev = ep->remote_dev;

			/* Quiesce DMA before tearing the window down */
			scif_drain_dma_intr(rdev->sdev,
					    ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for Notification messages. */
			scif_nodeqp_send(ep->remote_dev, &msg);
			list_del(&window->list);
			/* Destroy this window from the peer's registered AS */
			scif_destroy_remote_window(window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
}
498 | ||
/*
 * The private data field of each VMA used to mmap a remote window
 * points to an instance of struct vma_pvt. The instance is shared by
 * all VMAs derived from the original mapping and is reference counted.
 */
struct vma_pvt {
	struct scif_endpt *ep; /* End point for remote window */
	s64 offset; /* offset within remote window */
	bool valid_offset; /* offset is valid only if the original
			    * mmap request was for a single page
			    * else the offset within the vma is
			    * the correct offset
			    */
	struct kref ref; /* released via vma_pvt_release() */
};
513 | ||
514 | static void vma_pvt_release(struct kref *ref) | |
515 | { | |
516 | struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); | |
517 | ||
518 | kfree(vmapvt); | |
519 | } | |
520 | ||
/**
 * scif_vma_open - VMA open driver callback
 * @vma: VMM memory area.
 * The open method is called by the kernel to allow the subsystem implementing
 * the VMA to initialize the area. This method is invoked any time a new
 * reference to the VMA is made (when a process forks, for example).
 * The one exception happens when the VMA is first created by mmap;
 * in this case, the driver's mmap method is called instead.
 * This function is also invoked when an existing VMA is split by the kernel
 * due to a call to munmap on a subset of the VMA resulting in two VMAs.
 * The kernel invokes this function only on one of the two VMAs.
 */
static void scif_vma_open(struct vm_area_struct *vma)
{
	struct vma_pvt *vmapvt = vma->vm_private_data;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	/*
	 * NOTE(review): scif_insert_vma() can fail with -ENOMEM but the
	 * return value is ignored here; the void ->open callback has no
	 * way to report failure — confirm this best-effort is intended.
	 */
	scif_insert_vma(vmapvt->ep, vma);
	/* Each VMA referencing the private data holds a kref on it */
	kref_get(&vmapvt->ref);
}
543 | ||
/**
 * scif_munmap - VMA close driver callback.
 * @vma: VMM memory area.
 * When an area is destroyed, the kernel calls its close operation.
 * Note that there's no usage count associated with VMA's; the area
 * is opened and closed exactly once by each process that uses it.
 */
static void scif_munmap(struct vm_area_struct *vma)
{
	struct scif_endpt *ep;
	struct vma_pvt *vmapvt = vma->vm_private_data;
	int nr_pages = vma_pages(vma);
	s64 offset;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int err;

	might_sleep();
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	ep = vmapvt->ep;
	/*
	 * For single-page mmaps the original offset was stashed in the
	 * vma_pvt; otherwise derive it from the VMA's page offset.
	 */
	offset = vmapvt->valid_offset ? vmapvt->offset :
		(vma->vm_pgoff) << PAGE_SHIFT;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
		ep, nr_pages, offset);
	/* Look up the window(s) backing this mapping */
	req.out_window = &window;
	req.offset = offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);

	err = scif_query_window(&req);
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	else
		/* Drop the references taken when the range was mmapped */
		scif_rma_list_munmap(window, offset, nr_pages);

	mutex_unlock(&ep->rma_info.rma_lock);
	/*
	 * The kernel probably zeroes these out but we still want
	 * to clean up our own mess just in case.
	 */
	vma->vm_ops = NULL;
	vma->vm_private_data = NULL;
	kref_put(&vmapvt->ref, vma_pvt_release);
	scif_delete_vma(ep, vma);
}
597 | ||
/* VMA callbacks for remote-window mappings created by scif_mmap() */
static const struct vm_operations_struct scif_vm_ops = {
	.open = scif_vma_open,
	.close = scif_munmap,
};
602 | ||
/**
 * scif_mmap - Map pages in virtual address space to a remote window.
 * @vma: VMM memory area.
 * @epd: endpoint descriptor
 *
 * Return: Upon successful completion, scif_mmap() returns zero
 * else an apt error is returned as documented in scif.h
 */
int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
	int nr_pages = vma_pages(vma);
	int err;
	struct vma_pvt *vmapvt;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
		ep, start_offset, nr_pages);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();

	/* Track this VMA on the endpoint so lost-node handling can zap it */
	err = scif_insert_vma(ep, vma);
	if (err)
		return err;

	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
	if (!vmapvt) {
		scif_delete_vma(ep, vma);
		return -ENOMEM;
	}

	vmapvt->ep = ep;
	kref_init(&vmapvt->ref);

	req.out_window = &window;
	req.offset = start_offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}

	/* Default prot for loopback */
	if (!scifdev_self(ep->remote_dev))
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	/*
	 * VM_DONTCOPY - Do not copy this vma on fork
	 * VM_DONTEXPAND - Cannot expand with mremap()
	 * VM_DONTDUMP - Omit from core dumps
	 * VM_PFNMAP - Page-ranges managed without "struct page"
	 * VM_IO - Memory mapped I/O or similar
	 *
	 * We do not want to copy this VMA automatically on a fork(),
	 * expand this VMA due to mremap() or swap out these pages since
	 * the VMA is actually backed by physical pages in the remote
	 * node's physical memory and not via a struct page.
	 */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;

	if (!scifdev_self(ep->remote_dev))
		vma->vm_flags |= VM_IO | VM_PFNMAP;

	/* Map this range of windows */
	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}
	/* Set up the driver call back */
	vma->vm_ops = &scif_vm_ops;
	vma->vm_private_data = vmapvt;
error_unlock:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		kfree(vmapvt);
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		scif_delete_vma(ep, vma);
	}
	return err;
}