/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"
MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}
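/*
 * Note: privcmd_ioctl_hypercall() above deliberately does not interpret
 * op or args.  privcmd_call() is the arch-provided hypercall trampoline,
 * and privilege and argument checking are left entirely to the hypervisor.
 */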
static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}
/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
/*
 * Call function 'fn' on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}
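/*
 * traverse_pages() walks the same page-chunked layout that gather_array()
 * builds: elements are packed PAGE_SIZE/size to a page and never straddle
 * a page boundary, so advancing pageidx in whole 'size' steps and hopping
 * to the next page when fewer than 'size' bytes remain visits every
 * element exactly once.
 */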
struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}
static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* An array for individual errors */
	int *err;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
};
/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page *cur_page = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_page = pages[st->index++];

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain,
					 &cur_page);

	/* Store error code for second pass. */
	*(st->err++) = ret;

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}
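/*
 * mmap_batch_fn() always returns 0 so that traverse_pages() keeps going
 * and attempts to map every frame in the batch.  Per-frame failures are
 * recorded in st->err and summarised in st->global_error; the ioctl
 * handler reports them to userspace afterwards.
 */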
static int mmap_return_errors_v1(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int err = *(st->err++);

	/*
	 * V1 encodes the error codes in the 32bit top nibble of the
	 * mfn (with its known limitations vis-a-vis 64 bit callers).
	 */
	*mfnp |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
	return __put_user(*mfnp, st->user_mfn++);
}
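/*
 * For reference (the authoritative values live in xen/privcmd.h):
 * PRIVCMD_MMAPBATCH_MFN_ERROR sets the whole top nibble (0xf0000000U),
 * while PRIVCMD_MMAPBATCH_PAGED_ERROR sets only the top bit (0x80000000U),
 * letting a V1 caller tell a paged-out frame (worth retrying) apart from
 * a hard mapping failure.
 */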
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages, 0);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
	vma->vm_private_data = pages;

	return 0;
}
static struct vm_operations_struct privcmd_vm_ops;
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	int *err_array = NULL;
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
	if (err_array == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		ret = alloc_empty_pages(vma, m.num);
		if (ret < 0) {
			up_write(&mm->mmap_sem);
			goto out;
		}
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.index = 0;
	state.global_error = 0;
	state.err = err_array;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			      &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (version == 1) {
		if (state.global_error) {
			/* Write back errors in second pass. */
			state.user_mfn = (xen_pfn_t *)m.arr;
			state.err      = err_array;
			ret = traverse_pages(m.num, sizeof(xen_pfn_t),
					     &pagelist, mmap_return_errors_v1,
					     &state);
		} else
			ret = 0;

	} else if (version == 2) {
		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
		if (ret)
			ret = -EFAULT;
	}

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	kfree(err_array);
	free_page_list(&pagelist);

	return ret;
}
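/*
 * Error reporting differs by ABI version: V1 has no error array, so
 * failures are encoded back into the caller's mfn array in a second pass
 * (mmap_return_errors_v1), whereas V2 copies the raw per-frame error codes
 * into the separate m.err array.  Either way, a batch that hit a paged-out
 * frame surfaces as -ENOENT so the caller knows a retry may succeed.
 */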
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	xen_unmap_domain_mfn_range(vma, numpgs, pages);
	free_xenballooned_pages(numpgs, pages);
	kfree(pages);
}
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}
static struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}
#endif
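/*
 * The cmpxchg() above is what makes every privcmd VMA single-shot: the
 * first mapping ioctl atomically flips vm_private_data from NULL to
 * PRIV_VMA_LOCKED, so any later attempt to populate the same VMA fails
 * its checks with -EINVAL.  For auto-translated guests the marker is then
 * replaced by the pages array in alloc_empty_pages().
 */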
const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);
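/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * miscdevice below registers as /dev/xen/privcmd, and a privileged
 * process can issue a hypercall through it roughly like so, using the
 * definitions from xen/privcmd.h:
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version, 0 },
 *	};
 *	long ret = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * Real callers normally go through libxc rather than this ioctl directly.
 */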
static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};
static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}
static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);