/******************************************************************************
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)
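
/*
 * vm_private_data doubles as the single-shot marker: it is NULL for a
 * freshly mmap()ed privcmd VMA, becomes PRIV_VMA_LOCKED once an ioctl has
 * claimed the VMA (see privcmd_enforce_singleshot_mapping()), and, for
 * auto-translated domains, is later replaced by the ballooned page array
 * set up in alloc_empty_pages().
 */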

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}
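
/*
 * Illustrative userspace sketch (not part of this driver; exact header and
 * device paths depend on the installation): issue a hypercall through the
 * privcmd node, e.g. querying the hypervisor version:
 *
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      struct privcmd_hypercall call = {
 *              .op  = __HYPERVISOR_xen_version,
 *              .arg = { XENVER_version },
 *      };
 *      long rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 */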

static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;

                pageidx += size;
        }

        return ret;
}

struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};
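
/*
 * mmap_mfn_range() below is invoked via traverse_pages() once per
 * privcmd_mmap_entry gathered from userspace.  st->va tracks the next
 * expected virtual address, so the entries must describe contiguous,
 * ascending chunks of the VMA.
 */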

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va + (msg->npages << PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain, NULL);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}
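
/*
 * IOCTL_PRIVCMD_MMAP (the V1 interface): the caller must already have
 * mmap()ed a region of the privcmd device covering the requested range;
 * the VMA is claimed once and then populated by mmap_mfn_range().  For
 * auto-translated guests only the batch interface further below is
 * supported.
 */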

static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        /* We only support privcmd_ioctl_mmap_batch for auto translated. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}
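
/*
 * The mmap-batch ioctl runs in two passes over the gathered frame list:
 * the first pass (mmap_batch_fn) installs the mappings and records
 * per-frame status, the optional second pass (mmap_return_errors) copies
 * that status back to userspace in either the V1 format (in-band, in the
 * mfn array) or the V2 format (separate err array).
 */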

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int index;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        int version;

        /* User-space mfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_mfn;
        /* User-space int array to store errors in the second pass for V2. */
        int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
        struct page *cur_page = NULL;
        int ret;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                cur_page = pages[st->index++];

        ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                         st->vma->vm_page_prot, st->domain,
                                         &cur_page);

        /* Store error code for second pass. */
        if (st->version == 1) {
                if (ret < 0) {
                        /*
                         * V1 encodes the error codes in the 32bit top
                         * nibble of the mfn (with its known limitations
                         * vis-a-vis 64 bit callers).
                         */
                        *mfnp |= (ret == -ENOENT) ?
                                        PRIVCMD_MMAPBATCH_PAGED_ERROR :
                                        PRIVCMD_MMAPBATCH_MFN_ERROR;
                }
        } else { /* st->version == 2 */
                *((int *) mfnp) = ret;
        }

        /* And see if it affects the global_error. */
        if (ret < 0) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }
        st->va += PAGE_SIZE;

        return 0;
}
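
/*
 * mmap_batch_fn() always returns 0 so that traverse_pages() visits every
 * frame; failures are recorded per frame (in the mfn array for V1, as an
 * errno for V2) and summarized in st->global_error, then reported to
 * userspace by the second pass below.
 */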

static int mmap_return_errors(void *data, void *state)
{
        struct mmap_batch_state *st = state;

        if (st->version == 1) {
                xen_pfn_t mfnp = *((xen_pfn_t *) data);
                if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
                        return __put_user(mfnp, st->user_mfn++);
                else
                        st->user_mfn++;
        } else { /* st->version == 2 */
                int err = *((int *) data);
                if (err)
                        return __put_user(err, st->user_err++);
                else
                        st->user_err++;
        }

        return 0;
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
        int rc;
        struct page **pages;

        pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
        if (pages == NULL)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(numpgs, pages, 0);
        if (rc != 0) {
                pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
                        numpgs, rc);
                kfree(pages);
                return -ENOMEM;
        }
        BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
        vma->vm_private_data = pages;

        return 0;
}

static struct vm_operations_struct privcmd_vm_ops;
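
/*
 * privcmd_vm_ops is forward-declared here because privcmd_ioctl_mmap_batch()
 * below checks vma->vm_ops against &privcmd_vm_ops to make sure the target
 * VMA was actually created by privcmd_mmap().
 */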

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        if (version == 2) {
                /* Zero error array now to only copy back actual errors. */
                if (clear_user(m.err, sizeof(int) * m.num)) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                ret = -EINVAL;
                goto out;
        }
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                ret = alloc_empty_pages(vma, m.num);
                if (ret < 0) {
                        up_write(&mm->mmap_sem);
                        goto out;
                }
        }

        state.domain       = m.dom;
        state.vma          = vma;
        state.va           = m.addr;
        state.index        = 0;
        state.global_error = 0;
        state.version      = version;

        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
                              &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error) {
                /* Write back errors in second pass. */
                state.user_mfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                                     &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        free_page_list(&pagelist);

        return ret;
}
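
/*
 * Illustrative userspace sketch (not part of this driver; details depend on
 * the toolstack headers in use): map a window of the privcmd fd first, then
 * ask for the foreign frames to be installed into it:
 *
 *      size_t len = npages * getpagesize();
 *      void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                        MAP_SHARED, fd, 0);
 *      struct privcmd_mmapbatch_v2 batch = {
 *              .num  = npages,
 *              .dom  = domid,
 *              .addr = (unsigned long)addr,
 *              .arr  = gmfns,          // frames of the foreign domain
 *              .err  = errs,           // one int per frame, filled on error
 *      };
 *      rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
 */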

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(udata, 2);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static void privcmd_close(struct vm_area_struct *vma)
{
        struct page **pages = vma->vm_private_data;
        int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

        if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
                return;

        xen_unmap_domain_mfn_range(vma, numpgs, pages);
        free_xenballooned_pages(numpgs, pages);
        kfree(pages);
}
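
/*
 * All valid mappings in a privcmd VMA are installed by the ioctls above, so
 * any fault taken here means userspace touched a page that was never
 * (successfully) mapped; answer with SIGBUS.
 */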

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .close = privcmd_close,
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}
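
/*
 * Claim a VMA for a single mapping operation: atomically flip
 * vm_private_data from NULL to PRIV_VMA_LOCKED.  A second attempt on the
 * same VMA fails, which forces userspace to mmap() a fresh region for every
 * mapping request.
 */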

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}
#endif

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                printk(KERN_ERR "Could not register Xen privcmd device\n");
                return err;
        }

        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);