/*
 * Freescale Hypervisor Management Driver
 *
 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2.  This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 *
 * The Freescale hypervisor management driver provides several services to
 * drivers and applications related to the Freescale hypervisor:
 *
 * 1. An ioctl interface for querying and managing partitions.
 *
 * 2. A file interface for reading incoming doorbells.
 *
 * 3. An interrupt handler for shutting down the partition upon receiving the
 *    shutdown doorbell from a manager partition.
 *
 * 4. A kernel interface for receiving callbacks when a managed partition
 *    shuts down.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <linux/notifier.h>
#include <linux/interrupt.h>

#include <linux/io.h>
#include <asm/fsl_hcalls.h>

#include <linux/fsl_hypervisor.h>
/* Notifier chain of clients registered for partition fail-over events */
static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
50 * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART
52 * Restart a running partition
54 static long ioctl_restart(struct fsl_hv_ioctl_restart __user
*p
)
56 struct fsl_hv_ioctl_restart param
;
58 /* Get the parameters from the user */
59 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_restart
)))
62 param
.ret
= fh_partition_restart(param
.partition
);
64 if (copy_to_user(&p
->ret
, ¶m
.ret
, sizeof(__u32
)))
71 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS
73 * Query the status of a partition
75 static long ioctl_status(struct fsl_hv_ioctl_status __user
*p
)
77 struct fsl_hv_ioctl_status param
;
80 /* Get the parameters from the user */
81 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_status
)))
84 param
.ret
= fh_partition_get_status(param
.partition
, &status
);
86 param
.status
= status
;
88 if (copy_to_user(p
, ¶m
, sizeof(struct fsl_hv_ioctl_status
)))
95 * Ioctl interface for FSL_HV_IOCTL_PARTITION_START
97 * Start a stopped partition.
99 static long ioctl_start(struct fsl_hv_ioctl_start __user
*p
)
101 struct fsl_hv_ioctl_start param
;
103 /* Get the parameters from the user */
104 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_start
)))
107 param
.ret
= fh_partition_start(param
.partition
, param
.entry_point
,
110 if (copy_to_user(&p
->ret
, ¶m
.ret
, sizeof(__u32
)))
117 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP
119 * Stop a running partition
121 static long ioctl_stop(struct fsl_hv_ioctl_stop __user
*p
)
123 struct fsl_hv_ioctl_stop param
;
125 /* Get the parameters from the user */
126 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_stop
)))
129 param
.ret
= fh_partition_stop(param
.partition
);
131 if (copy_to_user(&p
->ret
, ¶m
.ret
, sizeof(__u32
)))
138 * Ioctl interface for FSL_HV_IOCTL_MEMCPY
140 * The FH_MEMCPY hypercall takes an array of address/address/size structures
141 * to represent the data being copied. As a convenience to the user, this
142 * ioctl takes a user-create buffer and a pointer to a guest physically
143 * contiguous buffer in the remote partition, and creates the
144 * address/address/size array for the hypercall.
146 static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user
*p
)
148 struct fsl_hv_ioctl_memcpy param
;
150 struct page
**pages
= NULL
;
151 void *sg_list_unaligned
= NULL
;
152 struct fh_sg_list
*sg_list
= NULL
;
154 unsigned int num_pages
;
155 unsigned long lb_offset
; /* Offset within a page of the local buffer */
159 int num_pinned
; /* return value from get_user_pages() */
160 phys_addr_t remote_paddr
; /* The next address in the remote buffer */
161 uint32_t count
; /* The number of bytes left to copy */
163 /* Get the parameters from the user */
164 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_memcpy
)))
168 * One partition must be local, the other must be remote. In other
169 * words, if source and target are both -1, or are both not -1, then
172 if ((param
.source
== -1) == (param
.target
== -1))
176 * The array of pages returned by get_user_pages() covers only
177 * page-aligned memory. Since the user buffer is probably not
178 * page-aligned, we need to handle the discrepancy.
180 * We calculate the offset within a page of the S/G list, and make
181 * adjustments accordingly. This will result in a page list that looks
184 * ---- <-- first page starts before the buffer
205 * | | <-- last page ends after the buffer
208 * The distance between the start of the first page and the start of the
209 * buffer is lb_offset. The hashed (///) areas are the parts of the
210 * page list that contain the actual buffer.
212 * The advantage of this approach is that the number of pages is
213 * equal to the number of entries in the S/G list that we give to the
216 lb_offset
= param
.local_vaddr
& (PAGE_SIZE
- 1);
217 num_pages
= (param
.count
+ lb_offset
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
219 /* Allocate the buffers we need */
222 * 'pages' is an array of struct page pointers that's initialized by
225 pages
= kzalloc(num_pages
* sizeof(struct page
*), GFP_KERNEL
);
227 pr_debug("fsl-hv: could not allocate page list\n");
232 * sg_list is the list of fh_sg_list objects that we pass to the
235 sg_list_unaligned
= kmalloc(num_pages
* sizeof(struct fh_sg_list
) +
236 sizeof(struct fh_sg_list
) - 1, GFP_KERNEL
);
237 if (!sg_list_unaligned
) {
238 pr_debug("fsl-hv: could not allocate S/G list\n");
242 sg_list
= PTR_ALIGN(sg_list_unaligned
, sizeof(struct fh_sg_list
));
244 /* Get the physical addresses of the source buffer */
245 down_read(¤t
->mm
->mmap_sem
);
246 num_pinned
= get_user_pages(current
, current
->mm
,
247 param
.local_vaddr
- lb_offset
, num_pages
,
248 (param
.source
== -1) ? READ
: WRITE
,
250 up_read(¤t
->mm
->mmap_sem
);
252 if (num_pinned
!= num_pages
) {
253 /* get_user_pages() failed */
254 pr_debug("fsl-hv: could not lock source buffer\n");
255 ret
= (num_pinned
< 0) ? num_pinned
: -EFAULT
;
260 * Build the fh_sg_list[] array. The first page is special
261 * because it's misaligned.
263 if (param
.source
== -1) {
264 sg_list
[0].source
= page_to_phys(pages
[0]) + lb_offset
;
265 sg_list
[0].target
= param
.remote_paddr
;
267 sg_list
[0].source
= param
.remote_paddr
;
268 sg_list
[0].target
= page_to_phys(pages
[0]) + lb_offset
;
270 sg_list
[0].size
= min_t(uint64_t, param
.count
, PAGE_SIZE
- lb_offset
);
272 remote_paddr
= param
.remote_paddr
+ sg_list
[0].size
;
273 count
= param
.count
- sg_list
[0].size
;
275 for (i
= 1; i
< num_pages
; i
++) {
276 if (param
.source
== -1) {
277 /* local to remote */
278 sg_list
[i
].source
= page_to_phys(pages
[i
]);
279 sg_list
[i
].target
= remote_paddr
;
281 /* remote to local */
282 sg_list
[i
].source
= remote_paddr
;
283 sg_list
[i
].target
= page_to_phys(pages
[i
]);
285 sg_list
[i
].size
= min_t(uint64_t, count
, PAGE_SIZE
);
287 remote_paddr
+= sg_list
[i
].size
;
288 count
-= sg_list
[i
].size
;
291 param
.ret
= fh_partition_memcpy(param
.source
, param
.target
,
292 virt_to_phys(sg_list
), num_pages
);
296 for (i
= 0; i
< num_pages
; i
++)
301 kfree(sg_list_unaligned
);
305 if (copy_to_user(&p
->ret
, ¶m
.ret
, sizeof(__u32
)))
312 * Ioctl interface for FSL_HV_IOCTL_DOORBELL
316 static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user
*p
)
318 struct fsl_hv_ioctl_doorbell param
;
320 /* Get the parameters from the user. */
321 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_doorbell
)))
324 param
.ret
= ev_doorbell_send(param
.doorbell
);
326 if (copy_to_user(&p
->ret
, ¶m
.ret
, sizeof(__u32
)))
332 static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user
*p
, int set
)
334 struct fsl_hv_ioctl_prop param
;
335 char __user
*upath
, *upropname
;
336 void __user
*upropval
;
337 char *path
= NULL
, *propname
= NULL
;
338 void *propval
= NULL
;
341 /* Get the parameters from the user. */
342 if (copy_from_user(¶m
, p
, sizeof(struct fsl_hv_ioctl_prop
)))
345 upath
= (char __user
*)(uintptr_t)param
.path
;
346 upropname
= (char __user
*)(uintptr_t)param
.propname
;
347 upropval
= (void __user
*)(uintptr_t)param
.propval
;
349 path
= strndup_user(upath
, FH_DTPROP_MAX_PATHLEN
);
355 propname
= strndup_user(upropname
, FH_DTPROP_MAX_PATHLEN
);
356 if (IS_ERR(propname
)) {
357 ret
= PTR_ERR(propname
);
361 if (param
.proplen
> FH_DTPROP_MAX_PROPLEN
) {
366 propval
= kmalloc(param
.proplen
, GFP_KERNEL
);
373 if (copy_from_user(propval
, upropval
, param
.proplen
)) {
378 param
.ret
= fh_partition_set_dtprop(param
.handle
,
380 virt_to_phys(propname
),
381 virt_to_phys(propval
),
384 param
.ret
= fh_partition_get_dtprop(param
.handle
,
386 virt_to_phys(propname
),
387 virt_to_phys(propval
),
390 if (param
.ret
== 0) {
391 if (copy_to_user(upropval
, propval
, param
.proplen
) ||
392 put_user(param
.proplen
, &p
->proplen
)) {
399 if (put_user(param
.ret
, &p
->ret
))
411 * Ioctl main entry point
413 static long fsl_hv_ioctl(struct file
*file
, unsigned int cmd
,
414 unsigned long argaddr
)
416 void __user
*arg
= (void __user
*)argaddr
;
420 case FSL_HV_IOCTL_PARTITION_RESTART
:
421 ret
= ioctl_restart(arg
);
423 case FSL_HV_IOCTL_PARTITION_GET_STATUS
:
424 ret
= ioctl_status(arg
);
426 case FSL_HV_IOCTL_PARTITION_START
:
427 ret
= ioctl_start(arg
);
429 case FSL_HV_IOCTL_PARTITION_STOP
:
430 ret
= ioctl_stop(arg
);
432 case FSL_HV_IOCTL_MEMCPY
:
433 ret
= ioctl_memcpy(arg
);
435 case FSL_HV_IOCTL_DOORBELL
:
436 ret
= ioctl_doorbell(arg
);
438 case FSL_HV_IOCTL_GETPROP
:
439 ret
= ioctl_dtprop(arg
, 0);
441 case FSL_HV_IOCTL_SETPROP
:
442 ret
= ioctl_dtprop(arg
, 1);
445 pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
446 _IOC_DIR(cmd
), _IOC_TYPE(cmd
), _IOC_NR(cmd
),
454 /* Linked list of processes that have us open */
455 static struct list_head db_list
;
457 /* spinlock for db_list */
458 static DEFINE_SPINLOCK(db_list_lock
);
460 /* The size of the doorbell event queue. This must be a power of two. */
463 /* Returns the next head/tail pointer, wrapping around the queue if necessary */
464 #define nextp(x) (((x) + 1) & (QSIZE - 1))
466 /* Per-open data structure */
467 struct doorbell_queue
{
468 struct list_head list
;
470 wait_queue_head_t wait
;
476 /* Linked list of ISRs that we registered */
477 struct list_head isr_list
;
479 /* Per-ISR data structure */
480 struct doorbell_isr
{
481 struct list_head list
;
483 uint32_t doorbell
; /* The doorbell handle */
484 uint32_t partition
; /* The partition handle, if used */
488 * Add a doorbell to all of the doorbell queues
490 static void fsl_hv_queue_doorbell(uint32_t doorbell
)
492 struct doorbell_queue
*dbq
;
495 /* Prevent another core from modifying db_list */
496 spin_lock_irqsave(&db_list_lock
, flags
);
498 list_for_each_entry(dbq
, &db_list
, list
) {
499 if (dbq
->head
!= nextp(dbq
->tail
)) {
500 dbq
->q
[dbq
->tail
] = doorbell
;
502 * This memory barrier eliminates the need to grab
503 * the spinlock for dbq.
506 dbq
->tail
= nextp(dbq
->tail
);
507 wake_up_interruptible(&dbq
->wait
);
511 spin_unlock_irqrestore(&db_list_lock
, flags
);
515 * Interrupt handler for all doorbells
517 * We use the same interrupt handler for all doorbells. Whenever a doorbell
518 * is rung, and we receive an interrupt, we just put the handle for that
519 * doorbell (passed to us as *data) into all of the queues.
521 static irqreturn_t
fsl_hv_isr(int irq
, void *data
)
523 fsl_hv_queue_doorbell((uintptr_t) data
);
529 * State change thread function
531 * The state change notification arrives in an interrupt, but we can't call
532 * blocking_notifier_call_chain() in an interrupt handler. We could call
533 * atomic_notifier_call_chain(), but that would require the clients' call-back
534 * function to run in interrupt context. Since we don't want to impose that
535 * restriction on the clients, we use a threaded IRQ to process the
536 * notification in kernel context.
538 static irqreturn_t
fsl_hv_state_change_thread(int irq
, void *data
)
540 struct doorbell_isr
*dbisr
= data
;
542 blocking_notifier_call_chain(&failover_subscribers
, dbisr
->partition
,
549 * Interrupt handler for state-change doorbells
551 static irqreturn_t
fsl_hv_state_change_isr(int irq
, void *data
)
554 struct doorbell_isr
*dbisr
= data
;
557 /* It's still a doorbell, so add it to all the queues. */
558 fsl_hv_queue_doorbell(dbisr
->doorbell
);
560 /* Determine the new state, and if it's stopped, notify the clients. */
561 ret
= fh_partition_get_status(dbisr
->partition
, &status
);
562 if (!ret
&& (status
== FH_PARTITION_STOPPED
))
563 return IRQ_WAKE_THREAD
;
569 * Returns a bitmask indicating whether a read will block
571 static unsigned int fsl_hv_poll(struct file
*filp
, struct poll_table_struct
*p
)
573 struct doorbell_queue
*dbq
= filp
->private_data
;
577 spin_lock_irqsave(&dbq
->lock
, flags
);
579 poll_wait(filp
, &dbq
->wait
, p
);
580 mask
= (dbq
->head
== dbq
->tail
) ? 0 : (POLLIN
| POLLRDNORM
);
582 spin_unlock_irqrestore(&dbq
->lock
, flags
);
588 * Return the handles for any incoming doorbells
590 * If there are doorbell handles in the queue for this open instance, then
591 * return them to the caller as an array of 32-bit integers. Otherwise,
592 * block until there is at least one handle to return.
594 static ssize_t
fsl_hv_read(struct file
*filp
, char __user
*buf
, size_t len
,
597 struct doorbell_queue
*dbq
= filp
->private_data
;
598 uint32_t __user
*p
= (uint32_t __user
*) buf
; /* for put_user() */
602 /* Make sure we stop when the user buffer is full. */
603 while (len
>= sizeof(uint32_t)) {
604 uint32_t dbell
; /* Local copy of doorbell queue data */
606 spin_lock_irqsave(&dbq
->lock
, flags
);
609 * If the queue is empty, then either we're done or we need
610 * to block. If the application specified O_NONBLOCK, then
611 * we return the appropriate error code.
613 if (dbq
->head
== dbq
->tail
) {
614 spin_unlock_irqrestore(&dbq
->lock
, flags
);
617 if (filp
->f_flags
& O_NONBLOCK
)
619 if (wait_event_interruptible(dbq
->wait
,
620 dbq
->head
!= dbq
->tail
))
626 * Even though we have an smp_wmb() in the ISR, the core
627 * might speculatively execute the "dbell = ..." below while
628 * it's evaluating the if-statement above. In that case, the
629 * value put into dbell could be stale if the core accepts the
630 * speculation. To prevent that, we need a read memory barrier
635 /* Copy the data to a temporary local buffer, because
636 * we can't call copy_to_user() from inside a spinlock
638 dbell
= dbq
->q
[dbq
->head
];
639 dbq
->head
= nextp(dbq
->head
);
641 spin_unlock_irqrestore(&dbq
->lock
, flags
);
643 if (put_user(dbell
, p
))
646 count
+= sizeof(uint32_t);
647 len
-= sizeof(uint32_t);
654 * Open the driver and prepare for reading doorbells.
656 * Every time an application opens the driver, we create a doorbell queue
657 * for that file handle. This queue is used for any incoming doorbells.
659 static int fsl_hv_open(struct inode
*inode
, struct file
*filp
)
661 struct doorbell_queue
*dbq
;
665 dbq
= kzalloc(sizeof(struct doorbell_queue
), GFP_KERNEL
);
667 pr_err("fsl-hv: out of memory\n");
671 spin_lock_init(&dbq
->lock
);
672 init_waitqueue_head(&dbq
->wait
);
674 spin_lock_irqsave(&db_list_lock
, flags
);
675 list_add(&dbq
->list
, &db_list
);
676 spin_unlock_irqrestore(&db_list_lock
, flags
);
678 filp
->private_data
= dbq
;
686 static int fsl_hv_close(struct inode
*inode
, struct file
*filp
)
688 struct doorbell_queue
*dbq
= filp
->private_data
;
693 spin_lock_irqsave(&db_list_lock
, flags
);
694 list_del(&dbq
->list
);
695 spin_unlock_irqrestore(&db_list_lock
, flags
);
702 static const struct file_operations fsl_hv_fops
= {
703 .owner
= THIS_MODULE
,
705 .release
= fsl_hv_close
,
708 .unlocked_ioctl
= fsl_hv_ioctl
,
709 .compat_ioctl
= fsl_hv_ioctl
,
712 static struct miscdevice fsl_hv_misc_dev
= {
718 static irqreturn_t
fsl_hv_shutdown_isr(int irq
, void *data
)
720 orderly_poweroff(false);
726 * Returns the handle of the parent of the given node
728 * The handle is the value of the 'hv-handle' property
730 static int get_parent_handle(struct device_node
*np
)
732 struct device_node
*parent
;
733 const uint32_t *prop
;
737 parent
= of_get_parent(np
);
739 /* It's not really possible for this to fail */
743 * The proper name for the handle property is "hv-handle", but some
744 * older versions of the hypervisor used "reg".
746 prop
= of_get_property(parent
, "hv-handle", &len
);
748 prop
= of_get_property(parent
, "reg", &len
);
750 if (!prop
|| (len
!= sizeof(uint32_t))) {
751 /* This can happen only if the node is malformed */
756 handle
= be32_to_cpup(prop
);
763 * Register a callback for failover events
765 * This function is called by device drivers to register their callback
766 * functions for fail-over events.
768 int fsl_hv_failover_register(struct notifier_block
*nb
)
770 return blocking_notifier_chain_register(&failover_subscribers
, nb
);
772 EXPORT_SYMBOL(fsl_hv_failover_register
);
775 * Unregister a callback for failover events
777 int fsl_hv_failover_unregister(struct notifier_block
*nb
)
779 return blocking_notifier_chain_unregister(&failover_subscribers
, nb
);
781 EXPORT_SYMBOL(fsl_hv_failover_unregister
);
784 * Return TRUE if we're running under FSL hypervisor
786 * This function checks to see if we're running under the Freescale
787 * hypervisor, and returns zero if we're not, or non-zero if we are.
789 * First, it checks if MSR[GS]==1, which means we're running under some
790 * hypervisor. Then it checks if there is a hypervisor node in the device
791 * tree. Currently, that means there needs to be a node in the root called
792 * "hypervisor" and which has a property named "fsl,hv-version".
794 static int has_fsl_hypervisor(void)
796 struct device_node
*node
;
799 if (!(mfmsr() & MSR_GS
))
802 node
= of_find_node_by_path("/hypervisor");
806 ret
= of_find_property(node
, "fsl,hv-version", NULL
) != NULL
;
814 * Freescale hypervisor management driver init
816 * This function is called when this module is loaded.
818 * Register ourselves as a miscellaneous driver. This will register the
819 * fops structure and create the right sysfs entries for udev.
821 static int __init
fsl_hypervisor_init(void)
823 struct device_node
*np
;
824 struct doorbell_isr
*dbisr
, *n
;
827 pr_info("Freescale hypervisor management driver\n");
829 if (!has_fsl_hypervisor()) {
830 pr_info("fsl-hv: no hypervisor found\n");
834 ret
= misc_register(&fsl_hv_misc_dev
);
836 pr_err("fsl-hv: cannot register device\n");
840 INIT_LIST_HEAD(&db_list
);
841 INIT_LIST_HEAD(&isr_list
);
843 for_each_compatible_node(np
, NULL
, "epapr,hv-receive-doorbell") {
845 const uint32_t *handle
;
847 handle
= of_get_property(np
, "interrupts", NULL
);
848 irq
= irq_of_parse_and_map(np
, 0);
849 if (!handle
|| (irq
== NO_IRQ
)) {
850 pr_err("fsl-hv: no 'interrupts' property in %s node\n",
855 dbisr
= kzalloc(sizeof(*dbisr
), GFP_KERNEL
);
860 dbisr
->doorbell
= be32_to_cpup(handle
);
862 if (of_device_is_compatible(np
, "fsl,hv-shutdown-doorbell")) {
863 /* The shutdown doorbell gets its own ISR */
864 ret
= request_irq(irq
, fsl_hv_shutdown_isr
, 0,
866 } else if (of_device_is_compatible(np
,
867 "fsl,hv-state-change-doorbell")) {
869 * The state change doorbell triggers a notification if
870 * the state of the managed partition changes to
871 * "stopped". We need a separate interrupt handler for
872 * that, and we also need to know the handle of the
873 * target partition, not just the handle of the
876 dbisr
->partition
= ret
= get_parent_handle(np
);
878 pr_err("fsl-hv: node %s has missing or "
879 "malformed parent\n", np
->full_name
);
883 ret
= request_threaded_irq(irq
, fsl_hv_state_change_isr
,
884 fsl_hv_state_change_thread
,
887 ret
= request_irq(irq
, fsl_hv_isr
, 0, np
->name
, dbisr
);
890 pr_err("fsl-hv: could not request irq %u for node %s\n",
896 list_add(&dbisr
->list
, &isr_list
);
898 pr_info("fsl-hv: registered handler for doorbell %u\n",
905 list_for_each_entry_safe(dbisr
, n
, &isr_list
, list
) {
906 free_irq(dbisr
->irq
, dbisr
);
907 list_del(&dbisr
->list
);
911 misc_deregister(&fsl_hv_misc_dev
);
917 * Freescale hypervisor management driver termination
919 * This function is called when this driver is unloaded.
921 static void __exit
fsl_hypervisor_exit(void)
923 struct doorbell_isr
*dbisr
, *n
;
925 list_for_each_entry_safe(dbisr
, n
, &isr_list
, list
) {
926 free_irq(dbisr
->irq
, dbisr
);
927 list_del(&dbisr
->list
);
931 misc_deregister(&fsl_hv_misc_dev
);
934 module_init(fsl_hypervisor_init
);
935 module_exit(fsl_hypervisor_exit
);
937 MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
938 MODULE_DESCRIPTION("Freescale hypervisor management driver");
939 MODULE_LICENSE("GPL v2");