4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 #include <sys/errno.h>
29 #include <sys/types.h>
34 #include <sys/sunddi.h>
37 #include <sys/modctl.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/sysmacros.h>
40 #include <sys/ddidevmap.h>
41 #include <sys/policy.h>
43 #include <sys/vmsystm.h>
44 #include <vm/hat_i86.h>
45 #include <vm/hat_pte.h>
46 #include <vm/seg_kmem.h>
47 #include <vm/seg_mf.h>
49 #include <xen/io/blkif_impl.h>
50 #include <xen/io/blk_common.h>
51 #include <xen/io/xpvtap.h>
/*
 * Forward declarations for the character-driver (cb_ops) entry points.
 * NOTE(review): this text is extraction-mangled (each original line is
 * split across several lines and some original lines are missing, e.g.
 * the trailing parameters of xpvtap_segmap on orig line 62). Comments
 * only added here; code text untouched.
 */
54 static int xpvtap_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred
);
55 static int xpvtap_close(dev_t devp
, int flag
, int otyp
, cred_t
*cred
);
56 static int xpvtap_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
,
57 cred_t
*cred
, int *rval
);
58 static int xpvtap_devmap(dev_t dev
, devmap_cookie_t dhp
, offset_t off
,
59 size_t len
, size_t *maplen
, uint_t model
);
60 static int xpvtap_segmap(dev_t dev
, off_t off
, struct as
*asp
, caddr_t
*addrp
,
61 off_t len
, unsigned int prot
, unsigned int maxprot
, unsigned int flags
,
63 static int xpvtap_chpoll(dev_t dev
, short events
, int anyyet
, short *reventsp
,
64 struct pollhead
**phpp
);
/*
 * Character/block driver entry-point table wired into xpvtap_dev_ops.
 * NOTE(review): several members (orig lines 70-73, 76, 80, 82-84 and the
 * closing brace) are not visible in this extraction. Comments only added.
 */
66 static struct cb_ops xpvtap_cb_ops
= {
67 xpvtap_open
, /* cb_open */
68 xpvtap_close
, /* cb_close */
69 nodev
, /* cb_strategy */
74 xpvtap_ioctl
, /* cb_ioctl */
75 xpvtap_devmap
, /* cb_devmap */
77 xpvtap_segmap
, /* cb_segmap */
78 xpvtap_chpoll
, /* cb_chpoll */
79 ddi_prop_op
, /* cb_prop_op */
81 D_NEW
| D_MP
| D_64BIT
| D_DEVMAP
, /* cb_flag */
/*
 * Forward declarations for the dev_ops (autoconfiguration) entry points.
 * NOTE(review): the tail of the xpvtap_getinfo prototype (orig line 86)
 * is not visible in this extraction.
 */
85 static int xpvtap_getinfo(dev_info_t
*dip
, ddi_info_cmd_t cmd
, void *arg
,
87 static int xpvtap_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
);
88 static int xpvtap_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
);
/*
 * Device operations table for this driver.
 * NOTE(review): trailing members (devo_power etc.) and the closing brace
 * are missing from this extraction. Comments only added.
 */
90 static struct dev_ops xpvtap_dev_ops
= {
91 DEVO_REV
, /* devo_rev */
93 xpvtap_getinfo
, /* devo_getinfo */
94 nulldev
, /* devo_identify */
95 nulldev
, /* devo_probe */
96 xpvtap_attach
, /* devo_attach */
97 xpvtap_detach
, /* devo_detach */
98 nodev
, /* devo_reset */
99 &xpvtap_cb_ops
, /* devo_cb_ops */
100 NULL
, /* devo_bus_ops */
/* Loadable-module (driver) linkage element. Closing brace not visible. */
105 static struct modldrv xpvtap_modldrv
= {
106 &mod_driverops
, /* Type of module. This one is a driver */
107 "xpvtap driver", /* Name of the module. */
108 &xpvtap_dev_ops
, /* driver ops */
/* Module linkage: ties xpvtap_modldrv into the module framework. */
111 static struct modlinkage xpvtap_modlinkage
= {
113 (void *) &xpvtap_modldrv
,
/*
 * Forward declarations for driver-internal helpers: per-instance state
 * setup/teardown, the ring interrupt handler, the round-robin resource
 * (request-ID) allocator, segmf gref registration, and the user-app
 * ring/thread machinery. NOTE(review): some prototype tails (e.g. orig
 * lines 150, 86-style continuations) are missing from this extraction.
 */
121 static xpvtap_state_t
*xpvtap_drv_init(int instance
);
122 static void xpvtap_drv_fini(xpvtap_state_t
*state
);
123 static uint_t
xpvtap_intr(caddr_t arg
);
125 typedef void (*xpvtap_rs_cleanup_t
)(xpvtap_state_t
*state
, uint_t rs
);
126 static void xpvtap_rs_init(uint_t min_val
, uint_t max_val
,
127 xpvtap_rs_hdl_t
*handle
);
128 static void xpvtap_rs_fini(xpvtap_rs_hdl_t
*handle
);
129 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle
, uint_t
*rs
);
130 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle
, uint_t rs
);
131 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle
,
132 xpvtap_rs_cleanup_t callback
, void *arg
);
134 static int xpvtap_segmf_register(xpvtap_state_t
*state
);
135 static void xpvtap_segmf_unregister(struct as
*as
, void *arg
, uint_t event
);
137 static int xpvtap_user_init(xpvtap_state_t
*state
);
138 static void xpvtap_user_fini(xpvtap_state_t
*state
);
139 static int xpvtap_user_ring_init(xpvtap_state_t
*state
);
140 static void xpvtap_user_ring_fini(xpvtap_state_t
*state
);
141 static int xpvtap_user_thread_init(xpvtap_state_t
*state
);
142 static void xpvtap_user_thread_fini(xpvtap_state_t
*state
);
143 static void xpvtap_user_thread_start(caddr_t arg
);
144 static void xpvtap_user_thread_stop(xpvtap_state_t
*state
);
145 static void xpvtap_user_thread(void *arg
);
147 static void xpvtap_user_app_stop(caddr_t arg
);
149 static int xpvtap_user_request_map(xpvtap_state_t
*state
, blkif_request_t
*req
,
151 static int xpvtap_user_request_push(xpvtap_state_t
*state
,
152 blkif_request_t
*req
, uint_t uid
);
153 static int xpvtap_user_response_get(xpvtap_state_t
*state
,
154 blkif_response_t
*resp
, uint_t
*uid
);
155 static void xpvtap_user_request_unmap(xpvtap_state_t
*state
, uint_t uid
);
/*
 * Fragment of _init(): allocates the soft-state anchor, installs the
 * module, and (presumably on mod_install failure — TODO confirm, the
 * surrounding lines are missing) tears the soft state back down.
 */
166 e
= ddi_soft_state_init(&xpvtap_statep
, sizeof (xpvtap_state_t
), 1);
171 e
= mod_install(&xpvtap_modlinkage
);
173 ddi_soft_state_fini(&xpvtap_statep
);
185 _info(struct modinfo
*modinfop
)
187 return (mod_info(&xpvtap_modlinkage
, modinfop
));
/*
 * Fragment of _fini(): removes the module and, on success (surrounding
 * lines missing — TODO confirm), releases the soft-state anchor.
 */
199 e
= mod_remove(&xpvtap_modlinkage
);
204 ddi_soft_state_fini(&xpvtap_statep
);
/*
 * xpvtap_attach()
 *    DDI attach entry point: allocate per-instance state, initialize the
 *    guest blkif ring (with xpvtap_intr as the interrupt handler and the
 *    user-thread start/stop hooks), and create the "xpvtap" minor node.
 *    Failures unwind via the attachfail_* labels.
 * NOTE(review): extraction-mangled; several original lines (cmd switch,
 *    braces, minor-node arguments) are missing. Comments only added.
 */
214 xpvtap_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
216 blk_ringinit_args_t args
;
217 xpvtap_state_t
*state
;
227 return (DDI_SUCCESS
);
230 return (DDI_FAILURE
);
233 /* initialize our state info */
234 instance
= ddi_get_instance(dip
);
235 state
= xpvtap_drv_init(instance
);
237 return (DDI_FAILURE
);
241 /* Initialize the guest ring */
242 args
.ar_dip
= state
->bt_dip
;
243 args
.ar_intr
= xpvtap_intr
;
244 args
.ar_intr_arg
= (caddr_t
)state
;
245 args
.ar_ringup
= xpvtap_user_thread_start
;
246 args
.ar_ringup_arg
= (caddr_t
)state
;
247 args
.ar_ringdown
= xpvtap_user_app_stop
;
248 args
.ar_ringdown_arg
= (caddr_t
)state
;
249 e
= blk_ring_init(&args
, &state
->bt_guest_ring
);
250 if (e
!= DDI_SUCCESS
) {
251 goto attachfail_ringinit
;
254 /* create the minor node (for ioctl/mmap) */
255 e
= ddi_create_minor_node(dip
, "xpvtap", S_IFCHR
, instance
,
257 if (e
!= DDI_SUCCESS
) {
258 goto attachfail_minor_node
;
261 /* Report that driver was loaded */
264 return (DDI_SUCCESS
);
/* error cleanup: unwind in reverse order of setup */
266 attachfail_minor_node
:
267 blk_ring_fini(&state
->bt_guest_ring
);
269 xpvtap_drv_fini(state
);
270 return (DDI_FAILURE
);
/*
 * xpvtap_detach()
 *    DDI detach entry point: stop the worker thread, tear down the guest
 *    ring and per-instance state, and remove the minor node.
 * NOTE(review): extraction-mangled; the cmd switch and open-check lines
 *    are missing. Comments only added.
 */
278 xpvtap_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
280 xpvtap_state_t
*state
;
284 instance
= ddi_get_instance(dip
);
285 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
287 return (DDI_FAILURE
);
296 return (DDI_FAILURE
);
299 xpvtap_user_thread_stop(state
);
300 blk_ring_fini(&state
->bt_guest_ring
);
301 xpvtap_drv_fini(state
);
302 ddi_remove_minor_node(dip
, NULL
);
304 return (DDI_SUCCESS
);
/*
 * xpvtap_getinfo()
 *    DDI getinfo entry point: translate a dev_t into either the dip
 *    (DDI_INFO_DEVT2DEVINFO) or the instance number
 *    (DDI_INFO_DEVT2INSTANCE).
 * NOTE(review): extraction-mangled; switch scaffolding and returns are
 *    missing. Comments only added.
 */
313 xpvtap_getinfo(dev_info_t
*dip
, ddi_info_cmd_t cmd
, void *arg
, void **result
)
315 xpvtap_state_t
*state
;
322 instance
= getminor(dev
);
325 case DDI_INFO_DEVT2DEVINFO
:
326 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
328 return (DDI_FAILURE
);
330 *result
= (void *)state
->bt_dip
;
334 case DDI_INFO_DEVT2INSTANCE
:
335 *result
= (void *)(uintptr_t)instance
;
/*
 * xpvtap_open()
 *    open(9E): privileged (secpolicy_xvm_control) exclusive open.
 *    Records that the device is open and stashes the opening process's
 *    address space for later gref mapping from the worker thread.
 * NOTE(review): extraction-mangled; error returns inside the guards are
 *    missing. Comments only added.
 */
353 xpvtap_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred
)
355 xpvtap_state_t
*state
;
359 if (secpolicy_xvm_control(cred
)) {
363 instance
= getminor(*devp
);
364 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
369 /* we should only be opened once */
370 mutex_enter(&state
->bt_open
.bo_mutex
);
371 if (state
->bt_open
.bo_opened
) {
372 mutex_exit(&state
->bt_open
.bo_mutex
);
375 state
->bt_open
.bo_opened
= B_TRUE
;
376 mutex_exit(&state
->bt_open
.bo_mutex
);
379 * save the apps address space. need it for mapping/unmapping grefs
380 * since will be doing it in a separate kernel thread.
382 state
->bt_map
.um_as
= curproc
->p_as
;
/*
 * xpvtap_close()
 *    close(9E): tell the worker thread to exit and wait for it, clear
 *    the saved address-space/gref-page state, then mark the device
 *    closed and signal anyone waiting for the app to go away.
 * NOTE(review): extraction-mangled; braces and null-state checks are
 *    missing. Also note the cv_wait on ut_exit_done_cv is guarded by
 *    an `if` rather than a predicate-rechecking `while` — confirm
 *    against condvar(9F) guidance. Comments only added.
 */
393 xpvtap_close(dev_t devp
, int flag
, int otyp
, cred_t
*cred
)
395 xpvtap_state_t
*state
;
399 instance
= getminor(devp
);
400 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
406 * wake thread so it can cleanup and wait for it to exit so we can
407 * be sure it's not in the middle of processing a request/response.
409 mutex_enter(&state
->bt_thread
.ut_mutex
);
410 state
->bt_thread
.ut_wake
= B_TRUE
;
411 state
->bt_thread
.ut_exit
= B_TRUE
;
412 cv_signal(&state
->bt_thread
.ut_wake_cv
);
413 if (!state
->bt_thread
.ut_exit_done
) {
414 cv_wait(&state
->bt_thread
.ut_exit_done_cv
,
415 &state
->bt_thread
.ut_mutex
);
417 ASSERT(state
->bt_thread
.ut_exit_done
);
418 mutex_exit(&state
->bt_thread
.ut_mutex
);
420 state
->bt_map
.um_as
= NULL
;
421 state
->bt_map
.um_guest_pages
= NULL
;
424 * when the ring is brought down, a userland hotplug script is run
425 * which tries to bring the userland app down. We'll wait for a bit
426 * for the user app to exit. Notify the thread waiting that the app
427 * has closed the driver.
429 mutex_enter(&state
->bt_open
.bo_mutex
);
430 ASSERT(state
->bt_open
.bo_opened
);
431 state
->bt_open
.bo_opened
= B_FALSE
;
432 cv_signal(&state
->bt_open
.bo_exit_cv
);
433 mutex_exit(&state
->bt_open
.bo_mutex
);
/*
 * xpvtap_ioctl()
 *    ioctl(9E): privileged. XPVTAP_IOCTL_RESP_PUSH wakes the worker
 *    thread so it can process user-app responses; anything else is
 *    rejected with a warning.
 * NOTE(review): extraction-mangled; switch scaffolding and returns are
 *    missing. Comments only added.
 */
444 xpvtap_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
, cred_t
*cred
,
447 xpvtap_state_t
*state
;
451 if (secpolicy_xvm_control(cred
)) {
455 instance
= getminor(dev
);
456 if (instance
== -1) {
460 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
466 case XPVTAP_IOCTL_RESP_PUSH
:
468 * wake thread, thread handles guest requests and user app
471 mutex_enter(&state
->bt_thread
.ut_mutex
);
472 state
->bt_thread
.ut_wake
= B_TRUE
;
473 cv_signal(&state
->bt_thread
.ut_wake_cv
);
474 mutex_exit(&state
->bt_thread
.ut_mutex
);
478 cmn_err(CE_WARN
, "ioctl(%d) not supported\n", cmd
);
/*
 * xpvtap_segmap()
 *    segmap(9E): offset 0 maps the user ring via devmap_setup() (ends
 *    up in xpvtap_devmap); offset PAGESIZE maps the gref buffer — VA is
 *    reserved in the app's address space and backed by seg_mf, and an
 *    as_callback is registered so teardown happens when the app unmaps
 *    or exits.
 * NOTE(review): extraction-mangled; error returns, AS_LOCK_EXIT calls
 *    and several braces are missing. Comments only added.
 */
491 xpvtap_segmap(dev_t dev
, off_t off
, struct as
*asp
, caddr_t
*addrp
,
492 off_t len
, unsigned int prot
, unsigned int maxprot
, unsigned int flags
,
495 struct segmf_crargs a
;
496 xpvtap_state_t
*state
;
501 if (secpolicy_xvm_control(cred_p
)) {
505 instance
= getminor(dev
);
506 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
511 /* the user app should be doing a MAP_SHARED mapping */
512 if ((flags
& MAP_TYPE
) != MAP_SHARED
) {
517 * if this is the user ring (offset = 0), devmap it (which ends up in
518 * xpvtap_devmap). devmap will alloc and map the ring into the
522 e
= devmap_setup(dev
, (offset_t
)off
, asp
, addrp
, (size_t)len
,
523 prot
, maxprot
, flags
, cred_p
);
527 /* this should be the mmap for the gref pages (offset = PAGESIZE) */
528 if (off
!= PAGESIZE
) {
532 /* make sure we get the size we're expecting */
533 if (len
!= XPVTAP_GREF_BUFSIZE
) {
538 * reserve user app VA space for the gref pages and use segmf to
539 * manage the backing store for the physical memory. segmf will
540 * map in/out the grefs and fault them in/out.
542 ASSERT(asp
== state
->bt_map
.um_as
);
544 if ((flags
& MAP_FIXED
) == 0) {
545 map_addr(addrp
, len
, 0, 0, flags
);
546 if (*addrp
== NULL
) {
551 /* User specified address */
552 (void) as_unmap(asp
, *addrp
, len
);
555 a
.prot
= (uchar_t
)prot
;
556 a
.maxprot
= (uchar_t
)maxprot
;
557 e
= as_map(asp
, *addrp
, len
, segmf_create
, &a
);
565 * Stash user base address, and compute address where the request
568 state
->bt_map
.um_guest_pages
= (caddr_t
)*addrp
;
569 state
->bt_map
.um_guest_size
= (size_t)len
;
571 /* register an as callback so we can cleanup when the app goes away */
572 e
= as_add_callback(asp
, xpvtap_segmf_unregister
, state
,
573 AS_UNMAP_EVENT
, *addrp
, len
, KM_SLEEP
);
575 (void) as_unmap(asp
, *addrp
, len
);
579 /* wake thread to see if there are requests already queued up */
580 mutex_enter(&state
->bt_thread
.ut_mutex
);
581 state
->bt_thread
.ut_wake
= B_TRUE
;
582 cv_signal(&state
->bt_thread
.ut_wake_cv
);
583 mutex_exit(&state
->bt_thread
.ut_mutex
);
/*
 * xpvtap_devmap()
 *    devmap(9E): maps the single pre-allocated user-ring page (umem
 *    cookie from attach) into the app's VA. Only offset 0 / len
 *    PAGESIZE is legal here.
 * NOTE(review): extraction-mangled; the offset check, *maplen store and
 *    returns are missing. Comments only added.
 */
594 xpvtap_devmap(dev_t dev
, devmap_cookie_t dhp
, offset_t off
, size_t len
,
595 size_t *maplen
, uint_t model
)
597 xpvtap_user_ring_t
*usring
;
598 xpvtap_state_t
*state
;
603 instance
= getminor(dev
);
604 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
609 /* we should only get here if the offset was == 0 */
614 /* we should only be mapping in one page */
615 if (len
!= PAGESIZE
) {
620 * we already allocated the user ring during driver attach, all we
621 * need to do is map it into the user app's VA.
623 usring
= &state
->bt_user_ring
;
624 e
= devmap_umem_setup(dhp
, state
->bt_dip
, NULL
, usring
->ur_cookie
, 0,
625 PAGESIZE
, PROT_ALL
, DEVMAP_DEFAULTS
, NULL
);
630 /* return the size to complete the devmap */
/*
 * xpvtap_chpoll()
 *    chpoll(9E): reports POLLIN|POLLRDNORM when requests have been
 *    pushed onto the user ring since the last poll; publishes them to
 *    the shared ring here (RING_PUSH_REQUESTS) and records the polled
 *    producer index.
 * NOTE(review): extraction-mangled; error paths and the final return
 *    are missing. Comments only added.
 */
641 xpvtap_chpoll(dev_t dev
, short events
, int anyyet
, short *reventsp
,
642 struct pollhead
**phpp
)
644 xpvtap_user_ring_t
*usring
;
645 xpvtap_state_t
*state
;
649 instance
= getminor(dev
);
650 if (instance
== -1) {
653 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
658 if (((events
& (POLLIN
| POLLRDNORM
)) == 0) && !anyyet
) {
664 * if we pushed requests on the user ring since the last poll, wakeup
667 usring
= &state
->bt_user_ring
;
668 if (usring
->ur_prod_polled
!= usring
->ur_ring
.req_prod_pvt
) {
671 * XXX - is this faster here or xpvtap_user_request_push??
672 * prelim data says here. Because less membars or because
673 * user thread will spin in poll requests before getting to
676 RING_PUSH_REQUESTS(&usring
->ur_ring
);
678 usring
->ur_prod_polled
= usring
->ur_ring
.sring
->req_prod
;
679 *reventsp
= POLLIN
| POLLRDNORM
;
681 /* no new requests */
685 *phpp
= &state
->bt_pollhead
;
/*
 * xpvtap_drv_init()
 *    Allocate and initialize per-instance soft state: open-tracking
 *    mutex/CV, flags, and the user ring/thread/mapping state via
 *    xpvtap_user_init(). Returns the state pointer on success;
 *    failures unwind through the drvinitfail_* labels.
 * NOTE(review): extraction-mangled; the success return and NULL-return
 *    lines are missing. Comments only added.
 */
696 static xpvtap_state_t
*
697 xpvtap_drv_init(int instance
)
699 xpvtap_state_t
*state
;
703 e
= ddi_soft_state_zalloc(xpvtap_statep
, instance
);
704 if (e
!= DDI_SUCCESS
) {
707 state
= ddi_get_soft_state(xpvtap_statep
, instance
);
709 goto drvinitfail_get_soft_state
;
712 state
->bt_instance
= instance
;
713 mutex_init(&state
->bt_open
.bo_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
714 cv_init(&state
->bt_open
.bo_exit_cv
, NULL
, CV_DRIVER
, NULL
);
715 state
->bt_open
.bo_opened
= B_FALSE
;
716 state
->bt_map
.um_registered
= B_FALSE
;
718 /* initialize user ring, thread, mapping state */
719 e
= xpvtap_user_init(state
);
720 if (e
!= DDI_SUCCESS
) {
721 goto drvinitfail_userinit
;
/* error cleanup: reverse order of setup */
726 drvinitfail_userinit
:
727 cv_destroy(&state
->bt_open
.bo_exit_cv
);
728 mutex_destroy(&state
->bt_open
.bo_mutex
);
729 drvinitfail_get_soft_state
:
730 (void) ddi_soft_state_free(xpvtap_statep
, instance
);
739 xpvtap_drv_fini(xpvtap_state_t
*state
)
741 xpvtap_user_fini(state
);
742 cv_destroy(&state
->bt_open
.bo_exit_cv
);
743 mutex_destroy(&state
->bt_open
.bo_mutex
);
744 (void) ddi_soft_state_free(xpvtap_statep
, state
->bt_instance
);
750 * this routine will be called when we have a request on the guest ring.
753 xpvtap_intr(caddr_t arg
)
755 xpvtap_state_t
*state
;
758 state
= (xpvtap_state_t
*)arg
;
760 /* wake thread, thread handles guest requests and user app responses */
761 mutex_enter(&state
->bt_thread
.ut_mutex
);
762 state
->bt_thread
.ut_wake
= B_TRUE
;
763 cv_signal(&state
->bt_thread
.ut_wake_cv
);
764 mutex_exit(&state
->bt_thread
.ut_mutex
);
766 return (DDI_INTR_CLAIMED
);
/*
 * xpvtap_segmf_register()
 *    Lock down the gref VA range in the app's address space and
 *    register each page's PTE machine address with seg_mf so grefs can
 *    be faulted in/out from user space. Sets um_registered on success.
 * NOTE(review): extraction-mangled; local declarations (pte_ma, i),
 *    AS_LOCK_EXIT calls and the per-iteration uaddr advance are
 *    missing. Comments only added.
 */
771 * xpvtap_segmf_register()
774 xpvtap_segmf_register(xpvtap_state_t
*state
)
784 as
= state
->bt_map
.um_as
;
785 pgcnt
= btopr(state
->bt_map
.um_guest_size
);
786 uaddr
= state
->bt_map
.um_guest_pages
;
789 return (DDI_FAILURE
);
792 AS_LOCK_ENTER(as
, RW_READER
);
794 seg
= as_findseg(as
, state
->bt_map
.um_guest_pages
, 0);
795 if ((seg
== NULL
) || ((uaddr
+ state
->bt_map
.um_guest_size
) >
796 (seg
->s_base
+ seg
->s_size
))) {
798 return (DDI_FAILURE
);
802 * lock down the htables so the HAT can't steal them. Register the
803 * PTE MA's for each gref page with seg_mf so we can do user space
806 for (i
= 0; i
< pgcnt
; i
++) {
807 hat_prepare_mapping(as
->a_hat
, uaddr
, &pte_ma
);
808 hat_devload(as
->a_hat
, uaddr
, PAGESIZE
, (pfn_t
)0,
809 PROT_READ
| PROT_WRITE
| PROT_USER
| HAT_UNORDERED_OK
,
810 HAT_LOAD_NOCONSIST
| HAT_LOAD_LOCK
);
811 hat_release_mapping(as
->a_hat
, uaddr
);
812 segmf_add_gref_pte(seg
, uaddr
, pte_ma
);
816 state
->bt_map
.um_registered
= B_TRUE
;
820 return (DDI_SUCCESS
);
/*
 * xpvtap_segmf_unregister()
 *    as_callback fired when the app unmaps the gref range (or exits):
 *    flush any outstanding request mappings, unlock/unload each gref
 *    page from the HAT, remove this callback, and clear um_registered.
 * NOTE(review): extraction-mangled; local declarations, AS_LOCK_EXIT
 *    and the per-iteration uaddr advance are missing. Comments only
 *    added.
 */
825 * xpvtap_segmf_unregister()
826 * as_callback routine
830 xpvtap_segmf_unregister(struct as
*as
, void *arg
, uint_t event
)
832 xpvtap_state_t
*state
;
838 state
= (xpvtap_state_t
*)arg
;
839 if (!state
->bt_map
.um_registered
) {
840 /* remove the callback (which is this routine) */
841 (void) as_delete_callback(as
, arg
);
845 pgcnt
= btopr(state
->bt_map
.um_guest_size
);
846 uaddr
= state
->bt_map
.um_guest_pages
;
848 /* unmap any outstanding req's grefs */
849 xpvtap_rs_flush(state
->bt_map
.um_rs
, xpvtap_user_request_unmap
, state
);
851 /* Unlock the gref pages */
852 for (i
= 0; i
< pgcnt
; i
++) {
853 AS_LOCK_ENTER(as
, RW_WRITER
);
854 hat_prepare_mapping(as
->a_hat
, uaddr
, NULL
);
855 hat_unload(as
->a_hat
, uaddr
, PAGESIZE
, HAT_UNLOAD_UNLOCK
);
856 hat_release_mapping(as
->a_hat
, uaddr
);
861 /* remove the callback (which is this routine) */
862 (void) as_delete_callback(as
, arg
);
864 state
->bt_map
.um_registered
= B_FALSE
;
/*
 * xpvtap_user_init()
 *    Set up everything needed to talk to the user app: the shared user
 *    ring, the request-ID allocator (0..BLKIF_RING_SIZE-1), the saved
 *    copy of each outstanding request, and the worker thread/taskq.
 * NOTE(review): extraction-mangled; the kmem_zalloc KM_SLEEP argument
 *    line is missing. Comments only added.
 */
872 xpvtap_user_init(xpvtap_state_t
*state
)
874 xpvtap_user_map_t
*map
;
878 map
= &state
->bt_map
;
880 /* Setup the ring between the driver and user app */
881 e
= xpvtap_user_ring_init(state
);
882 if (e
!= DDI_SUCCESS
) {
883 return (DDI_FAILURE
);
887 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
888 * is the same number of requests as the guest ring. Initialize the
889 * state we use to track request IDs to the user app. These IDs will
890 * also identify which group of gref pages correspond with the
893 xpvtap_rs_init(0, (BLKIF_RING_SIZE
- 1), &map
->um_rs
);
896 * allocate the space to store a copy of each outstanding requests. We
897 * will need to reference the ID and the number of segments when we
898 * get the response from the user app.
900 map
->um_outstanding_reqs
= kmem_zalloc(
901 sizeof (*map
->um_outstanding_reqs
) * BLKIF_RING_SIZE
,
905 * initialize the thread we use to process guest requests and user
908 e
= xpvtap_user_thread_init(state
);
909 if (e
!= DDI_SUCCESS
) {
910 goto userinitfail_user_thread_init
;
913 return (DDI_SUCCESS
);
/* error cleanup: reverse order of setup */
915 userinitfail_user_thread_init
:
916 xpvtap_rs_fini(&map
->um_rs
);
917 kmem_free(map
->um_outstanding_reqs
,
918 sizeof (*map
->um_outstanding_reqs
) * BLKIF_RING_SIZE
);
919 xpvtap_user_ring_fini(state
);
920 return (DDI_FAILURE
);
925 * xpvtap_user_ring_init()
928 xpvtap_user_ring_init(xpvtap_state_t
*state
)
930 xpvtap_user_ring_t
*usring
;
933 usring
= &state
->bt_user_ring
;
935 /* alocate and initialize the page for the shared user ring */
936 usring
->ur_sring
= (blkif_sring_t
*)ddi_umem_alloc(PAGESIZE
,
937 DDI_UMEM_SLEEP
, &usring
->ur_cookie
);
938 SHARED_RING_INIT(usring
->ur_sring
);
939 FRONT_RING_INIT(&usring
->ur_ring
, usring
->ur_sring
, PAGESIZE
);
940 usring
->ur_prod_polled
= 0;
942 return (DDI_SUCCESS
);
/*
 * xpvtap_user_thread_init()
 *    Initialize the worker-thread state (mutex, wake/exit CVs, flags)
 *    and create — but do not start — its single-threaded taskq.
 * NOTE(review): extraction-mangled; the taskqname declaration is
 *    missing, so the sprintf bound cannot be checked here. Also note
 *    the taskq name literal is "xvptap_%d" (looks like a transposition
 *    of "xpvtap") — runtime string, left untouched. The
 *    userinitthrfail_taskq_dispatch label appears unreferenced in the
 *    visible text. Comments only added.
 */
947 * xpvtap_user_thread_init()
950 xpvtap_user_thread_init(xpvtap_state_t
*state
)
952 xpvtap_user_thread_t
*thread
;
956 thread
= &state
->bt_thread
;
958 mutex_init(&thread
->ut_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
959 cv_init(&thread
->ut_wake_cv
, NULL
, CV_DRIVER
, NULL
);
960 cv_init(&thread
->ut_exit_done_cv
, NULL
, CV_DRIVER
, NULL
);
961 thread
->ut_wake
= B_FALSE
;
962 thread
->ut_exit
= B_FALSE
;
963 thread
->ut_exit_done
= B_TRUE
;
965 /* create but don't start the user thread */
966 (void) sprintf(taskqname
, "xvptap_%d", state
->bt_instance
);
967 thread
->ut_taskq
= ddi_taskq_create(state
->bt_dip
, taskqname
, 1,
968 TASKQ_DEFAULTPRI
, 0);
969 if (thread
->ut_taskq
== NULL
) {
970 goto userinitthrfail_taskq_create
;
973 return (DDI_SUCCESS
);
/* error cleanup: reverse order of setup */
975 userinitthrfail_taskq_dispatch
:
976 ddi_taskq_destroy(thread
->ut_taskq
);
977 userinitthrfail_taskq_create
:
978 cv_destroy(&thread
->ut_exit_done_cv
);
979 cv_destroy(&thread
->ut_wake_cv
);
980 mutex_destroy(&thread
->ut_mutex
);
982 return (DDI_FAILURE
);
/*
 * xpvtap_user_thread_start()
 *    Ring-up hook: dispatch xpvtap_user_thread onto the instance taskq.
 *    Clears ut_exit_done before dispatch and restores it (with a
 *    warning) if the dispatch fails.
 * NOTE(review): extraction-mangled; the dispatch sleep-flag argument
 *    line is missing. Comments only added.
 */
987 * xpvtap_user_thread_start()
990 xpvtap_user_thread_start(caddr_t arg
)
992 xpvtap_user_thread_t
*thread
;
993 xpvtap_state_t
*state
;
997 state
= (xpvtap_state_t
*)arg
;
998 thread
= &state
->bt_thread
;
1000 /* start the user thread */
1001 thread
->ut_exit_done
= B_FALSE
;
1002 e
= ddi_taskq_dispatch(thread
->ut_taskq
, xpvtap_user_thread
, state
,
1004 if (e
!= DDI_SUCCESS
) {
1005 thread
->ut_exit_done
= B_TRUE
;
1006 cmn_err(CE_WARN
, "Unable to start user thread\n");
1012 * xpvtap_user_thread_stop()
1015 xpvtap_user_thread_stop(xpvtap_state_t
*state
)
1017 /* wake thread so it can exit */
1018 mutex_enter(&state
->bt_thread
.ut_mutex
);
1019 state
->bt_thread
.ut_wake
= B_TRUE
;
1020 state
->bt_thread
.ut_exit
= B_TRUE
;
1021 cv_signal(&state
->bt_thread
.ut_wake_cv
);
1022 if (!state
->bt_thread
.ut_exit_done
) {
1023 cv_wait(&state
->bt_thread
.ut_exit_done_cv
,
1024 &state
->bt_thread
.ut_mutex
);
1026 mutex_exit(&state
->bt_thread
.ut_mutex
);
1027 ASSERT(state
->bt_thread
.ut_exit_done
);
1032 * xpvtap_user_fini()
1035 xpvtap_user_fini(xpvtap_state_t
*state
)
1037 xpvtap_user_map_t
*map
;
1040 map
= &state
->bt_map
;
1042 xpvtap_user_thread_fini(state
);
1043 xpvtap_rs_fini(&map
->um_rs
);
1044 kmem_free(map
->um_outstanding_reqs
,
1045 sizeof (*map
->um_outstanding_reqs
) * BLKIF_RING_SIZE
);
1046 xpvtap_user_ring_fini(state
);
1051 * xpvtap_user_ring_fini()
1054 xpvtap_user_ring_fini(xpvtap_state_t
*state
)
1056 ddi_umem_free(state
->bt_user_ring
.ur_cookie
);
1061 * xpvtap_user_thread_fini()
1064 xpvtap_user_thread_fini(xpvtap_state_t
*state
)
1066 ddi_taskq_destroy(state
->bt_thread
.ut_taskq
);
1067 cv_destroy(&state
->bt_thread
.ut_exit_done_cv
);
1068 cv_destroy(&state
->bt_thread
.ut_wake_cv
);
1069 mutex_destroy(&state
->bt_thread
.ut_mutex
);
/*
 * xpvtap_user_thread()
 *    Worker-thread body: loops forever (until ut_exit) — sleeps on
 *    ut_wake_cv, then (1) drains guest requests, mapping each request's
 *    grefs into the app's VA and pushing it onto the user ring, and
 *    (2) drains user-app responses, unmapping grefs and pushing the
 *    response back onto the guest ring. A failed map requeues the
 *    guest request; a failed push returns BLKIF_RSP_ERROR to the guest.
 * NOTE(review): extraction-mangled; do-loop openers, break statements
 *    and some braces are missing. Comments only added.
 */
1074 * xpvtap_user_thread()
1077 xpvtap_user_thread(void *arg
)
1079 xpvtap_user_thread_t
*thread
;
1080 blkif_response_t resp
;
1081 xpvtap_state_t
*state
;
1082 blkif_request_t req
;
1088 state
= (xpvtap_state_t
*)arg
;
1089 thread
= &state
->bt_thread
;
1091 xpvtap_thread_start
:
1092 /* See if we are supposed to exit */
1093 mutex_enter(&thread
->ut_mutex
);
1094 if (thread
->ut_exit
) {
1095 thread
->ut_exit_done
= B_TRUE
;
1096 cv_signal(&state
->bt_thread
.ut_exit_done_cv
);
1097 mutex_exit(&thread
->ut_mutex
);
1102 * if we aren't supposed to be awake, wait until someone wakes us.
1103 * when we wake up, check for a kill or someone telling us to exit.
1105 if (!thread
->ut_wake
) {
1106 e
= cv_wait_sig(&thread
->ut_wake_cv
, &thread
->ut_mutex
);
1107 if ((e
== 0) || (thread
->ut_exit
)) {
1108 thread
->ut_exit
= B_TRUE
;
1109 mutex_exit(&thread
->ut_mutex
);
1110 goto xpvtap_thread_start
;
1114 /* if someone didn't wake us, go back to the start of the thread */
1115 if (!thread
->ut_wake
) {
1116 mutex_exit(&thread
->ut_mutex
);
1117 goto xpvtap_thread_start
;
1121 thread
->ut_wake
= B_FALSE
;
1122 mutex_exit(&thread
->ut_mutex
);
1124 /* process requests from the guest */
1127 * check for requests from the guest. if we don't have any,
1128 * break out of the loop.
1130 e
= blk_ring_request_get(state
->bt_guest_ring
, &req
);
1135 /* we got a request, map the grefs into the user app's VA */
1136 e
= xpvtap_user_request_map(state
, &req
, &uid
);
1137 if (e
!= DDI_SUCCESS
) {
1139 * If we couldn't map the request (e.g. user app hasn't
1140 * opened the device yet), requeue it and try again
1143 blk_ring_request_requeue(state
->bt_guest_ring
);
1147 /* push the request to the user app */
1148 e
= xpvtap_user_request_push(state
, &req
, uid
);
1149 if (e
!= DDI_SUCCESS
) {
1151 resp
.operation
= req
.operation
;
1152 resp
.status
= BLKIF_RSP_ERROR
;
1153 blk_ring_response_put(state
->bt_guest_ring
, &resp
);
1155 } while (!thread
->ut_exit
);
1157 /* process responses from the user app */
1160 * check for responses from the user app. if we don't have any,
1161 * break out of the loop.
1163 b
= xpvtap_user_response_get(state
, &resp
, &uid
);
1169 * if we got a response, unmap the grefs from the matching
1172 xpvtap_user_request_unmap(state
, uid
);
1174 /* push the response to the guest */
1175 blk_ring_response_put(state
->bt_guest_ring
, &resp
);
1176 } while (!thread
->ut_exit
);
1178 goto xpvtap_thread_start
;
/*
 * xpvtap_user_request_map()
 *    Map a guest request's grant references into the user app's VA:
 *    lazily registers PTEs with segmf on first use, allocates a request
 *    ID (which selects the gref page group), validates the target VA
 *    range, and loads the request's grefs into seg_mf. Barrier/flush
 *    ops and zero-segment requests skip the mapping.
 * NOTE(review): extraction-mangled; local declarations, AS_LOCK_EXIT
 *    and the segmf_add_grefs domid argument are missing. Comments only
 *    added.
 */
1183 * xpvtap_user_request_map()
1186 xpvtap_user_request_map(xpvtap_state_t
*state
, blkif_request_t
*req
,
1189 grant_ref_t gref
[BLKIF_MAX_SEGMENTS_PER_REQUEST
];
1199 domid
= xvdi_get_oeid(state
->bt_dip
);
1201 as
= state
->bt_map
.um_as
;
1202 if ((as
== NULL
) || (state
->bt_map
.um_guest_pages
== NULL
)) {
1203 return (DDI_FAILURE
);
1206 /* has to happen after segmap returns */
1207 if (!state
->bt_map
.um_registered
) {
1208 /* register the pte's with segmf */
1209 e
= xpvtap_segmf_register(state
);
1210 if (e
!= DDI_SUCCESS
) {
1211 return (DDI_FAILURE
);
1215 /* alloc an ID for the user ring */
1216 e
= xpvtap_rs_alloc(state
->bt_map
.um_rs
, uid
);
1217 if (e
!= DDI_SUCCESS
) {
1218 return (DDI_FAILURE
);
1221 /* if we don't have any segments to map, we're done */
1222 if ((req
->operation
== BLKIF_OP_WRITE_BARRIER
) ||
1223 (req
->operation
== BLKIF_OP_FLUSH_DISKCACHE
) ||
1224 (req
->nr_segments
== 0)) {
1225 return (DDI_SUCCESS
);
1228 /* get the apps gref address */
1229 uaddr
= XPVTAP_GREF_REQADDR(state
->bt_map
.um_guest_pages
, *uid
);
1231 AS_LOCK_ENTER(as
, RW_READER
);
1232 seg
= as_findseg(as
, state
->bt_map
.um_guest_pages
, 0);
1233 if ((seg
== NULL
) || ((uaddr
+ mmu_ptob(req
->nr_segments
)) >
1234 (seg
->s_base
+ seg
->s_size
))) {
1236 return (DDI_FAILURE
);
1239 /* if we are reading from disk, we are writing into memory */
1241 if (req
->operation
== BLKIF_OP_READ
) {
1242 flags
|= SEGMF_GREF_WR
;
1245 /* Load the grefs into seg_mf */
1246 for (i
= 0; i
< req
->nr_segments
; i
++) {
1247 gref
[i
] = req
->seg
[i
].gref
;
1249 (void) segmf_add_grefs(seg
, uaddr
, flags
, gref
, req
->nr_segments
,
1254 return (DDI_SUCCESS
);
/*
 * xpvtap_user_request_push()
 *    Hand a guest request to the user app: save a copy (indexed by uid)
 *    for later ID restoration and gref unmapping, copy the request onto
 *    the user ring with its id rewritten to uid, advance the private
 *    producer, and wake any poller. (The shared-ring push happens in
 *    xpvtap_chpoll — see the XXX note there.)
 * NOTE(review): extraction-mangled; the uid parameter line and closing
 *    brace are missing. Comments only added.
 */
1259 * xpvtap_user_request_push()
1262 xpvtap_user_request_push(xpvtap_state_t
*state
, blkif_request_t
*req
,
1265 blkif_request_t
*outstanding_req
;
1266 blkif_front_ring_t
*uring
;
1267 blkif_request_t
*target
;
1268 xpvtap_user_map_t
*map
;
1271 uring
= &state
->bt_user_ring
.ur_ring
;
1272 map
= &state
->bt_map
;
1274 target
= RING_GET_REQUEST(uring
, uring
->req_prod_pvt
);
1277 * Save request from the frontend. used for ID mapping and unmap
1278 * on response/cleanup
1280 outstanding_req
= &map
->um_outstanding_reqs
[uid
];
1281 bcopy(req
, outstanding_req
, sizeof (*outstanding_req
));
1283 /* put the request on the user ring */
1284 bcopy(req
, target
, sizeof (*req
));
1285 target
->id
= (uint64_t)uid
;
1286 uring
->req_prod_pvt
++;
1288 pollwakeup(&state
->bt_pollhead
, POLLIN
| POLLRDNORM
);
1290 return (DDI_SUCCESS
);
/*
 * xpvtap_user_request_unmap()
 *    Release the grefs mapped for request `uid` (skipping barrier,
 *    flush, and zero-segment requests) and return the ID to the
 *    allocator.
 * NOTE(review): extraction-mangled; local declarations, AS_LOCK_EXIT
 *    calls and an error-return path are missing. Comments only added.
 */
1295 xpvtap_user_request_unmap(xpvtap_state_t
*state
, uint_t uid
)
1297 blkif_request_t
*req
;
1304 as
= state
->bt_map
.um_as
;
1309 /* get a copy of the original request */
1310 req
= &state
->bt_map
.um_outstanding_reqs
[uid
];
1312 /* unmap the grefs for this request */
1313 if ((req
->operation
!= BLKIF_OP_WRITE_BARRIER
) &&
1314 (req
->operation
!= BLKIF_OP_FLUSH_DISKCACHE
) &&
1315 (req
->nr_segments
!= 0)) {
1316 uaddr
= XPVTAP_GREF_REQADDR(state
->bt_map
.um_guest_pages
, uid
);
1317 AS_LOCK_ENTER(as
, RW_READER
);
1318 seg
= as_findseg(as
, state
->bt_map
.um_guest_pages
, 0);
1319 if ((seg
== NULL
) || ((uaddr
+ mmu_ptob(req
->nr_segments
)) >
1320 (seg
->s_base
+ seg
->s_size
))) {
1322 xpvtap_rs_free(state
->bt_map
.um_rs
, uid
);
1326 e
= segmf_release_grefs(seg
, uaddr
, req
->nr_segments
);
1328 cmn_err(CE_WARN
, "unable to release grefs");
1334 /* free up the user ring id */
1335 xpvtap_rs_free(state
->bt_map
.um_rs
, uid
);
/*
 * xpvtap_user_response_get()
 *    Pull one response off the user ring if available: copies it out,
 *    recovers the driver-side uid from resp->id, then restores the
 *    guest's original request id from the saved outstanding request.
 * NOTE(review): extraction-mangled; the uid parameter line, rsp_cons
 *    advance and returns are missing. Comments only added.
 */
1340 xpvtap_user_response_get(xpvtap_state_t
*state
, blkif_response_t
*resp
,
1343 blkif_front_ring_t
*uring
;
1344 blkif_response_t
*target
;
1347 uring
= &state
->bt_user_ring
.ur_ring
;
1349 if (!RING_HAS_UNCONSUMED_RESPONSES(uring
)) {
1354 target
= RING_GET_RESPONSE(uring
, uring
->rsp_cons
);
1355 if (target
== NULL
) {
1359 /* copy out the user app response */
1360 bcopy(target
, resp
, sizeof (*resp
));
1363 /* restore the guest's id from the original request */
1364 *uid
= (uint_t
)resp
->id
;
1365 resp
->id
= state
->bt_map
.um_outstanding_reqs
[*uid
].id
;
/*
 * xpvtap_user_app_stop()
 *    Ring-down hook: give the user app up to 10 seconds to close the
 *    driver (waiting on bo_exit_cv); if it doesn't, log a note — detach
 *    is merely deferred, not forced.
 * NOTE(review): extraction-mangled; the cv_reltimedwait flag argument
 *    and the timeout check are missing. Comments only added.
 */
1372 * xpvtap_user_app_stop()
1374 static void xpvtap_user_app_stop(caddr_t arg
)
1376 xpvtap_state_t
*state
;
1379 state
= (xpvtap_state_t
*)arg
;
1382 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1383 * problem, we just won't auto-detach the driver.
1385 mutex_enter(&state
->bt_open
.bo_mutex
);
1386 if (state
->bt_open
.bo_opened
) {
1387 rc
= cv_reltimedwait(&state
->bt_open
.bo_exit_cv
,
1388 &state
->bt_open
.bo_mutex
, drv_usectohz(10000000),
1391 cmn_err(CE_NOTE
, "!user process still has driver open, "
1392 "deferring detach\n");
1395 mutex_exit(&state
->bt_open
.bo_mutex
);
/*
 * xpvtap_rs_init()
 *    Build the round-robin resource (request-ID) allocator: allocate
 *    the tracking struct and a bitmap of one bit per resource (one
 *    uint64_t per 64 resources, rounded up unless max_val is a
 *    multiple of 64), then mark everything free.
 * NOTE(review): extraction-mangled; local declarations (index,
 *    array_size) and the final *handle store are missing. Comments
 *    only added.
 */
1401 * Initialize the resource structure. init() returns a handle to be used
1402 * for the rest of the resource functions. This code is written assuming
1403 * that min_val will be close to 0. Therefore, we will allocate the free
1404 * buffer only taking max_val into account.
1407 xpvtap_rs_init(uint_t min_val
, uint_t max_val
, xpvtap_rs_hdl_t
*handle
)
1409 xpvtap_rs_t
*rstruct
;
1414 ASSERT(handle
!= NULL
);
1415 ASSERT(min_val
< max_val
);
1417 /* alloc space for resource structure */
1418 rstruct
= kmem_alloc(sizeof (xpvtap_rs_t
), KM_SLEEP
);
1421 * Test to see if the max value is 64-bit aligned. If so, we don't need
1422 * to allocate an extra 64-bit word. alloc space for free buffer
1423 * (8 bytes per uint64_t).
1425 if ((max_val
& 0x3F) == 0) {
1426 rstruct
->rs_free_size
= (max_val
>> 6) * 8;
1428 rstruct
->rs_free_size
= ((max_val
>> 6) + 1) * 8;
1430 rstruct
->rs_free
= kmem_alloc(rstruct
->rs_free_size
, KM_SLEEP
);
1432 /* Initialize resource structure */
1433 rstruct
->rs_min
= min_val
;
1434 rstruct
->rs_last
= min_val
;
1435 rstruct
->rs_max
= max_val
;
1436 mutex_init(&rstruct
->rs_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
1437 rstruct
->rs_flushing
= B_FALSE
;
1439 /* Mark all resources as free */
1440 array_size
= rstruct
->rs_free_size
>> 3;
1441 for (index
= 0; index
< array_size
; index
++) {
1442 rstruct
->rs_free
[index
] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1445 /* setup handle which is returned from this function */
1445 /* setup handle which is returned from this function */
1452 * Frees up the space allocated in init(). Notice that a pointer to the
1453 * handle is used for the parameter. fini() will set the handle to NULL
1457 xpvtap_rs_fini(xpvtap_rs_hdl_t
*handle
)
1459 xpvtap_rs_t
*rstruct
;
1462 ASSERT(handle
!= NULL
);
1464 rstruct
= (xpvtap_rs_t
*)*handle
;
1466 mutex_destroy(&rstruct
->rs_mutex
);
1467 kmem_free(rstruct
->rs_free
, rstruct
->rs_free_size
);
1468 kmem_free(rstruct
, sizeof (xpvtap_rs_t
));
1470 /* set handle to null. This helps catch bugs. */
/*
 * xpvtap_rs_alloc()
 *    Round-robin allocate one resource ID: scan the free bitmap
 *    starting from rs_last, claim the first set bit, advance rs_last
 *    (wrapping at rs_max), and return DDI_SUCCESS; DDI_FAILURE when
 *    every resource is in use.
 * NOTE(review): extraction-mangled; local declarations and the
 *    rs_last increment lines are missing. Comments only added.
 */
1477 * alloc a resource. If alloc fails, we are out of resources.
1480 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle
, uint_t
*resource
)
1482 xpvtap_rs_t
*rstruct
;
1491 ASSERT(handle
!= NULL
);
1492 ASSERT(resource
!= NULL
);
1494 rstruct
= (xpvtap_rs_t
*)handle
;
1496 mutex_enter(&rstruct
->rs_mutex
);
1497 min
= rstruct
->rs_min
;
1498 max
= rstruct
->rs_max
;
1501 * Find a free resource. This will return out of the loop once it finds
1502 * a free resource. There are a total of 'max'-'min'+1 resources.
1503 * Performs a round robin allocation.
1505 for (index
= min
; index
<= max
; index
++) {
1507 array_idx
= rstruct
->rs_last
>> 6;
1508 free
= rstruct
->rs_free
[array_idx
];
1509 last
= rstruct
->rs_last
& 0x3F;
1511 /* if the next resource to check is free */
1512 if ((free
& ((uint64_t)1 << last
)) != 0) {
1513 /* we are using this resource */
1514 *resource
= rstruct
->rs_last
;
1516 /* take it out of the free list */
1517 rstruct
->rs_free
[array_idx
] &= ~((uint64_t)1 << last
);
1520 * increment the last count so we start checking the
1521 * next resource on the next alloc(). Note the rollover
1525 if (rstruct
->rs_last
> max
) {
1526 rstruct
->rs_last
= rstruct
->rs_min
;
1529 /* unlock the resource structure */
1530 mutex_exit(&rstruct
->rs_mutex
);
1532 return (DDI_SUCCESS
);
1536 * This resource is not free, lets go to the next one. Note the
1537 * rollover at 'max'.
1540 if (rstruct
->rs_last
> max
) {
1541 rstruct
->rs_last
= rstruct
->rs_min
;
1545 mutex_exit(&rstruct
->rs_mutex
);
1547 return (DDI_FAILURE
);
/*
 * xpvtap_rs_free()
 *    Return a resource ID to the free bitmap. The mutex is only taken
 *    when not flushing: xpvtap_rs_flush() already holds rs_mutex while
 *    invoking its cleanup callback, which calls back into this routine.
 * NOTE(review): extraction-mangled; local declarations and closing
 *    braces are missing. Comments only added.
 */
1553 * Free the previously alloc'd resource. Once a resource has been free'd,
1554 * it can be used again when alloc is called.
1557 xpvtap_rs_free(xpvtap_rs_hdl_t handle
, uint_t resource
)
1559 xpvtap_rs_t
*rstruct
;
1564 ASSERT(handle
!= NULL
);
1566 rstruct
= (xpvtap_rs_t
*)handle
;
1567 ASSERT(resource
>= rstruct
->rs_min
);
1568 ASSERT(resource
<= rstruct
->rs_max
);
1570 if (!rstruct
->rs_flushing
) {
1571 mutex_enter(&rstruct
->rs_mutex
);
1574 /* Put the resource back in the free list */
1575 array_idx
= resource
>> 6;
1576 offset
= resource
& 0x3F;
1577 rstruct
->rs_free
[array_idx
] |= ((uint64_t)1 << offset
);
1579 if (!rstruct
->rs_flushing
) {
1580 mutex_exit(&rstruct
->rs_mutex
);
/*
 * xpvtap_rs_flush()
 *    For every resource currently in use, invoke the cleanup callback
 *    (which may call xpvtap_rs_free — rs_flushing suppresses its
 *    locking since rs_mutex is held here) and mark the resource free
 *    again. Walks round-robin from rs_last, wrapping at rs_max.
 * NOTE(review): extraction-mangled; the arg parameter line, local
 *    declarations, rs_last increment and the rs_flushing reset are
 *    missing. Comments only added.
 */
1589 xpvtap_rs_flush(xpvtap_rs_hdl_t handle
, xpvtap_rs_cleanup_t callback
,
1592 xpvtap_rs_t
*rstruct
;
1601 ASSERT(handle
!= NULL
);
1603 rstruct
= (xpvtap_rs_t
*)handle
;
1605 mutex_enter(&rstruct
->rs_mutex
);
1606 min
= rstruct
->rs_min
;
1607 max
= rstruct
->rs_max
;
1609 rstruct
->rs_flushing
= B_TRUE
;
1612 * for all resources not free, call the callback routine to clean it
1615 for (index
= min
; index
<= max
; index
++) {
1617 array_idx
= rstruct
->rs_last
>> 6;
1618 free
= rstruct
->rs_free
[array_idx
];
1619 last
= rstruct
->rs_last
& 0x3F;
1621 /* if the next resource to check is not free */
1622 if ((free
& ((uint64_t)1 << last
)) == 0) {
1623 /* call the callback to cleanup */
1624 (*callback
)(arg
, rstruct
->rs_last
);
1626 /* put it back in the free list */
1627 rstruct
->rs_free
[array_idx
] |= ((uint64_t)1 << last
);
1630 /* go to the next one. Note the rollover at 'max' */
1632 if (rstruct
->rs_last
> max
) {
1633 rstruct
->rs_last
= rstruct
->rs_min
;
1637 mutex_exit(&rstruct
->rs_mutex
);