/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */
/*
 * xdf.c - Xen Virtual Block Device Driver
 *
 * - support alternate block size (currently only DEV_BSIZE supported)
 * - revalidate geometry for removable devices
 *
 * This driver exports disk device nodes, accepts IO requests from those
 * nodes, and services those requests by talking to a backend device.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IOs.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
 */
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#else /* !XPV_HVM_DRIVER */
#include <sys/evtchn_impl.h>
#endif /* !XPV_HVM_DRIVER */
#include <sys/sunndi.h>
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
/*
 * DEBUG_EVAL can be used to include debug only statements without
 * having to use '#ifdef DEBUG' statements
 */
#ifdef DEBUG
#define	DEBUG_EVAL(x)	(x)
#else /* !DEBUG */
#define	DEBUG_EVAL(x)
#endif /* !DEBUG */
#define	XDF_DRAIN_MSEC_DELAY	(50*1000)	/* 00.05 sec */
#define	XDF_DRAIN_RETRY_COUNT	200		/* 10.00 sec */
#define	XDF_STATE_TIMEOUT	(30*1000*1000)	/* 30.00 sec */

#define	INVALID_DOMID	((domid_t)-1)
#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2
#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp)						\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)					\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp)					\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp)						\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

#define	VREQ_DONE(vreq)							\
	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&		\
	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||		\
	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

#define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))
extern int do_polled_io;
/* run-time tunables that we don't want the compiler to optimize away */
volatile int xdf_debug = 0;
volatile boolean_t xdf_barrier_flush_disable = B_FALSE;

/* per module globals */
static void *xdf_ssp;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
static int xdf_fbrewrites;	/* flush block re-write count */
/* misc public functions */
int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
/* misc private functions */
static void xdf_io_start(xdf_t *);
static void xdf_devid_setup(xdf_t *);

/* callbacks from common label */
static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};
/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};

static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};
static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* new timeout thread could be re-scheduled */
	xdf_io_start(vdp);
}
/*
 * callback func when DMA/GTE resources become available
 *
 * Note: we only register one callback function to the grant table subsystem
 * since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    vdp->xdf_addr));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}
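
/*
 * ge_slot_t management.  Each in-flight ring request is described by a
 * ge_slot_t, which caches the grant table references used by the data
 * segments of that request.
 */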
static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* try to alloc GTEs needed in this slot, first */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	gs->gs_oeid = vdp->xdf_peer;
	gs->gs_isread = isread;
	gs->gs_ghead = gh;
	gs->gs_ngrefs = 0;

	return (gs);
}
static void
gs_free(ge_slot_t *gs)
{
	int i;

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < gs->gs_ngrefs; i++)
		gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
	gnttab_free_grant_references(gs->gs_ghead);
	list_remove(&gs->gs_vreq->v_gs, gs);
	kmem_cache_free(xdf_gs_cache, gs);
}
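
/*
 * Claim a grant table reference from the slot's pre-allocated pool and
 * grant the backend domain access to the page at mfn.
 */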
static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);

	ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->gs_ge[gs->gs_ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);

	return (gr);
}
/*
 * Alloc a vreq for this bp
 * bp->av_back contains the pointer to the vreq upon return
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP_VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));
	list_create(&vreq->v_gs, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, gs_vreq_link));
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	vreq->v_runq = B_FALSE;
	BP_VREQ_SET(bp, vreq);
	/* init of other fields in vreq is up to the caller */

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);

	return (vreq);
}
static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(BP_VREQ(bp) == vreq);

	list_remove(&vdp->xdf_vreq_act, vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	ASSERT(!vreq->v_runq);
	list_destroy(&vreq->v_gs);
	kmem_cache_free(xdf_vreq_cache, vreq);
}
/*
 * Snarf new data if our flush block was re-written
 */
static void
check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
{
	int nblks;
	boolean_t mapin = B_FALSE;

	if (IS_WRITE_BARRIER(vdp, bp))
		return; /* write was a flush write */

	nblks = bp->b_bcount >> DEV_BSHIFT;
	if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
		xdf_fbrewrites++;
		if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
			mapin = B_TRUE;
			bp_mapin(bp);
		}
		bcopy(bp->b_un.b_addr +
		    ((xdf_flush_block - blkno) << DEV_BSHIFT),
		    vdp->xdf_cache_flush_block, DEV_BSIZE);
		if (mapin)
			bp_mapout(bp);
	}
}
/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "get ge_slot failed\n", vdp->xdf_addr));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->gs_vreq = vreq;
			list_insert_head(&vreq->v_gs, gs);
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work around CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "unaligned buf DMAhandle alloc failed\n",
				    vdp->xdf_addr));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);
			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "DMA mem allocation failed\n",
				    vdp->xdf_addr));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get ge_slot, callback is set upon failure from gs_get(),
		 * if not set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_status = VREQ_GS_ALLOCED;
		gs->gs_vreq = vreq;
		list_insert_head(&vreq->v_gs, gs);
		break;

	case VREQ_GS_ALLOCED:
		/* nothing need to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmaw++;
		VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		gs->gs_vreq = vreq;
		list_insert_head(&vreq->v_gs, gs);
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
static int
xdf_cmlb_attach(xdf_t *vdp)
{
	dev_info_t	*dip = vdp->xdf_dip;

	return (cmlb_attach(dip, &xdf_lb_ops,
	    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
	    XD_IS_RM(vdp),
	    B_TRUE,
	    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#ifdef XPV_HVM_DRIVER
	    (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT),
#else /* !XPV_HVM_DRIVER */
	    0,
#endif /* !XPV_HVM_DRIVER */
	    vdp->xdf_vd_lbl, NULL));
}
static void
xdf_io_err(buf_t *bp, int err, size_t resid)
{
	bioerror(bp, err);
	if (resid == 0)
		bp->b_resid = bp->b_bcount;
	biodone(bp);
}
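
/*
 * kstat accounting helpers.  A buf is counted on the runq once it has
 * been granted ring buffer resources (v_runq is set); until then it is
 * counted on the waitq.
 */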
static void
xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xdev_iostat == NULL)
		return;
	if ((vreq != NULL) && vreq->v_runq) {
		kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	} else {
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	}
}
static void
xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xdev_iostat == NULL)
		return;

	if ((vreq != NULL) && vreq->v_runq) {
		kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	} else {
		kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	}

	if (bp->b_flags & B_READ) {
		KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++;
		KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount;
	} else if (bp->b_flags & B_WRITE) {
		KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++;
		KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount;
	}
}
static void
xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(!vreq->v_runq);

	vreq->v_runq = B_TRUE;
	if (vdp->xdf_xdev_iostat == NULL)
		return;
	kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}
static void
xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(vreq->v_runq);

	vreq->v_runq = B_FALSE;
	if (vdp->xdf_xdev_iostat == NULL)
		return;
	kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}
int
xdf_kstat_create(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t	*kstat;
	buf_t	*bp;

	if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
		return (-1);

	/* See comment about locking in xdf_kstat_delete(). */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	/* only one kstat can exist at a time */
	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		kstat_delete(kstat);
		return (-1);
	}

	vdp->xdf_xdev_iostat = kstat;
	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);

	/*
	 * Now that we've created a kstat, we need to update the waitq and
	 * runq counts for the kstat to reflect our current state.
	 *
	 * For a buf_t structure to be on the runq, it must have a ring
	 * buffer slot associated with it.  To get a ring buffer slot the
	 * buf must first have a v_req_t and a ge_slot_t associated with it.
	 * Then when it is granted a ring buffer slot, v_runq will be set to
	 * true.
	 *
	 * For a buf_t structure to be on the waitq, it must not be on the
	 * runq.  So to find all the buf_t's that should be on waitq, we
	 * walk the active buf list and add any buf_t's which aren't on the
	 * runq to the waitq.
	 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_enter(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);

	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);
	return (0);
}
void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t	*kstat;
	buf_t	*bp;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of the our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	/*
	 * We're about to destroy the kstat structures, so it isn't really
	 * necessary to update the runq and waitq counts.  But, since this
	 * isn't a hot code path we can afford to be a little pedantic and
	 * go ahead and decrement the runq and waitq kstat counters to zero
	 * before free'ing them.  This helps us ensure that we've gotten all
	 * our accounting correct.
	 *
	 * For an explanation of how we determine which buffers go on the
	 * runq vs which go on the waitq, see the comments in
	 * xdf_kstat_create().
	 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_exit(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}
/*
 * Add an IO request onto the active queue.
 *
 * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
 * are used to establish a connection to the backend, so they receive
 * priority over all other IOs.  Since xdf_ready_tq_thread only does
 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
 * given time and we record the buf associated with that request in
 * vdp->xdf_ready_tq_bp.
 */
static void
xdf_bp_push(xdf_t *vdp, buf_t *bp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(bp->av_forw == NULL);

	xdf_kstat_enter(vdp, bp);

	if (curthread == vdp->xdf_ready_tq_thread) {
		/* new IO requests from the ready thread */
		ASSERT(vdp->xdf_ready_tq_bp == NULL);
		vdp->xdf_ready_tq_bp = bp;
		return;
	}

	/* this is a normal IO request */
	ASSERT(bp != vdp->xdf_ready_tq_bp);

	if (vdp->xdf_f_act == NULL) {
		/* this is the only IO on the active queue */
		ASSERT(vdp->xdf_l_act == NULL);
		ASSERT(vdp->xdf_i_act == NULL);
		vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
		return;
	}

	/* add this IO to the tail of the active queue */
	vdp->xdf_l_act->av_forw = bp;
	vdp->xdf_l_act = bp;
	if (vdp->xdf_i_act == NULL)
		vdp->xdf_i_act = bp;
}
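
/*
 * Remove a completed IO request from the active queue.
 */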
static void
xdf_bp_pop(xdf_t *vdp, buf_t *bp)
{
	buf_t *bp_iter;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(VREQ_DONE(BP_VREQ(bp)));

	if (vdp->xdf_ready_tq_bp == bp) {
		/* we're done with a ready thread IO request */
		ASSERT(bp->av_forw == NULL);
		vdp->xdf_ready_tq_bp = NULL;
		return;
	}

	/* we're done with a normal IO request */
	ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
	ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
	ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
	ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);

	if (bp == vdp->xdf_f_act) {
		/* This IO was at the head of our active queue. */
		vdp->xdf_f_act = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = NULL;
	} else {
		/* This IO finished before some other pending IOs. */
		bp_iter = vdp->xdf_f_act;
		while (bp != bp_iter->av_forw) {
			bp_iter = bp_iter->av_forw;
			ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
			ASSERT(bp_iter != vdp->xdf_i_act);
		}
		bp_iter->av_forw = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = bp_iter;
	}
	bp->av_forw = NULL;
}
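
/*
 * Return the next buf on the active queue that still needs to be issued
 * to the backend, or NULL if there is no IO ready to be serviced.
 */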
static buf_t *
xdf_bp_next(xdf_t *vdp)
{
	v_req_t *vreq;
	buf_t *bp;

	if (vdp->xdf_state == XD_CONNECTED) {
		/*
		 * If we're in the XD_CONNECTED state, we only service IOs
		 * from the xdf_ready_tq_thread thread.
		 */
		if ((bp = vdp->xdf_ready_tq_bp) == NULL)
			return (NULL);
		if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
			return (bp);
		return (NULL);
	}

	/* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
	if (vdp->xdf_state != XD_READY)
		return (NULL);

	ASSERT(vdp->xdf_ready_tq_bp == NULL);
	for (;;) {
		if ((bp = vdp->xdf_i_act) == NULL)
			return (NULL);
		if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
			return (bp);

		/* advance the active buf index pointer */
		vdp->xdf_i_act = bp->av_forw;
	}
}
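
/*
 * Complete the IO described by a ring response.  A request isn't
 * finished until all the DMA windows (ring slots) it consumed have
 * completed.
 */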
static void
xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
{
	ge_slot_t	*gs = (ge_slot_t *)(uintptr_t)id;
	v_req_t		*vreq = gs->gs_vreq;
	buf_t		*bp = vreq->v_buf;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(BP_VREQ(bp) == vreq);

	gs_free(gs);

	if (bioerr != 0)
		bioerror(bp, bioerr);
	ASSERT(vreq->v_nslots > 0);
	if (--vreq->v_nslots > 0)
		return;

	/* remove this IO from our active queue */
	xdf_bp_pop(vdp, bp);

	ASSERT(vreq->v_runq);
	xdf_kstat_exit(vdp, bp);
	vreq->v_runq = B_FALSE;
	vreq_free(vdp, vreq);

	if (IS_ERROR(bp)) {
		xdf_io_err(bp, geterror(bp), 0);
	} else if (bp->b_resid != 0) {
		/* Partial transfers are an error */
		xdf_io_err(bp, EIO, bp->b_resid);
	} else {
		biodone(bp);
	}
}
/*
 * xdf interrupt handler
 */
static int
xdf_intr_locked(xdf_t *vdp)
{
	xendev_ring_t *xbr;
	blkif_response_t *resp;
	int bioerr;
	uint64_t id;
	uint8_t op;
	uint16_t status;
	ddi_acc_handle_t acchdl;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if ((xbr = vdp->xdf_xb_ring) == NULL)
		return (DDI_INTR_UNCLAIMED);

	acchdl = vdp->xdf_xb_ring_hdl;

	/*
	 * complete all requests which have a response
	 */
	while (resp = xvdi_ring_get_response(xbr)) {
		id = ddi_get64(acchdl, &resp->id);
		op = ddi_get8(acchdl, &resp->operation);
		status = ddi_get16(acchdl, (uint16_t *)&resp->status);
		DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
		    op, id, status));

		if (status != BLKIF_RSP_OKAY) {
			DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
			    vdp->xdf_addr,
			    (op == BLKIF_OP_READ) ? "reading" : "writing"));
			bioerr = EIO;
		} else {
			bioerr = 0;
		}

		xdf_io_fini(vdp, id, bioerr);
	}
	return (DDI_INTR_CLAIMED);
}
/*
 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
 * block at a lower pil.
 */
static uint_t
xdf_intr(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	int rv;

	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_intr_locked(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	if (!do_polled_io)
		xdf_io_start(vdp);

	return (rv);
}
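
/*
 * Push any pending ring requests to the backend and, if needed, notify
 * it via our event channel.
 */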
static void
xdf_ring_push(xdf_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xb_ring == NULL)
		return;

	if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
		DPRINTF(IO_DBG, (
		    "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
		    vdp->xdf_addr));
	}

	if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
		xvdi_notify_oe(vdp->xdf_dip);
}
static int
xdf_ring_drain_locked(xdf_t *vdp)
{
	int	pollc, rv = 0;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf_ring_drain: start\n");

	for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
		if (vdp->xdf_xb_ring == NULL)
			goto out;

		if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
			(void) xdf_intr_locked(vdp);
		if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
			goto out;

		/* file-backed devices can be slow */
		mutex_exit(&vdp->xdf_dev_lk);
#ifdef XPV_HVM_DRIVER
		(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
		delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
		mutex_enter(&vdp->xdf_dev_lk);
	}
	cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);

out:
	if (vdp->xdf_xb_ring != NULL) {
		if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
		    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
			rv = EIO;
	}
	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
		    vdp->xdf_addr, rv);
	return (rv);
}
static int
xdf_ring_drain(xdf_t *vdp)
{
	int rv;

	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_ring_drain_locked(vdp);
	mutex_exit(&vdp->xdf_dev_lk);
	return (rv);
}
/*
 * Destroy all v_req_t, grant table entries, and our ring buffer.
 */
static void
xdf_ring_destroy(xdf_t *vdp)
{
	v_req_t		*vreq;
	buf_t		*bp;
	ge_slot_t	*gs;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if ((vdp->xdf_state != XD_INIT) &&
	    (vdp->xdf_state != XD_CONNECTED) &&
	    (vdp->xdf_state != XD_READY)) {
		ASSERT(vdp->xdf_xb_ring == NULL);
		ASSERT(vdp->xdf_xb_ring_hdl == NULL);
		ASSERT(vdp->xdf_peer == INVALID_DOMID);
		ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
		ASSERT(list_is_empty(&vdp->xdf_vreq_act));
		return;
	}

	/*
	 * We don't want to receive async notifications from the backend
	 * when it finishes processing ring entries.
	 */
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */

	/*
	 * Drain any requests in the ring.  We need to do this before we
	 * can free grant table entries, because if active ring entries
	 * point to grants, then the backend could be trying to access
	 * those grants.
	 */
	(void) xdf_ring_drain_locked(vdp);

	/* We're done talking to the backend so free up our event channel */
	xvdi_free_evtchn(vdp->xdf_dip);
	vdp->xdf_evtchn = INVALID_EVTCHN;

	while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
		bp = vreq->v_buf;
		ASSERT(BP_VREQ(bp) == vreq);

		/* Free up any grant table entries associated with this IO */
		while ((gs = list_head(&vreq->v_gs)) != NULL)
			gs_free(gs);

		/* If this IO was on the runq, move it back to the waitq. */
		if (vreq->v_runq)
			xdf_kstat_runq_to_waitq(vdp, bp);

		/*
		 * Reset any buf IO state since we're going to re-issue the
		 * IO when we reconnect.
		 */
		vreq_free(vdp, vreq);
		BP_VREQ_SET(bp, NULL);
		bioerror(bp, 0);
	}

	/* reset the active queue index pointer */
	vdp->xdf_i_act = vdp->xdf_f_act;

	/* Destroy the ring */
	xvdi_free_ring(vdp->xdf_xb_ring);
	vdp->xdf_xb_ring = NULL;
	vdp->xdf_xb_ring_hdl = NULL;
	vdp->xdf_peer = INVALID_DOMID;
}
static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}
/*
 * Check if we have a pending "eject" media request.
 */
static boolean_t
xdf_eject_pending(xdf_t *vdp)
{
	dev_info_t *dip = vdp->xdf_dip;
	char *xsname, *str;

	if (!vdp->xdf_media_req_supported)
		return (B_FALSE);

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
		return (B_FALSE);

	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
		strfree(str);
		return (B_FALSE);
	}
	strfree(str);
	return (B_TRUE);
}
/*
 * Generate a media request.
 */
static int
xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
{
	dev_info_t *dip = vdp->xdf_dip;
	char *xsname;

	/*
	 * we can't be holding xdf_dev_lk because xenbus_printf() can
	 * block while waiting for a PIL 1 interrupt message.  this
	 * would cause a deadlock with xdf_intr() which needs to grab
	 * xdf_dev_lk as well and runs at PIL 5.
	 */
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		return (ENXIO);

	/* Check if we support media requests */
	if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
		return (ENOTTY);

	/* If an eject is pending then don't allow any new requests */
	if (xdf_eject_pending(vdp))
		return (ENXIO);

	/* Make sure that there is media present */
	if (media_required && (vdp->xdf_xdev_nblocks == 0))
		return (ENXIO);

	/* We only allow operations when the device is ready and connected */
	if (vdp->xdf_state != XD_READY)
		return (EIO);

	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
		return (EIO);

	return (0);
}
/*
 * populate a single blkif_request_t w/ a buf
 */
static void
xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
	grant_ref_t	gr;
	uint8_t		fsect, lsect;
	size_t		bcnt;
	paddr_t		dma_addr;
	off_t		blk_off;
	dev_info_t	*dip = vdp->xdf_dip;
	blkif_vdev_t	vdev = xvdi_get_vdevnum(dip);
	v_req_t		*vreq = BP_VREQ(bp);
	uint64_t	blkno = vreq->v_blkno;
	uint_t		ndmacs = vreq->v_ndmacs;
	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
	int		seg = 0;
	int		isread = IS_READ(bp);
	ge_slot_t	*gs = list_head(&vreq->v_gs);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(vreq->v_status == VREQ_GS_ALLOCED);

	if (isread)
		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
	else {
		switch (vreq->v_flush_diskcache) {
		case FLUSH_DISKCACHE:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_FLUSH_DISKCACHE);
			ddi_put16(acchdl, &rreq->handle, vdev);
			ddi_put64(acchdl, &rreq->id,
			    (uint64_t)(uintptr_t)(gs));
			ddi_put8(acchdl, &rreq->nr_segments, 0);
			vreq->v_status = VREQ_DMAWIN_DONE;
			return;
		case WRITE_BARRIER:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_WRITE_BARRIER);
			break;
		default:
			if (!vdp->xdf_wce)
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE_BARRIER);
			else
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE);
			break;
		}
	}

	ddi_put16(acchdl, &rreq->handle, vdev);
	ddi_put64(acchdl, &rreq->sector_number, blkno);
	ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));

	/*
	 * loop until all segments are populated or no more dma cookie in buf
	 */
	for (;;) {
		/*
		 * Each segment of a blkif request can transfer up to
		 * one 4K page of data.
		 */
		bcnt = vreq->v_dmac.dmac_size;
		dma_addr = vreq->v_dmac.dmac_laddress;
		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
		fsect = blk_off >> XB_BSHIFT;
		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;

		ASSERT(bcnt <= PAGESIZE);
		ASSERT((bcnt % XB_BSIZE) == 0);
		ASSERT((blk_off & XB_BMASK) == 0);
		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
		    lsect < XB_MAX_SEGLEN / XB_BSIZE);

		gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);

		DPRINTF(IO_DBG, (
		    "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
		    vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
		DPRINTF(IO_DBG, (
		    "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
		    vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));

		blkno += (bcnt >> XB_BSHIFT);
		seg++;
		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		if (--ndmacs) {
			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
			continue;
		}

		vreq->v_status = VREQ_DMAWIN_DONE;
		vreq->v_blkno = blkno;
		break;
	}
	ddi_put8(acchdl, &rreq->nr_segments, seg);
	DPRINTF(IO_DBG, (
	    "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
	    vdp->xdf_addr, rreq->id));
}
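
/*
 * Main IO dispatch routine: for each serviceable buf, allocate DMA and
 * grant table resources, populate a ring request, and push the
 * request(s) to the backend.
 */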
static void
xdf_io_start(xdf_t *vdp)
{
	struct buf	*bp;
	v_req_t		*vreq;
	blkif_request_t	*rreq;
	boolean_t	rreqready = B_FALSE;

	mutex_enter(&vdp->xdf_dev_lk);

	/*
	 * Populate the ring request(s).  Loop until there is no buf to
	 * transfer or no free slot available in I/O ring.
	 */
	for (;;) {
		/* don't start any new IO if we're suspending */
		if (vdp->xdf_suspending)
			break;
		if ((bp = xdf_bp_next(vdp)) == NULL)
			break;

		/* if the buf doesn't already have a vreq, allocate one */
		if (((vreq = BP_VREQ(bp)) == NULL) &&
		    ((vreq = vreq_get(vdp, bp)) == NULL))
			break;

		/* alloc DMA/GTE resources */
		if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
			break;

		/* get next blkif_request in the ring */
		if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
			break;
		bzero(rreq, sizeof (blkif_request_t));
		rreqready = B_TRUE;

		/* populate blkif_request with this buf */
		xdf_process_rreq(vdp, bp, rreq);

		/*
		 * This buffer/vreq pair has been allocated ring buffer
		 * resources, so if it isn't already in our runq, add it.
		 */
		if (!vreq->v_runq)
			xdf_kstat_waitq_to_runq(vdp, bp);
	}

	/* Send the request(s) to the backend */
	if (rreqready)
		xdf_ring_push(vdp);

	mutex_exit(&vdp->xdf_dev_lk);
}
/* check if partition is open, -1 - check all partitions on the disk */
static boolean_t
xdf_isopen(xdf_t *vdp, int partition)
{
	int i;
	ulong_t parbit;
	boolean_t rval = B_FALSE;

	ASSERT((partition == -1) ||
	    ((partition >= 0) && (partition < XDF_PEXT)));

	if (partition == -1)
		parbit = (ulong_t)-1;
	else
		parbit = 1 << partition;

	for (i = 0; i < OTYPCNT; i++) {
		if (vdp->xdf_vd_open[i] & parbit)
			rval = B_TRUE;
	}

	return (rval);
}
/*
 * The connection should never be closed as long as someone is holding
 * us open, there is pending IO, or someone is waiting for a
 * connection.
 */
static boolean_t
xdf_busy(xdf_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if ((vdp->xdf_xb_ring != NULL) &&
	    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (xdf_isopen(vdp, -1)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (vdp->xdf_connect_req > 0) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	return (B_FALSE);
}
*vdp
, xdf_state_t new_state
)
1460 ASSERT(MUTEX_HELD(&vdp
->xdf_cb_lk
));
1461 ASSERT(MUTEX_HELD(&vdp
->xdf_dev_lk
));
1462 DPRINTF(DDI_DBG
, ("xdf@%s: state change %d -> %d\n",
1463 vdp
->xdf_addr
, vdp
->xdf_state
, new_state
));
1464 vdp
->xdf_state
= new_state
;
1465 cv_broadcast(&vdp
->xdf_dev_cv
);
static void
xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
{
	dev_info_t	*dip = vdp->xdf_dip;
	boolean_t	busy;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
	ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));

	/* Check if we're already there. */
	if (vdp->xdf_state == new_state)
		return;

	mutex_enter(&vdp->xdf_dev_lk);
	busy = xdf_busy(vdp);

	/* If we're already closed then there's nothing to do. */
	if (vdp->xdf_state == XD_CLOSED) {
		xdf_set_state(vdp, new_state);
		mutex_exit(&vdp->xdf_dev_lk);
		return;
	}

	/* UhOh.  Warn the user that something bad has happened. */
	if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
	    (vdp->xdf_xdev_nblocks != 0)) {
		cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
		    vdp->xdf_addr);
	}

	xdf_ring_destroy(vdp);

	/* If we're busy then we can only go into the unknown state */
	xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
	mutex_exit(&vdp->xdf_dev_lk);

	/* if we're closed now, let the other end know */
	if (vdp->xdf_state == XD_CLOSED)
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}
/*
 * Kick-off connect process
 * Status should be XD_UNKNOWN or XD_CLOSED
 * On success, status will be changed to XD_INIT
 * On error, it will be changed to XD_UNKNOWN
 */
static int
xdf_setstate_init(xdf_t *vdp)
{
	dev_info_t		*dip = vdp->xdf_dip;
	xenbus_transaction_t	xbt;
	grant_ref_t		gref;
	char			*xsname, *str;
	int			rv;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
	ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
	    (vdp->xdf_state == XD_CLOSED));

	DPRINTF(DDI_DBG,
	    ("xdf@%s: starting connection process\n", vdp->xdf_addr));

	/*
	 * If an eject is pending then don't allow a new connection.
	 * (Only the backend can clear media request eject request.)
	 */
	if (xdf_eject_pending(vdp))
		return (DDI_FAILURE);

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		goto errout;

	if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
		goto errout;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);

	/*
	 * Sanity check for the existence of the xenbus device-type property.
	 * This property might not exist if our xenbus device nodes were
	 * force destroyed while we were still connected to the backend.
	 */
	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
		goto errout;
	strfree(str);

	if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
		goto errout;

	vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
	ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
#else /* !XPV_HVM_DRIVER */
	if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
		    "failed to add intr handler", vdp->xdf_addr);
		goto errout1;
	}
#endif /* !XPV_HVM_DRIVER */

	if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
	    sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
		    vdp->xdf_addr);
		goto errout2;
	}
	vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */

	/*
	 * Write into xenstore the info needed by backend
	 */
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
		    vdp->xdf_addr);
		xvdi_fatal_error(dip, EIO, "connect transaction init");
		goto fail_trans;
	}

	/*
	 * XBP_PROTOCOL is written by the domain builder in the case of PV
	 * domains.  However, it is not written for HVM domains, so let's
	 * write it here.
	 */
	if (((rv = xenbus_printf(xbt, xsname,
	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
	    ((rv = xenbus_printf(xbt, xsname,
	    XBP_RING_REF, "%u", gref)) != 0) ||
	    ((rv = xenbus_printf(xbt, xsname,
	    XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
	    ((rv = xenbus_printf(xbt, xsname,
	    XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
	    ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
		(void) xenbus_transaction_end(xbt, 1);
		xvdi_fatal_error(dip, rv, "connect transaction setup");
		goto fail_trans;
	}

	/* kick-off connect process */
	if (rv = xenbus_transaction_end(xbt, 0)) {
		if (rv == EAGAIN)
			goto trans_retry;
		xvdi_fatal_error(dip, rv, "connect transaction commit");
		goto fail_trans;
	}

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	xdf_set_state(vdp, XD_INIT);
	mutex_exit(&vdp->xdf_dev_lk);

	return (DDI_SUCCESS);

fail_trans:
	xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
errout1:
	xvdi_free_evtchn(dip);
	vdp->xdf_evtchn = INVALID_EVTCHN;
errout:
	xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
	cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
	    vdp->xdf_addr);
	return (DDI_FAILURE);
}
static int
xdf_get_flush_block(xdf_t *vdp)
{
	/*
	 * Get a DEV_BSIZE aligned buffer
	 */
	vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
	vdp->xdf_cache_flush_block =
	    (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
	    (int)vdp->xdf_xdev_secsize);

	if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
	    xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
		return (DDI_FAILURE);
	return (DDI_SUCCESS);
}
static void
xdf_setstate_ready(void *arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	vdp->xdf_ready_tq_thread = curthread;

	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * value in xdf_attach() to make it possible to block in xdf_open(),
	 * in case there's anyone (say, booting thread) ever trying to open
	 * it before connected to backend.  We will refresh all those minor
	 * nodes w/ latest info we've got now when we are almost connected.
	 */
	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_cmbl_reattach) {
		vdp->xdf_cmbl_reattach = B_FALSE;

		mutex_exit(&vdp->xdf_dev_lk);
		if (xdf_cmlb_attach(vdp) != 0) {
			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
			return;
		}
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* If we're not still trying to get to the ready state, then bail. */
	if (vdp->xdf_state != XD_CONNECTED) {
		mutex_exit(&vdp->xdf_dev_lk);
		return;
	}
	mutex_exit(&vdp->xdf_dev_lk);

	/*
	 * If backend has feature-barrier, see if it supports disk
	 * cache flush op.
	 */
	vdp->xdf_flush_supported = B_FALSE;
	if (vdp->xdf_feature_barrier) {
		/*
		 * Pretend we already know flush is supported so probe
		 * will attempt the correct op.
		 */
		vdp->xdf_flush_supported = B_TRUE;
		if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
			vdp->xdf_flush_supported = B_TRUE;
		} else {
			vdp->xdf_flush_supported = B_FALSE;
			/*
			 * If the other end does not support the cache flush op
			 * then we must use a barrier-write to force disk
			 * cache flushing.  Barrier writes require that a data
			 * block actually be written.
			 * Cache a block to barrier-write when we are
			 * asked to perform a flush.
			 * XXX - would it be better to just copy 1 block
			 * (512 bytes) from whatever write we did last
			 * and rewrite that block?
			 */
			if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
				xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
				return;
			}
		}
	}

	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_state == XD_CONNECTED)
		xdf_set_state(vdp, XD_READY);
	mutex_exit(&vdp->xdf_dev_lk);

	/* Restart any currently queued up io */
	xdf_io_start(vdp);

	mutex_exit(&vdp->xdf_cb_lk);
}
/*
 * synthetic geometry
 */
#define	XDF_NSECTS	256
#define	XDF_NHEADS	16

static void
xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t *vdp;
	uint_t ncyl;

	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));

	ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);

	bzero(geomp, sizeof (*geomp));
	geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
	geomp->g_acyl = 0;
	geomp->g_nhead = XDF_NHEADS;
	geomp->g_nsect = XDF_NSECTS;
	geomp->g_secsize = vdp->xdf_xdev_secsize;
	geomp->g_capacity = vdp->xdf_xdev_nblocks;
	geomp->g_intrlv = 0;
	geomp->g_rpm = 7200;
}
/*
 * Finish other initialization after we've connected to backend
 * Status should be XD_INIT before calling this routine
 * On success, status should be changed to XD_CONNECTED.
 * On error, status should stay XD_INIT
 */
static int
xdf_setstate_connected(xdf_t *vdp)
{
	dev_info_t	*dip = vdp->xdf_dip;
	cmlb_geom_t	pgeom;
	diskaddr_t	nblocks = 0;
	uint_t		secsize = 0;
	char		*oename, *xsname, *str;
	uint_t		dinfo;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
	ASSERT(vdp->xdf_state == XD_INIT);

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((oename = xvdi_get_oename(dip)) == NULL))
		return (DDI_FAILURE);

	/* Make sure the other end is XenbusStateConnected */
	if (xenbus_read_driver_state(oename) != XenbusStateConnected)
		return (DDI_FAILURE);

	/* Determine if feature barrier is supported by backend */
	if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
		cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported",
		    vdp->xdf_addr);

	/*
	 * Probe backend.  Read the device size into xdf_xdev_nblocks
	 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
	 * flags in xdf_dinfo.  If the emulated device type is "cdrom",
	 * we always set VDISK_CDROM, regardless of if it's present in
	 * the xenbus info parameter.
	 */
	if (xenbus_gather(XBT_NULL, oename,
	    XBP_SECTORS, "%"SCNu64, &nblocks,
	    XBP_SECTOR_SIZE, "%u", &secsize,
	    XBP_INFO, "%u", &dinfo,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
		    "cannot read backend info", vdp->xdf_addr);
		return (DDI_FAILURE);
	}
	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
		cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
		    vdp->xdf_addr);
		return (DDI_FAILURE);
	}
	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
		dinfo |= VDISK_CDROM;
	strfree(str);

	if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
		secsize = DEV_BSIZE;
	vdp->xdf_xdev_nblocks = nblocks;
	vdp->xdf_xdev_secsize = secsize;
#ifdef _ILP32
	if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
		    "backend disk device too large with %llu blocks for"
		    " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
		xvdi_fatal_error(dip, EFBIG, "reading backend info");
		return (DDI_FAILURE);
	}
#endif /* _ILP32 */

	/*
	 * If the physical geometry for a fixed disk has been explicitly
	 * set then make sure that the specified physical geometry isn't
	 * larger than the device we connected to.
	 */
	if (vdp->xdf_pgeom_fixed &&
	    (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
		cmn_err(CE_WARN,
		    "xdf@%s: connect failed, fixed geometry too large",
		    vdp->xdf_addr);
		return (DDI_FAILURE);
	}

	vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);

	/* mark vbd is ready for I/O */
	mutex_enter(&vdp->xdf_dev_lk);
	xdf_set_state(vdp, XD_CONNECTED);

	/* check if the cmlb label should be updated */
	xdf_synthetic_pgeom(dip, &pgeom);
	if ((vdp->xdf_dinfo != dinfo) ||
	    (!vdp->xdf_pgeom_fixed &&
	    (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
		vdp->xdf_cmbl_reattach = B_TRUE;

		vdp->xdf_dinfo = dinfo;
		if (!vdp->xdf_pgeom_fixed)
			vdp->xdf_pgeom = pgeom;
	}

	if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
		if (vdp->xdf_xdev_nblocks == 0) {
			vdp->xdf_mstate = DKIO_EJECTED;
			cv_broadcast(&vdp->xdf_mstate_cv);
		} else {
			vdp->xdf_mstate = DKIO_INSERTED;
			cv_broadcast(&vdp->xdf_mstate_cv);
		}
	} else {
		if (vdp->xdf_mstate != DKIO_NONE) {
			vdp->xdf_mstate = DKIO_NONE;
			cv_broadcast(&vdp->xdf_mstate_cv);
		}
	}

	mutex_exit(&vdp->xdf_dev_lk);

	cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
	    (uint64_t)vdp->xdf_xdev_nblocks);

	/* Restart any currently queued up io */
	xdf_io_start(vdp);

	/*
	 * To get to the ready state we have to do IO to the backend device,
	 * but we can't initiate IO from the other end change callback thread
	 * (which is the current context we're executing in.)  This is because
	 * if the other end disconnects while we're doing IO from the callback
	 * thread, then we can't receive that disconnect event and we hang
	 * waiting for an IO that can never complete.
	 */
	(void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
	    DDI_SLEEP);

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return (DDI_SUCCESS);
}
/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
	    vdp->xdf_addr, new_state));

	mutex_enter(&vdp->xdf_cb_lk);

	/* We assume that this callback is single threaded */
	ASSERT(vdp->xdf_oe_change_thread == NULL);
	DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);

	/* ignore any backend state changes if we're suspending/suspended */
	if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
		DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
		mutex_exit(&vdp->xdf_cb_lk);
		return;
	}

	switch (new_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
		if (vdp->xdf_state == XD_INIT)
			break;

		xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
		if (xdf_setstate_init(vdp) != DDI_SUCCESS)
			break;
		ASSERT(vdp->xdf_state == XD_INIT);
		break;

	case XenbusStateConnected:
		if ((vdp->xdf_state == XD_CONNECTED) ||
		    (vdp->xdf_state == XD_READY))
			break;

		if (vdp->xdf_state != XD_INIT) {
			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
			if (xdf_setstate_init(vdp) != DDI_SUCCESS)
				break;
			ASSERT(vdp->xdf_state == XD_INIT);
		}

		if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
			break;
		}
		ASSERT(vdp->xdf_state == XD_CONNECTED);
		break;

	case XenbusStateClosing:
		if (xdf_isopen(vdp, -1)) {
			cmn_err(CE_NOTE,
			    "xdf@%s: hot-unplug failed, still in use",
			    vdp->xdf_addr);
			break;
		}
		/*FALLTHROUGH*/
	case XenbusStateClosed:
		xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
		break;
	}

	/* notify anybody waiting for oe state change */
	cv_broadcast(&vdp->xdf_dev_cv);
	DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
	mutex_exit(&vdp->xdf_cb_lk);
}
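
/*
 * Wait for a backend connection (the XD_READY state).  The calling
 * thread may be drafted to drive the connection process itself; if wait
 * is B_FALSE we make a single connection attempt and return the current
 * state.
 */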
static xdf_state_t
xdf_connect_locked(xdf_t *vdp, boolean_t wait)
{
	int	rv, timeouts = 0, reset = 20;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	/* we can't connect once we're in the closed state */
	if (vdp->xdf_state == XD_CLOSED)
		return (XD_CLOSED);

	vdp->xdf_connect_req++;
	while (vdp->xdf_state != XD_READY) {
		mutex_exit(&vdp->xdf_dev_lk);

		/* only one thread at a time can be the connection thread */
		if (vdp->xdf_connect_thread == NULL)
			vdp->xdf_connect_thread = curthread;

		if (vdp->xdf_connect_thread == curthread) {
			if ((timeouts > 0) && ((timeouts % reset) == 0)) {
				/*
				 * If we haven't established a connection
				 * within the reset time, then disconnect
				 * so we can try again, and double the reset
				 * time.  The reset time starts at 2 sec.
				 */
				(void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
				reset *= 2;
			}
			if (vdp->xdf_state == XD_UNKNOWN)
				(void) xdf_setstate_init(vdp);
			if (vdp->xdf_state == XD_INIT)
				(void) xdf_setstate_connected(vdp);
		}

		mutex_enter(&vdp->xdf_dev_lk);
		if (!wait || (vdp->xdf_state == XD_READY))
			goto out;

		mutex_exit((&vdp->xdf_cb_lk));
		if (vdp->xdf_connect_thread != curthread) {
			rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
		} else {
			/* delay for 0.1 sec */
			rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv,
			    &vdp->xdf_dev_lk, drv_usectohz(100*1000),
			    TR_CLOCK_TICK);
			if (rv == -1)
				timeouts++;
		}
		mutex_exit((&vdp->xdf_dev_lk));
		mutex_enter((&vdp->xdf_cb_lk));
		mutex_enter((&vdp->xdf_dev_lk));

		/* we were interrupted by a signal */
		if (rv == 0)
			goto out;
	}

out:
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_connect_thread == curthread) {
		/*
		 * wake up someone else so they can become the connection
		 * thread.
		 */
		cv_signal(&vdp->xdf_dev_cv);
		vdp->xdf_connect_thread = NULL;
	}

	/* Try to lock the media */
	mutex_exit((&vdp->xdf_dev_lk));
	(void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
	mutex_enter((&vdp->xdf_dev_lk));

	vdp->xdf_connect_req--;
	return (vdp->xdf_state);
}
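
/*
 * Soft interrupt handler used to restart IO that was previously blocked
 * waiting for DMA or grant table resources (see xdf_dmacallback()).
 */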
static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_io_start(vdp);

	return (DDI_INTR_CLAIMED);
}
#ifdef XPV_HVM_DRIVER

typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;
	char		*xdf_he_path;
	dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;
static xdf_hvm_entry_t *
i_xdf_hvm_find(const char *path, dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	ASSERT((path != NULL) || (dip != NULL));
	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

	i = list_head(&xdf_hvm_list);
	while (i != NULL) {
		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		break;
	}
	return (i);
}
dev_info_t *
xdf_hvm_hold(const char *path)
{
	xdf_hvm_entry_t	*i;
	dev_info_t	*dip;

	mutex_enter(&xdf_hvm_list_lock);
	if ((i = i_xdf_hvm_find(path, NULL)) == NULL) {
		mutex_exit(&xdf_hvm_list_lock);
		return (NULL);
	}
	ndi_hold_devi(dip = i->xdf_he_dip);
	mutex_exit(&xdf_hvm_list_lock);
	return (dip);
}
void
xdf_hvm_add(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;
	char		*path;

	/* figure out the path for the dip */
	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	i = kmem_alloc(sizeof (*i), KM_SLEEP);
	i->xdf_he_dip = dip;
	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

	mutex_enter(&xdf_hvm_list_lock);
	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
	list_insert_head(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(path, MAXPATHLEN);
}
void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}
static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}
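
/*
 * Wait for the backend hotplug scripts to run, then try to establish a
 * connection to the backend device.  Returns B_TRUE once the device has
 * reached the XD_READY state.
 */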
boolean_t
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	char	*oename, *str;
	int	rv;

	mutex_enter(&vdp->xdf_cb_lk);

	/*
	 * Before try to establish a connection we need to wait for the
	 * backend hotplug scripts to have run.  Once they are run the
	 * "<oename>/hotplug-status" property will be set to "connected".
	 */
	for (;;) {
		ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));

		/*
		 * Get the xenbus path to the backend device.  Note that
		 * we can't cache this path (and we look it up on each pass
		 * through this loop) because it could change during
		 * suspend, resume, and migration operations.
		 */
		if ((oename = xvdi_get_oename(dip)) == NULL) {
			mutex_exit(&vdp->xdf_cb_lk);
			return (B_FALSE);
		}

		str = NULL;
		if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
		    (strcmp(str, XBV_HP_STATUS_CONN) == 0))
			break;

		if (str != NULL)
			strfree(str);

		/* wait for an update to "<oename>/hotplug-status" */
		if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
			/* we got interrupted by a signal */
			mutex_exit(&vdp->xdf_cb_lk);
			return (B_FALSE);
		}
	}

	/* Good news.  The backend hotplug scripts have been run. */
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
	strfree(str);

	/*
	 * If we're emulating a cd device and if the backend doesn't support
	 * media request operations, then we're not going to bother trying
	 * to establish a connection for a couple reasons.  First off, media
	 * requests support is required to support operations like eject and
	 * media locking.  Second, other backend platforms like Linux don't
	 * support hvm pv cdrom access.  They don't even have a backend pv
	 * driver for cdrom device nodes, so we don't want to block forever
	 * waiting for a connection to a backend driver that doesn't exist.
	 */
	if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (B_FALSE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect_locked(vdp, B_TRUE);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	return ((rv == XD_READY) ? B_TRUE : B_FALSE);
}
int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* sanity check the requested physical geometry */
	mutex_enter(&vdp->xdf_dev_lk);
	if ((geomp->g_secsize != XB_BSIZE) ||
	    (geomp->g_capacity == 0)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	/*
	 * If we've already connected to the backend device then make sure
	 * we're not defining a physical geometry larger than our backend
	 * device.
	 */
	if ((vdp->xdf_xdev_nblocks != 0) &&
	    (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
	vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
	vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
	vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
	vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
	vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
	vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
	vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
	vdp->xdf_pgeom.g_rpm = geomp->g_rpm;

	vdp->xdf_pgeom_fixed = B_TRUE;
	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	return (0);
}
boolean_t
xdf_is_cd(dev_info_t *dip)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	boolean_t rv;

	mutex_enter(&vdp->xdf_cb_lk);
	rv = XD_IS_CD(vdp);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

boolean_t
xdf_is_rm(dev_info_t *dip)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	boolean_t rv;

	mutex_enter(&vdp->xdf_cb_lk);
	rv = XD_IS_RM(vdp);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

boolean_t
xdf_media_req_supported(dev_info_t *dip)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	boolean_t rv;

	mutex_enter(&vdp->xdf_cb_lk);
	rv = vdp->xdf_media_req_supported;
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

#endif /* XPV_HVM_DRIVER */

static int
xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
{
	xdf_t	*vdp;

	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));

	if (vdp == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	*capp = vdp->xdf_pgeom.g_capacity;
	DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
		return (ENXIO);
	*geomp = vdp->xdf_pgeom;
	return (0);
}

/*
 * No real HBA, no geometry available from it
 */
/*ARGSUSED*/
static int
xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	return (EINVAL);
}

static int
xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
{
	xdf_t *vdp;

	if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
		return (ENXIO);

	if (XD_IS_RO(vdp))
		tgattributep->media_is_writable = 0;
	else
		tgattributep->media_is_writable = 1;
	tgattributep->media_is_rotational = 0;
	return (0);
}

/*ARGSUSED*/
static int
xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	int	instance;
	xdf_t	*vdp;

	instance = ddi_get_instance(dip);

	if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
		return (ENXIO);

	switch (cmd) {
	case TG_GETPHYGEOM:
		return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
	case TG_GETVIRTGEOM:
		return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
	case TG_GETCAPACITY:
		return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
	case TG_GETBLOCKSIZE:
		mutex_enter(&vdp->xdf_cb_lk);
		*(uint32_t *)arg = vdp->xdf_xdev_secsize;
		mutex_exit(&vdp->xdf_cb_lk);
		return (0);
	case TG_GETATTR:
		return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
	default:
		return (ENOTTY);
	}
}

/*
 * Xdf specific direct IO function
 */
/*ARGSUSED*/
int
xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
    diskaddr_t start, size_t reqlen, void *tg_cookie)
{
	xdf_t		*vdp;
	struct buf	*bp;
	int		err;

	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
	if (vdp == NULL)
		return (ENXIO);

	/* We don't allow IO from the oe_change callback thread */
	ASSERT(curthread != vdp->xdf_oe_change_thread);

	/*
	 * Having secsize of 0 means that device isn't connected yet.
	 * FIXME This happens for CD devices, and there's nothing we
	 * can do about it at the moment.
	 */
	if (vdp->xdf_xdev_secsize == 0)
		return (EIO);

	if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
	    >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
		return (EINVAL);

	bp = getrbuf(KM_SLEEP);
	if (cmd == TG_READ)
		bp->b_flags = B_BUSY | B_READ;
	else
		bp->b_flags = B_BUSY | B_WRITE;

	bp->b_un.b_addr = bufp;
	bp->b_bcount = reqlen;
	bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */

	mutex_enter(&vdp->xdf_dev_lk);
	xdf_bp_push(vdp, bp);
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_io_start(vdp);
	if (curthread == vdp->xdf_ready_tq_thread)
		(void) xdf_ring_drain(vdp);
	err = biowait(bp);
	ASSERT(bp->b_flags & B_DONE);
	freerbuf(bp);
	return (err);
}
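
/*
 * Worked example: the range check in xdf_lb_rdwr() above reduces to
 * "start + reqlen / secsize > capacity", because
 * (reqlen / (secsize / DEV_BSIZE)) >> DEV_BSHIFT == reqlen / secsize.
 * For instance, with a 4096-byte backend sector size an 8192-byte
 * request gives 8192 / (4096 / 512) = 1024, and 1024 >> 9 = 2 sectors.
 */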

/*
 * Lock the current media.  Set the media state to "lock".
 * (Media locks are only respected by the backend driver.)
 */
static int
xdf_ioctl_mlock(xdf_t *vdp)
{
	int rv;

	mutex_enter(&vdp->xdf_cb_lk);
	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

/*
 * Release a media lock.  Set the media state to "none".
 */
static int
xdf_ioctl_munlock(xdf_t *vdp)
{
	int rv;

	mutex_enter(&vdp->xdf_cb_lk);
	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

/*
 * Eject the current media.  Ignores any media locks.  (Media locks
 * are only for the benefit of the backend.)
 */
static int
xdf_ioctl_eject(xdf_t *vdp)
{
	int rv;

	mutex_enter(&vdp->xdf_cb_lk);
	if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (rv);
	}

	/*
	 * We've set the media request xenbus parameter to eject, so now
	 * disconnect from the backend, wait for the backend to clear
	 * the media request xenbus parameter, and then we can reconnect
	 * to the backend.
	 */
	(void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_cb_lk);
		return (EIO);
	}
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);
	return (0);
}
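
/*
 * Note on the eject handshake above: the backend is expected to observe
 * XBV_MEDIA_REQ_EJECT via xenstore, perform the eject, and then clear
 * the media-req property; only after that can xdf_connect_locked()
 * bring the device back to XD_READY.
 */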

/*
 * Watch for media state changes.  This can be an insertion of a device
 * (triggered by a 'xm block-configure' request in another domain) or
 * the ejection of a device (triggered by a local "eject" operation).
 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
 */
static int
xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
{
	enum dkio_state		prev_state;

	mutex_enter(&vdp->xdf_cb_lk);
	prev_state = vdp->xdf_mstate;

	if (vdp->xdf_mstate == mstate) {
		while (vdp->xdf_mstate == prev_state) {
			if (cv_wait_sig(&vdp->xdf_mstate_cv,
			    &vdp->xdf_cb_lk) == 0) {
				mutex_exit(&vdp->xdf_cb_lk);
				return (EINTR);
			}
		}
	}

	if ((prev_state != DKIO_INSERTED) &&
	    (vdp->xdf_mstate == DKIO_INSERTED)) {
		(void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
		mutex_exit(&vdp->xdf_cb_lk);
		return (0);
	}

	mutex_exit(&vdp->xdf_cb_lk);
	return (0);
}
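
/*
 * Hypothetical userland consumer of the DKIOCSTATE ioctl serviced by
 * xdf_dkstate() above -- a minimal media watch loop, assuming fd is an
 * open descriptor for an xdf cdrom minor node:
 *
 *	enum dkio_state state = DKIO_NONE;
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			handle_media_arrival();	(hypothetical helper)
 *	}
 *
 * Each call blocks until the media state differs from the state passed
 * in, per dkio(7I).
 */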

/*ARGSUSED*/
static int
xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	minor_t		minor = getminor(dev);
	int		part = XDF_PART(minor);
	xdf_t		*vdp;
	int		rv;

	if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
	    (!xdf_isopen(vdp, part)))
		return (ENXIO);

	DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
	    vdp->xdf_addr, cmd, cmd));

	switch (cmd) {
	default:
		return (ENOTTY);
	case DKIOCG_PHYGEOM:
	case DKIOCG_VIRTGEOM:
	case DKIOCGGEOM:
	case DKIOCSGEOM:
	case DKIOCGAPART:
	case DKIOCSAPART:
	case DKIOCGVTOC:
	case DKIOCSVTOC:
	case DKIOCPARTINFO:
	case DKIOCGEXTVTOC:
	case DKIOCSEXTVTOC:
	case DKIOCEXTPARTINFO:
	case DKIOCGMBOOT:
	case DKIOCSMBOOT:
	case DKIOCGETEFI:
	case DKIOCSETEFI:
	case DKIOCSETEXTPART:
	case DKIOCPARTITION:
		rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
		    rvalp, NULL);
		if (rv != 0)
			return (rv);
		/*
		 * If we're labelling the disk, we have to update the geometry
		 * in the cmlb data structures, and we also have to write a new
		 * devid to the disk.  Note that writing an EFI label currently
		 * requires 4 ioctls, and devid setup will fail on all but the
		 * last one.
		 */
		if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC ||
		    cmd == DKIOCSETEFI) {
			rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0);
			if (rv == 0) {
				xdf_devid_setup(vdp);
			} else {
				cmn_err(CE_WARN,
				    "xdf@%s, labeling failed on validate",
				    vdp->xdf_addr);
			}
		}
		return (rv);
	case DKIOCEJECT:
		return (xdf_ioctl_eject(vdp));
	case DKIOCLOCK:
		return (xdf_ioctl_mlock(vdp));
	case DKIOCUNLOCK:
		return (xdf_ioctl_munlock(vdp));
	case CDROMREADOFFSET: {
		int offset = 0;
		if (!XD_IS_CD(vdp))
			return (ENOTTY);
		if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCGMEDIAINFO: {
		struct dk_minfo media_info;

		media_info.dki_lbsize = vdp->xdf_xdev_secsize;
		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
		if (XD_IS_CD(vdp))
			media_info.dki_media_type = DK_CDROM;
		else
			media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state mstate;

		if (ddi_copyin((void *)arg, &mstate,
		    sizeof (mstate), mode) != 0)
			return (EFAULT);
		if ((rv = xdf_dkstate(vdp, mstate)) != 0)
			return (rv);
		mstate = vdp->xdf_mstate;
		if (ddi_copyout(&mstate, (void *)arg,
		    sizeof (mstate), mode) != 0)
			return (EFAULT);
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i = BOOLEAN2VOID(XD_IS_RM(vdp));
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i = BOOLEAN2VOID(XD_IS_RM(vdp));
		if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCSETWCE: {
		int i;
		if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
			return (EFAULT);
		vdp->xdf_wce = VOID2BOOLEAN(i);
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = (struct dk_callback *)arg;

		if (vdp->xdf_flush_supported) {
			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    NULL, 0, 0, (void *)dev);
		} else if (vdp->xdf_feature_barrier &&
		    !xdf_barrier_flush_disable) {
			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    vdp->xdf_cache_flush_block, xdf_flush_block,
			    vdp->xdf_xdev_secsize, (void *)dev);
		} else {
			return (ENOTTY);
		}

		if ((mode & FKIOCTL) && (dkc != NULL) &&
		    (dkc->dkc_callback != NULL)) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
			/* need to return 0 after calling callback */
			rv = 0;
		}
		return (rv);
	}
	}
	/*NOTREACHED*/
}
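
/*
 * Note: DKIOCFLUSHWRITECACHE above has two backend paths -- a native
 * flush (an empty TG_WRITE) when the backend advertises flush support,
 * and otherwise a write of the dedicated barrier block when write
 * barriers are available.  For kernel-issued ioctls (FKIOCTL) the
 * caller's dk_callback receives the real result while the ioctl itself
 * returns 0.
 */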

int
xdf_strategy(struct buf *bp)
{
	xdf_t		*vdp;
	minor_t		minor;
	diskaddr_t	p_blkct, p_blkst;
	daddr_t		blkno;
	ulong_t		nblks;
	int		part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);
	vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));

	mutex_enter(&vdp->xdf_dev_lk);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, ENXIO, 0);
		return (0);
	}

	/* We don't allow IO from the oe_change callback thread */
	ASSERT(curthread != vdp->xdf_oe_change_thread);

	/* Check for writes to a read only device */
	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, EROFS, 0);
		return (0);
	}

	/* Check if this I/O is accessing a partition or the entire disk */
	if ((long)bp->b_private == XB_SLICE_NONE) {
		/* This I/O is using an absolute offset */
		p_blkct = vdp->xdf_xdev_nblocks;
		p_blkst = 0;
	} else {
		/* This I/O is using a partition relative offset */
		mutex_exit(&vdp->xdf_dev_lk);
		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
		    &p_blkst, NULL, NULL, NULL)) {
			xdf_io_err(bp, ENXIO, 0);
			return (0);
		}
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/*
	 * Adjust the real blkno and bcount according to the underlying
	 * physical sector size.
	 */
	blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);

	/* check for a starting block beyond the disk or partition limit */
	if (blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
		    vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, EINVAL, 0);
		return (0);
	}

	/* Legacy: don't set error flag at this case */
	if (blkno == p_blkct) {
		mutex_exit(&vdp->xdf_dev_lk);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* sanitize the input buf */
	bioerror(bp, 0);
	bp->b_resid = 0;
	bp->av_back = bp->av_forw = NULL;

	/* Adjust for partial transfer, this will result in an error later */
	if (vdp->xdf_xdev_secsize != 0 &&
	    vdp->xdf_xdev_secsize != XB_BSIZE) {
		nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
	} else {
		nblks = bp->b_bcount >> XB_BSHIFT;
	}

	if ((blkno + nblks) > p_blkct) {
		if (vdp->xdf_xdev_secsize != 0 &&
		    vdp->xdf_xdev_secsize != XB_BSIZE) {
			bp->b_resid =
			    ((blkno + nblks) - p_blkct) *
			    vdp->xdf_xdev_secsize;
		} else {
			bp->b_resid =
			    ((blkno + nblks) - p_blkct) <<
			    XB_BSHIFT;
		}
		bp->b_bcount -= bp->b_resid;
	}

	DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
	    vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));

	/* Fix up the buf struct */
	bp->b_flags |= B_BUSY;
	bp->b_private = (void *)(uintptr_t)p_blkst;

	xdf_bp_push(vdp, bp);
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_io_start(vdp);
	if (do_polled_io)
		(void) xdf_ring_drain(vdp);
	return (0);
}
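
/*
 * Worked example: xdf_strategy() receives b_blkno in DEV_BSIZE (512-byte)
 * units and rescales it to backend sectors.  With a 4096-byte backend
 * sector size, blkno = b_blkno / (4096 / 512) = b_blkno / 8 and
 * nblks = b_bcount / 4096; any tail that extends past the partition is
 * trimmed from b_bcount and reported back in b_resid (in bytes).
 */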

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t		*vdp;
	minor_t		minor;
	diskaddr_t	p_blkcnt;
	int		part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
	    vdp->xdf_addr, (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t		*vdp;
	minor_t		minor;
	diskaddr_t	p_blkcnt;
	int		part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
	    vdp->xdf_addr, (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t		*vdp;
	minor_t		minor;
	struct uio	*uiop = aiop->aio_uio;
	diskaddr_t	p_blkcnt;
	int		part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
}

/*ARGSUSED*/
static int
xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t		*vdp;
	minor_t		minor;
	struct uio	*uiop = aiop->aio_uio;
	diskaddr_t	p_blkcnt;
	int		part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
}
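
/*
 * Note: xdf_read(), xdf_write(), xdf_aread() and xdf_awrite() are thin
 * wrappers that validate the partition and offset and then delegate to
 * physio()/aphysio(), which split the request into buf(9S) transfers
 * (bounded by xdfmin()) and push each one through xdf_strategy() above.
 */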

static int
xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct buf	dumpbuf, *dbp = &dumpbuf;
	xdf_t		*vdp;
	minor_t		minor;
	int		err;
	int		part;
	diskaddr_t	p_blkcnt, p_blkst;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
	    vdp->xdf_addr, (void *)addr, blkno, nblk));

	/* We don't allow IO from the oe_change callback thread */
	ASSERT(curthread != vdp->xdf_oe_change_thread);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
	    NULL, NULL, NULL))
		return (ENXIO);

	if ((blkno + nblk) >
	    (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
		cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
		    vdp->xdf_addr, (daddr_t)((blkno + nblk) /
		    (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
		return (EINVAL);
	}

	bioinit(dbp);
	dbp->b_flags = B_BUSY;
	dbp->b_un.b_addr = addr;
	dbp->b_bcount = nblk << DEV_BSHIFT;
	dbp->b_blkno = blkno;
	dbp->b_edev = dev;
	dbp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	xdf_bp_push(vdp, dbp);
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_io_start(vdp);
	err = xdf_ring_drain(vdp);
	biofini(dbp);
	return (err);
}
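
/*
 * Note: xdf_dump() is the crash-dump entry point, so it cannot rely on
 * interrupts or biowait(); it queues the single transfer and then
 * synchronously polls the ring via xdf_ring_drain() until the backend
 * has consumed it.
 */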

/*ARGSUSED*/
static int
xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t		minor;
	xdf_t		*vdp;
	int		part;
	ulong_t		parbit;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}
	parbit = 1 << part;

	ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
	if (otyp == OTYP_LYR) {
		ASSERT(vdp->xdf_vd_lyropen[part] > 0);
		if (--vdp->xdf_vd_lyropen[part] == 0)
			vdp->xdf_vd_open[otyp] &= ~parbit;
	} else {
		vdp->xdf_vd_open[otyp] &= ~parbit;
	}
	vdp->xdf_vd_exclopen &= ~parbit;

	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}
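
/*
 * Illustration: open state is tracked as one bitmask per open type, with
 * bit (1 << part) set while that partition is open.  A block open of
 * slice 2, for example, sets bit 0x4 in xdf_vd_open[OTYP_BLK].  Layered
 * opens are additionally reference counted in xdf_vd_lyropen[], so the
 * bit is cleared only when the last layered open of that slice is
 * closed.
 */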

static int
xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minor;
	xdf_t		*vdp;
	int		part;
	ulong_t		parbit;
	diskaddr_t	p_blkct = 0;
	boolean_t	firstopen;
	boolean_t	nodelay;

	minor = getminor(*devp);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	nodelay = (flag & (FNDELAY | FNONBLOCK));

	DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_cb_lk);
		return (ENXIO);
	}
	mutex_exit(&vdp->xdf_cb_lk);

	if ((flag & FWRITE) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EROFS);
	}

	part = XDF_PART(minor);
	parbit = 1 << part;
	if ((vdp->xdf_vd_exclopen & parbit) ||
	    ((flag & FEXCL) && xdf_isopen(vdp, part))) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	/* are we the first one to open this node? */
	firstopen = !xdf_isopen(vdp, -1);

	if (otyp == OTYP_LYR)
		vdp->xdf_vd_lyropen[part]++;

	vdp->xdf_vd_open[otyp] |= parbit;

	if (flag & FEXCL)
		vdp->xdf_vd_exclopen |= parbit;

	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	if (firstopen)
		cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	/* If this is a non-blocking open then we're done */
	if (nodelay)
		return (0);

	/*
	 * This is a blocking open, so we require:
	 * - that the disk have a valid label on it
	 * - that the size of the partition that we're opening is non-zero
	 */
	if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) {
		(void) xdf_close(*devp, flag, otyp, credp);
		return (ENXIO);
	}

	return (0);
}

/*ARGSUSED*/
static void
xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	cv_broadcast(&vdp->xdf_hp_status_cv);
}

static int
xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    char *name, caddr_t valuep, int *lengthp)
{
	xdf_t	*vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));

	/*
	 * Sanity check that if a dev_t or dip were specified that they
	 * correspond to this device driver.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(ddi_driver_major(dip) == xdf_major);
	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major));
	if ((ddi_driver_major(dip) != xdf_major) ||
	    ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major)))
		return (DDI_PROP_NOT_FOUND);

	if (vdp == NULL)
		return (ddi_prop_op(dev, dip, prop_op, flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(vdp->xdf_vd_lbl,
	    dev, dip, prop_op, flags, name, valuep, lengthp,
	    XDF_PART(getminor(dev)), NULL));
}

/*ARGSUSED*/
static int
xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
{
	int	instance = XDF_INST(getminor((dev_t)arg));
	xdf_t	*vbdp;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) {
			*rp = NULL;
			return (DDI_FAILURE);
		}
		*rp = vbdp->xdf_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*rp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
xdf_resume(dev_info_t *dip)
{
	xdf_t	*vdp;
	char	*oename;

	if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
		goto err;

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr);

	mutex_enter(&vdp->xdf_cb_lk);

	if (xvdi_resume(dip) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto err;
	}

	if (((oename = xvdi_get_oename(dip)) == NULL) ||
	    (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
	    xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto err;
	}

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(vdp->xdf_state != XD_READY);
	xdf_set_state(vdp, XD_UNKNOWN);
	mutex_exit(&vdp->xdf_dev_lk);

	/* start re-connecting to the backend */
	if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto err;
	}

	mutex_exit(&vdp->xdf_cb_lk);

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr);
	return (DDI_SUCCESS);
err:
	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr);
	return (DDI_FAILURE);
}

/*
 * Uses the in-memory devid if one exists.
 *
 * Create a devid and write it on the first block of the last track of
 * the last cylinder.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
xdf_devid_fabricate(xdf_t *vdp)
{
	ddi_devid_t	devid = vdp->xdf_tgt_devid; /* null if no devid */
	struct dk_devid	*dkdevidp = NULL; /* devid struct stored on disk */
	diskaddr_t	blk;
	uint_t		*ip, chksum;
	int		i, devid_size;

	if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
		goto err;

	if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0,
	    NULL, &devid) != DDI_SUCCESS)
		goto err;

	/* allocate a buffer */
	dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);

	/* Fill in the revision */
	dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id */
	devid_size = ddi_devid_sizeof(devid);
	if (devid_size > DK_DEVID_SIZE)
		goto err;
	bcopy(devid, dkdevidp->dkd_devid, devid_size);

	/* Calculate the chksum */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
		chksum ^= ip[i];

	/* Fill in the checksum */
	DKD_FORMCHKSUM(chksum, dkdevidp);

	if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk,
	    NBPSCTR, NULL) != 0)
		goto err;

	kmem_free(dkdevidp, NBPSCTR);

	vdp->xdf_tgt_devid = devid;
	return (DDI_SUCCESS);

err:
	if (dkdevidp != NULL)
		kmem_free(dkdevidp, NBPSCTR);
	if (devid != NULL && vdp->xdf_tgt_devid == NULL)
		ddi_devid_free(devid);
	return (DDI_FAILURE);
}
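
/*
 * Illustration: the on-disk dk_devid sector is protected by a simple XOR
 * fold -- every 32-bit word of the sector except the last is XORed
 * together and the result is stored in the final word, so a reader can
 * recompute the fold and compare it against DKD_GETCHKSUM(), as
 * xdf_devid_read() below does.
 */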

/*
 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use
 * xdf functions.
 *
 * Read a devid from the first block of the last track of
 * the last cylinder.  Make sure what we read is a valid devid.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
xdf_devid_read(xdf_t *vdp)
{
	diskaddr_t	blk;
	struct dk_devid	*dkdevidp;
	uint_t		*ip, chksum;
	int		i;

	if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
		return (DDI_FAILURE);

	dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk,
	    NBPSCTR, NULL) != 0)
		goto err;

	/* Validate the revision */
	if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
		goto err;

	/* Calculate the checksum */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
		chksum ^= ip[i];
	if (DKD_GETCHKSUM(dkdevidp) != chksum)
		goto err;

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
		goto err;

	/* keep a copy of the device id */
	i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
	vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP);
	bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i);
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_SUCCESS);

err:
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_FAILURE);
}

/*
 * xdf_devid_setup() is a modified copy of cmdk_devid_setup().
 *
 * This function creates a devid if we don't already have one, and
 * registers it.  If we already have one, we make sure that it can be
 * read from the disk, otherwise we write it to the disk ourselves.  If
 * we didn't already have a devid, and we create one, we also need to
 * register it.
 */
static void
xdf_devid_setup(xdf_t *vdp)
{
	int		rc;
	boolean_t	existed = vdp->xdf_tgt_devid != NULL;

	/* Read devid from the disk, if present */
	rc = xdf_devid_read(vdp);

	/* Otherwise write a devid (which we create if necessary) on disk */
	if (rc != DDI_SUCCESS)
		rc = xdf_devid_fabricate(vdp);

	/* If we created a devid or found it on the disk, register it */
	if (rc == DDI_SUCCESS && !existed)
		(void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid);
}

static int
xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int			n, instance = ddi_get_instance(dip);
	ddi_iblock_cookie_t	ibc, softibc;
	boolean_t		dev_iscd = B_FALSE;
	xdf_t			*vdp;
	char			*oename, *xsname, *str;
	clock_t			timeout;
	int			err = 0;

	if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM,
	    "xdf_debug", 0)) != 0)
		xdf_debug = n;

	switch (cmd) {
	case DDI_RESUME:
		return (xdf_resume(dip));
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}
	/* DDI_ATTACH */

	if ((xsname = xvdi_get_xsname(dip)) == NULL ||
	    (oename = xvdi_get_oename(dip)) == NULL)
		return (DDI_FAILURE);

	/*
	 * Disable auto-detach.  This is necessary so that we don't get
	 * detached while we're disconnected from the back end.
	 */
	if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
		return (DDI_FAILURE);

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip,
	    DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
		return (DDI_FAILURE);

	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (ddi_get_soft_iblock_cookie(dip,
	    DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
		cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
		    ddi_get_name_addr(dip));
		return (DDI_FAILURE);
	}
	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
		dev_iscd = B_TRUE;
	strfree(str);

	if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
	vdp = ddi_get_soft_state(xdf_ssp, instance);
	ddi_set_driver_private(dip, vdp);
	vdp->xdf_dip = dip;
	vdp->xdf_addr = ddi_get_name_addr(dip);
	vdp->xdf_suspending = B_FALSE;
	vdp->xdf_media_req_supported = B_FALSE;
	vdp->xdf_peer = INVALID_DOMID;
	vdp->xdf_evtchn = INVALID_EVTCHN;
	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	vdp->xdf_cmbl_reattach = B_TRUE;
	if (dev_iscd) {
		vdp->xdf_dinfo |= VDISK_CDROM;
		vdp->xdf_mstate = DKIO_EJECTED;
	} else {
		vdp->xdf_mstate = DKIO_NONE;
	}

	if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
	    1, TASKQ_DEFAULTPRI, 0)) == NULL)
		goto errout0;

	if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
	    xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
		goto errout0;

	if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(dip));
		goto errout0;
	}

	/*
	 * Initialize the physical geometry structure.  Note that currently
	 * we don't know the size of the backend device so the number
	 * of blocks on the device will be initialized to zero.  Once
	 * we connect to the backend device we'll update the physical
	 * geometry to reflect the real size of the device.
	 */
	xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
	vdp->xdf_pgeom_fixed = B_FALSE;

	/*
	 * Create default device minor nodes: non-removable disk.
	 * We will adjust minor nodes after we are connected w/ backend.
	 *
	 * FIXME creating device minor nodes is currently disabled for CD
	 * devices, re-enable once the issues with xdf CD devices are fixed.
	 */
	if (!dev_iscd) {
		cmlb_alloc_handle(&vdp->xdf_vd_lbl);
		if (xdf_cmlb_attach(vdp) != 0) {
			cmn_err(CE_WARN,
			    "xdf@%s: attach failed, cmlb attach failed",
			    ddi_get_name_addr(dip));
			goto errout0;
		}
	}

	/* We ship with cache-enabled disks */
	vdp->xdf_wce = B_TRUE;

	mutex_enter(&vdp->xdf_cb_lk);
	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(dip,
	    XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout0;
	}

	if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(dip));
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout1;
	}

	/* Nothing else to do for CD devices */
	if (dev_iscd) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto done;
	}

	/*
	 * In order to do cmlb_validate, we have to wait for the disk to
	 * acknowledge the attach, so we can query the backend for the disk
	 * geometry (see xdf_setstate_connected).
	 *
	 * We only wait 30 seconds; if this is the root disk, the boot
	 * will fail, but it would fail anyway if the device never
	 * connected.  If this is a non-boot disk, that disk will fail
	 * to connect, but again, it would fail anyway.
	 */
	timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT);
	while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) {
		if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk,
		    timeout) < 0) {
			cmn_err(CE_WARN, "xdf@%s: disk failed to connect",
			    ddi_get_name_addr(dip));
			mutex_exit(&vdp->xdf_cb_lk);
			goto errout1;
		}
	}
	mutex_exit(&vdp->xdf_cb_lk);

	/*
	 * We call cmlb_validate so that the geometry information in
	 * vdp->xdf_vd_lbl is correct; this fills out the number of
	 * alternate cylinders so that we have a place to write the
	 * devid.
	 */
	if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) {
		cmn_err(CE_WARN,
		    "xdf@%s: cmlb_validate failed: %d",
		    ddi_get_name_addr(dip), err);
		/*
		 * We can carry on even if cmlb_validate() returns EINVAL here,
		 * as we'll rewrite the disk label anyway.
		 */
		if (err != EINVAL)
			goto errout1;
	}

	/*
	 * xdf_devid_setup will only write a devid if one isn't
	 * already present.  If it fails to find or create one, we
	 * create one in-memory so that when we label the disk later,
	 * it will have a devid to use.  This is helpful to deal with
	 * cases where people use the devids of their disks before
	 * labelling them; note that this does cause problems if
	 * people rely on the devids of unlabelled disks to persist
	 * across reboot.
	 */
	xdf_devid_setup(vdp);
	if (vdp->xdf_tgt_devid == NULL) {
		if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL,
		    &vdp->xdf_tgt_devid) != DDI_SUCCESS) {
			cmn_err(CE_WARN,
			    "xdf@%s: attach failed, devid_init failed",
			    ddi_get_name_addr(dip));
			goto errout1;
		} else {
			(void) ddi_devid_register(vdp->xdf_dip,
			    vdp->xdf_tgt_devid);
		}
	}

done:
#ifdef XPV_HVM_DRIVER
	xdf_hvm_add(dip);

	/* Report our version to dom0. */
	if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d",
	    HVMPV_XDF_VERS))
		cmn_err(CE_WARN, "xdf: couldn't write version\n");
#endif /* XPV_HVM_DRIVER */

	/* Create kstat for iostat(1M) */
	if (xdf_kstat_create(dip) != 0) {
		cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
		    ddi_get_name_addr(dip));
		goto errout1;
	}

	/*
	 * Don't bother with getting real device identification
	 * strings (is it even possible?), they are unlikely to
	 * change often (if at all).
	 */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID,
	    "Xen");
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID,
	    dev_iscd ? "Virtual CD" : "Virtual disk");
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID,
	    "1.0");

	ddi_report_dev(dip);
	DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr));
	return (DDI_SUCCESS);

errout1:
	(void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed);
	xvdi_remove_event_handler(dip, XS_OE_STATE);
errout0:
	if (vdp->xdf_vd_lbl != NULL) {
		cmlb_detach(vdp->xdf_vd_lbl, NULL);
		cmlb_free_handle(&vdp->xdf_vd_lbl);
		vdp->xdf_vd_lbl = NULL;
	}
	if (vdp->xdf_softintr_id != NULL)
		ddi_remove_softintr(vdp->xdf_softintr_id);
	xvdi_remove_xb_watch_handlers(dip);
	if (vdp->xdf_ready_tq != NULL)
		ddi_taskq_destroy(vdp->xdf_ready_tq);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	cv_destroy(&vdp->xdf_dev_cv);
	cv_destroy(&vdp->xdf_hp_status_cv);
	ddi_soft_state_free(xdf_ssp, instance);
	ddi_set_driver_private(dip, NULL);
	ddi_prop_remove_all(dip);
	cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip));
	return (DDI_FAILURE);
}

static int
xdf_suspend(dev_info_t *dip)
{
	int		instance = ddi_get_instance(dip);
	xdf_t		*vdp;

	if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
		return (DDI_FAILURE);

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr);

	xvdi_suspend(dip);

	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	vdp->xdf_suspending = B_TRUE;
	xdf_ring_destroy(vdp);
	xdf_set_state(vdp, XD_SUSPEND);
	vdp->xdf_suspending = B_FALSE;

	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr);

	return (DDI_SUCCESS);
}

static int
xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xdf_t	*vdp;
	int	instance;

	switch (cmd) {
	case DDI_PM_SUSPEND:
		break;
	case DDI_SUSPEND:
		return (xdf_suspend(dip));
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip)));
	vdp = ddi_get_soft_state(xdf_ssp, instance);

	if (vdp == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_cb_lk);
	xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
	if (vdp->xdf_state != XD_CLOSED) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}
	mutex_exit(&vdp->xdf_cb_lk);

	ASSERT(!ISDMACBON(vdp));

#ifdef XPV_HVM_DRIVER
	xdf_hvm_rm(dip);
#endif /* XPV_HVM_DRIVER */

	if (vdp->xdf_timeout_id != 0)
		(void) untimeout(vdp->xdf_timeout_id);

	xvdi_remove_event_handler(dip, XS_OE_STATE);
	ddi_taskq_destroy(vdp->xdf_ready_tq);

	cmlb_detach(vdp->xdf_vd_lbl, NULL);
	cmlb_free_handle(&vdp->xdf_vd_lbl);

	/* we'll support backend running in domU later */
#ifdef DOMU_BACKEND
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
#endif

	list_destroy(&vdp->xdf_vreq_act);
	ddi_prop_remove_all(dip);
	xdf_kstat_delete(dip);
	ddi_remove_softintr(vdp->xdf_softintr_id);
	xvdi_remove_xb_watch_handlers(dip);
	ddi_set_driver_private(dip, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	if (vdp->xdf_cache_flush_block != NULL)
		kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize);
	ddi_soft_state_free(xdf_ssp, instance);
	return (DDI_SUCCESS);
}

/*
 * Driver linkage structures.
 */
static struct cb_ops xdf_cbops = {
	xdf_open,		/* cb_open */
	xdf_close,		/* cb_close */
	xdf_strategy,		/* cb_strategy */
	nodev,			/* cb_print */
	xdf_dump,		/* cb_dump */
	xdf_read,		/* cb_read */
	xdf_write,		/* cb_write */
	xdf_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	xdf_prop_op,		/* cb_prop_op */
	NULL,			/* cb_str */
	D_MP | D_NEW | D_64BIT,	/* cb_flag */
	CB_REV,			/* cb_rev */
	xdf_aread,		/* cb_aread */
	xdf_awrite		/* cb_awrite */
};

struct dev_ops xdf_devops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdf_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdf_attach,		/* devo_attach */
	xdf_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xdf_cbops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported, /* devo_quiesce */
};

/*
 * Module linkage structures.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver",	/* short description */
	&xdf_devops		/* driver specific ops */
};

static struct modlinkage xdf_modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * standard module entry points
 */
int
_init(void)
{
	int rc;

	xdf_major = ddi_name_to_major("xdf");
	if (xdf_major == (major_t)-1)
		return (EINVAL);

	if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0)
		return (rc);

	xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
	    sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
	    sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

#ifdef XPV_HVM_DRIVER
	xdf_hvm_init();
#endif /* XPV_HVM_DRIVER */

	if ((rc = mod_install(&xdf_modlinkage)) != 0) {
#ifdef XPV_HVM_DRIVER
		xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
		kmem_cache_destroy(xdf_vreq_cache);
		kmem_cache_destroy(xdf_gs_cache);
		ddi_soft_state_fini(&xdf_ssp);
	}

	return (rc);
}

int
_fini(void)
{
	int err;

	if ((err = mod_remove(&xdf_modlinkage)) != 0)
		return (err);

#ifdef XPV_HVM_DRIVER
	xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */

	kmem_cache_destroy(xdf_vreq_cache);
	kmem_cache_destroy(xdf_gs_cache);
	ddi_soft_state_fini(&xdf_ssp);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdf_modlinkage, modinfop));
}