usr/src/uts/common/xen/io/xdf.c (unleashed.git)
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
29 * Copyright 2017 Nexenta Systems, Inc.
33 * xdf.c - Xen Virtual Block Device Driver
34 * TODO:
35 * - support alternate block size (currently only DEV_BSIZE supported)
36 * - revalidate geometry for removable devices
38 * This driver exports disk device nodes, accepts IO requests from those
39 * nodes, and services those requests by talking to a backend device
40 * in another domain.
42 * Communication with the backend device is done via a ringbuffer (which is
43 * managed via xvdi interfaces) and dma memory (which is managed via ddi
44 * interfaces).
46	 * Communication with the backend device is dependent upon establishing a
47 * connection to the backend device. This connection process involves
48 * reading device configuration information from xenbus and publishing
49 * some frontend runtime configuration parameters via the xenbus (for
50 * consumption by the backend). Once we've published runtime configuration
51 * information via the xenbus, the backend device can enter the connected
52 * state and we'll enter the XD_CONNECTED state. But before we can allow
53 * random IO to begin, we need to do IO to the backend device to determine
54	 * the device label and whether flush operations are supported. Once this is
55 * done we enter the XD_READY state and can process any IO operations.
57 * We receive notifications of xenbus state changes for the backend device
58 * (aka, the "other end") via the xdf_oe_change() callback. This callback
59	 * is single threaded, meaning that we can't receive new notifications of
60	 * other end state changes while we're processing an outstanding
61	 * notification of an other end state change. Therefore we can't do any
62 * blocking operations from the xdf_oe_change() callback. This is why we
63	 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
64 * IO to get us from the XD_CONNECTED to the XD_READY state. All IO
65 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
66	 * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs
67 * generated by the xdf_ready_tq_thread thread have priority over all
68 * other IO requests.
70 * We also communicate with the backend device via the xenbus "media-req"
71 * (XBP_MEDIA_REQ) property. For more information on this see the
72 * comments in blkif.h.
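 *
 * To summarize, the usual state progression is:
 *   XD_UNKNOWN/XD_CLOSED -> XD_INIT -> XD_CONNECTED -> XD_READY
 * with xdf_disconnect() taking us back to XD_UNKNOWN or XD_CLOSED when
 * the connection is torn down.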
75 #include <io/xdf.h>
77 #include <sys/conf.h>
78 #include <sys/dkio.h>
79 #include <sys/promif.h>
80 #include <sys/sysmacros.h>
81 #include <sys/kstat.h>
82 #include <sys/mach_mmu.h>
83 #ifdef XPV_HVM_DRIVER
84 #include <sys/xpv_support.h>
85 #else /* !XPV_HVM_DRIVER */
86 #include <sys/evtchn_impl.h>
87 #endif /* !XPV_HVM_DRIVER */
88 #include <sys/sunndi.h>
89 #include <public/io/xenbus.h>
90 #include <xen/sys/xenbus_impl.h>
91 #include <sys/scsi/generic/inquiry.h>
92 #include <xen/io/blkif_impl.h>
93 #include <sys/fdio.h>
94 #include <sys/cdio.h>
97 * DEBUG_EVAL can be used to include debug only statements without
98 * having to use '#ifdef DEBUG' statements
100 #ifdef DEBUG
101 #define DEBUG_EVAL(x) (x)
102 #else /* !DEBUG */
103 #define DEBUG_EVAL(x)
104 #endif /* !DEBUG */
106 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */
107 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */
108 #define XDF_STATE_TIMEOUT (30*1000*1000) /* 30.00 sec */
110 #define INVALID_DOMID ((domid_t)-1)
111 #define FLUSH_DISKCACHE 0x1
112 #define WRITE_BARRIER 0x2
113 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */
114 #define USE_WRITE_BARRIER(vdp) \
115 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
116 #define USE_FLUSH_DISKCACHE(vdp) \
117 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
118 #define IS_WRITE_BARRIER(vdp, bp) \
119 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
120 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
121 #define IS_FLUSH_DISKCACHE(bp) \
122 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
124 #define VREQ_DONE(vreq) \
125 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \
126 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \
127 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))
129 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back))
130 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq)))
132 extern int do_polled_io;
134 /* run-time tunables that we don't want the compiler to optimize away */
135 volatile int xdf_debug = 0;
136 volatile boolean_t xdf_barrier_flush_disable = B_FALSE;
138 /* per module globals */
139 major_t xdf_major;
140 static void *xdf_ssp;
141 static kmem_cache_t *xdf_vreq_cache;
142 static kmem_cache_t *xdf_gs_cache;
143 static int xdf_maxphys = XB_MAXPHYS;
144 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
145 static int xdf_fbrewrites; /* flush block re-write count */
147 /* misc public functions */
148 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
149 int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
151 /* misc private functions */
152 static void xdf_io_start(xdf_t *);
153 static void xdf_devid_setup(xdf_t *);
155 /* callbacks from common label */
156 static cmlb_tg_ops_t xdf_lb_ops = {
157 TG_DK_OPS_VERSION_1,
158 xdf_lb_rdwr,
159 xdf_lb_getinfo
163 * I/O buffer DMA attributes
164	 * Make sure one DMA window contains at most BLKIF_MAX_SEGMENTS_PER_REQUEST segments.
166 static ddi_dma_attr_t xb_dma_attr = {
167 DMA_ATTR_V0,
168 (uint64_t)0, /* lowest address */
169 (uint64_t)0xffffffffffffffff, /* highest usable address */
170 (uint64_t)0xffffff, /* DMA counter limit max */
171 (uint64_t)XB_BSIZE, /* alignment in bytes */
172 XB_BSIZE - 1, /* bitmap of burst sizes */
173 XB_BSIZE, /* min transfer */
174 (uint64_t)XB_MAX_XFER, /* maximum transfer */
175 (uint64_t)PAGEOFFSET, /* 1 page segment length */
176 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */
177 XB_BSIZE, /* granularity */
178 0, /* flags (reserved) */
181 static ddi_device_acc_attr_t xc_acc_attr = {
182 DDI_DEVICE_ATTR_V0,
183 DDI_NEVERSWAP_ACC,
184 DDI_STRICTORDER_ACC
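/*
 * Timeout handler scheduled when we fail to allocate a vreq or ge_slot.
 * It clears xdf_timeout_id and restarts I/O processing.
 */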
187 static void
188 xdf_timeout_handler(void *arg)
190 xdf_t *vdp = arg;
192 mutex_enter(&vdp->xdf_dev_lk);
193 vdp->xdf_timeout_id = 0;
194 mutex_exit(&vdp->xdf_dev_lk);
196 /* new timeout thread could be re-scheduled */
197 xdf_io_start(vdp);
201	 * callback func invoked when DMA/GTE resources become available
203	 * Note: we only register one callback function with the grant table
204	 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
206 static int
207 xdf_dmacallback(caddr_t arg)
209 xdf_t *vdp = (xdf_t *)arg;
210 ASSERT(vdp != NULL);
212 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
213 vdp->xdf_addr));
215 ddi_trigger_softintr(vdp->xdf_softintr_id);
216 return (DDI_DMA_CALLBACK_DONE);
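/*
 * Allocate a ge_slot_t along with the grant table references needed for
 * one blkif request.  On failure, arrange for I/O to be restarted later
 * (via the grant table free callback or a timeout).
 */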
219 static ge_slot_t *
220 gs_get(xdf_t *vdp, int isread)
222 grant_ref_t gh;
223 ge_slot_t *gs;
225 /* try to alloc GTEs needed in this slot, first */
226 if (gnttab_alloc_grant_references(
227 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
228 if (vdp->xdf_gnt_callback.next == NULL) {
229 SETDMACBON(vdp);
230 gnttab_request_free_callback(
231 &vdp->xdf_gnt_callback,
232 (void (*)(void *))xdf_dmacallback,
233 (void *)vdp,
234 BLKIF_MAX_SEGMENTS_PER_REQUEST);
236 return (NULL);
239 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
240 if (gs == NULL) {
241 gnttab_free_grant_references(gh);
242 if (vdp->xdf_timeout_id == 0)
243 /* restart I/O after one second */
244 vdp->xdf_timeout_id =
245 timeout(xdf_timeout_handler, vdp, hz);
246 return (NULL);
249 /* init gs_slot */
250 gs->gs_oeid = vdp->xdf_peer;
251 gs->gs_isread = isread;
252 gs->gs_ghead = gh;
253 gs->gs_ngrefs = 0;
255 return (gs);
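/*
 * Release all grant table entries held by a ge_slot_t, unlink it from
 * its vreq, and free the slot.
 */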
258 static void
259 gs_free(ge_slot_t *gs)
261 int i;
263 /* release all grant table entry resources used in this slot */
264 for (i = 0; i < gs->gs_ngrefs; i++)
265 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
266 gnttab_free_grant_references(gs->gs_ghead);
267 list_remove(&gs->gs_vreq->v_gs, gs);
268 kmem_cache_free(xdf_gs_cache, gs);
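/*
 * Claim a grant reference from the slot and use it to grant the peer
 * domain access to the page at 'mfn'.
 */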
271 static grant_ref_t
272 gs_grant(ge_slot_t *gs, mfn_t mfn)
274 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);
276 ASSERT(gr != -1);
277 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
278 gs->gs_ge[gs->gs_ngrefs++] = gr;
279 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);
281 return (gr);
285 * Alloc a vreq for this bp
286 * bp->av_back contains the pointer to the vreq upon return
288 static v_req_t *
289 vreq_get(xdf_t *vdp, buf_t *bp)
291 v_req_t *vreq = NULL;
293 ASSERT(BP_VREQ(bp) == NULL);
295 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
296 if (vreq == NULL) {
297 if (vdp->xdf_timeout_id == 0)
298 /* restart I/O after one second */
299 vdp->xdf_timeout_id =
300 timeout(xdf_timeout_handler, vdp, hz);
301 return (NULL);
303 bzero(vreq, sizeof (v_req_t));
304 list_create(&vreq->v_gs, sizeof (ge_slot_t),
305 offsetof(ge_slot_t, gs_vreq_link));
306 vreq->v_buf = bp;
307 vreq->v_status = VREQ_INIT;
308 vreq->v_runq = B_FALSE;
309 BP_VREQ_SET(bp, vreq);
310 /* init of other fields in vreq is up to the caller */
312 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
314 return (vreq);
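/*
 * Tear down a vreq: undo whatever DMA binding, bounce buffer, and handle
 * allocations were performed by vreq_setup() (based on v_status) and
 * free the vreq.
 */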
317 static void
318 vreq_free(xdf_t *vdp, v_req_t *vreq)
320 buf_t *bp = vreq->v_buf;
322 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
323 ASSERT(BP_VREQ(bp) == vreq);
325 list_remove(&vdp->xdf_vreq_act, vreq);
327 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
328 goto done;
330 switch (vreq->v_status) {
331 case VREQ_DMAWIN_DONE:
332 case VREQ_GS_ALLOCED:
333 case VREQ_DMABUF_BOUND:
334 (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
335 /*FALLTHRU*/
336 case VREQ_DMAMEM_ALLOCED:
337 if (!ALIGNED_XFER(bp)) {
338 ASSERT(vreq->v_abuf != NULL);
339 if (!IS_ERROR(bp) && IS_READ(bp))
340 bcopy(vreq->v_abuf, bp->b_un.b_addr,
341 bp->b_bcount);
342 ddi_dma_mem_free(&vreq->v_align);
344 /*FALLTHRU*/
345 case VREQ_MEMDMAHDL_ALLOCED:
346 if (!ALIGNED_XFER(bp))
347 ddi_dma_free_handle(&vreq->v_memdmahdl);
348 /*FALLTHRU*/
349 case VREQ_DMAHDL_ALLOCED:
350 ddi_dma_free_handle(&vreq->v_dmahdl);
351 break;
352 default:
353 break;
355 done:
356 ASSERT(!vreq->v_runq);
357 list_destroy(&vreq->v_gs);
358 kmem_cache_free(xdf_vreq_cache, vreq);
362 * Snarf new data if our flush block was re-written
364 static void
365 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
367 int nblks;
368 boolean_t mapin;
370 if (IS_WRITE_BARRIER(vdp, bp))
371 return; /* write was a flush write */
373 mapin = B_FALSE;
374 nblks = bp->b_bcount >> DEV_BSHIFT;
375 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
376 xdf_fbrewrites++;
377 if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
378 mapin = B_TRUE;
379 bp_mapin(bp);
381 bcopy(bp->b_un.b_addr +
382 ((xdf_flush_block - blkno) << DEV_BSHIFT),
383 vdp->xdf_cache_flush_block, DEV_BSIZE);
384 if (mapin)
385 bp_mapout(bp);
390	 * Initialize the DMA and grant table resources for the buf
392 static int
393 vreq_setup(xdf_t *vdp, v_req_t *vreq)
395 int rc;
396 ddi_dma_attr_t dmaattr;
397 uint_t ndcs, ndws;
398 ddi_dma_handle_t dh;
399 ddi_dma_handle_t mdh;
400 ddi_dma_cookie_t dc;
401 ddi_acc_handle_t abh;
402 caddr_t aba;
403 ge_slot_t *gs;
404 size_t bufsz;
405 off_t off;
406 size_t sz;
407 buf_t *bp = vreq->v_buf;
408 int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
409 DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
411 switch (vreq->v_status) {
412 case VREQ_INIT:
413 if (IS_FLUSH_DISKCACHE(bp)) {
414 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
415 DPRINTF(DMA_DBG, ("xdf@%s: "
416 "get ge_slotfailed\n", vdp->xdf_addr));
417 return (DDI_FAILURE);
419 vreq->v_blkno = 0;
420 vreq->v_nslots = 1;
421 vreq->v_flush_diskcache = FLUSH_DISKCACHE;
422 vreq->v_status = VREQ_GS_ALLOCED;
423 gs->gs_vreq = vreq;
424 list_insert_head(&vreq->v_gs, gs);
425 return (DDI_SUCCESS);
428 if (IS_WRITE_BARRIER(vdp, bp))
429 vreq->v_flush_diskcache = WRITE_BARRIER;
430 vreq->v_blkno = bp->b_blkno +
431 (diskaddr_t)(uintptr_t)bp->b_private;
432 /* See if we wrote new data to our flush block */
433 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
434 check_fbwrite(vdp, bp, vreq->v_blkno);
435 vreq->v_status = VREQ_INIT_DONE;
436 /*FALLTHRU*/
438 case VREQ_INIT_DONE:
440 * alloc DMA handle
442 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
443 xdf_dmacallback, (caddr_t)vdp, &dh);
444 if (rc != DDI_SUCCESS) {
445 SETDMACBON(vdp);
446 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
447 vdp->xdf_addr));
448 return (DDI_FAILURE);
451 vreq->v_dmahdl = dh;
452 vreq->v_status = VREQ_DMAHDL_ALLOCED;
453 /*FALLTHRU*/
455 case VREQ_DMAHDL_ALLOCED:
457 * alloc dma handle for 512-byte aligned buf
459 if (!ALIGNED_XFER(bp)) {
461 * XXPV: we need to temporarily enlarge the seg
462			 * boundary and s/g length to work around CR6381968
464 dmaattr = xb_dma_attr;
465 dmaattr.dma_attr_seg = (uint64_t)-1;
466 dmaattr.dma_attr_sgllen = INT_MAX;
467 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
468 xdf_dmacallback, (caddr_t)vdp, &mdh);
469 if (rc != DDI_SUCCESS) {
470 SETDMACBON(vdp);
471 DPRINTF(DMA_DBG, ("xdf@%s: "
472 "unaligned buf DMAhandle alloc failed\n",
473 vdp->xdf_addr));
474 return (DDI_FAILURE);
476 vreq->v_memdmahdl = mdh;
477 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
479 /*FALLTHRU*/
481 case VREQ_MEMDMAHDL_ALLOCED:
483 * alloc 512-byte aligned buf
485 if (!ALIGNED_XFER(bp)) {
486 if (bp->b_flags & (B_PAGEIO | B_PHYS))
487 bp_mapin(bp);
488 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
489 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
490 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
491 &aba, &bufsz, &abh);
492 if (rc != DDI_SUCCESS) {
493 SETDMACBON(vdp);
494 DPRINTF(DMA_DBG, ("xdf@%s: "
495 "DMA mem allocation failed\n",
496 vdp->xdf_addr));
497 return (DDI_FAILURE);
500 vreq->v_abuf = aba;
501 vreq->v_align = abh;
502 vreq->v_status = VREQ_DMAMEM_ALLOCED;
504 ASSERT(bufsz >= bp->b_bcount);
505 if (!IS_READ(bp))
506 bcopy(bp->b_un.b_addr, vreq->v_abuf,
507 bp->b_bcount);
509 /*FALLTHRU*/
511 case VREQ_DMAMEM_ALLOCED:
513 * dma bind
515 if (ALIGNED_XFER(bp)) {
516 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
517 dma_flags, xdf_dmacallback, (caddr_t)vdp,
518 &dc, &ndcs);
519 } else {
520 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
521 NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
522 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
524 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
525 /* get num of dma windows */
526 if (rc == DDI_DMA_PARTIAL_MAP) {
527 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
528 ASSERT(rc == DDI_SUCCESS);
529 } else {
530 ndws = 1;
532 } else {
533 SETDMACBON(vdp);
534 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
535 vdp->xdf_addr));
536 return (DDI_FAILURE);
539 vreq->v_dmac = dc;
540 vreq->v_dmaw = 0;
541 vreq->v_ndmacs = ndcs;
542 vreq->v_ndmaws = ndws;
543 vreq->v_nslots = ndws;
544 vreq->v_status = VREQ_DMABUF_BOUND;
545 /*FALLTHRU*/
547 case VREQ_DMABUF_BOUND:
549 * get ge_slot, callback is set upon failure from gs_get(),
550 * if not set previously
552 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
553 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
554 vdp->xdf_addr));
555 return (DDI_FAILURE);
558 vreq->v_status = VREQ_GS_ALLOCED;
559 gs->gs_vreq = vreq;
560 list_insert_head(&vreq->v_gs, gs);
561 break;
563 case VREQ_GS_ALLOCED:
564		/* nothing needs to be done */
565 break;
567 case VREQ_DMAWIN_DONE:
569 * move to the next dma window
571 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
573 /* get a ge_slot for this DMA window */
574 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
575 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
576 vdp->xdf_addr));
577 return (DDI_FAILURE);
580 vreq->v_dmaw++;
581 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
582 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
583 vreq->v_status = VREQ_GS_ALLOCED;
584 gs->gs_vreq = vreq;
585 list_insert_head(&vreq->v_gs, gs);
586 break;
588 default:
589 return (DDI_FAILURE);
592 return (DDI_SUCCESS);
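/*
 * Register with the common disk label (cmlb) framework, which creates
 * the minor nodes for this device.
 */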
595 static int
596 xdf_cmlb_attach(xdf_t *vdp)
598 dev_info_t *dip = vdp->xdf_dip;
600 return (cmlb_attach(dip, &xdf_lb_ops,
601 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
602 XD_IS_RM(vdp),
603 B_TRUE,
604 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
605 #ifdef XPV_HVM_DRIVER
606 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT),
607 #else /* XPV_HVM_DRIVER */
609 #endif /* XPV_HVM_DRIVER */
610 vdp->xdf_vd_lbl, NULL));
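/*
 * Complete a buf with an error: record the error code, set the residual
 * count, and call biodone().
 */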
613 static void
614 xdf_io_err(buf_t *bp, int err, size_t resid)
616 bioerror(bp, err);
617 if (resid == 0)
618 bp->b_resid = bp->b_bcount;
619 biodone(bp);
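/*
 * kstat helpers: account for a buf entering or leaving our wait and run
 * queues.  A buf is on the runq once it has been assigned ring buffer
 * resources (vreq->v_runq is set); otherwise it is on the waitq.
 */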
622 static void
623 xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
625 v_req_t *vreq = BP_VREQ(bp);
627 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
629 if (vdp->xdf_xdev_iostat == NULL)
630 return;
631 if ((vreq != NULL) && vreq->v_runq) {
632 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
633 } else {
634 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
638 static void
639 xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
641 v_req_t *vreq = BP_VREQ(bp);
643 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
645 if (vdp->xdf_xdev_iostat == NULL)
646 return;
648 if ((vreq != NULL) && vreq->v_runq) {
649 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
650 } else {
651 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
654 if (bp->b_flags & B_READ) {
655 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++;
656 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount;
657 } else if (bp->b_flags & B_WRITE) {
658 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++;
659 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount;
663 static void
664 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
666 v_req_t *vreq = BP_VREQ(bp);
668 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
669 ASSERT(!vreq->v_runq);
671 vreq->v_runq = B_TRUE;
672 if (vdp->xdf_xdev_iostat == NULL)
673 return;
674 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
677 static void
678 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
680 v_req_t *vreq = BP_VREQ(bp);
682 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
683 ASSERT(vreq->v_runq);
685 vreq->v_runq = B_FALSE;
686 if (vdp->xdf_xdev_iostat == NULL)
687 return;
688 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
692 xdf_kstat_create(dev_info_t *dip)
694 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
695 kstat_t *kstat;
696 buf_t *bp;
698 if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk",
699 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
700 return (-1);
702 /* See comment about locking in xdf_kstat_delete(). */
703 mutex_enter(&vdp->xdf_iostat_lk);
704 mutex_enter(&vdp->xdf_dev_lk);
706 /* only one kstat can exist at a time */
707 if (vdp->xdf_xdev_iostat != NULL) {
708 mutex_exit(&vdp->xdf_dev_lk);
709 mutex_exit(&vdp->xdf_iostat_lk);
710 kstat_delete(kstat);
711 return (-1);
714 vdp->xdf_xdev_iostat = kstat;
715 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
716 kstat_install(vdp->xdf_xdev_iostat);
719 * Now that we've created a kstat, we need to update the waitq and
720 * runq counts for the kstat to reflect our current state.
722 * For a buf_t structure to be on the runq, it must have a ring
723 * buffer slot associated with it. To get a ring buffer slot the
724 * buf must first have a v_req_t and a ge_slot_t associated with it.
725 * Then when it is granted a ring buffer slot, v_runq will be set to
726 * true.
728 * For a buf_t structure to be on the waitq, it must not be on the
729 * runq. So to find all the buf_t's that should be on waitq, we
730 * walk the active buf list and add any buf_t's which aren't on the
731 * runq to the waitq.
733 bp = vdp->xdf_f_act;
734 while (bp != NULL) {
735 xdf_kstat_enter(vdp, bp);
736 bp = bp->av_forw;
738 if (vdp->xdf_ready_tq_bp != NULL)
739 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);
741 mutex_exit(&vdp->xdf_dev_lk);
742 mutex_exit(&vdp->xdf_iostat_lk);
743 return (0);
746 void
747 xdf_kstat_delete(dev_info_t *dip)
749 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
750 kstat_t *kstat;
751 buf_t *bp;
754 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
755 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
756	 * and the contents of our kstat. xdf_iostat_lk is used
757 * to protect the allocation and freeing of the actual kstat.
758 * xdf_dev_lk can't be used for this purpose because kstat
759 * readers use it to access the contents of the kstat and
760 * hence it can't be held when calling kstat_delete().
762 mutex_enter(&vdp->xdf_iostat_lk);
763 mutex_enter(&vdp->xdf_dev_lk);
765 if (vdp->xdf_xdev_iostat == NULL) {
766 mutex_exit(&vdp->xdf_dev_lk);
767 mutex_exit(&vdp->xdf_iostat_lk);
768 return;
772 * We're about to destroy the kstat structures, so it isn't really
773 * necessary to update the runq and waitq counts. But, since this
774 * isn't a hot code path we can afford to be a little pedantic and
775 * go ahead and decrement the runq and waitq kstat counters to zero
776 * before free'ing them. This helps us ensure that we've gotten all
777 * our accounting correct.
779 * For an explanation of how we determine which buffers go on the
780 * runq vs which go on the waitq, see the comments in
781 * xdf_kstat_create().
783 bp = vdp->xdf_f_act;
784 while (bp != NULL) {
785 xdf_kstat_exit(vdp, bp);
786 bp = bp->av_forw;
788 if (vdp->xdf_ready_tq_bp != NULL)
789 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);
791 kstat = vdp->xdf_xdev_iostat;
792 vdp->xdf_xdev_iostat = NULL;
793 mutex_exit(&vdp->xdf_dev_lk);
794 kstat_delete(kstat);
795 mutex_exit(&vdp->xdf_iostat_lk);
799	 * Add an IO request onto the active queue.
801 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs
802 * are used to establish a connection to the backend, so they receive
803 * priority over all other IOs. Since xdf_ready_tq_thread only does
804 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
805 * given time and we record the buf associated with that request in
806 * xdf_ready_tq_bp.
808 static void
809 xdf_bp_push(xdf_t *vdp, buf_t *bp)
811 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
812 ASSERT(bp->av_forw == NULL);
814 xdf_kstat_enter(vdp, bp);
816 if (curthread == vdp->xdf_ready_tq_thread) {
817 /* new IO requests from the ready thread */
818 ASSERT(vdp->xdf_ready_tq_bp == NULL);
819 vdp->xdf_ready_tq_bp = bp;
820 return;
823	/* this is a normal IO request */
824 ASSERT(bp != vdp->xdf_ready_tq_bp);
826 if (vdp->xdf_f_act == NULL) {
827		/* this is the only IO on the active queue */
828 ASSERT(vdp->xdf_l_act == NULL);
829 ASSERT(vdp->xdf_i_act == NULL);
830 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
831 return;
834 /* add this IO to the tail of the active queue */
835 vdp->xdf_l_act->av_forw = bp;
836 vdp->xdf_l_act = bp;
837 if (vdp->xdf_i_act == NULL)
838 vdp->xdf_i_act = bp;
841 static void
842 xdf_bp_pop(xdf_t *vdp, buf_t *bp)
844 buf_t *bp_iter;
846 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
847 ASSERT(VREQ_DONE(BP_VREQ(bp)));
849 if (vdp->xdf_ready_tq_bp == bp) {
850 /* we're done with a ready thread IO request */
851 ASSERT(bp->av_forw == NULL);
852 vdp->xdf_ready_tq_bp = NULL;
853 return;
856 /* we're done with a normal IO request */
857 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
858 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
859 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
860 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);
862 if (bp == vdp->xdf_f_act) {
863 /* This IO was at the head of our active queue. */
864 vdp->xdf_f_act = bp->av_forw;
865 if (bp == vdp->xdf_l_act)
866 vdp->xdf_l_act = NULL;
867 } else {
868		/* This IO finished before some other pending IOs. */
869 bp_iter = vdp->xdf_f_act;
870 while (bp != bp_iter->av_forw) {
871 bp_iter = bp_iter->av_forw;
872 ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
873 ASSERT(bp_iter != vdp->xdf_i_act);
875 bp_iter->av_forw = bp->av_forw;
876 if (bp == vdp->xdf_l_act)
877 vdp->xdf_l_act = bp_iter;
879 bp->av_forw = NULL;
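/*
 * Return the next buf on the active queue that still needs processing.
 * In the XD_CONNECTED state only the xdf_ready_tq_thread buf is
 * serviced; in the XD_READY state we walk the active queue.
 */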
882 static buf_t *
883 xdf_bp_next(xdf_t *vdp)
885 v_req_t *vreq;
886 buf_t *bp;
888 if (vdp->xdf_state == XD_CONNECTED) {
890 * If we're in the XD_CONNECTED state, we only service IOs
891 * from the xdf_ready_tq_thread thread.
893 if ((bp = vdp->xdf_ready_tq_bp) == NULL)
894 return (NULL);
895 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
896 return (bp);
897 return (NULL);
900 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
901 if (vdp->xdf_state != XD_READY)
902 return (NULL);
904 ASSERT(vdp->xdf_ready_tq_bp == NULL);
905 for (;;) {
906 if ((bp = vdp->xdf_i_act) == NULL)
907 return (NULL);
908 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
909 return (bp);
911 /* advance the active buf index pointer */
912 vdp->xdf_i_act = bp->av_forw;
916 static void
917 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
919 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
920 v_req_t *vreq = gs->gs_vreq;
921 buf_t *bp = vreq->v_buf;
923 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
924 ASSERT(BP_VREQ(bp) == vreq);
926 gs_free(gs);
928 if (bioerr != 0)
929 bioerror(bp, bioerr);
930 ASSERT(vreq->v_nslots > 0);
931 if (--vreq->v_nslots > 0)
932 return;
934 /* remove this IO from our active queue */
935 xdf_bp_pop(vdp, bp);
937 ASSERT(vreq->v_runq);
938 xdf_kstat_exit(vdp, bp);
939 vreq->v_runq = B_FALSE;
940 vreq_free(vdp, vreq);
942 if (IS_ERROR(bp)) {
943 xdf_io_err(bp, geterror(bp), 0);
944 } else if (bp->b_resid != 0) {
945 /* Partial transfers are an error */
946 xdf_io_err(bp, EIO, bp->b_resid);
947 } else {
948 biodone(bp);
953 * xdf interrupt handler
955 static uint_t
956 xdf_intr_locked(xdf_t *vdp)
958 xendev_ring_t *xbr;
959 blkif_response_t *resp;
960 int bioerr;
961 uint64_t id;
962 uint8_t op;
963 uint16_t status;
964 ddi_acc_handle_t acchdl;
966 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
968 if ((xbr = vdp->xdf_xb_ring) == NULL)
969 return (DDI_INTR_UNCLAIMED);
971 acchdl = vdp->xdf_xb_ring_hdl;
974 * complete all requests which have a response
976 while (resp = xvdi_ring_get_response(xbr)) {
977 id = ddi_get64(acchdl, &resp->id);
978 op = ddi_get8(acchdl, &resp->operation);
979 status = ddi_get16(acchdl, (uint16_t *)&resp->status);
980 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
981 op, id, status));
983 if (status != BLKIF_RSP_OKAY) {
984 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
985 vdp->xdf_addr,
986 (op == BLKIF_OP_READ) ? "reading" : "writing"));
987 bioerr = EIO;
988 } else {
989 bioerr = 0;
992 xdf_io_fini(vdp, id, bioerr);
994 return (DDI_INTR_CLAIMED);
998 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
999 * block at a lower pil.
1001 static uint_t
1002 xdf_intr(caddr_t arg)
1004 xdf_t *vdp = (xdf_t *)arg;
1005 int rv;
1007 mutex_enter(&vdp->xdf_dev_lk);
1008 rv = xdf_intr_locked(vdp);
1009 mutex_exit(&vdp->xdf_dev_lk);
1011 if (!do_polled_io)
1012 xdf_io_start(vdp);
1014 return (rv);
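/*
 * Push any newly queued requests out to the backend ring and notify the
 * backend via our event channel.
 */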
1017 static void
1018 xdf_ring_push(xdf_t *vdp)
1020 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1022 if (vdp->xdf_xb_ring == NULL)
1023 return;
1025 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
1026 DPRINTF(IO_DBG, (
1027 "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
1028 vdp->xdf_addr));
1031 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
1032 xvdi_notify_oe(vdp->xdf_dip);
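/*
 * Poll the ring (with a bounded number of retries) until the backend
 * has completed all outstanding requests, consuming any responses along
 * the way.
 */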
1035 static int
1036 xdf_ring_drain_locked(xdf_t *vdp)
1038 int pollc, rv = 0;
1040 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1042 if (xdf_debug & SUSRES_DBG)
1043 xen_printf("xdf_ring_drain: start\n");
1045 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
1046 if (vdp->xdf_xb_ring == NULL)
1047 goto out;
1049 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1050 (void) xdf_intr_locked(vdp);
1051 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
1052 goto out;
1053 xdf_ring_push(vdp);
1055 /* file-backed devices can be slow */
1056 mutex_exit(&vdp->xdf_dev_lk);
1057 #ifdef XPV_HVM_DRIVER
1058 (void) HYPERVISOR_yield();
1059 #endif /* XPV_HVM_DRIVER */
1060 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
1061 mutex_enter(&vdp->xdf_dev_lk);
1063 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);
1065 out:
1066 if (vdp->xdf_xb_ring != NULL) {
1067 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
1068 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1069 rv = EIO;
1071 if (xdf_debug & SUSRES_DBG)
1072 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
1073 vdp->xdf_addr, rv);
1074 return (rv);
1077 static int
1078 xdf_ring_drain(xdf_t *vdp)
1080 int rv;
1081 mutex_enter(&vdp->xdf_dev_lk);
1082 rv = xdf_ring_drain_locked(vdp);
1083 mutex_exit(&vdp->xdf_dev_lk);
1084 return (rv);
1088 * Destroy all v_req_t, grant table entries, and our ring buffer.
1090 static void
1091 xdf_ring_destroy(xdf_t *vdp)
1093 v_req_t *vreq;
1094 buf_t *bp;
1095 ge_slot_t *gs;
1097 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1098 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1100 if ((vdp->xdf_state != XD_INIT) &&
1101 (vdp->xdf_state != XD_CONNECTED) &&
1102 (vdp->xdf_state != XD_READY)) {
1103 ASSERT(vdp->xdf_xb_ring == NULL);
1104 ASSERT(vdp->xdf_xb_ring_hdl == NULL);
1105 ASSERT(vdp->xdf_peer == INVALID_DOMID);
1106 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
1107 ASSERT(list_is_empty(&vdp->xdf_vreq_act));
1108 return;
1112 * We don't want to receive async notifications from the backend
1113 * when it finishes processing ring entries.
1115 #ifdef XPV_HVM_DRIVER
1116 ec_unbind_evtchn(vdp->xdf_evtchn);
1117 #else /* !XPV_HVM_DRIVER */
1118 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1119 #endif /* !XPV_HVM_DRIVER */
1122 * Drain any requests in the ring. We need to do this before we
1123 * can free grant table entries, because if active ring entries
1124 * point to grants, then the backend could be trying to access
1125 * those grants.
1127 (void) xdf_ring_drain_locked(vdp);
1129 /* We're done talking to the backend so free up our event channel */
1130 xvdi_free_evtchn(vdp->xdf_dip);
1131 vdp->xdf_evtchn = INVALID_EVTCHN;
1133 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
1134 bp = vreq->v_buf;
1135 ASSERT(BP_VREQ(bp) == vreq);
1137		/* Free up any grant table entries associated with this IO */
1138 while ((gs = list_head(&vreq->v_gs)) != NULL)
1139 gs_free(gs);
1141 /* If this IO was on the runq, move it back to the waitq. */
1142 if (vreq->v_runq)
1143 xdf_kstat_runq_to_waitq(vdp, bp);
1146 * Reset any buf IO state since we're going to re-issue the
1147 * IO when we reconnect.
1149 vreq_free(vdp, vreq);
1150 BP_VREQ_SET(bp, NULL);
1151 bioerror(bp, 0);
1154 /* reset the active queue index pointer */
1155 vdp->xdf_i_act = vdp->xdf_f_act;
1157 /* Destroy the ring */
1158 xvdi_free_ring(vdp->xdf_xb_ring);
1159 vdp->xdf_xb_ring = NULL;
1160 vdp->xdf_xb_ring_hdl = NULL;
1161 vdp->xdf_peer = INVALID_DOMID;
1164 void
1165 xdfmin(struct buf *bp)
1167 if (bp->b_bcount > xdf_maxphys)
1168 bp->b_bcount = xdf_maxphys;
1172 * Check if we have a pending "eject" media request.
1174 static int
1175 xdf_eject_pending(xdf_t *vdp)
1177 dev_info_t *dip = vdp->xdf_dip;
1178 char *xsname, *str;
1180 if (!vdp->xdf_media_req_supported)
1181 return (B_FALSE);
1183 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1184 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
1185 return (B_FALSE);
1187 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
1188 strfree(str);
1189 return (B_FALSE);
1191 strfree(str);
1192 return (B_TRUE);
1196 * Generate a media request.
1198 static int
1199 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
1201 dev_info_t *dip = vdp->xdf_dip;
1202 char *xsname;
1205 * we can't be holding xdf_dev_lk because xenbus_printf() can
1206 * block while waiting for a PIL 1 interrupt message. this
1207 * would cause a deadlock with xdf_intr() which needs to grab
1208 * xdf_dev_lk as well and runs at PIL 5.
1210 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1211 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1213 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1214 return (ENXIO);
1216 /* Check if we support media requests */
1217 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
1218 return (ENOTTY);
1220 /* If an eject is pending then don't allow any new requests */
1221 if (xdf_eject_pending(vdp))
1222 return (ENXIO);
1224 /* Make sure that there is media present */
1225 if (media_required && (vdp->xdf_xdev_nblocks == 0))
1226 return (ENXIO);
1228 /* We only allow operations when the device is ready and connected */
1229 if (vdp->xdf_state != XD_READY)
1230 return (EIO);
1232 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
1233 return (EIO);
1235 return (0);
1239 * populate a single blkif_request_t w/ a buf
1241 static void
1242 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
1244 grant_ref_t gr;
1245 uint8_t fsect, lsect;
1246 size_t bcnt;
1247 paddr_t dma_addr;
1248 off_t blk_off;
1249 dev_info_t *dip = vdp->xdf_dip;
1250 blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
1251 v_req_t *vreq = BP_VREQ(bp);
1252 uint64_t blkno = vreq->v_blkno;
1253 uint_t ndmacs = vreq->v_ndmacs;
1254 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
1255 int seg = 0;
1256 int isread = IS_READ(bp);
1257 ge_slot_t *gs = list_head(&vreq->v_gs);
1259 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1260 ASSERT(vreq->v_status == VREQ_GS_ALLOCED);
1262 if (isread)
1263 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
1264 else {
1265 switch (vreq->v_flush_diskcache) {
1266 case FLUSH_DISKCACHE:
1267 ddi_put8(acchdl, &rreq->operation,
1268 BLKIF_OP_FLUSH_DISKCACHE);
1269 ddi_put16(acchdl, &rreq->handle, vdev);
1270 ddi_put64(acchdl, &rreq->id,
1271 (uint64_t)(uintptr_t)(gs));
1272 ddi_put8(acchdl, &rreq->nr_segments, 0);
1273 vreq->v_status = VREQ_DMAWIN_DONE;
1274 return;
1275 case WRITE_BARRIER:
1276 ddi_put8(acchdl, &rreq->operation,
1277 BLKIF_OP_WRITE_BARRIER);
1278 break;
1279 default:
1280 if (!vdp->xdf_wce)
1281 ddi_put8(acchdl, &rreq->operation,
1282 BLKIF_OP_WRITE_BARRIER);
1283 else
1284 ddi_put8(acchdl, &rreq->operation,
1285 BLKIF_OP_WRITE);
1286 break;
1290 ddi_put16(acchdl, &rreq->handle, vdev);
1291 ddi_put64(acchdl, &rreq->sector_number, blkno);
1292 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));
1295	 * loop until all segments are populated or there are no more dma cookies in the buf
1297 for (;;) {
1299 * Each segment of a blkif request can transfer up to
1300 * one 4K page of data.
1302 bcnt = vreq->v_dmac.dmac_size;
1303 dma_addr = vreq->v_dmac.dmac_laddress;
1304 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
1305 fsect = blk_off >> XB_BSHIFT;
1306 lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
1308 ASSERT(bcnt <= PAGESIZE);
1309 ASSERT((bcnt % XB_BSIZE) == 0);
1310 ASSERT((blk_off & XB_BMASK) == 0);
1311 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
1312 lsect < XB_MAX_SEGLEN / XB_BSIZE);
1314 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
1315 ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
1316 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
1317 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
1319 DPRINTF(IO_DBG, (
1320 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
1321 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
1322 DPRINTF(IO_DBG, (
1323 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
1324 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));
1326 blkno += (bcnt >> XB_BSHIFT);
1327 seg++;
1328 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
1329 if (--ndmacs) {
1330 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
1331 continue;
1334 vreq->v_status = VREQ_DMAWIN_DONE;
1335 vreq->v_blkno = blkno;
1336 break;
1338 ddi_put8(acchdl, &rreq->nr_segments, seg);
1339 DPRINTF(IO_DBG, (
1340 "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
1341 vdp->xdf_addr, rreq->id));
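/*
 * Start as many queued I/O requests as possible: allocate DMA and grant
 * table resources for each buf, populate ring requests with them, and
 * push the requests to the backend.
 */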
1344 static void
1345 xdf_io_start(xdf_t *vdp)
1347 struct buf *bp;
1348 v_req_t *vreq;
1349 blkif_request_t *rreq;
1350 boolean_t rreqready = B_FALSE;
1352 mutex_enter(&vdp->xdf_dev_lk);
1355 * Populate the ring request(s). Loop until there is no buf to
1356 * transfer or no free slot available in I/O ring.
1358 for (;;) {
1359 /* don't start any new IO if we're suspending */
1360 if (vdp->xdf_suspending)
1361 break;
1362 if ((bp = xdf_bp_next(vdp)) == NULL)
1363 break;
1365 /* if the buf doesn't already have a vreq, allocate one */
1366 if (((vreq = BP_VREQ(bp)) == NULL) &&
1367 ((vreq = vreq_get(vdp, bp)) == NULL))
1368 break;
1370 /* alloc DMA/GTE resources */
1371 if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
1372 break;
1374 /* get next blkif_request in the ring */
1375 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
1376 break;
1377 bzero(rreq, sizeof (blkif_request_t));
1378 rreqready = B_TRUE;
1380 /* populate blkif_request with this buf */
1381 xdf_process_rreq(vdp, bp, rreq);
1384		 * This buffer/vreq pair has been allocated ring buffer
1385		 * resources, so if it isn't already on our runq, add it.
1387 if (!vreq->v_runq)
1388 xdf_kstat_waitq_to_runq(vdp, bp);
1391 /* Send the request(s) to the backend */
1392 if (rreqready)
1393 xdf_ring_push(vdp);
1395 mutex_exit(&vdp->xdf_dev_lk);
1399 /* check if a partition is open; partition == -1 checks all partitions on the disk */
1400 static boolean_t
1401 xdf_isopen(xdf_t *vdp, int partition)
1403 int i;
1404 ulong_t parbit;
1405 boolean_t rval = B_FALSE;
1407 ASSERT((partition == -1) ||
1408	    ((partition >= 0) && (partition < XDF_PEXT)));
1410 if (partition == -1)
1411 parbit = (ulong_t)-1;
1412 else
1413 parbit = 1 << partition;
1415 for (i = 0; i < OTYPCNT; i++) {
1416 if (vdp->xdf_vd_open[i] & parbit)
1417 rval = B_TRUE;
1420 return (rval);
1424 * The connection should never be closed as long as someone is holding
1425	 * us open, there is pending IO, or someone is waiting for a
1426 * connection.
1428 static boolean_t
1429 xdf_busy(xdf_t *vdp)
1431 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1433 if ((vdp->xdf_xb_ring != NULL) &&
1434 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
1435 ASSERT(vdp->xdf_state != XD_CLOSED);
1436 return (B_TRUE);
1439 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
1440 ASSERT(vdp->xdf_state != XD_CLOSED);
1441 return (B_TRUE);
1444 if (xdf_isopen(vdp, -1)) {
1445 ASSERT(vdp->xdf_state != XD_CLOSED);
1446 return (B_TRUE);
1449 if (vdp->xdf_connect_req > 0) {
1450 ASSERT(vdp->xdf_state != XD_CLOSED);
1451 return (B_TRUE);
1454 return (B_FALSE);
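/*
 * Update the driver state and wake up anyone waiting for a state change.
 */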
1457 static void
1458 xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1461 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1462 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
1463 vdp->xdf_addr, vdp->xdf_state, new_state));
1464 vdp->xdf_state = new_state;
1465 cv_broadcast(&vdp->xdf_dev_cv);
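/*
 * Tear down our connection to the backend and move to new_state
 * (XD_UNKNOWN or XD_CLOSED).  If there is still outstanding activity we
 * can only transition to XD_UNKNOWN.
 */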
1468 static void
1469 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
1471 dev_info_t *dip = vdp->xdf_dip;
1472 boolean_t busy;
1474 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1475 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1476 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));
1478 /* Check if we're already there. */
1479 if (vdp->xdf_state == new_state)
1480 return;
1482 mutex_enter(&vdp->xdf_dev_lk);
1483 busy = xdf_busy(vdp);
1485	/* If we're already closed then there's nothing to do. */
1486 if (vdp->xdf_state == XD_CLOSED) {
1487 ASSERT(!busy);
1488 xdf_set_state(vdp, new_state);
1489 mutex_exit(&vdp->xdf_dev_lk);
1490 return;
1493 #ifdef DEBUG
1494 /* UhOh. Warn the user that something bad has happened. */
1495 if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
1496 (vdp->xdf_xdev_nblocks != 0)) {
1497 cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
1498 vdp->xdf_addr);
1500 #endif /* DEBUG */
1502 xdf_ring_destroy(vdp);
1504 /* If we're busy then we can only go into the unknown state */
1505 xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
1506 mutex_exit(&vdp->xdf_dev_lk);
1508 /* if we're closed now, let the other end know */
1509 if (vdp->xdf_state == XD_CLOSED)
1510 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1515 * Kick-off connect process
1516 * Status should be XD_UNKNOWN or XD_CLOSED
1517 * On success, status will be changed to XD_INIT
1518 * On error, it will be changed to XD_UNKNOWN
1520 static int
1521 xdf_setstate_init(xdf_t *vdp)
1523 dev_info_t *dip = vdp->xdf_dip;
1524 xenbus_transaction_t xbt;
1525 grant_ref_t gref;
1526 char *xsname, *str;
1527 int rv;
1529 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1530 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1531 ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
1532 (vdp->xdf_state == XD_CLOSED));
1534 DPRINTF(DDI_DBG,
1535 ("xdf@%s: starting connection process\n", vdp->xdf_addr));
1538 * If an eject is pending then don't allow a new connection.
1539	 * (Only the backend can clear a pending media eject request.)
1541 if (xdf_eject_pending(vdp))
1542 return (DDI_FAILURE);
1544 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1545 goto errout;
1547 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
1548 goto errout;
1550 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);
1553	 * Sanity check for the existence of the xenbus device-type property.
1554 * This property might not exist if our xenbus device nodes were
1555 * force destroyed while we were still connected to the backend.
1557 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
1558 goto errout;
1559 strfree(str);
1561 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
1562 goto errout;
1564 vdp->xdf_evtchn = xvdi_get_evtchn(dip);
1565 #ifdef XPV_HVM_DRIVER
1566 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
1567 #else /* !XPV_HVM_DRIVER */
1568 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
1569 DDI_SUCCESS) {
1570 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
1571 "failed to add intr handler", vdp->xdf_addr);
1572 goto errout1;
1574 #endif /* !XPV_HVM_DRIVER */
1576 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
1577 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
1578 DDI_SUCCESS) {
1579 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
1580 vdp->xdf_addr);
1581 goto errout2;
1583 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
1586 * Write into xenstore the info needed by backend
1588 trans_retry:
1589 if (xenbus_transaction_start(&xbt)) {
1590 cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
1591 vdp->xdf_addr);
1592 xvdi_fatal_error(dip, EIO, "connect transaction init");
1593 goto fail_trans;
1597 * XBP_PROTOCOL is written by the domain builder in the case of PV
1598 * domains. However, it is not written for HVM domains, so let's
1599 * write it here.
1601 if (((rv = xenbus_printf(xbt, xsname,
1602 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
1603 ((rv = xenbus_printf(xbt, xsname,
1604 XBP_RING_REF, "%u", gref)) != 0) ||
1605 ((rv = xenbus_printf(xbt, xsname,
1606 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
1607 ((rv = xenbus_printf(xbt, xsname,
1608 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
1609 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
1610 (void) xenbus_transaction_end(xbt, 1);
1611 xvdi_fatal_error(dip, rv, "connect transaction setup");
1612 goto fail_trans;
1615 /* kick-off connect process */
1616 if (rv = xenbus_transaction_end(xbt, 0)) {
1617 if (rv == EAGAIN)
1618 goto trans_retry;
1619 xvdi_fatal_error(dip, rv, "connect transaction commit");
1620 goto fail_trans;
1623 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1624 mutex_enter(&vdp->xdf_dev_lk);
1625 xdf_set_state(vdp, XD_INIT);
1626 mutex_exit(&vdp->xdf_dev_lk);
1628 return (DDI_SUCCESS);
1630 fail_trans:
1631 xvdi_free_ring(vdp->xdf_xb_ring);
1632 errout2:
1633 #ifdef XPV_HVM_DRIVER
1634 ec_unbind_evtchn(vdp->xdf_evtchn);
1635 #else /* !XPV_HVM_DRIVER */
1636 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1637 #endif /* !XPV_HVM_DRIVER */
1638 errout1:
1639 xvdi_free_evtchn(dip);
1640 vdp->xdf_evtchn = INVALID_EVTCHN;
1641 errout:
1642 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1643 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
1644 vdp->xdf_addr);
1645 return (DDI_FAILURE);
1649 xdf_get_flush_block(xdf_t *vdp)
1652	 * Get a sector-size (xdf_xdev_secsize) aligned buffer
1654 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
1655 vdp->xdf_cache_flush_block =
1656 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
1657 (int)vdp->xdf_xdev_secsize);
1659 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
1660 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
1661 return (DDI_FAILURE);
1662 return (DDI_SUCCESS);
1665 static void
1666 xdf_setstate_ready(void *arg)
1668 xdf_t *vdp = (xdf_t *)arg;
1670 vdp->xdf_ready_tq_thread = curthread;
1673	 * We've created all the minor nodes via cmlb_attach() using default
1674	 * values in xdf_attach() to make it possible to block in xdf_open(),
1675	 * in case anyone (say, the booting thread) ever tries to open the
1676	 * device before we're connected to the backend. Now that we're
1677	 * almost connected, refresh those minor nodes with the latest info.
1679 mutex_enter(&vdp->xdf_dev_lk);
1680 if (vdp->xdf_cmbl_reattach) {
1681 vdp->xdf_cmbl_reattach = B_FALSE;
1683 mutex_exit(&vdp->xdf_dev_lk);
1684 if (xdf_cmlb_attach(vdp) != 0) {
1685 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1686 return;
1688 mutex_enter(&vdp->xdf_dev_lk);
1691 /* If we're not still trying to get to the ready state, then bail. */
1692 if (vdp->xdf_state != XD_CONNECTED) {
1693 mutex_exit(&vdp->xdf_dev_lk);
1694 return;
1696 mutex_exit(&vdp->xdf_dev_lk);
1699 * If backend has feature-barrier, see if it supports disk
1700 * cache flush op.
1702 vdp->xdf_flush_supported = B_FALSE;
1703 if (vdp->xdf_feature_barrier) {
1705 * Pretend we already know flush is supported so probe
1706 * will attempt the correct op.
1708 vdp->xdf_flush_supported = B_TRUE;
1709 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
1710 vdp->xdf_flush_supported = B_TRUE;
1711 } else {
1712 vdp->xdf_flush_supported = B_FALSE;
1714 * If the other end does not support the cache flush op
1715 * then we must use a barrier-write to force disk
1716 * cache flushing. Barrier writes require that a data
1717 * block actually be written.
1718 * Cache a block to barrier-write when we are
1719 * asked to perform a flush.
1720 * XXX - would it be better to just copy 1 block
1721 * (512 bytes) from whatever write we did last
1722 * and rewrite that block?
1724 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
1725 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1726 return;
1731 mutex_enter(&vdp->xdf_cb_lk);
1732 mutex_enter(&vdp->xdf_dev_lk);
1733 if (vdp->xdf_state == XD_CONNECTED)
1734 xdf_set_state(vdp, XD_READY);
1735 mutex_exit(&vdp->xdf_dev_lk);
1737 /* Restart any currently queued up io */
1738 xdf_io_start(vdp);
1740 mutex_exit(&vdp->xdf_cb_lk);
1744 * synthetic geometry
1746 #define XDF_NSECTS 256
1747 #define XDF_NHEADS 16
1749 static void
1750 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
1752 xdf_t *vdp;
1753 uint_t ncyl;
1755 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
1757 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
1759 bzero(geomp, sizeof (*geomp));
1760 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
1761 geomp->g_acyl = 0;
1762 geomp->g_nhead = XDF_NHEADS;
1763 geomp->g_nsect = XDF_NSECTS;
1764 geomp->g_secsize = vdp->xdf_xdev_secsize;
1765 geomp->g_capacity = vdp->xdf_xdev_nblocks;
1766 geomp->g_intrlv = 0;
1767 geomp->g_rpm = 7200;
1771 * Finish other initialization after we've connected to backend
1772 * Status should be XD_INIT before calling this routine
1773 * On success, status should be changed to XD_CONNECTED.
1774 * On error, status should stay XD_INIT
1776 static int
1777 xdf_setstate_connected(xdf_t *vdp)
1779 dev_info_t *dip = vdp->xdf_dip;
1780 cmlb_geom_t pgeom;
1781 diskaddr_t nblocks = 0;
1782 uint_t secsize = 0;
1783 char *oename, *xsname, *str;
1784 uint_t dinfo;
1786 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1787 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1788 ASSERT(vdp->xdf_state == XD_INIT);
1790 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1791 ((oename = xvdi_get_oename(dip)) == NULL))
1792 return (DDI_FAILURE);
1794 /* Make sure the other end is XenbusStateConnected */
1795 if (xenbus_read_driver_state(oename) != XenbusStateConnected)
1796 return (DDI_FAILURE);
1798 /* Determine if feature barrier is supported by backend */
1799 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
1800 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported",
1801 vdp->xdf_addr);
1804 * Probe backend. Read the device size into xdf_xdev_nblocks
1805 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
1806 * flags in xdf_dinfo. If the emulated device type is "cdrom",
1807	 * we always set VDISK_CDROM, regardless of whether it's present in
1808 * the xenbus info parameter.
1810 if (xenbus_gather(XBT_NULL, oename,
1811 XBP_SECTORS, "%"SCNu64, &nblocks,
1812 XBP_SECTOR_SIZE, "%u", &secsize,
1813 XBP_INFO, "%u", &dinfo,
1814 NULL) != 0) {
1815 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1816 "cannot read backend info", vdp->xdf_addr);
1817 return (DDI_FAILURE);
1819 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
1820 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
1821 vdp->xdf_addr);
1822 return (DDI_FAILURE);
1824 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
1825 dinfo |= VDISK_CDROM;
1826 strfree(str);
1828 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
1829 secsize = DEV_BSIZE;
1830 vdp->xdf_xdev_nblocks = nblocks;
1831 vdp->xdf_xdev_secsize = secsize;
1832 #ifdef _ILP32
1833 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
1834 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1835 "backend disk device too large with %llu blocks for"
1836 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
1837 xvdi_fatal_error(dip, EFBIG, "reading backend info");
1838 return (DDI_FAILURE);
1840 #endif
1843	 * If the physical geometry for a fixed disk has been explicitly
1844 * set then make sure that the specified physical geometry isn't
1845 * larger than the device we connected to.
1847 if (vdp->xdf_pgeom_fixed &&
1848 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
1849 cmn_err(CE_WARN,
1850 "xdf@%s: connect failed, fixed geometry too large",
1851 vdp->xdf_addr);
1852 return (DDI_FAILURE);
1855 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);
1857	/* mark the vbd as ready for I/O */
1858 mutex_enter(&vdp->xdf_dev_lk);
1859 xdf_set_state(vdp, XD_CONNECTED);
1861 /* check if the cmlb label should be updated */
1862 xdf_synthetic_pgeom(dip, &pgeom);
1863 if ((vdp->xdf_dinfo != dinfo) ||
1864 (!vdp->xdf_pgeom_fixed &&
1865 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
1866 vdp->xdf_cmbl_reattach = B_TRUE;
1868 vdp->xdf_dinfo = dinfo;
1869 if (!vdp->xdf_pgeom_fixed)
1870 vdp->xdf_pgeom = pgeom;
1873 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
1874 if (vdp->xdf_xdev_nblocks == 0) {
1875 vdp->xdf_mstate = DKIO_EJECTED;
1876 cv_broadcast(&vdp->xdf_mstate_cv);
1877 } else {
1878 vdp->xdf_mstate = DKIO_INSERTED;
1879 cv_broadcast(&vdp->xdf_mstate_cv);
1881 } else {
1882 if (vdp->xdf_mstate != DKIO_NONE) {
1883 vdp->xdf_mstate = DKIO_NONE;
1884 cv_broadcast(&vdp->xdf_mstate_cv);
1888 mutex_exit(&vdp->xdf_dev_lk);
1890 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
1891 (uint64_t)vdp->xdf_xdev_nblocks);
1893 /* Restart any currently queued up io */
1894 xdf_io_start(vdp);
1897 * To get to the ready state we have to do IO to the backend device,
1898 * but we can't initiate IO from the other end change callback thread
1899 * (which is the current context we're executing in.) This is because
1900 * if the other end disconnects while we're doing IO from the callback
1901 * thread, then we can't receive that disconnect event and we hang
1902 * waiting for an IO that can never complete.
1904 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
1905 DDI_SLEEP);
1907 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
1908 return (DDI_SUCCESS);
1911 /*ARGSUSED*/
1912 static void
1913 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
1915 XenbusState new_state = *(XenbusState *)impl_data;
1916 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
1918 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
1919 vdp->xdf_addr, new_state));
1921 mutex_enter(&vdp->xdf_cb_lk);
1923 /* We assume that this callback is single threaded */
1924 ASSERT(vdp->xdf_oe_change_thread == NULL);
1925 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);
1927 /* ignore any backend state changes if we're suspending/suspended */
1928 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
1929 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1930 mutex_exit(&vdp->xdf_cb_lk);
1931 return;
1934 switch (new_state) {
1935 case XenbusStateUnknown:
1936 case XenbusStateInitialising:
1937 case XenbusStateInitWait:
1938 case XenbusStateInitialised:
1939 if (vdp->xdf_state == XD_INIT)
1940 break;
1942 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1943 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1944 break;
1945 ASSERT(vdp->xdf_state == XD_INIT);
1946 break;
1948 case XenbusStateConnected:
1949 if ((vdp->xdf_state == XD_CONNECTED) ||
1950 (vdp->xdf_state == XD_READY))
1951 break;
1953 if (vdp->xdf_state != XD_INIT) {
1954 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1955 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1956 break;
1957 ASSERT(vdp->xdf_state == XD_INIT);
1960 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
1961 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1962 break;
1964 ASSERT(vdp->xdf_state == XD_CONNECTED);
1965 break;
1967 case XenbusStateClosing:
1968 if (xdf_isopen(vdp, -1)) {
1969 cmn_err(CE_NOTE,
1970 "xdf@%s: hot-unplug failed, still in use",
1971 vdp->xdf_addr);
1972 break;
1974 /*FALLTHROUGH*/
1975 case XenbusStateClosed:
1976 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
1977 break;
1980 /* notify anybody waiting for oe state change */
1981 cv_broadcast(&vdp->xdf_dev_cv);
1982 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1983 mutex_exit(&vdp->xdf_cb_lk);
1986 static int
1987 xdf_connect_locked(xdf_t *vdp, boolean_t wait)
1989 int rv, timeouts = 0, reset = 20;
1991 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1992 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1994 /* we can't connect once we're in the closed state */
1995 if (vdp->xdf_state == XD_CLOSED)
1996 return (XD_CLOSED);
1998 vdp->xdf_connect_req++;
1999 while (vdp->xdf_state != XD_READY) {
2000 mutex_exit(&vdp->xdf_dev_lk);
2002 /* only one thread at a time can be the connection thread */
2003 if (vdp->xdf_connect_thread == NULL)
2004 vdp->xdf_connect_thread = curthread;
2006 if (vdp->xdf_connect_thread == curthread) {
2007 if ((timeouts > 0) && ((timeouts % reset) == 0)) {
2009				 * If we haven't established a connection
2010 * within the reset time, then disconnect
2011 * so we can try again, and double the reset
2012 * time. The reset time starts at 2 sec.
2014 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2015 reset *= 2;
2017 if (vdp->xdf_state == XD_UNKNOWN)
2018 (void) xdf_setstate_init(vdp);
2019 if (vdp->xdf_state == XD_INIT)
2020 (void) xdf_setstate_connected(vdp);
2023 mutex_enter(&vdp->xdf_dev_lk);
2024 if (!wait || (vdp->xdf_state == XD_READY))
2025 goto out;
2027 mutex_exit((&vdp->xdf_cb_lk));
2028 if (vdp->xdf_connect_thread != curthread) {
2029 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
2030 } else {
2031 /* delay for 0.1 sec */
2032 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv,
2033 &vdp->xdf_dev_lk, drv_usectohz(100*1000),
2034 TR_CLOCK_TICK);
2035 if (rv == -1)
2036 timeouts++;
2038 mutex_exit((&vdp->xdf_dev_lk));
2039 mutex_enter((&vdp->xdf_cb_lk));
2040 mutex_enter((&vdp->xdf_dev_lk));
2041 if (rv == 0)
2042 goto out;
2045 out:
2046 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2047 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
2049 if (vdp->xdf_connect_thread == curthread) {
2051 * wake up someone else so they can become the connection
2052 * thread.
2054 cv_signal(&vdp->xdf_dev_cv);
2055 vdp->xdf_connect_thread = NULL;
2058 /* Try to lock the media */
2059 mutex_exit((&vdp->xdf_dev_lk));
2060 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2061 mutex_enter((&vdp->xdf_dev_lk));
2063 vdp->xdf_connect_req--;
2064 return (vdp->xdf_state);
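/*
 * Soft interrupt handler (registered in xdf_attach()) that restarts the
 * I/O queue.  It is presumably triggered once previously unavailable DMA
 * resources become available again: it clears the "DMA callback armed"
 * flag and calls xdf_io_start() to push any deferred requests.
 */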
2067 static uint_t
2068 xdf_iorestart(caddr_t arg)
2070 xdf_t *vdp = (xdf_t *)arg;
2072 ASSERT(vdp != NULL);
2074 mutex_enter(&vdp->xdf_dev_lk);
2075 ASSERT(ISDMACBON(vdp));
2076 SETDMACBOFF(vdp);
2077 mutex_exit(&vdp->xdf_dev_lk);
2079 xdf_io_start(vdp);
2081 return (DDI_INTR_CLAIMED);
2084 #ifdef XPV_HVM_DRIVER
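/*
 * When xdf services PV-HVM disks, a PV-HVM shell driver (outside this
 * file) needs a way to map a device path onto the xdf dip that backs it.
 * The xdf_hvm_list below records that mapping; entries are added in
 * xdf_attach() (xdf_hvm_add()) and removed in xdf_detach() (xdf_hvm_rm()).
 * xdf_hvm_hold() looks up a path and returns the dip with a hold placed
 * on it.  Illustrative use only ("path" is whatever the caller obtained
 * from its own dip):
 *
 *	dev_info_t *dip;
 *	if ((dip = xdf_hvm_hold(path)) != NULL) {
 *		... use the xdf instance ...
 *		ndi_rele_devi(dip);
 *	}
 */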
2086 typedef struct xdf_hvm_entry {
2087 list_node_t xdf_he_list;
2088 char *xdf_he_path;
2089 dev_info_t *xdf_he_dip;
2090 } xdf_hvm_entry_t;
2092 static list_t xdf_hvm_list;
2093 static kmutex_t xdf_hvm_list_lock;
2095 static xdf_hvm_entry_t *
2096 i_xdf_hvm_find(const char *path, dev_info_t *dip)
2098 xdf_hvm_entry_t *i;
2100 ASSERT((path != NULL) || (dip != NULL));
2101 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
2103 i = list_head(&xdf_hvm_list);
2104 while (i != NULL) {
2105 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
2106 i = list_next(&xdf_hvm_list, i);
2107 continue;
2109 if ((dip != NULL) && (i->xdf_he_dip != dip)) {
2110 i = list_next(&xdf_hvm_list, i);
2111 continue;
2113 break;
2115 return (i);
2118 dev_info_t *
2119 xdf_hvm_hold(const char *path)
2121 xdf_hvm_entry_t *i;
2122 dev_info_t *dip;
2124 mutex_enter(&xdf_hvm_list_lock);
2125 i = i_xdf_hvm_find(path, NULL);
2126 if (i == NULL) {
2127 mutex_exit(&xdf_hvm_list_lock);
2128 return (NULL);
2130 ndi_hold_devi(dip = i->xdf_he_dip);
2131 mutex_exit(&xdf_hvm_list_lock);
2132 return (dip);
2135 static void
2136 xdf_hvm_add(dev_info_t *dip)
2138 xdf_hvm_entry_t *i;
2139 char *path;
2141 /* figure out the path for the dip */
2142 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2143 (void) ddi_pathname(dip, path);
2145 i = kmem_alloc(sizeof (*i), KM_SLEEP);
2146 i->xdf_he_dip = dip;
2147 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
2149 mutex_enter(&xdf_hvm_list_lock);
2150 ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
2151 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
2152 list_insert_head(&xdf_hvm_list, i);
2153 mutex_exit(&xdf_hvm_list_lock);
2155 kmem_free(path, MAXPATHLEN);
2158 static void
2159 xdf_hvm_rm(dev_info_t *dip)
2161 xdf_hvm_entry_t *i;
2163 mutex_enter(&xdf_hvm_list_lock);
2164 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
2165 list_remove(&xdf_hvm_list, i);
2166 mutex_exit(&xdf_hvm_list_lock);
2168 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
2169 kmem_free(i, sizeof (*i));
2172 static void
2173 xdf_hvm_init(void)
2175 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
2176 offsetof(xdf_hvm_entry_t, xdf_he_list));
2177 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
2180 static void
2181 xdf_hvm_fini(void)
2183 ASSERT(list_head(&xdf_hvm_list) == NULL);
2184 list_destroy(&xdf_hvm_list);
2185 mutex_destroy(&xdf_hvm_list_lock);
2188 boolean_t
2189 xdf_hvm_connect(dev_info_t *dip)
2191 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2192 char *oename, *str;
2193 int rv;
2195 mutex_enter(&vdp->xdf_cb_lk);
2198 * Before trying to establish a connection we need to wait for the
2199 * backend hotplug scripts to have run. Once they have run, the
2200 * "<oename>/hotplug-status" property will be set to "connected".
2202 for (;;) {
2203 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2206 * Get the xenbus path to the backend device. Note that
2207 * we can't cache this path (and we look it up on each pass
2208 * through this loop) because it could change during
2209 * suspend, resume, and migration operations.
2211 if ((oename = xvdi_get_oename(dip)) == NULL) {
2212 mutex_exit(&vdp->xdf_cb_lk);
2213 return (B_FALSE);
2216 str = NULL;
2217 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
2218 (strcmp(str, XBV_HP_STATUS_CONN) == 0))
2219 break;
2221 if (str != NULL)
2222 strfree(str);
2224 /* wait for an update to "<oename>/hotplug-status" */
2225 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
2226 /* we got interrupted by a signal */
2227 mutex_exit(&vdp->xdf_cb_lk);
2228 return (B_FALSE);
2232 /* Good news. The backend hotplug scripts have been run. */
2233 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2234 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
2235 strfree(str);
2238 * If we're emulating a cd device and the backend doesn't support
2239 * media request operations, then we're not going to bother trying
2240 * to establish a connection, for a couple of reasons. First, media
2241 * request support is required for operations like eject and
2242 * media locking. Second, other backend platforms like Linux don't
2243 * support hvm pv cdrom access. They don't even have a backend pv
2244 * driver for cdrom device nodes, so we don't want to block forever
2245 * waiting for a connection to a backend driver that doesn't exist.
2247 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
2248 mutex_exit(&vdp->xdf_cb_lk);
2249 return (B_FALSE);
2252 mutex_enter(&vdp->xdf_dev_lk);
2253 rv = xdf_connect_locked(vdp, B_TRUE);
2254 mutex_exit(&vdp->xdf_dev_lk);
2255 mutex_exit(&vdp->xdf_cb_lk);
2257 return ((rv == XD_READY) ? B_TRUE : B_FALSE);
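/*
 * xdf_hvm_setpgeom() lets the PV-HVM shell driver impose a fixed physical
 * geometry on this xdf instance (presumably one learned from the emulated
 * device path).  The geometry is sanity checked (the sector size must be
 * XB_BSIZE and, if we already know the backend size, the capacity must
 * not exceed it), copied into xdf_pgeom, and xdf_pgeom_fixed is set so
 * that, presumably, a later connection won't recompute it; finally the
 * cmlb label is invalidated to force re-validation.
 */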
2261 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2263 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2265 /* sanity check the requested physical geometry */
2266 mutex_enter(&vdp->xdf_dev_lk);
2267 if ((geomp->g_secsize != XB_BSIZE) ||
2268 (geomp->g_capacity == 0)) {
2269 mutex_exit(&vdp->xdf_dev_lk);
2270 return (EINVAL);
2274 * If we've already connected to the backend device then make sure
2275 * we're not defining a physical geometry larger than our backend
2276 * device.
2278 if ((vdp->xdf_xdev_nblocks != 0) &&
2279 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
2280 mutex_exit(&vdp->xdf_dev_lk);
2281 return (EINVAL);
2284 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
2285 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
2286 vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
2287 vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
2288 vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
2289 vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
2290 vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
2291 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
2292 vdp->xdf_pgeom.g_rpm = geomp->g_rpm;
2294 vdp->xdf_pgeom_fixed = B_TRUE;
2295 mutex_exit(&vdp->xdf_dev_lk);
2297 /* force a re-validation */
2298 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
2300 return (0);
2303 boolean_t
2304 xdf_is_cd(dev_info_t *dip)
2306 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2307 boolean_t rv;
2309 mutex_enter(&vdp->xdf_cb_lk);
2310 rv = XD_IS_CD(vdp);
2311 mutex_exit(&vdp->xdf_cb_lk);
2312 return (rv);
2315 boolean_t
2316 xdf_is_rm(dev_info_t *dip)
2318 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2319 boolean_t rv;
2321 mutex_enter(&vdp->xdf_cb_lk);
2322 rv = XD_IS_RM(vdp);
2323 mutex_exit(&vdp->xdf_cb_lk);
2324 return (rv);
2327 boolean_t
2328 xdf_media_req_supported(dev_info_t *dip)
2330 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2331 boolean_t rv;
2333 mutex_enter(&vdp->xdf_cb_lk);
2334 rv = vdp->xdf_media_req_supported;
2335 mutex_exit(&vdp->xdf_cb_lk);
2336 return (rv);
2339 #endif /* XPV_HVM_DRIVER */
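/*
 * The xdf_lb_*() routines below are the target-geometry callbacks handed
 * to cmlb (presumably via a cmlb_tg_ops vector defined earlier in this
 * file).  cmlb uses them to discover capacity, physical/virtual geometry,
 * block size and media attributes, and to read and write label blocks
 * through xdf_lb_rdwr().
 */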
2341 static int
2342 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
2344 xdf_t *vdp;
2345 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2347 if (vdp == NULL)
2348 return (ENXIO);
2350 mutex_enter(&vdp->xdf_dev_lk);
2351 *capp = vdp->xdf_pgeom.g_capacity;
2352 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
2353 mutex_exit(&vdp->xdf_dev_lk);
2354 return (0);
2357 static int
2358 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2360 xdf_t *vdp;
2362 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
2363 return (ENXIO);
2364 *geomp = vdp->xdf_pgeom;
2365 return (0);
2369 * No real HBA, no geometry available from it
2371 /*ARGSUSED*/
2372 static int
2373 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2375 return (EINVAL);
2378 static int
2379 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
2381 xdf_t *vdp;
2383 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
2384 return (ENXIO);
2386 if (XD_IS_RO(vdp))
2387 tgattributep->media_is_writable = 0;
2388 else
2389 tgattributep->media_is_writable = 1;
2390 tgattributep->media_is_rotational = 0;
2391 return (0);
2394 /* ARGSUSED3 */
2396 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
2398 int instance;
2399 xdf_t *vdp;
2401 instance = ddi_get_instance(dip);
2403 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
2404 return (ENXIO);
2406 switch (cmd) {
2407 case TG_GETPHYGEOM:
2408 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
2409 case TG_GETVIRTGEOM:
2410 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
2411 case TG_GETCAPACITY:
2412 return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
2413 case TG_GETBLOCKSIZE:
2414 mutex_enter(&vdp->xdf_cb_lk);
2415 *(uint32_t *)arg = vdp->xdf_xdev_secsize;
2416 mutex_exit(&vdp->xdf_cb_lk);
2417 return (0);
2418 case TG_GETATTR:
2419 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
2420 default:
2421 return (ENOTTY);
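/*
 * xdf_lb_rdwr() is the synchronous read/write entry point used by cmlb,
 * the devid routines and the ready taskq.  "start" is expressed in
 * backend sectors (xdf_xdev_secsize bytes each) and is converted to a
 * DEV_BSIZE-based b_blkno before the buf is pushed onto the I/O queue;
 * the caller then blocks in biowait().  When called from the ready-taskq
 * thread the ring is additionally drained by polling.  A typical call
 * reads one 512-byte sector, e.g. (as the devid code below does):
 *
 *	(void) xdf_lb_rdwr(vdp->xdf_dip, TG_READ, buf, blkno, NBPSCTR, NULL);
 */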
2425 /* ARGSUSED5 */
2427 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
2428 diskaddr_t start, size_t reqlen, void *tg_cookie)
2430 xdf_t *vdp;
2431 struct buf *bp;
2432 int err = 0;
2434 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2436 /* We don't allow IO from the oe_change callback thread */
2437 ASSERT(curthread != vdp->xdf_oe_change_thread);
2440 * Having a secsize of 0 means that the device isn't connected yet.
2441 * FIXME This happens for CD devices, and there's nothing we
2442 * can do about it at the moment.
2444 if (vdp->xdf_xdev_secsize == 0)
2445 return (EIO);
2447 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
2448 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
2449 return (EINVAL);
2451 bp = getrbuf(KM_SLEEP);
2452 if (cmd == TG_READ)
2453 bp->b_flags = B_BUSY | B_READ;
2454 else
2455 bp->b_flags = B_BUSY | B_WRITE;
2457 bp->b_un.b_addr = bufp;
2458 bp->b_bcount = reqlen;
2459 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
2460 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
2462 mutex_enter(&vdp->xdf_dev_lk);
2463 xdf_bp_push(vdp, bp);
2464 mutex_exit(&vdp->xdf_dev_lk);
2465 xdf_io_start(vdp);
2466 if (curthread == vdp->xdf_ready_tq_thread)
2467 (void) xdf_ring_drain(vdp);
2468 err = biowait(bp);
2469 ASSERT(bp->b_flags & B_DONE);
2470 freerbuf(bp);
2471 return (err);
2475 * Lock the current media. Set the media state to "lock".
2476 * (Media locks are only respected by the backend driver.)
2478 static int
2479 xdf_ioctl_mlock(xdf_t *vdp)
2481 int rv;
2482 mutex_enter(&vdp->xdf_cb_lk);
2483 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2484 mutex_exit(&vdp->xdf_cb_lk);
2485 return (rv);
2489 * Release a media lock. Set the media state to "none".
2491 static int
2492 xdf_ioctl_munlock(xdf_t *vdp)
2494 int rv;
2495 mutex_enter(&vdp->xdf_cb_lk);
2496 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
2497 mutex_exit(&vdp->xdf_cb_lk);
2498 return (rv);
2502 * Eject the current media. Ignores any media locks. (Media locks
2503 * are only for the benefit of the backend.)
2505 static int
2506 xdf_ioctl_eject(xdf_t *vdp)
2508 int rv;
2510 mutex_enter(&vdp->xdf_cb_lk);
2511 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
2512 mutex_exit(&vdp->xdf_cb_lk);
2513 return (rv);
2517 * We've set the media request xenbus parameter to eject, so now
2518 * disconnect from the backend, wait for the backend to clear
2519 * the media request xenbus parameter, and then we can reconnect
2520 * to the backend.
2522 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2523 mutex_enter(&vdp->xdf_dev_lk);
2524 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
2525 mutex_exit(&vdp->xdf_dev_lk);
2526 mutex_exit(&vdp->xdf_cb_lk);
2527 return (EIO);
2529 mutex_exit(&vdp->xdf_dev_lk);
2530 mutex_exit(&vdp->xdf_cb_lk);
2531 return (0);
2535 * Watch for media state changes. This can be an insertion of a device
2536 * (triggered by a 'xm block-configure' request in another domain) or
2537 * the ejection of a device (triggered by a local "eject" operation).
2538 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
2540 static int
2541 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
2543 enum dkio_state prev_state;
2545 mutex_enter(&vdp->xdf_cb_lk);
2546 prev_state = vdp->xdf_mstate;
2548 if (vdp->xdf_mstate == mstate) {
2549 while (vdp->xdf_mstate == prev_state) {
2550 if (cv_wait_sig(&vdp->xdf_mstate_cv,
2551 &vdp->xdf_cb_lk) == 0) {
2552 mutex_exit(&vdp->xdf_cb_lk);
2553 return (EINTR);
2558 if ((prev_state != DKIO_INSERTED) &&
2559 (vdp->xdf_mstate == DKIO_INSERTED)) {
2560 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2561 mutex_exit(&vdp->xdf_cb_lk);
2562 return (0);
2565 mutex_exit(&vdp->xdf_cb_lk);
2566 return (0);
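/*
 * xdf_ioctl() handles the disk ioctls.  Label and partition ioctls are
 * passed through to cmlb (with a devid refresh after a successful
 * relabel); eject/lock/unlock are translated into xenbus media requests;
 * the remaining cases (DKIOCGMEDIAINFO, DKIOCINFO, DKIOCSTATE, the write
 * cache ioctls and DKIOCFLUSHWRITECACHE) are serviced directly.  For
 * illustration only, a userland consumer (hypothetical device path,
 * using <sys/dkio.h>) might query the media like this:
 *
 *	int fd = open("/dev/rdsk/c1d0s2", O_RDONLY | O_NDELAY);
 *	struct dk_minfo mi;
 *	if (fd >= 0 && ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0)
 *		printf("%u-byte sectors\n", mi.dki_lbsize);
 */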
2569 /*ARGSUSED*/
2570 static int
2571 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
2572 int *rvalp)
2574 minor_t minor = getminor(dev);
2575 int part = XDF_PART(minor);
2576 xdf_t *vdp;
2577 int rv;
2579 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
2580 (!xdf_isopen(vdp, part)))
2581 return (ENXIO);
2583 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
2584 vdp->xdf_addr, cmd, cmd));
2586 switch (cmd) {
2587 default:
2588 return (ENOTTY);
2589 case DKIOCG_PHYGEOM:
2590 case DKIOCG_VIRTGEOM:
2591 case DKIOCGGEOM:
2592 case DKIOCSGEOM:
2593 case DKIOCGAPART:
2594 case DKIOCSAPART:
2595 case DKIOCGVTOC:
2596 case DKIOCSVTOC:
2597 case DKIOCPARTINFO:
2598 case DKIOCGEXTVTOC:
2599 case DKIOCSEXTVTOC:
2600 case DKIOCEXTPARTINFO:
2601 case DKIOCGMBOOT:
2602 case DKIOCSMBOOT:
2603 case DKIOCGETEFI:
2604 case DKIOCSETEFI:
2605 case DKIOCSETEXTPART:
2606 case DKIOCPARTITION:
2607 rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
2608 rvalp, NULL);
2609 if (rv != 0)
2610 return (rv);
2612 * If we're labelling the disk, we have to update the geometry
2613 * in the cmlb data structures, and we also have to write a new
2614 * devid to the disk. Note that writing an EFI label currently
2615 * requires 4 ioctls, and devid setup will fail on all but the
2616 * last.
2618 if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC ||
2619 cmd == DKIOCSETEFI) {
2620 rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0);
2621 if (rv == 0) {
2622 xdf_devid_setup(vdp);
2623 } else {
2624 cmn_err(CE_WARN,
2625 "xdf@%s, labeling failed on validate",
2626 vdp->xdf_addr);
2629 return (rv);
2630 case FDEJECT:
2631 case DKIOCEJECT:
2632 case CDROMEJECT:
2633 return (xdf_ioctl_eject(vdp));
2634 case DKIOCLOCK:
2635 return (xdf_ioctl_mlock(vdp));
2636 case DKIOCUNLOCK:
2637 return (xdf_ioctl_munlock(vdp));
2638 case CDROMREADOFFSET: {
2639 int offset = 0;
2640 if (!XD_IS_CD(vdp))
2641 return (ENOTTY);
2642 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
2643 return (EFAULT);
2644 return (0);
2646 case DKIOCGMEDIAINFO: {
2647 struct dk_minfo media_info;
2649 media_info.dki_lbsize = vdp->xdf_xdev_secsize;
2650 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
2651 if (XD_IS_CD(vdp))
2652 media_info.dki_media_type = DK_CDROM;
2653 else
2654 media_info.dki_media_type = DK_FIXED_DISK;
2656 if (ddi_copyout(&media_info, (void *)arg,
2657 sizeof (struct dk_minfo), mode))
2658 return (EFAULT);
2659 return (0);
2661 case DKIOCINFO: {
2662 struct dk_cinfo info;
2664 /* controller information */
2665 if (XD_IS_CD(vdp))
2666 info.dki_ctype = DKC_CDROM;
2667 else
2668 info.dki_ctype = DKC_VBD;
2670 info.dki_cnum = 0;
2671 (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
2673 /* unit information */
2674 info.dki_unit = ddi_get_instance(vdp->xdf_dip);
2675 (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
2676 info.dki_flags = DKI_FMTVOL;
2677 info.dki_partition = part;
2678 info.dki_maxtransfer = maxphys / DEV_BSIZE;
2679 info.dki_addr = 0;
2680 info.dki_space = 0;
2681 info.dki_prio = 0;
2682 info.dki_vec = 0;
2684 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
2685 return (EFAULT);
2686 return (0);
2688 case DKIOCSTATE: {
2689 enum dkio_state mstate;
2691 if (ddi_copyin((void *)arg, &mstate,
2692 sizeof (mstate), mode) != 0)
2693 return (EFAULT);
2694 if ((rv = xdf_dkstate(vdp, mstate)) != 0)
2695 return (rv);
2696 mstate = vdp->xdf_mstate;
2697 if (ddi_copyout(&mstate, (void *)arg,
2698 sizeof (mstate), mode) != 0)
2699 return (EFAULT);
2700 return (0);
2702 case DKIOCREMOVABLE: {
2703 int i = BOOLEAN2VOID(XD_IS_RM(vdp));
2704 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
2705 return (EFAULT);
2706 return (0);
2708 case DKIOCGETWCE: {
2709 int i = BOOLEAN2VOID(vdp->xdf_wce);
2710 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
2711 return (EFAULT);
2712 return (0);
2714 case DKIOCSETWCE: {
2715 int i;
2716 if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
2717 return (EFAULT);
2718 vdp->xdf_wce = VOID2BOOLEAN(i);
2719 return (0);
2721 case DKIOCFLUSHWRITECACHE: {
2722 struct dk_callback *dkc = (struct dk_callback *)arg;
2724 if (vdp->xdf_flush_supported) {
2725 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2726 NULL, 0, 0, (void *)dev);
2727 } else if (vdp->xdf_feature_barrier &&
2728 !xdf_barrier_flush_disable) {
2729 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2730 vdp->xdf_cache_flush_block, xdf_flush_block,
2731 vdp->xdf_xdev_secsize, (void *)dev);
2732 } else {
2733 return (ENOTTY);
2735 if ((mode & FKIOCTL) && (dkc != NULL) &&
2736 (dkc->dkc_callback != NULL)) {
2737 (*dkc->dkc_callback)(dkc->dkc_cookie, rv);
2738 /* need to return 0 after calling callback */
2739 rv = 0;
2741 return (rv);
2744 /*NOTREACHED*/
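/*
 * xdf_strategy() is the common block I/O entry point.  It maps the minor
 * number to a partition (via cmlb_partinfo()), rejects writes to
 * read-only devices, converts the DEV_BSIZE-based b_blkno into backend
 * sectors, clips requests that run past the end of the partition
 * (recording the untransferred bytes in b_resid), and then queues the
 * buf and kicks xdf_io_start().  When do_polled_io is set the ring is
 * drained synchronously.
 */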
2747 static int
2748 xdf_strategy(struct buf *bp)
2750 xdf_t *vdp;
2751 minor_t minor;
2752 diskaddr_t p_blkct, p_blkst;
2753 daddr_t blkno;
2754 ulong_t nblks;
2755 int part;
2757 minor = getminor(bp->b_edev);
2758 part = XDF_PART(minor);
2759 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));
2761 mutex_enter(&vdp->xdf_dev_lk);
2762 if (!xdf_isopen(vdp, part)) {
2763 mutex_exit(&vdp->xdf_dev_lk);
2764 xdf_io_err(bp, ENXIO, 0);
2765 return (0);
2768 /* We don't allow IO from the oe_change callback thread */
2769 ASSERT(curthread != vdp->xdf_oe_change_thread);
2771 /* Check for writes to a read only device */
2772 if (!IS_READ(bp) && XD_IS_RO(vdp)) {
2773 mutex_exit(&vdp->xdf_dev_lk);
2774 xdf_io_err(bp, EROFS, 0);
2775 return (0);
2778 /* Check if this I/O is accessing a partition or the entire disk */
2779 if ((long)bp->b_private == XB_SLICE_NONE) {
2780 /* This I/O is using an absolute offset */
2781 p_blkct = vdp->xdf_xdev_nblocks;
2782 p_blkst = 0;
2783 } else {
2784 /* This I/O is using a partition relative offset */
2785 mutex_exit(&vdp->xdf_dev_lk);
2786 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
2787 &p_blkst, NULL, NULL, NULL)) {
2788 xdf_io_err(bp, ENXIO, 0);
2789 return (0);
2791 mutex_enter(&vdp->xdf_dev_lk);
2795 * Adjust the real blkno and bcount according to the underlying
2796 * physical sector size.
2798 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);
2800 /* check for a starting block beyond the disk or partition limit */
2801 if (blkno > p_blkct) {
2802 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
2803 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
2804 mutex_exit(&vdp->xdf_dev_lk);
2805 xdf_io_err(bp, EINVAL, 0);
2806 return (0);
2809 /* Legacy: don't set the error flag in this case */
2810 if (blkno == p_blkct) {
2811 mutex_exit(&vdp->xdf_dev_lk);
2812 bp->b_resid = bp->b_bcount;
2813 biodone(bp);
2814 return (0);
2817 /* sanitize the input buf */
2818 bioerror(bp, 0);
2819 bp->b_resid = 0;
2820 bp->av_back = bp->av_forw = NULL;
2822 /* Adjust for a partial transfer; this will result in an error later */
2823 if (vdp->xdf_xdev_secsize != 0 &&
2824 vdp->xdf_xdev_secsize != XB_BSIZE) {
2825 nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
2826 } else {
2827 nblks = bp->b_bcount >> XB_BSHIFT;
2830 if ((blkno + nblks) > p_blkct) {
2831 if (vdp->xdf_xdev_secsize != 0 &&
2832 vdp->xdf_xdev_secsize != XB_BSIZE) {
2833 bp->b_resid =
2834 ((blkno + nblks) - p_blkct) *
2835 vdp->xdf_xdev_secsize;
2836 } else {
2837 bp->b_resid =
2838 ((blkno + nblks) - p_blkct) <<
2839 XB_BSHIFT;
2841 bp->b_bcount -= bp->b_resid;
2844 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
2845 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));
2847 /* Fix up the buf struct */
2848 bp->b_flags |= B_BUSY;
2849 bp->b_private = (void *)(uintptr_t)p_blkst;
2851 xdf_bp_push(vdp, bp);
2852 mutex_exit(&vdp->xdf_dev_lk);
2853 xdf_io_start(vdp);
2854 if (do_polled_io)
2855 (void) xdf_ring_drain(vdp);
2856 return (0);
2859 /*ARGSUSED*/
2860 static int
2861 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
2863 xdf_t *vdp;
2864 minor_t minor;
2865 diskaddr_t p_blkcnt;
2866 int part;
2868 minor = getminor(dev);
2869 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2870 return (ENXIO);
2872 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
2873 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2875 part = XDF_PART(minor);
2876 if (!xdf_isopen(vdp, part))
2877 return (ENXIO);
2879 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2880 NULL, NULL, NULL, NULL))
2881 return (ENXIO);
2883 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2884 return (ENOSPC);
2886 if (U_INVAL(uiop))
2887 return (EINVAL);
2889 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
2892 /*ARGSUSED*/
2893 static int
2894 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
2896 xdf_t *vdp;
2897 minor_t minor;
2898 diskaddr_t p_blkcnt;
2899 int part;
2901 minor = getminor(dev);
2902 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2903 return (ENXIO);
2905 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
2906 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2908 part = XDF_PART(minor);
2909 if (!xdf_isopen(vdp, part))
2910 return (ENXIO);
2912 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2913 NULL, NULL, NULL, NULL))
2914 return (ENXIO);
2916 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2917 return (ENOSPC);
2919 if (U_INVAL(uiop))
2920 return (EINVAL);
2922 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
2925 /*ARGSUSED*/
2926 static int
2927 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
2929 xdf_t *vdp;
2930 minor_t minor;
2931 struct uio *uiop = aiop->aio_uio;
2932 diskaddr_t p_blkcnt;
2933 int part;
2935 minor = getminor(dev);
2936 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2937 return (ENXIO);
2939 part = XDF_PART(minor);
2940 if (!xdf_isopen(vdp, part))
2941 return (ENXIO);
2943 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2944 NULL, NULL, NULL, NULL))
2945 return (ENXIO);
2947 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2948 return (ENOSPC);
2950 if (U_INVAL(uiop))
2951 return (EINVAL);
2953 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
2956 /*ARGSUSED*/
2957 static int
2958 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
2960 xdf_t *vdp;
2961 minor_t minor;
2962 struct uio *uiop = aiop->aio_uio;
2963 diskaddr_t p_blkcnt;
2964 int part;
2966 minor = getminor(dev);
2967 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2968 return (ENXIO);
2970 part = XDF_PART(minor);
2971 if (!xdf_isopen(vdp, part))
2972 return (ENXIO);
2974 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2975 NULL, NULL, NULL, NULL))
2976 return (ENXIO);
2978 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2979 return (ENOSPC);
2981 if (U_INVAL(uiop))
2982 return (EINVAL);
2984 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
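/*
 * xdf_dump() is the dump(9E) entry point used when writing a crash dump.
 * It cannot rely on interrupts or on biowait(), so the request is pushed
 * onto the queue and the ring is drained by polling (xdf_ring_drain()).
 */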
2987 static int
2988 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
2990 struct buf dumpbuf, *dbp = &dumpbuf;
2991 xdf_t *vdp;
2992 minor_t minor;
2993 int err = 0;
2994 int part;
2995 diskaddr_t p_blkcnt, p_blkst;
2997 minor = getminor(dev);
2998 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2999 return (ENXIO);
3001 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
3002 vdp->xdf_addr, (void *)addr, blkno, nblk));
3004 /* We don't allow IO from the oe_change callback thread */
3005 ASSERT(curthread != vdp->xdf_oe_change_thread);
3007 part = XDF_PART(minor);
3008 if (!xdf_isopen(vdp, part))
3009 return (ENXIO);
3011 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
3012 NULL, NULL, NULL))
3013 return (ENXIO);
3015 if ((blkno + nblk) >
3016 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
3017 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
3018 vdp->xdf_addr, (daddr_t)((blkno + nblk) /
3019 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
3020 return (EINVAL);
3023 bioinit(dbp);
3024 dbp->b_flags = B_BUSY;
3025 dbp->b_un.b_addr = addr;
3026 dbp->b_bcount = nblk << DEV_BSHIFT;
3027 dbp->b_blkno = blkno;
3028 dbp->b_edev = dev;
3029 dbp->b_private = (void *)(uintptr_t)p_blkst;
3031 mutex_enter(&vdp->xdf_dev_lk);
3032 xdf_bp_push(vdp, dbp);
3033 mutex_exit(&vdp->xdf_dev_lk);
3034 xdf_io_start(vdp);
3035 err = xdf_ring_drain(vdp);
3036 biofini(dbp);
3037 return (err);
3040 /*ARGSUSED*/
3041 static int
3042 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
3044 minor_t minor;
3045 xdf_t *vdp;
3046 int part;
3047 ulong_t parbit;
3049 minor = getminor(dev);
3050 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3051 return (ENXIO);
3053 mutex_enter(&vdp->xdf_dev_lk);
3054 part = XDF_PART(minor);
3055 if (!xdf_isopen(vdp, part)) {
3056 mutex_exit(&vdp->xdf_dev_lk);
3057 return (ENXIO);
3059 parbit = 1 << part;
3061 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
3062 if (otyp == OTYP_LYR) {
3063 ASSERT(vdp->xdf_vd_lyropen[part] > 0);
3064 if (--vdp->xdf_vd_lyropen[part] == 0)
3065 vdp->xdf_vd_open[otyp] &= ~parbit;
3066 } else {
3067 vdp->xdf_vd_open[otyp] &= ~parbit;
3069 vdp->xdf_vd_exclopen &= ~parbit;
3071 mutex_exit(&vdp->xdf_dev_lk);
3072 return (0);
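/*
 * xdf_open() semantics: a non-blocking (FNDELAY/FNONBLOCK) open succeeds
 * without waiting for the backend, while a blocking open waits for the
 * device to reach XD_READY and additionally requires a valid label and a
 * non-zero partition size.  Writes are refused on read-only devices,
 * FEXCL opens are tracked per partition, and layered (OTYP_LYR) opens
 * are reference counted.
 */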
3075 static int
3076 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
3078 minor_t minor;
3079 xdf_t *vdp;
3080 int part;
3081 ulong_t parbit;
3082 diskaddr_t p_blkct = 0;
3083 boolean_t firstopen;
3084 boolean_t nodelay;
3086 minor = getminor(*devp);
3087 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3088 return (ENXIO);
3090 nodelay = (flag & (FNDELAY | FNONBLOCK));
3092 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr));
3094 /* do cv_wait until connected or failed */
3095 mutex_enter(&vdp->xdf_cb_lk);
3096 mutex_enter(&vdp->xdf_dev_lk);
3097 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) {
3098 mutex_exit(&vdp->xdf_dev_lk);
3099 mutex_exit(&vdp->xdf_cb_lk);
3100 return (ENXIO);
3102 mutex_exit(&vdp->xdf_cb_lk);
3104 if ((flag & FWRITE) && XD_IS_RO(vdp)) {
3105 mutex_exit(&vdp->xdf_dev_lk);
3106 return (EROFS);
3109 part = XDF_PART(minor);
3110 parbit = 1 << part;
3111 if ((vdp->xdf_vd_exclopen & parbit) ||
3112 ((flag & FEXCL) && xdf_isopen(vdp, part))) {
3113 mutex_exit(&vdp->xdf_dev_lk);
3114 return (EBUSY);
3117 /* are we the first one to open this node? */
3118 firstopen = !xdf_isopen(vdp, -1);
3120 if (otyp == OTYP_LYR)
3121 vdp->xdf_vd_lyropen[part]++;
3123 vdp->xdf_vd_open[otyp] |= parbit;
3125 if (flag & FEXCL)
3126 vdp->xdf_vd_exclopen |= parbit;
3128 mutex_exit(&vdp->xdf_dev_lk);
3130 /* force a re-validation */
3131 if (firstopen)
3132 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
3134 /* If this is a non-blocking open then we're done */
3135 if (nodelay)
3136 return (0);
3139 * This is a blocking open, so we require:
3140 * - that the disk have a valid label on it
3141 * - that the size of the partition that we're opening is non-zero
3143 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
3144 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) {
3145 (void) xdf_close(*devp, flag, otyp, credp);
3146 return (ENXIO);
3149 return (0);
3152 /*ARGSUSED*/
3153 static void
3154 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg)
3156 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
3157 cv_broadcast(&vdp->xdf_hp_status_cv);
3160 static int
3161 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
3162 char *name, caddr_t valuep, int *lengthp)
3164 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
3167 * Sanity check that any dev_t or dip we were passed corresponds
3168 * to this device driver. On debug kernels we'll panic and on
3169 * non-debug kernels we'll return failure.
3171 ASSERT(ddi_driver_major(dip) == xdf_major);
3172 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major));
3173 if ((ddi_driver_major(dip) != xdf_major) ||
3174 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major)))
3175 return (DDI_PROP_NOT_FOUND);
3177 if (vdp == NULL)
3178 return (ddi_prop_op(dev, dip, prop_op, flags,
3179 name, valuep, lengthp));
3181 return (cmlb_prop_op(vdp->xdf_vd_lbl,
3182 dev, dip, prop_op, flags, name, valuep, lengthp,
3183 XDF_PART(getminor(dev)), NULL));
3186 /*ARGSUSED*/
3187 static int
3188 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
3190 int instance = XDF_INST(getminor((dev_t)arg));
3191 xdf_t *vbdp;
3193 switch (cmd) {
3194 case DDI_INFO_DEVT2DEVINFO:
3195 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) {
3196 *rp = NULL;
3197 return (DDI_FAILURE);
3199 *rp = vbdp->xdf_dip;
3200 return (DDI_SUCCESS);
3202 case DDI_INFO_DEVT2INSTANCE:
3203 *rp = (void *)(uintptr_t)instance;
3204 return (DDI_SUCCESS);
3206 default:
3207 return (DDI_FAILURE);
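/*
 * xdf_resume() handles DDI_RESUME: it resumes the xvdi state, re-adds the
 * xenbus watch on the backend's hotplug-status node, resets the frontend
 * state to XD_UNKNOWN and restarts the handshake via xdf_setstate_init().
 * The actual reconnect then presumably completes asynchronously through
 * xdf_oe_change().
 */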
3211 /*ARGSUSED*/
3212 static int
3213 xdf_resume(dev_info_t *dip)
3215 xdf_t *vdp;
3216 char *oename;
3218 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
3219 goto err;
3221 if (xdf_debug & SUSRES_DBG)
3222 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr);
3224 mutex_enter(&vdp->xdf_cb_lk);
3226 if (xvdi_resume(dip) != DDI_SUCCESS) {
3227 mutex_exit(&vdp->xdf_cb_lk);
3228 goto err;
3231 if (((oename = xvdi_get_oename(dip)) == NULL) ||
3232 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3233 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) {
3234 mutex_exit(&vdp->xdf_cb_lk);
3235 goto err;
3238 mutex_enter(&vdp->xdf_dev_lk);
3239 ASSERT(vdp->xdf_state != XD_READY);
3240 xdf_set_state(vdp, XD_UNKNOWN);
3241 mutex_exit(&vdp->xdf_dev_lk);
3243 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3244 mutex_exit(&vdp->xdf_cb_lk);
3245 goto err;
3248 mutex_exit(&vdp->xdf_cb_lk);
3250 if (xdf_debug & SUSRES_DBG)
3251 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr);
3252 return (DDI_SUCCESS);
3253 err:
3254 if (xdf_debug & SUSRES_DBG)
3255 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr);
3256 return (DDI_FAILURE);
3260 * Uses the in-memory devid if one exists; otherwise fabricates one.
3262 * The devid is written to the first block of the last track of
3263 * the last cylinder.
3264 * Returns DDI_SUCCESS or DDI_FAILURE.
3266 static int
3267 xdf_devid_fabricate(xdf_t *vdp)
3269 ddi_devid_t devid = vdp->xdf_tgt_devid; /* null if no devid */
3270 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */
3271 diskaddr_t blk;
3272 uint_t *ip, chksum;
3273 int i, devid_size;
3275 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
3276 goto err;
3278 if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0,
3279 NULL, &devid) != DDI_SUCCESS)
3280 goto err;
3282 /* allocate a buffer */
3283 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);
3285 /* Fill in the revision */
3286 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
3287 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;
3289 /* Copy in the device id */
3290 devid_size = ddi_devid_sizeof(devid);
3291 if (devid_size > DK_DEVID_SIZE)
3292 goto err;
3293 bcopy(devid, dkdevidp->dkd_devid, devid_size);
3295 /* Calculate the chksum */
3296 chksum = 0;
3297 ip = (uint_t *)dkdevidp;
3298 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
3299 chksum ^= ip[i];
3301 /* Fill in the checksum */
3302 DKD_FORMCHKSUM(chksum, dkdevidp);
3304 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk,
3305 NBPSCTR, NULL) != 0)
3306 goto err;
3308 kmem_free(dkdevidp, NBPSCTR);
3310 vdp->xdf_tgt_devid = devid;
3311 return (DDI_SUCCESS);
3313 err:
3314 if (dkdevidp != NULL)
3315 kmem_free(dkdevidp, NBPSCTR);
3316 if (devid != NULL && vdp->xdf_tgt_devid == NULL)
3317 ddi_devid_free(devid);
3318 return (DDI_FAILURE);
3322 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use xdf
3323 * functions.
3325 * Read a devid from the first block of the last track of
3326 * the last cylinder. Make sure what we read is a valid devid.
3327 * Return DDI_SUCCESS or DDI_FAILURE.
3329 static int
3330 xdf_devid_read(xdf_t *vdp)
3332 diskaddr_t blk;
3333 struct dk_devid *dkdevidp;
3334 uint_t *ip, chksum;
3335 int i;
3337 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
3338 return (DDI_FAILURE);
3340 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
3341 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk,
3342 NBPSCTR, NULL) != 0)
3343 goto err;
3345 /* Validate the revision */
3346 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
3347 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
3348 goto err;
3350 /* Calculate the checksum */
3351 chksum = 0;
3352 ip = (uint_t *)dkdevidp;
3353 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
3354 chksum ^= ip[i];
3355 if (DKD_GETCHKSUM(dkdevidp) != chksum)
3356 goto err;
3358 /* Validate the device id */
3359 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
3360 goto err;
3362 /* keep a copy of the device id */
3363 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
3364 vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP);
3365 bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i);
3366 kmem_free(dkdevidp, NBPSCTR);
3367 return (DDI_SUCCESS);
3369 err:
3370 kmem_free(dkdevidp, NBPSCTR);
3371 return (DDI_FAILURE);
3375 * xdf_devid_setup() is a modified copy of cmdk_devid_setup().
3377 * This function creates a devid if we don't already have one, and
3378 * registers it. If we already have one, we make sure that it can be
3379 * read from the disk, otherwise we write it to the disk ourselves. If
3380 * we didn't already have a devid, and we create one, we also need to
3381 * register it.
3383 void
3384 xdf_devid_setup(xdf_t *vdp)
3386 int rc;
3387 boolean_t existed = vdp->xdf_tgt_devid != NULL;
3389 /* Read devid from the disk, if present */
3390 rc = xdf_devid_read(vdp);
3392 /* Otherwise write a devid (which we create if necessary) on the disk */
3393 if (rc != DDI_SUCCESS)
3394 rc = xdf_devid_fabricate(vdp);
3396 /* If we created a devid or found it on the disk, register it */
3397 if (rc == DDI_SUCCESS && !existed)
3398 (void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid);
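/*
 * xdf_attach() handles both DDI_ATTACH and DDI_RESUME (the latter is
 * delegated to xdf_resume()).  For DDI_ATTACH it reads the device type
 * from xenbus, sets up soft state, locks, condition variables, the ready
 * taskq, the hotplug-status watch and the soft interrupt, creates the
 * cmlb minor nodes (disks only for now), registers the backend state
 * change handler and then starts the connection.  For disks it waits for
 * the backend to connect so that the label can be validated and a devid
 * set up before attach completes.
 */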
3401 static int
3402 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3404 int n, instance = ddi_get_instance(dip);
3405 ddi_iblock_cookie_t ibc, softibc;
3406 boolean_t dev_iscd = B_FALSE;
3407 xdf_t *vdp;
3408 char *oename, *xsname, *str;
3409 clock_t timeout;
3410 int err = 0;
3412 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM,
3413 "xdf_debug", 0)) != 0)
3414 xdf_debug = n;
3416 switch (cmd) {
3417 case DDI_RESUME:
3418 return (xdf_resume(dip));
3419 case DDI_ATTACH:
3420 break;
3421 default:
3422 return (DDI_FAILURE);
3424 /* DDI_ATTACH */
3426 if ((xsname = xvdi_get_xsname(dip)) == NULL ||
3427 (oename = xvdi_get_oename(dip)) == NULL)
3428 return (DDI_FAILURE);
3431 * Disable auto-detach. This is necessary so that we don't get
3432 * detached while we're disconnected from the back end.
3434 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
3435 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
3436 return (DDI_FAILURE);
3438 /* driver handles kernel-issued IOCTLs */
3439 if (ddi_prop_create(DDI_DEV_T_NONE, dip,
3440 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
3441 return (DDI_FAILURE);
3443 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
3444 return (DDI_FAILURE);
3446 if (ddi_get_soft_iblock_cookie(dip,
3447 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
3448 return (DDI_FAILURE);
3450 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
3451 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
3452 ddi_get_name_addr(dip));
3453 return (DDI_FAILURE);
3455 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
3456 dev_iscd = B_TRUE;
3457 strfree(str);
3459 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
3460 return (DDI_FAILURE);
3462 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
3463 vdp = ddi_get_soft_state(xdf_ssp, instance);
3464 ddi_set_driver_private(dip, vdp);
3465 vdp->xdf_dip = dip;
3466 vdp->xdf_addr = ddi_get_name_addr(dip);
3467 vdp->xdf_suspending = B_FALSE;
3468 vdp->xdf_media_req_supported = B_FALSE;
3469 vdp->xdf_peer = INVALID_DOMID;
3470 vdp->xdf_evtchn = INVALID_EVTCHN;
3471 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
3472 offsetof(v_req_t, v_link));
3473 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
3474 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
3475 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
3476 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3477 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3478 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3479 vdp->xdf_cmbl_reattach = B_TRUE;
3480 if (dev_iscd) {
3481 vdp->xdf_dinfo |= VDISK_CDROM;
3482 vdp->xdf_mstate = DKIO_EJECTED;
3483 } else {
3484 vdp->xdf_mstate = DKIO_NONE;
3487 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
3488 1, TASKQ_DEFAULTPRI, 0)) == NULL)
3489 goto errout0;
3491 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3492 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
3493 goto errout0;
3495 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
3496 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
3497 cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
3498 ddi_get_name_addr(dip));
3499 goto errout0;
3503 * Initialize the physical geometry structure. Note that currently
3504 * we don't know the size of the backend device so the number
3505 * of blocks on the device will be initialized to zero. Once
3506 * we connect to the backend device we'll update the physical
3507 * geometry to reflect the real size of the device.
3509 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
3510 vdp->xdf_pgeom_fixed = B_FALSE;
3513 * Create default device minor nodes: non-removable disk.
3514 * We will adjust the minor nodes after we are connected to the backend.
3516 * FIXME: creating device minor nodes is currently disabled for CD
3517 * devices; re-enable once the issues with xdf CD devices are fixed.
3519 if (!dev_iscd) {
3520 cmlb_alloc_handle(&vdp->xdf_vd_lbl);
3521 if (xdf_cmlb_attach(vdp) != 0) {
3522 cmn_err(CE_WARN,
3523 "xdf@%s: attach failed, cmlb attach failed",
3524 ddi_get_name_addr(dip));
3525 goto errout0;
3529 /* We ship with cache-enabled disks */
3530 vdp->xdf_wce = B_TRUE;
3532 mutex_enter(&vdp->xdf_cb_lk);
3533 /* Watch backend XenbusState change */
3534 if (xvdi_add_event_handler(dip,
3535 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
3536 mutex_exit(&vdp->xdf_cb_lk);
3537 goto errout0;
3540 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3541 cmn_err(CE_WARN, "xdf@%s: start connection failed",
3542 ddi_get_name_addr(dip));
3543 mutex_exit(&vdp->xdf_cb_lk);
3544 goto errout1;
3547 /* Nothing else to do for CD devices */
3548 if (dev_iscd) {
3549 mutex_exit(&vdp->xdf_cb_lk);
3550 goto done;
3554 * In order to do cmlb_validate, we have to wait for the disk to
3555 * acknowledge the attach, so we can query the backend for the disk
3556 * geometry (see xdf_setstate_connected).
3558 * We only wait 30 seconds; if this is the root disk, the boot
3559 * will fail, but it would fail anyway if the device never
3560 * connected. If this is a non-boot disk, that disk will fail
3561 * to connect, but again, it would fail anyway.
3563 timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT);
3564 while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) {
3565 if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk,
3566 timeout) < 0) {
3567 cmn_err(CE_WARN, "xdf@%s: disk failed to connect",
3568 ddi_get_name_addr(dip));
3569 mutex_exit(&vdp->xdf_cb_lk);
3570 goto errout1;
3573 mutex_exit(&vdp->xdf_cb_lk);
3576 * We call cmlb_validate so that the geometry information in
3577 * vdp->xdf_vd_lbl is correct; this fills out the number of
3578 * alternate cylinders so that we have a place to write the
3579 * devid.
3581 if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) {
3582 cmn_err(CE_NOTE,
3583 "xdf@%s: cmlb_validate failed: %d",
3584 ddi_get_name_addr(dip), err);
3586 * We can carry on even if cmlb_validate() returns EINVAL here,
3587 * as we'll rewrite the disk label anyway.
3589 if (err != EINVAL)
3590 goto errout1;
3594 * xdf_devid_setup will only write a devid if one isn't
3595 * already present. If it fails to find or create one, we
3596 * create one in-memory so that when we label the disk later,
3597 * it will have a devid to use. This is helpful to deal with
3598 * cases where people use the devids of their disks before
3599 * labelling them; note that this does cause problems if
3600 * people rely on the devids of unlabelled disks to persist
3601 * across reboot.
3603 xdf_devid_setup(vdp);
3604 if (vdp->xdf_tgt_devid == NULL) {
3605 if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL,
3606 &vdp->xdf_tgt_devid) != DDI_SUCCESS) {
3607 cmn_err(CE_WARN,
3608 "xdf@%s: attach failed, devid_init failed",
3609 ddi_get_name_addr(dip));
3610 goto errout1;
3611 } else {
3612 (void) ddi_devid_register(vdp->xdf_dip,
3613 vdp->xdf_tgt_devid);
3617 done:
3618 #ifdef XPV_HVM_DRIVER
3619 xdf_hvm_add(dip);
3621 /* Report our version to dom0. */
3622 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d",
3623 HVMPV_XDF_VERS))
3624 cmn_err(CE_WARN, "xdf: couldn't write version\n");
3626 #endif /* XPV_HVM_DRIVER */
3628 /* Create kstat for iostat(1M) */
3629 if (xdf_kstat_create(dip) != 0) {
3630 cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
3631 ddi_get_name_addr(dip));
3632 goto errout1;
3636 * Don't bother with getting real device identification
3637 * strings (is it even possible?); they are unlikely to
3638 * change often (if at all).
3640 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID,
3641 "Xen");
3642 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID,
3643 dev_iscd ? "Virtual CD" : "Virtual disk");
3644 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID,
3645 "1.0");
3647 ddi_report_dev(dip);
3648 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr));
3649 return (DDI_SUCCESS);
3651 errout1:
3652 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed);
3653 xvdi_remove_event_handler(dip, XS_OE_STATE);
3654 errout0:
3655 if (vdp->xdf_vd_lbl != NULL) {
3656 cmlb_detach(vdp->xdf_vd_lbl, NULL);
3657 cmlb_free_handle(&vdp->xdf_vd_lbl);
3658 vdp->xdf_vd_lbl = NULL;
3660 if (vdp->xdf_softintr_id != NULL)
3661 ddi_remove_softintr(vdp->xdf_softintr_id);
3662 xvdi_remove_xb_watch_handlers(dip);
3663 if (vdp->xdf_ready_tq != NULL)
3664 ddi_taskq_destroy(vdp->xdf_ready_tq);
3665 mutex_destroy(&vdp->xdf_cb_lk);
3666 mutex_destroy(&vdp->xdf_dev_lk);
3667 cv_destroy(&vdp->xdf_dev_cv);
3668 cv_destroy(&vdp->xdf_hp_status_cv);
3669 ddi_soft_state_free(xdf_ssp, instance);
3670 ddi_set_driver_private(dip, NULL);
3671 ddi_prop_remove_all(dip);
3672 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip));
3673 return (DDI_FAILURE);
3676 static int
3677 xdf_suspend(dev_info_t *dip)
3679 int instance = ddi_get_instance(dip);
3680 xdf_t *vdp;
3682 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
3683 return (DDI_FAILURE);
3685 if (xdf_debug & SUSRES_DBG)
3686 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr);
3688 xvdi_suspend(dip);
3690 mutex_enter(&vdp->xdf_cb_lk);
3691 mutex_enter(&vdp->xdf_dev_lk);
3693 vdp->xdf_suspending = B_TRUE;
3694 xdf_ring_destroy(vdp);
3695 xdf_set_state(vdp, XD_SUSPEND);
3696 vdp->xdf_suspending = B_FALSE;
3698 mutex_exit(&vdp->xdf_dev_lk);
3699 mutex_exit(&vdp->xdf_cb_lk);
3701 if (xdf_debug & SUSRES_DBG)
3702 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr);
3704 return (DDI_SUCCESS);
3707 static int
3708 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3710 xdf_t *vdp;
3711 int instance;
3713 switch (cmd) {
3715 case DDI_PM_SUSPEND:
3716 break;
3718 case DDI_SUSPEND:
3719 return (xdf_suspend(dip));
3721 case DDI_DETACH:
3722 break;
3724 default:
3725 return (DDI_FAILURE);
3728 instance = ddi_get_instance(dip);
3729 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip)));
3730 vdp = ddi_get_soft_state(xdf_ssp, instance);
3732 if (vdp == NULL)
3733 return (DDI_FAILURE);
3735 mutex_enter(&vdp->xdf_cb_lk);
3736 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
3737 if (vdp->xdf_state != XD_CLOSED) {
3738 mutex_exit(&vdp->xdf_cb_lk);
3739 return (DDI_FAILURE);
3741 mutex_exit(&vdp->xdf_cb_lk);
3743 ASSERT(!ISDMACBON(vdp));
3745 #ifdef XPV_HVM_DRIVER
3746 xdf_hvm_rm(dip);
3747 #endif /* XPV_HVM_DRIVER */
3749 if (vdp->xdf_timeout_id != 0)
3750 (void) untimeout(vdp->xdf_timeout_id);
3752 xvdi_remove_event_handler(dip, XS_OE_STATE);
3753 ddi_taskq_destroy(vdp->xdf_ready_tq);
3755 cmlb_detach(vdp->xdf_vd_lbl, NULL);
3756 cmlb_free_handle(&vdp->xdf_vd_lbl);
3758 /* we'll support backend running in domU later */
3759 #ifdef DOMU_BACKEND
3760 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
3761 #endif
3763 list_destroy(&vdp->xdf_vreq_act);
3764 ddi_prop_remove_all(dip);
3765 xdf_kstat_delete(dip);
3766 ddi_remove_softintr(vdp->xdf_softintr_id);
3767 xvdi_remove_xb_watch_handlers(dip);
3768 ddi_set_driver_private(dip, NULL);
3769 cv_destroy(&vdp->xdf_dev_cv);
3770 mutex_destroy(&vdp->xdf_cb_lk);
3771 mutex_destroy(&vdp->xdf_dev_lk);
3772 if (vdp->xdf_cache_flush_block != NULL)
3773 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize);
3774 ddi_soft_state_free(xdf_ssp, instance);
3775 return (DDI_SUCCESS);
3779 * Driver linkage structures.
3781 static struct cb_ops xdf_cbops = {
3782 xdf_open,
3783 xdf_close,
3784 xdf_strategy,
3785 nodev,
3786 xdf_dump,
3787 xdf_read,
3788 xdf_write,
3789 xdf_ioctl,
3790 nodev,
3791 nodev,
3792 nodev,
3793 nochpoll,
3794 xdf_prop_op,
3795 NULL,
3796 D_MP | D_NEW | D_64BIT,
3797 CB_REV,
3798 xdf_aread,
3799 xdf_awrite
3802 struct dev_ops xdf_devops = {
3803 DEVO_REV, /* devo_rev */
3804 0, /* devo_refcnt */
3805 xdf_getinfo, /* devo_getinfo */
3806 nulldev, /* devo_identify */
3807 nulldev, /* devo_probe */
3808 xdf_attach, /* devo_attach */
3809 xdf_detach, /* devo_detach */
3810 nodev, /* devo_reset */
3811 &xdf_cbops, /* devo_cb_ops */
3812 NULL, /* devo_bus_ops */
3813 NULL, /* devo_power */
3814 ddi_quiesce_not_supported, /* devo_quiesce */
3818 * Module linkage structures.
3820 static struct modldrv modldrv = {
3821 &mod_driverops, /* Type of module. This one is a driver */
3822 "virtual block driver", /* short description */
3823 &xdf_devops /* driver specific ops */
3826 static struct modlinkage xdf_modlinkage = {
3827 MODREV_1, (void *)&modldrv, NULL
3831 * standard module entry points
3834 _init(void)
3836 int rc;
3838 xdf_major = ddi_name_to_major("xdf");
3839 if (xdf_major == (major_t)-1)
3840 return (EINVAL);
3842 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0)
3843 return (rc);
3845 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
3846 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3847 xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
3848 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3850 #ifdef XPV_HVM_DRIVER
3851 xdf_hvm_init();
3852 #endif /* XPV_HVM_DRIVER */
3854 if ((rc = mod_install(&xdf_modlinkage)) != 0) {
3855 #ifdef XPV_HVM_DRIVER
3856 xdf_hvm_fini();
3857 #endif /* XPV_HVM_DRIVER */
3858 kmem_cache_destroy(xdf_vreq_cache);
3859 kmem_cache_destroy(xdf_gs_cache);
3860 ddi_soft_state_fini(&xdf_ssp);
3861 return (rc);
3864 return (rc);
3868 _fini(void)
3870 int err;
3871 if ((err = mod_remove(&xdf_modlinkage)) != 0)
3872 return (err);
3874 #ifdef XPV_HVM_DRIVER
3875 xdf_hvm_fini();
3876 #endif /* XPV_HVM_DRIVER */
3878 kmem_cache_destroy(xdf_vreq_cache);
3879 kmem_cache_destroy(xdf_gs_cache);
3880 ddi_soft_state_fini(&xdf_ssp);
3882 return (0);
3886 _info(struct modinfo *modinfop)
3888 return (mod_info(&xdf_modlinkage, modinfop));