6305 blkdev should provide the device_error kstat for iostat -E
[unleashed.git] / usr / src / uts / common / io / blkdev / blkdev.c
blob7534139884d67fc6b6f0252f6d9e608519221a95
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
24 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 #include <sys/types.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/buf.h>
35 #include <sys/uio.h>
36 #include <sys/aio_req.h>
37 #include <sys/cred.h>
38 #include <sys/modctl.h>
39 #include <sys/cmlb.h>
40 #include <sys/conf.h>
41 #include <sys/devops.h>
42 #include <sys/list.h>
43 #include <sys/sysmacros.h>
44 #include <sys/dkio.h>
45 #include <sys/vtoc.h>
46 #include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */
47 #include <sys/kstat.h>
48 #include <sys/fs/dv_node.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/note.h>
52 #include <sys/blkdev.h>
53 #include <sys/scsi/impl/inquiry.h>
/*
 * Minor number layout: each instance owns BD_MAXPART consecutive minor
 * numbers.  BDINST() recovers the instance number from a dev_t and
 * BDPART() recovers the partition index within that instance.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd	bd_t;
typedef struct bd_xfer_impl	bd_xfer_impl_t;
/*
 * Per-instance soft state, allocated in bd_attach() through
 * ddi_soft_state_zalloc() and released in bd_detach().
 */
struct bd {
	void		*d_private;	/* copied from the handle's h_private */
	dev_info_t	*d_dip;		/* our devinfo node */
	kmutex_t	d_ocmutex;	/* held across open/close accounting */
	kmutex_t	d_iomutex;	/* installed as the I/O kstat's ks_lock */
	kmutex_t	*d_errmutex;	/* lock for the error statistics */
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;	/* starts out as DKIO_NONE */
	cmlb_handle_t	d_cmlbh;	/* label handle from cmlb_alloc_handle() */
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;	/* taken from the drive's d_qsize */
	uint32_t	d_qactive;	/* NOTE(review): presumably in-flight count -- confirm */
	uint32_t	d_maxxfer;	/* largest transfer, in bytes */
	uint32_t	d_blkshift;	/* log2 of the logical block size */
	uint32_t	d_pblkshift;	/* log2 of the size reported as dki_pbsize */
	uint64_t	d_numblks;	/* reported as dki_capacity */
	ddi_devid_t	d_devid;	/* filled in by o_devid_init, if provided */

	kmem_cache_t	*d_cache;	/* cache of bd_xfer_impl_t objects */
	list_t		d_runq;		/* list of bd_xfer_impl_t */
	list_t		d_waitq;	/* list of bd_xfer_impl_t */
	kstat_t		*d_ksp;		/* "disk" I/O kstat, or NULL */
	kstat_io_t	*d_kiop;	/* kstat data, or a scratch copy */
	kstat_t		*d_errstats;	/* "device_error" kstat, or NULL */
	struct bd_errstats *d_kerr;	/* kstat data, or a scratch copy */

	boolean_t	d_rdonly;	/* opens with FWRITE fail with EROFS */
	boolean_t	d_ssd;		/* answer for DKIOCSOLIDSTATE */
	boolean_t	d_removable;	/* from the drive's d_removable */
	boolean_t	d_hotpluggable;	/* from the drive's d_hotpluggable */
	boolean_t	d_use_dma;	/* B_TRUE when the handle supplied h_dma */

	ddi_dma_attr_t	d_dma;		/* copy of the parent's DMA attributes */
	bd_ops_t	d_ops;		/* copy of the handle's h_ops */
	bd_handle_t	d_handle;	/* handle supplied as parent data */
};
/*
 * Handle linking a parent driver to a blkdev instance.  bd_attach()
 * retrieves it via ddi_get_parent_data() and copies h_ops, h_dma and
 * h_private into the soft state, then records itself in h_bd.
 */
struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;		/* NULL means no DMA is used */
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;		/* set by bd_attach() */
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};
/*
 * Private wrapper around the public bd_xfer_t.  Objects come from the
 * per-instance kmem cache (d_cache) and describe one buf's transfer,
 * possibly split into several windows.
 */
struct bd_xfer_impl {
	bd_xfer_t	i_public;	/* the part handed to the parent driver */
	list_node_t	i_linkage;	/* linkage for d_runq / d_waitq */
	bd_t		*i_bd;		/* owning instance, set by the constructor */
	buf_t		*i_bp;		/* buf being serviced */
	uint_t		i_num_win;	/* number of windows in the transfer */
	uint_t		i_cur_win;	/* index of the current window */
	off_t		i_offset;	/* offset of the current window */
	int		(*i_func)(void *, bd_xfer_t *);	/* o_read or o_write */
	uint32_t	i_blkshift;	/* d_blkshift at allocation time */
	size_t		i_len;		/* byte length of the current window */
	size_t		i_resid;	/* bytes not yet transferred */
};
/*
 * Shorthand for reaching the public bd_xfer_t members embedded in a
 * bd_xfer_impl_t.
 */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
/*
 * Private prototypes.
 */

/* Inquiry properties and error statistics. */
static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);

/* dev_ops entry points. */
static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

/* cb_ops entry points. */
static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

/* cmlb target operations and internal helpers. */
static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
/*
 * Target operations vector handed to cmlb_attach() in bd_attach().
 */
struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};
/*
 * Character/block entry points; referenced from bd_dev_ops.
 */
static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};
/*
 * Device operations vector; referenced from modldrv.
 */
struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
/*
 * Loadable-driver linkage describing this module to the module framework.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};
/* Soft state anchor; one struct bd per attached instance. */
static void *bd_state;
/* Taken as reader by open, close and dump while they use the soft state. */
static krwlock_t bd_lock;
231 _init(void)
233 int rv;
235 rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
236 if (rv != DDI_SUCCESS) {
237 return (rv);
239 rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
240 rv = mod_install(&modlinkage);
241 if (rv != DDI_SUCCESS) {
242 rw_destroy(&bd_lock);
243 ddi_soft_state_fini(&bd_state);
245 return (rv);
249 _fini(void)
251 int rv;
253 rv = mod_remove(&modlinkage);
254 if (rv == DDI_SUCCESS) {
255 rw_destroy(&bd_lock);
256 ddi_soft_state_fini(&bd_state);
258 return (rv);
262 _info(struct modinfo *modinfop)
264 return (mod_info(&modlinkage, modinfop));
267 static int
268 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
270 bd_t *bd;
271 minor_t inst;
273 _NOTE(ARGUNUSED(dip));
275 inst = BDINST((dev_t)arg);
277 switch (cmd) {
278 case DDI_INFO_DEVT2DEVINFO:
279 bd = ddi_get_soft_state(bd_state, inst);
280 if (bd == NULL) {
281 return (DDI_FAILURE);
283 *resultp = (void *)bd->d_dip;
284 break;
286 case DDI_INFO_DEVT2INSTANCE:
287 *resultp = (void *)(intptr_t)inst;
288 break;
290 default:
291 return (DDI_FAILURE);
293 return (DDI_SUCCESS);
296 static void
297 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
299 int ilen;
300 char *data_string;
302 ilen = scsi_ascii_inquiry_len(data, len);
303 ASSERT3U(ilen, <=, len);
304 if (ilen <= 0)
305 return;
306 /* ensure null termination */
307 data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
308 bcopy(data, data_string, ilen);
309 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
310 kmem_free(data_string, ilen + 1);
313 static void
314 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
316 if (drive->d_vendor_len > 0)
317 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
318 drive->d_vendor, drive->d_vendor_len);
320 if (drive->d_product_len > 0)
321 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
322 drive->d_product, drive->d_product_len);
324 if (drive->d_serial_len > 0)
325 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
326 drive->d_serial, drive->d_serial_len);
328 if (drive->d_revision_len > 0)
329 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
330 drive->d_revision, drive->d_revision_len);
333 static void
334 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
336 char ks_module[KSTAT_STRLEN];
337 char ks_name[KSTAT_STRLEN];
338 int ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
340 if (bd->d_errstats != NULL)
341 return;
343 (void) snprintf(ks_module, sizeof (ks_module), "%serr",
344 ddi_driver_name(bd->d_dip));
345 (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
346 ddi_driver_name(bd->d_dip), inst);
348 bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
349 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
351 if (bd->d_errstats == NULL) {
353 * Even if we cannot create the kstat, we create a
354 * scratch kstat. The reason for this is to ensure
355 * that we can update the kstat all of the time,
356 * without adding an extra branch instruction.
358 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
359 KM_SLEEP);
360 bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
361 mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
362 } else {
363 if (bd->d_errstats->ks_lock == NULL) {
364 bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
365 KM_SLEEP);
366 mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
367 NULL);
370 bd->d_errmutex = bd->d_errstats->ks_lock;
371 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
374 kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
375 KSTAT_DATA_UINT32);
376 kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
377 KSTAT_DATA_UINT32);
378 kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
379 KSTAT_DATA_UINT32);
381 if (drive->d_model_len > 0) {
382 kstat_named_init(&bd->d_kerr->bd_model, "Model",
383 KSTAT_DATA_STRING);
384 } else {
385 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
386 KSTAT_DATA_STRING);
387 kstat_named_init(&bd->d_kerr->bd_pid, "Product",
388 KSTAT_DATA_STRING);
391 kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
392 KSTAT_DATA_STRING);
393 kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
394 KSTAT_DATA_STRING);
395 kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
396 KSTAT_DATA_ULONGLONG);
397 kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
398 KSTAT_DATA_UINT32);
399 kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
400 KSTAT_DATA_UINT32);
401 kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
402 KSTAT_DATA_UINT32);
403 kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
404 KSTAT_DATA_UINT32);
405 kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
406 KSTAT_DATA_UINT32);
407 kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
408 "Predictive Failure Analysis", KSTAT_DATA_UINT32);
410 bd->d_errstats->ks_private = bd;
412 kstat_install(bd->d_errstats);
415 static void
416 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
418 char *tmp;
420 if (KSTAT_NAMED_STR_PTR(k) == NULL) {
421 if (len > 0) {
422 tmp = kmem_alloc(len + 1, KM_SLEEP);
423 (void) strlcpy(tmp, str, len);
424 } else {
425 tmp = alt;
428 kstat_named_setstr(k, tmp);
432 static void
433 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
435 struct bd_errstats *est = bd->d_kerr;
437 mutex_enter(bd->d_errmutex);
439 if (drive->d_model_len > 0 &&
440 KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
441 bd_errstats_setstr(&est->bd_model, drive->d_model,
442 drive->d_model_len, NULL);
443 } else {
444 bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
445 drive->d_vendor_len, "Unknown ");
446 bd_errstats_setstr(&est->bd_pid, drive->d_product,
447 drive->d_product_len, "Unknown ");
450 bd_errstats_setstr(&est->bd_revision, drive->d_revision,
451 drive->d_revision_len, "0001");
452 bd_errstats_setstr(&est->bd_serial, drive->d_serial,
453 drive->d_serial_len, "0 ");
455 mutex_exit(bd->d_errmutex);
458 static int
459 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
461 int inst;
462 bd_handle_t hdl;
463 bd_t *bd;
464 bd_drive_t drive;
465 int rv;
466 char name[16];
467 char kcache[32];
469 switch (cmd) {
470 case DDI_ATTACH:
471 break;
472 case DDI_RESUME:
473 /* We don't do anything native for suspend/resume */
474 return (DDI_SUCCESS);
475 default:
476 return (DDI_FAILURE);
479 inst = ddi_get_instance(dip);
480 hdl = ddi_get_parent_data(dip);
482 (void) snprintf(name, sizeof (name), "%s%d",
483 ddi_driver_name(dip), ddi_get_instance(dip));
484 (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
486 if (hdl == NULL) {
487 cmn_err(CE_WARN, "%s: missing parent data!", name);
488 return (DDI_FAILURE);
491 if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
492 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
493 return (DDI_FAILURE);
495 bd = ddi_get_soft_state(bd_state, inst);
497 if (hdl->h_dma) {
498 bd->d_dma = *(hdl->h_dma);
499 bd->d_dma.dma_attr_granular =
500 max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
501 bd->d_use_dma = B_TRUE;
503 if (bd->d_maxxfer &&
504 (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
505 cmn_err(CE_WARN,
506 "%s: inconsistent maximum transfer size!",
507 name);
508 /* We force it */
509 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
510 } else {
511 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
513 } else {
514 bd->d_use_dma = B_FALSE;
515 if (bd->d_maxxfer == 0) {
516 bd->d_maxxfer = 1024 * 1024;
519 bd->d_ops = hdl->h_ops;
520 bd->d_private = hdl->h_private;
521 bd->d_blkshift = 9; /* 512 bytes, to start */
523 if (bd->d_maxxfer % DEV_BSIZE) {
524 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
525 bd->d_maxxfer &= ~(DEV_BSIZE - 1);
527 if (bd->d_maxxfer < DEV_BSIZE) {
528 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
529 ddi_soft_state_free(bd_state, inst);
530 return (DDI_FAILURE);
533 bd->d_dip = dip;
534 bd->d_handle = hdl;
535 hdl->h_bd = bd;
536 ddi_set_driver_private(dip, bd);
538 mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
539 mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
540 mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
541 cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
543 list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
544 offsetof(struct bd_xfer_impl, i_linkage));
545 list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
546 offsetof(struct bd_xfer_impl, i_linkage));
548 bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
549 bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
551 bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
552 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
553 if (bd->d_ksp != NULL) {
554 bd->d_ksp->ks_lock = &bd->d_iomutex;
555 kstat_install(bd->d_ksp);
556 bd->d_kiop = bd->d_ksp->ks_data;
557 } else {
559 * Even if we cannot create the kstat, we create a
560 * scratch kstat. The reason for this is to ensure
561 * that we can update the kstat all of the time,
562 * without adding an extra branch instruction.
564 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
567 cmlb_alloc_handle(&bd->d_cmlbh);
569 bd->d_state = DKIO_NONE;
571 bzero(&drive, sizeof (drive));
572 bd->d_ops.o_drive_info(bd->d_private, &drive);
573 bd->d_qsize = drive.d_qsize;
574 bd->d_removable = drive.d_removable;
575 bd->d_hotpluggable = drive.d_hotpluggable;
577 if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
578 bd->d_maxxfer = drive.d_maxxfer;
580 bd_create_inquiry_props(dip, &drive);
582 bd_create_errstats(bd, inst, &drive);
583 bd_init_errstats(bd, &drive);
584 bd_update_state(bd);
586 rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
587 bd->d_removable, bd->d_hotpluggable,
588 drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
589 CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
590 if (rv != 0) {
591 cmlb_free_handle(&bd->d_cmlbh);
592 kmem_cache_destroy(bd->d_cache);
593 mutex_destroy(&bd->d_iomutex);
594 mutex_destroy(&bd->d_ocmutex);
595 mutex_destroy(&bd->d_statemutex);
596 cv_destroy(&bd->d_statecv);
597 list_destroy(&bd->d_waitq);
598 list_destroy(&bd->d_runq);
599 if (bd->d_ksp != NULL) {
600 kstat_delete(bd->d_ksp);
601 bd->d_ksp = NULL;
602 } else {
603 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
605 ddi_soft_state_free(bd_state, inst);
606 return (DDI_FAILURE);
609 if (bd->d_ops.o_devid_init != NULL) {
610 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
611 if (rv == DDI_SUCCESS) {
612 if (ddi_devid_register(dip, bd->d_devid) !=
613 DDI_SUCCESS) {
614 cmn_err(CE_WARN,
615 "%s: unable to register devid", name);
621 * Add a zero-length attribute to tell the world we support
622 * kernel ioctls (for layered drivers). Also set up properties
623 * used by HAL to identify removable media.
625 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
626 DDI_KERNEL_IOCTL, NULL, 0);
627 if (bd->d_removable) {
628 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
629 "removable-media", NULL, 0);
631 if (bd->d_hotpluggable) {
632 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
633 "hotpluggable", NULL, 0);
636 ddi_report_dev(dip);
638 return (DDI_SUCCESS);
641 static int
642 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
644 bd_t *bd;
646 bd = ddi_get_driver_private(dip);
648 switch (cmd) {
649 case DDI_DETACH:
650 break;
651 case DDI_SUSPEND:
652 /* We don't suspend, but our parent does */
653 return (DDI_SUCCESS);
654 default:
655 return (DDI_FAILURE);
657 if (bd->d_ksp != NULL) {
658 kstat_delete(bd->d_ksp);
659 bd->d_ksp = NULL;
660 } else {
661 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
664 if (bd->d_errstats != NULL) {
665 kstat_delete(bd->d_errstats);
666 bd->d_errstats = NULL;
667 } else {
668 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
669 mutex_destroy(bd->d_errmutex);
672 cmlb_detach(bd->d_cmlbh, 0);
673 cmlb_free_handle(&bd->d_cmlbh);
674 if (bd->d_devid)
675 ddi_devid_free(bd->d_devid);
676 kmem_cache_destroy(bd->d_cache);
677 mutex_destroy(&bd->d_iomutex);
678 mutex_destroy(&bd->d_ocmutex);
679 mutex_destroy(&bd->d_statemutex);
680 cv_destroy(&bd->d_statecv);
681 list_destroy(&bd->d_waitq);
682 list_destroy(&bd->d_runq);
683 ddi_soft_state_free(bd_state, ddi_get_instance(dip));
684 return (DDI_SUCCESS);
687 static int
688 bd_xfer_ctor(void *buf, void *arg, int kmflag)
690 bd_xfer_impl_t *xi;
691 bd_t *bd = arg;
692 int (*dcb)(caddr_t);
694 if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
695 dcb = DDI_DMA_SLEEP;
696 } else {
697 dcb = DDI_DMA_DONTWAIT;
700 xi = buf;
701 bzero(xi, sizeof (*xi));
702 xi->i_bd = bd;
704 if (bd->d_use_dma) {
705 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
706 &xi->i_dmah) != DDI_SUCCESS) {
707 return (-1);
711 return (0);
714 static void
715 bd_xfer_dtor(void *buf, void *arg)
717 bd_xfer_impl_t *xi = buf;
719 _NOTE(ARGUNUSED(arg));
721 if (xi->i_dmah)
722 ddi_dma_free_handle(&xi->i_dmah);
723 xi->i_dmah = NULL;
726 static bd_xfer_impl_t *
727 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
728 int kmflag)
730 bd_xfer_impl_t *xi;
731 int rv = 0;
732 int status;
733 unsigned dir;
734 int (*cb)(caddr_t);
735 size_t len;
736 uint32_t shift;
738 if (kmflag == KM_SLEEP) {
739 cb = DDI_DMA_SLEEP;
740 } else {
741 cb = DDI_DMA_DONTWAIT;
744 xi = kmem_cache_alloc(bd->d_cache, kmflag);
745 if (xi == NULL) {
746 bioerror(bp, ENOMEM);
747 return (NULL);
750 ASSERT(bp);
752 xi->i_bp = bp;
753 xi->i_func = func;
754 xi->i_blkno = bp->b_lblkno;
756 if (bp->b_bcount == 0) {
757 xi->i_len = 0;
758 xi->i_nblks = 0;
759 xi->i_kaddr = NULL;
760 xi->i_resid = 0;
761 xi->i_num_win = 0;
762 goto done;
765 if (bp->b_flags & B_READ) {
766 dir = DDI_DMA_READ;
767 xi->i_func = bd->d_ops.o_read;
768 } else {
769 dir = DDI_DMA_WRITE;
770 xi->i_func = bd->d_ops.o_write;
773 shift = bd->d_blkshift;
774 xi->i_blkshift = shift;
776 if (!bd->d_use_dma) {
777 bp_mapin(bp);
778 rv = 0;
779 xi->i_offset = 0;
780 xi->i_num_win =
781 (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
782 xi->i_cur_win = 0;
783 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
784 xi->i_nblks = xi->i_len >> shift;
785 xi->i_kaddr = bp->b_un.b_addr;
786 xi->i_resid = bp->b_bcount;
787 } else {
790 * We have to use consistent DMA if the address is misaligned.
792 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
793 ((uintptr_t)bp->b_un.b_addr & 0x7)) {
794 dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
795 } else {
796 dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
799 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
800 NULL, &xi->i_dmac, &xi->i_ndmac);
801 switch (status) {
802 case DDI_DMA_MAPPED:
803 xi->i_num_win = 1;
804 xi->i_cur_win = 0;
805 xi->i_offset = 0;
806 xi->i_len = bp->b_bcount;
807 xi->i_nblks = xi->i_len >> shift;
808 xi->i_resid = bp->b_bcount;
809 rv = 0;
810 break;
811 case DDI_DMA_PARTIAL_MAP:
812 xi->i_cur_win = 0;
814 if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
815 DDI_SUCCESS) ||
816 (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
817 &len, &xi->i_dmac, &xi->i_ndmac) !=
818 DDI_SUCCESS) ||
819 (P2PHASE(len, shift) != 0)) {
820 (void) ddi_dma_unbind_handle(xi->i_dmah);
821 rv = EFAULT;
822 goto done;
824 xi->i_len = len;
825 xi->i_nblks = xi->i_len >> shift;
826 xi->i_resid = bp->b_bcount;
827 rv = 0;
828 break;
829 case DDI_DMA_NORESOURCES:
830 rv = EAGAIN;
831 goto done;
832 case DDI_DMA_TOOBIG:
833 rv = EINVAL;
834 goto done;
835 case DDI_DMA_NOMAPPING:
836 case DDI_DMA_INUSE:
837 default:
838 rv = EFAULT;
839 goto done;
843 done:
844 if (rv != 0) {
845 kmem_cache_free(bd->d_cache, xi);
846 bioerror(bp, rv);
847 return (NULL);
850 return (xi);
853 static void
854 bd_xfer_free(bd_xfer_impl_t *xi)
856 if (xi->i_dmah) {
857 (void) ddi_dma_unbind_handle(xi->i_dmah);
859 kmem_cache_free(xi->i_bd->d_cache, xi);
862 static int
863 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
865 dev_t dev = *devp;
866 bd_t *bd;
867 minor_t part;
868 minor_t inst;
869 uint64_t mask;
870 boolean_t ndelay;
871 int rv;
872 diskaddr_t nblks;
873 diskaddr_t lba;
875 _NOTE(ARGUNUSED(credp));
877 part = BDPART(dev);
878 inst = BDINST(dev);
880 if (otyp >= OTYPCNT)
881 return (EINVAL);
883 ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
886 * Block any DR events from changing the set of registered
887 * devices while we function.
889 rw_enter(&bd_lock, RW_READER);
890 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
891 rw_exit(&bd_lock);
892 return (ENXIO);
895 mutex_enter(&bd->d_ocmutex);
897 ASSERT(part < 64);
898 mask = (1U << part);
900 bd_update_state(bd);
902 if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
904 /* non-blocking opens are allowed to succeed */
905 if (!ndelay) {
906 rv = ENXIO;
907 goto done;
909 } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
910 NULL, NULL, 0) == 0) {
913 * We read the partinfo, verify valid ranges. If the
914 * partition is invalid, and we aren't blocking or
915 * doing a raw access, then fail. (Non-blocking and
916 * raw accesses can still succeed to allow a disk with
917 * bad partition data to opened by format and fdisk.)
919 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
920 rv = ENXIO;
921 goto done;
923 } else if (!ndelay) {
925 * cmlb_partinfo failed -- invalid partition or no
926 * disk label.
928 rv = ENXIO;
929 goto done;
932 if ((flag & FWRITE) && bd->d_rdonly) {
933 rv = EROFS;
934 goto done;
937 if ((bd->d_open_excl) & (mask)) {
938 rv = EBUSY;
939 goto done;
941 if (flag & FEXCL) {
942 if (bd->d_open_lyr[part]) {
943 rv = EBUSY;
944 goto done;
946 for (int i = 0; i < OTYP_LYR; i++) {
947 if (bd->d_open_reg[i] & mask) {
948 rv = EBUSY;
949 goto done;
954 if (otyp == OTYP_LYR) {
955 bd->d_open_lyr[part]++;
956 } else {
957 bd->d_open_reg[otyp] |= mask;
959 if (flag & FEXCL) {
960 bd->d_open_excl |= mask;
963 rv = 0;
964 done:
965 mutex_exit(&bd->d_ocmutex);
966 rw_exit(&bd_lock);
968 return (rv);
971 static int
972 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
974 bd_t *bd;
975 minor_t inst;
976 minor_t part;
977 uint64_t mask;
978 boolean_t last = B_TRUE;
980 _NOTE(ARGUNUSED(flag));
981 _NOTE(ARGUNUSED(credp));
983 part = BDPART(dev);
984 inst = BDINST(dev);
986 ASSERT(part < 64);
987 mask = (1U << part);
989 rw_enter(&bd_lock, RW_READER);
991 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
992 rw_exit(&bd_lock);
993 return (ENXIO);
996 mutex_enter(&bd->d_ocmutex);
997 if (bd->d_open_excl & mask) {
998 bd->d_open_excl &= ~mask;
1000 if (otyp == OTYP_LYR) {
1001 bd->d_open_lyr[part]--;
1002 } else {
1003 bd->d_open_reg[otyp] &= ~mask;
1005 for (int i = 0; i < 64; i++) {
1006 if (bd->d_open_lyr[part]) {
1007 last = B_FALSE;
1010 for (int i = 0; last && (i < OTYP_LYR); i++) {
1011 if (bd->d_open_reg[i]) {
1012 last = B_FALSE;
1015 mutex_exit(&bd->d_ocmutex);
1017 if (last) {
1018 cmlb_invalidate(bd->d_cmlbh, 0);
1020 rw_exit(&bd_lock);
1022 return (0);
1025 static int
1026 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1028 minor_t inst;
1029 minor_t part;
1030 diskaddr_t pstart;
1031 diskaddr_t psize;
1032 bd_t *bd;
1033 bd_xfer_impl_t *xi;
1034 buf_t *bp;
1035 int rv;
1037 rw_enter(&bd_lock, RW_READER);
1039 part = BDPART(dev);
1040 inst = BDINST(dev);
1042 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1043 rw_exit(&bd_lock);
1044 return (ENXIO);
1047 * do cmlb, but do it synchronously unless we already have the
1048 * partition (which we probably should.)
1050 if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1051 (void *)1)) {
1052 rw_exit(&bd_lock);
1053 return (ENXIO);
1056 if ((blkno + nblk) > psize) {
1057 rw_exit(&bd_lock);
1058 return (EINVAL);
1060 bp = getrbuf(KM_NOSLEEP);
1061 if (bp == NULL) {
1062 rw_exit(&bd_lock);
1063 return (ENOMEM);
1066 bp->b_bcount = nblk << bd->d_blkshift;
1067 bp->b_resid = bp->b_bcount;
1068 bp->b_lblkno = blkno;
1069 bp->b_un.b_addr = caddr;
1071 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
1072 if (xi == NULL) {
1073 rw_exit(&bd_lock);
1074 freerbuf(bp);
1075 return (ENOMEM);
1077 xi->i_blkno = blkno + pstart;
1078 xi->i_flags = BD_XFER_POLL;
1079 bd_submit(bd, xi);
1080 rw_exit(&bd_lock);
1083 * Generally, we should have run this entirely synchronously
1084 * at this point and the biowait call should be a no-op. If
1085 * it didn't happen this way, it's a bug in the underlying
1086 * driver not honoring BD_XFER_POLL.
1088 (void) biowait(bp);
1089 rv = geterror(bp);
1090 freerbuf(bp);
1091 return (rv);
1094 void
1095 bd_minphys(struct buf *bp)
1097 minor_t inst;
1098 bd_t *bd;
1099 inst = BDINST(bp->b_edev);
1101 bd = ddi_get_soft_state(bd_state, inst);
1104 * In a non-debug kernel, bd_strategy will catch !bd as
1105 * well, and will fail nicely.
1107 ASSERT(bd);
1109 if (bp->b_bcount > bd->d_maxxfer)
1110 bp->b_bcount = bd->d_maxxfer;
1113 static int
1114 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1116 _NOTE(ARGUNUSED(credp));
1117 return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1120 static int
1121 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1123 _NOTE(ARGUNUSED(credp));
1124 return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1127 static int
1128 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1130 _NOTE(ARGUNUSED(credp));
1131 return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1134 static int
1135 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1137 _NOTE(ARGUNUSED(credp));
1138 return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1141 static int
1142 bd_strategy(struct buf *bp)
1144 minor_t inst;
1145 minor_t part;
1146 bd_t *bd;
1147 diskaddr_t p_lba;
1148 diskaddr_t p_nblks;
1149 diskaddr_t b_nblks;
1150 bd_xfer_impl_t *xi;
1151 uint32_t shift;
1152 int (*func)(void *, bd_xfer_t *);
1154 part = BDPART(bp->b_edev);
1155 inst = BDINST(bp->b_edev);
1157 ASSERT(bp);
1159 bp->b_resid = bp->b_bcount;
1161 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1162 bioerror(bp, ENXIO);
1163 biodone(bp);
1164 return (0);
1167 if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1168 NULL, NULL, 0)) {
1169 bioerror(bp, ENXIO);
1170 biodone(bp);
1171 return (0);
1174 shift = bd->d_blkshift;
1176 if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1177 (bp->b_lblkno > p_nblks)) {
1178 bioerror(bp, ENXIO);
1179 biodone(bp);
1180 return (0);
1182 b_nblks = bp->b_bcount >> shift;
1183 if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
1184 biodone(bp);
1185 return (0);
1188 if ((b_nblks + bp->b_lblkno) > p_nblks) {
1189 bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
1190 bp->b_bcount -= bp->b_resid;
1191 } else {
1192 bp->b_resid = 0;
1194 func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1196 xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1197 if (xi == NULL) {
1198 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1200 if (xi == NULL) {
1201 /* bd_request_alloc will have done bioerror */
1202 biodone(bp);
1203 return (0);
1205 xi->i_blkno = bp->b_lblkno + p_lba;
1207 bd_submit(bd, xi);
1209 return (0);
1212 static int
1213 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1215 minor_t inst;
1216 uint16_t part;
1217 bd_t *bd;
1218 void *ptr = (void *)arg;
1219 int rv;
1221 part = BDPART(dev);
1222 inst = BDINST(dev);
1224 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1225 return (ENXIO);
1228 rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1229 if (rv != ENOTTY)
1230 return (rv);
1232 if (rvalp != NULL) {
1233 /* the return value of the ioctl is 0 by default */
1234 *rvalp = 0;
1237 switch (cmd) {
1238 case DKIOCGMEDIAINFO: {
1239 struct dk_minfo minfo;
1241 /* make sure our state information is current */
1242 bd_update_state(bd);
1243 bzero(&minfo, sizeof (minfo));
1244 minfo.dki_media_type = DK_FIXED_DISK;
1245 minfo.dki_lbsize = (1U << bd->d_blkshift);
1246 minfo.dki_capacity = bd->d_numblks;
1247 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1248 return (EFAULT);
1250 return (0);
1252 case DKIOCGMEDIAINFOEXT: {
1253 struct dk_minfo_ext miext;
1255 /* make sure our state information is current */
1256 bd_update_state(bd);
1257 bzero(&miext, sizeof (miext));
1258 miext.dki_media_type = DK_FIXED_DISK;
1259 miext.dki_lbsize = (1U << bd->d_blkshift);
1260 miext.dki_pbsize = (1U << bd->d_pblkshift);
1261 miext.dki_capacity = bd->d_numblks;
1262 if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1263 return (EFAULT);
1265 return (0);
1267 case DKIOCINFO: {
1268 struct dk_cinfo cinfo;
1269 bzero(&cinfo, sizeof (cinfo));
1270 cinfo.dki_ctype = DKC_BLKDEV;
1271 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1272 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1273 "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1274 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1275 "%s", ddi_driver_name(bd->d_dip));
1276 cinfo.dki_unit = inst;
1277 cinfo.dki_flags = DKI_FMTVOL;
1278 cinfo.dki_partition = part;
1279 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1280 cinfo.dki_addr = 0;
1281 cinfo.dki_slave = 0;
1282 cinfo.dki_space = 0;
1283 cinfo.dki_prio = 0;
1284 cinfo.dki_vec = 0;
1285 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1286 return (EFAULT);
1288 return (0);
1290 case DKIOCREMOVABLE: {
1291 int i;
1292 i = bd->d_removable ? 1 : 0;
1293 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1294 return (EFAULT);
1296 return (0);
1298 case DKIOCHOTPLUGGABLE: {
1299 int i;
1300 i = bd->d_hotpluggable ? 1 : 0;
1301 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1302 return (EFAULT);
1304 return (0);
1306 case DKIOCREADONLY: {
1307 int i;
1308 i = bd->d_rdonly ? 1 : 0;
1309 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1310 return (EFAULT);
1312 return (0);
1314 case DKIOCSOLIDSTATE: {
1315 int i;
1316 i = bd->d_ssd ? 1 : 0;
1317 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1318 return (EFAULT);
1320 return (0);
1322 case DKIOCSTATE: {
1323 enum dkio_state state;
1324 if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1325 return (EFAULT);
1327 if ((rv = bd_check_state(bd, &state)) != 0) {
1328 return (rv);
1330 if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1331 return (EFAULT);
1333 return (0);
1335 case DKIOCFLUSHWRITECACHE: {
1336 struct dk_callback *dkc = NULL;
1338 if (flag & FKIOCTL)
1339 dkc = (void *)arg;
1341 rv = bd_flush_write_cache(bd, dkc);
1342 return (rv);
1345 default:
1346 break;
1349 return (ENOTTY);
1352 static int
1353 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1354 char *name, caddr_t valuep, int *lengthp)
1356 bd_t *bd;
1358 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1359 if (bd == NULL)
1360 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1361 name, valuep, lengthp));
1363 return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1364 valuep, lengthp, BDPART(dev), 0));
1368 static int
1369 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1370 size_t length, void *tg_cookie)
1372 bd_t *bd;
1373 buf_t *bp;
1374 bd_xfer_impl_t *xi;
1375 int rv;
1376 int (*func)(void *, bd_xfer_t *);
1377 int kmflag;
1380 * If we are running in polled mode (such as during dump(9e)
1381 * execution), then we cannot sleep for kernel allocations.
1383 kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1385 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1387 if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1388 /* We can only transfer whole blocks at a time! */
1389 return (EINVAL);
1392 if ((bp = getrbuf(kmflag)) == NULL) {
1393 return (ENOMEM);
1396 switch (cmd) {
1397 case TG_READ:
1398 bp->b_flags = B_READ;
1399 func = bd->d_ops.o_read;
1400 break;
1401 case TG_WRITE:
1402 bp->b_flags = B_WRITE;
1403 func = bd->d_ops.o_write;
1404 break;
1405 default:
1406 freerbuf(bp);
1407 return (EINVAL);
1410 bp->b_un.b_addr = bufaddr;
1411 bp->b_bcount = length;
1412 xi = bd_xfer_alloc(bd, bp, func, kmflag);
1413 if (xi == NULL) {
1414 rv = geterror(bp);
1415 freerbuf(bp);
1416 return (rv);
1418 xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1419 xi->i_blkno = start;
1420 bd_submit(bd, xi);
1421 (void) biowait(bp);
1422 rv = geterror(bp);
1423 freerbuf(bp);
1425 return (rv);
1428 static int
1429 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1431 bd_t *bd;
1433 _NOTE(ARGUNUSED(tg_cookie));
1434 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1436 switch (cmd) {
1437 case TG_GETPHYGEOM:
1438 case TG_GETVIRTGEOM:
1440 * We don't have any "geometry" as such, let cmlb
1441 * fabricate something.
1443 return (ENOTTY);
1445 case TG_GETCAPACITY:
1446 bd_update_state(bd);
1447 *(diskaddr_t *)arg = bd->d_numblks;
1448 return (0);
1450 case TG_GETBLOCKSIZE:
1451 *(uint32_t *)arg = (1U << bd->d_blkshift);
1452 return (0);
1454 case TG_GETATTR:
1456 * It turns out that cmlb really doesn't do much for
1457 * non-writable media, but lets make the information
1458 * available for it in case it does more in the
1459 * future. (The value is currently used for
1460 * triggering special behavior for CD-ROMs.)
1462 bd_update_state(bd);
1463 ((tg_attribute_t *)arg)->media_is_writable =
1464 bd->d_rdonly ? B_FALSE : B_TRUE;
1465 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1466 return (0);
1468 default:
1469 return (EINVAL);
1474 static void
1475 bd_sched(bd_t *bd)
1477 bd_xfer_impl_t *xi;
1478 struct buf *bp;
1479 int rv;
1481 mutex_enter(&bd->d_iomutex);
1483 while ((bd->d_qactive < bd->d_qsize) &&
1484 ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1485 bd->d_qactive++;
1486 kstat_waitq_to_runq(bd->d_kiop);
1487 list_insert_tail(&bd->d_runq, xi);
1490 * Submit the job to the driver. We drop the I/O mutex
1491 * so that we can deal with the case where the driver
1492 * completion routine calls back into us synchronously.
1495 mutex_exit(&bd->d_iomutex);
1497 rv = xi->i_func(bd->d_private, &xi->i_public);
1498 if (rv != 0) {
1499 bp = xi->i_bp;
1500 bioerror(bp, rv);
1501 biodone(bp);
1503 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1505 mutex_enter(&bd->d_iomutex);
1506 bd->d_qactive--;
1507 kstat_runq_exit(bd->d_kiop);
1508 list_remove(&bd->d_runq, xi);
1509 bd_xfer_free(xi);
1510 } else {
1511 mutex_enter(&bd->d_iomutex);
1515 mutex_exit(&bd->d_iomutex);
1518 static void
1519 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1521 mutex_enter(&bd->d_iomutex);
1522 list_insert_tail(&bd->d_waitq, xi);
1523 kstat_waitq_enter(bd->d_kiop);
1524 mutex_exit(&bd->d_iomutex);
1526 bd_sched(bd);
1529 static void
1530 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1532 bd_t *bd = xi->i_bd;
1533 buf_t *bp = xi->i_bp;
1535 mutex_enter(&bd->d_iomutex);
1536 bd->d_qactive--;
1537 kstat_runq_exit(bd->d_kiop);
1538 list_remove(&bd->d_runq, xi);
1539 mutex_exit(&bd->d_iomutex);
1541 if (err == 0) {
1542 if (bp->b_flags & B_READ) {
1543 bd->d_kiop->reads++;
1544 bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1545 } else {
1546 bd->d_kiop->writes++;
1547 bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1550 bd_sched(bd);
1553 static void
1554 bd_update_state(bd_t *bd)
1556 enum dkio_state state = DKIO_INSERTED;
1557 boolean_t docmlb = B_FALSE;
1558 bd_media_t media;
1560 bzero(&media, sizeof (media));
1562 mutex_enter(&bd->d_statemutex);
1563 if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1564 bd->d_numblks = 0;
1565 state = DKIO_EJECTED;
1566 goto done;
1569 if ((media.m_blksize < 512) ||
1570 (!ISP2(media.m_blksize)) ||
1571 (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1572 cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1573 ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1574 media.m_blksize);
1576 * We can't use the media, treat it as not present.
1578 state = DKIO_EJECTED;
1579 bd->d_numblks = 0;
1580 goto done;
1583 if (((1U << bd->d_blkshift) != media.m_blksize) ||
1584 (bd->d_numblks != media.m_nblks)) {
1585 /* Device size changed */
1586 docmlb = B_TRUE;
1589 bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1590 bd->d_pblkshift = bd->d_blkshift;
1591 bd->d_numblks = media.m_nblks;
1592 bd->d_rdonly = media.m_readonly;
1593 bd->d_ssd = media.m_solidstate;
1596 * Only use the supplied physical block size if it is non-zero,
1597 * greater or equal to the block size, and a power of 2. Ignore it
1598 * if not, it's just informational and we can still use the media.
1600 if ((media.m_pblksize != 0) &&
1601 (media.m_pblksize >= media.m_blksize) &&
1602 (ISP2(media.m_pblksize)))
1603 bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1605 done:
1606 if (state != bd->d_state) {
1607 bd->d_state = state;
1608 cv_broadcast(&bd->d_statecv);
1609 docmlb = B_TRUE;
1611 mutex_exit(&bd->d_statemutex);
1613 bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1615 if (docmlb) {
1616 if (state == DKIO_INSERTED) {
1617 (void) cmlb_validate(bd->d_cmlbh, 0, 0);
1618 } else {
1619 cmlb_invalidate(bd->d_cmlbh, 0);
1624 static int
1625 bd_check_state(bd_t *bd, enum dkio_state *state)
1627 clock_t when;
1629 for (;;) {
1631 bd_update_state(bd);
1633 mutex_enter(&bd->d_statemutex);
1635 if (bd->d_state != *state) {
1636 *state = bd->d_state;
1637 mutex_exit(&bd->d_statemutex);
1638 break;
1641 when = drv_usectohz(1000000);
1642 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1643 when, TR_CLOCK_TICK) == 0) {
1644 mutex_exit(&bd->d_statemutex);
1645 return (EINTR);
1648 mutex_exit(&bd->d_statemutex);
1651 return (0);
1654 static int
1655 bd_flush_write_cache_done(struct buf *bp)
1657 struct dk_callback *dc = (void *)bp->b_private;
1659 (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1660 kmem_free(dc, sizeof (*dc));
1661 freerbuf(bp);
1662 return (0);
1665 static int
1666 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1668 buf_t *bp;
1669 struct dk_callback *dc;
1670 bd_xfer_impl_t *xi;
1671 int rv;
1673 if (bd->d_ops.o_sync_cache == NULL) {
1674 return (ENOTSUP);
1676 if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1677 return (ENOMEM);
1679 bp->b_resid = 0;
1680 bp->b_bcount = 0;
1682 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1683 if (xi == NULL) {
1684 rv = geterror(bp);
1685 freerbuf(bp);
1686 return (rv);
1689 /* Make an asynchronous flush, but only if there is a callback */
1690 if (dkc != NULL && dkc->dkc_callback != NULL) {
1691 /* Make a private copy of the callback structure */
1692 dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1693 *dc = *dkc;
1694 bp->b_private = dc;
1695 bp->b_iodone = bd_flush_write_cache_done;
1697 bd_submit(bd, xi);
1698 return (0);
1701 /* In case there is no callback, perform a synchronous flush */
1702 bd_submit(bd, xi);
1703 (void) biowait(bp);
1704 rv = geterror(bp);
1705 freerbuf(bp);
1707 return (rv);
/*
 * Nexus support.
 */
1714 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1715 void *arg, void *result)
1717 bd_handle_t hdl;
1719 switch (ctlop) {
1720 case DDI_CTLOPS_REPORTDEV:
1721 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1722 ddi_node_name(rdip), ddi_get_name_addr(rdip),
1723 ddi_driver_name(rdip), ddi_get_instance(rdip));
1724 return (DDI_SUCCESS);
1726 case DDI_CTLOPS_INITCHILD:
1727 hdl = ddi_get_parent_data((dev_info_t *)arg);
1728 if (hdl == NULL) {
1729 return (DDI_NOT_WELL_FORMED);
1731 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1732 return (DDI_SUCCESS);
1734 case DDI_CTLOPS_UNINITCHILD:
1735 ddi_set_name_addr((dev_info_t *)arg, NULL);
1736 ndi_prop_remove_all((dev_info_t *)arg);
1737 return (DDI_SUCCESS);
1739 default:
1740 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
/*
 * Functions for device drivers.
 */
1747 bd_handle_t
1748 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1750 bd_handle_t hdl;
1752 hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1753 if (hdl != NULL) {
1754 hdl->h_ops = *ops;
1755 hdl->h_dma = dma;
1756 hdl->h_private = private;
1759 return (hdl);
1762 void
1763 bd_free_handle(bd_handle_t hdl)
1765 kmem_free(hdl, sizeof (*hdl));
1769 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1771 dev_info_t *child;
1772 bd_drive_t drive = { 0 };
1774 /* if drivers don't override this, make it assume none */
1775 drive.d_lun = -1;
1776 hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1778 hdl->h_parent = dip;
1779 hdl->h_name = "blkdev";
1781 if (drive.d_lun >= 0) {
1782 (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
1783 drive.d_target, drive.d_lun);
1784 } else {
1785 (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
1786 drive.d_target);
1788 if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1789 &child) != NDI_SUCCESS) {
1790 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1791 ddi_driver_name(dip), ddi_get_instance(dip),
1792 "blkdev", hdl->h_addr);
1793 return (DDI_FAILURE);
1796 ddi_set_parent_data(child, hdl);
1797 hdl->h_child = child;
1799 if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1800 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1801 ddi_driver_name(dip), ddi_get_instance(dip),
1802 hdl->h_name, hdl->h_addr);
1803 (void) ndi_devi_free(child);
1804 return (DDI_FAILURE);
1807 return (DDI_SUCCESS);
1811 bd_detach_handle(bd_handle_t hdl)
1813 int circ;
1814 int rv;
1815 char *devnm;
1817 if (hdl->h_child == NULL) {
1818 return (DDI_SUCCESS);
1820 ndi_devi_enter(hdl->h_parent, &circ);
1821 if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1822 rv = ddi_remove_child(hdl->h_child, 0);
1823 } else {
1824 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1825 (void) ddi_deviname(hdl->h_child, devnm);
1826 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1827 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1828 NDI_DEVI_REMOVE | NDI_UNCONFIG);
1829 kmem_free(devnm, MAXNAMELEN + 1);
1831 if (rv == 0) {
1832 hdl->h_child = NULL;
1835 ndi_devi_exit(hdl->h_parent, circ);
1836 return (rv = NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1839 void
1840 bd_xfer_done(bd_xfer_t *xfer, int err)
1842 bd_xfer_impl_t *xi = (void *)xfer;
1843 buf_t *bp = xi->i_bp;
1844 int rv = DDI_SUCCESS;
1845 bd_t *bd = xi->i_bd;
1846 size_t len;
1848 if (err != 0) {
1849 bd_runq_exit(xi, err);
1850 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1852 bp->b_resid += xi->i_resid;
1853 bd_xfer_free(xi);
1854 bioerror(bp, err);
1855 biodone(bp);
1856 return;
1859 xi->i_cur_win++;
1860 xi->i_resid -= xi->i_len;
1862 if (xi->i_resid == 0) {
1863 /* Job completed succcessfully! */
1864 bd_runq_exit(xi, 0);
1866 bd_xfer_free(xi);
1867 biodone(bp);
1868 return;
1871 xi->i_blkno += xi->i_nblks;
1873 if (bd->d_use_dma) {
1874 /* More transfer still pending... advance to next DMA window. */
1875 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1876 &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1877 } else {
1878 /* Advance memory window. */
1879 xi->i_kaddr += xi->i_len;
1880 xi->i_offset += xi->i_len;
1881 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1885 if ((rv != DDI_SUCCESS) ||
1886 (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
1887 bd_runq_exit(xi, EFAULT);
1889 bp->b_resid += xi->i_resid;
1890 bd_xfer_free(xi);
1891 bioerror(bp, EFAULT);
1892 biodone(bp);
1893 return;
1895 xi->i_len = len;
1896 xi->i_nblks = len >> xi->i_blkshift;
1898 /* Submit next window to hardware. */
1899 rv = xi->i_func(bd->d_private, &xi->i_public);
1900 if (rv != 0) {
1901 bd_runq_exit(xi, rv);
1903 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1905 bp->b_resid += xi->i_resid;
1906 bd_xfer_free(xi);
1907 bioerror(bp, rv);
1908 biodone(bp);
1912 void
1913 bd_error(bd_xfer_t *xfer, int error)
1915 bd_xfer_impl_t *xi = (void *)xfer;
1916 bd_t *bd = xi->i_bd;
1918 switch (error) {
1919 case BD_ERR_MEDIA:
1920 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1921 break;
1922 case BD_ERR_NTRDY:
1923 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1924 break;
1925 case BD_ERR_NODEV:
1926 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1927 break;
1928 case BD_ERR_RECOV:
1929 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1930 break;
1931 case BD_ERR_ILLRQ:
1932 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1933 break;
1934 case BD_ERR_PFA:
1935 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1936 break;
1937 default:
1938 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1939 break;
1943 void
1944 bd_state_change(bd_handle_t hdl)
1946 bd_t *bd;
1948 if ((bd = hdl->h_bd) != NULL) {
1949 bd_update_state(bd);
1953 void
1954 bd_mod_init(struct dev_ops *devops)
1956 static struct bus_ops bd_bus_ops = {
1957 BUSO_REV, /* busops_rev */
1958 nullbusmap, /* bus_map */
1959 NULL, /* bus_get_intrspec (OBSOLETE) */
1960 NULL, /* bus_add_intrspec (OBSOLETE) */
1961 NULL, /* bus_remove_intrspec (OBSOLETE) */
1962 i_ddi_map_fault, /* bus_map_fault */
1963 NULL, /* bus_dma_map (OBSOLETE) */
1964 ddi_dma_allochdl, /* bus_dma_allochdl */
1965 ddi_dma_freehdl, /* bus_dma_freehdl */
1966 ddi_dma_bindhdl, /* bus_dma_bindhdl */
1967 ddi_dma_unbindhdl, /* bus_dma_unbindhdl */
1968 ddi_dma_flush, /* bus_dma_flush */
1969 ddi_dma_win, /* bus_dma_win */
1970 ddi_dma_mctl, /* bus_dma_ctl */
1971 bd_bus_ctl, /* bus_ctl */
1972 ddi_bus_prop_op, /* bus_prop_op */
1973 NULL, /* bus_get_eventcookie */
1974 NULL, /* bus_add_eventcall */
1975 NULL, /* bus_remove_eventcall */
1976 NULL, /* bus_post_event */
1977 NULL, /* bus_intr_ctl (OBSOLETE) */
1978 NULL, /* bus_config */
1979 NULL, /* bus_unconfig */
1980 NULL, /* bus_fm_init */
1981 NULL, /* bus_fm_fini */
1982 NULL, /* bus_fm_access_enter */
1983 NULL, /* bus_fm_access_exit */
1984 NULL, /* bus_power */
1985 NULL, /* bus_intr_op */
1988 devops->devo_bus_ops = &bd_bus_ops;
1991 * NB: The device driver is free to supply its own
1992 * character entry device support.
1996 void
1997 bd_mod_fini(struct dev_ops *devops)
1999 devops->devo_bus_ops = NULL;