/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 Tegile Systems, Inc. All rights reserved.
 * Copyright (c) 2016 The MathWorks, Inc. All rights reserved.
 * Copyright 2017 Joyent, Inc.
 */
/*
 * blkdev driver for NVMe compliant storage devices
 *
 * This driver was written to conform to version 1.2.1 of the NVMe
 * specification. It may work with newer versions, but that is completely
 * untested and disabled by default.
 *
 * The driver has only been tested on x86 systems and will not work on big-
 * endian systems without changes to the code accessing registers and data
 * structures used by the hardware.
 *
 * Interrupt Usage:
 *
 * The driver will use a single interrupt while configuring the device as the
 * specification requires, but contrary to the specification it will try to use
 * a single-message MSI(-X) or FIXED interrupt. Later in the attach process it
 * will switch to multiple-message MSI(-X) if supported. The driver wants to
 * have one interrupt vector per CPU, but it will work correctly if fewer are
 * available. Interrupts can be shared by queues; the interrupt handler
 * iterates through the I/O queue array in steps of n_intr_cnt. Usually only
 * the admin queue will share an interrupt with one I/O queue. The interrupt
 * handler will retrieve completed commands from all queues sharing an
 * interrupt vector and will post them to a taskq for completion processing.
 *
 * Command Processing:
 *
 * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding
 * up to 65536 I/O commands. The driver will configure one I/O queue pair per
 * available interrupt vector, with the queue length usually much smaller than
 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
 * interrupt vectors will be used.
 *
 * Additionally the hardware provides a single special admin queue pair that
 * can hold up to 4096 admin commands.
 *
 * From the hardware perspective both queues of a queue pair are independent,
 * but they share some driver state: the command array (holding pointers to
 * commands currently being processed by the hardware) and the active command
 * counter. Access to the submission side of a queue pair and the shared state
 * is protected by nq_mutex. The completion side of a queue pair does not need
 * that protection apart from its access to the shared state; it is called only
 * in the interrupt handler, which does not run concurrently for the same
 * interrupt vector.
 *
 * When a command is submitted to a queue pair the active command counter is
 * incremented and a pointer to the command is stored in the command array. The
 * array index is used as command identifier (CID) in the submission queue
 * entry. Some commands may take a very long time to complete, and if the queue
 * wraps around in that time a submission may find the next array slot still
 * occupied by a long-running command. In this case the array is searched
 * sequentially for the next free slot. The length of the command array is the
 * same as the configured queue length. Queue overrun is prevented by the
 * semaphore, so a command submission may block if the queue is full.
 *
 * Polled I/O Support:
 *
 * For kernel core dump support the driver can do polled I/O. As interrupts are
 * turned off while dumping, the driver will just submit a command in the
 * regular way and then repeatedly attempt a command retrieval until it gets
 * the command back.
 *
 * Namespace Support:
 *
 * NVMe devices can have multiple namespaces, each being an independent data
 * store. The driver supports multiple namespaces and creates a blkdev interface
 * for each namespace found. Namespaces can have various attributes to support
 * thin provisioning and protection information. This driver does not support
 * any of this and ignores namespaces that have these attributes.
 *
 * As of NVMe 1.1 namespaces can have a 64bit Extended Unique Identifier
 * (EUI64). This driver uses the EUI64, if present, to generate the devid and
 * passes it to blkdev for use in the device node names. As this is currently
 * untested, namespaces with an EUI64 are ignored by default.
 *
 * We currently support only (2 << NVME_MINOR_INST_SHIFT) - 2 namespaces in a
 * single controller. This is an artificial limit imposed by the driver to be
 * able to address a reasonable number of controllers and namespaces using a
 * 32bit minor node number.
 *
 * Minor nodes:
 *
 * For each NVMe device the driver exposes one minor node for the controller and
 * one minor node for each namespace. The only operations supported by those
 * minor nodes are open(9E), close(9E), and ioctl(9E). This serves as the
 * interface for the nvmeadm(1M) utility.
 *
 * Blkdev Interface:
 *
 * This driver uses blkdev to do all the heavy lifting involved with presenting
 * a disk device to the system. As a result, the processing of I/O requests is
 * relatively simple as blkdev takes care of partitioning, boundary checks, DMA
 * setup, and splitting of transfers into manageable chunks.
 *
 * I/O requests coming in from blkdev are turned into NVM commands and posted to
 * an I/O queue. The queue is selected by taking the CPU id modulo the number of
 * queues. There is currently no timeout handling of I/O commands.
 *
 * Blkdev also supports querying device/media information and generating a
 * devid. The driver reports the best block size as determined by the namespace
 * format back to blkdev as physical block size to support partition and block
 * alignment. The devid is either based on the namespace EUI64, if present, or
 * composed using the device vendor ID, model number, serial number, and the
 * namespace ID.
 *
 * Error Handling:
 *
 * Error handling is currently limited to detecting fatal hardware errors,
 * either by asynchronous events, or synchronously through command status or
 * admin command timeouts. In case of severe errors the device is fenced off,
 * all further requests will return EIO, and FMA is called to fault the device.
 *
 * The hardware has a limit for outstanding asynchronous event requests. Before
 * this limit is known the driver assumes it is at least 1 and posts a single
 * asynchronous request. Later, when the limit is known, more asynchronous event
 * requests are posted to allow quicker reception of error information. When an
 * asynchronous event is posted by the hardware the driver will parse the error
 * status fields and log information or fault the device, depending on the
 * severity of the asynchronous event. The asynchronous event request is then
 * reused and posted to the admin queue again.
 *
 * On command completion the command status is checked for errors. In case of
 * errors indicating a driver bug the driver panics. Almost all other error
 * status values just cause EIO to be returned.
 *
 * Command timeouts are currently detected for all admin commands except
 * asynchronous event requests. If a command times out and the hardware appears
 * to be healthy the driver attempts to abort the command. If this fails the
 * driver assumes the device to be dead, fences it off, and calls FMA to retire
 * it. In general admin commands are issued at attach time only. No timeout
 * handling of normal I/O commands is presently done.
 *
 * In some cases the ABORT command may itself time out. In that case the
 * device is also declared dead and fenced off.
 *
 * Quiesce / Fast Reboot:
 *
 * The driver currently does not support fast reboot. A quiesce(9E) entry point
 * is still provided which is used to send a shutdown notification to the
 * device.
 *
 * Driver Configuration:
 *
 * The following driver properties can be changed to control some aspects of the
 * device's behavior:
 * - strict-version: can be set to 0 to allow devices conforming to newer
 *   versions or namespaces with EUI64 to be used
 * - ignore-unknown-vendor-status: can be set to 1 to not handle any vendor
 *   specific command status as a fatal error leading to device faulting
 * - admin-queue-len: the maximum length of the admin queue (16-4096)
 * - io-queue-len: the maximum length of the I/O queues (16-65536)
 * - async-event-limit: the maximum number of asynchronous event requests to be
 *   posted by the driver
 * - volatile-write-cache-enable: can be set to 0 to disable the volatile write
 *   cache, if present
 * - min-phys-block-size: the minimum physical block size to report to blkdev,
 *   which is among other things the basis for ZFS vdev ashift
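 *
 * For example, a minimal sketch of an nvme.conf fragment that overrides two of
 * these properties could look like this (values are illustrative only):
 *
 *	strict-version=0;
 *	volatile-write-cache-enable=0;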
 *
 * TODO:
 * - figure out sane default for I/O queue depth reported to blkdev
 * - FMA handling of media errors
 * - support for devices supporting very large I/O requests using chained PRPs
 * - support for configuring hardware parameters like interrupt coalescing
 * - support for media formatting and hard partitioning into namespaces
 * - support for big-endian systems
 * - support for fast reboot
 * - support for firmware updates
 * - support for NVMe Subsystem Reset (1.1)
 * - support for Scatter/Gather lists (1.1)
 * - support for Reservations (1.1)
 * - support for power management
 */
#include <sys/byteorder.h>
#ifdef _BIG_ENDIAN
#error nvme driver needs porting for big-endian platforms
#endif

#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/varargs.h>
#include <sys/cpuvar.h>
#include <sys/disp.h>
#include <sys/blkdev.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/sata/sata_hba.h>
#include <sys/stat.h>
#include <sys/policy.h>

#include <sys/nvme.h>

#ifdef __x86
#include <sys/x86_archext.h>
#endif

#include "nvme_reg.h"
#include "nvme_var.h"
/* NVMe spec version supported */
static const int nvme_version_major = 1;
static const int nvme_version_minor = 2;

/* tunable for admin command timeout in seconds, default is 1s */
int nvme_admin_cmd_timeout = 1;

/* tunable for FORMAT NVM command timeout in seconds, default is 600s */
int nvme_format_cmd_timeout = 600;
static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
static int nvme_quiesce(dev_info_t *);
static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
static int nvme_setup_interrupts(nvme_t *, int, int);
static void nvme_release_interrupts(nvme_t *);
static uint_t nvme_intr(caddr_t, caddr_t);

static void nvme_shutdown(nvme_t *, int, boolean_t);
static boolean_t nvme_reset(nvme_t *, boolean_t);
static int nvme_init(nvme_t *);
static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
    bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);

static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
static int nvme_check_specific_cmd_status(nvme_cmd_t *);
static int nvme_check_generic_cmd_status(nvme_cmd_t *);
static inline int nvme_check_cmd_status(nvme_cmd_t *);

static void nvme_abort_cmd(nvme_cmd_t *);
static void nvme_async_event(nvme_t *);
static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
    boolean_t, uint8_t);
static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
    uint32_t *);
static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
    void **, size_t *);
static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
static int nvme_set_nqueues(nvme_t *, uint16_t);

static void nvme_free_dma(nvme_dma_t *);
static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
    nvme_dma_t **);
static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t,
    nvme_dma_t **);
static void nvme_free_qpair(nvme_qpair_t *);
static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int);
static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t);

static inline void nvme_put64(nvme_t *, uintptr_t, uint64_t);
static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t);
static inline uint64_t nvme_get64(nvme_t *, uintptr_t);
static inline uint32_t nvme_get32(nvme_t *, uintptr_t);

static boolean_t nvme_check_regs_hdl(nvme_t *);
static boolean_t nvme_check_dma_hdl(nvme_dma_t *);

static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *);

static void nvme_bd_xfer_done(void *);
static void nvme_bd_driveinfo(void *, bd_drive_t *);
static int nvme_bd_mediainfo(void *, bd_media_t *);
static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
static int nvme_bd_read(void *, bd_xfer_t *);
static int nvme_bd_write(void *, bd_xfer_t *);
static int nvme_bd_sync(void *, bd_xfer_t *);
static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);

static int nvme_prp_dma_constructor(void *, void *, int);
static void nvme_prp_dma_destructor(void *, void *);

static void nvme_prepare_devid(nvme_t *, uint32_t);

static int nvme_open(dev_t *, int, int, cred_t *);
static int nvme_close(dev_t, int, int, cred_t *);
static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
#define	NVME_MINOR_INST_SHIFT	9
#define	NVME_MINOR(inst, nsid)	(((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
#define	NVME_MINOR_INST(minor)	((minor) >> NVME_MINOR_INST_SHIFT)
#define	NVME_MINOR_NSID(minor)	((minor) & ((1 << NVME_MINOR_INST_SHIFT) - 1))
#define	NVME_MINOR_MAX		(NVME_MINOR(1, 0) - 2)
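/*
 * Example of the encoding above: instance 3, namespace 2 yields the minor
 * number (3 << 9) | 2 == 1538, from which NVME_MINOR_INST() recovers 3 and
 * NVME_MINOR_NSID() recovers 2. A namespace ID of 0 denotes the minor node
 * of the controller itself.
 */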
static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;
/*
 * DMA attributes for queue DMA memory
 *
 * Queue DMA memory must be page aligned. The maximum length of a queue is
 * 65536 entries, and an entry can be 64 bytes long.
 */
static ddi_dma_attr_t nvme_queue_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1,
	.dma_attr_align		= 0x1000,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x1000,
	.dma_attr_maxxfer	= (UINT16_MAX + 1) * sizeof (nvme_sqe_t),
	.dma_attr_seg		= 0xffffffffffffffffULL,
	.dma_attr_sgllen	= 1,
	.dma_attr_granular	= 1,
	.dma_attr_flags		= 0,
};
/*
 * DMA attributes for transfers using Physical Region Page (PRP) entries
 *
 * A PRP entry describes one page of DMA memory using the page size specified
 * in the controller configuration's memory page size register (CC.MPS). It uses
 * a 64bit base address aligned to this page size. There is no limitation on
 * chaining PRPs together for arbitrarily large DMA transfers.
 */
static ddi_dma_attr_t nvme_prp_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= 0xfff,
	.dma_attr_align		= 0x1000,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x1000,
	.dma_attr_maxxfer	= 0x1000,
	.dma_attr_seg		= 0xfff,
	.dma_attr_sgllen	= -1,
	.dma_attr_granular	= 1,
	.dma_attr_flags		= 0,
};
/*
 * DMA attributes for transfers using scatter/gather lists
 *
 * A SGL entry describes a chunk of DMA memory using a 64bit base address and a
 * 32bit length field. SGL Segment and SGL Last Segment entries require the
 * length to be a multiple of 16 bytes.
 */
static ddi_dma_attr_t nvme_sgl_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= 0xffffffffUL,
	.dma_attr_align		= 1,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x10,
	.dma_attr_maxxfer	= 0xfffffffffULL,
	.dma_attr_seg		= 0xffffffffffffffffULL,
	.dma_attr_sgllen	= -1,
	.dma_attr_granular	= 0x10,
	.dma_attr_flags		= 0,
};
static ddi_device_acc_attr_t nvme_reg_acc_attr = {
	.devacc_attr_version	= DDI_DEVICE_ATTR_V0,
	.devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC,
	.devacc_attr_dataorder	= DDI_STRICTORDER_ACC
};
static struct cb_ops nvme_cb_ops = {
	.cb_open	= nvme_open,
	.cb_close	= nvme_close,
	.cb_strategy	= nodev,
	.cb_print	= nodev,
	.cb_dump	= nodev,
	.cb_read	= nodev,
	.cb_write	= nodev,
	.cb_ioctl	= nvme_ioctl,
	.cb_devmap	= nodev,
	.cb_mmap	= nodev,
	.cb_segmap	= nodev,
	.cb_chpoll	= nochpoll,
	.cb_prop_op	= ddi_prop_op,
	.cb_str		= 0,
	.cb_flag	= D_NEW | D_MP,
	.cb_rev		= CB_REV,
	.cb_aread	= nodev,
	.cb_awrite	= nodev
};
static struct dev_ops nvme_dev_ops = {
	.devo_rev	= DEVO_REV,
	.devo_refcnt	= 0,
	.devo_getinfo	= ddi_no_info,
	.devo_identify	= nulldev,
	.devo_probe	= nulldev,
	.devo_attach	= nvme_attach,
	.devo_detach	= nvme_detach,
	.devo_reset	= nodev,
	.devo_cb_ops	= &nvme_cb_ops,
	.devo_bus_ops	= NULL,
	.devo_power	= NULL,
	.devo_quiesce	= nvme_quiesce,
};
static struct modldrv nvme_modldrv = {
	.drv_modops	= &mod_driverops,
	.drv_linkinfo	= "NVMe v1.1b",
	.drv_dev_ops	= &nvme_dev_ops
};

static struct modlinkage nvme_modlinkage = {
	.ml_rev		= MODREV_1,
	.ml_linkage	= { &nvme_modldrv, NULL }
};
static bd_ops_t nvme_bd_ops = {
	.o_version	= BD_OPS_VERSION_0,
	.o_drive_info	= nvme_bd_driveinfo,
	.o_media_info	= nvme_bd_mediainfo,
	.o_devid_init	= nvme_bd_devid,
	.o_sync_cache	= nvme_bd_sync,
	.o_read		= nvme_bd_read,
	.o_write	= nvme_bd_write,
};
int
_init(void)
{
	int error;

	error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1);
	if (error != DDI_SUCCESS)
		return (error);

	nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
	    sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);

	bd_mod_init(&nvme_dev_ops);

	error = mod_install(&nvme_modlinkage);
	if (error != DDI_SUCCESS) {
		ddi_soft_state_fini(&nvme_state);
		bd_mod_fini(&nvme_dev_ops);
	}

	return (error);
}
int
_fini(void)
{
	int error;

	error = mod_remove(&nvme_modlinkage);
	if (error == DDI_SUCCESS) {
		ddi_soft_state_fini(&nvme_state);
		kmem_cache_destroy(nvme_cmd_cache);
		bd_mod_fini(&nvme_dev_ops);
	}

	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&nvme_modlinkage, modinfop));
}
static inline void
nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val)
{
	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val);
}

static inline void
nvme_put32(nvme_t *nvme, uintptr_t reg, uint32_t val)
{
	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	ddi_put32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg), val);
}

static inline uint64_t
nvme_get64(nvme_t *nvme, uintptr_t reg)
{
	uint64_t val;

	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	val = ddi_get64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg));

	return (val);
}

static inline uint32_t
nvme_get32(nvme_t *nvme, uintptr_t reg)
{
	uint32_t val;

	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	val = ddi_get32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg));

	return (val);
}
static boolean_t
nvme_check_regs_hdl(nvme_t *nvme)
{
	ddi_fm_error_t error;

	ddi_fm_acc_err_get(nvme->n_regh, &error, DDI_FME_VERSION);

	if (error.fme_status != DDI_FM_OK)
		return (B_TRUE);

	return (B_FALSE);
}

static boolean_t
nvme_check_dma_hdl(nvme_dma_t *dma)
{
	ddi_fm_error_t error;

	if (dma == NULL)
		return (B_FALSE);

	ddi_fm_dma_err_get(dma->nd_dmah, &error, DDI_FME_VERSION);

	if (error.fme_status != DDI_FM_OK)
		return (B_TRUE);

	return (B_FALSE);
}
static void
nvme_free_dma_common(nvme_dma_t *dma)
{
	if (dma->nd_dmah != NULL)
		(void) ddi_dma_unbind_handle(dma->nd_dmah);
	if (dma->nd_acch != NULL)
		ddi_dma_mem_free(&dma->nd_acch);
	if (dma->nd_dmah != NULL)
		ddi_dma_free_handle(&dma->nd_dmah);
}

static void
nvme_free_dma(nvme_dma_t *dma)
{
	nvme_free_dma_common(dma);
	kmem_free(dma, sizeof (*dma));
}
static void
nvme_prp_dma_destructor(void *buf, void *private)
{
	nvme_dma_t *dma = (nvme_dma_t *)buf;

	nvme_free_dma_common(dma);
}
static int
nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
    size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
{
	if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
	    &dma->nd_dmah) != DDI_SUCCESS) {
		/*
		 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
		 * the only other possible error is DDI_DMA_BADATTR which
		 * indicates a driver bug which should cause a panic.
		 */
		dev_err(nvme->n_dip, CE_PANIC,
		    "!failed to get DMA handle, check DMA attributes");
		return (DDI_FAILURE);
	}

	/*
	 * ddi_dma_mem_alloc() can only fail when DDI_DMA_NOSLEEP is specified
	 * or the flags are conflicting, which isn't the case here.
	 */
	(void) ddi_dma_mem_alloc(dma->nd_dmah, len, &nvme->n_reg_acc_attr,
	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dma->nd_memp,
	    &dma->nd_len, &dma->nd_acch);

	if (ddi_dma_addr_bind_handle(dma->nd_dmah, NULL, dma->nd_memp,
	    dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
	    &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to bind DMA memory");
		atomic_inc_32(&nvme->n_dma_bind_err);
		nvme_free_dma_common(dma);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
static int
nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
{
	nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);

	if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
	    DDI_SUCCESS) {
		*ret = NULL;
		kmem_free(dma, sizeof (nvme_dma_t));
		return (DDI_FAILURE);
	}

	bzero(dma->nd_memp, dma->nd_len);

	*ret = dma;
	return (DDI_SUCCESS);
}
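/*
 * kmem cache constructor for per-command PRP pages: pre-allocates and binds
 * one page of DMA memory per cached object so the I/O path does not have to
 * set up a fresh DMA handle for every transfer that needs a PRP list.
 */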
static int
nvme_prp_dma_constructor(void *buf, void *private, int flags)
{
	nvme_dma_t *dma = (nvme_dma_t *)buf;
	nvme_t *nvme = (nvme_t *)private;

	dma->nd_dmah = NULL;
	dma->nd_acch = NULL;

	if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
	    DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
		return (-1);
	}

	ASSERT(dma->nd_ncookie == 1);

	dma->nd_cached = B_TRUE;

	return (0);
}
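/*
 * Allocate DMA memory for a submission or completion queue. The allocation
 * is rounded up to a whole page and must come back as a single cookie, i.e.
 * the queue memory has to be physically contiguous.
 */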
static int
nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
    uint_t flags, nvme_dma_t **dma)
{
	uint32_t len = nentry * qe_len;
	ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;

	len = roundup(len, nvme->n_pagesize);

	q_dma_attr.dma_attr_minxfer = len;

	if (nvme_zalloc_dma(nvme, len, flags, &q_dma_attr, dma)
	    != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to get DMA memory for queue");
		goto fail;
	}

	if ((*dma)->nd_ncookie != 1) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!got too many cookies for queue DMA");
		goto fail;
	}

	return (DDI_SUCCESS);

fail:
	if (*dma) {
		nvme_free_dma(*dma);
		*dma = NULL;
	}

	return (DDI_FAILURE);
}
static void
nvme_free_qpair(nvme_qpair_t *qp)
{
	int i;

	mutex_destroy(&qp->nq_mutex);
	sema_destroy(&qp->nq_sema);

	if (qp->nq_sqdma != NULL)
		nvme_free_dma(qp->nq_sqdma);
	if (qp->nq_cqdma != NULL)
		nvme_free_dma(qp->nq_cqdma);

	if (qp->nq_active_cmds > 0)
		for (i = 0; i != qp->nq_nentry; i++)
			if (qp->nq_cmd[i] != NULL)
				nvme_free_cmd(qp->nq_cmd[i]);

	if (qp->nq_cmd != NULL)
		kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry);

	kmem_free(qp, sizeof (nvme_qpair_t));
}
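/*
 * Allocate a queue pair: set up the submission and completion queue DMA
 * memory, the protecting mutex and semaphore, the active command array, and
 * record the doorbell register offsets for the given queue index.
 */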
static int
nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
    int idx)
{
	nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);

	mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(nvme->n_intr_pri));
	sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);

	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
	    DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
		goto fail;

	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t),
	    DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS)
		goto fail;

	qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp;
	qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp;
	qp->nq_nentry = nentry;

	qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx);
	qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx);

	qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP);

	*nqp = qp;
	return (DDI_SUCCESS);

fail:
	nvme_free_qpair(qp);
	*nqp = NULL;

	return (DDI_FAILURE);
}
static nvme_cmd_t *
nvme_alloc_cmd(nvme_t *nvme, int kmflag)
{
	nvme_cmd_t *cmd = kmem_cache_alloc(nvme_cmd_cache, kmflag);

	if (cmd == NULL)
		return (cmd);

	bzero(cmd, sizeof (nvme_cmd_t));

	cmd->nc_nvme = nvme;

	mutex_init(&cmd->nc_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(nvme->n_intr_pri));
	cv_init(&cmd->nc_cv, NULL, CV_DRIVER, NULL);

	return (cmd);
}
static void
nvme_free_cmd(nvme_cmd_t *cmd)
{
	if (cmd->nc_dma) {
		if (cmd->nc_dma->nd_cached)
			kmem_cache_free(cmd->nc_nvme->n_prp_cache,
			    cmd->nc_dma);
		else
			nvme_free_dma(cmd->nc_dma);

		cmd->nc_dma = NULL;
	}

	cv_destroy(&cmd->nc_cv);
	mutex_destroy(&cmd->nc_mutex);

	kmem_cache_free(nvme_cmd_cache, cmd);
}
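/*
 * Admin command submission blocks on the queue semaphore until a slot is
 * available; I/O command submission only tries the semaphore and fails
 * instead of blocking when the queue is full.
 */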
static void
nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
	sema_p(&qp->nq_sema);
	nvme_submit_cmd_common(qp, cmd);
}

static int
nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
	if (sema_tryp(&qp->nq_sema) == 0)
		return (EAGAIN);

	nvme_submit_cmd_common(qp, cmd);
	return (0);
}
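/*
 * Common submission path: store the command in the next free slot of the
 * active command array (the slot index becomes the CID), copy the SQE into
 * the submission queue, and ring the submission queue tail doorbell. Called
 * with the queue semaphore held; takes nq_mutex itself.
 */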
static void
nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
	nvme_reg_sqtdbl_t tail = { 0 };

	mutex_enter(&qp->nq_mutex);
	cmd->nc_completed = B_FALSE;

	/*
	 * Try to insert the cmd into the active cmd array at the nq_next_cmd
	 * slot. If the slot is already occupied advance to the next slot and
	 * try again. This can happen for long running commands like async
	 * event requests.
	 */
	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
	qp->nq_cmd[qp->nq_next_cmd] = cmd;

	qp->nq_active_cmds++;

	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
	bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
	(void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
	    sizeof (nvme_sqe_t) * qp->nq_sqtail,
	    sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
	qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;

	tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
	nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);

	mutex_exit(&qp->nq_mutex);
}
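/*
 * Retrieve one completed command from a queue pair: check the phase tag of
 * the completion queue entry at the current head, and if it indicates a new
 * entry look up the command by its CID, copy the CQE into the command, and
 * ring the completion queue head doorbell. Returns NULL if no new completion
 * is pending.
 */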
static nvme_cmd_t *
nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
{
	nvme_reg_cqhdbl_t head = { 0 };

	nvme_cqe_t *cqe;
	nvme_cmd_t *cmd;

	(void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
	    sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);

	mutex_enter(&qp->nq_mutex);
	cqe = &qp->nq_cq[qp->nq_cqhead];

	/* Check phase tag of CQE. Hardware inverts it for new entries. */
	if (cqe->cqe_sf.sf_p == qp->nq_phase) {
		mutex_exit(&qp->nq_mutex);
		return (NULL);
	}

	ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
	ASSERT(cqe->cqe_cid < qp->nq_nentry);

	cmd = qp->nq_cmd[cqe->cqe_cid];
	qp->nq_cmd[cqe->cqe_cid] = NULL;
	qp->nq_active_cmds--;

	ASSERT(cmd != NULL);
	ASSERT(cmd->nc_nvme == nvme);
	ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
	ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
	bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));

	qp->nq_sqhead = cqe->cqe_sqhd;

	head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;

	/* Toggle phase on wrap-around. */
	if (qp->nq_cqhead == 0)
		qp->nq_phase = qp->nq_phase ? 0 : 1;

	nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
	mutex_exit(&qp->nq_mutex);
	sema_v(&qp->nq_sema);

	return (cmd);
}
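/*
 * The nvme_check_*_cmd_status() functions below inspect the status field of
 * a completed command. Conditions that indicate a driver bug cause a panic,
 * fatal device errors fence the device off and notify FMA, and everything
 * else is mapped to an errno-style return value for the caller.
 */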
919 nvme_check_unknown_cmd_status(nvme_cmd_t
*cmd
)
921 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
923 dev_err(cmd
->nc_nvme
->n_dip
, CE_WARN
,
924 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
925 "sc = %x, sct = %x, dnr = %d, m = %d", cmd
->nc_sqe
.sqe_opc
,
926 cqe
->cqe_sqid
, cqe
->cqe_cid
, cqe
->cqe_sf
.sf_sc
, cqe
->cqe_sf
.sf_sct
,
927 cqe
->cqe_sf
.sf_dnr
, cqe
->cqe_sf
.sf_m
);
929 if (cmd
->nc_xfer
!= NULL
)
930 bd_error(cmd
->nc_xfer
, BD_ERR_ILLRQ
);
932 if (cmd
->nc_nvme
->n_strict_version
) {
933 cmd
->nc_nvme
->n_dead
= B_TRUE
;
934 ddi_fm_service_impact(cmd
->nc_nvme
->n_dip
, DDI_SERVICE_LOST
);
941 nvme_check_vendor_cmd_status(nvme_cmd_t
*cmd
)
943 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
945 dev_err(cmd
->nc_nvme
->n_dip
, CE_WARN
,
946 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
947 "sc = %x, sct = %x, dnr = %d, m = %d", cmd
->nc_sqe
.sqe_opc
,
948 cqe
->cqe_sqid
, cqe
->cqe_cid
, cqe
->cqe_sf
.sf_sc
, cqe
->cqe_sf
.sf_sct
,
949 cqe
->cqe_sf
.sf_dnr
, cqe
->cqe_sf
.sf_m
);
950 if (!cmd
->nc_nvme
->n_ignore_unknown_vendor_status
) {
951 cmd
->nc_nvme
->n_dead
= B_TRUE
;
952 ddi_fm_service_impact(cmd
->nc_nvme
->n_dip
, DDI_SERVICE_LOST
);
959 nvme_check_integrity_cmd_status(nvme_cmd_t
*cmd
)
961 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
963 switch (cqe
->cqe_sf
.sf_sc
) {
964 case NVME_CQE_SC_INT_NVM_WRITE
:
966 /* TODO: post ereport */
967 if (cmd
->nc_xfer
!= NULL
)
968 bd_error(cmd
->nc_xfer
, BD_ERR_MEDIA
);
971 case NVME_CQE_SC_INT_NVM_READ
:
973 /* TODO: post ereport */
974 if (cmd
->nc_xfer
!= NULL
)
975 bd_error(cmd
->nc_xfer
, BD_ERR_MEDIA
);
979 return (nvme_check_unknown_cmd_status(cmd
));
984 nvme_check_generic_cmd_status(nvme_cmd_t
*cmd
)
986 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
988 switch (cqe
->cqe_sf
.sf_sc
) {
989 case NVME_CQE_SC_GEN_SUCCESS
:
993 * Errors indicating a bug in the driver should cause a panic.
995 case NVME_CQE_SC_GEN_INV_OPC
:
996 /* Invalid Command Opcode */
997 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
, "programming error: "
998 "invalid opcode in cmd %p", (void *)cmd
);
1001 case NVME_CQE_SC_GEN_INV_FLD
:
1002 /* Invalid Field in Command */
1003 if (!cmd
->nc_dontpanic
)
1004 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
,
1005 "programming error: invalid field in cmd %p",
1009 case NVME_CQE_SC_GEN_ID_CNFL
:
1010 /* Command ID Conflict */
1011 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
, "programming error: "
1012 "cmd ID conflict in cmd %p", (void *)cmd
);
1015 case NVME_CQE_SC_GEN_INV_NS
:
1016 /* Invalid Namespace or Format */
1017 if (!cmd
->nc_dontpanic
)
1018 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
,
1019 "programming error: " "invalid NS/format in cmd %p",
1023 case NVME_CQE_SC_GEN_NVM_LBA_RANGE
:
1024 /* LBA Out Of Range */
1025 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
, "programming error: "
1026 "LBA out of range in cmd %p", (void *)cmd
);
1030 * Non-fatal errors, handle gracefully.
1032 case NVME_CQE_SC_GEN_DATA_XFR_ERR
:
1033 /* Data Transfer Error (DMA) */
1034 /* TODO: post ereport */
1035 atomic_inc_32(&cmd
->nc_nvme
->n_data_xfr_err
);
1036 if (cmd
->nc_xfer
!= NULL
)
1037 bd_error(cmd
->nc_xfer
, BD_ERR_NTRDY
);
1040 case NVME_CQE_SC_GEN_INTERNAL_ERR
:
1042 * Internal Error. The spec (v1.0, section 4.5.1.2) says
1043 * detailed error information is returned as async event,
1044 * so we pretty much ignore the error here and handle it
1045 * in the async event handler.
1047 atomic_inc_32(&cmd
->nc_nvme
->n_internal_err
);
1048 if (cmd
->nc_xfer
!= NULL
)
1049 bd_error(cmd
->nc_xfer
, BD_ERR_NTRDY
);
1052 case NVME_CQE_SC_GEN_ABORT_REQUEST
:
1054 * Command Abort Requested. This normally happens only when a
1055 * command times out.
1057 /* TODO: post ereport or change blkdev to handle this? */
1058 atomic_inc_32(&cmd
->nc_nvme
->n_abort_rq_err
);
1061 case NVME_CQE_SC_GEN_ABORT_PWRLOSS
:
1062 /* Command Aborted due to Power Loss Notification */
1063 ddi_fm_service_impact(cmd
->nc_nvme
->n_dip
, DDI_SERVICE_LOST
);
1064 cmd
->nc_nvme
->n_dead
= B_TRUE
;
1067 case NVME_CQE_SC_GEN_ABORT_SQ_DEL
:
1068 /* Command Aborted due to SQ Deletion */
1069 atomic_inc_32(&cmd
->nc_nvme
->n_abort_sq_del
);
1072 case NVME_CQE_SC_GEN_NVM_CAP_EXC
:
1073 /* Capacity Exceeded */
1074 atomic_inc_32(&cmd
->nc_nvme
->n_nvm_cap_exc
);
1075 if (cmd
->nc_xfer
!= NULL
)
1076 bd_error(cmd
->nc_xfer
, BD_ERR_MEDIA
);
1079 case NVME_CQE_SC_GEN_NVM_NS_NOTRDY
:
1080 /* Namespace Not Ready */
1081 atomic_inc_32(&cmd
->nc_nvme
->n_nvm_ns_notrdy
);
1082 if (cmd
->nc_xfer
!= NULL
)
1083 bd_error(cmd
->nc_xfer
, BD_ERR_NTRDY
);
1087 return (nvme_check_unknown_cmd_status(cmd
));
1092 nvme_check_specific_cmd_status(nvme_cmd_t
*cmd
)
1094 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
1096 switch (cqe
->cqe_sf
.sf_sc
) {
1097 case NVME_CQE_SC_SPC_INV_CQ
:
1098 /* Completion Queue Invalid */
1099 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_SQUEUE
);
1100 atomic_inc_32(&cmd
->nc_nvme
->n_inv_cq_err
);
1103 case NVME_CQE_SC_SPC_INV_QID
:
1104 /* Invalid Queue Identifier */
1105 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_SQUEUE
||
1106 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_DELETE_SQUEUE
||
1107 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_CQUEUE
||
1108 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_DELETE_CQUEUE
);
1109 atomic_inc_32(&cmd
->nc_nvme
->n_inv_qid_err
);
1112 case NVME_CQE_SC_SPC_MAX_QSZ_EXC
:
1113 /* Max Queue Size Exceeded */
1114 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_SQUEUE
||
1115 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_CQUEUE
);
1116 atomic_inc_32(&cmd
->nc_nvme
->n_max_qsz_exc
);
1119 case NVME_CQE_SC_SPC_ABRT_CMD_EXC
:
1120 /* Abort Command Limit Exceeded */
1121 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_ABORT
);
1122 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
, "programming error: "
1123 "abort command limit exceeded in cmd %p", (void *)cmd
);
1126 case NVME_CQE_SC_SPC_ASYNC_EVREQ_EXC
:
1127 /* Async Event Request Limit Exceeded */
1128 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_ASYNC_EVENT
);
1129 dev_err(cmd
->nc_nvme
->n_dip
, CE_PANIC
, "programming error: "
1130 "async event request limit exceeded in cmd %p",
1134 case NVME_CQE_SC_SPC_INV_INT_VECT
:
1135 /* Invalid Interrupt Vector */
1136 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_CREATE_CQUEUE
);
1137 atomic_inc_32(&cmd
->nc_nvme
->n_inv_int_vect
);
1140 case NVME_CQE_SC_SPC_INV_LOG_PAGE
:
1141 /* Invalid Log Page */
1142 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_GET_LOG_PAGE
);
1143 atomic_inc_32(&cmd
->nc_nvme
->n_inv_log_page
);
1146 case NVME_CQE_SC_SPC_INV_FORMAT
:
1147 /* Invalid Format */
1148 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_FORMAT
);
1149 atomic_inc_32(&cmd
->nc_nvme
->n_inv_format
);
1150 if (cmd
->nc_xfer
!= NULL
)
1151 bd_error(cmd
->nc_xfer
, BD_ERR_ILLRQ
);
1154 case NVME_CQE_SC_SPC_INV_Q_DEL
:
1155 /* Invalid Queue Deletion */
1156 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_DELETE_CQUEUE
);
1157 atomic_inc_32(&cmd
->nc_nvme
->n_inv_q_del
);
1160 case NVME_CQE_SC_SPC_NVM_CNFL_ATTR
:
1161 /* Conflicting Attributes */
1162 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_DSET_MGMT
||
1163 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_READ
||
1164 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_WRITE
);
1165 atomic_inc_32(&cmd
->nc_nvme
->n_cnfl_attr
);
1166 if (cmd
->nc_xfer
!= NULL
)
1167 bd_error(cmd
->nc_xfer
, BD_ERR_ILLRQ
);
1170 case NVME_CQE_SC_SPC_NVM_INV_PROT
:
1171 /* Invalid Protection Information */
1172 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_COMPARE
||
1173 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_READ
||
1174 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_WRITE
);
1175 atomic_inc_32(&cmd
->nc_nvme
->n_inv_prot
);
1176 if (cmd
->nc_xfer
!= NULL
)
1177 bd_error(cmd
->nc_xfer
, BD_ERR_ILLRQ
);
1180 case NVME_CQE_SC_SPC_NVM_READONLY
:
1181 /* Write to Read Only Range */
1182 ASSERT(cmd
->nc_sqe
.sqe_opc
== NVME_OPC_NVM_WRITE
);
1183 atomic_inc_32(&cmd
->nc_nvme
->n_readonly
);
1184 if (cmd
->nc_xfer
!= NULL
)
1185 bd_error(cmd
->nc_xfer
, BD_ERR_ILLRQ
);
1189 return (nvme_check_unknown_cmd_status(cmd
));
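/*
 * Dispatch on the status code type (SCT) of a completed command to the
 * matching handler above; generic success is short-circuited to 0.
 */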
1194 nvme_check_cmd_status(nvme_cmd_t
*cmd
)
1196 nvme_cqe_t
*cqe
= &cmd
->nc_cqe
;
1198 /* take a shortcut if everything is alright */
1199 if (cqe
->cqe_sf
.sf_sct
== NVME_CQE_SCT_GENERIC
&&
1200 cqe
->cqe_sf
.sf_sc
== NVME_CQE_SC_GEN_SUCCESS
)
1203 if (cqe
->cqe_sf
.sf_sct
== NVME_CQE_SCT_GENERIC
)
1204 return (nvme_check_generic_cmd_status(cmd
));
1205 else if (cqe
->cqe_sf
.sf_sct
== NVME_CQE_SCT_SPECIFIC
)
1206 return (nvme_check_specific_cmd_status(cmd
));
1207 else if (cqe
->cqe_sf
.sf_sct
== NVME_CQE_SCT_INTEGRITY
)
1208 return (nvme_check_integrity_cmd_status(cmd
));
1209 else if (cqe
->cqe_sf
.sf_sct
== NVME_CQE_SCT_VENDOR
)
1210 return (nvme_check_vendor_cmd_status(cmd
));
1212 return (nvme_check_unknown_cmd_status(cmd
));
1216 * nvme_abort_cmd_cb -- replaces nc_callback of aborted commands
 * This function takes care of cleaning up aborted commands. The command
 * status is checked to catch any fatal errors.
1222 nvme_abort_cmd_cb(void *arg
)
1224 nvme_cmd_t
*cmd
= arg
;
1227 * Grab the command mutex. Once we have it we hold the last reference
1228 * to the command and can safely free it.
1230 mutex_enter(&cmd
->nc_mutex
);
1231 (void) nvme_check_cmd_status(cmd
);
1232 mutex_exit(&cmd
->nc_mutex
);
1238 nvme_abort_cmd(nvme_cmd_t
*abort_cmd
)
1240 nvme_t
*nvme
= abort_cmd
->nc_nvme
;
1241 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1242 nvme_abort_cmd_t ac
= { 0 };
1244 sema_p(&nvme
->n_abort_sema
);
1246 ac
.b
.ac_cid
= abort_cmd
->nc_sqe
.sqe_cid
;
1247 ac
.b
.ac_sqid
= abort_cmd
->nc_sqid
;
1250 * Drop the mutex of the aborted command. From this point on
1251 * we must assume that the abort callback has freed the command.
1253 mutex_exit(&abort_cmd
->nc_mutex
);
1256 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_ABORT
;
1257 cmd
->nc_callback
= nvme_wakeup_cmd
;
1258 cmd
->nc_sqe
.sqe_cdw10
= ac
.r
;
1261 * Send the ABORT to the hardware. The ABORT command will return _after_
1262 * the aborted command has completed (aborted or otherwise).
1264 if (nvme_admin_cmd(cmd
, nvme_admin_cmd_timeout
) != DDI_SUCCESS
) {
1265 sema_v(&nvme
->n_abort_sema
);
1266 dev_err(nvme
->n_dip
, CE_WARN
,
1267 "!nvme_admin_cmd failed for ABORT");
1268 atomic_inc_32(&nvme
->n_abort_failed
);
1271 sema_v(&nvme
->n_abort_sema
);
1273 if (nvme_check_cmd_status(cmd
)) {
1274 dev_err(nvme
->n_dip
, CE_WARN
,
1275 "!ABORT failed with sct = %x, sc = %x",
1276 cmd
->nc_cqe
.cqe_sf
.sf_sct
, cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1277 atomic_inc_32(&nvme
->n_abort_failed
);
1279 atomic_inc_32(&nvme
->n_cmd_aborted
);
1286 * nvme_wait_cmd -- wait for command completion or timeout
1288 * Returns B_TRUE if the command completed normally.
1290 * Returns B_FALSE if the command timed out and an abort was attempted. The
1291 * command mutex will be dropped and the command must be considered freed. The
1292 * freeing of the command is normally done by the abort command callback.
1294 * In case of a serious error or a timeout of the abort command the hardware
1295 * will be declared dead and FMA will be notified.
1298 nvme_wait_cmd(nvme_cmd_t
*cmd
, uint_t sec
)
1300 clock_t timeout
= ddi_get_lbolt() + drv_usectohz(sec
* MICROSEC
);
1301 nvme_t
*nvme
= cmd
->nc_nvme
;
1302 nvme_reg_csts_t csts
;
1304 ASSERT(mutex_owned(&cmd
->nc_mutex
));
1306 while (!cmd
->nc_completed
) {
1307 if (cv_timedwait(&cmd
->nc_cv
, &cmd
->nc_mutex
, timeout
) == -1)
1311 if (cmd
->nc_completed
)
1315 * The command timed out. Change the callback to the cleanup function.
1317 cmd
->nc_callback
= nvme_abort_cmd_cb
;
1320 * Check controller for fatal status, any errors associated with the
1321 * register or DMA handle, or for a double timeout (abort command timed
1322 * out). If necessary log a warning and call FMA.
1324 csts
.r
= nvme_get32(nvme
, NVME_REG_CSTS
);
1325 dev_err(nvme
->n_dip
, CE_WARN
, "!command timeout, "
1326 "OPC = %x, CFS = %d", cmd
->nc_sqe
.sqe_opc
, csts
.b
.csts_cfs
);
1327 atomic_inc_32(&nvme
->n_cmd_timeout
);
1329 if (csts
.b
.csts_cfs
||
1330 nvme_check_regs_hdl(nvme
) ||
1331 nvme_check_dma_hdl(cmd
->nc_dma
) ||
1332 cmd
->nc_sqe
.sqe_opc
== NVME_OPC_ABORT
) {
1333 ddi_fm_service_impact(nvme
->n_dip
, DDI_SERVICE_LOST
);
1334 nvme
->n_dead
= B_TRUE
;
1335 mutex_exit(&cmd
->nc_mutex
);
1338 * Try to abort the command. The command mutex is released by
1340 * If the abort succeeds it will have freed the aborted command.
1341 * If the abort fails for other reasons we must assume that the
1342 * command may complete at any time, and the callback will free
1345 nvme_abort_cmd(cmd
);
1352 nvme_wakeup_cmd(void *arg
)
1354 nvme_cmd_t
*cmd
= arg
;
1356 mutex_enter(&cmd
->nc_mutex
);
1358 * There is a slight chance that this command completed shortly after
1359 * the timeout was hit in nvme_wait_cmd() but before the callback was
1360 * changed. Catch that case here and clean up accordingly.
1362 if (cmd
->nc_callback
== nvme_abort_cmd_cb
) {
1363 mutex_exit(&cmd
->nc_mutex
);
1364 nvme_abort_cmd_cb(cmd
);
1368 cmd
->nc_completed
= B_TRUE
;
1369 cv_signal(&cmd
->nc_cv
);
1370 mutex_exit(&cmd
->nc_mutex
);
1374 nvme_async_event_task(void *arg
)
1376 nvme_cmd_t
*cmd
= arg
;
1377 nvme_t
*nvme
= cmd
->nc_nvme
;
1378 nvme_error_log_entry_t
*error_log
= NULL
;
1379 nvme_health_log_t
*health_log
= NULL
;
1381 nvme_async_event_t event
;
1384 * Check for errors associated with the async request itself. The only
1385 * command-specific error is "async event limit exceeded", which
1386 * indicates a programming error in the driver and causes a panic in
1387 * nvme_check_cmd_status().
1389 * Other possible errors are various scenarios where the async request
1390 * was aborted, or internal errors in the device. Internal errors are
1391 * reported to FMA, the command aborts need no special handling here.
1393 if (nvme_check_cmd_status(cmd
)) {
1394 dev_err(cmd
->nc_nvme
->n_dip
, CE_WARN
,
1395 "!async event request returned failure, sct = %x, "
1396 "sc = %x, dnr = %d, m = %d", cmd
->nc_cqe
.cqe_sf
.sf_sct
,
1397 cmd
->nc_cqe
.cqe_sf
.sf_sc
, cmd
->nc_cqe
.cqe_sf
.sf_dnr
,
1398 cmd
->nc_cqe
.cqe_sf
.sf_m
);
1400 if (cmd
->nc_cqe
.cqe_sf
.sf_sct
== NVME_CQE_SCT_GENERIC
&&
1401 cmd
->nc_cqe
.cqe_sf
.sf_sc
== NVME_CQE_SC_GEN_INTERNAL_ERR
) {
1402 cmd
->nc_nvme
->n_dead
= B_TRUE
;
1403 ddi_fm_service_impact(cmd
->nc_nvme
->n_dip
,
1411 event
.r
= cmd
->nc_cqe
.cqe_dw0
;
1413 /* Clear CQE and re-submit the async request. */
1414 bzero(&cmd
->nc_cqe
, sizeof (nvme_cqe_t
));
1415 nvme_submit_admin_cmd(nvme
->n_adminq
, cmd
);
1417 switch (event
.b
.ae_type
) {
1418 case NVME_ASYNC_TYPE_ERROR
:
1419 if (event
.b
.ae_logpage
== NVME_LOGPAGE_ERROR
) {
1420 (void) nvme_get_logpage(nvme
, (void **)&error_log
,
1421 &logsize
, event
.b
.ae_logpage
);
1423 dev_err(nvme
->n_dip
, CE_WARN
, "!wrong logpage in "
1424 "async event reply: %d", event
.b
.ae_logpage
);
1425 atomic_inc_32(&nvme
->n_wrong_logpage
);
1428 switch (event
.b
.ae_info
) {
1429 case NVME_ASYNC_ERROR_INV_SQ
:
1430 dev_err(nvme
->n_dip
, CE_PANIC
, "programming error: "
1431 "invalid submission queue");
1434 case NVME_ASYNC_ERROR_INV_DBL
:
1435 dev_err(nvme
->n_dip
, CE_PANIC
, "programming error: "
1436 "invalid doorbell write value");
1439 case NVME_ASYNC_ERROR_DIAGFAIL
:
1440 dev_err(nvme
->n_dip
, CE_WARN
, "!diagnostic failure");
1441 ddi_fm_service_impact(nvme
->n_dip
, DDI_SERVICE_LOST
);
1442 nvme
->n_dead
= B_TRUE
;
1443 atomic_inc_32(&nvme
->n_diagfail_event
);
1446 case NVME_ASYNC_ERROR_PERSISTENT
:
1447 dev_err(nvme
->n_dip
, CE_WARN
, "!persistent internal "
1449 ddi_fm_service_impact(nvme
->n_dip
, DDI_SERVICE_LOST
);
1450 nvme
->n_dead
= B_TRUE
;
1451 atomic_inc_32(&nvme
->n_persistent_event
);
1454 case NVME_ASYNC_ERROR_TRANSIENT
:
1455 dev_err(nvme
->n_dip
, CE_WARN
, "!transient internal "
1457 /* TODO: send ereport */
1458 atomic_inc_32(&nvme
->n_transient_event
);
1461 case NVME_ASYNC_ERROR_FW_LOAD
:
1462 dev_err(nvme
->n_dip
, CE_WARN
,
1463 "!firmware image load error");
1464 atomic_inc_32(&nvme
->n_fw_load_event
);
1469 case NVME_ASYNC_TYPE_HEALTH
:
1470 if (event
.b
.ae_logpage
== NVME_LOGPAGE_HEALTH
) {
1471 (void) nvme_get_logpage(nvme
, (void **)&health_log
,
1472 &logsize
, event
.b
.ae_logpage
, -1);
1474 dev_err(nvme
->n_dip
, CE_WARN
, "!wrong logpage in "
1475 "async event reply: %d", event
.b
.ae_logpage
);
1476 atomic_inc_32(&nvme
->n_wrong_logpage
);
1479 switch (event
.b
.ae_info
) {
1480 case NVME_ASYNC_HEALTH_RELIABILITY
:
1481 dev_err(nvme
->n_dip
, CE_WARN
,
1482 "!device reliability compromised");
1483 /* TODO: send ereport */
1484 atomic_inc_32(&nvme
->n_reliability_event
);
1487 case NVME_ASYNC_HEALTH_TEMPERATURE
:
1488 dev_err(nvme
->n_dip
, CE_WARN
,
1489 "!temperature above threshold");
1490 /* TODO: send ereport */
1491 atomic_inc_32(&nvme
->n_temperature_event
);
1494 case NVME_ASYNC_HEALTH_SPARE
:
1495 dev_err(nvme
->n_dip
, CE_WARN
,
1496 "!spare space below threshold");
1497 /* TODO: send ereport */
1498 atomic_inc_32(&nvme
->n_spare_event
);
1503 case NVME_ASYNC_TYPE_VENDOR
:
1504 dev_err(nvme
->n_dip
, CE_WARN
, "!vendor specific async event "
1505 "received, info = %x, logpage = %x", event
.b
.ae_info
,
1506 event
.b
.ae_logpage
);
1507 atomic_inc_32(&nvme
->n_vendor_event
);
1511 dev_err(nvme
->n_dip
, CE_WARN
, "!unknown async event received, "
1512 "type = %x, info = %x, logpage = %x", event
.b
.ae_type
,
1513 event
.b
.ae_info
, event
.b
.ae_logpage
);
1514 atomic_inc_32(&nvme
->n_unknown_event
);
1519 kmem_free(error_log
, logsize
);
1522 kmem_free(health_log
, logsize
);
1526 nvme_admin_cmd(nvme_cmd_t
*cmd
, int sec
)
1528 mutex_enter(&cmd
->nc_mutex
);
1529 nvme_submit_admin_cmd(cmd
->nc_nvme
->n_adminq
, cmd
);
1531 if (nvme_wait_cmd(cmd
, sec
) == B_FALSE
) {
1533 * The command timed out. An abort command was posted that
1534 * will take care of the cleanup.
1536 return (DDI_FAILURE
);
1538 mutex_exit(&cmd
->nc_mutex
);
1540 return (DDI_SUCCESS
);
1544 nvme_async_event(nvme_t
*nvme
)
1546 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1549 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_ASYNC_EVENT
;
1550 cmd
->nc_callback
= nvme_async_event_task
;
1552 nvme_submit_admin_cmd(nvme
->n_adminq
, cmd
);
1556 nvme_format_nvm(nvme_t
*nvme
, uint32_t nsid
, uint8_t lbaf
, boolean_t ms
,
1557 uint8_t pi
, boolean_t pil
, uint8_t ses
)
1559 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1560 nvme_format_nvm_t format_nvm
= { 0 };
1563 format_nvm
.b
.fm_lbaf
= lbaf
& 0xf;
1564 format_nvm
.b
.fm_ms
= ms
? 1 : 0;
1565 format_nvm
.b
.fm_pi
= pi
& 0x7;
1566 format_nvm
.b
.fm_pil
= pil
? 1 : 0;
1567 format_nvm
.b
.fm_ses
= ses
& 0x7;
1570 cmd
->nc_callback
= nvme_wakeup_cmd
;
1571 cmd
->nc_sqe
.sqe_nsid
= nsid
;
1572 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_NVM_FORMAT
;
1573 cmd
->nc_sqe
.sqe_cdw10
= format_nvm
.r
;
1576 * Some devices like Samsung SM951 don't allow formatting of all
1577 * namespaces in one command. Handle that gracefully.
1579 if (nsid
== (uint32_t)-1)
1580 cmd
->nc_dontpanic
= B_TRUE
;
1582 if ((ret
= nvme_admin_cmd(cmd
, nvme_format_cmd_timeout
))
1584 dev_err(nvme
->n_dip
, CE_WARN
,
1585 "!nvme_admin_cmd failed for FORMAT NVM");
1589 if ((ret
= nvme_check_cmd_status(cmd
)) != 0) {
1590 dev_err(nvme
->n_dip
, CE_WARN
,
1591 "!FORMAT failed with sct = %x, sc = %x",
1592 cmd
->nc_cqe
.cqe_sf
.sf_sct
, cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1600 nvme_get_logpage(nvme_t
*nvme
, void **buf
, size_t *bufsize
, uint8_t logpage
,
1603 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1604 nvme_getlogpage_t getlogpage
= { 0 };
1606 int ret
= DDI_FAILURE
;
1608 va_start(ap
, logpage
);
1611 cmd
->nc_callback
= nvme_wakeup_cmd
;
1612 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_GET_LOG_PAGE
;
1614 getlogpage
.b
.lp_lid
= logpage
;
1617 case NVME_LOGPAGE_ERROR
:
1618 cmd
->nc_sqe
.sqe_nsid
= (uint32_t)-1;
 * The GET LOG PAGE command can use at most 2 pages to return data;
 * PRP lists are not supported.
1623 *bufsize
= MIN(2 * nvme
->n_pagesize
,
1624 nvme
->n_error_log_len
* sizeof (nvme_error_log_entry_t
));
1627 case NVME_LOGPAGE_HEALTH
:
1628 cmd
->nc_sqe
.sqe_nsid
= va_arg(ap
, uint32_t);
1629 *bufsize
= sizeof (nvme_health_log_t
);
1632 case NVME_LOGPAGE_FWSLOT
:
1633 cmd
->nc_sqe
.sqe_nsid
= (uint32_t)-1;
1634 *bufsize
= sizeof (nvme_fwslot_log_t
);
1638 dev_err(nvme
->n_dip
, CE_WARN
, "!unknown log page requested: %d",
1640 atomic_inc_32(&nvme
->n_unknown_logpage
);
1646 getlogpage
.b
.lp_numd
= *bufsize
/ sizeof (uint32_t) - 1;
1648 cmd
->nc_sqe
.sqe_cdw10
= getlogpage
.r
;
1650 if (nvme_zalloc_dma(nvme
, getlogpage
.b
.lp_numd
* sizeof (uint32_t),
1651 DDI_DMA_READ
, &nvme
->n_prp_dma_attr
, &cmd
->nc_dma
) != DDI_SUCCESS
) {
1652 dev_err(nvme
->n_dip
, CE_WARN
,
1653 "!nvme_zalloc_dma failed for GET LOG PAGE");
1657 if (cmd
->nc_dma
->nd_ncookie
> 2) {
1658 dev_err(nvme
->n_dip
, CE_WARN
,
1659 "!too many DMA cookies for GET LOG PAGE");
1660 atomic_inc_32(&nvme
->n_too_many_cookies
);
1664 cmd
->nc_sqe
.sqe_dptr
.d_prp
[0] = cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1665 if (cmd
->nc_dma
->nd_ncookie
> 1) {
1666 ddi_dma_nextcookie(cmd
->nc_dma
->nd_dmah
,
1667 &cmd
->nc_dma
->nd_cookie
);
1668 cmd
->nc_sqe
.sqe_dptr
.d_prp
[1] =
1669 cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1672 if (nvme_admin_cmd(cmd
, nvme_admin_cmd_timeout
) != DDI_SUCCESS
) {
1673 dev_err(nvme
->n_dip
, CE_WARN
,
1674 "!nvme_admin_cmd failed for GET LOG PAGE");
1678 if (nvme_check_cmd_status(cmd
)) {
1679 dev_err(nvme
->n_dip
, CE_WARN
,
1680 "!GET LOG PAGE failed with sct = %x, sc = %x",
1681 cmd
->nc_cqe
.cqe_sf
.sf_sct
, cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1685 *buf
= kmem_alloc(*bufsize
, KM_SLEEP
);
1686 bcopy(cmd
->nc_dma
->nd_memp
, *buf
, *bufsize
);
1697 nvme_identify(nvme_t
*nvme
, uint32_t nsid
)
1699 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1703 cmd
->nc_callback
= nvme_wakeup_cmd
;
1704 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_IDENTIFY
;
1705 cmd
->nc_sqe
.sqe_nsid
= nsid
;
1706 cmd
->nc_sqe
.sqe_cdw10
= nsid
? NVME_IDENTIFY_NSID
: NVME_IDENTIFY_CTRL
;
1708 if (nvme_zalloc_dma(nvme
, NVME_IDENTIFY_BUFSIZE
, DDI_DMA_READ
,
1709 &nvme
->n_prp_dma_attr
, &cmd
->nc_dma
) != DDI_SUCCESS
) {
1710 dev_err(nvme
->n_dip
, CE_WARN
,
1711 "!nvme_zalloc_dma failed for IDENTIFY");
1715 if (cmd
->nc_dma
->nd_ncookie
> 2) {
1716 dev_err(nvme
->n_dip
, CE_WARN
,
1717 "!too many DMA cookies for IDENTIFY");
1718 atomic_inc_32(&nvme
->n_too_many_cookies
);
1722 cmd
->nc_sqe
.sqe_dptr
.d_prp
[0] = cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1723 if (cmd
->nc_dma
->nd_ncookie
> 1) {
1724 ddi_dma_nextcookie(cmd
->nc_dma
->nd_dmah
,
1725 &cmd
->nc_dma
->nd_cookie
);
1726 cmd
->nc_sqe
.sqe_dptr
.d_prp
[1] =
1727 cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1730 if (nvme_admin_cmd(cmd
, nvme_admin_cmd_timeout
) != DDI_SUCCESS
) {
1731 dev_err(nvme
->n_dip
, CE_WARN
,
1732 "!nvme_admin_cmd failed for IDENTIFY");
1736 if (nvme_check_cmd_status(cmd
)) {
1737 dev_err(nvme
->n_dip
, CE_WARN
,
1738 "!IDENTIFY failed with sct = %x, sc = %x",
1739 cmd
->nc_cqe
.cqe_sf
.sf_sct
, cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1743 buf
= kmem_alloc(NVME_IDENTIFY_BUFSIZE
, KM_SLEEP
);
1744 bcopy(cmd
->nc_dma
->nd_memp
, buf
, NVME_IDENTIFY_BUFSIZE
);
1753 nvme_set_features(nvme_t
*nvme
, uint32_t nsid
, uint8_t feature
, uint32_t val
,
1756 _NOTE(ARGUNUSED(nsid
));
1757 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1758 boolean_t ret
= B_FALSE
;
1760 ASSERT(res
!= NULL
);
1763 cmd
->nc_callback
= nvme_wakeup_cmd
;
1764 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_SET_FEATURES
;
1765 cmd
->nc_sqe
.sqe_cdw10
= feature
;
1766 cmd
->nc_sqe
.sqe_cdw11
= val
;
1769 case NVME_FEAT_WRITE_CACHE
:
1770 if (!nvme
->n_write_cache_present
)
1774 case NVME_FEAT_NQUEUES
:
1781 if (nvme_admin_cmd(cmd
, nvme_admin_cmd_timeout
) != DDI_SUCCESS
) {
1782 dev_err(nvme
->n_dip
, CE_WARN
,
1783 "!nvme_admin_cmd failed for SET FEATURES");
1787 if (nvme_check_cmd_status(cmd
)) {
1788 dev_err(nvme
->n_dip
, CE_WARN
,
1789 "!SET FEATURES %d failed with sct = %x, sc = %x",
1790 feature
, cmd
->nc_cqe
.cqe_sf
.sf_sct
,
1791 cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1795 *res
= cmd
->nc_cqe
.cqe_dw0
;
1804 nvme_get_features(nvme_t
*nvme
, uint32_t nsid
, uint8_t feature
, uint32_t *res
,
1805 void **buf
, size_t *bufsize
)
1807 nvme_cmd_t
*cmd
= nvme_alloc_cmd(nvme
, KM_SLEEP
);
1808 boolean_t ret
= B_FALSE
;
1810 ASSERT(res
!= NULL
);
1812 if (bufsize
!= NULL
)
1816 cmd
->nc_callback
= nvme_wakeup_cmd
;
1817 cmd
->nc_sqe
.sqe_opc
= NVME_OPC_GET_FEATURES
;
1818 cmd
->nc_sqe
.sqe_cdw10
= feature
;
1819 cmd
->nc_sqe
.sqe_cdw11
= *res
;
1822 case NVME_FEAT_ARBITRATION
:
1823 case NVME_FEAT_POWER_MGMT
:
1824 case NVME_FEAT_TEMPERATURE
:
1825 case NVME_FEAT_ERROR
:
1826 case NVME_FEAT_NQUEUES
:
1827 case NVME_FEAT_INTR_COAL
:
1828 case NVME_FEAT_INTR_VECT
:
1829 case NVME_FEAT_WRITE_ATOM
:
1830 case NVME_FEAT_ASYNC_EVENT
:
1831 case NVME_FEAT_PROGRESS
:
1834 case NVME_FEAT_WRITE_CACHE
:
1835 if (!nvme
->n_write_cache_present
)
1839 case NVME_FEAT_LBA_RANGE
:
1840 if (!nvme
->n_lba_range_supported
)
 * The LBA Range Type feature is optional. There doesn't seem to
 * be a method of detecting whether it is supported other than
 * using it. This will cause an "invalid field in command" error,
 * which is normally considered a programming error and causes a
 * panic in nvme_check_generic_cmd_status().
1850 cmd
->nc_dontpanic
= B_TRUE
;
1851 cmd
->nc_sqe
.sqe_nsid
= nsid
;
1852 ASSERT(bufsize
!= NULL
);
1853 *bufsize
= NVME_LBA_RANGE_BUFSIZE
;
1857 case NVME_FEAT_AUTO_PST
:
1858 if (!nvme
->n_auto_pst_supported
)
1861 ASSERT(bufsize
!= NULL
);
1862 *bufsize
= NVME_AUTO_PST_BUFSIZE
;
1869 if (bufsize
!= NULL
&& *bufsize
!= 0) {
1870 if (nvme_zalloc_dma(nvme
, *bufsize
, DDI_DMA_READ
,
1871 &nvme
->n_prp_dma_attr
, &cmd
->nc_dma
) != DDI_SUCCESS
) {
1872 dev_err(nvme
->n_dip
, CE_WARN
,
1873 "!nvme_zalloc_dma failed for GET FEATURES");
1877 if (cmd
->nc_dma
->nd_ncookie
> 2) {
1878 dev_err(nvme
->n_dip
, CE_WARN
,
1879 "!too many DMA cookies for GET FEATURES");
1880 atomic_inc_32(&nvme
->n_too_many_cookies
);
1884 cmd
->nc_sqe
.sqe_dptr
.d_prp
[0] =
1885 cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1886 if (cmd
->nc_dma
->nd_ncookie
> 1) {
1887 ddi_dma_nextcookie(cmd
->nc_dma
->nd_dmah
,
1888 &cmd
->nc_dma
->nd_cookie
);
1889 cmd
->nc_sqe
.sqe_dptr
.d_prp
[1] =
1890 cmd
->nc_dma
->nd_cookie
.dmac_laddress
;
1894 if (nvme_admin_cmd(cmd
, nvme_admin_cmd_timeout
) != DDI_SUCCESS
) {
1895 dev_err(nvme
->n_dip
, CE_WARN
,
1896 "!nvme_admin_cmd failed for GET FEATURES");
1900 if (nvme_check_cmd_status(cmd
)) {
1901 if (feature
== NVME_FEAT_LBA_RANGE
&&
1902 cmd
->nc_cqe
.cqe_sf
.sf_sct
== NVME_CQE_SCT_GENERIC
&&
1903 cmd
->nc_cqe
.cqe_sf
.sf_sc
== NVME_CQE_SC_GEN_INV_FLD
)
1904 nvme
->n_lba_range_supported
= B_FALSE
;
1906 dev_err(nvme
->n_dip
, CE_WARN
,
1907 "!GET FEATURES %d failed with sct = %x, sc = %x",
1908 feature
, cmd
->nc_cqe
.cqe_sf
.sf_sct
,
1909 cmd
->nc_cqe
.cqe_sf
.sf_sc
);
1913 if (bufsize
!= NULL
&& *bufsize
!= 0) {
1914 ASSERT(buf
!= NULL
);
1915 *buf
= kmem_alloc(*bufsize
, KM_SLEEP
);
1916 bcopy(cmd
->nc_dma
->nd_memp
, *buf
, *bufsize
);
1919 *res
= cmd
->nc_cqe
.cqe_dw0
;
1928 nvme_write_cache_set(nvme_t
*nvme
, boolean_t enable
)
1930 nvme_write_cache_t nwc
= { 0 };
1935 if (!nvme_set_features(nvme
, 0, NVME_FEAT_WRITE_CACHE
, nwc
.r
, &nwc
.r
))
1942 nvme_set_nqueues(nvme_t
*nvme
, uint16_t nqueues
)
1944 nvme_nqueues_t nq
= { 0 };
1946 nq
.b
.nq_nsq
= nq
.b
.nq_ncq
= nqueues
- 1;
1948 if (!nvme_set_features(nvme
, 0, NVME_FEAT_NQUEUES
, nq
.r
, &nq
.r
)) {
1953 * Always use the same number of submission and completion queues, and
1954 * never use more than the requested number of queues.
1956 return (MIN(nqueues
, MIN(nq
.b
.nq_nsq
, nq
.b
.nq_ncq
) + 1));
1960 nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx)
1962    nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1963    nvme_create_queue_dw10_t dw10 = { 0 };
1964    nvme_create_cq_dw11_t c_dw11 = { 0 };
1965    nvme_create_sq_dw11_t s_dw11 = { 0 };
1968    dw10.b.q_qsize = qp->nq_nentry - 1;
1971    c_dw11.b.cq_ien = 1;
1972    c_dw11.b.cq_iv = idx % nvme->n_intr_cnt;
1975    cmd->nc_callback = nvme_wakeup_cmd;
1976    cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_CQUEUE;
1977    cmd->nc_sqe.sqe_cdw10 = dw10.r;
1978    cmd->nc_sqe.sqe_cdw11 = c_dw11.r;
1979    cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_cqdma->nd_cookie.dmac_laddress;
1981    if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1982        dev_err(nvme->n_dip, CE_WARN,
1983            "!nvme_admin_cmd failed for CREATE CQUEUE");
1984        return (DDI_FAILURE);
1987    if (nvme_check_cmd_status(cmd)) {
1988        dev_err(nvme->n_dip, CE_WARN,
1989            "!CREATE CQUEUE failed with sct = %x, sc = %x",
1990            cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1992        return (DDI_FAILURE);
1998    s_dw11.b.sq_cqid = idx;
2000    cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
2002    cmd->nc_callback = nvme_wakeup_cmd;
2003    cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_SQUEUE;
2004    cmd->nc_sqe.sqe_cdw10 = dw10.r;
2005    cmd->nc_sqe.sqe_cdw11 = s_dw11.r;
2006    cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress;
2008    if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
2009        dev_err(nvme->n_dip, CE_WARN,
2010            "!nvme_admin_cmd failed for CREATE SQUEUE");
2011        return (DDI_FAILURE);
2014    if (nvme_check_cmd_status(cmd)) {
2015        dev_err(nvme->n_dip, CE_WARN,
2016            "!CREATE SQUEUE failed with sct = %x, sc = %x",
2017            cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
2019        return (DDI_FAILURE);
2024    return (DDI_SUCCESS);
2028 nvme_reset(nvme_t *nvme, boolean_t quiesce)
2030    nvme_reg_csts_t csts;
2033    nvme_put32(nvme, NVME_REG_CC, 0);
2035    csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2036    if (csts.b.csts_rdy == 1) {
2037        nvme_put32(nvme, NVME_REG_CC, 0);
2038        for (i = 0; i != nvme->n_timeout * 10; i++) {
2039            csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2040            if (csts.b.csts_rdy == 0)
2044            drv_usecwait(50000);
2046            delay(drv_usectohz(50000));
2050    nvme_put32(nvme, NVME_REG_AQA, 0);
2051    nvme_put32(nvme, NVME_REG_ASQ, 0);
2052    nvme_put32(nvme, NVME_REG_ACQ, 0);
2054    csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2055    return (csts.b.csts_rdy == 0 ? B_TRUE : B_FALSE);
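    /*
     * A note on the timing above: n_timeout is taken from CAP.TO, which the
     * spec expresses in 500 ms units, so polling CSTS.RDY every 50 ms for
     * n_timeout * 10 iterations waits roughly as long as the controller
     * itself advertised before the reset is given up on.
     */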
2059 nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce)
2062    nvme_reg_csts_t csts;
2065    ASSERT(mode == NVME_CC_SHN_NORMAL || mode == NVME_CC_SHN_ABRUPT);
2067    cc.r = nvme_get32(nvme, NVME_REG_CC);
2068    cc.b.cc_shn = mode & 0x3;
2069    nvme_put32(nvme, NVME_REG_CC, cc.r);
2071    for (i = 0; i != 10; i++) {
2072        csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2073        if (csts.b.csts_shst == NVME_CSTS_SHN_COMPLETE)
2077        drv_usecwait(100000);
2079        delay(drv_usectohz(100000));
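    /*
     * The loop above polls CSTS.SHST every 100 ms for at most 10 iterations,
     * i.e. shutdown processing is given roughly one second to reach
     * NVME_CSTS_SHN_COMPLETE before the function returns regardless.
     */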
2085 nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
2088     * Section 7.7 of the spec describes how to get a unique ID for
2089     * the controller: the vendor ID, the model name and the serial
2090     * number shall be unique when combined.
2092     * If a namespace has no EUI64 we use the above and add the hex
2093     * namespace ID to get a unique ID for the namespace.
2095    char model[sizeof (nvme->n_idctl->id_model) + 1];
2096    char serial[sizeof (nvme->n_idctl->id_serial) + 1];
2098    bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
2099    bcopy(nvme->n_idctl->id_serial, serial,
2100        sizeof (nvme->n_idctl->id_serial));
2102    model[sizeof (nvme->n_idctl->id_model)] = '\0';
2103    serial[sizeof (nvme->n_idctl->id_serial)] = '\0';
2105    nvme->n_ns[nsid - 1].ns_devid = kmem_asprintf("%4X-%s-%s-%X",
2106        nvme->n_idctl->id_vid, model, serial, nsid);
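    /*
     * As an illustration (hypothetical identify data): a controller with
     * vendor ID 0x8086, model "Model X" and serial "SN123" would yield the
     * devid string "8086-Model X-SN123-1" for namespace 1. This string form
     * is only the fallback for namespaces without an EUI64.
     */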
2110 nvme_init_ns(nvme_t *nvme, int nsid)
2112    nvme_namespace_t *ns = &nvme->n_ns[nsid - 1];
2113    nvme_identify_nsid_t *idns;
2117    idns = nvme_identify(nvme, nsid);
2120        dev_err(nvme->n_dip, CE_WARN,
2121            "!failed to identify namespace %d", nsid);
2122        return (DDI_FAILURE);
2127    ns->ns_block_count = idns->id_nsize;
2129        1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
2130    ns->ns_best_block_size = ns->ns_block_size;
2133     * Get the EUI64 if present. Use it for devid and device node names.
2135    if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
2136        bcopy(idns->id_eui64, ns->ns_eui64, sizeof (ns->ns_eui64));
2138    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
2139    if (*(uint64_t *)ns->ns_eui64 != 0) {
2140        uint8_t *eui64 = ns->ns_eui64;
2142        (void) snprintf(ns->ns_name, sizeof (ns->ns_name),
2143            "%02x%02x%02x%02x%02x%02x%02x%02x",
2144            eui64[0], eui64[1], eui64[2], eui64[3],
2145            eui64[4], eui64[5], eui64[6], eui64[7]);
2147        (void) snprintf(ns->ns_name, sizeof (ns->ns_name), "%d",
2150        nvme_prepare_devid(nvme, ns->ns_id);
2154     * Find the LBA format with no metadata and the best relative
2155     * performance. A value of 3 means "degraded", 0 is best.
2158    for (int j = 0; j <= idns->id_nlbaf; j++) {
2159        if (idns->id_lbaf[j].lbaf_lbads == 0)
2161        if (idns->id_lbaf[j].lbaf_ms != 0)
2163        if (idns->id_lbaf[j].lbaf_rp >= last_rp)
2165        last_rp = idns->id_lbaf[j].lbaf_rp;
2166        ns->ns_best_block_size =
2167            1 << idns->id_lbaf[j].lbaf_lbads;
2170    if (ns->ns_best_block_size < nvme->n_min_block_size)
2171        ns->ns_best_block_size = nvme->n_min_block_size;
2174     * We currently don't support namespaces that use either:
2175     * - thin provisioning
2176     * - protection information
2177     * - illegal block size (< 512)
2179    if (idns->id_nsfeat.f_thin ||
2180        idns->id_dps.dp_pinfo) {
2181        dev_err(nvme->n_dip, CE_WARN,
2182            "!ignoring namespace %d, unsupported features: "
2183            "thin = %d, pinfo = %d", nsid,
2184            idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
2185        ns->ns_ignore = B_TRUE;
2186    } else if (ns->ns_block_size < 512) {
2187        dev_err(nvme->n_dip, CE_WARN,
2188            "!ignoring namespace %d, unsupported block size %"PRIu64,
2189            nsid, (uint64_t)ns->ns_block_size);
2190        ns->ns_ignore = B_TRUE;
2192        ns->ns_ignore = B_FALSE;
2195    return (DDI_SUCCESS);
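    /*
     * Example of the best-block-size selection above (hypothetical LBA
     * formats): given one 512 B format (lbaf_lbads = 9, lbaf_rp = 1) and one
     * 4k format (lbaf_lbads = 12, lbaf_rp = 0), both without metadata, the
     * loop ends up preferring the 4k format because its relative performance
     * value is lower; n_min_block_size afterwards only raises the result if
     * it was configured larger than the chosen format.
     */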
2199 nvme_init(nvme_t *nvme)
2201    nvme_reg_cc_t cc = { 0 };
2202    nvme_reg_aqa_t aqa = { 0 };
2203    nvme_reg_asq_t asq = { 0 };
2204    nvme_reg_acq_t acq = { 0 };
2207    nvme_reg_csts_t csts;
2210    char model[sizeof (nvme->n_idctl->id_model) + 1];
2211    char *vendor, *product;
2213    /* Check controller version */
2214    vs.r = nvme_get32(nvme, NVME_REG_VS);
2215    nvme->n_version.v_major = vs.b.vs_mjr;
2216    nvme->n_version.v_minor = vs.b.vs_mnr;
2217    dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d",
2218        nvme->n_version.v_major, nvme->n_version.v_minor);
2220    if (NVME_VERSION_HIGHER(&nvme->n_version,
2221        nvme_version_major, nvme_version_minor)) {
2222        dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d",
2223            nvme_version_major, nvme_version_minor);
2224        if (nvme->n_strict_version)
2228    /* retrieve controller configuration */
2229    cap.r = nvme_get64(nvme, NVME_REG_CAP);
2231    if ((cap.b.cap_css & NVME_CAP_CSS_NVM) == 0) {
2232        dev_err(nvme->n_dip, CE_WARN,
2233            "!NVM command set not supported by hardware");
2237    nvme->n_nssr_supported = cap.b.cap_nssrs;
2238    nvme->n_doorbell_stride = 4 << cap.b.cap_dstrd;
2239    nvme->n_timeout = cap.b.cap_to;
2240    nvme->n_arbitration_mechanisms = cap.b.cap_ams;
2241    nvme->n_cont_queues_reqd = cap.b.cap_cqr;
2242    nvme->n_max_queue_entries = cap.b.cap_mqes + 1;
2245     * The MPSMIN and MPSMAX fields in the CAP register use 0 to specify
2246     * the base page size of 4k (1<<12), so add 12 here to get the real
2249    nvme->n_pageshift = MIN(MAX(cap.b.cap_mpsmin + 12, PAGESHIFT),
2250        cap.b.cap_mpsmax + 12);
2251    nvme->n_pagesize = 1UL << (nvme->n_pageshift);
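    /*
     * For example, a controller reporting cap_mpsmin = 0 and cap_mpsmax = 4
     * supports page sizes from 4k (1 << 12) up to 64k (1 << 16); on x86 with
     * PAGESHIFT == 12 the expression above then selects n_pageshift = 12 and
     * n_pagesize = 4096.
     */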
2254     * Set up Queue DMA to transfer at least 1 page-aligned page at a time.
2256    nvme->n_queue_dma_attr.dma_attr_align = nvme->n_pagesize;
2257    nvme->n_queue_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
2260     * Set up PRP DMA to transfer 1 page-aligned page at a time.
2261     * Maxxfer may be increased after we identified the controller limits.
2263    nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_pagesize;
2264    nvme->n_prp_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
2265    nvme->n_prp_dma_attr.dma_attr_align = nvme->n_pagesize;
2266    nvme->n_prp_dma_attr.dma_attr_seg = nvme->n_pagesize - 1;
2269     * Reset controller if it's still in ready state.
2271    if (nvme_reset(nvme, B_FALSE) == B_FALSE) {
2272        dev_err(nvme->n_dip, CE_WARN, "!unable to reset controller");
2273        ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
2274        nvme->n_dead = B_TRUE;
2279     * Create the admin queue pair.
2281    if (nvme_alloc_qpair(nvme, nvme->n_admin_queue_len, &nvme->n_adminq, 0)
2283        dev_err(nvme->n_dip, CE_WARN,
2284            "!unable to allocate admin qpair");
2287    nvme->n_ioq = kmem_alloc(sizeof (nvme_qpair_t *), KM_SLEEP);
2288    nvme->n_ioq[0] = nvme->n_adminq;
2290    nvme->n_progress |= NVME_ADMIN_QUEUE;
2292    (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
2293        "admin-queue-len", nvme->n_admin_queue_len);
2295    aqa.b.aqa_asqs = aqa.b.aqa_acqs = nvme->n_admin_queue_len - 1;
2296    asq = nvme->n_adminq->nq_sqdma->nd_cookie.dmac_laddress;
2297    acq = nvme->n_adminq->nq_cqdma->nd_cookie.dmac_laddress;
2299    ASSERT((asq & (nvme->n_pagesize - 1)) == 0);
2300    ASSERT((acq & (nvme->n_pagesize - 1)) == 0);
2302    nvme_put32(nvme, NVME_REG_AQA, aqa.r);
2303    nvme_put64(nvme, NVME_REG_ASQ, asq);
2304    nvme_put64(nvme, NVME_REG_ACQ, acq);
2306    cc.b.cc_ams = 0;    /* use Round-Robin arbitration */
2307    cc.b.cc_css = 0;    /* use NVM command set */
2308    cc.b.cc_mps = nvme->n_pageshift - 12;
2309    cc.b.cc_shn = 0;    /* no shutdown in progress */
2310    cc.b.cc_en = 1;     /* enable controller */
2311    cc.b.cc_iosqes = 6; /* submission queue entry is 2^6 bytes long */
2312    cc.b.cc_iocqes = 4; /* completion queue entry is 2^4 bytes long */
2314    nvme_put32(nvme, NVME_REG_CC, cc.r);
2317     * Wait for the controller to become ready.
2319    csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2320    if (csts.b.csts_rdy == 0) {
2321        for (i = 0; i != nvme->n_timeout * 10; i++) {
2322            delay(drv_usectohz(50000));
2323            csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2325            if (csts.b.csts_cfs == 1) {
2326                dev_err(nvme->n_dip, CE_WARN,
2327                    "!controller fatal status at init");
2328                ddi_fm_service_impact(nvme->n_dip,
2330                nvme->n_dead = B_TRUE;
2334            if (csts.b.csts_rdy == 1)
2339    if (csts.b.csts_rdy == 0) {
2340        dev_err(nvme->n_dip, CE_WARN, "!controller not ready");
2341        ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
2342        nvme->n_dead = B_TRUE;
2347     * Assume an abort command limit of 1. We'll destroy and re-init
2348     * that later when we know the true abort command limit.
2350    sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);
2353 * Setup initial interrupt for admin queue.
2355 if ((nvme_setup_interrupts(nvme
, DDI_INTR_TYPE_MSIX
, 1)
2357 (nvme_setup_interrupts(nvme
, DDI_INTR_TYPE_MSI
, 1)
2359 (nvme_setup_interrupts(nvme
, DDI_INTR_TYPE_FIXED
, 1)
2361 dev_err(nvme
->n_dip
, CE_WARN
,
2362 "!failed to setup initial interrupt");
2367 * Post an asynchronous event command to catch errors.
2369 nvme_async_event(nvme
);
2372 * Identify Controller
2374 nvme
->n_idctl
= nvme_identify(nvme
, 0);
2375 if (nvme
->n_idctl
== NULL
) {
2376 dev_err(nvme
->n_dip
, CE_WARN
,
2377 "!failed to identify controller");
2382 * Get Vendor & Product ID
2384 bcopy(nvme
->n_idctl
->id_model
, model
, sizeof (nvme
->n_idctl
->id_model
));
2385 model
[sizeof (nvme
->n_idctl
->id_model
)] = '\0';
2386 sata_split_model(model
, &vendor
, &product
);
2389 nvme
->n_vendor
= strdup("NVMe");
2391 nvme
->n_vendor
= strdup(vendor
);
2393 nvme
->n_product
= strdup(product
);
2396 * Get controller limits.
2398 nvme
->n_async_event_limit
= MAX(NVME_MIN_ASYNC_EVENT_LIMIT
,
2399 MIN(nvme
->n_admin_queue_len
/ 10,
2400 MIN(nvme
->n_idctl
->id_aerl
+ 1, nvme
->n_async_event_limit
)));
2402 (void) ddi_prop_update_int(DDI_DEV_T_NONE
, nvme
->n_dip
,
2403 "async-event-limit", nvme
->n_async_event_limit
);
2405 nvme
->n_abort_command_limit
= nvme
->n_idctl
->id_acl
+ 1;
2408 * Reinitialize the semaphore with the true abort command limit
2409 * supported by the hardware. It's not necessary to disable interrupts
2410 * as only command aborts use the semaphore, and no commands are
2411 * executed or aborted while we're here.
2413 sema_destroy(&nvme
->n_abort_sema
);
2414 sema_init(&nvme
->n_abort_sema
, nvme
->n_abort_command_limit
- 1, NULL
,
2417 nvme
->n_progress
|= NVME_CTRL_LIMITS
;
2419    if (nvme->n_idctl->id_mdts == 0)
2420        nvme->n_max_data_transfer_size = nvme->n_pagesize * 65536;
2422        nvme->n_max_data_transfer_size =
2423            1ull << (nvme->n_pageshift + nvme->n_idctl->id_mdts);
2425    nvme->n_error_log_len = nvme->n_idctl->id_elpe + 1;
2428     * Limit n_max_data_transfer_size to what we can handle in one PRP.
2429     * Chained PRPs are currently unsupported.
2431     * This is a no-op on hardware which doesn't support a transfer size
2432     * big enough to require chained PRPs.
2434    nvme->n_max_data_transfer_size = MIN(nvme->n_max_data_transfer_size,
2435        (nvme->n_pagesize / sizeof (uint64_t) * nvme->n_pagesize));
2437    nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_max_data_transfer_size;
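    /*
     * To put the PRP limit above into numbers: with 4k pages a single PRP
     * list page holds 4096 / 8 = 512 entries, so without chained PRPs one
     * command can transfer at most 512 * 4k = 2 MB. A larger limit derived
     * from the controller's MDTS is clamped to that value.
     */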
2440 * Make sure the minimum/maximum queue entry sizes are not
2441 * larger/smaller than the default.
2444 if (((1 << nvme
->n_idctl
->id_sqes
.qes_min
) > sizeof (nvme_sqe_t
)) ||
2445 ((1 << nvme
->n_idctl
->id_sqes
.qes_max
) < sizeof (nvme_sqe_t
)) ||
2446 ((1 << nvme
->n_idctl
->id_cqes
.qes_min
) > sizeof (nvme_cqe_t
)) ||
2447 ((1 << nvme
->n_idctl
->id_cqes
.qes_max
) < sizeof (nvme_cqe_t
)))
2451 * Check for the presence of a Volatile Write Cache. If present,
2452 * enable or disable based on the value of the property
2453 * volatile-write-cache-enable (default is enabled).
2455 nvme
->n_write_cache_present
=
2456 nvme
->n_idctl
->id_vwc
.vwc_present
== 0 ? B_FALSE
: B_TRUE
;
2458 (void) ddi_prop_update_int(DDI_DEV_T_NONE
, nvme
->n_dip
,
2459 "volatile-write-cache-present",
2460 nvme
->n_write_cache_present
? 1 : 0);
2462 if (!nvme
->n_write_cache_present
) {
2463 nvme
->n_write_cache_enabled
= B_FALSE
;
2464 } else if (!nvme_write_cache_set(nvme
, nvme
->n_write_cache_enabled
)) {
2465 dev_err(nvme
->n_dip
, CE_WARN
,
2466 "!failed to %sable volatile write cache",
2467 nvme
->n_write_cache_enabled
? "en" : "dis");
2469 * Assume the cache is (still) enabled.
2471 nvme
->n_write_cache_enabled
= B_TRUE
;
2474 (void) ddi_prop_update_int(DDI_DEV_T_NONE
, nvme
->n_dip
,
2475 "volatile-write-cache-enable",
2476 nvme
->n_write_cache_enabled
? 1 : 0);
2479 * Assume LBA Range Type feature is supported. If it isn't this
2480 * will be set to B_FALSE by nvme_get_features().
2482 nvme
->n_lba_range_supported
= B_TRUE
;
2485 * Check support for Autonomous Power State Transition.
2487 if (NVME_VERSION_ATLEAST(&nvme
->n_version
, 1, 1))
2488 nvme
->n_auto_pst_supported
=
2489 nvme
->n_idctl
->id_apsta
.ap_sup
== 0 ? B_FALSE
: B_TRUE
;
2492 * Identify Namespaces
2494 nvme
->n_namespace_count
= nvme
->n_idctl
->id_nn
;
2495 if (nvme
->n_namespace_count
> NVME_MINOR_MAX
) {
2496 dev_err(nvme
->n_dip
, CE_WARN
,
2497 "!too many namespaces: %d, limiting to %d\n",
2498 nvme
->n_namespace_count
, NVME_MINOR_MAX
);
2499 nvme
->n_namespace_count
= NVME_MINOR_MAX
;
2502 nvme
->n_ns
= kmem_zalloc(sizeof (nvme_namespace_t
) *
2503 nvme
->n_namespace_count
, KM_SLEEP
);
2505 for (i
= 0; i
!= nvme
->n_namespace_count
; i
++) {
2506 mutex_init(&nvme
->n_ns
[i
].ns_minor
.nm_mutex
, NULL
, MUTEX_DRIVER
,
2508 if (nvme_init_ns(nvme
, i
+ 1) != DDI_SUCCESS
)
2513 * Try to set up MSI/MSI-X interrupts.
2515 if ((nvme
->n_intr_types
& (DDI_INTR_TYPE_MSI
| DDI_INTR_TYPE_MSIX
))
2517 nvme_release_interrupts(nvme
);
2519 nqueues
= MIN(UINT16_MAX
, ncpus
);
2521 if ((nvme_setup_interrupts(nvme
, DDI_INTR_TYPE_MSIX
,
2522 nqueues
) != DDI_SUCCESS
) &&
2523 (nvme_setup_interrupts(nvme
, DDI_INTR_TYPE_MSI
,
2524 nqueues
) != DDI_SUCCESS
)) {
2525 dev_err(nvme
->n_dip
, CE_WARN
,
2526 "!failed to setup MSI/MSI-X interrupts");
2531 nqueues
= nvme
->n_intr_cnt
;
2534 * Create I/O queue pairs.
2536 nvme
->n_ioq_count
= nvme_set_nqueues(nvme
, nqueues
);
2537 if (nvme
->n_ioq_count
== 0) {
2538 dev_err(nvme
->n_dip
, CE_WARN
,
2539 "!failed to set number of I/O queues to %d", nqueues
);
2544 * Reallocate I/O queue array
2546 kmem_free(nvme
->n_ioq
, sizeof (nvme_qpair_t
*));
2547 nvme
->n_ioq
= kmem_zalloc(sizeof (nvme_qpair_t
*) *
2548 (nvme
->n_ioq_count
+ 1), KM_SLEEP
);
2549 nvme
->n_ioq
[0] = nvme
->n_adminq
;
2552 * If we got less queues than we asked for we might as well give
2553 * some of the interrupt vectors back to the system.
2555 if (nvme
->n_ioq_count
< nqueues
) {
2556 nvme_release_interrupts(nvme
);
2558 if (nvme_setup_interrupts(nvme
, nvme
->n_intr_type
,
2559 nvme
->n_ioq_count
) != DDI_SUCCESS
) {
2560 dev_err(nvme
->n_dip
, CE_WARN
,
2561 "!failed to reduce number of interrupts");
2567 * Alloc & register I/O queue pairs
2569 nvme
->n_io_queue_len
=
2570 MIN(nvme
->n_io_queue_len
, nvme
->n_max_queue_entries
);
2571 (void) ddi_prop_update_int(DDI_DEV_T_NONE
, nvme
->n_dip
, "io-queue-len",
2572 nvme
->n_io_queue_len
);
2574 for (i
= 1; i
!= nvme
->n_ioq_count
+ 1; i
++) {
2575 if (nvme_alloc_qpair(nvme
, nvme
->n_io_queue_len
,
2576 &nvme
->n_ioq
[i
], i
) != DDI_SUCCESS
) {
2577 dev_err(nvme
->n_dip
, CE_WARN
,
2578 "!unable to allocate I/O qpair %d", i
);
2582 if (nvme_create_io_qpair(nvme
, nvme
->n_ioq
[i
], i
)
2584 dev_err(nvme
->n_dip
, CE_WARN
,
2585 "!unable to create I/O qpair %d", i
);
2591 * Post more asynchronous events commands to reduce event reporting
2592 * latency as suggested by the spec.
2594 for (i
= 1; i
!= nvme
->n_async_event_limit
; i
++)
2595 nvme_async_event(nvme
);
2597 return (DDI_SUCCESS
);
2600 (void) nvme_reset(nvme
, B_FALSE
);
2601 return (DDI_FAILURE
);
2605 nvme_intr(caddr_t arg1, caddr_t arg2)
2607    /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2608    nvme_t *nvme = (nvme_t *)arg1;
2609    int inum = (int)(uintptr_t)arg2;
2614    if (inum >= nvme->n_intr_cnt)
2615        return (DDI_INTR_UNCLAIMED);
2618     * The interrupt vector a queue uses is calculated as queue_idx %
2619     * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
2620     * in steps of n_intr_cnt to process all queues using this vector.
2623        qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
2624        qnum += nvme->n_intr_cnt) {
2625        while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
2626            taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
2627                cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
2632    return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
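    /*
     * Mapping example for the loop above: with n_intr_cnt == 4 and
     * n_ioq_count == 8, queue 5 was created with cq_iv = 5 % 4 = 1, so the
     * handler for vector 1 starts at qnum = 1 and then visits qnum = 5,
     * while vector 0 handles the admin queue (qnum = 0) as well as
     * queues 4 and 8.
     */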
2636 nvme_release_interrupts(nvme_t *nvme)
2640    for (i = 0; i < nvme->n_intr_cnt; i++) {
2641        if (nvme->n_inth[i] == NULL)
2644        if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
2645            (void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
2647            (void) ddi_intr_disable(nvme->n_inth[i]);
2649        (void) ddi_intr_remove_handler(nvme->n_inth[i]);
2650        (void) ddi_intr_free(nvme->n_inth[i]);
2653    kmem_free(nvme->n_inth, nvme->n_inth_sz);
2654    nvme->n_inth = NULL;
2655    nvme->n_inth_sz = 0;
2657    nvme->n_progress &= ~NVME_INTERRUPTS;
2661 nvme_setup_interrupts(nvme_t
*nvme
, int intr_type
, int nqpairs
)
2663 int nintrs
, navail
, count
;
2667 if (nvme
->n_intr_types
== 0) {
2668 ret
= ddi_intr_get_supported_types(nvme
->n_dip
,
2669 &nvme
->n_intr_types
);
2670 if (ret
!= DDI_SUCCESS
) {
2671 dev_err(nvme
->n_dip
, CE_WARN
,
2672 "!%s: ddi_intr_get_supported types failed",
2677 if (get_hwenv() == HW_VMWARE
)
2678 nvme
->n_intr_types
&= ~DDI_INTR_TYPE_MSIX
;
2682 if ((nvme
->n_intr_types
& intr_type
) == 0)
2683 return (DDI_FAILURE
);
2685 ret
= ddi_intr_get_nintrs(nvme
->n_dip
, intr_type
, &nintrs
);
2686 if (ret
!= DDI_SUCCESS
) {
2687 dev_err(nvme
->n_dip
, CE_WARN
, "!%s: ddi_intr_get_nintrs failed",
2692 ret
= ddi_intr_get_navail(nvme
->n_dip
, intr_type
, &navail
);
2693 if (ret
!= DDI_SUCCESS
) {
2694 dev_err(nvme
->n_dip
, CE_WARN
, "!%s: ddi_intr_get_navail failed",
2699 /* We want at most one interrupt per queue pair. */
2700 if (navail
> nqpairs
)
2703 nvme
->n_inth_sz
= sizeof (ddi_intr_handle_t
) * navail
;
2704 nvme
->n_inth
= kmem_zalloc(nvme
->n_inth_sz
, KM_SLEEP
);
2706 ret
= ddi_intr_alloc(nvme
->n_dip
, nvme
->n_inth
, intr_type
, 0, navail
,
2708 if (ret
!= DDI_SUCCESS
) {
2709 dev_err(nvme
->n_dip
, CE_WARN
, "!%s: ddi_intr_alloc failed",
2714 nvme
->n_intr_cnt
= count
;
2716 ret
= ddi_intr_get_pri(nvme
->n_inth
[0], &nvme
->n_intr_pri
);
2717 if (ret
!= DDI_SUCCESS
) {
2718 dev_err(nvme
->n_dip
, CE_WARN
, "!%s: ddi_intr_get_pri failed",
2723 for (i
= 0; i
< count
; i
++) {
2724 ret
= ddi_intr_add_handler(nvme
->n_inth
[i
], nvme_intr
,
2725 (void *)nvme
, (void *)(uintptr_t)i
);
2726 if (ret
!= DDI_SUCCESS
) {
2727 dev_err(nvme
->n_dip
, CE_WARN
,
2728 "!%s: ddi_intr_add_handler failed", __func__
);
2733 (void) ddi_intr_get_cap(nvme
->n_inth
[0], &nvme
->n_intr_cap
);
2735 for (i
= 0; i
< count
; i
++) {
2736 if (nvme
->n_intr_cap
& DDI_INTR_FLAG_BLOCK
)
2737 ret
= ddi_intr_block_enable(&nvme
->n_inth
[i
], 1);
2739 ret
= ddi_intr_enable(nvme
->n_inth
[i
]);
2741 if (ret
!= DDI_SUCCESS
) {
2742 dev_err(nvme
->n_dip
, CE_WARN
,
2743 "!%s: enabling interrupt %d failed", __func__
, i
);
2748 nvme
->n_intr_type
= intr_type
;
2750 nvme
->n_progress
|= NVME_INTERRUPTS
;
2752 return (DDI_SUCCESS
);
2755 nvme_release_interrupts(nvme
);
2761 nvme_fm_errcb(dev_info_t *dip, ddi_fm_error_t *fm_error, const void *arg)
2763    _NOTE(ARGUNUSED(arg));
2765    pci_ereport_post(dip, fm_error, NULL);
2766    return (fm_error->fme_status);
2770 nvme_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
2779 if (cmd
!= DDI_ATTACH
)
2780 return (DDI_FAILURE
);
2782 instance
= ddi_get_instance(dip
);
2784 if (ddi_soft_state_zalloc(nvme_state
, instance
) != DDI_SUCCESS
)
2785 return (DDI_FAILURE
);
2787 nvme
= ddi_get_soft_state(nvme_state
, instance
);
2788 ddi_set_driver_private(dip
, nvme
);
2791 mutex_init(&nvme
->n_minor
.nm_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
2793 nvme
->n_strict_version
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2794 DDI_PROP_DONTPASS
, "strict-version", 1) == 1 ? B_TRUE
: B_FALSE
;
2795 nvme
->n_ignore_unknown_vendor_status
= ddi_prop_get_int(DDI_DEV_T_ANY
,
2796 dip
, DDI_PROP_DONTPASS
, "ignore-unknown-vendor-status", 0) == 1 ?
2798 nvme
->n_admin_queue_len
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2799 DDI_PROP_DONTPASS
, "admin-queue-len", NVME_DEFAULT_ADMIN_QUEUE_LEN
);
2800 nvme
->n_io_queue_len
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2801 DDI_PROP_DONTPASS
, "io-queue-len", NVME_DEFAULT_IO_QUEUE_LEN
);
2802 nvme
->n_async_event_limit
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2803 DDI_PROP_DONTPASS
, "async-event-limit",
2804 NVME_DEFAULT_ASYNC_EVENT_LIMIT
);
2805 nvme
->n_write_cache_enabled
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2806 DDI_PROP_DONTPASS
, "volatile-write-cache-enable", 1) != 0 ?
2808 nvme
->n_min_block_size
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
,
2809 DDI_PROP_DONTPASS
, "min-phys-block-size",
2810 NVME_DEFAULT_MIN_BLOCK_SIZE
);
2812 if (!ISP2(nvme
->n_min_block_size
) ||
2813 (nvme
->n_min_block_size
< NVME_DEFAULT_MIN_BLOCK_SIZE
)) {
2814 dev_err(dip
, CE_WARN
, "!min-phys-block-size %s, "
2815 "using default %d", ISP2(nvme
->n_min_block_size
) ?
2816 "too low" : "not a power of 2",
2817 NVME_DEFAULT_MIN_BLOCK_SIZE
);
2818 nvme
->n_min_block_size
= NVME_DEFAULT_MIN_BLOCK_SIZE
;
2821 if (nvme
->n_admin_queue_len
< NVME_MIN_ADMIN_QUEUE_LEN
)
2822 nvme
->n_admin_queue_len
= NVME_MIN_ADMIN_QUEUE_LEN
;
2823 else if (nvme
->n_admin_queue_len
> NVME_MAX_ADMIN_QUEUE_LEN
)
2824 nvme
->n_admin_queue_len
= NVME_MAX_ADMIN_QUEUE_LEN
;
2826 if (nvme
->n_io_queue_len
< NVME_MIN_IO_QUEUE_LEN
)
2827 nvme
->n_io_queue_len
= NVME_MIN_IO_QUEUE_LEN
;
2829 if (nvme
->n_async_event_limit
< 1)
2830 nvme
->n_async_event_limit
= NVME_DEFAULT_ASYNC_EVENT_LIMIT
;
2832 nvme
->n_reg_acc_attr
= nvme_reg_acc_attr
;
2833 nvme
->n_queue_dma_attr
= nvme_queue_dma_attr
;
2834 nvme
->n_prp_dma_attr
= nvme_prp_dma_attr
;
2835 nvme
->n_sgl_dma_attr
= nvme_sgl_dma_attr
;
2838 * Setup FMA support.
2840 nvme
->n_fm_cap
= ddi_getprop(DDI_DEV_T_ANY
, dip
,
2841 DDI_PROP_CANSLEEP
| DDI_PROP_DONTPASS
, "fm-capable",
2842 DDI_FM_EREPORT_CAPABLE
| DDI_FM_ACCCHK_CAPABLE
|
2843 DDI_FM_DMACHK_CAPABLE
| DDI_FM_ERRCB_CAPABLE
);
2845 ddi_fm_init(dip
, &nvme
->n_fm_cap
, &nvme
->n_fm_ibc
);
2847 if (nvme
->n_fm_cap
) {
2848 if (nvme
->n_fm_cap
& DDI_FM_ACCCHK_CAPABLE
)
2849 nvme
->n_reg_acc_attr
.devacc_attr_access
=
2852 if (nvme
->n_fm_cap
& DDI_FM_DMACHK_CAPABLE
) {
2853 nvme
->n_prp_dma_attr
.dma_attr_flags
|= DDI_DMA_FLAGERR
;
2854 nvme
->n_sgl_dma_attr
.dma_attr_flags
|= DDI_DMA_FLAGERR
;
2857 if (DDI_FM_EREPORT_CAP(nvme
->n_fm_cap
) ||
2858 DDI_FM_ERRCB_CAP(nvme
->n_fm_cap
))
2859 pci_ereport_setup(dip
);
2861 if (DDI_FM_ERRCB_CAP(nvme
->n_fm_cap
))
2862 ddi_fm_handler_register(dip
, nvme_fm_errcb
,
2866 nvme
->n_progress
|= NVME_FMA_INIT
;
2869 * The spec defines several register sets. Only the controller
2870 * registers (set 1) are currently used.
2872 if (ddi_dev_nregs(dip
, &nregs
) == DDI_FAILURE
||
2874 ddi_dev_regsize(dip
, 1, ®size
) == DDI_FAILURE
)
2877 if (ddi_regs_map_setup(dip
, 1, &nvme
->n_regs
, 0, regsize
,
2878 &nvme
->n_reg_acc_attr
, &nvme
->n_regh
) != DDI_SUCCESS
) {
2879 dev_err(dip
, CE_WARN
, "!failed to map regset 1");
2883 nvme
->n_progress
|= NVME_REGS_MAPPED
;
2886 * Create taskq for command completion.
2888 (void) snprintf(name
, sizeof (name
), "%s%d_cmd_taskq",
2889 ddi_driver_name(dip
), ddi_get_instance(dip
));
2890 nvme
->n_cmd_taskq
= ddi_taskq_create(dip
, name
, MIN(UINT16_MAX
, ncpus
),
2891 TASKQ_DEFAULTPRI
, 0);
2892 if (nvme
->n_cmd_taskq
== NULL
) {
2893 dev_err(dip
, CE_WARN
, "!failed to create cmd taskq");
2898 * Create PRP DMA cache
2900 (void) snprintf(name
, sizeof (name
), "%s%d_prp_cache",
2901 ddi_driver_name(dip
), ddi_get_instance(dip
));
2902 nvme
->n_prp_cache
= kmem_cache_create(name
, sizeof (nvme_dma_t
),
2903 0, nvme_prp_dma_constructor
, nvme_prp_dma_destructor
,
2904 NULL
, (void *)nvme
, NULL
, 0);
2906 if (nvme_init(nvme
) != DDI_SUCCESS
)
2910 * Attach the blkdev driver for each namespace.
2912 for (i
= 0; i
!= nvme
->n_namespace_count
; i
++) {
2913 if (ddi_create_minor_node(nvme
->n_dip
, nvme
->n_ns
[i
].ns_name
,
2914 S_IFCHR
, NVME_MINOR(ddi_get_instance(nvme
->n_dip
), i
+ 1),
2915 DDI_NT_NVME_ATTACHMENT_POINT
, 0) != DDI_SUCCESS
) {
2916 dev_err(dip
, CE_WARN
,
2917 "!failed to create minor node for namespace %d", i
);
2921 if (nvme
->n_ns
[i
].ns_ignore
)
2924 nvme
->n_ns
[i
].ns_bd_hdl
= bd_alloc_handle(&nvme
->n_ns
[i
],
2925 &nvme_bd_ops
, &nvme
->n_prp_dma_attr
, KM_SLEEP
);
2927 if (nvme
->n_ns
[i
].ns_bd_hdl
== NULL
) {
2928 dev_err(dip
, CE_WARN
,
2929 "!failed to get blkdev handle for namespace %d", i
);
2933 if (bd_attach_handle(dip
, nvme
->n_ns
[i
].ns_bd_hdl
)
2935 dev_err(dip
, CE_WARN
,
2936 "!failed to attach blkdev handle for namespace %d",
2942 if (ddi_create_minor_node(dip
, "devctl", S_IFCHR
,
2943 NVME_MINOR(ddi_get_instance(dip
), 0), DDI_NT_NVME_NEXUS
, 0)
2945 dev_err(dip
, CE_WARN
, "nvme_attach: "
2946 "cannot create devctl minor node");
2950 return (DDI_SUCCESS
);
2953 /* attach successful anyway so that FMA can retire the device */
2955 return (DDI_SUCCESS
);
2957 (void) nvme_detach(dip
, DDI_DETACH
);
2959 return (DDI_FAILURE
);
2963 nvme_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
2968 if (cmd
!= DDI_DETACH
)
2969 return (DDI_FAILURE
);
2971 instance
= ddi_get_instance(dip
);
2973 nvme
= ddi_get_soft_state(nvme_state
, instance
);
2976 return (DDI_FAILURE
);
2978 ddi_remove_minor_node(dip
, "devctl");
2979 mutex_destroy(&nvme
->n_minor
.nm_mutex
);
2982 for (i
= 0; i
!= nvme
->n_namespace_count
; i
++) {
2983 ddi_remove_minor_node(dip
, nvme
->n_ns
[i
].ns_name
);
2984 mutex_destroy(&nvme
->n_ns
[i
].ns_minor
.nm_mutex
);
2986 if (nvme
->n_ns
[i
].ns_bd_hdl
) {
2987 (void) bd_detach_handle(
2988 nvme
->n_ns
[i
].ns_bd_hdl
);
2989 bd_free_handle(nvme
->n_ns
[i
].ns_bd_hdl
);
2992 if (nvme
->n_ns
[i
].ns_idns
)
2993 kmem_free(nvme
->n_ns
[i
].ns_idns
,
2994 sizeof (nvme_identify_nsid_t
));
2995 if (nvme
->n_ns
[i
].ns_devid
)
2996 strfree(nvme
->n_ns
[i
].ns_devid
);
2999 kmem_free(nvme
->n_ns
, sizeof (nvme_namespace_t
) *
3000 nvme
->n_namespace_count
);
3003 if (nvme
->n_progress
& NVME_INTERRUPTS
)
3004 nvme_release_interrupts(nvme
);
3006 if (nvme
->n_cmd_taskq
)
3007 ddi_taskq_wait(nvme
->n_cmd_taskq
);
3009 if (nvme
->n_ioq_count
> 0) {
3010 for (i
= 1; i
!= nvme
->n_ioq_count
+ 1; i
++) {
3011 if (nvme
->n_ioq
[i
] != NULL
) {
3012 /* TODO: send destroy queue commands */
3013 nvme_free_qpair(nvme
->n_ioq
[i
]);
3017 kmem_free(nvme
->n_ioq
, sizeof (nvme_qpair_t
*) *
3018 (nvme
->n_ioq_count
+ 1));
3021 if (nvme
->n_prp_cache
!= NULL
) {
3022 kmem_cache_destroy(nvme
->n_prp_cache
);
3025 if (nvme
->n_progress
& NVME_REGS_MAPPED
) {
3026 nvme_shutdown(nvme
, NVME_CC_SHN_NORMAL
, B_FALSE
);
3027 (void) nvme_reset(nvme
, B_FALSE
);
3030 if (nvme
->n_cmd_taskq
)
3031 ddi_taskq_destroy(nvme
->n_cmd_taskq
);
3033 if (nvme
->n_progress
& NVME_CTRL_LIMITS
)
3034 sema_destroy(&nvme
->n_abort_sema
);
3036 if (nvme
->n_progress
& NVME_ADMIN_QUEUE
)
3037 nvme_free_qpair(nvme
->n_adminq
);
3040 kmem_free(nvme
->n_idctl
, NVME_IDENTIFY_BUFSIZE
);
3042 if (nvme
->n_progress
& NVME_REGS_MAPPED
)
3043 ddi_regs_map_free(&nvme
->n_regh
);
3045 if (nvme
->n_progress
& NVME_FMA_INIT
) {
3046 if (DDI_FM_ERRCB_CAP(nvme
->n_fm_cap
))
3047 ddi_fm_handler_unregister(nvme
->n_dip
);
3049 if (DDI_FM_EREPORT_CAP(nvme
->n_fm_cap
) ||
3050 DDI_FM_ERRCB_CAP(nvme
->n_fm_cap
))
3051 pci_ereport_teardown(nvme
->n_dip
);
3053 ddi_fm_fini(nvme
->n_dip
);
3056 if (nvme
->n_vendor
!= NULL
)
3057 strfree(nvme
->n_vendor
);
3059 if (nvme
->n_product
!= NULL
)
3060 strfree(nvme
->n_product
);
3062 ddi_soft_state_free(nvme_state
, instance
);
3064 return (DDI_SUCCESS
);
3068 nvme_quiesce(dev_info_t *dip)
3073    instance = ddi_get_instance(dip);
3075    nvme = ddi_get_soft_state(nvme_state, instance);
3078        return (DDI_FAILURE);
3080    nvme_shutdown(nvme, NVME_CC_SHN_ABRUPT, B_TRUE);
3082    (void) nvme_reset(nvme, B_TRUE);
3084    return (DDI_FAILURE);
3088 nvme_fill_prp(nvme_cmd_t *cmd, bd_xfer_t *xfer)
3090    nvme_t *nvme = cmd->nc_nvme;
3091    int nprp_page, nprp;
3094    if (xfer->x_ndmac == 0)
3095        return (DDI_FAILURE);
3097    cmd->nc_sqe.sqe_dptr.d_prp[0] = xfer->x_dmac.dmac_laddress;
3098    ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
3100    if (xfer->x_ndmac == 1) {
3101        cmd->nc_sqe.sqe_dptr.d_prp[1] = 0;
3102        return (DDI_SUCCESS);
3103    } else if (xfer->x_ndmac == 2) {
3104        cmd->nc_sqe.sqe_dptr.d_prp[1] = xfer->x_dmac.dmac_laddress;
3105        return (DDI_SUCCESS);
3110    nprp_page = nvme->n_pagesize / sizeof (uint64_t) - 1;
3111    ASSERT(nprp_page > 0);
3112    nprp = (xfer->x_ndmac + nprp_page - 1) / nprp_page;
3115     * We currently don't support chained PRPs and set up our DMA
3116     * attributes to reflect that. If we still get an I/O request
3117     * that needs a chained PRP something is very wrong.
3121    cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
3122    bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);
3124    cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
3126    /*LINTED: E_PTR_BAD_CAST_ALIGN*/
3127    for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
3129        prp++, xfer->x_ndmac--) {
3130        *prp = xfer->x_dmac.dmac_laddress;
3131        ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
3134    (void) ddi_dma_sync(cmd->nc_dma->nd_dmah, 0, cmd->nc_dma->nd_len,
3135        DDI_DMA_SYNC_FORDEV);
3136    return (DDI_SUCCESS);
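    /*
     * PRP layout example: a transfer that maps to five DMA cookies with 4k
     * pages stores the first cookie in PRP1 (d_prp[0]) and lets PRP2
     * (d_prp[1]) point at the PRP list page allocated above, whose first
     * four entries hold the remaining cookies. Transfers with only one or
     * two cookies never reach this point; they use the two PRP entries in
     * the submission queue entry directly.
     */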
3140 nvme_create_nvm_cmd(nvme_namespace_t
*ns
, uint8_t opc
, bd_xfer_t
*xfer
)
3142 nvme_t
*nvme
= ns
->ns_nvme
;
3146 * Blkdev only sets BD_XFER_POLL when dumping, so don't sleep.
3148 cmd
= nvme_alloc_cmd(nvme
, (xfer
->x_flags
& BD_XFER_POLL
) ?
3149 KM_NOSLEEP
: KM_SLEEP
);
3154 cmd
->nc_sqe
.sqe_opc
= opc
;
3155 cmd
->nc_callback
= nvme_bd_xfer_done
;
3156 cmd
->nc_xfer
= xfer
;
3159 case NVME_OPC_NVM_WRITE
:
3160 case NVME_OPC_NVM_READ
:
3161 VERIFY(xfer
->x_nblks
<= 0x10000);
3163 cmd
->nc_sqe
.sqe_nsid
= ns
->ns_id
;
3165 cmd
->nc_sqe
.sqe_cdw10
= xfer
->x_blkno
& 0xffffffffu
;
3166 cmd
->nc_sqe
.sqe_cdw11
= (xfer
->x_blkno
>> 32);
3167 cmd
->nc_sqe
.sqe_cdw12
= (uint16_t)(xfer
->x_nblks
- 1);
3169 if (nvme_fill_prp(cmd
, xfer
) != DDI_SUCCESS
)
3173 case NVME_OPC_NVM_FLUSH
:
3174 cmd
->nc_sqe
.sqe_nsid
= ns
->ns_id
;
3189 nvme_bd_xfer_done(void *arg)
3191    nvme_cmd_t *cmd = arg;
3192    bd_xfer_t *xfer = cmd->nc_xfer;
3195    error = nvme_check_cmd_status(cmd);
3198    bd_xfer_done(xfer, error);
3202 nvme_bd_driveinfo(void *arg
, bd_drive_t
*drive
)
3204 nvme_namespace_t
*ns
= arg
;
3205 nvme_t
*nvme
= ns
->ns_nvme
;
3208 * blkdev maintains one queue size per instance (namespace),
3209 * but all namespaces share the I/O queues.
3210 * TODO: need to figure out a sane default, or use per-NS I/O queues,
3211 * or change blkdev to handle EAGAIN
3213    drive->d_qsize = nvme->n_ioq_count * nvme->n_io_queue_len
3214        / nvme->n_namespace_count;
3217 * d_maxxfer is not set, which means the value is taken from the DMA
3218 * attributes specified to bd_alloc_handle.
3221 drive
->d_removable
= B_FALSE
;
3222 drive
->d_hotpluggable
= B_FALSE
;
3224 bcopy(ns
->ns_eui64
, drive
->d_eui64
, sizeof (drive
->d_eui64
));
3225 drive
->d_target
= ns
->ns_id
;
3228 drive
->d_model
= nvme
->n_idctl
->id_model
;
3229 drive
->d_model_len
= sizeof (nvme
->n_idctl
->id_model
);
3230 drive
->d_vendor
= nvme
->n_vendor
;
3231 drive
->d_vendor_len
= strlen(nvme
->n_vendor
);
3232 drive
->d_product
= nvme
->n_product
;
3233 drive
->d_product_len
= strlen(nvme
->n_product
);
3234 drive
->d_serial
= nvme
->n_idctl
->id_serial
;
3235 drive
->d_serial_len
= sizeof (nvme
->n_idctl
->id_serial
);
3236 drive
->d_revision
= nvme
->n_idctl
->id_fwrev
;
3237 drive
->d_revision_len
= sizeof (nvme
->n_idctl
->id_fwrev
);
3241 nvme_bd_mediainfo(void *arg, bd_media_t *media)
3243    nvme_namespace_t *ns = arg;
3245    media->m_nblks = ns->ns_block_count;
3246    media->m_blksize = ns->ns_block_size;
3247    media->m_readonly = B_FALSE;
3248    media->m_solidstate = B_TRUE;
3250    media->m_pblksize = ns->ns_best_block_size;
3256 nvme_bd_cmd(nvme_namespace_t
*ns
, bd_xfer_t
*xfer
, uint8_t opc
)
3258 nvme_t
*nvme
= ns
->ns_nvme
;
3267 cmd
= nvme_create_nvm_cmd(ns
, opc
, xfer
);
3271 cmd
->nc_sqid
= (CPU
->cpu_id
% nvme
->n_ioq_count
) + 1;
3272 ASSERT(cmd
->nc_sqid
<= nvme
->n_ioq_count
);
3273 ioq
= nvme
->n_ioq
[cmd
->nc_sqid
];
3276 * Get the polling flag before submitting the command. The command may
3277 * complete immediately after it was submitted, which means we must
3278 * treat both cmd and xfer as if they have been freed already.
3280 poll
= (xfer
->x_flags
& BD_XFER_POLL
) != 0;
3282 ret
= nvme_submit_io_cmd(ioq
, cmd
);
3291 cmd
= nvme_retrieve_cmd(nvme
, ioq
);
3293 nvme_bd_xfer_done(cmd
);
3296 } while (ioq
->nq_active_cmds
!= 0);
3302 nvme_bd_read(void *arg, bd_xfer_t *xfer)
3304    nvme_namespace_t *ns = arg;
3306    return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
3310 nvme_bd_write(void *arg, bd_xfer_t *xfer)
3312    nvme_namespace_t *ns = arg;
3314    return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_WRITE));
3318 nvme_bd_sync(void *arg, bd_xfer_t *xfer)
3320    nvme_namespace_t *ns = arg;
3322    if (ns->ns_nvme->n_dead)
3326     * If the volatile write cache is not present or not enabled the FLUSH
3327     * command is a no-op, so we can take a shortcut here.
3329    if (!ns->ns_nvme->n_write_cache_present) {
3330        bd_xfer_done(xfer, ENOTSUP);
3334    if (!ns->ns_nvme->n_write_cache_enabled) {
3335        bd_xfer_done(xfer, 0);
3339    return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
3343 nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
3345    nvme_namespace_t *ns = arg;
3347    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
3348    if (*(uint64_t *)ns->ns_eui64 != 0) {
3349        return (ddi_devid_init(devinfo, DEVID_SCSI3_WWN,
3350            sizeof (ns->ns_eui64), ns->ns_eui64, devid));
3352    return (ddi_devid_init(devinfo, DEVID_ENCAP,
3353        strlen(ns->ns_devid), ns->ns_devid, devid));
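    /*
     * In other words, namespaces with a non-zero EUI64 expose a WWN-based
     * devid built from those eight bytes, while all others fall back to the
     * encapsulated string devid prepared by nvme_prepare_devid().
     */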
3358 nvme_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
3361 _NOTE(ARGUNUSED(cred_p
));
3363 minor_t minor
= getminor(*devp
);
3364 nvme_t
*nvme
= ddi_get_soft_state(nvme_state
, NVME_MINOR_INST(minor
));
3365 int nsid
= NVME_MINOR_NSID(minor
);
3366 nvme_minor_state_t
*nm
;
3369 if (otyp
!= OTYP_CHR
)
3375 if (nsid
> nvme
->n_namespace_count
)
3378 nm
= nsid
== 0 ? &nvme
->n_minor
: &nvme
->n_ns
[nsid
- 1].ns_minor
;
3380 mutex_enter(&nm
->nm_mutex
);
3387 if (nm
->nm_ocnt
!= 0) {
3391 nm
->nm_oexcl
= B_TRUE
;
3397 mutex_exit(&nm
->nm_mutex
);
3403 nvme_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred_p
)
3406 _NOTE(ARGUNUSED(cred_p
));
3407 _NOTE(ARGUNUSED(flag
));
3409 minor_t minor
= getminor(dev
);
3410 nvme_t
*nvme
= ddi_get_soft_state(nvme_state
, NVME_MINOR_INST(minor
));
3411 int nsid
= NVME_MINOR_NSID(minor
);
3412 nvme_minor_state_t
*nm
;
3414 if (otyp
!= OTYP_CHR
)
3420 if (nsid
> nvme
->n_namespace_count
)
3423 nm
= nsid
== 0 ? &nvme
->n_minor
: &nvme
->n_ns
[nsid
- 1].ns_minor
;
3425 mutex_enter(&nm
->nm_mutex
);
3427 nm
->nm_oexcl
= B_FALSE
;
3429 ASSERT(nm
->nm_ocnt
> 0);
3431 mutex_exit(&nm
->nm_mutex
);
3437 nvme_ioctl_identify(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3440 _NOTE(ARGUNUSED(cred_p
));
3444 if ((mode
& FREAD
) == 0)
3447 if (nioc
->n_len
< NVME_IDENTIFY_BUFSIZE
)
3450 idctl
= nvme_identify(nvme
, nsid
);
3454 if (ddi_copyout(idctl
, (void *)nioc
->n_buf
, NVME_IDENTIFY_BUFSIZE
, mode
)
3458 kmem_free(idctl
, NVME_IDENTIFY_BUFSIZE
);
3464 nvme_ioctl_capabilities(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
,
3465 int mode
, cred_t
*cred_p
)
3467 _NOTE(ARGUNUSED(nsid
, cred_p
));
3469 nvme_reg_cap_t cap
= { 0 };
3470 nvme_capabilities_t nc
;
3472 if ((mode
& FREAD
) == 0)
3475 if (nioc
->n_len
< sizeof (nc
))
3478 cap
.r
= nvme_get64(nvme
, NVME_REG_CAP
);
3481 * The MPSMIN and MPSMAX fields in the CAP register use 0 to
3482 * specify the base page size of 4k (1<<12), so add 12 here to
3483 * get the real page size value.
3485 nc
.mpsmax
= 1 << (12 + cap
.b
.cap_mpsmax
);
3486 nc
.mpsmin
= 1 << (12 + cap
.b
.cap_mpsmin
);
3488 if (ddi_copyout(&nc
, (void *)nioc
->n_buf
, sizeof (nc
), mode
) != 0)
3495 nvme_ioctl_get_logpage(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
,
3496 int mode
, cred_t
*cred_p
)
3498 _NOTE(ARGUNUSED(cred_p
));
3503 if ((mode
& FREAD
) == 0)
3506 switch (nioc
->n_arg
) {
3507 case NVME_LOGPAGE_ERROR
:
3511 case NVME_LOGPAGE_HEALTH
:
3512 if (nsid
!= 0 && nvme
->n_idctl
->id_lpa
.lp_smart
== 0)
3516 nsid
= (uint32_t)-1;
3519 case NVME_LOGPAGE_FWSLOT
:
3527 if (nvme_get_logpage(nvme
, &log
, &bufsize
, nioc
->n_arg
, nsid
)
3531 if (nioc
->n_len
< bufsize
) {
3532 kmem_free(log
, bufsize
);
3536 if (ddi_copyout(log
, (void *)nioc
->n_buf
, bufsize
, mode
) != 0)
3539 nioc
->n_len
= bufsize
;
3540 kmem_free(log
, bufsize
);
3546 nvme_ioctl_get_features(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
,
3547 int mode
, cred_t
*cred_p
)
3549 _NOTE(ARGUNUSED(cred_p
));
3556 if ((mode
& FREAD
) == 0)
3559 if ((nioc
->n_arg
>> 32) > 0xff)
3562 feature
= (uint8_t)(nioc
->n_arg
>> 32);
3565 case NVME_FEAT_ARBITRATION
:
3566 case NVME_FEAT_POWER_MGMT
:
3567 case NVME_FEAT_TEMPERATURE
:
3568 case NVME_FEAT_ERROR
:
3569 case NVME_FEAT_NQUEUES
:
3570 case NVME_FEAT_INTR_COAL
:
3571 case NVME_FEAT_WRITE_ATOM
:
3572 case NVME_FEAT_ASYNC_EVENT
:
3573 case NVME_FEAT_PROGRESS
:
3578 case NVME_FEAT_INTR_VECT
:
3582 res
= nioc
->n_arg
& 0xffffffffUL
;
3583 if (res
>= nvme
->n_intr_cnt
)
3587 case NVME_FEAT_LBA_RANGE
:
3588 if (nvme
->n_lba_range_supported
== B_FALSE
)
3592 nsid
> nvme
->n_namespace_count
)
3597 case NVME_FEAT_WRITE_CACHE
:
3601 if (!nvme
->n_write_cache_present
)
3606 case NVME_FEAT_AUTO_PST
:
3610 if (!nvme
->n_auto_pst_supported
)
3619 if (nvme_get_features(nvme
, nsid
, feature
, &res
, &buf
, &bufsize
) ==
3623 if (nioc
->n_len
< bufsize
) {
3624 kmem_free(buf
, bufsize
);
3628 if (buf
&& ddi_copyout(buf
, (void*)nioc
->n_buf
, bufsize
, mode
) != 0)
3631 kmem_free(buf
, bufsize
);
3633 nioc
->n_len
= bufsize
;
3639 nvme_ioctl_intr_cnt(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3642 _NOTE(ARGUNUSED(nsid
, mode
, cred_p
));
3644 if ((mode
& FREAD
) == 0)
3647 nioc
->n_arg
= nvme
->n_intr_cnt
;
3652 nvme_ioctl_version(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3655 _NOTE(ARGUNUSED(nsid
, cred_p
));
3658 if ((mode
& FREAD
) == 0)
3661 if (nioc
->n_len
< sizeof (nvme
->n_version
))
3664 if (ddi_copyout(&nvme
->n_version
, (void *)nioc
->n_buf
,
3665 sizeof (nvme
->n_version
), mode
) != 0)
3672 nvme_ioctl_format(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3675 _NOTE(ARGUNUSED(mode
));
3676 nvme_format_nvm_t frmt
= { 0 };
3677 int c_nsid
= nsid
!= 0 ? nsid
- 1 : 0;
3679 if ((mode
& FWRITE
) == 0 || secpolicy_sys_config(cred_p
, B_FALSE
) != 0)
3682 frmt
.r
= nioc
->n_arg
& 0xffffffff;
3685 * Check whether the FORMAT NVM command is supported.
3687 if (nvme
->n_idctl
->id_oacs
.oa_format
== 0)
3691 * Don't allow format or secure erase of individual namespace if that
3692 * would cause a format or secure erase of all namespaces.
3694 if (nsid
!= 0 && nvme
->n_idctl
->id_fna
.fn_format
!= 0)
3697 if (nsid
!= 0 && frmt
.b
.fm_ses
!= NVME_FRMT_SES_NONE
&&
3698 nvme
->n_idctl
->id_fna
.fn_sec_erase
!= 0)
3702 * Don't allow formatting with Protection Information.
3704 if (frmt
.b
.fm_pi
!= 0 || frmt
.b
.fm_pil
!= 0 || frmt
.b
.fm_ms
!= 0)
3708 * Don't allow formatting using an illegal LBA format, or any LBA format
3709 * that uses metadata.
3711 if (frmt
.b
.fm_lbaf
> nvme
->n_ns
[c_nsid
].ns_idns
->id_nlbaf
||
3712 nvme
->n_ns
[c_nsid
].ns_idns
->id_lbaf
[frmt
.b
.fm_lbaf
].lbaf_ms
!= 0)
3716 * Don't allow formatting using an illegal Secure Erase setting.
3718 if (frmt
.b
.fm_ses
> NVME_FRMT_MAX_SES
||
3719 (frmt
.b
.fm_ses
== NVME_FRMT_SES_CRYPTO
&&
3720 nvme
->n_idctl
->id_fna
.fn_crypt_erase
== 0))
3724 nsid
= (uint32_t)-1;
3726 return (nvme_format_nvm(nvme
, nsid
, frmt
.b
.fm_lbaf
, B_FALSE
, 0, B_FALSE
,
3731 nvme_ioctl_detach(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3734 _NOTE(ARGUNUSED(nioc
, mode
));
3737 if ((mode
& FWRITE
) == 0 || secpolicy_sys_config(cred_p
, B_FALSE
) != 0)
3743 rv
= bd_detach_handle(nvme
->n_ns
[nsid
- 1].ns_bd_hdl
);
3744 if (rv
!= DDI_SUCCESS
)
3751 nvme_ioctl_attach(nvme_t
*nvme
, int nsid
, nvme_ioctl_t
*nioc
, int mode
,
3754 _NOTE(ARGUNUSED(nioc
, mode
));
3755 nvme_identify_nsid_t
*idns
;
3758 if ((mode
& FWRITE
) == 0 || secpolicy_sys_config(cred_p
, B_FALSE
) != 0)
3765 * Identify namespace again, free old identify data.
3767 idns
= nvme
->n_ns
[nsid
- 1].ns_idns
;
3768 if (nvme_init_ns(nvme
, nsid
) != DDI_SUCCESS
)
3771 kmem_free(idns
, sizeof (nvme_identify_nsid_t
));
3773 rv
= bd_attach_handle(nvme
->n_dip
, nvme
->n_ns
[nsid
- 1].ns_bd_hdl
);
3774 if (rv
!= DDI_SUCCESS
)
3781 nvme_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
, cred_t
*cred_p
,
3785 _NOTE(ARGUNUSED(rval_p
));
3787 minor_t minor
= getminor(dev
);
3788 nvme_t
*nvme
= ddi_get_soft_state(nvme_state
, NVME_MINOR_INST(minor
));
3789 int nsid
= NVME_MINOR_NSID(minor
);
3793 int (*nvme_ioctl
[])(nvme_t
*, int, nvme_ioctl_t
*, int, cred_t
*) = {
3795 nvme_ioctl_identify
,
3796 nvme_ioctl_identify
,
3797 nvme_ioctl_capabilities
,
3798 nvme_ioctl_get_logpage
,
3799 nvme_ioctl_get_features
,
3800 nvme_ioctl_intr_cnt
,
3810 if (nsid
> nvme
->n_namespace_count
)
3814 return (ndi_devctl_ioctl(nvme
->n_dip
, cmd
, arg
, mode
, 0));
3816 #ifdef _MULTI_DATAMODEL
3817 switch (ddi_model_convert_from(mode
& FMODELS
)) {
3818 case DDI_MODEL_ILP32
: {
3819 nvme_ioctl32_t nioc32
;
3820 if (ddi_copyin((void*)arg
, &nioc32
, sizeof (nvme_ioctl32_t
),
3823 nioc
.n_len
= nioc32
.n_len
;
3824 nioc
.n_buf
= nioc32
.n_buf
;
3825 nioc
.n_arg
= nioc32
.n_arg
;
3828 case DDI_MODEL_NONE
:
3830 if (ddi_copyin((void*)arg
, &nioc
, sizeof (nvme_ioctl_t
), mode
)
3833 #ifdef _MULTI_DATAMODEL
3838 if (cmd
== NVME_IOC_IDENTIFY_CTRL
) {
3840 * This makes NVME_IOC_IDENTIFY_CTRL work the same on devctl and
3841 * attachment point nodes.
3844 } else if (cmd
== NVME_IOC_IDENTIFY_NSID
&& nsid
== 0) {
3846 * This makes NVME_IOC_IDENTIFY_NSID work on a devctl node, it
3847 * will always return identify data for namespace 1.
3852 if (IS_NVME_IOC(cmd
) && nvme_ioctl
[NVME_IOC_CMD(cmd
)] != NULL
)
3853 rv
= nvme_ioctl
[NVME_IOC_CMD(cmd
)](nvme
, nsid
, &nioc
, mode
,
3858 #ifdef _MULTI_DATAMODEL
3859 switch (ddi_model_convert_from(mode
& FMODELS
)) {
3860 case DDI_MODEL_ILP32
: {
3861 nvme_ioctl32_t nioc32
;
3863 nioc32
.n_len
= (size32_t
)nioc
.n_len
;
3864 nioc32
.n_buf
= (uintptr32_t
)nioc
.n_buf
;
3865 nioc32
.n_arg
= nioc
.n_arg
;
3867 if (ddi_copyout(&nioc32
, (void *)arg
, sizeof (nvme_ioctl32_t
),
3872 case DDI_MODEL_NONE
:
3874 if (ddi_copyout(&nioc
, (void *)arg
, sizeof (nvme_ioctl_t
), mode
)
3877 #ifdef _MULTI_DATAMODEL