/*
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch <kbusch@kernel.org>
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *   Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci_device.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
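/* The top three bytes are the 52:54:00 prefix conventionally used by QEMU. */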
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
#define NVME_FDP_MAX_EVENTS 63
#define NVME_FDP_MAXPIDS 128

/*
 * The controller only supports Submission and Completion Queue Entry Sizes of
 * 64 and 16 bytes respectively.
 */
#define NVME_SQES 6
#define NVME_CQES 4
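
/*
 * NSID 0xffffffff is reserved as the broadcast value, so the largest valid
 * NSID must stay strictly below it.
 */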
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
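
/*
 * Sentinel stored in NvmeSubsystem.ctrls[] for controller slots that are
 * reserved (e.g. for secondary controllers) without a live NvmeCtrl pointer.
 */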
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeReclaimUnit {
    uint64_t ruamw; /* reclaim unit available media writes */
} NvmeReclaimUnit;

typedef struct NvmeRuHandle {
    uint8_t  ruht;
    uint8_t  ruha;
    uint64_t event_filter;
    uint8_t  lbafi;
    uint64_t ruamw;

    /* reclaim units indexed by reclaim group */
    NvmeReclaimUnit *rus;
} NvmeRuHandle;

typedef struct NvmeFdpEventBuffer {
    NvmeFdpEvent     events[NVME_FDP_MAX_EVENTS];
    unsigned int     nelems;
    unsigned int     start;
    unsigned int     next;
} NvmeFdpEventBuffer;

typedef struct NvmeEnduranceGroup {
    uint8_t event_conf;

    struct {
        NvmeFdpEventBuffer host_events, ctrl_events;

        uint16_t nruh;
        uint16_t nrg;
        uint8_t  rgif;
        uint64_t runs;

        uint64_t hbmw;
        uint64_t mbmw;
        uint64_t mbe;

        bool enabled;

        NvmeRuHandle *ruhs;
    } fdp;
} NvmeEnduranceGroup;

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];
    char        *serial;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeEnduranceGroup endgrp;

    struct {
        char *nqn;

        struct {
            bool     enabled;
            uint64_t runs;
            uint16_t nruh;
            uint32_t nrg;
        } fdp;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

#define FDP_EVT_MAX 0xff
#define NVME_FDP_MAX_NS_RUHS 32u
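
/*
 * Bit position of each FDP event type when matched against a reclaim unit
 * handle's event_filter mask.
 */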
static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
    /* Host events */
    [FDP_EVT_RU_NOT_FULLY_WRITTEN]      = 0,
    [FDP_EVT_RU_ATL_EXCEEDED]           = 1,
    [FDP_EVT_CTRL_RESET_RUH]            = 2,
    [FDP_EVT_INVALID_PID]               = 3,
    /* CTRL events */
    [FDP_EVT_MEDIA_REALLOC]             = 32,
    [FDP_EVT_RUH_IMPLICIT_RU_CHANGE]    = 33,
};

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    struct {
        char *ruhs;
    } fdp;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    unsigned int nlbaf;
    size_t       lbasz;
    const uint32_t *iocs;
    uint8_t      csi;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;
    NvmeSubsystem       *subsys;
    NvmeEnduranceGroup  *endgrp;

    struct {
        uint16_t nphs;
        /* reclaim unit handle identifiers indexed by placement handle */
        uint16_t *phs;
    } fdp;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}
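
/* Convert a logical block count to bytes; lbaf.ds is log2 of the LBA size. */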
static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}
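
/* Convert a logical block count to metadata bytes (lbaf.ms per block). */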
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}
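
/* Byte offset of the out-of-band metadata that belongs to the given LBA. */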
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}
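
/* True if the namespace uses extended LBAs (metadata interleaved with data). */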
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}
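
/* The zone state is kept in the upper nibble of the zone descriptor zs field. */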
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}
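
/*
 * Reads may range over the full zone size; writes are bounded by the zone
 * capacity, which may be smaller.
 */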
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}
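
/*
 * Active/open resource (AOR) accounting; the counters are validated against
 * the configured max_open_zones/max_active_zones limits.
 */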
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}
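
/* Saturating add: FDP statistics clamp at UINT64_MAX instead of wrapping. */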
static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
{
    uint64_t ret = *a + b;
    *a = ret < *a ? UINT64_MAX : ret;
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};
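
/*
 * Scatter/gather state for a transfer: a QEMUSGList when DMA is possible,
 * otherwise a QEMUIOVector (see the NVME_SG_* flags above).
 */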
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_DIRECTIVE_SEND:   return "NVME_ADM_CMD_DIRECTIVE_SEND";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DIRECTIVE_RECV:   return "NVME_ADM_CMD_DIRECTIVE_RECV";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace IDs start at 1, so the bitmap is 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}
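
/*
 * For an SR-IOV VF, return its secondary controller entry in the physical
 * function's list; a PF has no secondary controller entry of its own.
 */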
static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_NVME_H */