/*
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch <kbusch@kernel.org>
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *   Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci_device.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
#define NVME_FDP_MAX_EVENTS 63
#define NVME_FDP_MAXPIDS 128

QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
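
/*
 * Note: NSID 0xffffffff is the broadcast namespace ID, so the largest
 * valid regular NSID must stay strictly below it; the build-time
 * assertion above enforces that relationship.
 */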

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeReclaimUnit {
    uint64_t ruamw;
} NvmeReclaimUnit;

typedef struct NvmeRuHandle {
    uint8_t  ruht;
    uint8_t  ruha;
    uint64_t event_filter;
    uint8_t  lbafi;
    uint64_t ruamw;

    /* reclaim units indexed by reclaim group */
    NvmeReclaimUnit *rus;
} NvmeRuHandle;

typedef struct NvmeFdpEventBuffer {
    NvmeFdpEvent     events[NVME_FDP_MAX_EVENTS];
    unsigned int     nelems;
    unsigned int     start;
    unsigned int     next;
} NvmeFdpEventBuffer;

typedef struct NvmeEnduranceGroup {
    uint8_t event_conf;

    struct {
        NvmeFdpEventBuffer host_events, ctrl_events;

        uint16_t nruh;
        uint16_t nrg;
        uint8_t  rgif;
        uint64_t runs;

        uint64_t hbmw;
        uint64_t mbmw;
        uint64_t mbe;

        bool enabled;

        NvmeRuHandle *ruhs;
    } fdp;
} NvmeEnduranceGroup;

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];
    char        *serial;

    NvmeCtrl           *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace      *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeEnduranceGroup endgrp;

    struct {
        char *nqn;

        struct {
            bool     enabled;
            uint64_t runs;
            uint16_t nruh;
            uint32_t nrg;
        } fdp;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}
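
/*
 * Illustrative sketch (not part of this header): looking up a controller
 * and a namespace through the subsystem; "subsys", "cntlid" and "nsid"
 * are hypothetical locals.
 *
 *     NvmeCtrl *ctrl = nvme_subsys_ctrl(subsys, cntlid);
 *     NvmeNamespace *ns = nvme_subsys_ns(subsys, nsid);
 *
 * Both return NULL for out-of-range identifiers, and nvme_subsys_ctrl()
 * additionally hides slots parked at SUBSYS_SLOT_RSVD.
 */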

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

#define FDP_EVT_MAX 0xff
#define NVME_FDP_MAX_NS_RUHS 32u

static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
    /* Host events */
    [FDP_EVT_RU_NOT_FULLY_WRITTEN]      = 0,
    [FDP_EVT_RU_ATL_EXCEEDED]           = 1,
    [FDP_EVT_CTRL_RESET_RUH]            = 2,
    [FDP_EVT_INVALID_PID]               = 3,
    /* CTRL events */
    [FDP_EVT_MEDIA_REALLOC]             = 32,
    [FDP_EVT_RUH_IMPLICIT_RU_CHANGE]    = 33,
};
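
/*
 * A minimal sketch, assuming the table is used the obvious way: each
 * supported FDP event type maps to one bit position in a 64-bit event
 * filter, so testing whether an event is enabled for a reclaim unit
 * handle ("ruh" and "evt" are hypothetical locals) looks like:
 *
 *     bool enabled = ruh->event_filter & (1ULL << nvme_fdp_evf_shifts[evt]);
 */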

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;

    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;

    struct {
        char *ruhs;
    } fdp;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    unsigned int nlbaf;
    size_t       lbasz;
    const uint32_t *iocs;
    uint8_t      csi;
    uint16_t     status;
    int          attached;
    uint8_t      pif;

    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;
    NvmeSubsystem *subsys;
    NvmeEnduranceGroup *endgrp;

    struct {
        uint32_t err_rec;
    } features;

    struct {
        uint16_t nphs;
        /* reclaim unit handle identifiers indexed by placement handle */
        uint16_t *phs;
    } fdp;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}
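
/*
 * Worked example: with 4 KiB data blocks (lbaf.ds == 12) and 16 bytes
 * of separate metadata per block (lbaf.ms == 16), LBA 8 starts at byte
 * nvme_l2b(ns, 8) == 8 << 12 == 32768 of the data area, and its
 * metadata starts at nvme_moff(ns, 8) == ns->moff + 16 * 8.
 */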

static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}
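
/*
 * The zone descriptor's zs byte keeps the zone state in its upper
 * nibble, hence the shifts in the two accessors above, e.g.:
 *
 *     nvme_set_zone_state(zone, NVME_ZONE_STATE_FULL);
 *     assert(nvme_get_zone_state(zone) == NVME_ZONE_STATE_FULL);
 */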

static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}
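
/*
 * Illustrative sketch: the active/open resource (AOR) helpers above are
 * kept in lockstep with zone state transitions. Moving an empty zone to
 * implicitly open, for example, bumps both counters:
 *
 *     nvme_aor_inc_active(ns);
 *     nvme_aor_inc_open(ns);
 *
 * and the asserts keep nr_open_zones <= nr_active_zones within the
 * configured max_open_zones/max_active_zones limits.
 */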

static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
{
    uint64_t ret = *a + b;
    *a = ret < *a ? UINT64_MAX : ret;
}
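
/*
 * This is a saturating add: if the 64-bit counter would wrap, it pins
 * at UINT64_MAX instead. For example, nvme_fdp_stat_inc(&stat, 1) with
 * stat == UINT64_MAX leaves stat at UINT64_MAX.
 */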

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_DIRECTIVE_SEND:   return "NVME_ADM_CMD_DIRECTIVE_SEND";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DIRECTIVE_RECV:   return "NVME_ADM_CMD_DIRECTIVE_RECV";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}
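
/*
 * Illustrative use ("req" is a hypothetical local): the two helpers
 * above keep debug and trace output readable, e.g.:
 *
 *     fprintf(stderr, "opc 0x%02x (%s)\n", req->cmd.opcode,
 *             nvme_io_opc_str(req->cmd.opcode));
 */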

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace IDs start at 1, so the bitmap is 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;

    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}
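
/*
 * Sketch ("req" is a hypothetical local): a request reaches everything
 * else through its submission queue:
 *
 *     NvmeCtrl *n = nvme_ctrl(req);
 *     NvmeCQueue *cq = nvme_cq(req);
 *     uint16_t cid = nvme_cid(req);
 */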

static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}
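
/*
 * Illustrative: on an SR-IOV virtual function, nvme_sctrl() returns the
 * VF's own secondary controller entry from the PF's list, while
 * nvme_sctrl_for_cntlid() searches that list by controller identifier,
 * e.g. when servicing Virtualization Management commands (assuming the
 * NVME_INVALID_CTRL_ID/NVME_DNR status codes from block/nvme.h):
 *
 *     NvmeSecCtrlEntry *sctrl = nvme_sctrl_for_cntlid(n, cntlid);
 *     if (!sctrl) {
 *         return NVME_INVALID_CTRL_ID | NVME_DNR;
 *     }
 */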

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_NVME_H */