/*
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch <kbusch@kernel.org>
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *   Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"

#include "hw/pci/pci_device.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    /* ... */

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
    /* ... */
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

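/*
 * Looks up the controller registered under @cntlid in the subsystem.
 * Slots parked at SUBSYS_SLOT_RSVD are reserved and read back as NULL,
 * as do out-of-range controller ids.
 */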
static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint16_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr d;
    /* ... */
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    /* ... */
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;
    /* ... */
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    /* ... */
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    /* ... */
    const uint32_t *iocs;
    /* ... */

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    /* ... */
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;
    /* ... */
} NvmeNamespace;

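/* Returns the NSID of @ns, or 0 if @ns is NULL; valid NSIDs start at 1. */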
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

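/*
 * Converts a logical block count to bytes using the namespace's LBA data
 * size (1 << lbaf.ds); e.g. with 512-byte blocks (ds = 9),
 * nvme_l2b(ns, 8) == 4096.
 */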
static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

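/*
 * Converts a logical block count to the size in bytes of the per-block
 * metadata covering it (lbaf.ms metadata bytes per LBA).
 */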
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

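/*
 * Returns the byte offset of the metadata belonging to @lba in the
 * backing device: the start of the metadata area (moff) plus the
 * metadata of all preceding LBAs.
 */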
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

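/*
 * True if the namespace is formatted with extended LBAs, i.e. metadata
 * is transferred interleaved with the data instead of separately.
 */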
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

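/*
 * The zone state lives in the upper nibble of the Zone State (ZS) field
 * of the zone descriptor; these accessors shift it into and out of place.
 */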
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

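/*
 * Reads may cross up to the end of the zone (zslba + zone_size), while
 * writes are bounded by the zone capacity (zslba + zcap), which may be
 * smaller than the zone size.
 */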
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

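/*
 * The write pointer is only meaningful while a zone can still be
 * written; it is invalid in the Full, Read Only and Offline states.
 */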
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

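/*
 * Active/Open Resources (AOR) accounting. The helpers below keep
 * nr_open_zones and nr_active_zones within the configured
 * max_open_zones/max_active_zones limits; a limit of 0 means unlimited,
 * in which case the corresponding counter is left untouched and only the
 * non-negativity invariants are asserted.
 */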
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {
    /* ... */
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue    *sq;
    struct NvmeNamespace *ns;
    /* ... */
    NvmeCqe              cqe;
    /* ... */
    BlockAcctCookie      acct;
    /* ... */
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    /* ... */
} NvmeBounceContext;

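/* Opcode-to-name mappings, primarily useful for trace and debug output. */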
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    /* ... */
    uint16_t        cqid;
    /* ... */
    EventNotifier   notifier;
    bool            ioeventfd_enabled;
    /* ... */
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    /* ... */
    uint16_t        irq_enabled;
    /* ... */
    EventNotifier   notifier;
    bool            ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    /* ... */
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    /* ... */
    uint32_t cmb_size_mb;
    /* ... */
    uint32_t aer_max_queued;
    /* ... */
    bool     auto_transition_zones;
    /* ... */
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice parent_obj;
    /* ... */
    uint16_t    max_prp_ents;
    /* ... */
    uint8_t     outstanding_aers;
    /* ... */
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    /* ... */
    HostMemoryBackend *dev;
    /* ... */
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    /* ... */

    /* Namespace IDs start at 1, so the bitmap must be one entry larger */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;
    /* ... */
    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeCQueue      **cq;
    /* ... */
    uint16_t    temp_thresh_hi;
    uint16_t    temp_thresh_low;
    /* ... */
    uint32_t    async_config;
    NvmeHostBehaviorSupport hbs;
    /* ... */
    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        /* ... */
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

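/*
 * Returns the namespace attached to controller @n as @nsid, or NULL if
 * the NSID is out of range or no namespace is attached there.
 */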
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

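/*
 * A request is always submitted through a submission queue, so walk back
 * through req->sq to find the completion queue (and controller) it maps
 * to.
 */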
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;

    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

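/*
 * For an SR-IOV virtual function, returns its secondary controller entry
 * in the physical function's list; returns NULL when called on the PF
 * itself.
 */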
static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

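/*
 * Linear scan of the secondary controller list for the entry whose
 * on-wire (little-endian) SCID matches @cntlid; NULL if none matches.
 */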
static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);
