linux-user: Use page_find_range_empty for mmap_find_vma_reserved
[qemu/armbru.git] / hw / nvme / nvme.h
blob209e8f5b4c085777b94a081712bb33548b0f42b1
/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch <kbusch@kernel.org>
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *   Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */
18 #ifndef HW_NVME_NVME_H
19 #define HW_NVME_NVME_H
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci_device.h"
23 #include "hw/block/block.h"
25 #include "block/nvme.h"
27 #define NVME_MAX_CONTROLLERS 256
28 #define NVME_MAX_NAMESPACES 256
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30 #define NVME_FDP_MAX_EVENTS 63
31 #define NVME_FDP_MAXPIDS 128
33 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
/* Forward declarations; both structures are fully defined later in this header. */
typedef struct NvmeCtrl      NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
38 #define TYPE_NVME_BUS "nvme-bus"
39 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
41 typedef struct NvmeBus {
42 BusState parent_bus;
43 } NvmeBus;
/* QOM type name and checked cast for the NVMe subsystem device. */
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * Sentinel pointer stored in a controller slot that is reserved but has no
 * controller object behind it.  The expansion is fully parenthesized so the
 * macro behaves as a single operand in any expression (for instance after
 * sizeof or next to a postfix operator).
 */
#define SUBSYS_SLOT_RSVD ((void *)0xFFFF)
/* State of one reclaim unit. */
typedef struct NvmeReclaimUnit {
    /* NOTE(review): presumably "reclaim unit available media writes" - confirm. */
    uint64_t ruamw;
} NvmeReclaimUnit;
54 typedef struct NvmeRuHandle {
55 uint8_t ruht;
56 uint8_t ruha;
57 uint64_t event_filter;
58 uint8_t lbafi;
59 uint64_t ruamw;
61 /* reclaim units indexed by reclaim group */
62 NvmeReclaimUnit *rus;
63 } NvmeRuHandle;
65 typedef struct NvmeFdpEventBuffer {
66 NvmeFdpEvent events[NVME_FDP_MAX_EVENTS];
67 unsigned int nelems;
68 unsigned int start;
69 unsigned int next;
70 } NvmeFdpEventBuffer;
72 typedef struct NvmeEnduranceGroup {
73 uint8_t event_conf;
75 struct {
76 NvmeFdpEventBuffer host_events, ctrl_events;
78 uint16_t nruh;
79 uint16_t nrg;
80 uint8_t rgif;
81 uint64_t runs;
83 uint64_t hbmw;
84 uint64_t mbmw;
85 uint64_t mbe;
87 bool enabled;
89 NvmeRuHandle *ruhs;
90 } fdp;
91 } NvmeEnduranceGroup;
93 typedef struct NvmeSubsystem {
94 DeviceState parent_obj;
95 NvmeBus bus;
96 uint8_t subnqn[256];
97 char *serial;
99 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
100 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
101 NvmeEnduranceGroup endgrp;
103 struct {
104 char *nqn;
106 struct {
107 bool enabled;
108 uint64_t runs;
109 uint16_t nruh;
110 uint32_t nrg;
111 } fdp;
112 } params;
113 } NvmeSubsystem;
115 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
116 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
118 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
119 uint32_t cntlid)
121 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
122 return NULL;
125 if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
126 return NULL;
129 return subsys->ctrls[cntlid];
132 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
133 uint32_t nsid)
135 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
136 return NULL;
139 return subsys->namespaces[nsid];
/* QOM type name and checked cast for the NVMe namespace device. */
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
146 typedef struct NvmeZone {
147 NvmeZoneDescr d;
148 uint64_t w_ptr;
149 QTAILQ_ENTRY(NvmeZone) entry;
150 } NvmeZone;
/* Number of entries in the event-type-indexed shift table. */
#define FDP_EVT_MAX          0xff
#define NVME_FDP_MAX_NS_RUHS 32u
#define FDPVSS               0
156 static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
157 /* Host events */
158 [FDP_EVT_RU_NOT_FULLY_WRITTEN] = 0,
159 [FDP_EVT_RU_ATL_EXCEEDED] = 1,
160 [FDP_EVT_CTRL_RESET_RUH] = 2,
161 [FDP_EVT_INVALID_PID] = 3,
162 /* CTRL events */
163 [FDP_EVT_MEDIA_REALLOC] = 32,
164 [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33,
167 typedef struct NvmeNamespaceParams {
168 bool detached;
169 bool shared;
170 uint32_t nsid;
171 QemuUUID uuid;
172 uint64_t eui64;
173 bool eui64_default;
175 uint16_t ms;
176 uint8_t mset;
177 uint8_t pi;
178 uint8_t pil;
179 uint8_t pif;
181 uint16_t mssrl;
182 uint32_t mcl;
183 uint8_t msrc;
185 bool zoned;
186 bool cross_zone_read;
187 uint64_t zone_size_bs;
188 uint64_t zone_cap_bs;
189 uint32_t max_active_zones;
190 uint32_t max_open_zones;
191 uint32_t zd_extension_size;
193 uint32_t numzrwa;
194 uint64_t zrwas;
195 uint64_t zrwafg;
197 struct {
198 char *ruhs;
199 } fdp;
200 } NvmeNamespaceParams;
202 typedef struct NvmeNamespace {
203 DeviceState parent_obj;
204 BlockConf blkconf;
205 int32_t bootindex;
206 int64_t size;
207 int64_t moff;
208 NvmeIdNs id_ns;
209 NvmeIdNsNvm id_ns_nvm;
210 NvmeLBAF lbaf;
211 unsigned int nlbaf;
212 size_t lbasz;
213 const uint32_t *iocs;
214 uint8_t csi;
215 uint16_t status;
216 int attached;
217 uint8_t pif;
219 struct {
220 uint16_t zrwas;
221 uint16_t zrwafg;
222 uint32_t numzrwa;
223 } zns;
225 QTAILQ_ENTRY(NvmeNamespace) entry;
227 NvmeIdNsZoned *id_ns_zoned;
228 NvmeZone *zone_array;
229 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
230 QTAILQ_HEAD(, NvmeZone) imp_open_zones;
231 QTAILQ_HEAD(, NvmeZone) closed_zones;
232 QTAILQ_HEAD(, NvmeZone) full_zones;
233 uint32_t num_zones;
234 uint64_t zone_size;
235 uint64_t zone_capacity;
236 uint32_t zone_size_log2;
237 uint8_t *zd_extensions;
238 int32_t nr_open_zones;
239 int32_t nr_active_zones;
241 NvmeNamespaceParams params;
242 NvmeSubsystem *subsys;
243 NvmeEnduranceGroup *endgrp;
245 struct {
246 uint32_t err_rec;
247 } features;
249 struct {
250 uint16_t nphs;
251 /* reclaim unit handle identifiers indexed by placement handle */
252 uint16_t *phs;
253 } fdp;
254 } NvmeNamespace;
256 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
258 if (ns) {
259 return ns->params.nsid;
262 return 0;
265 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
267 return lba << ns->lbaf.ds;
270 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
272 return ns->lbaf.ms * lba;
275 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
277 return ns->moff + nvme_m2b(ns, lba);
280 static inline bool nvme_ns_ext(NvmeNamespace *ns)
282 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
285 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
287 return zone->d.zs >> 4;
290 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
292 zone->d.zs = state << 4;
295 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
297 return zone->d.zslba + ns->zone_size;
300 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
302 return zone->d.zslba + zone->d.zcap;
305 static inline bool nvme_wp_is_valid(NvmeZone *zone)
307 uint8_t st = nvme_get_zone_state(zone);
309 return st != NVME_ZONE_STATE_FULL &&
310 st != NVME_ZONE_STATE_READ_ONLY &&
311 st != NVME_ZONE_STATE_OFFLINE;
314 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
315 uint32_t zone_idx)
317 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
320 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
322 assert(ns->nr_open_zones >= 0);
323 if (ns->params.max_open_zones) {
324 ns->nr_open_zones++;
325 assert(ns->nr_open_zones <= ns->params.max_open_zones);
329 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
331 if (ns->params.max_open_zones) {
332 assert(ns->nr_open_zones > 0);
333 ns->nr_open_zones--;
335 assert(ns->nr_open_zones >= 0);
338 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
340 assert(ns->nr_active_zones >= 0);
341 if (ns->params.max_active_zones) {
342 ns->nr_active_zones++;
343 assert(ns->nr_active_zones <= ns->params.max_active_zones);
347 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
349 if (ns->params.max_active_zones) {
350 assert(ns->nr_active_zones > 0);
351 ns->nr_active_zones--;
352 assert(ns->nr_active_zones >= ns->nr_open_zones);
354 assert(ns->nr_active_zones >= 0);
/*
 * Saturating add: *a += b, clamped to UINT64_MAX when the sum would not fit
 * in 64 bits.
 */
static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
{
    if (b > UINT64_MAX - *a) {
        *a = UINT64_MAX;
    } else {
        *a += b;
    }
}
363 void nvme_ns_init_format(NvmeNamespace *ns);
364 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
365 void nvme_ns_drain(NvmeNamespace *ns);
366 void nvme_ns_shutdown(NvmeNamespace *ns);
367 void nvme_ns_cleanup(NvmeNamespace *ns);
369 typedef struct NvmeAsyncEvent {
370 QTAILQ_ENTRY(NvmeAsyncEvent) entry;
371 NvmeAerResult result;
372 } NvmeAsyncEvent;
/* Bit flags; NvmeSg.flags is the int field they are presumably stored in. */
enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};
379 typedef struct NvmeSg {
380 int flags;
382 union {
383 QEMUSGList qsg;
384 QEMUIOVector iov;
386 } NvmeSg;
/* Direction of a data transfer relative to the device. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;
393 typedef struct NvmeRequest {
394 struct NvmeSQueue *sq;
395 struct NvmeNamespace *ns;
396 BlockAIOCB *aiocb;
397 uint16_t status;
398 void *opaque;
399 NvmeCqe cqe;
400 NvmeCmd cmd;
401 BlockAcctCookie acct;
402 NvmeSg sg;
403 QTAILQ_ENTRY(NvmeRequest)entry;
404 } NvmeRequest;
406 typedef struct NvmeBounceContext {
407 NvmeRequest *req;
409 struct {
410 QEMUIOVector iov;
411 uint8_t *bounce;
412 } data, mdata;
413 } NvmeBounceContext;
415 static inline const char *nvme_adm_opc_str(uint8_t opc)
417 switch (opc) {
418 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
419 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
420 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
421 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
422 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
423 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
424 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
425 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
426 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
427 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
428 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
429 case NVME_ADM_CMD_DIRECTIVE_SEND: return "NVME_ADM_CMD_DIRECTIVE_SEND";
430 case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT";
431 case NVME_ADM_CMD_DIRECTIVE_RECV: return "NVME_ADM_CMD_DIRECTIVE_RECV";
432 case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG";
433 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
434 default: return "NVME_ADM_CMD_UNKNOWN";
438 static inline const char *nvme_io_opc_str(uint8_t opc)
440 switch (opc) {
441 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
442 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
443 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
444 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
445 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
446 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
447 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
448 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
449 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
450 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
451 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
452 default: return "NVME_NVM_CMD_UNKNOWN";
456 typedef struct NvmeSQueue {
457 struct NvmeCtrl *ctrl;
458 uint16_t sqid;
459 uint16_t cqid;
460 uint32_t head;
461 uint32_t tail;
462 uint32_t size;
463 uint64_t dma_addr;
464 uint64_t db_addr;
465 uint64_t ei_addr;
466 QEMUBH *bh;
467 EventNotifier notifier;
468 bool ioeventfd_enabled;
469 NvmeRequest *io_req;
470 QTAILQ_HEAD(, NvmeRequest) req_list;
471 QTAILQ_HEAD(, NvmeRequest) out_req_list;
472 QTAILQ_ENTRY(NvmeSQueue) entry;
473 } NvmeSQueue;
475 typedef struct NvmeCQueue {
476 struct NvmeCtrl *ctrl;
477 uint8_t phase;
478 uint16_t cqid;
479 uint16_t irq_enabled;
480 uint32_t head;
481 uint32_t tail;
482 uint32_t vector;
483 uint32_t size;
484 uint64_t dma_addr;
485 uint64_t db_addr;
486 uint64_t ei_addr;
487 QEMUBH *bh;
488 EventNotifier notifier;
489 bool ioeventfd_enabled;
490 QTAILQ_HEAD(, NvmeSQueue) sq_list;
491 QTAILQ_HEAD(, NvmeRequest) req_list;
492 } NvmeCQueue;
/* QOM type name and checked cast for the NVMe controller device. */
#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
/* Configuration values of a controller device. */
typedef struct NvmeParams {
    char    *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;

    /* SR-IOV settings. */
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;
520 typedef struct NvmeCtrl {
521 PCIDevice parent_obj;
522 MemoryRegion bar0;
523 MemoryRegion iomem;
524 NvmeBar bar;
525 NvmeParams params;
526 NvmeBus bus;
528 uint16_t cntlid;
529 bool qs_created;
530 uint32_t page_size;
531 uint16_t page_bits;
532 uint16_t max_prp_ents;
533 uint16_t cqe_size;
534 uint16_t sqe_size;
535 uint32_t max_q_ents;
536 uint8_t outstanding_aers;
537 uint32_t irq_status;
538 int cq_pending;
539 uint64_t host_timestamp; /* Timestamp sent by the host */
540 uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
541 uint64_t starttime_ms;
542 uint16_t temperature;
543 uint8_t smart_critical_warning;
544 uint32_t conf_msix_qsize;
545 uint32_t conf_ioqpairs;
546 uint64_t dbbuf_dbs;
547 uint64_t dbbuf_eis;
548 bool dbbuf_enabled;
550 struct {
551 MemoryRegion mem;
552 uint8_t *buf;
553 bool cmse;
554 hwaddr cba;
555 } cmb;
557 struct {
558 HostMemoryBackend *dev;
559 bool cmse;
560 hwaddr cba;
561 } pmr;
563 uint8_t aer_mask;
564 NvmeRequest **aer_reqs;
565 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
566 int aer_queued;
568 uint32_t dmrsl;
570 /* Namespace ID is started with 1 so bitmap should be 1-based */
571 #define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
572 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
574 NvmeSubsystem *subsys;
576 NvmeNamespace namespace;
577 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
578 NvmeSQueue **sq;
579 NvmeCQueue **cq;
580 NvmeSQueue admin_sq;
581 NvmeCQueue admin_cq;
582 NvmeIdCtrl id_ctrl;
584 struct {
585 struct {
586 uint16_t temp_thresh_hi;
587 uint16_t temp_thresh_low;
590 uint32_t async_config;
591 NvmeHostBehaviorSupport hbs;
592 } features;
594 NvmePriCtrlCap pri_ctrl_cap;
595 NvmeSecCtrlList sec_ctrl_list;
596 struct {
597 uint16_t vqrfap;
598 uint16_t virfap;
599 } next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */
600 } NvmeCtrl;
/* Kind of reset being performed. */
typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;
607 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
609 if (!nsid || nsid > NVME_MAX_NAMESPACES) {
610 return NULL;
613 return n->namespaces[nsid];
616 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
618 NvmeSQueue *sq = req->sq;
619 NvmeCtrl *n = sq->ctrl;
621 return n->cq[sq->cqid];
624 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
626 NvmeSQueue *sq = req->sq;
627 return sq->ctrl;
630 static inline uint16_t nvme_cid(NvmeRequest *req)
632 if (!req) {
633 return 0xffff;
636 return le16_to_cpu(req->cqe.cid);
639 static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
641 PCIDevice *pci_dev = &n->parent_obj;
642 NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
644 if (pci_is_vf(pci_dev)) {
645 return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
648 return NULL;
651 static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
652 uint16_t cntlid)
654 NvmeSecCtrlList *list = &n->sec_ctrl_list;
655 uint8_t i;
657 for (i = 0; i < list->numcntl; i++) {
658 if (le16_to_cpu(list->sec[i].scid) == cntlid) {
659 return &list->sec[i];
663 return NULL;
666 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
667 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
668 NvmeTxDirection dir, NvmeRequest *req);
669 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
670 NvmeTxDirection dir, NvmeRequest *req);
671 void nvme_rw_complete_cb(void *opaque, int ret);
672 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
673 NvmeCmd *cmd);
675 #endif /* HW_NVME_NVME_H */