linux-user: Use do_munmap for target_mmap failure
[qemu/kevin.git] / hw / nvme / nvme.h
blob5f2ae7b28b9cc317af4a7bcd476661349de65893
1 /*
2 * QEMU NVM Express
4 * Copyright (c) 2012 Intel Corporation
5 * Copyright (c) 2021 Minwoo Im
6 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
8 * Authors:
9 * Keith Busch <kbusch@kernel.org>
10 * Klaus Jensen <k.jensen@samsung.com>
11 * Gollu Appalanaidu <anaidu.gollu@samsung.com>
12 * Dmitry Fomichev <dmitry.fomichev@wdc.com>
13 * Minwoo Im <minwoo.im.dev@gmail.com>
15 * This code is licensed under the GNU GPL v2 or later.
18 #ifndef HW_NVME_NVME_H
19 #define HW_NVME_NVME_H
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci_device.h"
23 #include "hw/block/block.h"
25 #include "block/nvme.h"
/*
 * Compile-time limits. NVME_MAX_CONTROLLERS sizes NvmeSubsystem.ctrls[] and
 * NVME_MAX_NAMESPACES (plus one) sizes the namespaces[] tables below.
 */
27 #define NVME_MAX_CONTROLLERS 256
28 #define NVME_MAX_NAMESPACES 256
/* Default EUI-64 base value -- presumably combined with other data elsewhere; TODO confirm. */
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
/* Capacity of NvmeFdpEventBuffer.events[]. */
30 #define NVME_FDP_MAX_EVENTS 63
31 #define NVME_FDP_MAXPIDS 128
34 * The controller only supports Submission and Completion Queue Entry Sizes of
35 * 64 and 16 bytes respectively.
/* Entry sizes expressed as powers of two: 2^6 = 64 bytes and 2^4 = 16 bytes. */
37 #define NVME_SQES 6
38 #define NVME_CQES 4
/* Fail the build unless NVME_MAX_NAMESPACES is strictly below NVME_NSID_BROADCAST. */
40 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
/* Forward declarations so the structures below can point at each other. */
42 typedef struct NvmeCtrl NvmeCtrl;
43 typedef struct NvmeNamespace NvmeNamespace;
/* Type name of the NVMe bus, declared through OBJECT_DECLARE_SIMPLE_TYPE. */
45 #define TYPE_NVME_BUS "nvme-bus"
46 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
/*
 * Bus object for the "nvme-bus" type. It holds no state of its own beyond
 * the embedded BusState.
 */
48 typedef struct NvmeBus {
49 BusState parent_bus;
50 } NvmeBus;
/* Type name of the NVMe subsystem device and the checked cast to NvmeSubsystem. */
52 #define TYPE_NVME_SUBSYS "nvme-subsys"
53 #define NVME_SUBSYS(obj) \
54 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
/*
 * Sentinel pointer value compared against NvmeSubsystem.ctrls[] entries;
 * nvme_subsys_ctrl() reports a slot holding it as NULL.
 *
 * The expansion is fully parenthesized so that it behaves as a single
 * operand in any expression (e.g. "sizeof SUBSYS_SLOT_RSVD").
 */
#define SUBSYS_SLOT_RSVD ((void *)0xFFFF)
/*
 * State kept per reclaim unit: a single 64-bit counter.
 * NOTE(review): "ruamw" presumably abbreviates "reclaim unit available media
 * writes" -- confirm against the NVMe FDP specification.
 */
57 typedef struct NvmeReclaimUnit {
58 uint64_t ruamw;
59 } NvmeReclaimUnit;
/*
 * Reclaim unit handle. Besides its scalar attributes it owns a pointer to
 * an array of reclaim units (rus), one per reclaim group.
 * NOTE(review): the exact meaning of ruht/ruha/lbafi is not visible here;
 * presumably they mirror the same-named NVMe FDP fields -- confirm.
 */
61 typedef struct NvmeRuHandle {
62 uint8_t ruht;
63 uint8_t ruha;
64 uint64_t event_filter;
65 uint8_t lbafi;
66 uint64_t ruamw;
68 /* reclaim units indexed by reclaim group */
69 NvmeReclaimUnit *rus;
70 } NvmeRuHandle;
/*
 * Fixed-capacity store of FDP events (NVME_FDP_MAX_EVENTS slots) together
 * with an element count and two positions.
 * NOTE(review): start/next look like the read and write positions of a ring
 * buffer -- confirm against the code that fills and drains it.
 */
72 typedef struct NvmeFdpEventBuffer {
73 NvmeFdpEvent events[NVME_FDP_MAX_EVENTS];
74 unsigned int nelems;
75 unsigned int start;
76 unsigned int next;
77 } NvmeFdpEventBuffer;
/*
 * Endurance group state. Everything FDP-related is grouped in the nested
 * "fdp" struct: two separate event buffers (host and controller events),
 * handle/group counts, 64-bit counters, an enable flag and the array of
 * reclaim unit handles.
 */
79 typedef struct NvmeEnduranceGroup {
80 uint8_t event_conf;
82 struct {
83 NvmeFdpEventBuffer host_events, ctrl_events;
/* nruh/nrg: presumably the number of reclaim unit handles / reclaim groups -- confirm. */
85 uint16_t nruh;
86 uint16_t nrg;
87 uint8_t rgif;
88 uint64_t runs;
/* 64-bit statistics counters; their update sites are outside this header. */
90 uint64_t hbmw;
91 uint64_t mbmw;
92 uint64_t mbe;
94 bool enabled;
96 NvmeRuHandle *ruhs;
97 } fdp;
98 } NvmeEnduranceGroup;
/*
 * NVMe subsystem device ("nvme-subsys"). It embeds a DeviceState and an
 * NvmeBus, and holds lookup tables for controllers and namespaces, one
 * endurance group, and the user-supplied parameters.
 */
100 typedef struct NvmeSubsystem {
101 DeviceState parent_obj;
102 NvmeBus bus;
103 uint8_t subnqn[256];
104 char *serial;
/* Indexed by controller id; a slot may hold SUBSYS_SLOT_RSVD (see nvme_subsys_ctrl). */
106 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
/* Indexed by NSID; one extra slot because nvme_subsys_ns() never hands out index 0. */
107 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
108 NvmeEnduranceGroup endgrp;
110 struct {
111 char *nqn;
/* Note that nrg is 32 bits wide here but 16 bits wide in NvmeEnduranceGroup.fdp. */
113 struct {
114 bool enabled;
115 uint64_t runs;
116 uint16_t nruh;
117 uint32_t nrg;
118 } fdp;
119 } params;
120 } NvmeSubsystem;
/*
 * Controller registration with a subsystem; both are implemented outside
 * this header. The register variant reports failure through @errp and its
 * int return value -- exact return convention to be confirmed at the definition.
 */
122 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
123 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
125 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
126 uint32_t cntlid)
128 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
129 return NULL;
132 if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
133 return NULL;
136 return subsys->ctrls[cntlid];
139 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
140 uint32_t nsid)
142 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
143 return NULL;
146 return subsys->namespaces[nsid];
/* Type name of the NVMe namespace device and the checked cast to NvmeNamespace. */
149 #define TYPE_NVME_NS "nvme-ns"
150 #define NVME_NS(obj) \
151 OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
/*
 * One zone of a zoned namespace: its zone descriptor, a write pointer kept
 * alongside it, and the linkage that lets the zone sit on one of the
 * NvmeZone tail queues declared in NvmeNamespace.
 */
153 typedef struct NvmeZone {
154 NvmeZoneDescr d;
155 uint64_t w_ptr;
156 QTAILQ_ENTRY(NvmeZone) entry;
157 } NvmeZone;
/* FDP_EVT_MAX is the element count of nvme_fdp_evf_shifts[] below. */
159 #define FDP_EVT_MAX 0xff
/* NOTE(review): presumably the per-namespace cap on reclaim unit handles -- confirm at the users. */
160 #define NVME_FDP_MAX_NS_RUHS 32u
161 #define FDPVSS 0
/*
 * Table indexed by FDP event type that yields a shift amount. The listed
 * host events use shifts 0-3 and the listed controller events use 32-33.
 * Presumably the shift selects a bit in a 64-bit event filter such as
 * NvmeRuHandle.event_filter -- TODO confirm at the users.
 *
 * NOTE(review): entries without an initializer are implicitly 0, the same
 * value as FDP_EVT_RU_NOT_FULLY_WRITTEN. The array has FDP_EVT_MAX (0xff)
 * elements, so index 0xff itself is out of bounds; confirm callers never
 * index with an unchecked 8-bit event type.
 */
163 static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
164 /* Host events */
165 [FDP_EVT_RU_NOT_FULLY_WRITTEN] = 0,
166 [FDP_EVT_RU_ATL_EXCEEDED] = 1,
167 [FDP_EVT_CTRL_RESET_RUH] = 2,
168 [FDP_EVT_INVALID_PID] = 3,
169 /* CTRL events */
170 [FDP_EVT_MEDIA_REALLOC] = 32,
171 [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33,
/*
 * Configuration values of a namespace, stored in NvmeNamespace.params.
 * The groups below follow the field layout; how each value is validated
 * and applied is decided outside this header.
 */
174 typedef struct NvmeNamespaceParams {
/* Identity and attachment. */
175 bool detached;
176 bool shared;
177 uint32_t nsid;
178 QemuUUID uuid;
179 uint64_t eui64;
180 bool eui64_default;
/* Metadata / protection information settings -- presumably; confirm at the users. */
182 uint16_t ms;
183 uint8_t mset;
184 uint8_t pi;
185 uint8_t pil;
186 uint8_t pif;
/* Limits -- presumably for the copy command (mssrl/mcl/msrc); confirm. */
188 uint16_t mssrl;
189 uint32_t mcl;
190 uint8_t msrc;
/* Zoned namespace settings. */
192 bool zoned;
193 bool cross_zone_read;
194 uint64_t zone_size_bs;
195 uint64_t zone_cap_bs;
196 uint32_t max_active_zones;
197 uint32_t max_open_zones;
198 uint32_t zd_extension_size;
/* ZRWA settings; NvmeNamespace.zns holds same-named fields with narrower types. */
200 uint32_t numzrwa;
201 uint64_t zrwas;
202 uint64_t zrwafg;
/* FDP: reclaim unit handle selection kept as a string; format defined by its parser. */
204 struct {
205 char *ruhs;
206 } fdp;
207 } NvmeNamespaceParams;
/*
 * NVMe namespace device ("nvme-ns"). It combines the block backend
 * configuration, the identify data structures, the current LBA format,
 * zoned-namespace bookkeeping, the configured parameters, and back
 * pointers to the owning subsystem and endurance group.
 */
209 typedef struct NvmeNamespace {
210 DeviceState parent_obj;
211 BlockConf blkconf;
212 int32_t bootindex;
213 int64_t size;
/* Base offset added by nvme_moff() when locating metadata. */
214 int64_t moff;
215 NvmeIdNs id_ns;
216 NvmeIdNsNvm id_ns_nvm;
/* Current LBA format; nvme_l2b()/nvme_m2b() read its ds and ms members. */
217 NvmeLBAF lbaf;
218 unsigned int nlbaf;
219 size_t lbasz;
220 const uint32_t *iocs;
221 uint8_t csi;
222 uint16_t status;
223 int attached;
224 uint8_t pif;
/* ZRWA values; narrower types than the same-named NvmeNamespaceParams fields. */
226 struct {
227 uint16_t zrwas;
228 uint16_t zrwafg;
229 uint32_t numzrwa;
230 } zns;
232 QTAILQ_ENTRY(NvmeNamespace) entry;
/* Zoned namespace state: zone array plus one tail queue per listed zone category. */
234 NvmeIdNsZoned *id_ns_zoned;
235 NvmeZone *zone_array;
236 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
237 QTAILQ_HEAD(, NvmeZone) imp_open_zones;
238 QTAILQ_HEAD(, NvmeZone) closed_zones;
239 QTAILQ_HEAD(, NvmeZone) full_zones;
240 uint32_t num_zones;
241 uint64_t zone_size;
242 uint64_t zone_capacity;
243 uint32_t zone_size_log2;
/* Flat buffer of per-zone extensions; indexed by nvme_get_zd_extension(). */
244 uint8_t *zd_extensions;
/* Counters maintained by the nvme_aor_{inc,dec}_{open,active}() helpers. */
245 int32_t nr_open_zones;
246 int32_t nr_active_zones;
248 NvmeNamespaceParams params;
249 NvmeSubsystem *subsys;
250 NvmeEnduranceGroup *endgrp;
252 struct {
253 uint32_t err_rec;
254 } features;
256 struct {
257 uint16_t nphs;
258 /* reclaim unit handle identifiers indexed by placement handle */
259 uint16_t *phs;
260 } fdp;
261 } NvmeNamespace;
263 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
265 if (ns) {
266 return ns->params.nsid;
269 return 0;
272 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
274 return lba << ns->lbaf.ds;
277 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
279 return ns->lbaf.ms * lba;
282 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
284 return ns->moff + nvme_m2b(ns, lba);
287 static inline bool nvme_ns_ext(NvmeNamespace *ns)
289 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
292 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
294 return zone->d.zs >> 4;
297 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
299 zone->d.zs = state << 4;
302 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
304 return zone->d.zslba + ns->zone_size;
307 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
309 return zone->d.zslba + zone->d.zcap;
312 static inline bool nvme_wp_is_valid(NvmeZone *zone)
314 uint8_t st = nvme_get_zone_state(zone);
316 return st != NVME_ZONE_STATE_FULL &&
317 st != NVME_ZONE_STATE_READ_ONLY &&
318 st != NVME_ZONE_STATE_OFFLINE;
321 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
322 uint32_t zone_idx)
324 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
327 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
329 assert(ns->nr_open_zones >= 0);
330 if (ns->params.max_open_zones) {
331 ns->nr_open_zones++;
332 assert(ns->nr_open_zones <= ns->params.max_open_zones);
336 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
338 if (ns->params.max_open_zones) {
339 assert(ns->nr_open_zones > 0);
340 ns->nr_open_zones--;
342 assert(ns->nr_open_zones >= 0);
345 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
347 assert(ns->nr_active_zones >= 0);
348 if (ns->params.max_active_zones) {
349 ns->nr_active_zones++;
350 assert(ns->nr_active_zones <= ns->params.max_active_zones);
354 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
356 if (ns->params.max_active_zones) {
357 assert(ns->nr_active_zones > 0);
358 ns->nr_active_zones--;
359 assert(ns->nr_active_zones >= ns->nr_open_zones);
361 assert(ns->nr_active_zones >= 0);
/*
 * Saturating add: increase the counter at @a by @b, clamping the result
 * to UINT64_MAX instead of wrapping around.
 */
static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
{
    const uint64_t headroom = UINT64_MAX - *a;

    if (b > headroom) {
        *a = UINT64_MAX;
    } else {
        *a += b;
    }
}
/*
 * Namespace lifecycle entry points, implemented outside this header.
 * nvme_ns_setup() is the only one that can report failure (via @errp and
 * its int return value); the exact return convention is defined at its
 * implementation.
 */
370 void nvme_ns_init_format(NvmeNamespace *ns);
371 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
372 void nvme_ns_drain(NvmeNamespace *ns);
373 void nvme_ns_shutdown(NvmeNamespace *ns);
374 void nvme_ns_cleanup(NvmeNamespace *ns);
/*
 * One asynchronous event result plus tail-queue linkage; NvmeCtrl keeps a
 * queue of these in its aer_queue member.
 */
376 typedef struct NvmeAsyncEvent {
377 QTAILQ_ENTRY(NvmeAsyncEvent) entry;
378 NvmeAerResult result;
379 } NvmeAsyncEvent;
/*
 * Single-bit flag values; presumably stored in NvmeSg.flags -- confirm at
 * the users. NVME_SG_DMA presumably selects the qsg member of the NvmeSg
 * union over iov -- TODO confirm.
 */
381 enum {
382 NVME_SG_ALLOC = 1 << 0,
383 NVME_SG_DMA = 1 << 1,
/*
 * Scatter/gather descriptor: a flags word plus a union that holds either
 * a QEMUSGList or a QEMUIOVector. Only one union member is valid at a
 * time; which one is presumably recorded in flags -- confirm at the users.
 */
386 typedef struct NvmeSg {
387 int flags;
389 union {
390 QEMUSGList qsg;
391 QEMUIOVector iov;
393 } NvmeSg;
/* Transfer direction selector passed to nvme_bounce_data()/nvme_bounce_mdata(). */
395 typedef enum NvmeTxDirection {
396 NVME_TX_DIRECTION_TO_DEVICE = 0,
397 NVME_TX_DIRECTION_FROM_DEVICE = 1,
398 } NvmeTxDirection;
/*
 * One command being tracked: the submission queue and namespace it refers
 * to, a block-layer AIO handle, a status, an opaque pointer, the completion
 * entry and the command itself, accounting state, the scatter/gather
 * descriptor, and linkage for the NvmeRequest tail queues declared in
 * NvmeSQueue and NvmeCQueue.
 */
400 typedef struct NvmeRequest {
401 struct NvmeSQueue *sq;
402 struct NvmeNamespace *ns;
403 BlockAIOCB *aiocb;
404 uint16_t status;
405 void *opaque;
/* nvme_cid() reads the command identifier from cqe.cid. */
406 NvmeCqe cqe;
407 NvmeCmd cmd;
408 BlockAcctCookie acct;
409 NvmeSg sg;
410 QTAILQ_ENTRY(NvmeRequest)entry;
411 } NvmeRequest;
/*
 * Bounce-buffer bookkeeping for a request: two identically shaped
 * {I/O vector, byte buffer} pairs, one named data and one named mdata.
 */
413 typedef struct NvmeBounceContext {
414 NvmeRequest *req;
416 struct {
417 QEMUIOVector iov;
418 uint8_t *bounce;
419 } data, mdata;
420 } NvmeBounceContext;
422 static inline const char *nvme_adm_opc_str(uint8_t opc)
424 switch (opc) {
425 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
426 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
427 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
428 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
429 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
430 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
431 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
432 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
433 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
434 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
435 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
436 case NVME_ADM_CMD_DIRECTIVE_SEND: return "NVME_ADM_CMD_DIRECTIVE_SEND";
437 case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT";
438 case NVME_ADM_CMD_DIRECTIVE_RECV: return "NVME_ADM_CMD_DIRECTIVE_RECV";
439 case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG";
440 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
441 default: return "NVME_ADM_CMD_UNKNOWN";
445 static inline const char *nvme_io_opc_str(uint8_t opc)
447 switch (opc) {
448 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
449 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
450 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
451 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
452 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
453 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
454 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
455 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
456 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
457 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
458 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
459 default: return "NVME_NVM_CMD_UNKNOWN";
/*
 * Submission queue state: owning controller, queue ids, head/tail/size,
 * guest addresses, a bottom half and event notifier, and the request
 * bookkeeping lists.
 */
463 typedef struct NvmeSQueue {
464 struct NvmeCtrl *ctrl;
465 uint16_t sqid;
/* Index into NvmeCtrl.cq[] of the paired completion queue (see nvme_cq()). */
466 uint16_t cqid;
467 uint32_t head;
468 uint32_t tail;
469 uint32_t size;
470 uint64_t dma_addr;
/* NOTE(review): db_addr/ei_addr presumably relate to NvmeCtrl.dbbuf_dbs/dbbuf_eis -- confirm. */
471 uint64_t db_addr;
472 uint64_t ei_addr;
473 QEMUBH *bh;
474 EventNotifier notifier;
475 bool ioeventfd_enabled;
476 NvmeRequest *io_req;
477 QTAILQ_HEAD(, NvmeRequest) req_list;
478 QTAILQ_HEAD(, NvmeRequest) out_req_list;
/* Linkage for a tail queue of NvmeSQueue, such as NvmeCQueue.sq_list. */
479 QTAILQ_ENTRY(NvmeSQueue) entry;
480 } NvmeSQueue;
/*
 * Completion queue state: owning controller, phase value, queue id,
 * interrupt settings, head/tail/size, guest addresses, a bottom half and
 * event notifier, plus lists of submission queues and requests.
 */
482 typedef struct NvmeCQueue {
483 struct NvmeCtrl *ctrl;
484 uint8_t phase;
485 uint16_t cqid;
486 uint16_t irq_enabled;
487 uint32_t head;
488 uint32_t tail;
489 uint32_t vector;
490 uint32_t size;
491 uint64_t dma_addr;
492 uint64_t db_addr;
493 uint64_t ei_addr;
494 QEMUBH *bh;
495 EventNotifier notifier;
496 bool ioeventfd_enabled;
/* Submission queues on this list are linked through NvmeSQueue.entry. */
497 QTAILQ_HEAD(, NvmeSQueue) sq_list;
498 QTAILQ_HEAD(, NvmeRequest) req_list;
499 } NvmeCQueue;
/* Type name of the NVMe controller device and the checked cast to NvmeCtrl. */
501 #define TYPE_NVME "nvme"
502 #define NVME(obj) \
503 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
/*
 * Configuration values of a controller, stored in NvmeCtrl.params. How
 * each value is validated and applied is decided outside this header.
 */
505 typedef struct NvmeParams {
506 char *serial;
507 uint32_t num_queues; /* deprecated since 5.1 */
508 uint32_t max_ioqpairs;
509 uint16_t msix_qsize;
510 uint32_t cmb_size_mb;
511 uint8_t aerl;
512 uint32_t aer_max_queued;
513 uint8_t mdts;
514 uint8_t vsl;
515 bool use_intel_id;
516 uint8_t zasl;
517 bool auto_transition_zones;
518 bool legacy_cmb;
519 bool ioeventfd;
/* SR-IOV related settings. */
520 uint8_t sriov_max_vfs;
521 uint16_t sriov_vq_flexible;
522 uint16_t sriov_vi_flexible;
523 uint8_t sriov_max_vq_per_vf;
524 uint8_t sriov_max_vi_per_vf;
525 } NvmeParams;
/*
 * NVMe controller device ("nvme"). It embeds the PCI device, its memory
 * regions and register block, the configured parameters, a namespace bus,
 * per-controller runtime state, queue tables, and the identify and
 * SR-IOV related data structures.
 */
527 typedef struct NvmeCtrl {
528 PCIDevice parent_obj;
529 MemoryRegion bar0;
530 MemoryRegion iomem;
531 NvmeBar bar;
532 NvmeParams params;
533 NvmeBus bus;
535 uint16_t cntlid;
536 bool qs_created;
537 uint32_t page_size;
538 uint16_t page_bits;
539 uint16_t max_prp_ents;
540 uint32_t max_q_ents;
541 uint8_t outstanding_aers;
542 uint32_t irq_status;
543 int cq_pending;
544 uint64_t host_timestamp; /* Timestamp sent by the host */
545 uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
546 uint64_t starttime_ms;
547 uint16_t temperature;
548 uint8_t smart_critical_warning;
549 uint32_t conf_msix_qsize;
550 uint32_t conf_ioqpairs;
551 uint64_t dbbuf_dbs;
552 uint64_t dbbuf_eis;
553 bool dbbuf_enabled;
/* cmb: memory region, backing buffer, and cmse/cba values. */
555 struct {
556 MemoryRegion mem;
557 uint8_t *buf;
558 bool cmse;
559 hwaddr cba;
560 } cmb;
/* pmr: host memory backend plus the same cmse/cba pair as cmb. */
562 struct {
563 HostMemoryBackend *dev;
564 bool cmse;
565 hwaddr cba;
566 } pmr;
/* Asynchronous event state: mask, request array, queued events and their count. */
568 uint8_t aer_mask;
569 NvmeRequest **aer_reqs;
570 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
571 int aer_queued;
573 uint32_t dmrsl;
575 /* Namespace ID is started with 1 so bitmap should be 1-based */
576 #define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
577 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
579 NvmeSubsystem *subsys;
/* A namespace object embedded directly in the controller, next to the NSID-indexed table. */
581 NvmeNamespace namespace;
/* Indexed by NSID; nvme_ns() only hands out entries 1..NVME_MAX_NAMESPACES. */
582 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
/* Queue pointer tables; nvme_cq() indexes cq[] with NvmeSQueue.cqid. */
583 NvmeSQueue **sq;
584 NvmeCQueue **cq;
585 NvmeSQueue admin_sq;
586 NvmeCQueue admin_cq;
587 NvmeIdCtrl id_ctrl;
589 struct {
590 struct {
591 uint16_t temp_thresh_hi;
592 uint16_t temp_thresh_low;
595 uint32_t async_config;
596 NvmeHostBehaviorSupport hbs;
597 } features;
/* SR-IOV: primary controller capabilities and the secondary controller list. */
599 NvmePriCtrlCap pri_ctrl_cap;
600 NvmeSecCtrlList sec_ctrl_list;
601 struct {
602 uint16_t vqrfap;
603 uint16_t virfap;
604 } next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */
605 } NvmeCtrl;
/* Kind of reset being performed; interpreted by the reset code outside this header. */
607 typedef enum NvmeResetType {
608 NVME_RESET_FUNCTION = 0,
609 NVME_RESET_CONTROLLER = 1,
610 } NvmeResetType;
612 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
614 if (!nsid || nsid > NVME_MAX_NAMESPACES) {
615 return NULL;
618 return n->namespaces[nsid];
621 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
623 NvmeSQueue *sq = req->sq;
624 NvmeCtrl *n = sq->ctrl;
626 return n->cq[sq->cqid];
629 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
631 NvmeSQueue *sq = req->sq;
632 return sq->ctrl;
635 static inline uint16_t nvme_cid(NvmeRequest *req)
637 if (!req) {
638 return 0xffff;
641 return le16_to_cpu(req->cqe.cid);
644 static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
646 PCIDevice *pci_dev = &n->parent_obj;
647 NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
649 if (pci_is_vf(pci_dev)) {
650 return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
653 return NULL;
656 static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
657 uint16_t cntlid)
659 NvmeSecCtrlList *list = &n->sec_ctrl_list;
660 uint8_t i;
662 for (i = 0; i < list->numcntl; i++) {
663 if (le16_to_cpu(list->sec[i].scid) == cntlid) {
664 return &list->sec[i];
668 return NULL;
/*
 * Controller-side entry points implemented outside this header. The
 * uint16_t-returning functions presumably return an NVMe status code --
 * confirm at their definitions.
 */
671 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
/* Bounce @len bytes at @ptr in direction @dir for request @req (data, then metadata variant). */
672 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
673 NvmeTxDirection dir, NvmeRequest *req);
674 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
675 NvmeTxDirection dir, NvmeRequest *req);
/* Completion callback taking an opaque pointer and an int result. */
676 void nvme_rw_complete_cb(void *opaque, int ret);
677 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
678 NvmeCmd *cmd);
680 #endif /* HW_NVME_NVME_H */