hw/block/nvme: remove non-shared defines from header file
[qemu/rayw.git] / hw/block/nvme-ns.c
blob: 93aaf6de02afd87328c9ac255038826817b0e10d
/*
 * QEMU NVM Express Virtual Namespace
 *
 * Copyright (c) 2019 CNEX Labs
 * Copyright (c) 2020 Samsung Electronics
 *
 * Authors:
 *  Klaus Jensen      <k.jensen@samsung.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See the
 * COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"

#include "nvme.h"
#include "trace.h"

#define MIN_DISCARD_GRANULARITY (4 * KiB)
#define NVME_DEFAULT_ZONE_SIZE  (128 * MiB)
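
/*
 * Initialize the size and deallocation-related fields of the identify
 * namespace data structure (NSZE, NCAP, NUSE, NPDG, NPDA) for the currently
 * selected LBA format. The preferred deallocate granularity is derived from
 * the discard granularity or, if larger, the backing image cluster size.
 */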
void nvme_ns_init_format(NvmeNamespace *ns)
{
    NvmeIdNs *id_ns = &ns->id_ns;
    BlockDriverInfo bdi;
    int npdg, nlbas, ret;

    nlbas = nvme_ns_nlbas(ns);

    id_ns->nsze = cpu_to_le64(nlbas);

    /* no thin provisioning */
    id_ns->ncap = id_ns->nsze;
    id_ns->nuse = id_ns->ncap;

    ns->mdata_offset = nvme_l2b(ns, nlbas);

    npdg = ns->blkconf.discard_granularity / nvme_lsize(ns);

    ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi);
    if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) {
        npdg = bdi.cluster_size / nvme_lsize(ns);
    }

    id_ns->npda = id_ns->npdg = npdg - 1;
}
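
/*
 * Populate the remaining identify namespace fields: supported LBA formats,
 * metadata and protection information settings and simple copy limits. If
 * the configured block/metadata sizes do not match a standard LBA format, a
 * non-standard format is appended before nvme_ns_init_format() is called.
 */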
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
    NvmeIdNs *id_ns = &ns->id_ns;
    uint8_t ds;
    uint16_t ms;
    int i;

    ns->csi = NVME_CSI_NVM;
    ns->status = 0x0;

    ns->id_ns.dlfeat = 0x1;

    /* support DULBE and I/O optimization fields */
    id_ns->nsfeat |= (0x4 | 0x10);

    if (ns->params.shared) {
        id_ns->nmic |= NVME_NMIC_NS_SHARED;
    }

    /* simple copy */
    id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
    id_ns->mcl = cpu_to_le32(ns->params.mcl);
    id_ns->msrc = ns->params.msrc;

    ds = 31 - clz32(ns->blkconf.logical_block_size);
    ms = ns->params.ms;

    if (ns->params.ms) {
        id_ns->mc = 0x3;

        if (ns->params.mset) {
            id_ns->flbas |= 0x10;
        }

        id_ns->dpc = 0x1f;
        id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi;

        NvmeLBAF lbaf[16] = {
            [0] = { .ds =  9           },
            [1] = { .ds =  9, .ms =  8 },
            [2] = { .ds =  9, .ms = 16 },
            [3] = { .ds =  9, .ms = 64 },
            [4] = { .ds = 12           },
            [5] = { .ds = 12, .ms =  8 },
            [6] = { .ds = 12, .ms = 16 },
            [7] = { .ds = 12, .ms = 64 },
        };

        memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
        id_ns->nlbaf = 7;
    } else {
        NvmeLBAF lbaf[16] = {
            [0] = { .ds =  9 },
            [1] = { .ds = 12 },
        };

        memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
        id_ns->nlbaf = 1;
    }

    for (i = 0; i <= id_ns->nlbaf; i++) {
        NvmeLBAF *lbaf = &id_ns->lbaf[i];
        if (lbaf->ds == ds) {
            if (lbaf->ms == ms) {
                id_ns->flbas |= i;
                goto lbaf_found;
            }
        }
    }

    /* add non-standard lba format */
    id_ns->nlbaf++;
    id_ns->lbaf[id_ns->nlbaf].ds = ds;
    id_ns->lbaf[id_ns->nlbaf].ms = ms;
    id_ns->flbas |= id_ns->nlbaf;

lbaf_found:
    nvme_ns_init_format(ns);

    return 0;
}
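
/*
 * Validate the block backend configuration, apply a default discard
 * granularity if none was given and record the size of the backing device.
 */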
static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
{
    bool read_only;

    if (!blkconf_blocksizes(&ns->blkconf, errp)) {
        return -1;
    }

    read_only = !blk_supports_write_perm(ns->blkconf.blk);
    if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) {
        return -1;
    }

    if (ns->blkconf.discard_granularity == -1) {
        ns->blkconf.discard_granularity =
            MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
    }

    ns->size = blk_getlength(ns->blkconf.blk);
    if (ns->size < 0) {
        error_setg_errno(errp, -ns->size, "could not get blockdev size");
        return -1;
    }

    return 0;
}
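
/*
 * Check the zoned namespace parameters for consistency and convert the zone
 * size and capacity from bytes to logical blocks, deriving the total number
 * of zones from the namespace capacity.
 */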
static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
{
    uint64_t zone_size, zone_cap;
    uint32_t lbasz = nvme_lsize(ns);

    /* Make sure that the values of ZNS properties are sane */
    if (ns->params.zone_size_bs) {
        zone_size = ns->params.zone_size_bs;
    } else {
        zone_size = NVME_DEFAULT_ZONE_SIZE;
    }
    if (ns->params.zone_cap_bs) {
        zone_cap = ns->params.zone_cap_bs;
    } else {
        zone_cap = zone_size;
    }
    if (zone_cap > zone_size) {
        error_setg(errp, "zone capacity %"PRIu64"B exceeds "
                   "zone size %"PRIu64"B", zone_cap, zone_size);
        return -1;
    }
    if (zone_size < lbasz) {
        error_setg(errp, "zone size %"PRIu64"B too small, "
                   "must be at least %"PRIu32"B", zone_size, lbasz);
        return -1;
    }
    if (zone_cap < lbasz) {
        error_setg(errp, "zone capacity %"PRIu64"B too small, "
                   "must be at least %"PRIu32"B", zone_cap, lbasz);
        return -1;
    }

    /*
     * Save the main zone geometry values to avoid
     * calculating them later again.
     */
    ns->zone_size = zone_size / lbasz;
    ns->zone_capacity = zone_cap / lbasz;
    ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size;

    /* Do a few more sanity checks of ZNS properties */
    if (!ns->num_zones) {
        error_setg(errp,
                   "insufficient drive capacity, must be at least the size "
                   "of one zone (%"PRIu64"B)", zone_size);
        return -1;
    }

    if (ns->params.max_open_zones > ns->num_zones) {
        error_setg(errp,
                   "max_open_zones value %u exceeds the number of zones %u",
                   ns->params.max_open_zones, ns->num_zones);
        return -1;
    }
    if (ns->params.max_active_zones > ns->num_zones) {
        error_setg(errp,
                   "max_active_zones value %u exceeds the number of zones %u",
                   ns->params.max_active_zones, ns->num_zones);
        return -1;
    }

    if (ns->params.max_active_zones) {
        if (ns->params.max_open_zones > ns->params.max_active_zones) {
            error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)",
                       ns->params.max_open_zones, ns->params.max_active_zones);
            return -1;
        }

        if (!ns->params.max_open_zones) {
            ns->params.max_open_zones = ns->params.max_active_zones;
        }
    }

    if (ns->params.zd_extension_size) {
        if (ns->params.zd_extension_size & 0x3f) {
            error_setg(errp,
                "zone descriptor extension size must be a multiple of 64B");
            return -1;
        }
        if ((ns->params.zd_extension_size >> 6) > 0xff) {
            error_setg(errp, "zone descriptor extension size is too large");
            return -1;
        }
    }

    return 0;
}
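
/*
 * Allocate the zone array (and zone descriptor extensions, if configured)
 * and initialize every zone as an empty sequential write required zone.
 */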
static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
{
    uint64_t start = 0, zone_size = ns->zone_size;
    uint64_t capacity = ns->num_zones * zone_size;
    NvmeZone *zone;
    int i;

    ns->zone_array = g_new0(NvmeZone, ns->num_zones);
    if (ns->params.zd_extension_size) {
        ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
                                      ns->num_zones);
    }

    QTAILQ_INIT(&ns->exp_open_zones);
    QTAILQ_INIT(&ns->imp_open_zones);
    QTAILQ_INIT(&ns->closed_zones);
    QTAILQ_INIT(&ns->full_zones);

    zone = ns->zone_array;
    for (i = 0; i < ns->num_zones; i++, zone++) {
        if (start + zone_size > capacity) {
            zone_size = capacity - start;
        }
        zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
        zone->d.za = 0;
        zone->d.zcap = ns->zone_capacity;
        zone->d.zslba = start;
        zone->d.wp = start;
        zone->w_ptr = start;
        start += zone_size;
    }

    ns->zone_size_log2 = 0;
    if (is_power_of_2(ns->zone_size)) {
        ns->zone_size_log2 = 63 - clz64(ns->zone_size);
    }
}
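
/*
 * Build the zoned command set specific identify namespace data structure
 * and switch the namespace over to the Zoned Namespace command set.
 */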
static void nvme_ns_init_zoned(NvmeNamespace *ns)
{
    NvmeIdNsZoned *id_ns_z;
    int i;

    nvme_ns_zoned_init_state(ns);

    id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));

    /* MAR/MOR are zeroes-based, FFFFFFFFh means no limit */
    id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
    id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
    id_ns_z->zoc = 0;
    id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;

    for (i = 0; i <= ns->id_ns.nlbaf; i++) {
        id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size);
        id_ns_z->lbafe[i].zdes =
            ns->params.zd_extension_size >> 6; /* Units of 64B */
    }

    ns->csi = NVME_CSI_ZONED;
    ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
    ns->id_ns.ncap = ns->id_ns.nsze;
    ns->id_ns.nuse = ns->id_ns.ncap;

    /*
     * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
     * status of logical blocks. Since the spec defines that logical blocks
     * SHALL be deallocated when the zone is in the Empty or Offline states,
     * we can only support DULBE if the zone size is a multiple of the
     * calculated NPDG.
     */
    if (ns->zone_size % (ns->id_ns.npdg + 1)) {
        warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
                    "the calculated deallocation granularity (%d blocks); "
                    "DULBE support disabled",
                    ns->zone_size, ns->id_ns.npdg + 1);

        ns->id_ns.nsfeat &= ~0x4;
    }

    ns->id_ns_zoned = id_ns_z;
}
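
/*
 * Return a zone to a consistent state on shutdown: partially written zones
 * (and zones with a valid descriptor extension) are moved to Closed, all
 * other zones are reset to Empty.
 */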
static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
{
    uint8_t state;

    zone->w_ptr = zone->d.wp;
    state = nvme_get_zone_state(zone);
    if (zone->d.wp != zone->d.zslba ||
        (zone->d.za & NVME_ZA_ZD_EXT_VALID)) {
        if (state != NVME_ZONE_STATE_CLOSED) {
            trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
            nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
        }
        nvme_aor_inc_active(ns);
        QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
    } else {
        trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
    }
}

/*
 * Close all the zones that are currently open.
 */
static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
{
    NvmeZone *zone, *next;

    QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
        QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }
    QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
        QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
        nvme_aor_dec_open(ns);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }
    QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
        QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
        nvme_aor_dec_open(ns);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }

    assert(ns->nr_open_zones == 0);
}
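
/*
 * Reject parameter combinations that cannot be supported, such as protection
 * information without enough metadata bytes, or shared/detached namespaces
 * without an nvme-subsys device.
 */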
static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns,
                                     Error **errp)
{
    if (!ns->blkconf.blk) {
        error_setg(errp, "block backend not configured");
        return -1;
    }

    if (ns->params.pi && ns->params.ms < 8) {
        error_setg(errp, "at least 8 bytes of metadata required to enable "
                   "protection information");
        return -1;
    }

    if (ns->params.nsid > NVME_MAX_NAMESPACES) {
        error_setg(errp, "invalid namespace id (must be between 0 and %d)",
                   NVME_MAX_NAMESPACES);
        return -1;
    }

    if (!n->subsys) {
        if (ns->params.detached) {
            error_setg(errp, "detached requires that the nvme device is "
                       "linked to an nvme-subsys device");
            return -1;
        }

        if (ns->params.shared) {
            error_setg(errp, "shared requires that the nvme device is "
                       "linked to an nvme-subsys device");
            return -1;
        }
    }

    return 0;
}
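
/*
 * Top-level namespace setup: check constraints, initialize the block backend
 * and the identify data structures, and set up the zoned state if requested.
 */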
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
{
    if (nvme_ns_check_constraints(n, ns, errp)) {
        return -1;
    }

    if (nvme_ns_init_blk(ns, errp)) {
        return -1;
    }

    if (nvme_ns_init(ns, errp)) {
        return -1;
    }
    if (ns->params.zoned) {
        if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
            return -1;
        }
        nvme_ns_init_zoned(ns);
    }

    return 0;
}

void nvme_ns_drain(NvmeNamespace *ns)
{
    blk_drain(ns->blkconf.blk);
}

void nvme_ns_shutdown(NvmeNamespace *ns)
{
    blk_flush(ns->blkconf.blk);
    if (ns->params.zoned) {
        nvme_zoned_ns_shutdown(ns);
    }
}

void nvme_ns_cleanup(NvmeNamespace *ns)
{
    if (ns->params.zoned) {
        g_free(ns->id_ns_zoned);
        g_free(ns->zone_array);
        g_free(ns->zd_extensions);
    }
}
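
/*
 * qdev realize callback: set up the namespace, allocate a namespace id if
 * none was specified, register the namespace with the subsystem (if any)
 * and attach it to the appropriate controller(s).
 */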
static void nvme_ns_realize(DeviceState *dev, Error **errp)
{
    NvmeNamespace *ns = NVME_NS(dev);
    BusState *s = qdev_get_parent_bus(dev);
    NvmeCtrl *n = NVME(s->parent);
    NvmeSubsystem *subsys = n->subsys;
    uint32_t nsid = ns->params.nsid;
    int i;

    if (nvme_ns_setup(n, ns, errp)) {
        return;
    }

    if (!nsid) {
        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
            if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
                continue;
            }

            nsid = ns->params.nsid = i;
            break;
        }

        if (!nsid) {
            error_setg(errp, "no free namespace id");
            return;
        }
    } else {
        if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
            error_setg(errp, "namespace id '%d' already allocated", nsid);
            return;
        }
    }

    if (subsys) {
        subsys->namespaces[nsid] = ns;

        if (ns->params.detached) {
            return;
        }

        if (ns->params.shared) {
            for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
                NvmeCtrl *ctrl = subsys->ctrls[i];

                if (ctrl) {
                    nvme_attach_ns(ctrl, ns);
                }
            }

            return;
        }
    }

    nvme_attach_ns(n, ns);
}

static Property nvme_ns_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
    DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
    DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false),
    DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
    DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
    DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
    DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
    DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
    DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0),
    DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
    DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
    DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
    DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
    DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
                     NVME_DEFAULT_ZONE_SIZE),
    DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
                     0),
    DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
                     params.cross_zone_read, false),
    DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
                       params.max_active_zones, 0),
    DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
                       params.max_open_zones, 0),
    DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
                       params.zd_extension_size, 0),
    DEFINE_PROP_END_OF_LIST(),
};

static void nvme_ns_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);

    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);

    dc->bus_type = TYPE_NVME_BUS;
    dc->realize = nvme_ns_realize;
    device_class_set_props(dc, nvme_ns_props);
    dc->desc = "Virtual NVMe namespace";
}

static void nvme_ns_instance_init(Object *obj)
{
    NvmeNamespace *ns = NVME_NS(obj);
    char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid);

    device_add_bootindex_property(obj, &ns->bootindex, "bootindex",
                                  bootindex, DEVICE(obj));

    g_free(bootindex);
}

static const TypeInfo nvme_ns_info = {
    .name = TYPE_NVME_NS,
    .parent = TYPE_DEVICE,
    .class_init = nvme_ns_class_init,
    .instance_size = sizeof(NvmeNamespace),
    .instance_init = nvme_ns_instance_init,
};

static void nvme_ns_register_types(void)
{
    type_register_static(&nvme_ns_info);
}

type_init(nvme_ns_register_types)