xref: /openbmc/qemu/hw/nvme/ns.c (revision 63dc36944383f70f1c7a20f6104966d8560300fa)
1 /*
2  * QEMU NVM Express Virtual Namespace
3  *
4  * Copyright (c) 2019 CNEX Labs
5  * Copyright (c) 2020 Samsung Electronics
6  *
7  * Authors:
8  *  Klaus Jensen      <k.jensen@samsung.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2. See the
11  * COPYING file in the top-level directory.
12  *
13  */
14 
15 #include "qemu/osdep.h"
16 #include "qemu/units.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qemu/bitops.h"
21 #include "sysemu/sysemu.h"
22 #include "sysemu/block-backend.h"
23 
24 #include "nvme.h"
25 #include "trace.h"
26 
/*
 * Lower bound used by nvme_ns_init_blk() when it chooses the discard
 * granularity itself (i.e. the blkconf value was left at -1).
 */
#define MIN_DISCARD_GRANULARITY (4 * KiB)
/*
 * Default of the "zoned.zone_size" property; also the fallback used by
 * nvme_ns_zoned_check_calc_geometry() when the property value is zero.
 */
#define NVME_DEFAULT_ZONE_SIZE   (128 * MiB)
29 
30 void nvme_ns_init_format(NvmeNamespace *ns)
31 {
32     NvmeIdNs *id_ns = &ns->id_ns;
33     NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm;
34     BlockDriverInfo bdi;
35     int npdg, ret;
36     int64_t nlbas;
37 
38     ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
39     ns->lbasz = 1 << ns->lbaf.ds;
40 
41     nlbas = ns->size / (ns->lbasz + ns->lbaf.ms);
42 
43     id_ns->nsze = cpu_to_le64(nlbas);
44 
45     /* no thin provisioning */
46     id_ns->ncap = id_ns->nsze;
47     id_ns->nuse = id_ns->ncap;
48 
49     ns->moff = nlbas << ns->lbaf.ds;
50 
51     npdg = ns->blkconf.discard_granularity / ns->lbasz;
52 
53     ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi);
54     if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) {
55         npdg = bdi.cluster_size / ns->lbasz;
56     }
57 
58     id_ns->npda = id_ns->npdg = npdg - 1;
59     id_ns_nvm->npdal = npdg;
60     id_ns_nvm->npdgl = npdg;
61 }
62 
63 static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
64 {
65     static uint64_t ns_count;
66     NvmeIdNs *id_ns = &ns->id_ns;
67     NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm;
68     NvmeIdNsInd *id_ns_ind = &ns->id_ns_ind;
69     uint8_t ds;
70     uint16_t ms;
71     int i;
72 
73     ns->csi = NVME_CSI_NVM;
74     ns->status = 0x0;
75 
76     ns->id_ns.dlfeat = 0x1;
77 
78     /* support DULBE and I/O optimization fields */
79     id_ns->nsfeat |= (NVME_ID_NS_NSFEAT_DAE | NVME_ID_NS_NSFEAT_OPTPERF_ALL);
80 
81     if (ns->params.shared) {
82         id_ns->nmic |= NVME_ID_NS_IND_NMIC_SHRNS;
83         id_ns_ind->nmic = NVME_ID_NS_IND_NMIC_SHRNS;
84         id_ns_ind->nstat = NVME_ID_NS_IND_NSTAT_NRDY;
85     }
86 
87     /* Substitute a missing EUI-64 by an autogenerated one */
88     ++ns_count;
89     if (!ns->params.eui64 && ns->params.eui64_default) {
90         ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT;
91     }
92 
93     /* simple copy */
94     id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
95     id_ns->mcl = cpu_to_le32(ns->params.mcl);
96     id_ns->msrc = ns->params.msrc;
97     id_ns->eui64 = cpu_to_be64(ns->params.eui64);
98     memcpy(&id_ns->nguid, &ns->params.nguid.data, sizeof(id_ns->nguid));
99 
100     ds = 31 - clz32(ns->blkconf.logical_block_size);
101     ms = ns->params.ms;
102 
103     id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE;
104 
105     if (ms && ns->params.mset) {
106         id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED;
107     }
108 
109     id_ns->dpc = 0x1f;
110     id_ns->dps = ns->params.pi;
111     if (ns->params.pi && ns->params.pil) {
112         id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT;
113     }
114 
115     ns->pif = ns->params.pif;
116 
117     static const NvmeLBAF defaults[16] = {
118         [0] = { .ds =  9           },
119         [1] = { .ds =  9, .ms =  8 },
120         [2] = { .ds =  9, .ms = 16 },
121         [3] = { .ds =  9, .ms = 64 },
122         [4] = { .ds = 12           },
123         [5] = { .ds = 12, .ms =  8 },
124         [6] = { .ds = 12, .ms = 16 },
125         [7] = { .ds = 12, .ms = 64 },
126     };
127 
128     ns->nlbaf = 8;
129 
130     memcpy(&id_ns->lbaf, &defaults, sizeof(defaults));
131 
132     for (i = 0; i < ns->nlbaf; i++) {
133         NvmeLBAF *lbaf = &id_ns->lbaf[i];
134         if (lbaf->ds == ds) {
135             if (lbaf->ms == ms) {
136                 id_ns->flbas |= i;
137                 goto lbaf_found;
138             }
139         }
140     }
141 
142     /* add non-standard lba format */
143     id_ns->lbaf[ns->nlbaf].ds = ds;
144     id_ns->lbaf[ns->nlbaf].ms = ms;
145     ns->nlbaf++;
146 
147     id_ns->flbas |= i;
148 
149 
150 lbaf_found:
151     id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7;
152     id_ns->nlbaf = ns->nlbaf - 1;
153     nvme_ns_init_format(ns);
154 
155     return 0;
156 }
157 
158 static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
159 {
160     bool read_only;
161 
162     if (!blkconf_blocksizes(&ns->blkconf, errp)) {
163         return -1;
164     }
165 
166     read_only = !blk_supports_write_perm(ns->blkconf.blk);
167     if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) {
168         return -1;
169     }
170 
171     if (ns->blkconf.discard_granularity == -1) {
172         ns->blkconf.discard_granularity =
173             MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
174     }
175 
176     ns->size = blk_getlength(ns->blkconf.blk);
177     if (ns->size < 0) {
178         error_setg_errno(errp, -ns->size, "could not get blockdev size");
179         return -1;
180     }
181 
182     return 0;
183 }
184 
185 static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
186 {
187     uint64_t zone_size, zone_cap;
188 
189     /* Make sure that the values of ZNS properties are sane */
190     if (ns->params.zone_size_bs) {
191         zone_size = ns->params.zone_size_bs;
192     } else {
193         zone_size = NVME_DEFAULT_ZONE_SIZE;
194     }
195     if (ns->params.zone_cap_bs) {
196         zone_cap = ns->params.zone_cap_bs;
197     } else {
198         zone_cap = zone_size;
199     }
200     if (zone_cap > zone_size) {
201         error_setg(errp, "zone capacity %"PRIu64"B exceeds "
202                    "zone size %"PRIu64"B", zone_cap, zone_size);
203         return -1;
204     }
205     if (zone_size < ns->lbasz) {
206         error_setg(errp, "zone size %"PRIu64"B too small, "
207                    "must be at least %zuB", zone_size, ns->lbasz);
208         return -1;
209     }
210     if (zone_cap < ns->lbasz) {
211         error_setg(errp, "zone capacity %"PRIu64"B too small, "
212                    "must be at least %zuB", zone_cap, ns->lbasz);
213         return -1;
214     }
215 
216     /*
217      * Save the main zone geometry values to avoid
218      * calculating them later again.
219      */
220     ns->zone_size = zone_size / ns->lbasz;
221     ns->zone_capacity = zone_cap / ns->lbasz;
222     ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size;
223 
224     /* Do a few more sanity checks of ZNS properties */
225     if (!ns->num_zones) {
226         error_setg(errp,
227                    "insufficient drive capacity, must be at least the size "
228                    "of one zone (%"PRIu64"B)", zone_size);
229         return -1;
230     }
231 
232     return 0;
233 }
234 
235 static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
236 {
237     uint64_t start = 0, zone_size = ns->zone_size;
238     uint64_t capacity = ns->num_zones * zone_size;
239     NvmeZone *zone;
240     int i;
241 
242     ns->zone_array = g_new0(NvmeZone, ns->num_zones);
243     if (ns->params.zd_extension_size) {
244         ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
245                                       ns->num_zones);
246     }
247 
248     QTAILQ_INIT(&ns->exp_open_zones);
249     QTAILQ_INIT(&ns->imp_open_zones);
250     QTAILQ_INIT(&ns->closed_zones);
251     QTAILQ_INIT(&ns->full_zones);
252 
253     zone = ns->zone_array;
254     for (i = 0; i < ns->num_zones; i++, zone++) {
255         if (start + zone_size > capacity) {
256             zone_size = capacity - start;
257         }
258         zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
259         nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
260         zone->d.za = 0;
261         zone->d.zcap = ns->zone_capacity;
262         zone->d.zslba = start;
263         zone->d.wp = start;
264         zone->w_ptr = start;
265         start += zone_size;
266     }
267 
268     ns->zone_size_log2 = 0;
269     if (is_power_of_2(ns->zone_size)) {
270         ns->zone_size_log2 = 63 - clz64(ns->zone_size);
271     }
272 }
273 
274 static void nvme_ns_init_zoned(NvmeNamespace *ns)
275 {
276     NvmeIdNsZoned *id_ns_z;
277     int i;
278 
279     nvme_ns_zoned_init_state(ns);
280 
281     id_ns_z = g_new0(NvmeIdNsZoned, 1);
282 
283     /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */
284     id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
285     id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
286     id_ns_z->zoc = 0;
287     id_ns_z->ozcs = ns->params.cross_zone_read ?
288         NVME_ID_NS_ZONED_OZCS_RAZB : 0x00;
289 
290     for (i = 0; i <= ns->id_ns.nlbaf; i++) {
291         id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size);
292         id_ns_z->lbafe[i].zdes =
293             ns->params.zd_extension_size >> 6; /* Units of 64B */
294     }
295 
296     if (ns->params.zrwas) {
297         ns->zns.numzrwa = ns->params.numzrwa ?
298             ns->params.numzrwa : ns->num_zones;
299 
300         ns->zns.zrwas = ns->params.zrwas >> ns->lbaf.ds;
301         ns->zns.zrwafg = ns->params.zrwafg >> ns->lbaf.ds;
302 
303         id_ns_z->ozcs |= NVME_ID_NS_ZONED_OZCS_ZRWASUP;
304         id_ns_z->zrwacap = NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP;
305 
306         id_ns_z->numzrwa = cpu_to_le32(ns->params.numzrwa);
307         id_ns_z->zrwas = cpu_to_le16(ns->zns.zrwas);
308         id_ns_z->zrwafg = cpu_to_le16(ns->zns.zrwafg);
309     }
310 
311     id_ns_z->ozcs = cpu_to_le16(id_ns_z->ozcs);
312 
313     ns->csi = NVME_CSI_ZONED;
314     ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
315     ns->id_ns.ncap = ns->id_ns.nsze;
316     ns->id_ns.nuse = ns->id_ns.ncap;
317 
318     /*
319      * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
320      * status of logical blocks. Since the spec defines that logical blocks
321      * SHALL be deallocated when then zone is in the Empty or Offline states,
322      * we can only support DULBE if the zone size is a multiple of the
323      * calculated NPDG.
324      */
325     if (ns->zone_size % (ns->id_ns.npdg + 1)) {
326         warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
327                     "the calculated deallocation granularity (%d blocks); "
328                     "DULBE support disabled",
329                     ns->zone_size, ns->id_ns.npdg + 1);
330 
331         ns->id_ns.nsfeat &= ~0x4;
332     }
333 
334     ns->id_ns_zoned = id_ns_z;
335 }
336 
337 static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
338 {
339     uint8_t state;
340 
341     zone->w_ptr = zone->d.wp;
342     state = nvme_get_zone_state(zone);
343     if (zone->d.wp != zone->d.zslba ||
344         (zone->d.za & NVME_ZA_ZD_EXT_VALID)) {
345         if (state != NVME_ZONE_STATE_CLOSED) {
346             trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
347             nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
348         }
349         nvme_aor_inc_active(ns);
350         QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
351     } else {
352         trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
353         if (zone->d.za & NVME_ZA_ZRWA_VALID) {
354             zone->d.za &= ~NVME_ZA_ZRWA_VALID;
355             ns->zns.numzrwa++;
356         }
357         nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
358     }
359 }
360 
361 /*
362  * Close all the zones that are currently open.
363  */
364 static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
365 {
366     NvmeZone *zone, *next;
367 
368     QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
369         QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
370         nvme_aor_dec_active(ns);
371         nvme_clear_zone(ns, zone);
372     }
373     QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
374         QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
375         nvme_aor_dec_open(ns);
376         nvme_aor_dec_active(ns);
377         nvme_clear_zone(ns, zone);
378     }
379     QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
380         QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
381         nvme_aor_dec_open(ns);
382         nvme_aor_dec_active(ns);
383         nvme_clear_zone(ns, zone);
384     }
385 
386     assert(ns->nr_open_zones == 0);
387 }
388 
389 static NvmeRuHandle *nvme_find_ruh_by_attr(NvmeEnduranceGroup *endgrp,
390                                            uint8_t ruha, uint16_t *ruhid)
391 {
392     for (uint16_t i = 0; i < endgrp->fdp.nruh; i++) {
393         NvmeRuHandle *ruh = &endgrp->fdp.ruhs[i];
394 
395         if (ruh->ruha == ruha) {
396             *ruhid = i;
397             return ruh;
398         }
399     }
400 
401     return NULL;
402 }
403 
404 static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp)
405 {
406     NvmeEnduranceGroup *endgrp = ns->endgrp;
407     NvmeRuHandle *ruh;
408     uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
409     g_autofree unsigned int *ruhids = NULL;
410     unsigned int n, m, *ruhid;
411     const char *endptr, *token;
412     char *r, *p;
413     uint16_t *ph;
414 
415     if (!ns->params.fdp.ruhs) {
416         ns->fdp.nphs = 1;
417         ph = ns->fdp.phs = g_new(uint16_t, 1);
418 
419         ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_CTRL, ph);
420         if (!ruh) {
421             ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_UNUSED, ph);
422             if (!ruh) {
423                 error_setg(errp, "no unused reclaim unit handles left");
424                 return false;
425             }
426 
427             ruh->ruha = NVME_RUHA_CTRL;
428             ruh->lbafi = lbafi;
429             ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds;
430 
431             for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) {
432                 ruh->rus[rg].ruamw = ruh->ruamw;
433             }
434         } else if (ruh->lbafi != lbafi) {
435             error_setg(errp, "lba format index of controller assigned "
436                        "reclaim unit handle does not match namespace lba "
437                        "format index");
438             return false;
439         }
440 
441         return true;
442     }
443 
444     ruhid = ruhids = g_new0(unsigned int, endgrp->fdp.nruh);
445     r = p = strdup(ns->params.fdp.ruhs);
446 
447     /* parse the placement handle identifiers */
448     while ((token = qemu_strsep(&p, ";")) != NULL) {
449         if (qemu_strtoui(token, &endptr, 0, &n) < 0) {
450             error_setg(errp, "cannot parse reclaim unit handle identifier");
451             free(r);
452             return false;
453         }
454 
455         m = n;
456 
457         /* parse range */
458         if (*endptr == '-') {
459             token = endptr + 1;
460 
461             if (qemu_strtoui(token, NULL, 0, &m) < 0) {
462                 error_setg(errp, "cannot parse reclaim unit handle identifier");
463                 free(r);
464                 return false;
465             }
466 
467             if (m < n) {
468                 error_setg(errp, "invalid reclaim unit handle identifier range");
469                 free(r);
470                 return false;
471             }
472         }
473 
474         for (; n <= m; n++) {
475             if (ns->fdp.nphs++ == endgrp->fdp.nruh) {
476                 error_setg(errp, "too many placement handles");
477                 free(r);
478                 return false;
479             }
480 
481             *ruhid++ = n;
482         }
483     }
484 
485     free(r);
486 
487     /* verify that the ruhids are unique */
488     for (unsigned int i = 0; i < ns->fdp.nphs; i++) {
489         for (unsigned int j = i + 1; j < ns->fdp.nphs; j++) {
490             if (ruhids[i] == ruhids[j]) {
491                 error_setg(errp, "duplicate reclaim unit handle identifier: %u",
492                            ruhids[i]);
493                 return false;
494             }
495         }
496     }
497 
498     ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs);
499 
500     ruhid = ruhids;
501 
502     /* verify the identifiers */
503     for (unsigned int i = 0; i < ns->fdp.nphs; i++, ruhid++, ph++) {
504         if (*ruhid >= endgrp->fdp.nruh) {
505             error_setg(errp, "invalid reclaim unit handle identifier");
506             return false;
507         }
508 
509         ruh = &endgrp->fdp.ruhs[*ruhid];
510 
511         switch (ruh->ruha) {
512         case NVME_RUHA_UNUSED:
513             ruh->ruha = NVME_RUHA_HOST;
514             ruh->lbafi = lbafi;
515             ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds;
516 
517             for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) {
518                 ruh->rus[rg].ruamw = ruh->ruamw;
519             }
520 
521             break;
522 
523         case NVME_RUHA_HOST:
524             if (ruh->lbafi != lbafi) {
525                 error_setg(errp, "lba format index of host assigned"
526                            "reclaim unit handle does not match namespace "
527                            "lba format index");
528                 return false;
529             }
530 
531             break;
532 
533         case NVME_RUHA_CTRL:
534             error_setg(errp, "reclaim unit handle is controller assigned");
535             return false;
536 
537         default:
538             abort();
539         }
540 
541         *ph = *ruhid;
542     }
543 
544     return true;
545 }
546 
547 static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
548 {
549     unsigned int pi_size;
550 
551     if (!ns->blkconf.blk) {
552         error_setg(errp, "block backend not configured");
553         return -1;
554     }
555 
556     if (ns->params.pi) {
557         if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) {
558             error_setg(errp, "invalid 'pi' value");
559             return -1;
560         }
561 
562         switch (ns->params.pif) {
563         case NVME_PI_GUARD_16:
564             pi_size = 8;
565             break;
566         case NVME_PI_GUARD_64:
567             pi_size = 16;
568             break;
569         default:
570             error_setg(errp, "invalid 'pif'");
571             return -1;
572         }
573 
574         if (ns->params.ms < pi_size) {
575             error_setg(errp, "at least %u bytes of metadata required to "
576                        "enable protection information", pi_size);
577             return -1;
578         }
579     }
580 
581     if (ns->params.nsid > NVME_MAX_NAMESPACES) {
582         error_setg(errp, "invalid namespace id (must be between 0 and %d)",
583                    NVME_MAX_NAMESPACES);
584         return -1;
585     }
586 
587     if (ns->params.zoned && ns->endgrp && ns->endgrp->fdp.enabled) {
588         error_setg(errp, "cannot be a zoned- in an FDP configuration");
589         return -1;
590     }
591 
592     if (ns->params.zoned) {
593         if (ns->params.max_active_zones) {
594             if (ns->params.max_open_zones > ns->params.max_active_zones) {
595                 error_setg(errp, "max_open_zones (%u) exceeds "
596                            "max_active_zones (%u)", ns->params.max_open_zones,
597                            ns->params.max_active_zones);
598                 return -1;
599             }
600 
601             if (!ns->params.max_open_zones) {
602                 ns->params.max_open_zones = ns->params.max_active_zones;
603             }
604         }
605 
606         if (ns->params.zd_extension_size) {
607             if (ns->params.zd_extension_size & 0x3f) {
608                 error_setg(errp, "zone descriptor extension size must be a "
609                            "multiple of 64B");
610                 return -1;
611             }
612             if ((ns->params.zd_extension_size >> 6) > 0xff) {
613                 error_setg(errp,
614                            "zone descriptor extension size is too large");
615                 return -1;
616             }
617         }
618 
619         if (ns->params.zrwas) {
620             if (ns->params.zrwas % ns->blkconf.logical_block_size) {
621                 error_setg(errp, "zone random write area size (zoned.zrwas "
622                            "%"PRIu64") must be a multiple of the logical "
623                            "block size (logical_block_size %"PRIu32")",
624                            ns->params.zrwas, ns->blkconf.logical_block_size);
625                 return -1;
626             }
627 
628             if (ns->params.zrwafg == -1) {
629                 ns->params.zrwafg = ns->blkconf.logical_block_size;
630             }
631 
632             if (ns->params.zrwas % ns->params.zrwafg) {
633                 error_setg(errp, "zone random write area size (zoned.zrwas "
634                            "%"PRIu64") must be a multiple of the zone random "
635                            "write area flush granularity (zoned.zrwafg, "
636                            "%"PRIu64")", ns->params.zrwas, ns->params.zrwafg);
637                 return -1;
638             }
639 
640             if (ns->params.max_active_zones) {
641                 if (ns->params.numzrwa > ns->params.max_active_zones) {
642                     error_setg(errp, "number of zone random write area "
643                                "resources (zoned.numzrwa, %d) must be less "
644                                "than or equal to maximum active resources "
645                                "(zoned.max_active_zones, %d)",
646                                ns->params.numzrwa,
647                                ns->params.max_active_zones);
648                     return -1;
649                 }
650             }
651         }
652     }
653 
654     return 0;
655 }
656 
657 int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
658 {
659     if (nvme_ns_check_constraints(ns, errp)) {
660         return -1;
661     }
662 
663     if (nvme_ns_init_blk(ns, errp)) {
664         return -1;
665     }
666 
667     if (nvme_ns_init(ns, errp)) {
668         return -1;
669     }
670     if (ns->params.zoned) {
671         if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
672             return -1;
673         }
674         nvme_ns_init_zoned(ns);
675     }
676 
677     if (ns->endgrp && ns->endgrp->fdp.enabled) {
678         if (!nvme_ns_init_fdp(ns, errp)) {
679             return -1;
680         }
681     }
682 
683     return 0;
684 }
685 
/* Drain the block backend of the namespace via blk_drain(). */
void nvme_ns_drain(NvmeNamespace *ns)
{
    blk_drain(ns->blkconf.blk);
}
690 
691 void nvme_ns_shutdown(NvmeNamespace *ns)
692 {
693     blk_flush(ns->blkconf.blk);
694     if (ns->params.zoned) {
695         nvme_zoned_ns_shutdown(ns);
696     }
697 }
698 
699 void nvme_ns_cleanup(NvmeNamespace *ns)
700 {
701     if (ns->params.zoned) {
702         g_free(ns->id_ns_zoned);
703         g_free(ns->zone_array);
704         g_free(ns->zd_extensions);
705     }
706 
707     if (ns->endgrp && ns->endgrp->fdp.enabled) {
708         g_free(ns->fdp.phs);
709     }
710 }
711 
/*
 * Device unrealize handler (installed in nvme_ns_class_init()): drain the
 * backend, shut the namespace down and free its setup-time allocations.
 */
static void nvme_ns_unrealize(DeviceState *dev)
{
    NvmeNamespace *ns = NVME_NS(dev);

    nvme_ns_drain(ns);
    nvme_ns_shutdown(ns);
    nvme_ns_cleanup(ns);
}
720 
721 static void nvme_ns_realize(DeviceState *dev, Error **errp)
722 {
723     NvmeNamespace *ns = NVME_NS(dev);
724     BusState *s = qdev_get_parent_bus(dev);
725     NvmeCtrl *n = NVME(s->parent);
726     NvmeSubsystem *subsys = n->subsys;
727     uint32_t nsid = ns->params.nsid;
728     int i;
729 
730     if (!n->subsys) {
731         /* If no subsys, the ns cannot be attached to more than one ctrl. */
732         ns->params.shared = false;
733         if (ns->params.detached) {
734             error_setg(errp, "detached requires that the nvme device is "
735                        "linked to an nvme-subsys device");
736             return;
737         }
738     } else {
739         /*
740          * If this namespace belongs to a subsystem (through a link on the
741          * controller device), reparent the device.
742          */
743         if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
744             return;
745         }
746         ns->subsys = subsys;
747         ns->endgrp = &subsys->endgrp;
748     }
749 
750     if (nvme_ns_setup(ns, errp)) {
751         return;
752     }
753 
754     if (!nsid) {
755         for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
756             if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
757                 continue;
758             }
759 
760             nsid = ns->params.nsid = i;
761             break;
762         }
763 
764         if (!nsid) {
765             error_setg(errp, "no free namespace id");
766             return;
767         }
768     } else {
769         if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
770             error_setg(errp, "namespace id '%d' already allocated", nsid);
771             return;
772         }
773     }
774 
775     if (subsys) {
776         subsys->namespaces[nsid] = ns;
777 
778         ns->id_ns.endgid = cpu_to_le16(0x1);
779         ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
780 
781         if (ns->params.detached) {
782             return;
783         }
784 
785         if (ns->params.shared) {
786             for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
787                 NvmeCtrl *ctrl = subsys->ctrls[i];
788 
789                 if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
790                     nvme_attach_ns(ctrl, ns);
791                 }
792             }
793 
794             return;
795         }
796 
797     }
798 
799     nvme_attach_ns(n, ns);
800 }
801 
/*
 * User configurable properties of the namespace device. Each entry maps a
 * property name to a field of NvmeNamespace (mostly within ns->params) and
 * gives its default value; the table is installed by nvme_ns_class_init().
 */
static Property nvme_ns_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
    DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
    DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true),
    /* 0 lets nvme_ns_realize() pick the first free namespace id */
    DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
    DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid),
    DEFINE_PROP_NGUID_NODEFAULT("nguid", NvmeNamespace, params.nguid),
    DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),
    /* metadata and protection information */
    DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
    DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
    DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
    DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0),
    DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0),
    /* copied into the mssrl/mcl/msrc identify fields by nvme_ns_init() */
    DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
    DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
    DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
    /* zoned namespace configuration */
    DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
    DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
                     NVME_DEFAULT_ZONE_SIZE),
    /* 0 makes the zone capacity equal to the zone size */
    DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
                     0),
    DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
                     params.cross_zone_read, false),
    DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
                       params.max_active_zones, 0),
    DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
                       params.max_open_zones, 0),
    DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
                       params.zd_extension_size, 0),
    DEFINE_PROP_UINT32("zoned.numzrwa", NvmeNamespace, params.numzrwa, 0),
    DEFINE_PROP_SIZE("zoned.zrwas", NvmeNamespace, params.zrwas, 0),
    /* -1 is replaced by the logical block size during constraint checks */
    DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1),
    DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default,
                     false),
    /* reclaim unit handle list, parsed by nvme_ns_init_fdp() */
    DEFINE_PROP_STRING("fdp.ruhs", NvmeNamespace, params.fdp.ruhs),
    DEFINE_PROP_END_OF_LIST(),
};
839 
840 static void nvme_ns_class_init(ObjectClass *oc, void *data)
841 {
842     DeviceClass *dc = DEVICE_CLASS(oc);
843 
844     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
845 
846     dc->bus_type = TYPE_NVME_BUS;
847     dc->realize = nvme_ns_realize;
848     dc->unrealize = nvme_ns_unrealize;
849     device_class_set_props(dc, nvme_ns_props);
850     dc->desc = "Virtual NVMe namespace";
851 }
852 
853 static void nvme_ns_instance_init(Object *obj)
854 {
855     NvmeNamespace *ns = NVME_NS(obj);
856     char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid);
857 
858     device_add_bootindex_property(obj, &ns->bootindex, "bootindex",
859                                   bootindex, DEVICE(obj));
860 
861     g_free(bootindex);
862 }
863 
864 static const TypeInfo nvme_ns_info = {
865     .name = TYPE_NVME_NS,
866     .parent = TYPE_DEVICE,
867     .class_init = nvme_ns_class_init,
868     .instance_size = sizeof(NvmeNamespace),
869     .instance_init = nvme_ns_instance_init,
870 };
871 
/* Register the namespace device type described by nvme_ns_info. */
static void nvme_ns_register_types(void)
{
    type_register_static(&nvme_ns_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(nvme_ns_register_types)
878