/*
 * QEMU NVM Express Subsystem: nvme-subsys
 *
 * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2. Refer COPYING.
 */
888eea45cSKlaus Jensen
988eea45cSKlaus Jensen #include "qemu/osdep.h"
1073064edfSJesper Devantier #include "qemu/units.h"
1188eea45cSKlaus Jensen #include "qapi/error.h"
1288eea45cSKlaus Jensen
1388eea45cSKlaus Jensen #include "nvme.h"
1488eea45cSKlaus Jensen
/* Default value of the "fdp.runs" subsystem property. */
#define NVME_DEFAULT_RU_SIZE (96 * MiB)
1673064edfSJesper Devantier
/*
 * Reserve up to @num free controller-id slots in the subsystem of @n,
 * scanning subsys->ctrls upwards from index @start.
 *
 * Every slot claimed is marked SUBSYS_SLOT_RSVD and its index is stored
 * (little-endian) in the scid field of the next entry of n->sec_ctrl_list.
 *
 * Returns the number of slots actually reserved; this is smaller than @num
 * when the end of subsys->ctrls is reached first.
 */
static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
{
    NvmeSubsystem *subsys = n->subsys;
    NvmeSecCtrlEntry *entries = n->sec_ctrl_list;
    int reserved = 0;
    int slot = start;

    while (slot < ARRAY_SIZE(subsys->ctrls) && reserved < num) {
        if (subsys->ctrls[slot] == NULL) {
            entries[reserved].scid = cpu_to_le16(slot);
            subsys->ctrls[slot] = SUBSYS_SLOT_RSVD;
            reserved++;
        }
        slot++;
    }

    return reserved;
}
3599f48ae7SLukasz Maniak
/*
 * Give back every controller-id slot recorded in n->sec_ctrl_list.
 *
 * Walks the first n->params.sriov_max_vfs entries; an entry whose scid is
 * zero is skipped. For any other entry the referenced slot must currently
 * be SUBSYS_SLOT_RSVD (asserted); it is cleared to NULL and the entry's
 * scid is reset to zero, so calling this twice is harmless.
 */
static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
{
    NvmeSubsystem *subsys = n->subsys;
    NvmeSecCtrlEntry *entries = n->sec_ctrl_list;

    for (int idx = 0; idx < n->params.sriov_max_vfs; idx++) {
        NvmeSecCtrlEntry *entry = &entries[idx];
        int slot = le16_to_cpu(entry->scid);

        if (!slot) {
            continue;
        }

        assert(subsys->ctrls[slot] == SUBSYS_SLOT_RSVD);
        subsys->ctrls[slot] = NULL;
        entry->scid = 0;
    }
}
5499f48ae7SLukasz Maniak
nvme_subsys_register_ctrl(NvmeCtrl * n,Error ** errp)5588eea45cSKlaus Jensen int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
5688eea45cSKlaus Jensen {
5788eea45cSKlaus Jensen NvmeSubsystem *subsys = n->subsys;
5899f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
5999f48ae7SLukasz Maniak int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
6088eea45cSKlaus Jensen
6199f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) {
6299f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid);
6399f48ae7SLukasz Maniak } else {
64*c6159d0eSMinwoo Im n->sec_ctrl_list = g_new0(NvmeSecCtrlEntry, num_vfs);
65*c6159d0eSMinwoo Im
6688eea45cSKlaus Jensen for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
6788eea45cSKlaus Jensen if (!subsys->ctrls[cntlid]) {
6888eea45cSKlaus Jensen break;
6988eea45cSKlaus Jensen }
7088eea45cSKlaus Jensen }
7188eea45cSKlaus Jensen
7288eea45cSKlaus Jensen if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
7388eea45cSKlaus Jensen error_setg(errp, "no more free controller id");
7488eea45cSKlaus Jensen return -1;
7588eea45cSKlaus Jensen }
7688eea45cSKlaus Jensen
7799f48ae7SLukasz Maniak num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs);
7899f48ae7SLukasz Maniak if (num_rsvd != num_vfs) {
7999f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n);
8099f48ae7SLukasz Maniak error_setg(errp,
8199f48ae7SLukasz Maniak "no more free controller ids for secondary controllers");
8299f48ae7SLukasz Maniak return -1;
8399f48ae7SLukasz Maniak }
8499f48ae7SLukasz Maniak }
8599f48ae7SLukasz Maniak
86a859eb9fSKlaus Jensen if (!subsys->serial) {
87a859eb9fSKlaus Jensen subsys->serial = g_strdup(n->params.serial);
88a859eb9fSKlaus Jensen } else if (strcmp(subsys->serial, n->params.serial)) {
89a859eb9fSKlaus Jensen error_setg(errp, "invalid controller serial");
90a859eb9fSKlaus Jensen return -1;
91a859eb9fSKlaus Jensen }
92a859eb9fSKlaus Jensen
9388eea45cSKlaus Jensen subsys->ctrls[cntlid] = n;
9488eea45cSKlaus Jensen
959fc6e86eSHannes Reinecke for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
969fc6e86eSHannes Reinecke NvmeNamespace *ns = subsys->namespaces[nsid];
979fc6e86eSHannes Reinecke if (ns && ns->params.shared && !ns->params.detached) {
989fc6e86eSHannes Reinecke nvme_attach_ns(n, ns);
999fc6e86eSHannes Reinecke }
1009fc6e86eSHannes Reinecke }
1019fc6e86eSHannes Reinecke
10288eea45cSKlaus Jensen return cntlid;
10388eea45cSKlaus Jensen }
10488eea45cSKlaus Jensen
/*
 * Remove controller @n from @subsys.
 *
 * The slot of a virtual function goes back to SUBSYS_SLOT_RSVD. For any
 * other controller the slot is cleared and the slots it reserved for
 * secondary controllers are released. In both cases n->cntlid is set to -1
 * afterwards.
 */
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
{
    if (!pci_is_vf(&n->parent_obj)) {
        subsys->ctrls[n->cntlid] = NULL;
        nvme_subsys_unreserve_cntlids(n);
    } else {
        subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD;
    }

    n->cntlid = -1;
}
116b0fde9e8SKlaus Jensen
/*
 * Derive the reclaim group identifier format (*@rgif) from the number of
 * reclaim groups @nrg and the number of reclaim unit handles @nruh.
 *
 * With exactly one reclaim group *@rgif is 0. Otherwise *@rgif is the bit
 * length of @nrg, i.e. the number of right shifts needed to reduce it to
 * zero.
 *
 * Returns false, with *@rgif set to 0, when @nruh is larger than what the
 * remaining bits of a 16-bit value can hold; returns true otherwise.
 */
static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif)
{
    unsigned int nbits = 0;

    if (unlikely(nrg == 1)) {
        /* PIDRG_NORGI scenario, all of pid is used for PHID */
        *rgif = 0;
        return true;
    }

    for (uint16_t rest = nrg; rest != 0; rest >>= 1) {
        nbits++;
    }

    /* ensure remaining bits suffice to represent number of phids in a RG */
    if (unlikely(nruh > (UINT16_MAX >> nbits))) {
        *rgif = 0;
        return false;
    }

    *rgif = nbits;
    return true;
}
14473064edfSJesper Devantier
nvme_subsys_setup_fdp(NvmeSubsystem * subsys,Error ** errp)14573064edfSJesper Devantier static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp)
14673064edfSJesper Devantier {
14773064edfSJesper Devantier NvmeEnduranceGroup *endgrp = &subsys->endgrp;
14873064edfSJesper Devantier
14973064edfSJesper Devantier if (!subsys->params.fdp.runs) {
15073064edfSJesper Devantier error_setg(errp, "fdp.runs must be non-zero");
15173064edfSJesper Devantier return false;
15273064edfSJesper Devantier }
15373064edfSJesper Devantier
15473064edfSJesper Devantier endgrp->fdp.runs = subsys->params.fdp.runs;
15573064edfSJesper Devantier
15673064edfSJesper Devantier if (!subsys->params.fdp.nrg) {
15773064edfSJesper Devantier error_setg(errp, "fdp.nrg must be non-zero");
15873064edfSJesper Devantier return false;
15973064edfSJesper Devantier }
16073064edfSJesper Devantier
16173064edfSJesper Devantier endgrp->fdp.nrg = subsys->params.fdp.nrg;
16273064edfSJesper Devantier
1633ae8a54aSKlaus Jensen if (!subsys->params.fdp.nruh ||
1643ae8a54aSKlaus Jensen subsys->params.fdp.nruh > NVME_FDP_MAXPIDS) {
1653ae8a54aSKlaus Jensen error_setg(errp, "fdp.nruh must be non-zero and less than %u",
1663ae8a54aSKlaus Jensen NVME_FDP_MAXPIDS);
16773064edfSJesper Devantier return false;
16873064edfSJesper Devantier }
16973064edfSJesper Devantier
17073064edfSJesper Devantier endgrp->fdp.nruh = subsys->params.fdp.nruh;
17173064edfSJesper Devantier
17273064edfSJesper Devantier if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) {
17373064edfSJesper Devantier error_setg(errp,
17473064edfSJesper Devantier "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")",
17573064edfSJesper Devantier endgrp->fdp.nruh, endgrp->fdp.nrg);
17673064edfSJesper Devantier return false;
17773064edfSJesper Devantier }
17873064edfSJesper Devantier
17973064edfSJesper Devantier endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh);
18073064edfSJesper Devantier
18173064edfSJesper Devantier for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) {
18273064edfSJesper Devantier endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) {
18373064edfSJesper Devantier .ruht = NVME_RUHT_INITIALLY_ISOLATED,
18473064edfSJesper Devantier .ruha = NVME_RUHA_UNUSED,
18573064edfSJesper Devantier };
18673064edfSJesper Devantier
18773064edfSJesper Devantier endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg);
18873064edfSJesper Devantier }
18973064edfSJesper Devantier
19073064edfSJesper Devantier endgrp->fdp.enabled = true;
19173064edfSJesper Devantier
19273064edfSJesper Devantier return true;
19373064edfSJesper Devantier }
19473064edfSJesper Devantier
nvme_subsys_setup(NvmeSubsystem * subsys,Error ** errp)19573064edfSJesper Devantier static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp)
19688eea45cSKlaus Jensen {
19788eea45cSKlaus Jensen const char *nqn = subsys->params.nqn ?
19888eea45cSKlaus Jensen subsys->params.nqn : subsys->parent_obj.id;
19988eea45cSKlaus Jensen
20088eea45cSKlaus Jensen snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
20188eea45cSKlaus Jensen "nqn.2019-08.org.qemu:%s", nqn);
20273064edfSJesper Devantier
20373064edfSJesper Devantier if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) {
20473064edfSJesper Devantier return false;
20573064edfSJesper Devantier }
20673064edfSJesper Devantier
20773064edfSJesper Devantier return true;
20888eea45cSKlaus Jensen }
20988eea45cSKlaus Jensen
nvme_subsys_realize(DeviceState * dev,Error ** errp)21088eea45cSKlaus Jensen static void nvme_subsys_realize(DeviceState *dev, Error **errp)
21188eea45cSKlaus Jensen {
21288eea45cSKlaus Jensen NvmeSubsystem *subsys = NVME_SUBSYS(dev);
21388eea45cSKlaus Jensen
214d637e1dcSPeter Maydell qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
2155ffbaeedSKlaus Jensen
21673064edfSJesper Devantier nvme_subsys_setup(subsys, errp);
21788eea45cSKlaus Jensen }
21888eea45cSKlaus Jensen
21988eea45cSKlaus Jensen static Property nvme_subsystem_props[] = {
22088eea45cSKlaus Jensen DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn),
22173064edfSJesper Devantier DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false),
22273064edfSJesper Devantier DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs,
22373064edfSJesper Devantier NVME_DEFAULT_RU_SIZE),
22473064edfSJesper Devantier DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1),
22573064edfSJesper Devantier DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0),
22688eea45cSKlaus Jensen DEFINE_PROP_END_OF_LIST(),
22788eea45cSKlaus Jensen };
22888eea45cSKlaus Jensen
/*
 * Class initialiser of the nvme-subsys type: sets the description and the
 * realize callback, marks the device as not hotpluggable, files it under
 * the storage category and installs its property list.
 */
static void nvme_subsys_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->desc = "Virtual NVMe subsystem";
    dc->realize = nvme_subsys_realize;
    dc->hotpluggable = false;

    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    device_class_set_props(dc, nvme_subsystem_props);
}
24188eea45cSKlaus Jensen
24288eea45cSKlaus Jensen static const TypeInfo nvme_subsys_info = {
24388eea45cSKlaus Jensen .name = TYPE_NVME_SUBSYS,
24488eea45cSKlaus Jensen .parent = TYPE_DEVICE,
24588eea45cSKlaus Jensen .class_init = nvme_subsys_class_init,
24688eea45cSKlaus Jensen .instance_size = sizeof(NvmeSubsystem),
24788eea45cSKlaus Jensen };
24888eea45cSKlaus Jensen
nvme_subsys_register_types(void)24988eea45cSKlaus Jensen static void nvme_subsys_register_types(void)
25088eea45cSKlaus Jensen {
25188eea45cSKlaus Jensen type_register_static(&nvme_subsys_info);
25288eea45cSKlaus Jensen }
25388eea45cSKlaus Jensen
25488eea45cSKlaus Jensen type_init(nvme_subsys_register_types)
255