/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch            <kbusch@kernel.org>
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
 *   Minwoo Im              <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci_device.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
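/* default EUI-64 prefix: the 52:54:00 OUI conventionally used by QEMU */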
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

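/* NSID 0xFFFFFFFF is the broadcast value and must not be a valid namespace ID */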
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
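/*
 * Sentinel stored in ctrls[] to mark a controller ID as reserved (e.g. for an
 * SR-IOV secondary controller) without an associated live NvmeCtrl.
 */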
#define SUBSYS_SLOT_RSVD ((void *)0xFFFF)

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];
    char        *serial;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

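/*
 * Return the controller with the given CNTLID, or NULL if the ID is out of
 * range or the slot is unused or merely reserved.
 */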
static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

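/*
 * Return the subsystem namespace with the given NSID, or NULL if the NSID is
 * invalid or not allocated.
 */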
static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

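/*
 * In-memory zone state: the zone descriptor as reported to the host, plus
 * w_ptr, an internally tracked write pointer that may advance ahead of d.wp
 * while writes are still in flight.
 */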
typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;

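    /* metadata size and end-to-end protection settings (MS, MSET, PI, PIL, PIF) */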
    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

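    /* Copy command limits (MSSRL, MCL, MSRC) */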
    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

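    /* Zone Random Write Area (ZRWA) resources, size and flush granularity */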
    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    unsigned int nlbaf;
    size_t       lbasz;
    const uint32_t *iocs;
    uint8_t      csi;
    uint16_t     status;
    int          attached;
    uint8_t      pif;

    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;
    } features;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

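/*
 * Convert a logical block count to bytes; lbaf.ds is the log2 of the LBA data
 * size. E.g. with ds = 9 (512-byte blocks), nvme_l2b(ns, 8) == 4096.
 */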
static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

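/* convert a logical block count to metadata bytes (lbaf.ms is per-LBA metadata size) */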
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

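/*
 * Byte offset of the metadata for a given LBA; ns->moff is where the metadata
 * region begins in the backing image.
 */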
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

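/*
 * True when the namespace is formatted with extended LBAs, i.e. metadata is
 * transferred interleaved with the data instead of in a separate buffer.
 */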
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

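/* the Zone State lives in the upper nibble of the descriptor's zs field */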
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

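/*
 * A zone is readable up to its full zone size, but writable only up to its
 * capacity (ZCAP), which may be smaller.
 */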
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

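/* the write pointer is undefined in the Full, Read Only and Offline states */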
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

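/*
 * Active/Open zone Resource (AOR) accounting; the limits are only enforced
 * when max_open_zones / max_active_zones are non-zero.
 */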
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

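/*
 * Scatter/gather state for a transfer: when NVME_SG_DMA is set the qsg member
 * (guest-physical addresses) is in use, otherwise iov (host pointers, e.g.
 * into the CMB) is.
 */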
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

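/* stringify an admin opcode, primarily for trace events */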
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

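/* stringify an I/O opcode, primarily for trace events */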
static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

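/*
 * Submission queue. db_addr and ei_addr hold the guest addresses of the
 * shadow doorbell and event index buffers set up by Doorbell Buffer Config.
 */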
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

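/*
 * Completion queue. The phase tag bit is inverted each time the queue wraps,
 * which is how the host detects newly posted entries.
 */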
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
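    /* guest addresses registered via the Doorbell Buffer Config command */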
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* namespace IDs start at 1, so the bitmap is sized for 1-based indexing */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

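/* return the command identifier of a request, or 0xffff if there is none */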
static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

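/*
 * For an SR-IOV virtual function, return its secondary controller entry in
 * the physical function's list; returns NULL for the PF itself.
 */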
static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_NVME_H */