xref: /openbmc/qemu/hw/nvme/nvme.h (revision ed3a06b1)
1 /*
2  * QEMU NVM Express
3  *
4  * Copyright (c) 2012 Intel Corporation
5  * Copyright (c) 2021 Minwoo Im
6  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
7  *
8  * Authors:
9  *   Keith Busch            <kbusch@kernel.org>
10  *   Klaus Jensen           <k.jensen@samsung.com>
11  *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
12  *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
13  *   Minwoo Im              <minwoo.im.dev@gmail.com>
14  *
15  * This code is licensed under the GNU GPL v2 or later.
16  */
17 
18 #ifndef HW_NVME_NVME_H
19 #define HW_NVME_NVME_H
20 
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci.h"
23 #include "hw/block/block.h"
24 
25 #include "block/nvme.h"
26 
27 #define NVME_MAX_CONTROLLERS 256
28 #define NVME_MAX_NAMESPACES  256
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30 
31 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32 
33 typedef struct NvmeCtrl NvmeCtrl;
34 typedef struct NvmeNamespace NvmeNamespace;
35 
36 #define TYPE_NVME_BUS "nvme-bus"
37 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
38 
39 typedef struct NvmeBus {
40     BusState parent_bus;
41 } NvmeBus;
42 
/* QOM type name and checked cast for the NVMe subsystem device. */
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * Sentinel stored in a subsystem controller slot; nvme_subsys_ctrl() treats a
 * slot holding this value as having no controller.  The expansion is fully
 * parenthesized so that it behaves as a single operand wherever it is used
 * (e.g. after sizeof or next to a postfix operator).
 */
#define SUBSYS_SLOT_RSVD ((void *)0xFFFF)
47 
48 typedef struct NvmeSubsystem {
49     DeviceState parent_obj;
50     NvmeBus     bus;
51     uint8_t     subnqn[256];
52     char        *serial;
53 
54     NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
55     NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
56 
57     struct {
58         char *nqn;
59     } params;
60 } NvmeSubsystem;
61 
62 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
63 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
64 
65 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
66                                          uint32_t cntlid)
67 {
68     if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
69         return NULL;
70     }
71 
72     if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
73         return NULL;
74     }
75 
76     return subsys->ctrls[cntlid];
77 }
78 
79 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
80                                             uint32_t nsid)
81 {
82     if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
83         return NULL;
84     }
85 
86     return subsys->namespaces[nsid];
87 }
88 
/* QOM type name and checked cast for the NVMe namespace device. */
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
92 
93 typedef struct NvmeZone {
94     NvmeZoneDescr   d;
95     uint64_t        w_ptr;
96     QTAILQ_ENTRY(NvmeZone) entry;
97 } NvmeZone;
98 
99 typedef struct NvmeNamespaceParams {
100     bool     detached;
101     bool     shared;
102     uint32_t nsid;
103     QemuUUID uuid;
104     uint64_t eui64;
105     bool     eui64_default;
106 
107     uint16_t ms;
108     uint8_t  mset;
109     uint8_t  pi;
110     uint8_t  pil;
111     uint8_t  pif;
112 
113     uint16_t mssrl;
114     uint32_t mcl;
115     uint8_t  msrc;
116 
117     bool     zoned;
118     bool     cross_zone_read;
119     uint64_t zone_size_bs;
120     uint64_t zone_cap_bs;
121     uint32_t max_active_zones;
122     uint32_t max_open_zones;
123     uint32_t zd_extension_size;
124 
125     uint32_t numzrwa;
126     uint64_t zrwas;
127     uint64_t zrwafg;
128 } NvmeNamespaceParams;
129 
130 typedef struct NvmeNamespace {
131     DeviceState  parent_obj;
132     BlockConf    blkconf;
133     int32_t      bootindex;
134     int64_t      size;
135     int64_t      moff;
136     NvmeIdNs     id_ns;
137     NvmeIdNsNvm  id_ns_nvm;
138     NvmeLBAF     lbaf;
139     unsigned int nlbaf;
140     size_t       lbasz;
141     const uint32_t *iocs;
142     uint8_t      csi;
143     uint16_t     status;
144     int          attached;
145     uint8_t      pif;
146 
147     struct {
148         uint16_t zrwas;
149         uint16_t zrwafg;
150         uint32_t numzrwa;
151     } zns;
152 
153     QTAILQ_ENTRY(NvmeNamespace) entry;
154 
155     NvmeIdNsZoned   *id_ns_zoned;
156     NvmeZone        *zone_array;
157     QTAILQ_HEAD(, NvmeZone) exp_open_zones;
158     QTAILQ_HEAD(, NvmeZone) imp_open_zones;
159     QTAILQ_HEAD(, NvmeZone) closed_zones;
160     QTAILQ_HEAD(, NvmeZone) full_zones;
161     uint32_t        num_zones;
162     uint64_t        zone_size;
163     uint64_t        zone_capacity;
164     uint32_t        zone_size_log2;
165     uint8_t         *zd_extensions;
166     int32_t         nr_open_zones;
167     int32_t         nr_active_zones;
168 
169     NvmeNamespaceParams params;
170 
171     struct {
172         uint32_t err_rec;
173     } features;
174 } NvmeNamespace;
175 
176 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
177 {
178     if (ns) {
179         return ns->params.nsid;
180     }
181 
182     return 0;
183 }
184 
185 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
186 {
187     return lba << ns->lbaf.ds;
188 }
189 
190 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
191 {
192     return ns->lbaf.ms * lba;
193 }
194 
195 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
196 {
197     return ns->moff + nvme_m2b(ns, lba);
198 }
199 
200 static inline bool nvme_ns_ext(NvmeNamespace *ns)
201 {
202     return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
203 }
204 
205 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
206 {
207     return zone->d.zs >> 4;
208 }
209 
210 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
211 {
212     zone->d.zs = state << 4;
213 }
214 
215 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
216 {
217     return zone->d.zslba + ns->zone_size;
218 }
219 
220 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
221 {
222     return zone->d.zslba + zone->d.zcap;
223 }
224 
225 static inline bool nvme_wp_is_valid(NvmeZone *zone)
226 {
227     uint8_t st = nvme_get_zone_state(zone);
228 
229     return st != NVME_ZONE_STATE_FULL &&
230            st != NVME_ZONE_STATE_READ_ONLY &&
231            st != NVME_ZONE_STATE_OFFLINE;
232 }
233 
234 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
235                                              uint32_t zone_idx)
236 {
237     return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
238 }
239 
240 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
241 {
242     assert(ns->nr_open_zones >= 0);
243     if (ns->params.max_open_zones) {
244         ns->nr_open_zones++;
245         assert(ns->nr_open_zones <= ns->params.max_open_zones);
246     }
247 }
248 
249 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
250 {
251     if (ns->params.max_open_zones) {
252         assert(ns->nr_open_zones > 0);
253         ns->nr_open_zones--;
254     }
255     assert(ns->nr_open_zones >= 0);
256 }
257 
258 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
259 {
260     assert(ns->nr_active_zones >= 0);
261     if (ns->params.max_active_zones) {
262         ns->nr_active_zones++;
263         assert(ns->nr_active_zones <= ns->params.max_active_zones);
264     }
265 }
266 
267 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
268 {
269     if (ns->params.max_active_zones) {
270         assert(ns->nr_active_zones > 0);
271         ns->nr_active_zones--;
272         assert(ns->nr_active_zones >= ns->nr_open_zones);
273     }
274     assert(ns->nr_active_zones >= 0);
275 }
276 
277 void nvme_ns_init_format(NvmeNamespace *ns);
278 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
279 void nvme_ns_drain(NvmeNamespace *ns);
280 void nvme_ns_shutdown(NvmeNamespace *ns);
281 void nvme_ns_cleanup(NvmeNamespace *ns);
282 
283 typedef struct NvmeAsyncEvent {
284     QTAILQ_ENTRY(NvmeAsyncEvent) entry;
285     NvmeAerResult result;
286 } NvmeAsyncEvent;
287 
/* Bit flags stored in NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 0x1,
    NVME_SG_DMA = 0x2,
};
292 
293 typedef struct NvmeSg {
294     int flags;
295 
296     union {
297         QEMUSGList   qsg;
298         QEMUIOVector iov;
299     };
300 } NvmeSg;
301 
/* Direction of a data transfer, as passed to the nvme_bounce_*() helpers. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;
306 
307 typedef struct NvmeRequest {
308     struct NvmeSQueue       *sq;
309     struct NvmeNamespace    *ns;
310     BlockAIOCB              *aiocb;
311     uint16_t                status;
312     void                    *opaque;
313     NvmeCqe                 cqe;
314     NvmeCmd                 cmd;
315     BlockAcctCookie         acct;
316     NvmeSg                  sg;
317     QTAILQ_ENTRY(NvmeRequest)entry;
318 } NvmeRequest;
319 
320 typedef struct NvmeBounceContext {
321     NvmeRequest *req;
322 
323     struct {
324         QEMUIOVector iov;
325         uint8_t *bounce;
326     } data, mdata;
327 } NvmeBounceContext;
328 
329 static inline const char *nvme_adm_opc_str(uint8_t opc)
330 {
331     switch (opc) {
332     case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
333     case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
334     case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
335     case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
336     case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
337     case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
338     case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
339     case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
340     case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
341     case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
342     case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
343     case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
344     case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
345     default:                            return "NVME_ADM_CMD_UNKNOWN";
346     }
347 }
348 
349 static inline const char *nvme_io_opc_str(uint8_t opc)
350 {
351     switch (opc) {
352     case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
353     case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
354     case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
355     case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
356     case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
357     case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
358     case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
359     case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
360     case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
361     case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
362     case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
363     default:                        return "NVME_NVM_CMD_UNKNOWN";
364     }
365 }
366 
367 typedef struct NvmeSQueue {
368     struct NvmeCtrl *ctrl;
369     uint16_t    sqid;
370     uint16_t    cqid;
371     uint32_t    head;
372     uint32_t    tail;
373     uint32_t    size;
374     uint64_t    dma_addr;
375     QEMUTimer   *timer;
376     NvmeRequest *io_req;
377     QTAILQ_HEAD(, NvmeRequest) req_list;
378     QTAILQ_HEAD(, NvmeRequest) out_req_list;
379     QTAILQ_ENTRY(NvmeSQueue) entry;
380 } NvmeSQueue;
381 
382 typedef struct NvmeCQueue {
383     struct NvmeCtrl *ctrl;
384     uint8_t     phase;
385     uint16_t    cqid;
386     uint16_t    irq_enabled;
387     uint32_t    head;
388     uint32_t    tail;
389     uint32_t    vector;
390     uint32_t    size;
391     uint64_t    dma_addr;
392     QEMUTimer   *timer;
393     QTAILQ_HEAD(, NvmeSQueue) sq_list;
394     QTAILQ_HEAD(, NvmeRequest) req_list;
395 } NvmeCQueue;
396 
/* QOM type name and checked cast for the NVMe controller device. */
#define TYPE_NVME "nvme"
#define NVME(obj) OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
400 
/* Configuration values of a controller, kept in NvmeCtrl.params. */
typedef struct NvmeParams {
    char *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t aerl;
    uint32_t aer_max_queued;
    uint8_t mdts;
    uint8_t vsl;
    bool use_intel_id;
    uint8_t zasl;
    bool auto_transition_zones;
    bool legacy_cmb;

    /* SR-IOV related settings. */
    uint8_t sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t sriov_max_vq_per_vf;
    uint8_t sriov_max_vi_per_vf;
} NvmeParams;
421 
422 typedef struct NvmeCtrl {
423     PCIDevice    parent_obj;
424     MemoryRegion bar0;
425     MemoryRegion iomem;
426     NvmeBar      bar;
427     NvmeParams   params;
428     NvmeBus      bus;
429 
430     uint16_t    cntlid;
431     bool        qs_created;
432     uint32_t    page_size;
433     uint16_t    page_bits;
434     uint16_t    max_prp_ents;
435     uint16_t    cqe_size;
436     uint16_t    sqe_size;
437     uint32_t    max_q_ents;
438     uint8_t     outstanding_aers;
439     uint32_t    irq_status;
440     int         cq_pending;
441     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
442     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
443     uint64_t    starttime_ms;
444     uint16_t    temperature;
445     uint8_t     smart_critical_warning;
446     uint32_t    conf_msix_qsize;
447     uint32_t    conf_ioqpairs;
448 
449     struct {
450         MemoryRegion mem;
451         uint8_t      *buf;
452         bool         cmse;
453         hwaddr       cba;
454     } cmb;
455 
456     struct {
457         HostMemoryBackend *dev;
458         bool              cmse;
459         hwaddr            cba;
460     } pmr;
461 
462     uint8_t     aer_mask;
463     NvmeRequest **aer_reqs;
464     QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
465     int         aer_queued;
466 
467     uint32_t    dmrsl;
468 
469     /* Namespace ID is started with 1 so bitmap should be 1-based */
470 #define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
471     DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
472 
473     NvmeSubsystem   *subsys;
474 
475     NvmeNamespace   namespace;
476     NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
477     NvmeSQueue      **sq;
478     NvmeCQueue      **cq;
479     NvmeSQueue      admin_sq;
480     NvmeCQueue      admin_cq;
481     NvmeIdCtrl      id_ctrl;
482 
483     struct {
484         struct {
485             uint16_t temp_thresh_hi;
486             uint16_t temp_thresh_low;
487         };
488 
489         uint32_t                async_config;
490         NvmeHostBehaviorSupport hbs;
491     } features;
492 
493     NvmePriCtrlCap  pri_ctrl_cap;
494     NvmeSecCtrlList sec_ctrl_list;
495     struct {
496         uint16_t    vqrfap;
497         uint16_t    virfap;
498     } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
499 } NvmeCtrl;
500 
/* Kinds of reset distinguished by the controller code. */
typedef enum NvmeResetType {
    NVME_RESET_FUNCTION = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;
505 
506 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
507 {
508     if (!nsid || nsid > NVME_MAX_NAMESPACES) {
509         return NULL;
510     }
511 
512     return n->namespaces[nsid];
513 }
514 
515 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
516 {
517     NvmeSQueue *sq = req->sq;
518     NvmeCtrl *n = sq->ctrl;
519 
520     return n->cq[sq->cqid];
521 }
522 
523 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
524 {
525     NvmeSQueue *sq = req->sq;
526     return sq->ctrl;
527 }
528 
529 static inline uint16_t nvme_cid(NvmeRequest *req)
530 {
531     if (!req) {
532         return 0xffff;
533     }
534 
535     return le16_to_cpu(req->cqe.cid);
536 }
537 
538 static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
539 {
540     PCIDevice *pci_dev = &n->parent_obj;
541     NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
542 
543     if (pci_is_vf(pci_dev)) {
544         return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
545     }
546 
547     return NULL;
548 }
549 
550 static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
551                                                       uint16_t cntlid)
552 {
553     NvmeSecCtrlList *list = &n->sec_ctrl_list;
554     uint8_t i;
555 
556     for (i = 0; i < list->numcntl; i++) {
557         if (le16_to_cpu(list->sec[i].scid) == cntlid) {
558             return &list->sec[i];
559         }
560     }
561 
562     return NULL;
563 }
564 
565 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
566 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
567                           NvmeTxDirection dir, NvmeRequest *req);
568 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
569                            NvmeTxDirection dir, NvmeRequest *req);
570 void nvme_rw_complete_cb(void *opaque, int ret);
571 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
572                        NvmeCmd *cmd);
573 
574 #endif /* HW_NVME_NVME_H */
575