1 /* 2 * QEMU NVM Express 3 * 4 * Copyright (c) 2012 Intel Corporation 5 * Copyright (c) 2021 Minwoo Im 6 * Copyright (c) 2021 Samsung Electronics Co., Ltd. 7 * 8 * Authors: 9 * Keith Busch <kbusch@kernel.org> 10 * Klaus Jensen <k.jensen@samsung.com> 11 * Gollu Appalanaidu <anaidu.gollu@samsung.com> 12 * Dmitry Fomichev <dmitry.fomichev@wdc.com> 13 * Minwoo Im <minwoo.im.dev@gmail.com> 14 * 15 * This code is licensed under the GNU GPL v2 or later. 16 */ 17 18 #ifndef HW_NVME_NVME_H 19 #define HW_NVME_NVME_H 20 21 #include "qemu/uuid.h" 22 #include "hw/pci/pci.h" 23 #include "hw/block/block.h" 24 25 #include "block/nvme.h" 26 27 #define NVME_MAX_CONTROLLERS 256 28 #define NVME_MAX_NAMESPACES 256 29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) 30 31 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); 32 33 typedef struct NvmeCtrl NvmeCtrl; 34 typedef struct NvmeNamespace NvmeNamespace; 35 36 #define TYPE_NVME_BUS "nvme-bus" 37 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) 38 39 typedef struct NvmeBus { 40 BusState parent_bus; 41 } NvmeBus; 42 43 #define TYPE_NVME_SUBSYS "nvme-subsys" 44 #define NVME_SUBSYS(obj) \ 45 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) 46 #define SUBSYS_SLOT_RSVD (void *)0xFFFF 47 48 typedef struct NvmeSubsystem { 49 DeviceState parent_obj; 50 NvmeBus bus; 51 uint8_t subnqn[256]; 52 char *serial; 53 54 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; 55 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 56 57 struct { 58 char *nqn; 59 } params; 60 } NvmeSubsystem; 61 62 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); 63 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); 64 65 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, 66 uint32_t cntlid) 67 { 68 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { 69 return NULL; 70 } 71 72 if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) { 73 return NULL; 74 } 75 76 return subsys->ctrls[cntlid]; 77 } 78 79 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, 80 uint32_t nsid) 81 { 82 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { 83 return NULL; 84 } 85 86 return subsys->namespaces[nsid]; 87 } 88 89 #define TYPE_NVME_NS "nvme-ns" 90 #define NVME_NS(obj) \ 91 OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) 92 93 typedef struct NvmeZone { 94 NvmeZoneDescr d; 95 uint64_t w_ptr; 96 QTAILQ_ENTRY(NvmeZone) entry; 97 } NvmeZone; 98 99 typedef struct NvmeNamespaceParams { 100 bool detached; 101 bool shared; 102 uint32_t nsid; 103 QemuUUID uuid; 104 uint64_t eui64; 105 bool eui64_default; 106 107 uint16_t ms; 108 uint8_t mset; 109 uint8_t pi; 110 uint8_t pil; 111 uint8_t pif; 112 113 uint16_t mssrl; 114 uint32_t mcl; 115 uint8_t msrc; 116 117 bool zoned; 118 bool cross_zone_read; 119 uint64_t zone_size_bs; 120 uint64_t zone_cap_bs; 121 uint32_t max_active_zones; 122 uint32_t max_open_zones; 123 uint32_t zd_extension_size; 124 125 uint32_t numzrwa; 126 uint64_t zrwas; 127 uint64_t zrwafg; 128 } NvmeNamespaceParams; 129 130 typedef struct NvmeNamespace { 131 DeviceState parent_obj; 132 BlockConf blkconf; 133 int32_t bootindex; 134 int64_t size; 135 int64_t moff; 136 NvmeIdNs id_ns; 137 NvmeIdNsNvm id_ns_nvm; 138 NvmeLBAF lbaf; 139 unsigned int nlbaf; 140 size_t lbasz; 141 const uint32_t *iocs; 142 uint8_t csi; 143 uint16_t status; 144 int attached; 145 uint8_t pif; 146 147 struct { 148 uint16_t zrwas; 149 uint16_t zrwafg; 150 uint32_t numzrwa; 151 } zns; 152 153 QTAILQ_ENTRY(NvmeNamespace) entry; 154 155 NvmeIdNsZoned *id_ns_zoned; 156 NvmeZone *zone_array; 157 QTAILQ_HEAD(, NvmeZone) exp_open_zones; 158 QTAILQ_HEAD(, NvmeZone) imp_open_zones; 159 QTAILQ_HEAD(, NvmeZone) closed_zones; 160 QTAILQ_HEAD(, NvmeZone) full_zones; 161 uint32_t num_zones; 162 uint64_t zone_size; 163 uint64_t zone_capacity; 164 uint32_t zone_size_log2; 165 uint8_t *zd_extensions; 166 int32_t nr_open_zones; 167 int32_t nr_active_zones; 168 169 NvmeNamespaceParams params; 170 171 struct { 172 uint32_t err_rec; 173 } features; 174 } NvmeNamespace; 175 176 static inline uint32_t nvme_nsid(NvmeNamespace *ns) 177 { 178 if (ns) { 179 return ns->params.nsid; 180 } 181 182 return 0; 183 } 184 185 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) 186 { 187 return lba << ns->lbaf.ds; 188 } 189 190 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) 191 { 192 return ns->lbaf.ms * lba; 193 } 194 195 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) 196 { 197 return ns->moff + nvme_m2b(ns, lba); 198 } 199 200 static inline bool nvme_ns_ext(NvmeNamespace *ns) 201 { 202 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); 203 } 204 205 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) 206 { 207 return zone->d.zs >> 4; 208 } 209 210 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) 211 { 212 zone->d.zs = state << 4; 213 } 214 215 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) 216 { 217 return zone->d.zslba + ns->zone_size; 218 } 219 220 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) 221 { 222 return zone->d.zslba + zone->d.zcap; 223 } 224 225 static inline bool nvme_wp_is_valid(NvmeZone *zone) 226 { 227 uint8_t st = nvme_get_zone_state(zone); 228 229 return st != NVME_ZONE_STATE_FULL && 230 st != NVME_ZONE_STATE_READ_ONLY && 231 st != NVME_ZONE_STATE_OFFLINE; 232 } 233 234 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, 235 uint32_t zone_idx) 236 { 237 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; 238 } 239 240 static inline void nvme_aor_inc_open(NvmeNamespace *ns) 241 { 242 assert(ns->nr_open_zones >= 0); 243 if (ns->params.max_open_zones) { 244 ns->nr_open_zones++; 245 assert(ns->nr_open_zones <= ns->params.max_open_zones); 246 } 247 } 248 249 static inline void nvme_aor_dec_open(NvmeNamespace *ns) 250 { 251 if (ns->params.max_open_zones) { 252 assert(ns->nr_open_zones > 0); 253 ns->nr_open_zones--; 254 } 255 assert(ns->nr_open_zones >= 0); 256 } 257 258 static inline void nvme_aor_inc_active(NvmeNamespace *ns) 259 { 260 assert(ns->nr_active_zones >= 0); 261 if (ns->params.max_active_zones) { 262 ns->nr_active_zones++; 263 assert(ns->nr_active_zones <= ns->params.max_active_zones); 264 } 265 } 266 267 static inline void nvme_aor_dec_active(NvmeNamespace *ns) 268 { 269 if (ns->params.max_active_zones) { 270 assert(ns->nr_active_zones > 0); 271 ns->nr_active_zones--; 272 assert(ns->nr_active_zones >= ns->nr_open_zones); 273 } 274 assert(ns->nr_active_zones >= 0); 275 } 276 277 void nvme_ns_init_format(NvmeNamespace *ns); 278 int nvme_ns_setup(NvmeNamespace *ns, Error **errp); 279 void nvme_ns_drain(NvmeNamespace *ns); 280 void nvme_ns_shutdown(NvmeNamespace *ns); 281 void nvme_ns_cleanup(NvmeNamespace *ns); 282 283 typedef struct NvmeAsyncEvent { 284 QTAILQ_ENTRY(NvmeAsyncEvent) entry; 285 NvmeAerResult result; 286 } NvmeAsyncEvent; 287 288 enum { 289 NVME_SG_ALLOC = 1 << 0, 290 NVME_SG_DMA = 1 << 1, 291 }; 292 293 typedef struct NvmeSg { 294 int flags; 295 296 union { 297 QEMUSGList qsg; 298 QEMUIOVector iov; 299 }; 300 } NvmeSg; 301 302 typedef enum NvmeTxDirection { 303 NVME_TX_DIRECTION_TO_DEVICE = 0, 304 NVME_TX_DIRECTION_FROM_DEVICE = 1, 305 } NvmeTxDirection; 306 307 typedef struct NvmeRequest { 308 struct NvmeSQueue *sq; 309 struct NvmeNamespace *ns; 310 BlockAIOCB *aiocb; 311 uint16_t status; 312 void *opaque; 313 NvmeCqe cqe; 314 NvmeCmd cmd; 315 BlockAcctCookie acct; 316 NvmeSg sg; 317 QTAILQ_ENTRY(NvmeRequest)entry; 318 } NvmeRequest; 319 320 typedef struct NvmeBounceContext { 321 NvmeRequest *req; 322 323 struct { 324 QEMUIOVector iov; 325 uint8_t *bounce; 326 } data, mdata; 327 } NvmeBounceContext; 328 329 static inline const char *nvme_adm_opc_str(uint8_t opc) 330 { 331 switch (opc) { 332 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; 333 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; 334 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; 335 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; 336 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; 337 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; 338 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; 339 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; 340 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; 341 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; 342 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; 343 case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT"; 344 case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG"; 345 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; 346 default: return "NVME_ADM_CMD_UNKNOWN"; 347 } 348 } 349 350 static inline const char *nvme_io_opc_str(uint8_t opc) 351 { 352 switch (opc) { 353 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; 354 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; 355 case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; 356 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; 357 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; 358 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; 359 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; 360 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; 361 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; 362 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; 363 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; 364 default: return "NVME_NVM_CMD_UNKNOWN"; 365 } 366 } 367 368 typedef struct NvmeSQueue { 369 struct NvmeCtrl *ctrl; 370 uint16_t sqid; 371 uint16_t cqid; 372 uint32_t head; 373 uint32_t tail; 374 uint32_t size; 375 uint64_t dma_addr; 376 uint64_t db_addr; 377 uint64_t ei_addr; 378 QEMUTimer *timer; 379 EventNotifier notifier; 380 bool ioeventfd_enabled; 381 NvmeRequest *io_req; 382 QTAILQ_HEAD(, NvmeRequest) req_list; 383 QTAILQ_HEAD(, NvmeRequest) out_req_list; 384 QTAILQ_ENTRY(NvmeSQueue) entry; 385 } NvmeSQueue; 386 387 typedef struct NvmeCQueue { 388 struct NvmeCtrl *ctrl; 389 uint8_t phase; 390 uint16_t cqid; 391 uint16_t irq_enabled; 392 uint32_t head; 393 uint32_t tail; 394 uint32_t vector; 395 uint32_t size; 396 uint64_t dma_addr; 397 uint64_t db_addr; 398 uint64_t ei_addr; 399 QEMUTimer *timer; 400 EventNotifier notifier; 401 bool ioeventfd_enabled; 402 QTAILQ_HEAD(, NvmeSQueue) sq_list; 403 QTAILQ_HEAD(, NvmeRequest) req_list; 404 } NvmeCQueue; 405 406 #define TYPE_NVME "nvme" 407 #define NVME(obj) \ 408 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) 409 410 typedef struct NvmeParams { 411 char *serial; 412 uint32_t num_queues; /* deprecated since 5.1 */ 413 uint32_t max_ioqpairs; 414 uint16_t msix_qsize; 415 uint32_t cmb_size_mb; 416 uint8_t aerl; 417 uint32_t aer_max_queued; 418 uint8_t mdts; 419 uint8_t vsl; 420 bool use_intel_id; 421 uint8_t zasl; 422 bool auto_transition_zones; 423 bool legacy_cmb; 424 bool ioeventfd; 425 uint8_t sriov_max_vfs; 426 uint16_t sriov_vq_flexible; 427 uint16_t sriov_vi_flexible; 428 uint8_t sriov_max_vq_per_vf; 429 uint8_t sriov_max_vi_per_vf; 430 } NvmeParams; 431 432 typedef struct NvmeCtrl { 433 PCIDevice parent_obj; 434 MemoryRegion bar0; 435 MemoryRegion iomem; 436 NvmeBar bar; 437 NvmeParams params; 438 NvmeBus bus; 439 440 uint16_t cntlid; 441 bool qs_created; 442 uint32_t page_size; 443 uint16_t page_bits; 444 uint16_t max_prp_ents; 445 uint16_t cqe_size; 446 uint16_t sqe_size; 447 uint32_t max_q_ents; 448 uint8_t outstanding_aers; 449 uint32_t irq_status; 450 int cq_pending; 451 uint64_t host_timestamp; /* Timestamp sent by the host */ 452 uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ 453 uint64_t starttime_ms; 454 uint16_t temperature; 455 uint8_t smart_critical_warning; 456 uint32_t conf_msix_qsize; 457 uint32_t conf_ioqpairs; 458 uint64_t dbbuf_dbs; 459 uint64_t dbbuf_eis; 460 bool dbbuf_enabled; 461 462 struct { 463 MemoryRegion mem; 464 uint8_t *buf; 465 bool cmse; 466 hwaddr cba; 467 } cmb; 468 469 struct { 470 HostMemoryBackend *dev; 471 bool cmse; 472 hwaddr cba; 473 } pmr; 474 475 uint8_t aer_mask; 476 NvmeRequest **aer_reqs; 477 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; 478 int aer_queued; 479 480 uint32_t dmrsl; 481 482 /* Namespace ID is started with 1 so bitmap should be 1-based */ 483 #define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) 484 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); 485 486 NvmeSubsystem *subsys; 487 488 NvmeNamespace namespace; 489 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 490 NvmeSQueue **sq; 491 NvmeCQueue **cq; 492 NvmeSQueue admin_sq; 493 NvmeCQueue admin_cq; 494 NvmeIdCtrl id_ctrl; 495 496 struct { 497 struct { 498 uint16_t temp_thresh_hi; 499 uint16_t temp_thresh_low; 500 }; 501 502 uint32_t async_config; 503 NvmeHostBehaviorSupport hbs; 504 } features; 505 506 NvmePriCtrlCap pri_ctrl_cap; 507 NvmeSecCtrlList sec_ctrl_list; 508 struct { 509 uint16_t vqrfap; 510 uint16_t virfap; 511 } next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */ 512 } NvmeCtrl; 513 514 typedef enum NvmeResetType { 515 NVME_RESET_FUNCTION = 0, 516 NVME_RESET_CONTROLLER = 1, 517 } NvmeResetType; 518 519 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) 520 { 521 if (!nsid || nsid > NVME_MAX_NAMESPACES) { 522 return NULL; 523 } 524 525 return n->namespaces[nsid]; 526 } 527 528 static inline NvmeCQueue *nvme_cq(NvmeRequest *req) 529 { 530 NvmeSQueue *sq = req->sq; 531 NvmeCtrl *n = sq->ctrl; 532 533 return n->cq[sq->cqid]; 534 } 535 536 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) 537 { 538 NvmeSQueue *sq = req->sq; 539 return sq->ctrl; 540 } 541 542 static inline uint16_t nvme_cid(NvmeRequest *req) 543 { 544 if (!req) { 545 return 0xffff; 546 } 547 548 return le16_to_cpu(req->cqe.cid); 549 } 550 551 static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n) 552 { 553 PCIDevice *pci_dev = &n->parent_obj; 554 NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev)); 555 556 if (pci_is_vf(pci_dev)) { 557 return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)]; 558 } 559 560 return NULL; 561 } 562 563 static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n, 564 uint16_t cntlid) 565 { 566 NvmeSecCtrlList *list = &n->sec_ctrl_list; 567 uint8_t i; 568 569 for (i = 0; i < list->numcntl; i++) { 570 if (le16_to_cpu(list->sec[i].scid) == cntlid) { 571 return &list->sec[i]; 572 } 573 } 574 575 return NULL; 576 } 577 578 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); 579 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, 580 NvmeTxDirection dir, NvmeRequest *req); 581 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len, 582 NvmeTxDirection dir, NvmeRequest *req); 583 void nvme_rw_complete_cb(void *opaque, int ret); 584 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, 585 NvmeCmd *cmd); 586 587 #endif /* HW_NVME_NVME_H */ 588