/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch            <kbusch@kernel.org>
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
 *   Minwoo Im              <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_INTERNAL_H
#define HW_NVME_INTERNAL_H

#include "qemu/uuid.h"
#include "hw/pci/pci.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 32
#define NVME_MAX_NAMESPACES 256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus bus;
    uint8_t subnqn[256];

    NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr d;
    uint64_t w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    bool detached;
    bool shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool eui64_default;

    uint16_t ms;
    uint8_t mset;
    uint8_t pi;
    uint8_t pil;
    uint8_t pif;

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t msrc;

    bool zoned;
    bool cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState parent_obj;
    BlockConf blkconf;
    int32_t bootindex;
    int64_t size;
    int64_t moff;
    NvmeIdNs id_ns;
    NvmeIdNsNvm id_ns_nvm;
    NvmeLBAF lbaf;
    unsigned int nlbaf;
    size_t lbasz;
    const uint32_t *iocs;
    uint8_t csi;
    uint16_t status;
    int attached;
    uint8_t pif;

    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned *id_ns_zoned;
    NvmeZone *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t num_zones;
    uint64_t zone_size;
    uint64_t zone_capacity;
    uint32_t zone_size_log2;
    uint8_t *zd_extensions;
    int32_t nr_open_zones;
    int32_t nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;
    } features;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}
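/*
 * Usage sketch (illustrative, not part of the original header): the three
 * helpers above map LBAs to byte offsets in the backing image. Data for
 * all logical blocks is laid out first; with a separate (non-extended)
 * metadata format, the metadata area begins at ns->moff. Assuming a
 * hypothetical namespace with lbaf.ds = 9 (512-byte blocks) and
 * lbaf.ms = 8 bytes of metadata per block:
 *
 *     nvme_l2b(ns, 4)  == 4 << 9 == 2048     data offset of LBA 4
 *     nvme_m2b(ns, 4)  == 8 * 4  == 32       metadata bytes for 4 LBAs
 *     nvme_moff(ns, 4) == ns->moff + 32      metadata offset of LBA 4
 */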
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);
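/*
 * Usage sketch (an assumption about intended call order, not code from the
 * original header): a zone counted as open must also be counted as active,
 * so the accounting helpers above are meant to be paired with the matching
 * state transition. Implicitly opening an empty zone might look like:
 *
 *     nvme_aor_inc_active(ns);
 *     nvme_aor_inc_open(ns);
 *     nvme_set_zone_state(zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
 *     QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry);
 *
 * The assert in nvme_aor_dec_active() (nr_active_zones >= nr_open_zones)
 * enforces exactly this invariant.
 */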
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA = 1 << 1,
};

typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue *sq;
    struct NvmeNamespace *ns;
    BlockAIOCB *aiocb;
    uint16_t status;
    void *opaque;
    NvmeCqe cqe;
    NvmeCmd cmd;
    BlockAcctCookie acct;
    NvmeSg sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
    default: return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
    default: return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t sqid;
    uint16_t cqid;
    uint32_t head;
    uint32_t tail;
    uint32_t size;
    uint64_t dma_addr;
    QEMUTimer *timer;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t phase;
    uint16_t cqid;
    uint16_t irq_enabled;
    uint32_t head;
    uint32_t tail;
    uint32_t vector;
    uint32_t size;
    uint64_t dma_addr;
    QEMUTimer *timer;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;
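/*
 * Note (an assumption based on the NVMe specification, not stated in the
 * original header): cq->phase implements the spec's Phase Tag. It starts
 * at 1 when the queue is created and is inverted each time the tail
 * wraps, letting the host distinguish newly posted completion entries
 * from stale ones. A wrap might be handled along these lines:
 *
 *     cq->tail++;
 *     if (cq->tail >= cq->size) {
 *         cq->tail = 0;
 *         cq->phase = !cq->phase;
 *     }
 */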
#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t aerl;
    uint32_t aer_max_queued;
    uint8_t mdts;
    uint8_t vsl;
    bool use_intel_id;
    uint8_t zasl;
    bool auto_transition_zones;
    bool legacy_cmb;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar bar;
    NvmeParams params;
    NvmeBus bus;

    uint16_t cntlid;
    bool qs_created;
    uint32_t page_size;
    uint16_t page_bits;
    uint16_t max_prp_ents;
    uint16_t cqe_size;
    uint16_t sqe_size;
    uint32_t reg_size;
    uint32_t max_q_ents;
    uint8_t outstanding_aers;
    uint32_t irq_status;
    int cq_pending;
    uint64_t host_timestamp;              /* Timestamp sent by the host */
    uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
    uint64_t starttime_ms;
    uint16_t temperature;
    uint8_t smart_critical_warning;

    struct {
        MemoryRegion mem;
        uint8_t *buf;
        bool cmse;
        hwaddr cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool cmse;
        hwaddr cba;
    } pmr;

    uint8_t aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int aer_queued;

    uint32_t dmrsl;

    /* Namespace IDs start at 1, so the bitmap must be 1-based */
#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem *subsys;

    NvmeNamespace namespace;
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue **sq;
    NvmeCQueue **cq;
    NvmeSQueue admin_sq;
    NvmeCQueue admin_cq;
    NvmeIdCtrl id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t async_config;
        NvmeHostBehaviorSupport hbs;
    } features;
} NvmeCtrl;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_INTERNAL_H */