/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *  Keith Busch <kbusch@kernel.org>
 *  Klaus Jensen <k.jensen@samsung.com>
 *  Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *  Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *  Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_INTERNAL_H
#define HW_NVME_INTERNAL_H

#include "qemu/uuid.h"
#include "hw/pci/pci.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 32
#define NVME_MAX_NAMESPACES  256
/*
 * Default EUI-64 prefix for auto-generated namespace identifiers.
 * NOTE(review): the leading bytes are 0x52 0x54 ("RT"), i.e. the 52:54:00
 * OUI QEMU also uses for virtual NICs -- confirm against the code that
 * fills in the low bytes.
 */
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

/* NSIDs are 1..NVME_MAX_NAMESPACES; they must never collide with the
 * broadcast NSID (0xffffffff). */
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * An NVMe subsystem: groups up to NVME_MAX_CONTROLLERS controllers and the
 * namespaces they may share.  The namespaces array is indexed by NSID and
 * therefore 1-based (slot 0 is unused).
 */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    uint8_t     subnqn[256];    /* subsystem NVMe Qualified Name */

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];         /* indexed by cntlid */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by nsid */

    struct {
        char *nqn;  /* user-supplied NQN ("nqn" device property) */
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);

/*
 * Look up the controller registered under @cntlid in @subsys.
 * Returns NULL for a NULL subsystem, an out-of-range cntlid, or an
 * unoccupied slot.
 */
static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

/*
 * Look up the subsystem-level namespace with the given @nsid.
 * NSID 0 is invalid; returns NULL for NULL subsystem, invalid nsid, or an
 * unoccupied slot.
 */
static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

/*
 * Emulator-side zone bookkeeping: the spec-defined zone descriptor plus an
 * internal write pointer (w_ptr) and linkage into the per-state zone lists
 * kept on the namespace (exp_open/imp_open/closed/full).
 */
typedef struct NvmeZone {
    NvmeZoneDescr           d;      /* on-the-wire zone descriptor */
    uint64_t                w_ptr;  /* internal write pointer; presumably may
                                     * run ahead of d.wp while writes are in
                                     * flight -- confirm in zoned I/O path */
    QTAILQ_ENTRY(NvmeZone)  entry;
} NvmeZone;

/*
 * User-configurable namespace properties.  Field names follow the NVMe
 * Identify Namespace fields they feed (ms, mset, pi, pil, mssrl, mcl, msrc).
 */
typedef struct NvmeNamespaceParams {
    bool     detached;       /* start detached from the controller */
    bool     shared;         /* namespace may attach to multiple controllers */
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;  /* eui64 was auto-generated, not user-set */

    uint16_t ms;             /* metadata size per LBA */
    uint8_t  mset;
    uint8_t  pi;             /* protection information type */
    uint8_t  pil;            /* protection information location */

    uint16_t mssrl;          /* Copy command: max single source range length */
    uint32_t mcl;            /* Copy command: max copy length */
    uint8_t  msrc;           /* Copy command: max source ranges */

    bool     zoned;          /* namespace uses the Zoned Command Set */
    bool     cross_zone_read;
    uint64_t zone_size_bs;   /* zone size in bytes */
    uint64_t zone_cap_bs;    /* zone capacity in bytes */
    uint32_t max_active_zones;  /* 0 means no limit */
    uint32_t max_open_zones;    /* 0 means no limit */
    uint32_t zd_extension_size; /* per-zone descriptor extension, bytes */
} NvmeNamespaceParams;

/*
 * Runtime state of an emulated namespace, including the derived LBA format
 * (lbaf/lbasz), metadata offset (moff) and, for zoned namespaces, the zone
 * array and per-state zone lists.
 */
typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;       /* namespace data size in bytes */
    int64_t      moff;       /* byte offset of the metadata area */
    NvmeIdNs     id_ns;
    NvmeLBAF     lbaf;       /* active LBA format (data shift + metadata size) */
    size_t       lbasz;
    const uint32_t *iocs;    /* supported I/O command set opcodes table */
    uint8_t      csi;        /* command set identifier */
    uint16_t     status;
    int          attached;   /* number of controllers attached */

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* Zoned Command Set state (valid only when params.zoned) */
    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;      /* zone size in LBAs */
    uint64_t        zone_capacity;  /* zone capacity in LBAs */
    uint32_t        zone_size_log2; /* log2(zone_size) when it is a power of 2 */
    uint8_t         *zd_extensions; /* flat array of zone descriptor extensions */
    int32_t         nr_open_zones;   /* current open-resource count */
    int32_t         nr_active_zones; /* current active-resource count */

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;   /* Error Recovery feature value */
    } features;
} NvmeNamespace;

/* NSID of @ns, or 0 (invalid NSID) when @ns is NULL. */
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

/* Convert a count of logical blocks to a count of data bytes. */
static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

/* Convert a count of logical blocks to a count of metadata bytes. */
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

/* Byte offset of the metadata for logical block @lba. */
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

/*
 * True when the namespace is formatted with extended LBAs, i.e. metadata is
 * transferred contiguously with the data rather than in a separate buffer.
 */
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

/* The zone state is kept in the most significant nibble of the descriptor's
 * Zone State field; the low nibble is not used here. */
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

/* First LBA past the end of @zone for read purposes (full zone size). */
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

/* First LBA past the writable area of @zone (zone capacity, not size). */
static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

/* The write pointer is meaningless in the Full, Read Only and Offline
 * states; it is valid in any other zone state. */
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

/* Pointer into the flat zd_extensions array for zone @zone_idx. */
static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

/*
 * AOR (Active/Open Resources) accounting.  The counters are only maintained
 * when the corresponding limit is configured (non-zero); the asserts check
 * the spec invariants: counters are non-negative, never exceed their limit,
 * and open zones are a subset of active zones.
 */
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

/* A queued Asynchronous Event Request result awaiting delivery. */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

/* Flags for NvmeSg: which union member is live and whether it was
 * allocated for DMA (qsg) as opposed to a plain iovec. */
enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

/* Scatter/gather abstraction: either a DMA address list or an iovec,
 * discriminated by the flags above. */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

/* Per-command state for an in-flight NVMe request. */
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;    /* submission queue the command arrived on */
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status; /* NVMe status code to complete with */
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;

/* Bounce buffers used when data/metadata must be staged (e.g. for DIF). */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

/* Human-readable name of an admin opcode, for tracing. */
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

/* Human-readable name of an I/O opcode, for tracing. */
static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

/* Emulated submission queue; commands are pulled from guest memory at
 * dma_addr and dispatched from the timer callback. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;       /* completion queue this SQ posts to */
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
    NvmeRequest *io_req;    /* pre-allocated request pool for this queue */
    QTAILQ_HEAD(, NvmeRequest) req_list;     /* free requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list; /* outstanding requests */
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

/* Emulated completion queue. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;      /* current phase tag for posted CQEs */
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;     /* interrupt vector */
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;   /* SQs mapped to this CQ */
    QTAILQ_HEAD(, NvmeRequest) req_list; /* completions awaiting posting */
} NvmeCQueue;

#define TYPE_NVME_BUS "nvme-bus"
#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)

/* Bus on which nvme-ns devices attach to an nvme controller device. */
typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

/* User-configurable controller properties ("-device nvme,..."). */
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues;    /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;   /* Controller Memory Buffer size, MiB */
    uint8_t  aerl;          /* Asynchronous Event Request limit */
    uint32_t aer_max_queued;
    uint8_t  mdts;          /* Maximum Data Transfer Size (log2 pages) */
    uint8_t  vsl;           /* Verify Size Limit */
    bool     use_intel_id;  /* use the legacy Intel PCI vendor/device id */
    uint8_t  zasl;          /* Zone Append Size Limit */
    bool     auto_transition_zones;
    bool     legacy_cmb;
} NvmeParams;

/* Top-level state of an emulated NVMe controller (PCI function). */
typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;       /* shadow of the controller register file */
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;     /* admin queues have been set up */
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    reg_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;  /* CMB memory space enabled */
        hwaddr       cba;   /* CMB base address */
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;  /* DSM Maximum Ranges per command (Identify DMRSL) */

    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;  /* implicit namespace for drive= shorthand */
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by nsid */
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };
        uint32_t    async_config;   /* Asynchronous Event Configuration */
    } features;
} NvmeCtrl;

/* Namespace attached to controller @n under @nsid; NULL when invalid or
 * unattached.  The per-controller array is 1-based, like the subsystem's. */
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

/* Completion queue that @req's submission queue posts to. */
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

/* Controller that owns @req. */
static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

/* Command identifier of @req, or 0xffff when @req is NULL (for tracing). */
static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

/*
 * Lookup table for the CRC16 used by T10 DIF protection information
 * (polynomial 0x8BB7).
 *
 * from Linux kernel (crypto/crct10dif_common.c)
 */
static const uint16_t t10_dif_crc_table[256] = {
    0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
    0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
    0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
    0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
    0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
    0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
    0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
    0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
    0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
    0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
    0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
    0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
    0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
    0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
    0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
    0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
    0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
    0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
    0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
    0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
    0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
    0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
    0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
    0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
    0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
    0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
    0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
    0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
    0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
    0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
    0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
    0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};

uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
                           uint32_t reftag);
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba);
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t *reftag);
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t *reftag);
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);

#endif /* HW_NVME_INTERNAL_H */