1 /* 2 * QEMU NVM Express 3 * 4 * Copyright (c) 2012 Intel Corporation 5 * Copyright (c) 2021 Minwoo Im 6 * Copyright (c) 2021 Samsung Electronics Co., Ltd. 7 * 8 * Authors: 9 * Keith Busch <kbusch@kernel.org> 10 * Klaus Jensen <k.jensen@samsung.com> 11 * Gollu Appalanaidu <anaidu.gollu@samsung.com> 12 * Dmitry Fomichev <dmitry.fomichev@wdc.com> 13 * Minwoo Im <minwoo.im.dev@gmail.com> 14 * 15 * This code is licensed under the GNU GPL v2 or later. 16 */ 17 18 #ifndef HW_NVME_INTERNAL_H 19 #define HW_NVME_INTERNAL_H 20 21 #include "qemu/uuid.h" 22 #include "hw/pci/pci.h" 23 #include "hw/block/block.h" 24 25 #include "block/nvme.h" 26 27 #define NVME_MAX_CONTROLLERS 32 28 #define NVME_MAX_NAMESPACES 256 29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) 30 31 typedef struct NvmeCtrl NvmeCtrl; 32 typedef struct NvmeNamespace NvmeNamespace; 33 34 #define TYPE_NVME_SUBSYS "nvme-subsys" 35 #define NVME_SUBSYS(obj) \ 36 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) 37 38 typedef struct NvmeSubsystem { 39 DeviceState parent_obj; 40 uint8_t subnqn[256]; 41 42 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; 43 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 44 45 struct { 46 char *nqn; 47 } params; 48 } NvmeSubsystem; 49 50 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); 51 52 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, 53 uint32_t cntlid) 54 { 55 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { 56 return NULL; 57 } 58 59 return subsys->ctrls[cntlid]; 60 } 61 62 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, 63 uint32_t nsid) 64 { 65 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { 66 return NULL; 67 } 68 69 return subsys->namespaces[nsid]; 70 } 71 72 #define TYPE_NVME_NS "nvme-ns" 73 #define NVME_NS(obj) \ 74 OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) 75 76 typedef struct NvmeZone { 77 NvmeZoneDescr d; 78 uint64_t w_ptr; 79 QTAILQ_ENTRY(NvmeZone) entry; 80 } NvmeZone; 81 82 typedef struct NvmeNamespaceParams { 83 bool detached; 84 bool shared; 85 uint32_t nsid; 86 QemuUUID uuid; 87 uint64_t eui64; 88 bool eui64_default; 89 90 uint16_t ms; 91 uint8_t mset; 92 uint8_t pi; 93 uint8_t pil; 94 95 uint16_t mssrl; 96 uint32_t mcl; 97 uint8_t msrc; 98 99 bool zoned; 100 bool cross_zone_read; 101 uint64_t zone_size_bs; 102 uint64_t zone_cap_bs; 103 uint32_t max_active_zones; 104 uint32_t max_open_zones; 105 uint32_t zd_extension_size; 106 } NvmeNamespaceParams; 107 108 typedef struct NvmeNamespace { 109 DeviceState parent_obj; 110 BlockConf blkconf; 111 int32_t bootindex; 112 int64_t size; 113 int64_t moff; 114 NvmeIdNs id_ns; 115 NvmeLBAF lbaf; 116 size_t lbasz; 117 const uint32_t *iocs; 118 uint8_t csi; 119 uint16_t status; 120 int attached; 121 122 QTAILQ_ENTRY(NvmeNamespace) entry; 123 124 NvmeIdNsZoned *id_ns_zoned; 125 NvmeZone *zone_array; 126 QTAILQ_HEAD(, NvmeZone) exp_open_zones; 127 QTAILQ_HEAD(, NvmeZone) imp_open_zones; 128 QTAILQ_HEAD(, NvmeZone) closed_zones; 129 QTAILQ_HEAD(, NvmeZone) full_zones; 130 uint32_t num_zones; 131 uint64_t zone_size; 132 uint64_t zone_capacity; 133 uint32_t zone_size_log2; 134 uint8_t *zd_extensions; 135 int32_t nr_open_zones; 136 int32_t nr_active_zones; 137 138 NvmeNamespaceParams params; 139 140 struct { 141 uint32_t err_rec; 142 } features; 143 } NvmeNamespace; 144 145 static inline uint32_t nvme_nsid(NvmeNamespace *ns) 146 { 147 if (ns) { 148 return ns->params.nsid; 149 } 150 151 return 0; 152 } 153 154 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) 155 { 156 return lba << ns->lbaf.ds; 157 } 158 159 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) 160 { 161 return ns->lbaf.ms * lba; 162 } 163 164 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) 165 { 166 return ns->moff + nvme_m2b(ns, lba); 167 } 168 169 static inline bool nvme_ns_ext(NvmeNamespace *ns) 170 { 171 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); 172 } 173 174 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) 175 { 176 return zone->d.zs >> 4; 177 } 178 179 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) 180 { 181 zone->d.zs = state << 4; 182 } 183 184 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) 185 { 186 return zone->d.zslba + ns->zone_size; 187 } 188 189 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) 190 { 191 return zone->d.zslba + zone->d.zcap; 192 } 193 194 static inline bool nvme_wp_is_valid(NvmeZone *zone) 195 { 196 uint8_t st = nvme_get_zone_state(zone); 197 198 return st != NVME_ZONE_STATE_FULL && 199 st != NVME_ZONE_STATE_READ_ONLY && 200 st != NVME_ZONE_STATE_OFFLINE; 201 } 202 203 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, 204 uint32_t zone_idx) 205 { 206 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; 207 } 208 209 static inline void nvme_aor_inc_open(NvmeNamespace *ns) 210 { 211 assert(ns->nr_open_zones >= 0); 212 if (ns->params.max_open_zones) { 213 ns->nr_open_zones++; 214 assert(ns->nr_open_zones <= ns->params.max_open_zones); 215 } 216 } 217 218 static inline void nvme_aor_dec_open(NvmeNamespace *ns) 219 { 220 if (ns->params.max_open_zones) { 221 assert(ns->nr_open_zones > 0); 222 ns->nr_open_zones--; 223 } 224 assert(ns->nr_open_zones >= 0); 225 } 226 227 static inline void nvme_aor_inc_active(NvmeNamespace *ns) 228 { 229 assert(ns->nr_active_zones >= 0); 230 if (ns->params.max_active_zones) { 231 ns->nr_active_zones++; 232 assert(ns->nr_active_zones <= ns->params.max_active_zones); 233 } 234 } 235 236 static inline void nvme_aor_dec_active(NvmeNamespace *ns) 237 { 238 if (ns->params.max_active_zones) { 239 assert(ns->nr_active_zones > 0); 240 ns->nr_active_zones--; 241 assert(ns->nr_active_zones >= ns->nr_open_zones); 242 } 243 assert(ns->nr_active_zones >= 0); 244 } 245 246 void nvme_ns_init_format(NvmeNamespace *ns); 247 int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); 248 void nvme_ns_drain(NvmeNamespace *ns); 249 void nvme_ns_shutdown(NvmeNamespace *ns); 250 void nvme_ns_cleanup(NvmeNamespace *ns); 251 252 typedef struct NvmeAsyncEvent { 253 QTAILQ_ENTRY(NvmeAsyncEvent) entry; 254 NvmeAerResult result; 255 } NvmeAsyncEvent; 256 257 enum { 258 NVME_SG_ALLOC = 1 << 0, 259 NVME_SG_DMA = 1 << 1, 260 }; 261 262 typedef struct NvmeSg { 263 int flags; 264 265 union { 266 QEMUSGList qsg; 267 QEMUIOVector iov; 268 }; 269 } NvmeSg; 270 271 typedef enum NvmeTxDirection { 272 NVME_TX_DIRECTION_TO_DEVICE = 0, 273 NVME_TX_DIRECTION_FROM_DEVICE = 1, 274 } NvmeTxDirection; 275 276 typedef struct NvmeRequest { 277 struct NvmeSQueue *sq; 278 struct NvmeNamespace *ns; 279 BlockAIOCB *aiocb; 280 uint16_t status; 281 void *opaque; 282 NvmeCqe cqe; 283 NvmeCmd cmd; 284 BlockAcctCookie acct; 285 NvmeSg sg; 286 QTAILQ_ENTRY(NvmeRequest)entry; 287 } NvmeRequest; 288 289 typedef struct NvmeBounceContext { 290 NvmeRequest *req; 291 292 struct { 293 QEMUIOVector iov; 294 uint8_t *bounce; 295 } data, mdata; 296 } NvmeBounceContext; 297 298 static inline const char *nvme_adm_opc_str(uint8_t opc) 299 { 300 switch (opc) { 301 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; 302 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; 303 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; 304 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; 305 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; 306 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; 307 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; 308 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; 309 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; 310 case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; 311 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; 312 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; 313 default: return "NVME_ADM_CMD_UNKNOWN"; 314 } 315 } 316 317 static inline const char *nvme_io_opc_str(uint8_t opc) 318 { 319 switch (opc) { 320 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; 321 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; 322 case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; 323 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; 324 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; 325 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; 326 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; 327 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; 328 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; 329 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; 330 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; 331 default: return "NVME_NVM_CMD_UNKNOWN"; 332 } 333 } 334 335 typedef struct NvmeSQueue { 336 struct NvmeCtrl *ctrl; 337 uint16_t sqid; 338 uint16_t cqid; 339 uint32_t head; 340 uint32_t tail; 341 uint32_t size; 342 uint64_t dma_addr; 343 QEMUTimer *timer; 344 NvmeRequest *io_req; 345 QTAILQ_HEAD(, NvmeRequest) req_list; 346 QTAILQ_HEAD(, NvmeRequest) out_req_list; 347 QTAILQ_ENTRY(NvmeSQueue) entry; 348 } NvmeSQueue; 349 350 typedef struct NvmeCQueue { 351 struct NvmeCtrl *ctrl; 352 uint8_t phase; 353 uint16_t cqid; 354 uint16_t irq_enabled; 355 uint32_t head; 356 uint32_t tail; 357 uint32_t vector; 358 uint32_t size; 359 uint64_t dma_addr; 360 QEMUTimer *timer; 361 QTAILQ_HEAD(, NvmeSQueue) sq_list; 362 QTAILQ_HEAD(, NvmeRequest) req_list; 363 } NvmeCQueue; 364 365 #define TYPE_NVME_BUS "nvme-bus" 366 #define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) 367 368 typedef struct NvmeBus { 369 BusState parent_bus; 370 } NvmeBus; 371 372 #define TYPE_NVME "nvme" 373 #define NVME(obj) \ 374 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) 375 376 typedef struct NvmeParams { 377 char *serial; 378 uint32_t num_queues; /* deprecated since 5.1 */ 379 uint32_t max_ioqpairs; 380 uint16_t msix_qsize; 381 uint32_t cmb_size_mb; 382 uint8_t aerl; 383 uint32_t aer_max_queued; 384 uint8_t mdts; 385 uint8_t vsl; 386 bool use_intel_id; 387 uint8_t zasl; 388 bool auto_transition_zones; 389 bool legacy_cmb; 390 } NvmeParams; 391 392 typedef struct NvmeCtrl { 393 PCIDevice parent_obj; 394 MemoryRegion bar0; 395 MemoryRegion iomem; 396 NvmeBar bar; 397 NvmeParams params; 398 NvmeBus bus; 399 400 uint16_t cntlid; 401 bool qs_created; 402 uint32_t page_size; 403 uint16_t page_bits; 404 uint16_t max_prp_ents; 405 uint16_t cqe_size; 406 uint16_t sqe_size; 407 uint32_t reg_size; 408 uint32_t max_q_ents; 409 uint8_t outstanding_aers; 410 uint32_t irq_status; 411 uint64_t host_timestamp; /* Timestamp sent by the host */ 412 uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ 413 uint64_t starttime_ms; 414 uint16_t temperature; 415 uint8_t smart_critical_warning; 416 417 struct { 418 MemoryRegion mem; 419 uint8_t *buf; 420 bool cmse; 421 hwaddr cba; 422 } cmb; 423 424 struct { 425 HostMemoryBackend *dev; 426 bool cmse; 427 hwaddr cba; 428 } pmr; 429 430 uint8_t aer_mask; 431 NvmeRequest **aer_reqs; 432 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; 433 int aer_queued; 434 435 uint32_t dmrsl; 436 437 /* Namespace ID is started with 1 so bitmap should be 1-based */ 438 #define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) 439 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); 440 441 NvmeSubsystem *subsys; 442 443 NvmeNamespace namespace; 444 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 445 NvmeSQueue **sq; 446 NvmeCQueue **cq; 447 NvmeSQueue admin_sq; 448 NvmeCQueue admin_cq; 449 NvmeIdCtrl id_ctrl; 450 451 struct { 452 struct { 453 uint16_t temp_thresh_hi; 454 uint16_t temp_thresh_low; 455 }; 456 uint32_t async_config; 457 } features; 458 } NvmeCtrl; 459 460 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) 461 { 462 if (!nsid || nsid > NVME_MAX_NAMESPACES) { 463 return NULL; 464 } 465 466 return n->namespaces[nsid]; 467 } 468 469 static inline NvmeCQueue *nvme_cq(NvmeRequest *req) 470 { 471 NvmeSQueue *sq = req->sq; 472 NvmeCtrl *n = sq->ctrl; 473 474 return n->cq[sq->cqid]; 475 } 476 477 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) 478 { 479 NvmeSQueue *sq = req->sq; 480 return sq->ctrl; 481 } 482 483 static inline uint16_t nvme_cid(NvmeRequest *req) 484 { 485 if (!req) { 486 return 0xffff; 487 } 488 489 return le16_to_cpu(req->cqe.cid); 490 } 491 492 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); 493 uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, 494 NvmeTxDirection dir, NvmeRequest *req); 495 uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, 496 NvmeTxDirection dir, NvmeRequest *req); 497 void nvme_rw_complete_cb(void *opaque, int ret); 498 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, 499 NvmeCmd *cmd); 500 501 /* from Linux kernel (crypto/crct10dif_common.c) */ 502 static const uint16_t t10_dif_crc_table[256] = { 503 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, 504 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, 505 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, 506 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, 507 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, 508 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, 509 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, 510 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, 511 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, 512 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, 513 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, 514 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, 515 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, 516 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, 517 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, 518 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, 519 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, 520 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, 521 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, 522 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, 523 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, 524 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, 525 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, 526 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, 527 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, 528 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, 529 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, 530 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, 531 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, 532 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, 533 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, 534 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 535 }; 536 537 uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, 538 uint32_t reftag); 539 uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, 540 uint64_t slba); 541 void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, 542 uint8_t *mbuf, size_t mlen, uint16_t apptag, 543 uint32_t reftag); 544 uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, 545 uint8_t *mbuf, size_t mlen, uint16_t ctrl, 546 uint64_t slba, uint16_t apptag, 547 uint16_t appmask, uint32_t reftag); 548 uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); 549 550 551 #endif /* HW_NVME_INTERNAL_H */ 552