/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch            <kbusch@kernel.org>
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
 *   Minwoo Im              <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_INTERNAL_H
#define HW_NVME_INTERNAL_H

#include "qemu/uuid.h"
#include "hw/pci/pci.h"
#include "hw/block/block.h"

#include "block/nvme.h"

/* Fixed capacity limits for the emulated subsystem/controller. */
#define NVME_MAX_CONTROLLERS 32
#define NVME_MAX_NAMESPACES  256

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * An NVMe subsystem groups controllers and the namespaces they may share.
 * Controllers are indexed by cntlid; namespaces are indexed by nsid, which
 * is 1-based (hence the "+ 1" in the array size; slot 0 is unused).
 */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    uint8_t subnqn[256];

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn; /* user-supplied NQN; subnqn is derived from it */
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);

/*
 * Look up the controller registered under @cntlid.
 *
 * Returns NULL if @subsys is NULL or @cntlid is out of range; may also
 * return NULL for an in-range but unregistered slot.
 */
static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

/*
 * Look up the shared namespace with the given 1-based @nsid.
 *
 * Returns NULL for a NULL subsystem, nsid 0 (invalid per NVMe), or an
 * nsid beyond NVME_MAX_NAMESPACES.
 */
static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

/*
 * One zone of a zoned namespace: the spec-defined descriptor plus the
 * internal write pointer and the list linkage used by the per-state
 * zone queues in NvmeNamespace.
 */
typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

/* User-visible -device nvme-ns,... properties. */
typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;

    /* metadata / end-to-end protection settings */
    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;

    /* Copy command limits (MSSRL/MCL/MSRC identify fields) */
    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    /* zoned namespace (ZNS) configuration */
    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;              /* data size in bytes */
    int64_t      moff;              /* byte offset of the metadata area */
    NvmeIdNs     id_ns;
    NvmeLBAF     lbaf;              /* currently formatted LBA format */
    size_t       lbasz;             /* logical block size in bytes */
    const uint32_t *iocs;           /* I/O command set support table */
    uint8_t      csi;
    uint16_t     status;
    int          attached;          /* number of controllers attached */

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* zoned namespace state; only valid when params.zoned is set */
    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;      /* zone size in logical blocks */
    uint64_t        zone_capacity;  /* zone capacity in logical blocks */
    uint32_t        zone_size_log2; /* log2(zone_size) if power of two */
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;           /* Error Recovery feature value */
    } features;
} NvmeNamespace;

/* NSID of @ns, or 0 (the invalid NSID) if @ns is NULL. */
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

/* Convert a count of logical blocks to a count of data bytes. */
static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

/* Convert a count of logical blocks to a count of metadata bytes. */
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

/* Byte offset of the metadata belonging to @lba. */
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

/* True if the namespace is formatted with extended (interleaved) LBAs. */
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

/* The zone state lives in the upper nibble of the descriptor's zs field. */
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

/* First LBA past the zone for read purposes (full zone size). */
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

/* First LBA past the zone for write purposes (zone capacity only). */
static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

/*
 * True if the zone's write pointer is meaningful; it is not in the
 * Full, Read Only, and Offline states.
 */
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

/* Zone descriptor extension for the zone at @zone_idx. */
static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

/*
 * Active/Open Resource (AOR) accounting.  The counters are only
 * maintained when the corresponding limit is configured (non-zero);
 * the asserts enforce the spec's FULL >= ACTIVE >= OPEN invariants.
 */
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

/* A queued Asynchronous Event Request completion. */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

/*
 * Scatter/gather descriptor for a request.  NOTE(review): the flags
 * appear to select the active union member (NVME_SG_DMA => qsg,
 * otherwise iov) — confirm against the users in the .c files.
 */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

/* Per-command in-flight state, owned by the submission queue. */
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

/* Bounce buffers used when data/metadata must be staged in host memory. */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

/* Stringify an admin opcode for tracing/debug output. */
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

/* Stringify an I/O opcode for tracing/debug output. */
static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

/* Emulated submission queue and its outstanding requests. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;       /* completion queue this SQ posts to */
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
    NvmeRequest *io_req;    /* preallocated request pool for this queue */
    QTAILQ_HEAD(, NvmeRequest) req_list;        /* free requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list;    /* in-flight requests */
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

/* Emulated completion queue. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;  /* SQs mapped to this CQ */
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME_BUS "nvme-bus"
#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

/* User-visible -device nvme,... properties. */
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     legacy_cmb;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    reg_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;

    /* Controller Memory Buffer state */
    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    /* Persistent Memory Region state, backed by a host memory backend */
    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;      /* inline namespace for legacy drive= */
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1]; /* 1-based */
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };
        uint32_t    async_config;
    } features;
} NvmeCtrl;

/*
 * Namespace attached to controller @n under the 1-based @nsid, or NULL
 * if the nsid is invalid or nothing is attached in that slot.
 */
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

/* Completion queue that @req's submission queue posts to. */
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

/* Controller that owns @req. */
static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

/* Command identifier of @req (host byte order), or 0xffff for NULL. */
static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

/*
 * CRC16 lookup table for the T10 DIF guard tag (polynomial 0x8BB7).
 * from Linux kernel (crypto/crct10dif_common.c)
 */
static const uint16_t t10_dif_crc_table[256] = {
    0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
    0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
    0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
    0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
    0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
    0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
    0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
    0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
    0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
    0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
    0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
    0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
    0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
    0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
    0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
    0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
    0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
    0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
    0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
    0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
    0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
    0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
    0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
    0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
    0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
    0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
    0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
    0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
    0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
    0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
    0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
    0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};

uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
                           uint32_t reftag);
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba);
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t reftag);
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint16_t ctrl,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t reftag);
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);

#endif /* HW_NVME_INTERNAL_H */