/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"

#include "nvme.h"
#include "trace.h"

uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
                           uint32_t reftag)
{
    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
        (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    return NVME_SUCCESS;
}

/* from Linux kernel (crypto/crct10dif_common.c) */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
                           size_t len)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
    }

    return crc;
}

void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
                                          apptag, *reftag);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        dif->guard = cpu_to_be16(crc);
        dif->apptag = cpu_to_be16(apptag);
        dif->reftag = cpu_to_be32(*reftag);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }
}

static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint8_t prinfo, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}

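/*
 * nvme_dif_check() verifies the protection information for each logical
 * block in the bounced data/metadata buffers. The 8 byte protection
 * information tuple (guard, application and reference tags) sits at offset
 * `pil` within each metadata element: 0 when the DPS "first eight bytes"
 * setting is in effect, otherwise ms - sizeof(NvmeDifTuple) (i.e. the last
 * eight bytes). For Type 1 and Type 2 protection the expected reference tag
 * is incremented for every logical block; for Type 3 it is not.
 */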
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
                                appmask, *reftag);
        if (status) {
            return status;
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }

    return NVME_SUCCESS;
}

uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
    int64_t pnum = 0;

    Error *err = NULL;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    do {
        int ret;

        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            mbufp = mbuf + moffset;
            mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += ns->lbaf.ms) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}

static void nvme_dif_rw_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));

    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_rw_complete_cb(req, ret);
}

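/*
 * Read path callback chain: nvme_dif_rw() issues the data read and continues
 * in nvme_dif_rw_mdata_in_cb(), which issues the metadata read and continues
 * in nvme_dif_rw_check_cb() below. That callback patches up the tuples of
 * block-zeroed ranges, verifies the protection information and copies the
 * bounced data (and, unless PRACT strips it entirely, the metadata) to the
 * host before handing off to nvme_dif_rw_cb() for cleanup and completion.
 */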
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
                                   reftag);

    if (ret) {
        goto out;
    }

    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                            slba, apptag, appmask, &reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}

static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_dif_rw_check_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
                                 nvme_dif_rw_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

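/*
 * nvme_dif_rw() is the entry point for reads, writes and write zeroes on
 * protected namespaces. For Write Zeroes with PRACT set, the metadata bounce
 * buffer is filled with generated tuples and written out through
 * nvme_dif_rw_mdata_out_cb(). For writes, host data (and metadata, unless
 * PRACT is set and the metadata consists solely of the 8 byte tuple) is
 * bounced in, protection information is generated (PRACT) or verified, and
 * the data followed by the metadata is written out. Reads go through the
 * callback chain described above.
 */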
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(prinfo & NVME_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, prinfo);

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        if (prinfo & NVME_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, prinfo, slba, reftag);
            if (status) {
                goto err;
            }

            flags = 0;

            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            for (; mbuf < end; mbuf += ns->lbaf.ms) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        goto err;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    if (!(pract && ns->lbaf.ms == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, prinfo, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, &reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                                slba, apptag, appmask, &reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;

err:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}
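
/*
 * Worked example (illustrative only): for a Type 1 namespace with PRACT
 * cleared and all PRCHK bits set, a Read of four logical blocks at SLBA
 * 0x100 must carry 0x100 as the initial reference tag, or
 * nvme_check_prinfo() fails the command with NVME_INVALID_PROT_INFO.
 * nvme_dif_check() then expects reference tags 0x100, 0x101, 0x102 and
 * 0x103 in the four tuples; any tuple whose application tag reads back as
 * 0xffff escapes checking altogether, which is also how
 * nvme_dif_mangle_mdata() masks block-zeroed (unwritten) ranges.
 */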