// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
	/* Number of logical blocks per physical block. */
	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(lpp);

	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (bi) {
		ns->metadata_size = bi->tuple_size;
		if (bi->profile == &t10_pi_type1_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		else if (bi->profile == &t10_pi_type3_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		else
			/* Unsupported metadata type */
			ns->metadata_size = 0;
	}
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = bdev_nr_bytes(ns->bdev);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ns->pi_type = 0;
	ns->metadata_size = 0;
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
		nvmet_bdev_ns_enable_integrity(ns);

	if (bdev_is_zoned(ns->bdev)) {
		if (!nvmet_bdev_zns_enable(ns)) {
			nvmet_bdev_ns_disable(ns);
			return -EINVAL;
		}
		ns->csi = NVME_CSI_ZNS;
	}

	return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = bdev_nr_bytes(ns->bdev);
}
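
/*
 * Translate a block layer status code into an NVMe status code. On error,
 * the command-specific error location and the starting LBA are also
 * recorded in the request so they can be reported back to the host.
 */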
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping between block layer errors
	 * and NVMe status codes (see nvme_error_status()). For consistency,
	 * when we reverse map we use the most appropriate NVMe status code
	 * from the group of NVMe status codes used in nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	struct blk_integrity *bi;
	struct bio_integrity_payload *bip;
	int rc;
	size_t resid, len;

	bi = bdev_get_integrity(req->ns->bdev);
	if (unlikely(!bi)) {
		pr_err("Unable to locate bio_integrity\n");
		return -ENODEV;
	}

	bip = bio_integrity_alloc(bio, GFP_NOIO,
					bio_max_segs(req->metadata_sg_cnt));
	if (IS_ERR(bip)) {
		pr_err("Unable to allocate bio_integrity_payload\n");
		return PTR_ERR(bip);
	}

	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
	/* virtual start sector must be in integrity interval units */
	bip_set_seed(bip, bio->bi_iter.bi_sector >>
		     (bi->interval_exp - SECTOR_SHIFT));

	resid = bip->bip_iter.bi_size;
	while (resid > 0 && sg_miter_next(miter)) {
		len = min_t(size_t, miter->length, resid);
		rc = bio_integrity_add_page(bio, miter->page, len,
					    offset_in_page(miter->addr));
		if (unlikely(rc != len)) {
			pr_err("bio_integrity_add_page() failed; %d\n", rc);
			sg_miter_stop(miter);
			return -ENOMEM;
		}

		resid -= len;
		if (len < miter->length)
			miter->consumed -= miter->length - len;
	}
	sg_miter_stop(miter);

	return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
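
/*
 * Translate an NVMe read or write command into a chain of bios. Data pages
 * from the request scatterlist are added to the current bio; when it fills
 * up, a new bio is allocated and chained to it, and the full one is
 * submitted. If the namespace carries protection information, a bio
 * integrity payload is attached from the metadata scatterlist before each
 * submission. The whole chain completes through nvmet_bio_done() on the
 * first bio.
 */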
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	unsigned int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	int op, i, rc;
	struct sg_mapping_iter prot_miter;
	unsigned int iter_flags;
	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

	if (!nvmet_check_transfer_len(req, total_len))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op |= REQ_FUA;
		iter_flags = SG_MITER_TO_SG;
	} else {
		op = REQ_OP_READ;
		iter_flags = SG_MITER_FROM_SG;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op |= REQ_NOMERGE;

	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	} else {
		bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
	}
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = op;

	blk_start_plug(&plug);
	if (req->metadata_len)
		sg_miter_start(&prot_miter, req->metadata_sg,
			       req->metadata_sg_cnt, iter_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			if (req->metadata_len) {
				rc = nvmet_bdev_alloc_bip(req, bio,
							  &prot_miter);
				if (unlikely(rc)) {
					bio_io_error(bio);
					return;
				}
			}

			bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
			bio_set_dev(bio, req->ns->bdev);
			bio->bi_iter.bi_sector = sector;
			bio->bi_opf = op;

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	if (req->metadata_len) {
		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
		if (unlikely(rc)) {
			bio_io_error(bio);
			return;
		}
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			nvmet_lba_to_sect(ns, range->slba),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}
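
/*
 * Deallocate handling for Dataset Management: each range is copied from the
 * command SGL and passed to __blkdev_issue_discard(), which accumulates the
 * work in a single bio chain. The chain is submitted once all ranges are
 * processed, or errored out if any range failed; commands that produced no
 * bio are completed immediately.
 */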
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
			(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			req->metadata_len = nvmet_rw_metadata_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		return nvmet_report_invalid_opcode(req);
	}
}