// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
	/* Number of logical blocks per physical block. */
	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(lpp);

	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (bi) {
		ns->metadata_size = bi->tuple_size;
		if (bi->profile == &t10_pi_type1_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		else if (bi->profile == &t10_pi_type3_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		else
			/* Unsupported metadata type */
			ns->metadata_size = 0;
	}
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ns->pi_type = 0;
	ns->metadata_size = 0;
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
		nvmet_bdev_ns_enable_integrity(ns);

	if (bdev_is_zoned(ns->bdev)) {
		if (!nvmet_bdev_zns_enable(ns)) {
			nvmet_bdev_ns_disable(ns);
			return -EINVAL;
		}
		ns->csi = NVME_CSI_ZNS;
	}

	return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = i_size_read(ns->bdev->bd_inode);
}

u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping between block layer errors
	 * and NVMe status codes (see nvme_error_status()). For consistency,
	 * when we reverse-map we use the most appropriate NVMe status code
	 * from the group of NVMe status codes used in nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	struct blk_integrity *bi;
	struct bio_integrity_payload *bip;
	int rc;
	size_t resid, len;

	bi = bdev_get_integrity(req->ns->bdev);
	if (unlikely(!bi)) {
		pr_err("Unable to locate bio_integrity\n");
		return -ENODEV;
	}

	bip = bio_integrity_alloc(bio, GFP_NOIO,
					bio_max_segs(req->metadata_sg_cnt));
	if (IS_ERR(bip)) {
		pr_err("Unable to allocate bio_integrity_payload\n");
		return PTR_ERR(bip);
	}

	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
	/* virtual start sector must be in integrity interval units */
	bip_set_seed(bip, bio->bi_iter.bi_sector >>
		     (bi->interval_exp - SECTOR_SHIFT));

	resid = bip->bip_iter.bi_size;
	while (resid > 0 && sg_miter_next(miter)) {
		len = min_t(size_t, miter->length, resid);
		rc = bio_integrity_add_page(bio, miter->page, len,
					    offset_in_page(miter->addr));
		if (unlikely(rc != len)) {
			pr_err("bio_integrity_add_page() failed; %d\n", rc);
			sg_miter_stop(miter);
			return -ENOMEM;
		}

		resid -= len;
		if (len < miter->length)
			miter->consumed -= miter->length - len;
	}
	sg_miter_stop(miter);

	return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	unsigned int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	int op, i, rc;
	struct sg_mapping_iter prot_miter;
	unsigned int iter_flags;
	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

	if (!nvmet_check_transfer_len(req, total_len))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op |= REQ_FUA;
		iter_flags = SG_MITER_TO_SG;
	} else {
		op = REQ_OP_READ;
		iter_flags = SG_MITER_FROM_SG;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op |= REQ_NOMERGE;

	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	} else {
		bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
	}
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = op;

	blk_start_plug(&plug);
	if (req->metadata_len)
		sg_miter_start(&prot_miter, req->metadata_sg,
			       req->metadata_sg_cnt, iter_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		/*
		 * If the current bio cannot take the whole segment, chain a
		 * fresh bio to it and submit the full one.
		 */
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			if (req->metadata_len) {
				rc = nvmet_bdev_alloc_bip(req, bio,
							  &prot_miter);
				if (unlikely(rc)) {
					bio_io_error(bio);
					return;
				}
			}

			bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
			bio_set_dev(bio, req->ns->bdev);
			bio->bi_iter.bi_sector = sector;
			bio->bi_opf = op;

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	if (req->metadata_len) {
		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
		if (unlikely(rc)) {
			bio_io_error(bio);
			return;
		}
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			nvmet_lba_to_sect(ns, range->slba),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	/* The DSM Number of Ranges (NR) field is a 0's based count. */
	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
			(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			req->metadata_len = nvmet_rw_metadata_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		return nvmet_report_invalid_opcode(req);
	}
}