// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"

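/*
 * Open the backing block device for a namespace and cache its size and
 * logical block size shift. Returns 0 on success or a negative errno;
 * -ENOTBLK is deliberately not logged, as the path may simply not refer
 * to a block device.
 */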
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
	return 0;
}

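/* Release the backing block device, if one was opened. */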
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

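/*
 * Translate a block layer completion status into an NVMe status code, and
 * record the error location and, for data commands, the failing SLBA in
 * the request for error reporting.
 */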
static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping from block layer errors
	 * to NVMe status codes (see nvme_error_status()). For consistency,
	 * when we reverse map we use the most appropriate NVMe status code
	 * from the group of NVMe status codes used in nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
		/* fallthru */
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

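/*
 * Common bio end_io handler: complete the request with the translated NVMe
 * status and drop the bio reference unless it is the embedded inline bio.
 */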
static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	if (bio != &req->b.inline_bio)
		bio_put(bio);
}

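/*
 * Read/write handler: map the command's scatterlist onto one or more bios.
 * Small transfers use the inline bio embedded in the request; larger ones
 * allocate and chain additional bios as pages stop fitting.
 */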
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	sector_t sector;
	int op, op_flags = 0, i;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE;
		op_flags = REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op_flags |= REQ_FUA;
	} else {
		op = REQ_OP_READ;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op_flags |= REQ_NOMERGE;

	sector = le64_to_cpu(req->cmd->rw.slba);
	sector <<= (req->ns->blksize_shift - 9);

	if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	} else {
		bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
	}
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio_set_op_attrs(bio, op, op_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
			bio_set_dev(bio, req->ns->bdev);
			bio->bi_iter.bi_sector = sector;
			bio_set_op_attrs(bio, op, op_flags);

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	submit_bio(bio);
}

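/* Issue an asynchronous flush to the bdev using the request's inline bio. */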
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}

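/* Synchronously flush the namespace's block device; returns an NVMe status. */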
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

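/*
 * Queue a discard for a single DSM range, converting the namespace's LBA
 * units to 512-byte sectors. -EOPNOTSUPP is treated as success so that a
 * backend without discard support does not fail the whole command.
 */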
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}

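/*
 * Walk the DSM range list from the command payload, accumulating discards
 * into a single chained bio. On success the bio is submitted; if a range
 * fails after a bio was built, the bio is ended with an error instead.
 */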
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
		} else {
			submit_bio(bio);
		}
	} else {
		nvmet_req_complete(req, status);
	}
}

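/* Dispatch Dataset Management: only the Deallocate attribute is acted on. */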
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

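/*
 * Write Zeroes handler: convert the SLBA and the zero-based NLB to 512-byte
 * sectors and hand the whole range to __blkdev_issue_zeroout().
 */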
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	sector = le64_to_cpu(write_zeroes->slba) <<
		(req->ns->blksize_shift - 9);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
		(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

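/*
 * Validate the opcode of an I/O command against the block device backend
 * and set up the execute handler and expected data length.
 */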
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		req->data_len = nvmet_rw_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		req->data_len = 0;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
			sizeof(struct nvme_dsm_range);
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		req->data_len = 0;
		return 0;
	default:
		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
		       req->sq->qid);
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}