1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDPA simulator for block device.
4  *
5  * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
6  * Copyright (c) 2021, Red Hat Inc. All rights reserved.
7  *
8  */
9 
10 #include <linux/init.h>
11 #include <linux/module.h>
12 #include <linux/device.h>
13 #include <linux/kernel.h>
14 #include <linux/blkdev.h>
15 #include <linux/vringh.h>
16 #include <linux/vdpa.h>
17 #include <uapi/linux/virtio_blk.h>
18 
19 #include "vdpa_sim.h"
20 
/* Module metadata, passed to the MODULE_*() macros at the end of the file */
#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

/* Features offered by the simulated block device: the generic simulator
 * features plus the virtio-blk specific bits listed below.
 */
#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ)       | \
				 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

/* Disk capacity, expressed in sectors */
#define VDPASIM_BLK_CAPACITY	0x40000
/* Value reported in the 'size_max' config field */
#define VDPASIM_BLK_SIZE_MAX	0x1000
/* Value reported in the 'seg_max' config field */
#define VDPASIM_BLK_SEG_MAX	32
/* Maximum number of sectors in a discard/write-zeroes request */
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1
45 
46 struct vdpasim_blk {
47 	struct vdpasim vdpasim;
48 	void *buffer;
49 	bool shared_backend;
50 };
51 
52 static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
53 {
54 	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
55 }
56 
57 static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
58 
59 static bool shared_backend;
60 module_param(shared_backend, bool, 0444);
61 MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");
62 
63 static void *shared_buffer;
64 /* mutex to synchronize shared_buffer access */
65 static DEFINE_MUTEX(shared_buffer_mutex);
66 
67 static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
68 {
69 	if (blk->shared_backend)
70 		mutex_lock(&shared_buffer_mutex);
71 }
72 
73 static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
74 {
75 	if (blk->shared_backend)
76 		mutex_unlock(&shared_buffer_mutex);
77 }
78 
79 static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
80 				    u64 num_sectors, u64 max_sectors)
81 {
82 	if (start_sector > VDPASIM_BLK_CAPACITY) {
83 		dev_dbg(&vdpasim->vdpa.dev,
84 			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
85 			start_sector, VDPASIM_BLK_CAPACITY);
86 	}
87 
88 	if (num_sectors > max_sectors) {
89 		dev_dbg(&vdpasim->vdpa.dev,
90 			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
91 			num_sectors, max_sectors);
92 		return false;
93 	}
94 
95 	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
96 		dev_dbg(&vdpasim->vdpa.dev,
97 			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
98 			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
99 		return false;
100 	}
101 
102 	return true;
103 }
104 
105 /* Returns 'true' if the request is handled (with or without an I/O error)
106  * and the status is correctly written in the last byte of the 'in iov',
107  * 'false' otherwise.
108  */
109 static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
110 				   struct vdpasim_virtqueue *vq)
111 {
112 	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
113 	size_t pushed = 0, to_pull, to_push;
114 	struct virtio_blk_outhdr hdr;
115 	bool handled = false;
116 	ssize_t bytes;
117 	loff_t offset;
118 	u64 sector;
119 	u8 status;
120 	u32 type;
121 	int ret;
122 
123 	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
124 				   &vq->head, GFP_ATOMIC);
125 	if (ret != 1)
126 		return false;
127 
128 	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
129 		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
130 			vq->out_iov.used, vq->in_iov.used);
131 		goto err;
132 	}
133 
134 	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
135 		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
136 		goto err;
137 	}
138 
139 	/* The last byte is the status and we checked if the last iov has
140 	 * enough room for it.
141 	 */
142 	to_push = vringh_kiov_length(&vq->in_iov) - 1;
143 
144 	to_pull = vringh_kiov_length(&vq->out_iov);
145 
146 	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
147 				      sizeof(hdr));
148 	if (bytes != sizeof(hdr)) {
149 		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
150 		goto err;
151 	}
152 
153 	to_pull -= bytes;
154 
155 	type = vdpasim32_to_cpu(vdpasim, hdr.type);
156 	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
157 	offset = sector << SECTOR_SHIFT;
158 	status = VIRTIO_BLK_S_OK;
159 
160 	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
161 	    sector != 0) {
162 		dev_dbg(&vdpasim->vdpa.dev,
163 			"sector must be 0 for %u request - sector: 0x%llx\n",
164 			type, sector);
165 		status = VIRTIO_BLK_S_IOERR;
166 		goto err_status;
167 	}
168 
169 	switch (type) {
170 	case VIRTIO_BLK_T_IN:
171 		if (!vdpasim_blk_check_range(vdpasim, sector,
172 					     to_push >> SECTOR_SHIFT,
173 					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
174 			status = VIRTIO_BLK_S_IOERR;
175 			break;
176 		}
177 
178 		vdpasim_blk_buffer_lock(blk);
179 		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
180 					      blk->buffer + offset, to_push);
181 		vdpasim_blk_buffer_unlock(blk);
182 		if (bytes < 0) {
183 			dev_dbg(&vdpasim->vdpa.dev,
184 				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
185 				bytes, offset, to_push);
186 			status = VIRTIO_BLK_S_IOERR;
187 			break;
188 		}
189 
190 		pushed += bytes;
191 		break;
192 
193 	case VIRTIO_BLK_T_OUT:
194 		if (!vdpasim_blk_check_range(vdpasim, sector,
195 					     to_pull >> SECTOR_SHIFT,
196 					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
197 			status = VIRTIO_BLK_S_IOERR;
198 			break;
199 		}
200 
201 		vdpasim_blk_buffer_lock(blk);
202 		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
203 					      blk->buffer + offset, to_pull);
204 		vdpasim_blk_buffer_unlock(blk);
205 		if (bytes < 0) {
206 			dev_dbg(&vdpasim->vdpa.dev,
207 				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
208 				bytes, offset, to_pull);
209 			status = VIRTIO_BLK_S_IOERR;
210 			break;
211 		}
212 		break;
213 
214 	case VIRTIO_BLK_T_GET_ID:
215 		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
216 					      vdpasim_blk_id,
217 					      VIRTIO_BLK_ID_BYTES);
218 		if (bytes < 0) {
219 			dev_dbg(&vdpasim->vdpa.dev,
220 				"vringh_iov_push_iotlb() error: %zd\n", bytes);
221 			status = VIRTIO_BLK_S_IOERR;
222 			break;
223 		}
224 
225 		pushed += bytes;
226 		break;
227 
228 	case VIRTIO_BLK_T_FLUSH:
229 		/* nothing to do */
230 		break;
231 
232 	case VIRTIO_BLK_T_DISCARD:
233 	case VIRTIO_BLK_T_WRITE_ZEROES: {
234 		struct virtio_blk_discard_write_zeroes range;
235 		u32 num_sectors, flags;
236 
237 		if (to_pull != sizeof(range)) {
238 			dev_dbg(&vdpasim->vdpa.dev,
239 				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
240 				to_pull, sizeof(range));
241 			status = VIRTIO_BLK_S_IOERR;
242 			break;
243 		}
244 
245 		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
246 					      to_pull);
247 		if (bytes < 0) {
248 			dev_dbg(&vdpasim->vdpa.dev,
249 				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
250 				bytes, offset, to_pull);
251 			status = VIRTIO_BLK_S_IOERR;
252 			break;
253 		}
254 
255 		sector = le64_to_cpu(range.sector);
256 		offset = sector << SECTOR_SHIFT;
257 		num_sectors = le32_to_cpu(range.num_sectors);
258 		flags = le32_to_cpu(range.flags);
259 
260 		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
261 			dev_dbg(&vdpasim->vdpa.dev,
262 				"discard unexpected flags set - flags: 0x%x\n",
263 				flags);
264 			status = VIRTIO_BLK_S_UNSUPP;
265 			break;
266 		}
267 
268 		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
269 		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
270 			dev_dbg(&vdpasim->vdpa.dev,
271 				"write_zeroes unexpected flags set - flags: 0x%x\n",
272 				flags);
273 			status = VIRTIO_BLK_S_UNSUPP;
274 			break;
275 		}
276 
277 		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
278 					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
279 			status = VIRTIO_BLK_S_IOERR;
280 			break;
281 		}
282 
283 		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
284 			vdpasim_blk_buffer_lock(blk);
285 			memset(blk->buffer + offset, 0,
286 			       num_sectors << SECTOR_SHIFT);
287 			vdpasim_blk_buffer_unlock(blk);
288 		}
289 
290 		break;
291 	}
292 	default:
293 		dev_dbg(&vdpasim->vdpa.dev,
294 			"Unsupported request type %d\n", type);
295 		status = VIRTIO_BLK_S_IOERR;
296 		break;
297 	}
298 
299 err_status:
300 	/* If some operations fail, we need to skip the remaining bytes
301 	 * to put the status in the last byte
302 	 */
303 	if (to_push - pushed > 0)
304 		vringh_kiov_advance(&vq->in_iov, to_push - pushed);
305 
306 	/* Last byte is the status */
307 	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
308 	if (bytes != 1)
309 		goto err;
310 
311 	pushed += bytes;
312 
313 	/* Make sure data is wrote before advancing index */
314 	smp_wmb();
315 
316 	handled = true;
317 
318 err:
319 	vringh_complete_iotlb(&vq->vring, vq->head, pushed);
320 
321 	return handled;
322 }
323 
324 static void vdpasim_blk_work(struct vdpasim *vdpasim)
325 {
326 	bool reschedule = false;
327 	int i;
328 
329 	mutex_lock(&vdpasim->mutex);
330 
331 	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
332 		goto out;
333 
334 	if (!vdpasim->running)
335 		goto out;
336 
337 	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
338 		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
339 		int reqs = 0;
340 
341 		if (!vq->ready)
342 			continue;
343 
344 		while (vdpasim_blk_handle_req(vdpasim, vq)) {
345 			/* Make sure used is visible before rasing the interrupt. */
346 			smp_wmb();
347 
348 			local_bh_disable();
349 			if (vringh_need_notify_iotlb(&vq->vring) > 0)
350 				vringh_notify(&vq->vring);
351 			local_bh_enable();
352 
353 			if (++reqs > 4) {
354 				reschedule = true;
355 				break;
356 			}
357 		}
358 	}
359 out:
360 	mutex_unlock(&vdpasim->mutex);
361 
362 	if (reschedule)
363 		vdpasim_schedule_work(vdpasim);
364 }
365 
366 static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
367 {
368 	struct virtio_blk_config *blk_config = config;
369 
370 	memset(config, 0, sizeof(struct virtio_blk_config));
371 
372 	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
373 	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
374 	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
375 	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
376 	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
377 	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
378 	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
379 	/* VIRTIO_BLK_F_DISCARD */
380 	blk_config->discard_sector_alignment =
381 		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
382 	blk_config->max_discard_sectors =
383 		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
384 	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
385 	/* VIRTIO_BLK_F_WRITE_ZEROES */
386 	blk_config->max_write_zeroes_sectors =
387 		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
388 	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
389 
390 }
391 
392 static void vdpasim_blk_free(struct vdpasim *vdpasim)
393 {
394 	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
395 
396 	if (!blk->shared_backend)
397 		kvfree(blk->buffer);
398 }
399 
400 static void vdpasim_blk_mgmtdev_release(struct device *dev)
401 {
402 }
403 
404 static struct device vdpasim_blk_mgmtdev = {
405 	.init_name = "vdpasim_blk",
406 	.release = vdpasim_blk_mgmtdev_release,
407 };
408 
409 static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
410 			       const struct vdpa_dev_set_config *config)
411 {
412 	struct vdpasim_dev_attr dev_attr = {};
413 	struct vdpasim_blk *blk;
414 	struct vdpasim *simdev;
415 	int ret;
416 
417 	dev_attr.mgmt_dev = mdev;
418 	dev_attr.name = name;
419 	dev_attr.id = VIRTIO_ID_BLOCK;
420 	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
421 	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
422 	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
423 	dev_attr.nas = VDPASIM_BLK_AS_NUM;
424 	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
425 	dev_attr.config_size = sizeof(struct virtio_blk_config);
426 	dev_attr.get_config = vdpasim_blk_get_config;
427 	dev_attr.work_fn = vdpasim_blk_work;
428 	dev_attr.free = vdpasim_blk_free;
429 
430 	simdev = vdpasim_create(&dev_attr, config);
431 	if (IS_ERR(simdev))
432 		return PTR_ERR(simdev);
433 
434 	blk = sim_to_blk(simdev);
435 	blk->shared_backend = shared_backend;
436 
437 	if (blk->shared_backend) {
438 		blk->buffer = shared_buffer;
439 	} else {
440 		blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
441 				       GFP_KERNEL);
442 		if (!blk->buffer) {
443 			ret = -ENOMEM;
444 			goto put_dev;
445 		}
446 	}
447 
448 	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
449 	if (ret)
450 		goto put_dev;
451 
452 	return 0;
453 
454 put_dev:
455 	put_device(&simdev->vdpa.dev);
456 	return ret;
457 }
458 
459 static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
460 				struct vdpa_device *dev)
461 {
462 	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
463 
464 	_vdpa_unregister_device(&simdev->vdpa);
465 }
466 
467 static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
468 	.dev_add = vdpasim_blk_dev_add,
469 	.dev_del = vdpasim_blk_dev_del
470 };
471 
472 static struct virtio_device_id id_table[] = {
473 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
474 	{ 0 },
475 };
476 
477 static struct vdpa_mgmt_dev mgmt_dev = {
478 	.device = &vdpasim_blk_mgmtdev,
479 	.id_table = id_table,
480 	.ops = &vdpasim_blk_mgmtdev_ops,
481 };
482 
483 static int __init vdpasim_blk_init(void)
484 {
485 	int ret;
486 
487 	ret = device_register(&vdpasim_blk_mgmtdev);
488 	if (ret) {
489 		put_device(&vdpasim_blk_mgmtdev);
490 		return ret;
491 	}
492 
493 	ret = vdpa_mgmtdev_register(&mgmt_dev);
494 	if (ret)
495 		goto parent_err;
496 
497 	if (shared_backend) {
498 		shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
499 					 GFP_KERNEL);
500 		if (!shared_buffer) {
501 			ret = -ENOMEM;
502 			goto mgmt_dev_err;
503 		}
504 	}
505 
506 	return 0;
507 mgmt_dev_err:
508 	vdpa_mgmtdev_unregister(&mgmt_dev);
509 parent_err:
510 	device_unregister(&vdpasim_blk_mgmtdev);
511 	return ret;
512 }
513 
514 static void __exit vdpasim_blk_exit(void)
515 {
516 	kvfree(shared_buffer);
517 	vdpa_mgmtdev_unregister(&mgmt_dev);
518 	device_unregister(&vdpasim_blk_mgmtdev);
519 }
520 
521 module_init(vdpasim_blk_init)
522 module_exit(vdpasim_blk_exit)
523 
524 MODULE_VERSION(DRV_VERSION);
525 MODULE_LICENSE(DRV_LICENSE);
526 MODULE_AUTHOR(DRV_AUTHOR);
527 MODULE_DESCRIPTION(DRV_DESC);
528