/* drivers/block/virtio_blk.c (revision 261a9af6) */
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>

#define PART_BITS 4

static int major, index;
struct workqueue_struct *virtblk_wq;

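/*
 * Per-device state.  The lock protects both the virtqueue and the
 * request queue: it is passed to blk_init_queue() as the queue lock,
 * and blk_done() takes it explicitly.
 */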
struct virtio_blk {
	spinlock_t lock;

	struct virtio_device *vdev;
	struct virtqueue *vq;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Request tracking. */
	struct list_head reqs;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* What the host tells us, plus 2 for the header & trailer. */
	unsigned int sg_elems;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

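/*
 * One in-flight request.  Queued on vblk->reqs and used as the token
 * handed to virtqueue_add_buf(), so blk_done() gets it back from
 * virtqueue_get_buf() on completion.
 */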
struct virtblk_req {
	struct list_head list;
	struct request *req;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	u8 status;
};

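/*
 * Virtqueue callback: reap completed requests, translate the virtio
 * status byte into an errno, copy SCSI passthrough results back into
 * the request, and restart the queue in case it was stopped for lack
 * of buffers.
 */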
static void blk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtblk_req *vbr;
	unsigned int len;
	unsigned long flags;

	spin_lock_irqsave(&vblk->lock, flags);
	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
		int error;

		switch (vbr->status) {
		case VIRTIO_BLK_S_OK:
			error = 0;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			error = -ENOTTY;
			break;
		default:
			error = -EIO;
			break;
		}

		switch (vbr->req->cmd_type) {
		case REQ_TYPE_BLOCK_PC:
			vbr->req->resid_len = vbr->in_hdr.residual;
			vbr->req->sense_len = vbr->in_hdr.sense_len;
			vbr->req->errors = vbr->in_hdr.errors;
			break;
		case REQ_TYPE_SPECIAL:
			vbr->req->errors = (error != 0);
			break;
		default:
			break;
		}

		__blk_end_request_all(vbr->req, error);
		list_del(&vbr->list);
		mempool_free(vbr, vblk->pool);
	}
	/* In case queue is stopped waiting for more buffers. */
	blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(&vblk->lock, flags);
}

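/*
 * Build the descriptor chain for one request and add it to the
 * virtqueue.  The layout is: out_hdr, [scsi cmd], data segments,
 * [sense, in_hdr], status.  Called with the queue lock held; returns
 * false if no request buffer or virtqueue slot is available.
 */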
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned long num, out = 0, in = 0;
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;

	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	/*
	 * If this is a packet command we need a couple of additional headers.
	 * Behind the normal outhdr we put a segment with the scsi command
	 * block, and before the normal inhdr we put the sense data and an
	 * inhdr with additional status information.
	 */
	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);

	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense,
			   SCSI_SENSE_BUFFERSIZE);
		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
			   sizeof(vbr->in_hdr));
	}

	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

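	/*
	 * Data segments are device-readable for a write and
	 * device-writable for a read, so count them as "out" or "in"
	 * descriptors accordingly.
	 */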
	if (num) {
		if (rq_data_dir(vbr->req) == WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	list_add_tail(&vbr->list, &vblk->reqs);
	return true;
}

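/*
 * Request function: pull requests off the block layer queue and hand
 * them to do_req(), kicking the host once per batch.
 */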
static void do_virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/*
		 * If this request fails, stop the queue and wait for
		 * something to finish to restart it.
		 */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

	if (issued)
		virtqueue_kick(vblk->vq);
}

/* Return the ID (serial number) string for *disk in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}

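/*
 * ioctl handler: everything is refused with -ENOTTY unless the host
 * offered VIRTIO_BLK_F_SCSI, in which case the generic SCSI ioctls
 * are forwarded to scsi_cmd_ioctl().
 */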
static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host supports them.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
			      (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl  = virtblk_ioctl,
	.owner  = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

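/* sysfs "serial" attribute: expose the device ID string, if any. */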
static ssize_t virtblk_serial_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

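/*
 * Re-read the capacity from config space (in process context) and
 * resize the disk, logging the new size in both SI and binary units.
 */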
static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	u64 capacity, size;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
		  (unsigned long long)capacity,
		  queue_logical_block_size(q),
		  cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

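/*
 * Device probe: read the negotiated config, allocate the per-device
 * state, virtqueue, mempool, disk and request queue, then apply the
 * queue limits the host advertised.
 */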
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err;
	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	if (index_to_minor(index) >= 1 << MINORBITS)
		return -ENOSPC;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&vblk->reqs);
	spin_lock_init(&vblk->lock);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
	}

	vblk->pool = mempool_create_kmalloc_pool(1, sizeof(struct virtblk_req));
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	q->queuedata = vblk;

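	/* Name the disk vda, vdb, ..., vdz, vdaa, vdab, ... like sd does. */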
	if (index < 26) {
		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
	} else if (index < (26 + 1) * 26) {
		sprintf(vblk->disk->disk_name, "vd%c%c",
			'a' + index / 26 - 1, 'a' + index % 26);
	} else {
		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
		const unsigned int m2 = (index / 26 - 1) % 26;
		const unsigned int m3 =  index % 26;
		sprintf(vblk->disk->disk_name, "vd%c%c%c",
			'a' + m1, 'a' + m2, 'a' + m3);
	}

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	index++;

	/* configure queue flush support */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
		blk_queue_flush(q, REQ_FLUSH);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems - 2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	/*
	 * Host can optionally specify maximum segment size and number of
	 * segments.
	 */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, physical_block_exp),
			&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, alignment_offset),
			&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, min_io_size),
			&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, opt_io_size),
			&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out:
	return err;
}

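/* Device removal: tear everything down in the reverse order of probe. */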
static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	flush_work(&vblk->config_work);

	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
}

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

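/* Feature bits this driver is willing to negotiate with the host. */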
static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
};

/*
 * virtio_blk causes a spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
	.config_changed		= virtblk_config_changed,
};

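/*
 * Module init: create the config-update workqueue, grab a block major,
 * then register the virtio driver, unwinding on any failure.
 */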
static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");