xref: /openbmc/linux/drivers/block/virtio_blk.c (revision 6c870213d6f3a25981c10728f46294a3bed1703f)
1 //#define DEBUG
2 #include <linux/spinlock.h>
3 #include <linux/slab.h>
4 #include <linux/blkdev.h>
5 #include <linux/hdreg.h>
6 #include <linux/module.h>
7 #include <linux/mutex.h>
8 #include <linux/virtio.h>
9 #include <linux/virtio_blk.h>
10 #include <linux/scatterlist.h>
11 #include <linux/string_helpers.h>
12 #include <scsi/scsi_cmnd.h>
13 #include <linux/idr.h>
14 #include <linux/blk-mq.h>
15 #include <linux/numa.h>
16 
/* Each disk is allocated 1 << PART_BITS minor numbers. */
#define PART_BITS 4

/* Hands out the per-disk index from which minor and name are derived. */
static DEFINE_IDA(vd_index_ida);
/* Block major; filled in by init() from register_blkdev(). */
static int major;

/* Workqueue that runs the config-change work items. */
static struct workqueue_struct *virtblk_wq;
23 
24 struct virtio_blk
25 {
26 	struct virtio_device *vdev;
27 	struct virtqueue *vq;
28 	spinlock_t vq_lock;
29 
30 	/* The disk structure for the kernel. */
31 	struct gendisk *disk;
32 
33 	/* Process context for config space updates */
34 	struct work_struct config_work;
35 
36 	/* Lock for config space updates */
37 	struct mutex config_lock;
38 
39 	/* enable config space updates */
40 	bool config_enable;
41 
42 	/* What host tells us, plus 2 for header & tailer. */
43 	unsigned int sg_elems;
44 
45 	/* Ida index - used to track minor number allocations. */
46 	int index;
47 };
48 
49 struct virtblk_req
50 {
51 	struct request *req;
52 	struct virtio_blk_outhdr out_hdr;
53 	struct virtio_scsi_inhdr in_hdr;
54 	u8 status;
55 	struct scatterlist sg[];
56 };
57 
58 static inline int virtblk_result(struct virtblk_req *vbr)
59 {
60 	switch (vbr->status) {
61 	case VIRTIO_BLK_S_OK:
62 		return 0;
63 	case VIRTIO_BLK_S_UNSUPP:
64 		return -ENOTTY;
65 	default:
66 		return -EIO;
67 	}
68 }
69 
70 static int __virtblk_add_req(struct virtqueue *vq,
71 			     struct virtblk_req *vbr,
72 			     struct scatterlist *data_sg,
73 			     bool have_data)
74 {
75 	struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
76 	unsigned int num_out = 0, num_in = 0;
77 	int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;
78 
79 	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
80 	sgs[num_out++] = &hdr;
81 
82 	/*
83 	 * If this is a packet command we need a couple of additional headers.
84 	 * Behind the normal outhdr we put a segment with the scsi command
85 	 * block, and before the normal inhdr we put the sense data and the
86 	 * inhdr with additional status information.
87 	 */
88 	if (type == VIRTIO_BLK_T_SCSI_CMD) {
89 		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
90 		sgs[num_out++] = &cmd;
91 	}
92 
93 	if (have_data) {
94 		if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
95 			sgs[num_out++] = data_sg;
96 		else
97 			sgs[num_out + num_in++] = data_sg;
98 	}
99 
100 	if (type == VIRTIO_BLK_T_SCSI_CMD) {
101 		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
102 		sgs[num_out + num_in++] = &sense;
103 		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
104 		sgs[num_out + num_in++] = &inhdr;
105 	}
106 
107 	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
108 	sgs[num_out + num_in++] = &status;
109 
110 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
111 }
112 
113 static inline void virtblk_request_done(struct request *req)
114 {
115 	struct virtblk_req *vbr = req->special;
116 	int error = virtblk_result(vbr);
117 
118 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
119 		req->resid_len = vbr->in_hdr.residual;
120 		req->sense_len = vbr->in_hdr.sense_len;
121 		req->errors = vbr->in_hdr.errors;
122 	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
123 		req->errors = (error != 0);
124 	}
125 
126 	blk_mq_end_io(req, error);
127 }
128 
129 static void virtblk_done(struct virtqueue *vq)
130 {
131 	struct virtio_blk *vblk = vq->vdev->priv;
132 	bool req_done = false;
133 	struct virtblk_req *vbr;
134 	unsigned long flags;
135 	unsigned int len;
136 
137 	spin_lock_irqsave(&vblk->vq_lock, flags);
138 	do {
139 		virtqueue_disable_cb(vq);
140 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
141 			blk_mq_complete_request(vbr->req);
142 			req_done = true;
143 		}
144 		if (unlikely(virtqueue_is_broken(vq)))
145 			break;
146 	} while (!virtqueue_enable_cb(vq));
147 	spin_unlock_irqrestore(&vblk->vq_lock, flags);
148 
149 	/* In case queue is stopped waiting for more buffers. */
150 	if (req_done)
151 		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
152 }
153 
154 static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
155 {
156 	struct virtio_blk *vblk = hctx->queue->queuedata;
157 	struct virtblk_req *vbr = req->special;
158 	unsigned long flags;
159 	unsigned int num;
160 	const bool last = (req->cmd_flags & REQ_END) != 0;
161 	int err;
162 
163 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
164 
165 	vbr->req = req;
166 	if (req->cmd_flags & REQ_FLUSH) {
167 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
168 		vbr->out_hdr.sector = 0;
169 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
170 	} else {
171 		switch (req->cmd_type) {
172 		case REQ_TYPE_FS:
173 			vbr->out_hdr.type = 0;
174 			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
175 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
176 			break;
177 		case REQ_TYPE_BLOCK_PC:
178 			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
179 			vbr->out_hdr.sector = 0;
180 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
181 			break;
182 		case REQ_TYPE_SPECIAL:
183 			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
184 			vbr->out_hdr.sector = 0;
185 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
186 			break;
187 		default:
188 			/* We don't put anything else in the queue. */
189 			BUG();
190 		}
191 	}
192 
193 	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
194 	if (num) {
195 		if (rq_data_dir(vbr->req) == WRITE)
196 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
197 		else
198 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
199 	}
200 
201 	spin_lock_irqsave(&vblk->vq_lock, flags);
202 	err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
203 	if (err) {
204 		virtqueue_kick(vblk->vq);
205 		spin_unlock_irqrestore(&vblk->vq_lock, flags);
206 		blk_mq_stop_hw_queue(hctx);
207 		/* Out of mem doesn't actually happen, since we fall back
208 		 * to direct descriptors */
209 		if (err == -ENOMEM || err == -ENOSPC)
210 			return BLK_MQ_RQ_QUEUE_BUSY;
211 		return BLK_MQ_RQ_QUEUE_ERROR;
212 	}
213 
214 	if (last)
215 		virtqueue_kick(vblk->vq);
216 
217 	spin_unlock_irqrestore(&vblk->vq_lock, flags);
218 	return BLK_MQ_RQ_QUEUE_OK;
219 }
220 
221 /* return id (s/n) string for *disk to *id_str
222  */
223 static int virtblk_get_id(struct gendisk *disk, char *id_str)
224 {
225 	struct virtio_blk *vblk = disk->private_data;
226 	struct request *req;
227 	struct bio *bio;
228 	int err;
229 
230 	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
231 			   GFP_KERNEL);
232 	if (IS_ERR(bio))
233 		return PTR_ERR(bio);
234 
235 	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
236 	if (IS_ERR(req)) {
237 		bio_put(bio);
238 		return PTR_ERR(req);
239 	}
240 
241 	req->cmd_type = REQ_TYPE_SPECIAL;
242 	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
243 	blk_put_request(req);
244 
245 	return err;
246 }
247 
248 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
249 			     unsigned int cmd, unsigned long data)
250 {
251 	struct gendisk *disk = bdev->bd_disk;
252 	struct virtio_blk *vblk = disk->private_data;
253 
254 	/*
255 	 * Only allow the generic SCSI ioctls if the host can support it.
256 	 */
257 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
258 		return -ENOTTY;
259 
260 	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
261 				  (void __user *)data);
262 }
263 
264 /* We provide getgeo only to please some old bootloader/partitioning tools */
265 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
266 {
267 	struct virtio_blk *vblk = bd->bd_disk->private_data;
268 
269 	/* see if the host passed in geometry config */
270 	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
271 		virtio_cread(vblk->vdev, struct virtio_blk_config,
272 			     geometry.cylinders, &geo->cylinders);
273 		virtio_cread(vblk->vdev, struct virtio_blk_config,
274 			     geometry.heads, &geo->heads);
275 		virtio_cread(vblk->vdev, struct virtio_blk_config,
276 			     geometry.sectors, &geo->sectors);
277 	} else {
278 		/* some standard values, similar to sd */
279 		geo->heads = 1 << 6;
280 		geo->sectors = 1 << 5;
281 		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
282 	}
283 	return 0;
284 }
285 
286 static const struct block_device_operations virtblk_fops = {
287 	.ioctl  = virtblk_ioctl,
288 	.owner  = THIS_MODULE,
289 	.getgeo = virtblk_getgeo,
290 };
291 
292 static int index_to_minor(int index)
293 {
294 	return index << PART_BITS;
295 }
296 
297 static int minor_to_index(int minor)
298 {
299 	return minor >> PART_BITS;
300 }
301 
302 static ssize_t virtblk_serial_show(struct device *dev,
303 				struct device_attribute *attr, char *buf)
304 {
305 	struct gendisk *disk = dev_to_disk(dev);
306 	int err;
307 
308 	/* sysfs gives us a PAGE_SIZE buffer */
309 	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
310 
311 	buf[VIRTIO_BLK_ID_BYTES] = '\0';
312 	err = virtblk_get_id(disk, buf);
313 	if (!err)
314 		return strlen(buf);
315 
316 	if (err == -EIO) /* Unsupported? Make it empty. */
317 		return 0;
318 
319 	return err;
320 }
321 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
322 
323 static void virtblk_config_changed_work(struct work_struct *work)
324 {
325 	struct virtio_blk *vblk =
326 		container_of(work, struct virtio_blk, config_work);
327 	struct virtio_device *vdev = vblk->vdev;
328 	struct request_queue *q = vblk->disk->queue;
329 	char cap_str_2[10], cap_str_10[10];
330 	char *envp[] = { "RESIZE=1", NULL };
331 	u64 capacity, size;
332 
333 	mutex_lock(&vblk->config_lock);
334 	if (!vblk->config_enable)
335 		goto done;
336 
337 	/* Host must always specify the capacity. */
338 	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
339 
340 	/* If capacity is too big, truncate with warning. */
341 	if ((sector_t)capacity != capacity) {
342 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
343 			 (unsigned long long)capacity);
344 		capacity = (sector_t)-1;
345 	}
346 
347 	size = capacity * queue_logical_block_size(q);
348 	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
349 	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
350 
351 	dev_notice(&vdev->dev,
352 		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
353 		  (unsigned long long)capacity,
354 		  queue_logical_block_size(q),
355 		  cap_str_10, cap_str_2);
356 
357 	set_capacity(vblk->disk, capacity);
358 	revalidate_disk(vblk->disk);
359 	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
360 done:
361 	mutex_unlock(&vblk->config_lock);
362 }
363 
364 static void virtblk_config_changed(struct virtio_device *vdev)
365 {
366 	struct virtio_blk *vblk = vdev->priv;
367 
368 	queue_work(virtblk_wq, &vblk->config_work);
369 }
370 
371 static int init_vq(struct virtio_blk *vblk)
372 {
373 	int err = 0;
374 
375 	/* We expect one virtqueue, for output. */
376 	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
377 	if (IS_ERR(vblk->vq))
378 		err = PTR_ERR(vblk->vq);
379 
380 	return err;
381 }
382 
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Writes @prefix followed by a bijective base-26 suffix for @index into
 * @buf: 0 -> "a", 25 -> "z", 26 -> "aa", 701 -> "zz", 702 -> "aaa", ...
 *
 * @prefix: NUL-terminated name prefix (e.g. "vd").
 * @index:  non-negative disk index.
 * @buf:    destination buffer of @buflen bytes.
 * @buflen: size of @buf, including room for the terminating NUL.
 *
 * Returns 0 on success, or -EINVAL when @index is negative or the name
 * (prefix + suffix + NUL) does not fit in @buflen bytes.  @buf may be
 * partially written on failure, but never outside its bounds.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	const size_t prefix_len = strlen(prefix);
	char *begin, *end, *p;

	/*
	 * Reject inputs the loop below cannot handle: a negative index
	 * would yield non-letter characters, and a buffer that cannot even
	 * hold the prefix would put 'begin' at or past 'end', so the
	 * "p == begin" bound would never trigger.
	 */
	if (index < 0 || buflen <= 0 || prefix_len >= (size_t)buflen)
		return -EINVAL;

	begin = buf + prefix_len;
	end = buf + buflen;

	/* Build the suffix right-to-left at the tail of the buffer. */
	p = end - 1;
	*p = '\0';
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	/* Slide the suffix (with its NUL) up against the prefix. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
410 
411 static int virtblk_get_cache_mode(struct virtio_device *vdev)
412 {
413 	u8 writeback;
414 	int err;
415 
416 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
417 				   struct virtio_blk_config, wce,
418 				   &writeback);
419 	if (err)
420 		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
421 
422 	return writeback;
423 }
424 
425 static void virtblk_update_cache_mode(struct virtio_device *vdev)
426 {
427 	u8 writeback = virtblk_get_cache_mode(vdev);
428 	struct virtio_blk *vblk = vdev->priv;
429 
430 	if (writeback)
431 		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
432 	else
433 		blk_queue_flush(vblk->disk->queue, 0);
434 
435 	revalidate_disk(vblk->disk);
436 }
437 
/* Cache mode names, indexed by the value of the wce config byte. */
static const char *const virtblk_cache_types[] = {
	[0] = "write through",
	[1] = "write back",
};
441 
442 static ssize_t
443 virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
444 			 const char *buf, size_t count)
445 {
446 	struct gendisk *disk = dev_to_disk(dev);
447 	struct virtio_blk *vblk = disk->private_data;
448 	struct virtio_device *vdev = vblk->vdev;
449 	int i;
450 
451 	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
452 	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
453 		if (sysfs_streq(buf, virtblk_cache_types[i]))
454 			break;
455 
456 	if (i < 0)
457 		return -EINVAL;
458 
459 	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
460 	virtblk_update_cache_mode(vdev);
461 	return count;
462 }
463 
464 static ssize_t
465 virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
466 			 char *buf)
467 {
468 	struct gendisk *disk = dev_to_disk(dev);
469 	struct virtio_blk *vblk = disk->private_data;
470 	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
471 
472 	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
473 	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
474 }
475 
476 static const struct device_attribute dev_attr_cache_type_ro =
477 	__ATTR(cache_type, S_IRUGO,
478 	       virtblk_cache_type_show, NULL);
479 static const struct device_attribute dev_attr_cache_type_rw =
480 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
481 	       virtblk_cache_type_show, virtblk_cache_type_store);
482 
483 static struct blk_mq_ops virtio_mq_ops = {
484 	.queue_rq	= virtio_queue_rq,
485 	.map_queue	= blk_mq_map_queue,
486 	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
487 	.free_hctx	= blk_mq_free_single_hw_queue,
488 	.complete	= virtblk_request_done,
489 };
490 
491 static struct blk_mq_reg virtio_mq_reg = {
492 	.ops		= &virtio_mq_ops,
493 	.nr_hw_queues	= 1,
494 	.queue_depth	= 0, /* Set in virtblk_probe */
495 	.numa_node	= NUMA_NO_NODE,
496 	.flags		= BLK_MQ_F_SHOULD_MERGE,
497 };
498 module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444);
499 
500 static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
501 			     struct request *rq, unsigned int nr)
502 {
503 	struct virtio_blk *vblk = data;
504 	struct virtblk_req *vbr = rq->special;
505 
506 	sg_init_table(vbr->sg, vblk->sg_elems);
507 	return 0;
508 }
509 
510 static int virtblk_probe(struct virtio_device *vdev)
511 {
512 	struct virtio_blk *vblk;
513 	struct request_queue *q;
514 	int err, index;
515 
516 	u64 cap;
517 	u32 v, blk_size, sg_elems, opt_io_size;
518 	u16 min_io_size;
519 	u8 physical_block_exp, alignment_offset;
520 
521 	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
522 			     GFP_KERNEL);
523 	if (err < 0)
524 		goto out;
525 	index = err;
526 
527 	/* We need to know how many segments before we allocate. */
528 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
529 				   struct virtio_blk_config, seg_max,
530 				   &sg_elems);
531 
532 	/* We need at least one SG element, whatever they say. */
533 	if (err || !sg_elems)
534 		sg_elems = 1;
535 
536 	/* We need an extra sg elements at head and tail. */
537 	sg_elems += 2;
538 	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
539 	if (!vblk) {
540 		err = -ENOMEM;
541 		goto out_free_index;
542 	}
543 
544 	vblk->vdev = vdev;
545 	vblk->sg_elems = sg_elems;
546 	mutex_init(&vblk->config_lock);
547 
548 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
549 	vblk->config_enable = true;
550 
551 	err = init_vq(vblk);
552 	if (err)
553 		goto out_free_vblk;
554 	spin_lock_init(&vblk->vq_lock);
555 
556 	/* FIXME: How many partitions?  How long is a piece of string? */
557 	vblk->disk = alloc_disk(1 << PART_BITS);
558 	if (!vblk->disk) {
559 		err = -ENOMEM;
560 		goto out_free_vq;
561 	}
562 
563 	/* Default queue sizing is to fill the ring. */
564 	if (!virtio_mq_reg.queue_depth) {
565 		virtio_mq_reg.queue_depth = vblk->vq->num_free;
566 		/* ... but without indirect descs, we use 2 descs per req */
567 		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
568 			virtio_mq_reg.queue_depth /= 2;
569 	}
570 	virtio_mq_reg.cmd_size =
571 		sizeof(struct virtblk_req) +
572 		sizeof(struct scatterlist) * sg_elems;
573 
574 	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
575 	if (!q) {
576 		err = -ENOMEM;
577 		goto out_put_disk;
578 	}
579 
580 	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
581 
582 	q->queuedata = vblk;
583 
584 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
585 
586 	vblk->disk->major = major;
587 	vblk->disk->first_minor = index_to_minor(index);
588 	vblk->disk->private_data = vblk;
589 	vblk->disk->fops = &virtblk_fops;
590 	vblk->disk->driverfs_dev = &vdev->dev;
591 	vblk->index = index;
592 
593 	/* configure queue flush support */
594 	virtblk_update_cache_mode(vdev);
595 
596 	/* If disk is read-only in the host, the guest should obey */
597 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
598 		set_disk_ro(vblk->disk, 1);
599 
600 	/* Host must always specify the capacity. */
601 	virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);
602 
603 	/* If capacity is too big, truncate with warning. */
604 	if ((sector_t)cap != cap) {
605 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
606 			 (unsigned long long)cap);
607 		cap = (sector_t)-1;
608 	}
609 	set_capacity(vblk->disk, cap);
610 
611 	/* We can handle whatever the host told us to handle. */
612 	blk_queue_max_segments(q, vblk->sg_elems-2);
613 
614 	/* No need to bounce any requests */
615 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
616 
617 	/* No real sector limit. */
618 	blk_queue_max_hw_sectors(q, -1U);
619 
620 	/* Host can optionally specify maximum segment size and number of
621 	 * segments. */
622 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
623 				   struct virtio_blk_config, size_max, &v);
624 	if (!err)
625 		blk_queue_max_segment_size(q, v);
626 	else
627 		blk_queue_max_segment_size(q, -1U);
628 
629 	/* Host can optionally specify the block size of the device */
630 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
631 				   struct virtio_blk_config, blk_size,
632 				   &blk_size);
633 	if (!err)
634 		blk_queue_logical_block_size(q, blk_size);
635 	else
636 		blk_size = queue_logical_block_size(q);
637 
638 	/* Use topology information if available */
639 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
640 				   struct virtio_blk_config, physical_block_exp,
641 				   &physical_block_exp);
642 	if (!err && physical_block_exp)
643 		blk_queue_physical_block_size(q,
644 				blk_size * (1 << physical_block_exp));
645 
646 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
647 				   struct virtio_blk_config, alignment_offset,
648 				   &alignment_offset);
649 	if (!err && alignment_offset)
650 		blk_queue_alignment_offset(q, blk_size * alignment_offset);
651 
652 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
653 				   struct virtio_blk_config, min_io_size,
654 				   &min_io_size);
655 	if (!err && min_io_size)
656 		blk_queue_io_min(q, blk_size * min_io_size);
657 
658 	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
659 				   struct virtio_blk_config, opt_io_size,
660 				   &opt_io_size);
661 	if (!err && opt_io_size)
662 		blk_queue_io_opt(q, blk_size * opt_io_size);
663 
664 	add_disk(vblk->disk);
665 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
666 	if (err)
667 		goto out_del_disk;
668 
669 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
670 		err = device_create_file(disk_to_dev(vblk->disk),
671 					 &dev_attr_cache_type_rw);
672 	else
673 		err = device_create_file(disk_to_dev(vblk->disk),
674 					 &dev_attr_cache_type_ro);
675 	if (err)
676 		goto out_del_disk;
677 	return 0;
678 
679 out_del_disk:
680 	del_gendisk(vblk->disk);
681 	blk_cleanup_queue(vblk->disk->queue);
682 out_put_disk:
683 	put_disk(vblk->disk);
684 out_free_vq:
685 	vdev->config->del_vqs(vdev);
686 out_free_vblk:
687 	kfree(vblk);
688 out_free_index:
689 	ida_simple_remove(&vd_index_ida, index);
690 out:
691 	return err;
692 }
693 
694 static void virtblk_remove(struct virtio_device *vdev)
695 {
696 	struct virtio_blk *vblk = vdev->priv;
697 	int index = vblk->index;
698 	int refc;
699 
700 	/* Prevent config work handler from accessing the device. */
701 	mutex_lock(&vblk->config_lock);
702 	vblk->config_enable = false;
703 	mutex_unlock(&vblk->config_lock);
704 
705 	del_gendisk(vblk->disk);
706 	blk_cleanup_queue(vblk->disk->queue);
707 
708 	/* Stop all the virtqueues. */
709 	vdev->config->reset(vdev);
710 
711 	flush_work(&vblk->config_work);
712 
713 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
714 	put_disk(vblk->disk);
715 	vdev->config->del_vqs(vdev);
716 	kfree(vblk);
717 
718 	/* Only free device id if we don't have any users */
719 	if (refc == 1)
720 		ida_simple_remove(&vd_index_ida, index);
721 }
722 
723 #ifdef CONFIG_PM_SLEEP
724 static int virtblk_freeze(struct virtio_device *vdev)
725 {
726 	struct virtio_blk *vblk = vdev->priv;
727 
728 	/* Ensure we don't receive any more interrupts */
729 	vdev->config->reset(vdev);
730 
731 	/* Prevent config work handler from accessing the device. */
732 	mutex_lock(&vblk->config_lock);
733 	vblk->config_enable = false;
734 	mutex_unlock(&vblk->config_lock);
735 
736 	flush_work(&vblk->config_work);
737 
738 	blk_mq_stop_hw_queues(vblk->disk->queue);
739 
740 	vdev->config->del_vqs(vdev);
741 	return 0;
742 }
743 
744 static int virtblk_restore(struct virtio_device *vdev)
745 {
746 	struct virtio_blk *vblk = vdev->priv;
747 	int ret;
748 
749 	vblk->config_enable = true;
750 	ret = init_vq(vdev->priv);
751 	if (!ret)
752 		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
753 
754 	return ret;
755 }
756 #endif
757 
758 static const struct virtio_device_id id_table[] = {
759 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
760 	{ 0 },
761 };
762 
763 static unsigned int features[] = {
764 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
765 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
766 	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
767 };
768 
769 static struct virtio_driver virtio_blk = {
770 	.feature_table		= features,
771 	.feature_table_size	= ARRAY_SIZE(features),
772 	.driver.name		= KBUILD_MODNAME,
773 	.driver.owner		= THIS_MODULE,
774 	.id_table		= id_table,
775 	.probe			= virtblk_probe,
776 	.remove			= virtblk_remove,
777 	.config_changed		= virtblk_config_changed,
778 #ifdef CONFIG_PM_SLEEP
779 	.freeze			= virtblk_freeze,
780 	.restore		= virtblk_restore,
781 #endif
782 };
783 
784 static int __init init(void)
785 {
786 	int error;
787 
788 	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
789 	if (!virtblk_wq)
790 		return -ENOMEM;
791 
792 	major = register_blkdev(0, "virtblk");
793 	if (major < 0) {
794 		error = major;
795 		goto out_destroy_workqueue;
796 	}
797 
798 	error = register_virtio_driver(&virtio_blk);
799 	if (error)
800 		goto out_unregister_blkdev;
801 	return 0;
802 
803 out_unregister_blkdev:
804 	unregister_blkdev(major, "virtblk");
805 out_destroy_workqueue:
806 	destroy_workqueue(virtblk_wq);
807 	return error;
808 }
809 
810 static void __exit fini(void)
811 {
812 	unregister_blkdev(major, "virtblk");
813 	unregister_virtio_driver(&virtio_blk);
814 	destroy_workqueue(virtblk_wq);
815 }
816 module_init(init);
817 module_exit(fini);
818 
819 MODULE_DEVICE_TABLE(virtio, id_table);
820 MODULE_DESCRIPTION("Virtio block driver");
821 MODULE_LICENSE("GPL");
822