xref: /openbmc/linux/drivers/block/virtio_blk.c (revision d2999e1b)
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/numa.h>

#define PART_BITS 4

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

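/*
 * Per-device state.  A pointer to this structure is stored in
 * vdev->priv at probe time.  There is a single virtqueue; vq_lock
 * serialises submissions from the blk-mq ->queue_rq path against
 * the completion callback.
 */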
struct virtio_blk
{
        struct virtio_device *vdev;
        struct virtqueue *vq;
        spinlock_t vq_lock;

        /* The disk structure for the kernel. */
        struct gendisk *disk;

        /* Block layer tags. */
        struct blk_mq_tag_set tag_set;

        /* Process context for config space updates */
        struct work_struct config_work;

        /* Lock for config space updates */
        struct mutex config_lock;

        /* enable config space updates */
        bool config_enable;

        /* What the host tells us, plus 2 for header & trailer. */
        unsigned int sg_elems;

        /* Ida index - used to track minor number allocations. */
        int index;
};

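/*
 * Per-request state.  blk-mq allocates this as the request PDU;
 * tag_set.cmd_size is sized in virtblk_probe() so that the trailing
 * sg[] flexible array can hold up to sg_elems scatterlist entries.
 */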
struct virtblk_req
{
        struct request *req;
        struct virtio_blk_outhdr out_hdr;
        struct virtio_scsi_inhdr in_hdr;
        u8 status;
        struct scatterlist sg[];
};

static inline int virtblk_result(struct virtblk_req *vbr)
{
        switch (vbr->status) {
        case VIRTIO_BLK_S_OK:
                return 0;
        case VIRTIO_BLK_S_UNSUPP:
                return -ENOTTY;
        default:
                return -EIO;
        }
}

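/*
 * Queue a request on the virtqueue.  sgs[] holds at most six entries:
 * out_hdr, [scsi cmd], [data-out], [data-in], [sense, inhdr], status.
 * virtqueue_add_sgs() requires all driver-readable (out) segments to
 * precede the driver-writable (in) ones, which is why num_out and
 * num_in are tracked separately.
 */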
static int __virtblk_add_req(struct virtqueue *vq,
                             struct virtblk_req *vbr,
                             struct scatterlist *data_sg,
                             bool have_data)
{
        struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
        unsigned int num_out = 0, num_in = 0;
        int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;

        sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
        sgs[num_out++] = &hdr;

        /*
         * If this is a packet command we need a couple of additional headers.
         * Behind the normal outhdr we put a segment with the scsi command
         * block, and before the normal inhdr we put the sense data and the
         * inhdr with additional status information.
         */
        if (type == VIRTIO_BLK_T_SCSI_CMD) {
                sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
                sgs[num_out++] = &cmd;
        }

        if (have_data) {
                if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
                        sgs[num_out++] = data_sg;
                else
                        sgs[num_out + num_in++] = data_sg;
        }

        if (type == VIRTIO_BLK_T_SCSI_CMD) {
                sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
                sgs[num_out + num_in++] = &sense;
                sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
                sgs[num_out + num_in++] = &inhdr;
        }

        sg_init_one(&status, &vbr->status, sizeof(vbr->status));
        sgs[num_out + num_in++] = &status;

        return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

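/*
 * Completion handler, run via blk_mq_complete_request() (the
 * blk_mq_ops.complete hook).  Copy the SCSI passthrough results out
 * of the in-header before finishing the request.
 */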
static inline void virtblk_request_done(struct request *req)
{
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
        int error = virtblk_result(vbr);

        if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
                req->resid_len = vbr->in_hdr.residual;
                req->sense_len = vbr->in_hdr.sense_len;
                req->errors = vbr->in_hdr.errors;
        } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
                req->errors = (error != 0);
        }

        blk_mq_end_io(req, error);
}

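/*
 * Virtqueue interrupt callback.  The disable_cb/get_buf/enable_cb
 * loop drains completed buffers and runs again whenever a completion
 * arrives between the final get_buf and re-enabling callbacks, so no
 * completion is ever missed.
 */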
static void virtblk_done(struct virtqueue *vq)
{
        struct virtio_blk *vblk = vq->vdev->priv;
        bool req_done = false;
        struct virtblk_req *vbr;
        unsigned long flags;
        unsigned int len;

        spin_lock_irqsave(&vblk->vq_lock, flags);
        do {
                virtqueue_disable_cb(vq);
                while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
                        blk_mq_complete_request(vbr->req);
                        req_done = true;
                }
                if (unlikely(virtqueue_is_broken(vq)))
                        break;
        } while (!virtqueue_enable_cb(vq));

        /* In case the queue is stopped waiting for more buffers. */
        if (req_done)
                blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
        spin_unlock_irqrestore(&vblk->vq_lock, flags);
}

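/*
 * blk-mq ->queue_rq: translate the request into an out-header, map
 * its data into the per-request scatterlist, and add it to the
 * virtqueue under vq_lock.  If the ring is full, stop the hardware
 * queue and return BUSY; virtblk_done() restarts it once completions
 * free up descriptors.  The host is only notified on the last request
 * of a batch (REQ_END), and outside the lock.
 */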
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
{
        struct virtio_blk *vblk = hctx->queue->queuedata;
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
        unsigned long flags;
        unsigned int num;
        const bool last = (req->cmd_flags & REQ_END) != 0;
        int err;
        bool notify = false;

        BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

        vbr->req = req;
        if (req->cmd_flags & REQ_FLUSH) {
                vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
                vbr->out_hdr.sector = 0;
                vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
        } else {
                switch (req->cmd_type) {
                case REQ_TYPE_FS:
                        vbr->out_hdr.type = 0;
                        vbr->out_hdr.sector = blk_rq_pos(vbr->req);
                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
                        break;
                case REQ_TYPE_BLOCK_PC:
                        vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
                        vbr->out_hdr.sector = 0;
                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
                        break;
                case REQ_TYPE_SPECIAL:
                        vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
                        vbr->out_hdr.sector = 0;
                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
                        break;
                default:
                        /* We don't put anything else in the queue. */
                        BUG();
                }
        }

        num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
        if (num) {
                if (rq_data_dir(vbr->req) == WRITE)
                        vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
                else
                        vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
        }

        spin_lock_irqsave(&vblk->vq_lock, flags);
        err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
        if (err) {
                virtqueue_kick(vblk->vq);
                blk_mq_stop_hw_queue(hctx);
                spin_unlock_irqrestore(&vblk->vq_lock, flags);
                /* Out of mem doesn't actually happen, since we fall back
                 * to direct descriptors */
                if (err == -ENOMEM || err == -ENOSPC)
                        return BLK_MQ_RQ_QUEUE_BUSY;
                return BLK_MQ_RQ_QUEUE_ERROR;
        }

        if (last && virtqueue_kick_prepare(vblk->vq))
                notify = true;
        spin_unlock_irqrestore(&vblk->vq_lock, flags);

        if (notify)
                virtqueue_notify(vblk->vq);
        return BLK_MQ_RQ_QUEUE_OK;
}

/*
 * Return the serial number (ID) string for *disk in *id_str.  This
 * issues a driver-private VIRTIO_BLK_T_GET_ID request: the request is
 * tagged REQ_TYPE_SPECIAL so that virtio_queue_rq() fills in the
 * right command type.
 */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
        struct virtio_blk *vblk = disk->private_data;
        struct request *req;
        struct bio *bio;
        int err;

        bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
                           GFP_KERNEL);
        if (IS_ERR(bio))
                return PTR_ERR(bio);

        req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
        if (IS_ERR(req)) {
                bio_put(bio);
                return PTR_ERR(req);
        }

        req->cmd_type = REQ_TYPE_SPECIAL;
        err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
        blk_put_request(req);

        return err;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
                             unsigned int cmd, unsigned long data)
{
        struct gendisk *disk = bdev->bd_disk;
        struct virtio_blk *vblk = disk->private_data;

        /*
         * Only allow the generic SCSI ioctls if the host can support it.
         */
        if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
                return -ENOTTY;

        return scsi_cmd_blk_ioctl(bdev, mode, cmd,
                                  (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
        struct virtio_blk *vblk = bd->bd_disk->private_data;

        /* see if the host passed in geometry config */
        if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.cylinders, &geo->cylinders);
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.heads, &geo->heads);
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.sectors, &geo->sectors);
        } else {
                /* some standard values, similar to sd */
                geo->heads = 1 << 6;
                geo->sectors = 1 << 5;
                geo->cylinders = get_capacity(bd->bd_disk) >> 11;
        }
        return 0;
}

static const struct block_device_operations virtblk_fops = {
        .ioctl  = virtblk_ioctl,
        .owner  = THIS_MODULE,
        .getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
        return index << PART_BITS;
}

static int minor_to_index(int minor)
{
        return minor >> PART_BITS;
}

static ssize_t virtblk_serial_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        int err;

        /* sysfs gives us a PAGE_SIZE buffer */
        BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

        buf[VIRTIO_BLK_ID_BYTES] = '\0';
        err = virtblk_get_id(disk, buf);
        if (!err)
                return strlen(buf);

        if (err == -EIO) /* Unsupported? Make it empty. */
                return 0;

        return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

static void virtblk_config_changed_work(struct work_struct *work)
{
        struct virtio_blk *vblk =
                container_of(work, struct virtio_blk, config_work);
        struct virtio_device *vdev = vblk->vdev;
        struct request_queue *q = vblk->disk->queue;
        char cap_str_2[10], cap_str_10[10];
        char *envp[] = { "RESIZE=1", NULL };
        u64 capacity, size;

        mutex_lock(&vblk->config_lock);
        if (!vblk->config_enable)
                goto done;

        /* Host must always specify the capacity. */
        virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

        /* If capacity is too big, truncate with warning. */
        if ((sector_t)capacity != capacity) {
                dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
                         (unsigned long long)capacity);
                capacity = (sector_t)-1;
        }

        size = capacity * queue_logical_block_size(q);
        string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
        string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

        dev_notice(&vdev->dev,
                  "new size: %llu %d-byte logical blocks (%s/%s)\n",
                  (unsigned long long)capacity,
                  queue_logical_block_size(q),
                  cap_str_10, cap_str_2);

        set_capacity(vblk->disk, capacity);
        revalidate_disk(vblk->disk);
        kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
done:
        mutex_unlock(&vblk->config_lock);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
        int err = 0;

        /* We expect one virtqueue, for output. */
        vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
        if (IS_ERR(vblk->vq))
                err = PTR_ERR(vblk->vq);

        return err;
}

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
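/*
 * Digits are built least-significant first, so index 0 maps to "vda",
 * 25 to "vdz", 26 to "vdaa", 27 to "vdab", and so on (a bijective
 * base-26 encoding, like spreadsheet column names).
 */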
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
        const int base = 'z' - 'a' + 1;
        char *begin = buf + strlen(prefix);
        char *end = buf + buflen;
        char *p;
        int unit;

        p = end - 1;
        *p = '\0';
        unit = base;
        do {
                if (p == begin)
                        return -EINVAL;
                *--p = 'a' + (index % unit);
                index = (index / unit) - 1;
        } while (index >= 0);

        memmove(begin, p, end - p);
        memcpy(buf, prefix, strlen(prefix));

        return 0;
}

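/*
 * Write-cache mode.  If VIRTIO_BLK_F_CONFIG_WCE was negotiated, the
 * "wce" config byte is authoritative (and writable via the cache_type
 * sysfs attribute below); otherwise fall back to the static
 * VIRTIO_BLK_F_WCE feature bit.  Writeback mode is advertised to the
 * block layer as REQ_FLUSH support.
 */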
static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
        u8 writeback;
        int err;

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
                                   struct virtio_blk_config, wce,
                                   &writeback);
        if (err)
                writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);

        return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
        u8 writeback = virtblk_get_cache_mode(vdev);
        struct virtio_blk *vblk = vdev->priv;

        if (writeback)
                blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
        else
                blk_queue_flush(vblk->disk->queue, 0);

        revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
        "write through", "write back"
};

static ssize_t
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
                         const char *buf, size_t count)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct virtio_blk *vblk = disk->private_data;
        struct virtio_device *vdev = vblk->vdev;
        int i;

        BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
        for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
                if (sysfs_streq(buf, virtblk_cache_types[i]))
                        break;

        if (i < 0)
                return -EINVAL;

        virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
        virtblk_update_cache_mode(vdev);
        return count;
}

static ssize_t
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct virtio_blk *vblk = disk->private_data;
        u8 writeback = virtblk_get_cache_mode(vblk->vdev);

        BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
        return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static const struct device_attribute dev_attr_cache_type_ro =
        __ATTR(cache_type, S_IRUGO,
               virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
        __ATTR(cache_type, S_IRUGO|S_IWUSR,
               virtblk_cache_type_show, virtblk_cache_type_store);

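/*
 * Called once for each pre-allocated request when the tag set is set
 * up; the scatterlist that follows the virtblk_req PDU only needs to
 * be initialised once.
 */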
static int virtblk_init_request(void *data, struct request *rq,
                unsigned int hctx_idx, unsigned int request_idx,
                unsigned int numa_node)
{
        struct virtio_blk *vblk = data;
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);

        sg_init_table(vbr->sg, vblk->sg_elems);
        return 0;
}

static struct blk_mq_ops virtio_mq_ops = {
        .queue_rq       = virtio_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .complete       = virtblk_request_done,
        .init_request   = virtblk_init_request,
};

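/*
 * Read-only module parameter.  When left at 0 (the default), probe()
 * sizes the queue to the virtqueue: vq->num_free entries, halved when
 * indirect descriptors are unavailable, since each request then needs
 * at least two descriptors.
 */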
static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

static int virtblk_probe(struct virtio_device *vdev)
{
        struct virtio_blk *vblk;
        struct request_queue *q;
        int err, index;

        u64 cap;
        u32 v, blk_size, sg_elems, opt_io_size;
        u16 min_io_size;
        u8 physical_block_exp, alignment_offset;

        err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
                             GFP_KERNEL);
        if (err < 0)
                goto out;
        index = err;

        /* We need to know how many segments before we allocate. */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
                                   struct virtio_blk_config, seg_max,
                                   &sg_elems);

        /* We need at least one SG element, whatever they say. */
        if (err || !sg_elems)
                sg_elems = 1;

        /* We need extra sg elements at head and tail. */
        sg_elems += 2;
        vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
        if (!vblk) {
                err = -ENOMEM;
                goto out_free_index;
        }

        vblk->vdev = vdev;
        vblk->sg_elems = sg_elems;
        mutex_init(&vblk->config_lock);

        INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
        vblk->config_enable = true;

        err = init_vq(vblk);
        if (err)
                goto out_free_vblk;
        spin_lock_init(&vblk->vq_lock);

        /* FIXME: How many partitions?  How long is a piece of string? */
        vblk->disk = alloc_disk(1 << PART_BITS);
        if (!vblk->disk) {
                err = -ENOMEM;
                goto out_free_vq;
        }

        /* Default queue sizing is to fill the ring. */
        if (!virtblk_queue_depth) {
                virtblk_queue_depth = vblk->vq->num_free;
                /* ... but without indirect descs, we use 2 descs per req */
                if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
                        virtblk_queue_depth /= 2;
        }

        memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
        vblk->tag_set.ops = &virtio_mq_ops;
        vblk->tag_set.nr_hw_queues = 1;
        vblk->tag_set.queue_depth = virtblk_queue_depth;
        vblk->tag_set.numa_node = NUMA_NO_NODE;
        vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        vblk->tag_set.cmd_size =
                sizeof(struct virtblk_req) +
                sizeof(struct scatterlist) * sg_elems;
        vblk->tag_set.driver_data = vblk;

        err = blk_mq_alloc_tag_set(&vblk->tag_set);
        if (err)
                goto out_put_disk;

        q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
        if (!q) {
                err = -ENOMEM;
                goto out_free_tags;
        }

        q->queuedata = vblk;

        virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

        vblk->disk->major = major;
        vblk->disk->first_minor = index_to_minor(index);
        vblk->disk->private_data = vblk;
        vblk->disk->fops = &virtblk_fops;
        vblk->disk->driverfs_dev = &vdev->dev;
        vblk->index = index;

        /* configure queue flush support */
        virtblk_update_cache_mode(vdev);

        /* If disk is read-only in the host, the guest should obey */
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
                set_disk_ro(vblk->disk, 1);

        /* Host must always specify the capacity. */
        virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);

        /* If capacity is too big, truncate with warning. */
        if ((sector_t)cap != cap) {
                dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
                         (unsigned long long)cap);
                cap = (sector_t)-1;
        }
        set_capacity(vblk->disk, cap);

        /* We can handle whatever the host told us to handle. */
        blk_queue_max_segments(q, vblk->sg_elems-2);

        /* No need to bounce any requests */
        blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

        /* No real sector limit. */
        blk_queue_max_hw_sectors(q, -1U);

        /* Host can optionally specify maximum segment size and number of
         * segments. */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
                                   struct virtio_blk_config, size_max, &v);
        if (!err)
                blk_queue_max_segment_size(q, v);
        else
                blk_queue_max_segment_size(q, -1U);

        /* Host can optionally specify the block size of the device */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
                                   struct virtio_blk_config, blk_size,
                                   &blk_size);
        if (!err)
                blk_queue_logical_block_size(q, blk_size);
        else
                blk_size = queue_logical_block_size(q);

        /* Use topology information if available */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, physical_block_exp,
                                   &physical_block_exp);
        if (!err && physical_block_exp)
                blk_queue_physical_block_size(q,
                                blk_size * (1 << physical_block_exp));

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, alignment_offset,
                                   &alignment_offset);
        if (!err && alignment_offset)
                blk_queue_alignment_offset(q, blk_size * alignment_offset);

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, min_io_size,
                                   &min_io_size);
        if (!err && min_io_size)
                blk_queue_io_min(q, blk_size * min_io_size);

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, opt_io_size,
                                   &opt_io_size);
        if (!err && opt_io_size)
                blk_queue_io_opt(q, blk_size * opt_io_size);

        add_disk(vblk->disk);
        err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
        if (err)
                goto out_del_disk;

        if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
                err = device_create_file(disk_to_dev(vblk->disk),
                                         &dev_attr_cache_type_rw);
        else
                err = device_create_file(disk_to_dev(vblk->disk),
                                         &dev_attr_cache_type_ro);
        if (err)
                goto out_del_disk;
        return 0;

out_del_disk:
        del_gendisk(vblk->disk);
        blk_cleanup_queue(vblk->disk->queue);
out_free_tags:
        blk_mq_free_tag_set(&vblk->tag_set);
out_put_disk:
        put_disk(vblk->disk);
out_free_vq:
        vdev->config->del_vqs(vdev);
out_free_vblk:
        kfree(vblk);
out_free_index:
        ida_simple_remove(&vd_index_ida, index);
out:
        return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;
        int index = vblk->index;
        int refc;

        /* Prevent config work handler from accessing the device. */
        mutex_lock(&vblk->config_lock);
        vblk->config_enable = false;
        mutex_unlock(&vblk->config_lock);

        del_gendisk(vblk->disk);
        blk_cleanup_queue(vblk->disk->queue);

        blk_mq_free_tag_set(&vblk->tag_set);

        /* Stop all the virtqueues. */
        vdev->config->reset(vdev);

        flush_work(&vblk->config_work);

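        /*
         * Snapshot the disk device's kobject refcount before dropping
         * our reference: if it was 1, we held the last reference and
         * nobody still has the device open, so the ida index can be
         * recycled for a new disk name.
         */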
        refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
        put_disk(vblk->disk);
        vdev->config->del_vqs(vdev);
        kfree(vblk);

        /* Only free device id if we don't have any users */
        if (refc == 1)
                ida_simple_remove(&vd_index_ida, index);
}

#ifdef CONFIG_PM_SLEEP
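/*
 * Suspend: reset the device to stop all virtqueue processing (and
 * thus interrupts), quiesce the config work, stop the blk-mq hardware
 * queues, and tear down the virtqueue.  Resume re-creates the
 * virtqueue and restarts the stopped queues.
 */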
static int virtblk_freeze(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        /* Ensure we don't receive any more interrupts */
        vdev->config->reset(vdev);

        /* Prevent config work handler from accessing the device. */
        mutex_lock(&vblk->config_lock);
        vblk->config_enable = false;
        mutex_unlock(&vblk->config_lock);

        flush_work(&vblk->config_work);

        blk_mq_stop_hw_queues(vblk->disk->queue);

        vdev->config->del_vqs(vdev);
        return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;
        int ret;

        vblk->config_enable = true;
        ret = init_vq(vdev->priv);
        if (!ret)
                blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);

        return ret;
}
#endif

static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static unsigned int features[] = {
        VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
        VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
        VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};

static struct virtio_driver virtio_blk = {
        .feature_table          = features,
        .feature_table_size     = ARRAY_SIZE(features),
        .driver.name            = KBUILD_MODNAME,
        .driver.owner           = THIS_MODULE,
        .id_table               = id_table,
        .probe                  = virtblk_probe,
        .remove                 = virtblk_remove,
        .config_changed         = virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
        .freeze                 = virtblk_freeze,
        .restore                = virtblk_restore,
#endif
};

static int __init init(void)
{
        int error;

        virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
        if (!virtblk_wq)
                return -ENOMEM;

        major = register_blkdev(0, "virtblk");
        if (major < 0) {
                error = major;
                goto out_destroy_workqueue;
        }

        error = register_virtio_driver(&virtio_blk);
        if (error)
                goto out_unregister_blkdev;
        return 0;

out_unregister_blkdev:
        unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
        destroy_workqueue(virtblk_wq);
        return error;
}

static void __exit fini(void)
{
        unregister_blkdev(major, "virtblk");
        unregister_virtio_driver(&virtio_blk);
        destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");