1 // SPDX-License-Identifier: GPL-2.0-only 2 //#define DEBUG 3 #include <linux/spinlock.h> 4 #include <linux/slab.h> 5 #include <linux/blkdev.h> 6 #include <linux/hdreg.h> 7 #include <linux/module.h> 8 #include <linux/mutex.h> 9 #include <linux/interrupt.h> 10 #include <linux/virtio.h> 11 #include <linux/virtio_blk.h> 12 #include <linux/scatterlist.h> 13 #include <linux/string_helpers.h> 14 #include <linux/idr.h> 15 #include <linux/blk-mq.h> 16 #include <linux/blk-mq-virtio.h> 17 #include <linux/numa.h> 18 #include <uapi/linux/virtio_ring.h> 19 20 #define PART_BITS 4 21 #define VQ_NAME_LEN 16 22 #define MAX_DISCARD_SEGMENTS 256u 23 24 static int major; 25 static DEFINE_IDA(vd_index_ida); 26 27 static struct workqueue_struct *virtblk_wq; 28 29 struct virtio_blk_vq { 30 struct virtqueue *vq; 31 spinlock_t lock; 32 char name[VQ_NAME_LEN]; 33 } ____cacheline_aligned_in_smp; 34 35 struct virtio_blk { 36 /* 37 * This mutex must be held by anything that may run after 38 * virtblk_remove() sets vblk->vdev to NULL. 39 * 40 * blk-mq, virtqueue processing, and sysfs attribute code paths are 41 * shut down before vblk->vdev is set to NULL and therefore do not need 42 * to hold this mutex. 43 */ 44 struct mutex vdev_mutex; 45 struct virtio_device *vdev; 46 47 /* The disk structure for the kernel. */ 48 struct gendisk *disk; 49 50 /* Block layer tags. */ 51 struct blk_mq_tag_set tag_set; 52 53 /* Process context for config space updates */ 54 struct work_struct config_work; 55 56 /* 57 * Tracks references from block_device_operations open/release and 58 * virtio_driver probe/remove so this object can be freed once no 59 * longer in use. 60 */ 61 refcount_t refs; 62 63 /* What host tells us, plus 2 for header & tailer. */ 64 unsigned int sg_elems; 65 66 /* Ida index - used to track minor number allocations. */ 67 int index; 68 69 /* num of vqs */ 70 int num_vqs; 71 struct virtio_blk_vq *vqs; 72 }; 73 74 struct virtblk_req { 75 struct virtio_blk_outhdr out_hdr; 76 u8 status; 77 struct scatterlist sg[]; 78 }; 79 80 static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 81 { 82 switch (vbr->status) { 83 case VIRTIO_BLK_S_OK: 84 return BLK_STS_OK; 85 case VIRTIO_BLK_S_UNSUPP: 86 return BLK_STS_NOTSUPP; 87 default: 88 return BLK_STS_IOERR; 89 } 90 } 91 92 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 93 struct scatterlist *data_sg, bool have_data) 94 { 95 struct scatterlist hdr, status, *sgs[3]; 96 unsigned int num_out = 0, num_in = 0; 97 98 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 99 sgs[num_out++] = &hdr; 100 101 if (have_data) { 102 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 103 sgs[num_out++] = data_sg; 104 else 105 sgs[num_out + num_in++] = data_sg; 106 } 107 108 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 109 sgs[num_out + num_in++] = &status; 110 111 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 112 } 113 114 static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 115 { 116 unsigned short segments = blk_rq_nr_discard_segments(req); 117 unsigned short n = 0; 118 struct virtio_blk_discard_write_zeroes *range; 119 struct bio *bio; 120 u32 flags = 0; 121 122 if (unmap) 123 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 124 125 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 126 if (!range) 127 return -ENOMEM; 128 129 /* 130 * Single max discard segment means multi-range discard isn't 131 * supported, and block layer only runs contiguity merge like 132 * normal RW request. So we can't reply on bio for retrieving 133 * each range info. 134 */ 135 if (queue_max_discard_segments(req->q) == 1) { 136 range[0].flags = cpu_to_le32(flags); 137 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 138 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 139 n = 1; 140 } else { 141 __rq_for_each_bio(bio, req) { 142 u64 sector = bio->bi_iter.bi_sector; 143 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 144 145 range[n].flags = cpu_to_le32(flags); 146 range[n].num_sectors = cpu_to_le32(num_sectors); 147 range[n].sector = cpu_to_le64(sector); 148 n++; 149 } 150 } 151 152 WARN_ON_ONCE(n != segments); 153 154 req->special_vec.bv_page = virt_to_page(range); 155 req->special_vec.bv_offset = offset_in_page(range); 156 req->special_vec.bv_len = sizeof(*range) * segments; 157 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 158 159 return 0; 160 } 161 162 static inline void virtblk_request_done(struct request *req) 163 { 164 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 165 166 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { 167 kfree(page_address(req->special_vec.bv_page) + 168 req->special_vec.bv_offset); 169 } 170 171 blk_mq_end_request(req, virtblk_result(vbr)); 172 } 173 174 static void virtblk_done(struct virtqueue *vq) 175 { 176 struct virtio_blk *vblk = vq->vdev->priv; 177 bool req_done = false; 178 int qid = vq->index; 179 struct virtblk_req *vbr; 180 unsigned long flags; 181 unsigned int len; 182 183 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 184 do { 185 virtqueue_disable_cb(vq); 186 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 187 struct request *req = blk_mq_rq_from_pdu(vbr); 188 189 if (likely(!blk_should_fake_timeout(req->q))) 190 blk_mq_complete_request(req); 191 req_done = true; 192 } 193 if (unlikely(virtqueue_is_broken(vq))) 194 break; 195 } while (!virtqueue_enable_cb(vq)); 196 197 /* In case queue is stopped waiting for more buffers. */ 198 if (req_done) 199 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 200 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 201 } 202 203 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 204 { 205 struct virtio_blk *vblk = hctx->queue->queuedata; 206 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 207 bool kick; 208 209 spin_lock_irq(&vq->lock); 210 kick = virtqueue_kick_prepare(vq->vq); 211 spin_unlock_irq(&vq->lock); 212 213 if (kick) 214 virtqueue_notify(vq->vq); 215 } 216 217 static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 218 const struct blk_mq_queue_data *bd) 219 { 220 struct virtio_blk *vblk = hctx->queue->queuedata; 221 struct request *req = bd->rq; 222 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 223 unsigned long flags; 224 unsigned int num; 225 int qid = hctx->queue_num; 226 int err; 227 bool notify = false; 228 bool unmap = false; 229 u32 type; 230 231 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 232 233 switch (req_op(req)) { 234 case REQ_OP_READ: 235 case REQ_OP_WRITE: 236 type = 0; 237 break; 238 case REQ_OP_FLUSH: 239 type = VIRTIO_BLK_T_FLUSH; 240 break; 241 case REQ_OP_DISCARD: 242 type = VIRTIO_BLK_T_DISCARD; 243 break; 244 case REQ_OP_WRITE_ZEROES: 245 type = VIRTIO_BLK_T_WRITE_ZEROES; 246 unmap = !(req->cmd_flags & REQ_NOUNMAP); 247 break; 248 case REQ_OP_DRV_IN: 249 type = VIRTIO_BLK_T_GET_ID; 250 break; 251 default: 252 WARN_ON_ONCE(1); 253 return BLK_STS_IOERR; 254 } 255 256 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 257 vbr->out_hdr.sector = type ? 258 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 259 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 260 261 blk_mq_start_request(req); 262 263 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 264 err = virtblk_setup_discard_write_zeroes(req, unmap); 265 if (err) 266 return BLK_STS_RESOURCE; 267 } 268 269 num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 270 if (num) { 271 if (rq_data_dir(req) == WRITE) 272 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 273 else 274 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 275 } 276 277 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 278 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 279 if (err) { 280 virtqueue_kick(vblk->vqs[qid].vq); 281 /* Don't stop the queue if -ENOMEM: we may have failed to 282 * bounce the buffer due to global resource outage. 283 */ 284 if (err == -ENOSPC) 285 blk_mq_stop_hw_queue(hctx); 286 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 287 switch (err) { 288 case -ENOSPC: 289 return BLK_STS_DEV_RESOURCE; 290 case -ENOMEM: 291 return BLK_STS_RESOURCE; 292 default: 293 return BLK_STS_IOERR; 294 } 295 } 296 297 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 298 notify = true; 299 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 300 301 if (notify) 302 virtqueue_notify(vblk->vqs[qid].vq); 303 return BLK_STS_OK; 304 } 305 306 /* return id (s/n) string for *disk to *id_str 307 */ 308 static int virtblk_get_id(struct gendisk *disk, char *id_str) 309 { 310 struct virtio_blk *vblk = disk->private_data; 311 struct request_queue *q = vblk->disk->queue; 312 struct request *req; 313 int err; 314 315 req = blk_get_request(q, REQ_OP_DRV_IN, 0); 316 if (IS_ERR(req)) 317 return PTR_ERR(req); 318 319 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 320 if (err) 321 goto out; 322 323 blk_execute_rq(vblk->disk, req, false); 324 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 325 out: 326 blk_put_request(req); 327 return err; 328 } 329 330 static void virtblk_get(struct virtio_blk *vblk) 331 { 332 refcount_inc(&vblk->refs); 333 } 334 335 static void virtblk_put(struct virtio_blk *vblk) 336 { 337 if (refcount_dec_and_test(&vblk->refs)) { 338 ida_simple_remove(&vd_index_ida, vblk->index); 339 mutex_destroy(&vblk->vdev_mutex); 340 kfree(vblk); 341 } 342 } 343 344 static int virtblk_open(struct block_device *bd, fmode_t mode) 345 { 346 struct virtio_blk *vblk = bd->bd_disk->private_data; 347 int ret = 0; 348 349 mutex_lock(&vblk->vdev_mutex); 350 351 if (vblk->vdev) 352 virtblk_get(vblk); 353 else 354 ret = -ENXIO; 355 356 mutex_unlock(&vblk->vdev_mutex); 357 return ret; 358 } 359 360 static void virtblk_release(struct gendisk *disk, fmode_t mode) 361 { 362 struct virtio_blk *vblk = disk->private_data; 363 364 virtblk_put(vblk); 365 } 366 367 /* We provide getgeo only to please some old bootloader/partitioning tools */ 368 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 369 { 370 struct virtio_blk *vblk = bd->bd_disk->private_data; 371 int ret = 0; 372 373 mutex_lock(&vblk->vdev_mutex); 374 375 if (!vblk->vdev) { 376 ret = -ENXIO; 377 goto out; 378 } 379 380 /* see if the host passed in geometry config */ 381 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 382 virtio_cread(vblk->vdev, struct virtio_blk_config, 383 geometry.cylinders, &geo->cylinders); 384 virtio_cread(vblk->vdev, struct virtio_blk_config, 385 geometry.heads, &geo->heads); 386 virtio_cread(vblk->vdev, struct virtio_blk_config, 387 geometry.sectors, &geo->sectors); 388 } else { 389 /* some standard values, similar to sd */ 390 geo->heads = 1 << 6; 391 geo->sectors = 1 << 5; 392 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 393 } 394 out: 395 mutex_unlock(&vblk->vdev_mutex); 396 return ret; 397 } 398 399 static const struct block_device_operations virtblk_fops = { 400 .owner = THIS_MODULE, 401 .open = virtblk_open, 402 .release = virtblk_release, 403 .getgeo = virtblk_getgeo, 404 }; 405 406 static int index_to_minor(int index) 407 { 408 return index << PART_BITS; 409 } 410 411 static int minor_to_index(int minor) 412 { 413 return minor >> PART_BITS; 414 } 415 416 static ssize_t serial_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418 { 419 struct gendisk *disk = dev_to_disk(dev); 420 int err; 421 422 /* sysfs gives us a PAGE_SIZE buffer */ 423 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 424 425 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 426 err = virtblk_get_id(disk, buf); 427 if (!err) 428 return strlen(buf); 429 430 if (err == -EIO) /* Unsupported? Make it empty. */ 431 return 0; 432 433 return err; 434 } 435 436 static DEVICE_ATTR_RO(serial); 437 438 /* The queue's logical block size must be set before calling this */ 439 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 440 { 441 struct virtio_device *vdev = vblk->vdev; 442 struct request_queue *q = vblk->disk->queue; 443 char cap_str_2[10], cap_str_10[10]; 444 unsigned long long nblocks; 445 u64 capacity; 446 447 /* Host must always specify the capacity. */ 448 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 449 450 /* If capacity is too big, truncate with warning. */ 451 if ((sector_t)capacity != capacity) { 452 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", 453 (unsigned long long)capacity); 454 capacity = (sector_t)-1; 455 } 456 457 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 458 459 string_get_size(nblocks, queue_logical_block_size(q), 460 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 461 string_get_size(nblocks, queue_logical_block_size(q), 462 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 463 464 dev_notice(&vdev->dev, 465 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 466 vblk->disk->disk_name, 467 resize ? "new size: " : "", 468 nblocks, 469 queue_logical_block_size(q), 470 cap_str_10, 471 cap_str_2); 472 473 set_capacity_and_notify(vblk->disk, capacity); 474 } 475 476 static void virtblk_config_changed_work(struct work_struct *work) 477 { 478 struct virtio_blk *vblk = 479 container_of(work, struct virtio_blk, config_work); 480 481 virtblk_update_capacity(vblk, true); 482 } 483 484 static void virtblk_config_changed(struct virtio_device *vdev) 485 { 486 struct virtio_blk *vblk = vdev->priv; 487 488 queue_work(virtblk_wq, &vblk->config_work); 489 } 490 491 static int init_vq(struct virtio_blk *vblk) 492 { 493 int err; 494 int i; 495 vq_callback_t **callbacks; 496 const char **names; 497 struct virtqueue **vqs; 498 unsigned short num_vqs; 499 struct virtio_device *vdev = vblk->vdev; 500 struct irq_affinity desc = { 0, }; 501 502 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 503 struct virtio_blk_config, num_queues, 504 &num_vqs); 505 if (err) 506 num_vqs = 1; 507 508 num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 509 510 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 511 if (!vblk->vqs) 512 return -ENOMEM; 513 514 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 515 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 516 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 517 if (!names || !callbacks || !vqs) { 518 err = -ENOMEM; 519 goto out; 520 } 521 522 for (i = 0; i < num_vqs; i++) { 523 callbacks[i] = virtblk_done; 524 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 525 names[i] = vblk->vqs[i].name; 526 } 527 528 /* Discover virtqueues and write information to configuration. */ 529 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 530 if (err) 531 goto out; 532 533 for (i = 0; i < num_vqs; i++) { 534 spin_lock_init(&vblk->vqs[i].lock); 535 vblk->vqs[i].vq = vqs[i]; 536 } 537 vblk->num_vqs = num_vqs; 538 539 out: 540 kfree(vqs); 541 kfree(callbacks); 542 kfree(names); 543 if (err) 544 kfree(vblk->vqs); 545 return err; 546 } 547 548 /* 549 * Legacy naming scheme used for virtio devices. We are stuck with it for 550 * virtio blk but don't ever use it for any new driver. 551 */ 552 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 553 { 554 const int base = 'z' - 'a' + 1; 555 char *begin = buf + strlen(prefix); 556 char *end = buf + buflen; 557 char *p; 558 int unit; 559 560 p = end - 1; 561 *p = '\0'; 562 unit = base; 563 do { 564 if (p == begin) 565 return -EINVAL; 566 *--p = 'a' + (index % unit); 567 index = (index / unit) - 1; 568 } while (index >= 0); 569 570 memmove(begin, p, end - p); 571 memcpy(buf, prefix, strlen(prefix)); 572 573 return 0; 574 } 575 576 static int virtblk_get_cache_mode(struct virtio_device *vdev) 577 { 578 u8 writeback; 579 int err; 580 581 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 582 struct virtio_blk_config, wce, 583 &writeback); 584 585 /* 586 * If WCE is not configurable and flush is not available, 587 * assume no writeback cache is in use. 588 */ 589 if (err) 590 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 591 592 return writeback; 593 } 594 595 static void virtblk_update_cache_mode(struct virtio_device *vdev) 596 { 597 u8 writeback = virtblk_get_cache_mode(vdev); 598 struct virtio_blk *vblk = vdev->priv; 599 600 blk_queue_write_cache(vblk->disk->queue, writeback, false); 601 } 602 603 static const char *const virtblk_cache_types[] = { 604 "write through", "write back" 605 }; 606 607 static ssize_t 608 cache_type_store(struct device *dev, struct device_attribute *attr, 609 const char *buf, size_t count) 610 { 611 struct gendisk *disk = dev_to_disk(dev); 612 struct virtio_blk *vblk = disk->private_data; 613 struct virtio_device *vdev = vblk->vdev; 614 int i; 615 616 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 617 i = sysfs_match_string(virtblk_cache_types, buf); 618 if (i < 0) 619 return i; 620 621 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 622 virtblk_update_cache_mode(vdev); 623 return count; 624 } 625 626 static ssize_t 627 cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 628 { 629 struct gendisk *disk = dev_to_disk(dev); 630 struct virtio_blk *vblk = disk->private_data; 631 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 632 633 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 634 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 635 } 636 637 static DEVICE_ATTR_RW(cache_type); 638 639 static struct attribute *virtblk_attrs[] = { 640 &dev_attr_serial.attr, 641 &dev_attr_cache_type.attr, 642 NULL, 643 }; 644 645 static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 646 struct attribute *a, int n) 647 { 648 struct device *dev = kobj_to_dev(kobj); 649 struct gendisk *disk = dev_to_disk(dev); 650 struct virtio_blk *vblk = disk->private_data; 651 struct virtio_device *vdev = vblk->vdev; 652 653 if (a == &dev_attr_cache_type.attr && 654 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 655 return S_IRUGO; 656 657 return a->mode; 658 } 659 660 static const struct attribute_group virtblk_attr_group = { 661 .attrs = virtblk_attrs, 662 .is_visible = virtblk_attrs_are_visible, 663 }; 664 665 static const struct attribute_group *virtblk_attr_groups[] = { 666 &virtblk_attr_group, 667 NULL, 668 }; 669 670 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 671 unsigned int hctx_idx, unsigned int numa_node) 672 { 673 struct virtio_blk *vblk = set->driver_data; 674 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 675 676 sg_init_table(vbr->sg, vblk->sg_elems); 677 return 0; 678 } 679 680 static int virtblk_map_queues(struct blk_mq_tag_set *set) 681 { 682 struct virtio_blk *vblk = set->driver_data; 683 684 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 685 vblk->vdev, 0); 686 } 687 688 static const struct blk_mq_ops virtio_mq_ops = { 689 .queue_rq = virtio_queue_rq, 690 .commit_rqs = virtio_commit_rqs, 691 .complete = virtblk_request_done, 692 .init_request = virtblk_init_request, 693 .map_queues = virtblk_map_queues, 694 }; 695 696 static unsigned int virtblk_queue_depth; 697 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 698 699 static int virtblk_probe(struct virtio_device *vdev) 700 { 701 struct virtio_blk *vblk; 702 struct request_queue *q; 703 int err, index; 704 705 u32 v, blk_size, max_size, sg_elems, opt_io_size; 706 u16 min_io_size; 707 u8 physical_block_exp, alignment_offset; 708 unsigned int queue_depth; 709 710 if (!vdev->config->get) { 711 dev_err(&vdev->dev, "%s failure: config access disabled\n", 712 __func__); 713 return -EINVAL; 714 } 715 716 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 717 GFP_KERNEL); 718 if (err < 0) 719 goto out; 720 index = err; 721 722 /* We need to know how many segments before we allocate. */ 723 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 724 struct virtio_blk_config, seg_max, 725 &sg_elems); 726 727 /* We need at least one SG element, whatever they say. */ 728 if (err || !sg_elems) 729 sg_elems = 1; 730 731 /* We need an extra sg elements at head and tail. */ 732 sg_elems += 2; 733 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 734 if (!vblk) { 735 err = -ENOMEM; 736 goto out_free_index; 737 } 738 739 /* This reference is dropped in virtblk_remove(). */ 740 refcount_set(&vblk->refs, 1); 741 mutex_init(&vblk->vdev_mutex); 742 743 vblk->vdev = vdev; 744 vblk->sg_elems = sg_elems; 745 746 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 747 748 err = init_vq(vblk); 749 if (err) 750 goto out_free_vblk; 751 752 /* FIXME: How many partitions? How long is a piece of string? */ 753 vblk->disk = alloc_disk(1 << PART_BITS); 754 if (!vblk->disk) { 755 err = -ENOMEM; 756 goto out_free_vq; 757 } 758 759 /* Default queue sizing is to fill the ring. */ 760 if (likely(!virtblk_queue_depth)) { 761 queue_depth = vblk->vqs[0].vq->num_free; 762 /* ... but without indirect descs, we use 2 descs per req */ 763 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 764 queue_depth /= 2; 765 } else { 766 queue_depth = virtblk_queue_depth; 767 } 768 769 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 770 vblk->tag_set.ops = &virtio_mq_ops; 771 vblk->tag_set.queue_depth = queue_depth; 772 vblk->tag_set.numa_node = NUMA_NO_NODE; 773 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 774 vblk->tag_set.cmd_size = 775 sizeof(struct virtblk_req) + 776 sizeof(struct scatterlist) * sg_elems; 777 vblk->tag_set.driver_data = vblk; 778 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 779 780 err = blk_mq_alloc_tag_set(&vblk->tag_set); 781 if (err) 782 goto out_put_disk; 783 784 q = blk_mq_init_queue(&vblk->tag_set); 785 if (IS_ERR(q)) { 786 err = -ENOMEM; 787 goto out_free_tags; 788 } 789 vblk->disk->queue = q; 790 791 q->queuedata = vblk; 792 793 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 794 795 vblk->disk->major = major; 796 vblk->disk->first_minor = index_to_minor(index); 797 vblk->disk->private_data = vblk; 798 vblk->disk->fops = &virtblk_fops; 799 vblk->disk->flags |= GENHD_FL_EXT_DEVT; 800 vblk->index = index; 801 802 /* configure queue flush support */ 803 virtblk_update_cache_mode(vdev); 804 805 /* If disk is read-only in the host, the guest should obey */ 806 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 807 set_disk_ro(vblk->disk, 1); 808 809 /* We can handle whatever the host told us to handle. */ 810 blk_queue_max_segments(q, vblk->sg_elems-2); 811 812 /* No real sector limit. */ 813 blk_queue_max_hw_sectors(q, -1U); 814 815 max_size = virtio_max_dma_size(vdev); 816 817 /* Host can optionally specify maximum segment size and number of 818 * segments. */ 819 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 820 struct virtio_blk_config, size_max, &v); 821 if (!err) 822 max_size = min(max_size, v); 823 824 blk_queue_max_segment_size(q, max_size); 825 826 /* Host can optionally specify the block size of the device */ 827 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 828 struct virtio_blk_config, blk_size, 829 &blk_size); 830 if (!err) 831 blk_queue_logical_block_size(q, blk_size); 832 else 833 blk_size = queue_logical_block_size(q); 834 835 /* Use topology information if available */ 836 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 837 struct virtio_blk_config, physical_block_exp, 838 &physical_block_exp); 839 if (!err && physical_block_exp) 840 blk_queue_physical_block_size(q, 841 blk_size * (1 << physical_block_exp)); 842 843 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 844 struct virtio_blk_config, alignment_offset, 845 &alignment_offset); 846 if (!err && alignment_offset) 847 blk_queue_alignment_offset(q, blk_size * alignment_offset); 848 849 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 850 struct virtio_blk_config, min_io_size, 851 &min_io_size); 852 if (!err && min_io_size) 853 blk_queue_io_min(q, blk_size * min_io_size); 854 855 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 856 struct virtio_blk_config, opt_io_size, 857 &opt_io_size); 858 if (!err && opt_io_size) 859 blk_queue_io_opt(q, blk_size * opt_io_size); 860 861 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 862 q->limits.discard_granularity = blk_size; 863 864 virtio_cread(vdev, struct virtio_blk_config, 865 discard_sector_alignment, &v); 866 q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; 867 868 virtio_cread(vdev, struct virtio_blk_config, 869 max_discard_sectors, &v); 870 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 871 872 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 873 &v); 874 blk_queue_max_discard_segments(q, 875 min_not_zero(v, 876 MAX_DISCARD_SEGMENTS)); 877 878 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 879 } 880 881 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 882 virtio_cread(vdev, struct virtio_blk_config, 883 max_write_zeroes_sectors, &v); 884 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 885 } 886 887 virtblk_update_capacity(vblk, false); 888 virtio_device_ready(vdev); 889 890 device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 891 return 0; 892 893 out_free_tags: 894 blk_mq_free_tag_set(&vblk->tag_set); 895 out_put_disk: 896 put_disk(vblk->disk); 897 out_free_vq: 898 vdev->config->del_vqs(vdev); 899 kfree(vblk->vqs); 900 out_free_vblk: 901 kfree(vblk); 902 out_free_index: 903 ida_simple_remove(&vd_index_ida, index); 904 out: 905 return err; 906 } 907 908 static void virtblk_remove(struct virtio_device *vdev) 909 { 910 struct virtio_blk *vblk = vdev->priv; 911 912 /* Make sure no work handler is accessing the device. */ 913 flush_work(&vblk->config_work); 914 915 del_gendisk(vblk->disk); 916 blk_cleanup_queue(vblk->disk->queue); 917 918 blk_mq_free_tag_set(&vblk->tag_set); 919 920 mutex_lock(&vblk->vdev_mutex); 921 922 /* Stop all the virtqueues. */ 923 vdev->config->reset(vdev); 924 925 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 926 vblk->vdev = NULL; 927 928 put_disk(vblk->disk); 929 vdev->config->del_vqs(vdev); 930 kfree(vblk->vqs); 931 932 mutex_unlock(&vblk->vdev_mutex); 933 934 virtblk_put(vblk); 935 } 936 937 #ifdef CONFIG_PM_SLEEP 938 static int virtblk_freeze(struct virtio_device *vdev) 939 { 940 struct virtio_blk *vblk = vdev->priv; 941 942 /* Ensure we don't receive any more interrupts */ 943 vdev->config->reset(vdev); 944 945 /* Make sure no work handler is accessing the device. */ 946 flush_work(&vblk->config_work); 947 948 blk_mq_quiesce_queue(vblk->disk->queue); 949 950 vdev->config->del_vqs(vdev); 951 return 0; 952 } 953 954 static int virtblk_restore(struct virtio_device *vdev) 955 { 956 struct virtio_blk *vblk = vdev->priv; 957 int ret; 958 959 ret = init_vq(vdev->priv); 960 if (ret) 961 return ret; 962 963 virtio_device_ready(vdev); 964 965 blk_mq_unquiesce_queue(vblk->disk->queue); 966 return 0; 967 } 968 #endif 969 970 static const struct virtio_device_id id_table[] = { 971 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 972 { 0 }, 973 }; 974 975 static unsigned int features_legacy[] = { 976 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 977 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 978 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 979 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 980 } 981 ; 982 static unsigned int features[] = { 983 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 984 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 985 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 986 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 987 }; 988 989 static struct virtio_driver virtio_blk = { 990 .feature_table = features, 991 .feature_table_size = ARRAY_SIZE(features), 992 .feature_table_legacy = features_legacy, 993 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 994 .driver.name = KBUILD_MODNAME, 995 .driver.owner = THIS_MODULE, 996 .id_table = id_table, 997 .probe = virtblk_probe, 998 .remove = virtblk_remove, 999 .config_changed = virtblk_config_changed, 1000 #ifdef CONFIG_PM_SLEEP 1001 .freeze = virtblk_freeze, 1002 .restore = virtblk_restore, 1003 #endif 1004 }; 1005 1006 static int __init init(void) 1007 { 1008 int error; 1009 1010 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1011 if (!virtblk_wq) 1012 return -ENOMEM; 1013 1014 major = register_blkdev(0, "virtblk"); 1015 if (major < 0) { 1016 error = major; 1017 goto out_destroy_workqueue; 1018 } 1019 1020 error = register_virtio_driver(&virtio_blk); 1021 if (error) 1022 goto out_unregister_blkdev; 1023 return 0; 1024 1025 out_unregister_blkdev: 1026 unregister_blkdev(major, "virtblk"); 1027 out_destroy_workqueue: 1028 destroy_workqueue(virtblk_wq); 1029 return error; 1030 } 1031 1032 static void __exit fini(void) 1033 { 1034 unregister_virtio_driver(&virtio_blk); 1035 unregister_blkdev(major, "virtblk"); 1036 destroy_workqueue(virtblk_wq); 1037 } 1038 module_init(init); 1039 module_exit(fini); 1040 1041 MODULE_DEVICE_TABLE(virtio, id_table); 1042 MODULE_DESCRIPTION("Virtio block driver"); 1043 MODULE_LICENSE("GPL"); 1044