1 // SPDX-License-Identifier: GPL-2.0-only 2 //#define DEBUG 3 #include <linux/spinlock.h> 4 #include <linux/slab.h> 5 #include <linux/blkdev.h> 6 #include <linux/hdreg.h> 7 #include <linux/module.h> 8 #include <linux/mutex.h> 9 #include <linux/interrupt.h> 10 #include <linux/virtio.h> 11 #include <linux/virtio_blk.h> 12 #include <linux/scatterlist.h> 13 #include <linux/string_helpers.h> 14 #include <linux/idr.h> 15 #include <linux/blk-mq.h> 16 #include <linux/blk-mq-virtio.h> 17 #include <linux/numa.h> 18 #include <uapi/linux/virtio_ring.h> 19 20 #define PART_BITS 4 21 #define VQ_NAME_LEN 16 22 #define MAX_DISCARD_SEGMENTS 256u 23 24 /* The maximum number of sg elements that fit into a virtqueue */ 25 #define VIRTIO_BLK_MAX_SG_ELEMS 32768 26 27 static int major; 28 static DEFINE_IDA(vd_index_ida); 29 30 static struct workqueue_struct *virtblk_wq; 31 32 struct virtio_blk_vq { 33 struct virtqueue *vq; 34 spinlock_t lock; 35 char name[VQ_NAME_LEN]; 36 } ____cacheline_aligned_in_smp; 37 38 struct virtio_blk { 39 /* 40 * This mutex must be held by anything that may run after 41 * virtblk_remove() sets vblk->vdev to NULL. 42 * 43 * blk-mq, virtqueue processing, and sysfs attribute code paths are 44 * shut down before vblk->vdev is set to NULL and therefore do not need 45 * to hold this mutex. 46 */ 47 struct mutex vdev_mutex; 48 struct virtio_device *vdev; 49 50 /* The disk structure for the kernel. */ 51 struct gendisk *disk; 52 53 /* Block layer tags. */ 54 struct blk_mq_tag_set tag_set; 55 56 /* Process context for config space updates */ 57 struct work_struct config_work; 58 59 /* 60 * Tracks references from block_device_operations open/release and 61 * virtio_driver probe/remove so this object can be freed once no 62 * longer in use. 63 */ 64 refcount_t refs; 65 66 /* What host tells us, plus 2 for header & tailer. */ 67 unsigned int sg_elems; 68 69 /* Ida index - used to track minor number allocations. */ 70 int index; 71 72 /* num of vqs */ 73 int num_vqs; 74 struct virtio_blk_vq *vqs; 75 }; 76 77 struct virtblk_req { 78 struct virtio_blk_outhdr out_hdr; 79 u8 status; 80 struct scatterlist sg[]; 81 }; 82 83 static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 84 { 85 switch (vbr->status) { 86 case VIRTIO_BLK_S_OK: 87 return BLK_STS_OK; 88 case VIRTIO_BLK_S_UNSUPP: 89 return BLK_STS_NOTSUPP; 90 default: 91 return BLK_STS_IOERR; 92 } 93 } 94 95 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 96 struct scatterlist *data_sg, bool have_data) 97 { 98 struct scatterlist hdr, status, *sgs[3]; 99 unsigned int num_out = 0, num_in = 0; 100 101 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 102 sgs[num_out++] = &hdr; 103 104 if (have_data) { 105 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 106 sgs[num_out++] = data_sg; 107 else 108 sgs[num_out + num_in++] = data_sg; 109 } 110 111 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 112 sgs[num_out + num_in++] = &status; 113 114 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 115 } 116 117 static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 118 { 119 unsigned short segments = blk_rq_nr_discard_segments(req); 120 unsigned short n = 0; 121 struct virtio_blk_discard_write_zeroes *range; 122 struct bio *bio; 123 u32 flags = 0; 124 125 if (unmap) 126 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 127 128 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 129 if (!range) 130 return -ENOMEM; 131 132 /* 133 * Single max discard segment means multi-range discard isn't 134 * supported, and block layer only runs contiguity merge like 135 * normal RW request. So we can't reply on bio for retrieving 136 * each range info. 137 */ 138 if (queue_max_discard_segments(req->q) == 1) { 139 range[0].flags = cpu_to_le32(flags); 140 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 141 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 142 n = 1; 143 } else { 144 __rq_for_each_bio(bio, req) { 145 u64 sector = bio->bi_iter.bi_sector; 146 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 147 148 range[n].flags = cpu_to_le32(flags); 149 range[n].num_sectors = cpu_to_le32(num_sectors); 150 range[n].sector = cpu_to_le64(sector); 151 n++; 152 } 153 } 154 155 WARN_ON_ONCE(n != segments); 156 157 req->special_vec.bv_page = virt_to_page(range); 158 req->special_vec.bv_offset = offset_in_page(range); 159 req->special_vec.bv_len = sizeof(*range) * segments; 160 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 161 162 return 0; 163 } 164 165 static inline void virtblk_request_done(struct request *req) 166 { 167 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 168 169 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) 170 kfree(bvec_virt(&req->special_vec)); 171 blk_mq_end_request(req, virtblk_result(vbr)); 172 } 173 174 static void virtblk_done(struct virtqueue *vq) 175 { 176 struct virtio_blk *vblk = vq->vdev->priv; 177 bool req_done = false; 178 int qid = vq->index; 179 struct virtblk_req *vbr; 180 unsigned long flags; 181 unsigned int len; 182 183 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 184 do { 185 virtqueue_disable_cb(vq); 186 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 187 struct request *req = blk_mq_rq_from_pdu(vbr); 188 189 if (likely(!blk_should_fake_timeout(req->q))) 190 blk_mq_complete_request(req); 191 req_done = true; 192 } 193 if (unlikely(virtqueue_is_broken(vq))) 194 break; 195 } while (!virtqueue_enable_cb(vq)); 196 197 /* In case queue is stopped waiting for more buffers. */ 198 if (req_done) 199 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 200 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 201 } 202 203 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 204 { 205 struct virtio_blk *vblk = hctx->queue->queuedata; 206 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 207 bool kick; 208 209 spin_lock_irq(&vq->lock); 210 kick = virtqueue_kick_prepare(vq->vq); 211 spin_unlock_irq(&vq->lock); 212 213 if (kick) 214 virtqueue_notify(vq->vq); 215 } 216 217 static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 218 const struct blk_mq_queue_data *bd) 219 { 220 struct virtio_blk *vblk = hctx->queue->queuedata; 221 struct request *req = bd->rq; 222 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 223 unsigned long flags; 224 unsigned int num; 225 int qid = hctx->queue_num; 226 int err; 227 bool notify = false; 228 bool unmap = false; 229 u32 type; 230 231 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 232 233 switch (req_op(req)) { 234 case REQ_OP_READ: 235 case REQ_OP_WRITE: 236 type = 0; 237 break; 238 case REQ_OP_FLUSH: 239 type = VIRTIO_BLK_T_FLUSH; 240 break; 241 case REQ_OP_DISCARD: 242 type = VIRTIO_BLK_T_DISCARD; 243 break; 244 case REQ_OP_WRITE_ZEROES: 245 type = VIRTIO_BLK_T_WRITE_ZEROES; 246 unmap = !(req->cmd_flags & REQ_NOUNMAP); 247 break; 248 case REQ_OP_DRV_IN: 249 type = VIRTIO_BLK_T_GET_ID; 250 break; 251 default: 252 WARN_ON_ONCE(1); 253 return BLK_STS_IOERR; 254 } 255 256 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 257 vbr->out_hdr.sector = type ? 258 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 259 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 260 261 blk_mq_start_request(req); 262 263 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 264 err = virtblk_setup_discard_write_zeroes(req, unmap); 265 if (err) 266 return BLK_STS_RESOURCE; 267 } 268 269 num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 270 if (num) { 271 if (rq_data_dir(req) == WRITE) 272 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 273 else 274 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 275 } 276 277 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 278 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 279 if (err) { 280 virtqueue_kick(vblk->vqs[qid].vq); 281 /* Don't stop the queue if -ENOMEM: we may have failed to 282 * bounce the buffer due to global resource outage. 283 */ 284 if (err == -ENOSPC) 285 blk_mq_stop_hw_queue(hctx); 286 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 287 switch (err) { 288 case -ENOSPC: 289 return BLK_STS_DEV_RESOURCE; 290 case -ENOMEM: 291 return BLK_STS_RESOURCE; 292 default: 293 return BLK_STS_IOERR; 294 } 295 } 296 297 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 298 notify = true; 299 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 300 301 if (notify) 302 virtqueue_notify(vblk->vqs[qid].vq); 303 return BLK_STS_OK; 304 } 305 306 /* return id (s/n) string for *disk to *id_str 307 */ 308 static int virtblk_get_id(struct gendisk *disk, char *id_str) 309 { 310 struct virtio_blk *vblk = disk->private_data; 311 struct request_queue *q = vblk->disk->queue; 312 struct request *req; 313 int err; 314 315 req = blk_get_request(q, REQ_OP_DRV_IN, 0); 316 if (IS_ERR(req)) 317 return PTR_ERR(req); 318 319 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 320 if (err) 321 goto out; 322 323 blk_execute_rq(vblk->disk, req, false); 324 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 325 out: 326 blk_put_request(req); 327 return err; 328 } 329 330 static void virtblk_get(struct virtio_blk *vblk) 331 { 332 refcount_inc(&vblk->refs); 333 } 334 335 static void virtblk_put(struct virtio_blk *vblk) 336 { 337 if (refcount_dec_and_test(&vblk->refs)) { 338 ida_simple_remove(&vd_index_ida, vblk->index); 339 mutex_destroy(&vblk->vdev_mutex); 340 kfree(vblk); 341 } 342 } 343 344 static int virtblk_open(struct block_device *bd, fmode_t mode) 345 { 346 struct virtio_blk *vblk = bd->bd_disk->private_data; 347 int ret = 0; 348 349 mutex_lock(&vblk->vdev_mutex); 350 351 if (vblk->vdev) 352 virtblk_get(vblk); 353 else 354 ret = -ENXIO; 355 356 mutex_unlock(&vblk->vdev_mutex); 357 return ret; 358 } 359 360 static void virtblk_release(struct gendisk *disk, fmode_t mode) 361 { 362 struct virtio_blk *vblk = disk->private_data; 363 364 virtblk_put(vblk); 365 } 366 367 /* We provide getgeo only to please some old bootloader/partitioning tools */ 368 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 369 { 370 struct virtio_blk *vblk = bd->bd_disk->private_data; 371 int ret = 0; 372 373 mutex_lock(&vblk->vdev_mutex); 374 375 if (!vblk->vdev) { 376 ret = -ENXIO; 377 goto out; 378 } 379 380 /* see if the host passed in geometry config */ 381 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 382 virtio_cread(vblk->vdev, struct virtio_blk_config, 383 geometry.cylinders, &geo->cylinders); 384 virtio_cread(vblk->vdev, struct virtio_blk_config, 385 geometry.heads, &geo->heads); 386 virtio_cread(vblk->vdev, struct virtio_blk_config, 387 geometry.sectors, &geo->sectors); 388 } else { 389 /* some standard values, similar to sd */ 390 geo->heads = 1 << 6; 391 geo->sectors = 1 << 5; 392 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 393 } 394 out: 395 mutex_unlock(&vblk->vdev_mutex); 396 return ret; 397 } 398 399 static const struct block_device_operations virtblk_fops = { 400 .owner = THIS_MODULE, 401 .open = virtblk_open, 402 .release = virtblk_release, 403 .getgeo = virtblk_getgeo, 404 }; 405 406 static int index_to_minor(int index) 407 { 408 return index << PART_BITS; 409 } 410 411 static int minor_to_index(int minor) 412 { 413 return minor >> PART_BITS; 414 } 415 416 static ssize_t serial_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418 { 419 struct gendisk *disk = dev_to_disk(dev); 420 int err; 421 422 /* sysfs gives us a PAGE_SIZE buffer */ 423 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 424 425 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 426 err = virtblk_get_id(disk, buf); 427 if (!err) 428 return strlen(buf); 429 430 if (err == -EIO) /* Unsupported? Make it empty. */ 431 return 0; 432 433 return err; 434 } 435 436 static DEVICE_ATTR_RO(serial); 437 438 /* The queue's logical block size must be set before calling this */ 439 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 440 { 441 struct virtio_device *vdev = vblk->vdev; 442 struct request_queue *q = vblk->disk->queue; 443 char cap_str_2[10], cap_str_10[10]; 444 unsigned long long nblocks; 445 u64 capacity; 446 447 /* Host must always specify the capacity. */ 448 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 449 450 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 451 452 string_get_size(nblocks, queue_logical_block_size(q), 453 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 454 string_get_size(nblocks, queue_logical_block_size(q), 455 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 456 457 dev_notice(&vdev->dev, 458 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 459 vblk->disk->disk_name, 460 resize ? "new size: " : "", 461 nblocks, 462 queue_logical_block_size(q), 463 cap_str_10, 464 cap_str_2); 465 466 set_capacity_and_notify(vblk->disk, capacity); 467 } 468 469 static void virtblk_config_changed_work(struct work_struct *work) 470 { 471 struct virtio_blk *vblk = 472 container_of(work, struct virtio_blk, config_work); 473 474 virtblk_update_capacity(vblk, true); 475 } 476 477 static void virtblk_config_changed(struct virtio_device *vdev) 478 { 479 struct virtio_blk *vblk = vdev->priv; 480 481 queue_work(virtblk_wq, &vblk->config_work); 482 } 483 484 static int init_vq(struct virtio_blk *vblk) 485 { 486 int err; 487 int i; 488 vq_callback_t **callbacks; 489 const char **names; 490 struct virtqueue **vqs; 491 unsigned short num_vqs; 492 struct virtio_device *vdev = vblk->vdev; 493 struct irq_affinity desc = { 0, }; 494 495 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 496 struct virtio_blk_config, num_queues, 497 &num_vqs); 498 if (err) 499 num_vqs = 1; 500 501 num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 502 503 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 504 if (!vblk->vqs) 505 return -ENOMEM; 506 507 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 508 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 509 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 510 if (!names || !callbacks || !vqs) { 511 err = -ENOMEM; 512 goto out; 513 } 514 515 for (i = 0; i < num_vqs; i++) { 516 callbacks[i] = virtblk_done; 517 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 518 names[i] = vblk->vqs[i].name; 519 } 520 521 /* Discover virtqueues and write information to configuration. */ 522 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 523 if (err) 524 goto out; 525 526 for (i = 0; i < num_vqs; i++) { 527 spin_lock_init(&vblk->vqs[i].lock); 528 vblk->vqs[i].vq = vqs[i]; 529 } 530 vblk->num_vqs = num_vqs; 531 532 out: 533 kfree(vqs); 534 kfree(callbacks); 535 kfree(names); 536 if (err) 537 kfree(vblk->vqs); 538 return err; 539 } 540 541 /* 542 * Legacy naming scheme used for virtio devices. We are stuck with it for 543 * virtio blk but don't ever use it for any new driver. 544 */ 545 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 546 { 547 const int base = 'z' - 'a' + 1; 548 char *begin = buf + strlen(prefix); 549 char *end = buf + buflen; 550 char *p; 551 int unit; 552 553 p = end - 1; 554 *p = '\0'; 555 unit = base; 556 do { 557 if (p == begin) 558 return -EINVAL; 559 *--p = 'a' + (index % unit); 560 index = (index / unit) - 1; 561 } while (index >= 0); 562 563 memmove(begin, p, end - p); 564 memcpy(buf, prefix, strlen(prefix)); 565 566 return 0; 567 } 568 569 static int virtblk_get_cache_mode(struct virtio_device *vdev) 570 { 571 u8 writeback; 572 int err; 573 574 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 575 struct virtio_blk_config, wce, 576 &writeback); 577 578 /* 579 * If WCE is not configurable and flush is not available, 580 * assume no writeback cache is in use. 581 */ 582 if (err) 583 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 584 585 return writeback; 586 } 587 588 static void virtblk_update_cache_mode(struct virtio_device *vdev) 589 { 590 u8 writeback = virtblk_get_cache_mode(vdev); 591 struct virtio_blk *vblk = vdev->priv; 592 593 blk_queue_write_cache(vblk->disk->queue, writeback, false); 594 } 595 596 static const char *const virtblk_cache_types[] = { 597 "write through", "write back" 598 }; 599 600 static ssize_t 601 cache_type_store(struct device *dev, struct device_attribute *attr, 602 const char *buf, size_t count) 603 { 604 struct gendisk *disk = dev_to_disk(dev); 605 struct virtio_blk *vblk = disk->private_data; 606 struct virtio_device *vdev = vblk->vdev; 607 int i; 608 609 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 610 i = sysfs_match_string(virtblk_cache_types, buf); 611 if (i < 0) 612 return i; 613 614 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 615 virtblk_update_cache_mode(vdev); 616 return count; 617 } 618 619 static ssize_t 620 cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 621 { 622 struct gendisk *disk = dev_to_disk(dev); 623 struct virtio_blk *vblk = disk->private_data; 624 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 625 626 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 627 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 628 } 629 630 static DEVICE_ATTR_RW(cache_type); 631 632 static struct attribute *virtblk_attrs[] = { 633 &dev_attr_serial.attr, 634 &dev_attr_cache_type.attr, 635 NULL, 636 }; 637 638 static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 639 struct attribute *a, int n) 640 { 641 struct device *dev = kobj_to_dev(kobj); 642 struct gendisk *disk = dev_to_disk(dev); 643 struct virtio_blk *vblk = disk->private_data; 644 struct virtio_device *vdev = vblk->vdev; 645 646 if (a == &dev_attr_cache_type.attr && 647 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 648 return S_IRUGO; 649 650 return a->mode; 651 } 652 653 static const struct attribute_group virtblk_attr_group = { 654 .attrs = virtblk_attrs, 655 .is_visible = virtblk_attrs_are_visible, 656 }; 657 658 static const struct attribute_group *virtblk_attr_groups[] = { 659 &virtblk_attr_group, 660 NULL, 661 }; 662 663 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 664 unsigned int hctx_idx, unsigned int numa_node) 665 { 666 struct virtio_blk *vblk = set->driver_data; 667 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 668 669 sg_init_table(vbr->sg, vblk->sg_elems); 670 return 0; 671 } 672 673 static int virtblk_map_queues(struct blk_mq_tag_set *set) 674 { 675 struct virtio_blk *vblk = set->driver_data; 676 677 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 678 vblk->vdev, 0); 679 } 680 681 static const struct blk_mq_ops virtio_mq_ops = { 682 .queue_rq = virtio_queue_rq, 683 .commit_rqs = virtio_commit_rqs, 684 .complete = virtblk_request_done, 685 .init_request = virtblk_init_request, 686 .map_queues = virtblk_map_queues, 687 }; 688 689 static unsigned int virtblk_queue_depth; 690 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 691 692 static int virtblk_probe(struct virtio_device *vdev) 693 { 694 struct virtio_blk *vblk; 695 struct request_queue *q; 696 int err, index; 697 698 u32 v, blk_size, max_size, sg_elems, opt_io_size; 699 u16 min_io_size; 700 u8 physical_block_exp, alignment_offset; 701 unsigned int queue_depth; 702 703 if (!vdev->config->get) { 704 dev_err(&vdev->dev, "%s failure: config access disabled\n", 705 __func__); 706 return -EINVAL; 707 } 708 709 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 710 GFP_KERNEL); 711 if (err < 0) 712 goto out; 713 index = err; 714 715 /* We need to know how many segments before we allocate. */ 716 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 717 struct virtio_blk_config, seg_max, 718 &sg_elems); 719 720 /* We need at least one SG element, whatever they say. */ 721 if (err || !sg_elems) 722 sg_elems = 1; 723 724 /* Prevent integer overflows and honor max vq size */ 725 sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); 726 727 /* We need extra sg elements at head and tail. */ 728 sg_elems += 2; 729 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 730 if (!vblk) { 731 err = -ENOMEM; 732 goto out_free_index; 733 } 734 735 /* This reference is dropped in virtblk_remove(). */ 736 refcount_set(&vblk->refs, 1); 737 mutex_init(&vblk->vdev_mutex); 738 739 vblk->vdev = vdev; 740 vblk->sg_elems = sg_elems; 741 742 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 743 744 err = init_vq(vblk); 745 if (err) 746 goto out_free_vblk; 747 748 /* Default queue sizing is to fill the ring. */ 749 if (!virtblk_queue_depth) { 750 queue_depth = vblk->vqs[0].vq->num_free; 751 /* ... but without indirect descs, we use 2 descs per req */ 752 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 753 queue_depth /= 2; 754 } else { 755 queue_depth = virtblk_queue_depth; 756 } 757 758 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 759 vblk->tag_set.ops = &virtio_mq_ops; 760 vblk->tag_set.queue_depth = queue_depth; 761 vblk->tag_set.numa_node = NUMA_NO_NODE; 762 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 763 vblk->tag_set.cmd_size = 764 sizeof(struct virtblk_req) + 765 sizeof(struct scatterlist) * sg_elems; 766 vblk->tag_set.driver_data = vblk; 767 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 768 769 err = blk_mq_alloc_tag_set(&vblk->tag_set); 770 if (err) 771 goto out_free_vq; 772 773 vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); 774 if (IS_ERR(vblk->disk)) { 775 err = PTR_ERR(vblk->disk); 776 goto out_free_tags; 777 } 778 q = vblk->disk->queue; 779 780 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 781 782 vblk->disk->major = major; 783 vblk->disk->first_minor = index_to_minor(index); 784 vblk->disk->minors = 1 << PART_BITS; 785 vblk->disk->private_data = vblk; 786 vblk->disk->fops = &virtblk_fops; 787 vblk->disk->flags |= GENHD_FL_EXT_DEVT; 788 vblk->index = index; 789 790 /* configure queue flush support */ 791 virtblk_update_cache_mode(vdev); 792 793 /* If disk is read-only in the host, the guest should obey */ 794 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 795 set_disk_ro(vblk->disk, 1); 796 797 /* We can handle whatever the host told us to handle. */ 798 blk_queue_max_segments(q, vblk->sg_elems-2); 799 800 /* No real sector limit. */ 801 blk_queue_max_hw_sectors(q, -1U); 802 803 max_size = virtio_max_dma_size(vdev); 804 805 /* Host can optionally specify maximum segment size and number of 806 * segments. */ 807 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 808 struct virtio_blk_config, size_max, &v); 809 if (!err) 810 max_size = min(max_size, v); 811 812 blk_queue_max_segment_size(q, max_size); 813 814 /* Host can optionally specify the block size of the device */ 815 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 816 struct virtio_blk_config, blk_size, 817 &blk_size); 818 if (!err) 819 blk_queue_logical_block_size(q, blk_size); 820 else 821 blk_size = queue_logical_block_size(q); 822 823 /* Use topology information if available */ 824 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 825 struct virtio_blk_config, physical_block_exp, 826 &physical_block_exp); 827 if (!err && physical_block_exp) 828 blk_queue_physical_block_size(q, 829 blk_size * (1 << physical_block_exp)); 830 831 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 832 struct virtio_blk_config, alignment_offset, 833 &alignment_offset); 834 if (!err && alignment_offset) 835 blk_queue_alignment_offset(q, blk_size * alignment_offset); 836 837 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 838 struct virtio_blk_config, min_io_size, 839 &min_io_size); 840 if (!err && min_io_size) 841 blk_queue_io_min(q, blk_size * min_io_size); 842 843 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 844 struct virtio_blk_config, opt_io_size, 845 &opt_io_size); 846 if (!err && opt_io_size) 847 blk_queue_io_opt(q, blk_size * opt_io_size); 848 849 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 850 q->limits.discard_granularity = blk_size; 851 852 virtio_cread(vdev, struct virtio_blk_config, 853 discard_sector_alignment, &v); 854 q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; 855 856 virtio_cread(vdev, struct virtio_blk_config, 857 max_discard_sectors, &v); 858 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 859 860 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 861 &v); 862 blk_queue_max_discard_segments(q, 863 min_not_zero(v, 864 MAX_DISCARD_SEGMENTS)); 865 866 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 867 } 868 869 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 870 virtio_cread(vdev, struct virtio_blk_config, 871 max_write_zeroes_sectors, &v); 872 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 873 } 874 875 virtblk_update_capacity(vblk, false); 876 virtio_device_ready(vdev); 877 878 err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 879 if (err) 880 goto out_cleanup_disk; 881 882 return 0; 883 884 out_cleanup_disk: 885 blk_cleanup_disk(vblk->disk); 886 out_free_tags: 887 blk_mq_free_tag_set(&vblk->tag_set); 888 out_free_vq: 889 vdev->config->del_vqs(vdev); 890 kfree(vblk->vqs); 891 out_free_vblk: 892 kfree(vblk); 893 out_free_index: 894 ida_simple_remove(&vd_index_ida, index); 895 out: 896 return err; 897 } 898 899 static void virtblk_remove(struct virtio_device *vdev) 900 { 901 struct virtio_blk *vblk = vdev->priv; 902 903 /* Make sure no work handler is accessing the device. */ 904 flush_work(&vblk->config_work); 905 906 del_gendisk(vblk->disk); 907 blk_cleanup_disk(vblk->disk); 908 blk_mq_free_tag_set(&vblk->tag_set); 909 910 mutex_lock(&vblk->vdev_mutex); 911 912 /* Stop all the virtqueues. */ 913 vdev->config->reset(vdev); 914 915 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 916 vblk->vdev = NULL; 917 918 vdev->config->del_vqs(vdev); 919 kfree(vblk->vqs); 920 921 mutex_unlock(&vblk->vdev_mutex); 922 923 virtblk_put(vblk); 924 } 925 926 #ifdef CONFIG_PM_SLEEP 927 static int virtblk_freeze(struct virtio_device *vdev) 928 { 929 struct virtio_blk *vblk = vdev->priv; 930 931 /* Ensure we don't receive any more interrupts */ 932 vdev->config->reset(vdev); 933 934 /* Make sure no work handler is accessing the device. */ 935 flush_work(&vblk->config_work); 936 937 blk_mq_quiesce_queue(vblk->disk->queue); 938 939 vdev->config->del_vqs(vdev); 940 kfree(vblk->vqs); 941 942 return 0; 943 } 944 945 static int virtblk_restore(struct virtio_device *vdev) 946 { 947 struct virtio_blk *vblk = vdev->priv; 948 int ret; 949 950 ret = init_vq(vdev->priv); 951 if (ret) 952 return ret; 953 954 virtio_device_ready(vdev); 955 956 blk_mq_unquiesce_queue(vblk->disk->queue); 957 return 0; 958 } 959 #endif 960 961 static const struct virtio_device_id id_table[] = { 962 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 963 { 0 }, 964 }; 965 966 static unsigned int features_legacy[] = { 967 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 968 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 969 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 970 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 971 } 972 ; 973 static unsigned int features[] = { 974 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 975 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 976 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 977 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 978 }; 979 980 static struct virtio_driver virtio_blk = { 981 .feature_table = features, 982 .feature_table_size = ARRAY_SIZE(features), 983 .feature_table_legacy = features_legacy, 984 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 985 .driver.name = KBUILD_MODNAME, 986 .driver.owner = THIS_MODULE, 987 .id_table = id_table, 988 .probe = virtblk_probe, 989 .remove = virtblk_remove, 990 .config_changed = virtblk_config_changed, 991 #ifdef CONFIG_PM_SLEEP 992 .freeze = virtblk_freeze, 993 .restore = virtblk_restore, 994 #endif 995 }; 996 997 static int __init init(void) 998 { 999 int error; 1000 1001 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1002 if (!virtblk_wq) 1003 return -ENOMEM; 1004 1005 major = register_blkdev(0, "virtblk"); 1006 if (major < 0) { 1007 error = major; 1008 goto out_destroy_workqueue; 1009 } 1010 1011 error = register_virtio_driver(&virtio_blk); 1012 if (error) 1013 goto out_unregister_blkdev; 1014 return 0; 1015 1016 out_unregister_blkdev: 1017 unregister_blkdev(major, "virtblk"); 1018 out_destroy_workqueue: 1019 destroy_workqueue(virtblk_wq); 1020 return error; 1021 } 1022 1023 static void __exit fini(void) 1024 { 1025 unregister_virtio_driver(&virtio_blk); 1026 unregister_blkdev(major, "virtblk"); 1027 destroy_workqueue(virtblk_wq); 1028 } 1029 module_init(init); 1030 module_exit(fini); 1031 1032 MODULE_DEVICE_TABLE(virtio, id_table); 1033 MODULE_DESCRIPTION("Virtio block driver"); 1034 MODULE_LICENSE("GPL"); 1035