1 // SPDX-License-Identifier: GPL-2.0-only 2 //#define DEBUG 3 #include <linux/spinlock.h> 4 #include <linux/slab.h> 5 #include <linux/blkdev.h> 6 #include <linux/hdreg.h> 7 #include <linux/module.h> 8 #include <linux/mutex.h> 9 #include <linux/interrupt.h> 10 #include <linux/virtio.h> 11 #include <linux/virtio_blk.h> 12 #include <linux/scatterlist.h> 13 #include <linux/string_helpers.h> 14 #include <linux/idr.h> 15 #include <linux/blk-mq.h> 16 #include <linux/blk-mq-virtio.h> 17 #include <linux/numa.h> 18 #include <uapi/linux/virtio_ring.h> 19 20 #define PART_BITS 4 21 #define VQ_NAME_LEN 16 22 #define MAX_DISCARD_SEGMENTS 256u 23 24 /* The maximum number of sg elements that fit into a virtqueue */ 25 #define VIRTIO_BLK_MAX_SG_ELEMS 32768 26 27 #ifdef CONFIG_ARCH_NO_SG_CHAIN 28 #define VIRTIO_BLK_INLINE_SG_CNT 0 29 #else 30 #define VIRTIO_BLK_INLINE_SG_CNT 2 31 #endif 32 33 static unsigned int num_request_queues; 34 module_param(num_request_queues, uint, 0644); 35 MODULE_PARM_DESC(num_request_queues, 36 "Limit the number of request queues to use for blk device. " 37 "0 for no limit. " 38 "Values > nr_cpu_ids truncated to nr_cpu_ids."); 39 40 static int major; 41 static DEFINE_IDA(vd_index_ida); 42 43 static struct workqueue_struct *virtblk_wq; 44 45 struct virtio_blk_vq { 46 struct virtqueue *vq; 47 spinlock_t lock; 48 char name[VQ_NAME_LEN]; 49 } ____cacheline_aligned_in_smp; 50 51 struct virtio_blk { 52 /* 53 * This mutex must be held by anything that may run after 54 * virtblk_remove() sets vblk->vdev to NULL. 55 * 56 * blk-mq, virtqueue processing, and sysfs attribute code paths are 57 * shut down before vblk->vdev is set to NULL and therefore do not need 58 * to hold this mutex. 59 */ 60 struct mutex vdev_mutex; 61 struct virtio_device *vdev; 62 63 /* The disk structure for the kernel. */ 64 struct gendisk *disk; 65 66 /* Block layer tags. */ 67 struct blk_mq_tag_set tag_set; 68 69 /* Process context for config space updates */ 70 struct work_struct config_work; 71 72 /* 73 * Tracks references from block_device_operations open/release and 74 * virtio_driver probe/remove so this object can be freed once no 75 * longer in use. 76 */ 77 refcount_t refs; 78 79 /* Ida index - used to track minor number allocations. */ 80 int index; 81 82 /* num of vqs */ 83 int num_vqs; 84 struct virtio_blk_vq *vqs; 85 }; 86 87 struct virtblk_req { 88 struct virtio_blk_outhdr out_hdr; 89 u8 status; 90 struct sg_table sg_table; 91 struct scatterlist sg[]; 92 }; 93 94 static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 95 { 96 switch (vbr->status) { 97 case VIRTIO_BLK_S_OK: 98 return BLK_STS_OK; 99 case VIRTIO_BLK_S_UNSUPP: 100 return BLK_STS_NOTSUPP; 101 default: 102 return BLK_STS_IOERR; 103 } 104 } 105 106 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 107 struct scatterlist *data_sg, bool have_data) 108 { 109 struct scatterlist hdr, status, *sgs[3]; 110 unsigned int num_out = 0, num_in = 0; 111 112 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 113 sgs[num_out++] = &hdr; 114 115 if (have_data) { 116 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 117 sgs[num_out++] = data_sg; 118 else 119 sgs[num_out + num_in++] = data_sg; 120 } 121 122 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 123 sgs[num_out + num_in++] = &status; 124 125 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 126 } 127 128 static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 129 { 130 unsigned short segments = blk_rq_nr_discard_segments(req); 131 unsigned short n = 0; 132 struct virtio_blk_discard_write_zeroes *range; 133 struct bio *bio; 134 u32 flags = 0; 135 136 if (unmap) 137 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 138 139 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 140 if (!range) 141 return -ENOMEM; 142 143 /* 144 * Single max discard segment means multi-range discard isn't 145 * supported, and block layer only runs contiguity merge like 146 * normal RW request. So we can't reply on bio for retrieving 147 * each range info. 148 */ 149 if (queue_max_discard_segments(req->q) == 1) { 150 range[0].flags = cpu_to_le32(flags); 151 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 152 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 153 n = 1; 154 } else { 155 __rq_for_each_bio(bio, req) { 156 u64 sector = bio->bi_iter.bi_sector; 157 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 158 159 range[n].flags = cpu_to_le32(flags); 160 range[n].num_sectors = cpu_to_le32(num_sectors); 161 range[n].sector = cpu_to_le64(sector); 162 n++; 163 } 164 } 165 166 WARN_ON_ONCE(n != segments); 167 168 req->special_vec.bv_page = virt_to_page(range); 169 req->special_vec.bv_offset = offset_in_page(range); 170 req->special_vec.bv_len = sizeof(*range) * segments; 171 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 172 173 return 0; 174 } 175 176 static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr) 177 { 178 if (blk_rq_nr_phys_segments(req)) 179 sg_free_table_chained(&vbr->sg_table, 180 VIRTIO_BLK_INLINE_SG_CNT); 181 } 182 183 static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, 184 struct virtblk_req *vbr) 185 { 186 int err; 187 188 if (!blk_rq_nr_phys_segments(req)) 189 return 0; 190 191 vbr->sg_table.sgl = vbr->sg; 192 err = sg_alloc_table_chained(&vbr->sg_table, 193 blk_rq_nr_phys_segments(req), 194 vbr->sg_table.sgl, 195 VIRTIO_BLK_INLINE_SG_CNT); 196 if (unlikely(err)) 197 return -ENOMEM; 198 199 return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); 200 } 201 202 static void virtblk_cleanup_cmd(struct request *req) 203 { 204 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) 205 kfree(bvec_virt(&req->special_vec)); 206 } 207 208 static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, 209 struct request *req, 210 struct virtblk_req *vbr) 211 { 212 bool unmap = false; 213 u32 type; 214 215 vbr->out_hdr.sector = 0; 216 217 switch (req_op(req)) { 218 case REQ_OP_READ: 219 type = VIRTIO_BLK_T_IN; 220 vbr->out_hdr.sector = cpu_to_virtio64(vdev, 221 blk_rq_pos(req)); 222 break; 223 case REQ_OP_WRITE: 224 type = VIRTIO_BLK_T_OUT; 225 vbr->out_hdr.sector = cpu_to_virtio64(vdev, 226 blk_rq_pos(req)); 227 break; 228 case REQ_OP_FLUSH: 229 type = VIRTIO_BLK_T_FLUSH; 230 break; 231 case REQ_OP_DISCARD: 232 type = VIRTIO_BLK_T_DISCARD; 233 break; 234 case REQ_OP_WRITE_ZEROES: 235 type = VIRTIO_BLK_T_WRITE_ZEROES; 236 unmap = !(req->cmd_flags & REQ_NOUNMAP); 237 break; 238 case REQ_OP_DRV_IN: 239 type = VIRTIO_BLK_T_GET_ID; 240 break; 241 default: 242 WARN_ON_ONCE(1); 243 return BLK_STS_IOERR; 244 } 245 246 vbr->out_hdr.type = cpu_to_virtio32(vdev, type); 247 vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); 248 249 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 250 if (virtblk_setup_discard_write_zeroes(req, unmap)) 251 return BLK_STS_RESOURCE; 252 } 253 254 return 0; 255 } 256 257 static inline void virtblk_request_done(struct request *req) 258 { 259 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 260 261 virtblk_unmap_data(req, vbr); 262 virtblk_cleanup_cmd(req); 263 blk_mq_end_request(req, virtblk_result(vbr)); 264 } 265 266 static void virtblk_done(struct virtqueue *vq) 267 { 268 struct virtio_blk *vblk = vq->vdev->priv; 269 bool req_done = false; 270 int qid = vq->index; 271 struct virtblk_req *vbr; 272 unsigned long flags; 273 unsigned int len; 274 275 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 276 do { 277 virtqueue_disable_cb(vq); 278 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 279 struct request *req = blk_mq_rq_from_pdu(vbr); 280 281 if (likely(!blk_should_fake_timeout(req->q))) 282 blk_mq_complete_request(req); 283 req_done = true; 284 } 285 if (unlikely(virtqueue_is_broken(vq))) 286 break; 287 } while (!virtqueue_enable_cb(vq)); 288 289 /* In case queue is stopped waiting for more buffers. */ 290 if (req_done) 291 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 292 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 293 } 294 295 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 296 { 297 struct virtio_blk *vblk = hctx->queue->queuedata; 298 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 299 bool kick; 300 301 spin_lock_irq(&vq->lock); 302 kick = virtqueue_kick_prepare(vq->vq); 303 spin_unlock_irq(&vq->lock); 304 305 if (kick) 306 virtqueue_notify(vq->vq); 307 } 308 309 static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 310 const struct blk_mq_queue_data *bd) 311 { 312 struct virtio_blk *vblk = hctx->queue->queuedata; 313 struct request *req = bd->rq; 314 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 315 unsigned long flags; 316 int num; 317 int qid = hctx->queue_num; 318 bool notify = false; 319 blk_status_t status; 320 int err; 321 322 status = virtblk_setup_cmd(vblk->vdev, req, vbr); 323 if (unlikely(status)) 324 return status; 325 326 blk_mq_start_request(req); 327 328 num = virtblk_map_data(hctx, req, vbr); 329 if (unlikely(num < 0)) { 330 virtblk_cleanup_cmd(req); 331 return BLK_STS_RESOURCE; 332 } 333 334 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 335 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); 336 if (err) { 337 virtqueue_kick(vblk->vqs[qid].vq); 338 /* Don't stop the queue if -ENOMEM: we may have failed to 339 * bounce the buffer due to global resource outage. 340 */ 341 if (err == -ENOSPC) 342 blk_mq_stop_hw_queue(hctx); 343 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 344 virtblk_unmap_data(req, vbr); 345 virtblk_cleanup_cmd(req); 346 switch (err) { 347 case -ENOSPC: 348 return BLK_STS_DEV_RESOURCE; 349 case -ENOMEM: 350 return BLK_STS_RESOURCE; 351 default: 352 return BLK_STS_IOERR; 353 } 354 } 355 356 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 357 notify = true; 358 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 359 360 if (notify) 361 virtqueue_notify(vblk->vqs[qid].vq); 362 return BLK_STS_OK; 363 } 364 365 /* return id (s/n) string for *disk to *id_str 366 */ 367 static int virtblk_get_id(struct gendisk *disk, char *id_str) 368 { 369 struct virtio_blk *vblk = disk->private_data; 370 struct request_queue *q = vblk->disk->queue; 371 struct request *req; 372 int err; 373 374 req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); 375 if (IS_ERR(req)) 376 return PTR_ERR(req); 377 378 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 379 if (err) 380 goto out; 381 382 blk_execute_rq(req, false); 383 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 384 out: 385 blk_mq_free_request(req); 386 return err; 387 } 388 389 static void virtblk_get(struct virtio_blk *vblk) 390 { 391 refcount_inc(&vblk->refs); 392 } 393 394 static void virtblk_put(struct virtio_blk *vblk) 395 { 396 if (refcount_dec_and_test(&vblk->refs)) { 397 ida_simple_remove(&vd_index_ida, vblk->index); 398 mutex_destroy(&vblk->vdev_mutex); 399 kfree(vblk); 400 } 401 } 402 403 static int virtblk_open(struct block_device *bd, fmode_t mode) 404 { 405 struct virtio_blk *vblk = bd->bd_disk->private_data; 406 int ret = 0; 407 408 mutex_lock(&vblk->vdev_mutex); 409 410 if (vblk->vdev) 411 virtblk_get(vblk); 412 else 413 ret = -ENXIO; 414 415 mutex_unlock(&vblk->vdev_mutex); 416 return ret; 417 } 418 419 static void virtblk_release(struct gendisk *disk, fmode_t mode) 420 { 421 struct virtio_blk *vblk = disk->private_data; 422 423 virtblk_put(vblk); 424 } 425 426 /* We provide getgeo only to please some old bootloader/partitioning tools */ 427 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 428 { 429 struct virtio_blk *vblk = bd->bd_disk->private_data; 430 int ret = 0; 431 432 mutex_lock(&vblk->vdev_mutex); 433 434 if (!vblk->vdev) { 435 ret = -ENXIO; 436 goto out; 437 } 438 439 /* see if the host passed in geometry config */ 440 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 441 virtio_cread(vblk->vdev, struct virtio_blk_config, 442 geometry.cylinders, &geo->cylinders); 443 virtio_cread(vblk->vdev, struct virtio_blk_config, 444 geometry.heads, &geo->heads); 445 virtio_cread(vblk->vdev, struct virtio_blk_config, 446 geometry.sectors, &geo->sectors); 447 } else { 448 /* some standard values, similar to sd */ 449 geo->heads = 1 << 6; 450 geo->sectors = 1 << 5; 451 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 452 } 453 out: 454 mutex_unlock(&vblk->vdev_mutex); 455 return ret; 456 } 457 458 static const struct block_device_operations virtblk_fops = { 459 .owner = THIS_MODULE, 460 .open = virtblk_open, 461 .release = virtblk_release, 462 .getgeo = virtblk_getgeo, 463 }; 464 465 static int index_to_minor(int index) 466 { 467 return index << PART_BITS; 468 } 469 470 static int minor_to_index(int minor) 471 { 472 return minor >> PART_BITS; 473 } 474 475 static ssize_t serial_show(struct device *dev, 476 struct device_attribute *attr, char *buf) 477 { 478 struct gendisk *disk = dev_to_disk(dev); 479 int err; 480 481 /* sysfs gives us a PAGE_SIZE buffer */ 482 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 483 484 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 485 err = virtblk_get_id(disk, buf); 486 if (!err) 487 return strlen(buf); 488 489 if (err == -EIO) /* Unsupported? Make it empty. */ 490 return 0; 491 492 return err; 493 } 494 495 static DEVICE_ATTR_RO(serial); 496 497 /* The queue's logical block size must be set before calling this */ 498 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 499 { 500 struct virtio_device *vdev = vblk->vdev; 501 struct request_queue *q = vblk->disk->queue; 502 char cap_str_2[10], cap_str_10[10]; 503 unsigned long long nblocks; 504 u64 capacity; 505 506 /* Host must always specify the capacity. */ 507 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 508 509 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 510 511 string_get_size(nblocks, queue_logical_block_size(q), 512 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 513 string_get_size(nblocks, queue_logical_block_size(q), 514 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 515 516 dev_notice(&vdev->dev, 517 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 518 vblk->disk->disk_name, 519 resize ? "new size: " : "", 520 nblocks, 521 queue_logical_block_size(q), 522 cap_str_10, 523 cap_str_2); 524 525 set_capacity_and_notify(vblk->disk, capacity); 526 } 527 528 static void virtblk_config_changed_work(struct work_struct *work) 529 { 530 struct virtio_blk *vblk = 531 container_of(work, struct virtio_blk, config_work); 532 533 virtblk_update_capacity(vblk, true); 534 } 535 536 static void virtblk_config_changed(struct virtio_device *vdev) 537 { 538 struct virtio_blk *vblk = vdev->priv; 539 540 queue_work(virtblk_wq, &vblk->config_work); 541 } 542 543 static int init_vq(struct virtio_blk *vblk) 544 { 545 int err; 546 int i; 547 vq_callback_t **callbacks; 548 const char **names; 549 struct virtqueue **vqs; 550 unsigned short num_vqs; 551 struct virtio_device *vdev = vblk->vdev; 552 struct irq_affinity desc = { 0, }; 553 554 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 555 struct virtio_blk_config, num_queues, 556 &num_vqs); 557 if (err) 558 num_vqs = 1; 559 if (!err && !num_vqs) { 560 dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); 561 return -EINVAL; 562 } 563 564 num_vqs = min_t(unsigned int, 565 min_not_zero(num_request_queues, nr_cpu_ids), 566 num_vqs); 567 568 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 569 if (!vblk->vqs) 570 return -ENOMEM; 571 572 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 573 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 574 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 575 if (!names || !callbacks || !vqs) { 576 err = -ENOMEM; 577 goto out; 578 } 579 580 for (i = 0; i < num_vqs; i++) { 581 callbacks[i] = virtblk_done; 582 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 583 names[i] = vblk->vqs[i].name; 584 } 585 586 /* Discover virtqueues and write information to configuration. */ 587 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 588 if (err) 589 goto out; 590 591 for (i = 0; i < num_vqs; i++) { 592 spin_lock_init(&vblk->vqs[i].lock); 593 vblk->vqs[i].vq = vqs[i]; 594 } 595 vblk->num_vqs = num_vqs; 596 597 out: 598 kfree(vqs); 599 kfree(callbacks); 600 kfree(names); 601 if (err) 602 kfree(vblk->vqs); 603 return err; 604 } 605 606 /* 607 * Legacy naming scheme used for virtio devices. We are stuck with it for 608 * virtio blk but don't ever use it for any new driver. 609 */ 610 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 611 { 612 const int base = 'z' - 'a' + 1; 613 char *begin = buf + strlen(prefix); 614 char *end = buf + buflen; 615 char *p; 616 int unit; 617 618 p = end - 1; 619 *p = '\0'; 620 unit = base; 621 do { 622 if (p == begin) 623 return -EINVAL; 624 *--p = 'a' + (index % unit); 625 index = (index / unit) - 1; 626 } while (index >= 0); 627 628 memmove(begin, p, end - p); 629 memcpy(buf, prefix, strlen(prefix)); 630 631 return 0; 632 } 633 634 static int virtblk_get_cache_mode(struct virtio_device *vdev) 635 { 636 u8 writeback; 637 int err; 638 639 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 640 struct virtio_blk_config, wce, 641 &writeback); 642 643 /* 644 * If WCE is not configurable and flush is not available, 645 * assume no writeback cache is in use. 646 */ 647 if (err) 648 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 649 650 return writeback; 651 } 652 653 static void virtblk_update_cache_mode(struct virtio_device *vdev) 654 { 655 u8 writeback = virtblk_get_cache_mode(vdev); 656 struct virtio_blk *vblk = vdev->priv; 657 658 blk_queue_write_cache(vblk->disk->queue, writeback, false); 659 } 660 661 static const char *const virtblk_cache_types[] = { 662 "write through", "write back" 663 }; 664 665 static ssize_t 666 cache_type_store(struct device *dev, struct device_attribute *attr, 667 const char *buf, size_t count) 668 { 669 struct gendisk *disk = dev_to_disk(dev); 670 struct virtio_blk *vblk = disk->private_data; 671 struct virtio_device *vdev = vblk->vdev; 672 int i; 673 674 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 675 i = sysfs_match_string(virtblk_cache_types, buf); 676 if (i < 0) 677 return i; 678 679 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 680 virtblk_update_cache_mode(vdev); 681 return count; 682 } 683 684 static ssize_t 685 cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 686 { 687 struct gendisk *disk = dev_to_disk(dev); 688 struct virtio_blk *vblk = disk->private_data; 689 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 690 691 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 692 return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]); 693 } 694 695 static DEVICE_ATTR_RW(cache_type); 696 697 static struct attribute *virtblk_attrs[] = { 698 &dev_attr_serial.attr, 699 &dev_attr_cache_type.attr, 700 NULL, 701 }; 702 703 static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 704 struct attribute *a, int n) 705 { 706 struct device *dev = kobj_to_dev(kobj); 707 struct gendisk *disk = dev_to_disk(dev); 708 struct virtio_blk *vblk = disk->private_data; 709 struct virtio_device *vdev = vblk->vdev; 710 711 if (a == &dev_attr_cache_type.attr && 712 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 713 return S_IRUGO; 714 715 return a->mode; 716 } 717 718 static const struct attribute_group virtblk_attr_group = { 719 .attrs = virtblk_attrs, 720 .is_visible = virtblk_attrs_are_visible, 721 }; 722 723 static const struct attribute_group *virtblk_attr_groups[] = { 724 &virtblk_attr_group, 725 NULL, 726 }; 727 728 static int virtblk_map_queues(struct blk_mq_tag_set *set) 729 { 730 struct virtio_blk *vblk = set->driver_data; 731 732 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 733 vblk->vdev, 0); 734 } 735 736 static const struct blk_mq_ops virtio_mq_ops = { 737 .queue_rq = virtio_queue_rq, 738 .commit_rqs = virtio_commit_rqs, 739 .complete = virtblk_request_done, 740 .map_queues = virtblk_map_queues, 741 }; 742 743 static unsigned int virtblk_queue_depth; 744 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 745 746 static int virtblk_probe(struct virtio_device *vdev) 747 { 748 struct virtio_blk *vblk; 749 struct request_queue *q; 750 int err, index; 751 752 u32 v, blk_size, max_size, sg_elems, opt_io_size; 753 u16 min_io_size; 754 u8 physical_block_exp, alignment_offset; 755 unsigned int queue_depth; 756 757 if (!vdev->config->get) { 758 dev_err(&vdev->dev, "%s failure: config access disabled\n", 759 __func__); 760 return -EINVAL; 761 } 762 763 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 764 GFP_KERNEL); 765 if (err < 0) 766 goto out; 767 index = err; 768 769 /* We need to know how many segments before we allocate. */ 770 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 771 struct virtio_blk_config, seg_max, 772 &sg_elems); 773 774 /* We need at least one SG element, whatever they say. */ 775 if (err || !sg_elems) 776 sg_elems = 1; 777 778 /* Prevent integer overflows and honor max vq size */ 779 sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); 780 781 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 782 if (!vblk) { 783 err = -ENOMEM; 784 goto out_free_index; 785 } 786 787 /* This reference is dropped in virtblk_remove(). */ 788 refcount_set(&vblk->refs, 1); 789 mutex_init(&vblk->vdev_mutex); 790 791 vblk->vdev = vdev; 792 793 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 794 795 err = init_vq(vblk); 796 if (err) 797 goto out_free_vblk; 798 799 /* Default queue sizing is to fill the ring. */ 800 if (!virtblk_queue_depth) { 801 queue_depth = vblk->vqs[0].vq->num_free; 802 /* ... but without indirect descs, we use 2 descs per req */ 803 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 804 queue_depth /= 2; 805 } else { 806 queue_depth = virtblk_queue_depth; 807 } 808 809 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 810 vblk->tag_set.ops = &virtio_mq_ops; 811 vblk->tag_set.queue_depth = queue_depth; 812 vblk->tag_set.numa_node = NUMA_NO_NODE; 813 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 814 vblk->tag_set.cmd_size = 815 sizeof(struct virtblk_req) + 816 sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; 817 vblk->tag_set.driver_data = vblk; 818 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 819 820 err = blk_mq_alloc_tag_set(&vblk->tag_set); 821 if (err) 822 goto out_free_vq; 823 824 vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); 825 if (IS_ERR(vblk->disk)) { 826 err = PTR_ERR(vblk->disk); 827 goto out_free_tags; 828 } 829 q = vblk->disk->queue; 830 831 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 832 833 vblk->disk->major = major; 834 vblk->disk->first_minor = index_to_minor(index); 835 vblk->disk->minors = 1 << PART_BITS; 836 vblk->disk->private_data = vblk; 837 vblk->disk->fops = &virtblk_fops; 838 vblk->index = index; 839 840 /* configure queue flush support */ 841 virtblk_update_cache_mode(vdev); 842 843 /* If disk is read-only in the host, the guest should obey */ 844 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 845 set_disk_ro(vblk->disk, 1); 846 847 /* We can handle whatever the host told us to handle. */ 848 blk_queue_max_segments(q, sg_elems); 849 850 /* No real sector limit. */ 851 blk_queue_max_hw_sectors(q, -1U); 852 853 max_size = virtio_max_dma_size(vdev); 854 855 /* Host can optionally specify maximum segment size and number of 856 * segments. */ 857 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 858 struct virtio_blk_config, size_max, &v); 859 if (!err) 860 max_size = min(max_size, v); 861 862 blk_queue_max_segment_size(q, max_size); 863 864 /* Host can optionally specify the block size of the device */ 865 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 866 struct virtio_blk_config, blk_size, 867 &blk_size); 868 if (!err) { 869 err = blk_validate_block_size(blk_size); 870 if (err) { 871 dev_err(&vdev->dev, 872 "virtio_blk: invalid block size: 0x%x\n", 873 blk_size); 874 goto out_cleanup_disk; 875 } 876 877 blk_queue_logical_block_size(q, blk_size); 878 } else 879 blk_size = queue_logical_block_size(q); 880 881 /* Use topology information if available */ 882 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 883 struct virtio_blk_config, physical_block_exp, 884 &physical_block_exp); 885 if (!err && physical_block_exp) 886 blk_queue_physical_block_size(q, 887 blk_size * (1 << physical_block_exp)); 888 889 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 890 struct virtio_blk_config, alignment_offset, 891 &alignment_offset); 892 if (!err && alignment_offset) 893 blk_queue_alignment_offset(q, blk_size * alignment_offset); 894 895 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 896 struct virtio_blk_config, min_io_size, 897 &min_io_size); 898 if (!err && min_io_size) 899 blk_queue_io_min(q, blk_size * min_io_size); 900 901 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 902 struct virtio_blk_config, opt_io_size, 903 &opt_io_size); 904 if (!err && opt_io_size) 905 blk_queue_io_opt(q, blk_size * opt_io_size); 906 907 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 908 q->limits.discard_granularity = blk_size; 909 910 virtio_cread(vdev, struct virtio_blk_config, 911 discard_sector_alignment, &v); 912 q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; 913 914 virtio_cread(vdev, struct virtio_blk_config, 915 max_discard_sectors, &v); 916 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 917 918 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 919 &v); 920 921 /* 922 * max_discard_seg == 0 is out of spec but we always 923 * handled it. 924 */ 925 if (!v) 926 v = sg_elems; 927 blk_queue_max_discard_segments(q, 928 min(v, MAX_DISCARD_SEGMENTS)); 929 930 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 931 } 932 933 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 934 virtio_cread(vdev, struct virtio_blk_config, 935 max_write_zeroes_sectors, &v); 936 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 937 } 938 939 virtblk_update_capacity(vblk, false); 940 virtio_device_ready(vdev); 941 942 err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 943 if (err) 944 goto out_cleanup_disk; 945 946 return 0; 947 948 out_cleanup_disk: 949 blk_cleanup_disk(vblk->disk); 950 out_free_tags: 951 blk_mq_free_tag_set(&vblk->tag_set); 952 out_free_vq: 953 vdev->config->del_vqs(vdev); 954 kfree(vblk->vqs); 955 out_free_vblk: 956 kfree(vblk); 957 out_free_index: 958 ida_simple_remove(&vd_index_ida, index); 959 out: 960 return err; 961 } 962 963 static void virtblk_remove(struct virtio_device *vdev) 964 { 965 struct virtio_blk *vblk = vdev->priv; 966 967 /* Make sure no work handler is accessing the device. */ 968 flush_work(&vblk->config_work); 969 970 del_gendisk(vblk->disk); 971 blk_cleanup_disk(vblk->disk); 972 blk_mq_free_tag_set(&vblk->tag_set); 973 974 mutex_lock(&vblk->vdev_mutex); 975 976 /* Stop all the virtqueues. */ 977 virtio_reset_device(vdev); 978 979 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 980 vblk->vdev = NULL; 981 982 vdev->config->del_vqs(vdev); 983 kfree(vblk->vqs); 984 985 mutex_unlock(&vblk->vdev_mutex); 986 987 virtblk_put(vblk); 988 } 989 990 #ifdef CONFIG_PM_SLEEP 991 static int virtblk_freeze(struct virtio_device *vdev) 992 { 993 struct virtio_blk *vblk = vdev->priv; 994 995 /* Ensure we don't receive any more interrupts */ 996 virtio_reset_device(vdev); 997 998 /* Make sure no work handler is accessing the device. */ 999 flush_work(&vblk->config_work); 1000 1001 blk_mq_quiesce_queue(vblk->disk->queue); 1002 1003 vdev->config->del_vqs(vdev); 1004 kfree(vblk->vqs); 1005 1006 return 0; 1007 } 1008 1009 static int virtblk_restore(struct virtio_device *vdev) 1010 { 1011 struct virtio_blk *vblk = vdev->priv; 1012 int ret; 1013 1014 ret = init_vq(vdev->priv); 1015 if (ret) 1016 return ret; 1017 1018 virtio_device_ready(vdev); 1019 1020 blk_mq_unquiesce_queue(vblk->disk->queue); 1021 return 0; 1022 } 1023 #endif 1024 1025 static const struct virtio_device_id id_table[] = { 1026 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 1027 { 0 }, 1028 }; 1029 1030 static unsigned int features_legacy[] = { 1031 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 1032 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 1033 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 1034 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 1035 } 1036 ; 1037 static unsigned int features[] = { 1038 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 1039 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 1040 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 1041 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 1042 }; 1043 1044 static struct virtio_driver virtio_blk = { 1045 .feature_table = features, 1046 .feature_table_size = ARRAY_SIZE(features), 1047 .feature_table_legacy = features_legacy, 1048 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 1049 .driver.name = KBUILD_MODNAME, 1050 .driver.owner = THIS_MODULE, 1051 .id_table = id_table, 1052 .probe = virtblk_probe, 1053 .remove = virtblk_remove, 1054 .config_changed = virtblk_config_changed, 1055 #ifdef CONFIG_PM_SLEEP 1056 .freeze = virtblk_freeze, 1057 .restore = virtblk_restore, 1058 #endif 1059 }; 1060 1061 static int __init init(void) 1062 { 1063 int error; 1064 1065 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1066 if (!virtblk_wq) 1067 return -ENOMEM; 1068 1069 major = register_blkdev(0, "virtblk"); 1070 if (major < 0) { 1071 error = major; 1072 goto out_destroy_workqueue; 1073 } 1074 1075 error = register_virtio_driver(&virtio_blk); 1076 if (error) 1077 goto out_unregister_blkdev; 1078 return 0; 1079 1080 out_unregister_blkdev: 1081 unregister_blkdev(major, "virtblk"); 1082 out_destroy_workqueue: 1083 destroy_workqueue(virtblk_wq); 1084 return error; 1085 } 1086 1087 static void __exit fini(void) 1088 { 1089 unregister_virtio_driver(&virtio_blk); 1090 unregister_blkdev(major, "virtblk"); 1091 destroy_workqueue(virtblk_wq); 1092 } 1093 module_init(init); 1094 module_exit(fini); 1095 1096 MODULE_DEVICE_TABLE(virtio, id_table); 1097 MODULE_DESCRIPTION("Virtio block driver"); 1098 MODULE_LICENSE("GPL"); 1099