//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>

#define PART_BITS 4

static bool use_bio;
module_param(use_bio, bool, S_IRUGO);

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk
{
	struct virtio_device *vdev;
	struct virtqueue *vq;
	wait_queue_head_t queue_wait;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Lock for config space updates */
	struct mutex config_lock;

	/* enable config space updates */
	bool config_enable;

	/* What host tells us, plus 2 for header & trailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
	struct request *req;
	struct bio *bio;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	struct work_struct work;
	struct virtio_blk *vblk;
	int flags;
	u8 status;
	struct scatterlist sg[];
};

enum {
	VBLK_IS_FLUSH	= 1,
	VBLK_REQ_FLUSH	= 2,
	VBLK_REQ_DATA	= 4,
	VBLK_REQ_FUA	= 8,
};

static inline int virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return 0;
	case VIRTIO_BLK_S_UNSUPP:
		return -ENOTTY;
	default:
		return -EIO;
	}
}

static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
						    gfp_t gfp_mask)
{
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, gfp_mask);
	if (!vbr)
		return NULL;

	vbr->vblk = vblk;
	if (use_bio)
		sg_init_table(vbr->sg, vblk->sg_elems);

	return vbr;
}

static int __virtblk_add_req(struct virtqueue *vq,
			     struct virtblk_req *vbr,
			     struct scatterlist *data_sg,
			     bool have_data)
{
	struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
	unsigned int num_out = 0, num_in = 0;
	int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;

	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &hdr;

	/*
	 * If this is a packet command we need a couple of additional headers.
	 * Behind the normal outhdr we put a segment with the scsi command
	 * block, and before the normal inhdr we put the sense data and the
	 * inhdr with additional status information.
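	 *
	 * The resulting descriptor order is therefore: outhdr, [cmd,]
	 * data, [sense, inhdr,] status -- at most six entries, which is
	 * why sgs[] above has six slots.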
	 */
	if (type == VIRTIO_BLK_T_SCSI_CMD) {
		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
		sgs[num_out++] = &cmd;
	}

	if (have_data) {
		if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
			sgs[num_out++] = data_sg;
		else
			sgs[num_out + num_in++] = data_sg;
	}

	if (type == VIRTIO_BLK_T_SCSI_CMD) {
		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
		sgs[num_out + num_in++] = &sense;
		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
		sgs[num_out + num_in++] = &inhdr;
	}

	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
	sgs[num_out + num_in++] = &status;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
{
	struct virtio_blk *vblk = vbr->vblk;
	DEFINE_WAIT(wait);
	int ret;

	spin_lock_irq(vblk->disk->queue->queue_lock);
	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
						 have_data)) < 0)) {
		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		spin_unlock_irq(vblk->disk->queue->queue_lock);
		io_schedule();
		spin_lock_irq(vblk->disk->queue->queue_lock);

		finish_wait(&vblk->queue_wait, &wait);
	}

	virtqueue_kick(vblk->vq);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
}

static void virtblk_bio_send_flush(struct virtblk_req *vbr)
{
	vbr->flags |= VBLK_IS_FLUSH;
	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
	vbr->out_hdr.sector = 0;
	vbr->out_hdr.ioprio = 0;

	virtblk_add_req(vbr, false);
}

static void virtblk_bio_send_data(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct bio *bio = vbr->bio;
	bool have_data;

	vbr->flags &= ~VBLK_IS_FLUSH;
	vbr->out_hdr.type = 0;
	vbr->out_hdr.sector = bio->bi_sector;
	vbr->out_hdr.ioprio = bio_prio(bio);

	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
		have_data = true;
		if (bio->bi_rw & REQ_WRITE)
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
		else
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
	} else
		have_data = false;

	virtblk_add_req(vbr, have_data);
}

static void virtblk_bio_send_data_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_data(vbr);
}

static void virtblk_bio_send_flush_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_flush(vbr);
}

static inline void virtblk_request_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct request *req = vbr->req;
	int error = virtblk_result(vbr);

	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
		req->resid_len = vbr->in_hdr.residual;
		req->sense_len = vbr->in_hdr.sense_len;
		req->errors = vbr->in_hdr.errors;
	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
		req->errors = (error != 0);
	}

	__blk_end_request_all(req, error);
	mempool_free(vbr, vblk->pool);
}

static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (vbr->flags & VBLK_REQ_DATA) {
		/* Send out the actual write data */
		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
		/* Send out a flush before ending the bio */
		vbr->flags &= ~VBLK_REQ_DATA;
		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
		virtblk_bio_flush_done(vbr);
	else
		virtblk_bio_data_done(vbr);
}

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool bio_done = false, req_done = false;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
			if (vbr->bio) {
				virtblk_bio_done(vbr);
				bio_done = true;
			} else {
				virtblk_request_done(vbr);
				req_done = true;
			}
		}
	} while (!virtqueue_enable_cb(vq));
	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);

	if (bio_done)
		wake_up(&vblk->queue_wait);
}

static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned int num;
	struct virtblk_req *vbr;

	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;
	vbr->bio = NULL;
	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
	if (num) {
		if (rq_data_dir(vbr->req) == WRITE)
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
		else
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
	}

	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	return true;
}

static void virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/* If this request fails, stop queue and wait for something to
		   finish to restart it.
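		   (virtblk_done() calls blk_start_queue() again once a
		   request completes.)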
		 */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

	if (issued)
		virtqueue_kick(vblk->vq);
}

static void virtblk_make_request(struct request_queue *q, struct bio *bio)
{
	struct virtio_blk *vblk = q->queuedata;
	struct virtblk_req *vbr;

	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);

	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
	if (!vbr) {
		bio_endio(bio, -ENOMEM);
		return;
	}

	vbr->bio = bio;
	vbr->flags = 0;
	if (bio->bi_rw & REQ_FLUSH)
		vbr->flags |= VBLK_REQ_FLUSH;
	if (bio->bi_rw & REQ_FUA)
		vbr->flags |= VBLK_REQ_FUA;
	if (bio->bi_size)
		vbr->flags |= VBLK_REQ_DATA;

	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
		virtblk_bio_send_flush(vbr);
	else
		virtblk_bio_send_data(vbr);
}

/* Return the ID (serial number) string for *disk in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			 unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host can support it.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
				  (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl  = virtblk_ioctl,
	.owner  = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t virtblk_serial_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported?  Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	char *envp[] = { "RESIZE=1", NULL };
	u64 capacity, size;

	mutex_lock(&vblk->config_lock);
	if (!vblk->config_enable)
		goto done;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "new size: %llu %d-byte logical blocks (%s/%s)\n",
		   (unsigned long long)capacity,
		   queue_logical_block_size(q),
		   cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
	revalidate_disk(vblk->disk);
	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
done:
	mutex_unlock(&vblk->config_lock);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err = 0;

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
	if (IS_ERR(vblk->vq))
		err = PTR_ERR(vblk->vq);

	return err;
}

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
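 *
 * Examples, with the "vd" prefix used below: index 0 -> "vda",
 * index 25 -> "vdz", index 26 -> "vdaa", index 701 -> "vdzz",
 * index 702 -> "vdaaa".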
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				offsetof(struct virtio_blk_config, wce),
				&writeback);
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	if (writeback)
		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
	else
		blk_queue_flush(vblk->disk->queue, 0);

	revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;
	u8 writeback;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
		if (sysfs_streq(buf, virtblk_cache_types[i]))
			break;

	if (i < 0)
		return -EINVAL;

	writeback = i;
	vdev->config->set(vdev,
			  offsetof(struct virtio_blk_config, wce),
			  &writeback, sizeof(writeback));

	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static const struct device_attribute dev_attr_cache_type_ro =
	__ATTR(cache_type, S_IRUGO,
	       virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
	__ATTR(cache_type, S_IRUGO|S_IWUSR,
	       virtblk_cache_type_show, virtblk_cache_type_store);

static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;
	int pool_size;

	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail.
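	 * One in front holds the request header, one at the end holds the
	 * status byte -- the "plus 2" from struct virtio_blk.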
	 */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	init_waitqueue_head(&vblk->queue_wait);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	mutex_init(&vblk->config_lock);

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
	vblk->config_enable = true;

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	pool_size = sizeof(struct virtblk_req);
	if (use_bio)
		pool_size += sizeof(struct scatterlist) * sg_elems;
	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	if (use_bio)
		blk_queue_make_request(q, virtblk_make_request);
	q->queuedata = vblk;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems - 2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	/* Host can optionally specify maximum segment size and number of
	 * segments.
	 */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
				offsetof(struct virtio_blk_config, physical_block_exp),
				&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
				offsetof(struct virtio_blk_config, alignment_offset),
				&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
				offsetof(struct virtio_blk_config, min_io_size),
				&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
				offsetof(struct virtio_blk_config, opt_io_size),
				&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_rw);
	else
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_ro);
	if (err)
		goto out_del_disk;
	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int index = vblk->index;
	int refc;

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	flush_work(&vblk->config_work);

	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);

	/* Only free device id if we don't have any users */
	if (refc == 1)
		ida_simple_remove(&vd_index_ida, index);
}

#ifdef CONFIG_PM
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Prevent config work handler from accessing the device.
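	 * (Same dance as virtblk_remove(); the flush_work() below waits
	 * for any update that is already running.)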
	 */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	flush_work(&vblk->config_work);

	spin_lock_irq(vblk->disk->queue->queue_lock);
	blk_stop_queue(vblk->disk->queue);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
	blk_sync_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	vblk->config_enable = true;
	ret = init_vq(vdev->priv);
	if (!ret) {
		spin_lock_irq(vblk->disk->queue->queue_lock);
		blk_start_queue(vblk->disk->queue);
		spin_unlock_irq(vblk->disk->queue->queue_lock);
	}
	return ret;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};

static struct virtio_driver virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= virtblk_remove,
	.config_changed		= virtblk_config_changed,
#ifdef CONFIG_PM
	.freeze			= virtblk_freeze,
	.restore		= virtblk_restore,
#endif
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");