1 /* 2 * Virtio Block Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu-common.h" 15 #include "qemu/iov.h" 16 #include "qemu/error-report.h" 17 #include "trace.h" 18 #include "hw/block/block.h" 19 #include "sysemu/blockdev.h" 20 #include "hw/virtio/virtio-blk.h" 21 #include "dataplane/virtio-blk.h" 22 #include "migration/migration.h" 23 #include "block/scsi.h" 24 #ifdef __linux__ 25 # include <scsi/sg.h> 26 #endif 27 #include "hw/virtio/virtio-bus.h" 28 #include "hw/virtio/virtio-access.h" 29 30 VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) 31 { 32 VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); 33 req->dev = s; 34 req->qiov.size = 0; 35 req->next = NULL; 36 return req; 37 } 38 39 void virtio_blk_free_request(VirtIOBlockReq *req) 40 { 41 if (req) { 42 g_slice_free(VirtIOBlockReq, req); 43 } 44 } 45 46 static void virtio_blk_complete_request(VirtIOBlockReq *req, 47 unsigned char status) 48 { 49 VirtIOBlock *s = req->dev; 50 VirtIODevice *vdev = VIRTIO_DEVICE(s); 51 52 trace_virtio_blk_req_complete(req, status); 53 54 stb_p(&req->in->status, status); 55 virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); 56 virtio_notify(vdev, s->vq); 57 } 58 59 static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) 60 { 61 req->dev->complete_request(req, status); 62 } 63 64 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, 65 bool is_read) 66 { 67 BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error); 68 VirtIOBlock *s = req->dev; 69 70 if (action == BLOCK_ERROR_ACTION_STOP) { 71 req->next = s->rq; 72 s->rq = req; 73 } else if (action == BLOCK_ERROR_ACTION_REPORT) { 74 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 75 block_acct_done(bdrv_get_stats(s->bs), &req->acct); 76 virtio_blk_free_request(req); 77 } 78 79 bdrv_error_action(s->bs, action, is_read, error); 80 return action != BLOCK_ERROR_ACTION_IGNORE; 81 } 82 83 static void virtio_blk_rw_complete(void *opaque, int ret) 84 { 85 VirtIOBlockReq *req = opaque; 86 87 trace_virtio_blk_rw_complete(req, ret); 88 89 if (ret) { 90 int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); 91 bool is_read = !(p & VIRTIO_BLK_T_OUT); 92 if (virtio_blk_handle_rw_error(req, -ret, is_read)) 93 return; 94 } 95 96 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 97 block_acct_done(bdrv_get_stats(req->dev->bs), &req->acct); 98 virtio_blk_free_request(req); 99 } 100 101 static void virtio_blk_flush_complete(void *opaque, int ret) 102 { 103 VirtIOBlockReq *req = opaque; 104 105 if (ret) { 106 if (virtio_blk_handle_rw_error(req, -ret, 0)) { 107 return; 108 } 109 } 110 111 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 112 block_acct_done(bdrv_get_stats(req->dev->bs), &req->acct); 113 virtio_blk_free_request(req); 114 } 115 116 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) 117 { 118 VirtIOBlockReq *req = virtio_blk_alloc_request(s); 119 120 if (!virtqueue_pop(s->vq, &req->elem)) { 121 virtio_blk_free_request(req); 122 return NULL; 123 } 124 125 return req; 126 } 127 128 int virtio_blk_handle_scsi_req(VirtIOBlock *blk, 129 VirtQueueElement *elem) 130 { 131 int status = VIRTIO_BLK_S_OK; 132 struct virtio_scsi_inhdr *scsi = NULL; 133 VirtIODevice *vdev = VIRTIO_DEVICE(blk); 134 135 #ifdef __linux__ 136 int i; 137 struct sg_io_hdr hdr; 138 #endif 139 140 /* 141 * We require at least one output segment each for the virtio_blk_outhdr 142 * and the SCSI command block. 143 * 144 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr 145 * and the sense buffer pointer in the input segments. 146 */ 147 if (elem->out_num < 2 || elem->in_num < 3) { 148 status = VIRTIO_BLK_S_IOERR; 149 goto fail; 150 } 151 152 /* 153 * The scsi inhdr is placed in the second-to-last input segment, just 154 * before the regular inhdr. 155 */ 156 scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; 157 158 if (!blk->blk.scsi) { 159 status = VIRTIO_BLK_S_UNSUPP; 160 goto fail; 161 } 162 163 /* 164 * No support for bidirection commands yet. 165 */ 166 if (elem->out_num > 2 && elem->in_num > 3) { 167 status = VIRTIO_BLK_S_UNSUPP; 168 goto fail; 169 } 170 171 #ifdef __linux__ 172 memset(&hdr, 0, sizeof(struct sg_io_hdr)); 173 hdr.interface_id = 'S'; 174 hdr.cmd_len = elem->out_sg[1].iov_len; 175 hdr.cmdp = elem->out_sg[1].iov_base; 176 hdr.dxfer_len = 0; 177 178 if (elem->out_num > 2) { 179 /* 180 * If there are more than the minimally required 2 output segments 181 * there is write payload starting from the third iovec. 182 */ 183 hdr.dxfer_direction = SG_DXFER_TO_DEV; 184 hdr.iovec_count = elem->out_num - 2; 185 186 for (i = 0; i < hdr.iovec_count; i++) 187 hdr.dxfer_len += elem->out_sg[i + 2].iov_len; 188 189 hdr.dxferp = elem->out_sg + 2; 190 191 } else if (elem->in_num > 3) { 192 /* 193 * If we have more than 3 input segments the guest wants to actually 194 * read data. 195 */ 196 hdr.dxfer_direction = SG_DXFER_FROM_DEV; 197 hdr.iovec_count = elem->in_num - 3; 198 for (i = 0; i < hdr.iovec_count; i++) 199 hdr.dxfer_len += elem->in_sg[i].iov_len; 200 201 hdr.dxferp = elem->in_sg; 202 } else { 203 /* 204 * Some SCSI commands don't actually transfer any data. 205 */ 206 hdr.dxfer_direction = SG_DXFER_NONE; 207 } 208 209 hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; 210 hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; 211 212 status = bdrv_ioctl(blk->bs, SG_IO, &hdr); 213 if (status) { 214 status = VIRTIO_BLK_S_UNSUPP; 215 goto fail; 216 } 217 218 /* 219 * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) 220 * clear the masked_status field [hence status gets cleared too, see 221 * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED 222 * status has occurred. However they do set DRIVER_SENSE in driver_status 223 * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. 224 */ 225 if (hdr.status == 0 && hdr.sb_len_wr > 0) { 226 hdr.status = CHECK_CONDITION; 227 } 228 229 virtio_stl_p(vdev, &scsi->errors, 230 hdr.status | (hdr.msg_status << 8) | 231 (hdr.host_status << 16) | (hdr.driver_status << 24)); 232 virtio_stl_p(vdev, &scsi->residual, hdr.resid); 233 virtio_stl_p(vdev, &scsi->sense_len, hdr.sb_len_wr); 234 virtio_stl_p(vdev, &scsi->data_len, hdr.dxfer_len); 235 236 return status; 237 #else 238 abort(); 239 #endif 240 241 fail: 242 /* Just put anything nonzero so that the ioctl fails in the guest. */ 243 if (scsi) { 244 virtio_stl_p(vdev, &scsi->errors, 255); 245 } 246 return status; 247 } 248 249 static void virtio_blk_handle_scsi(VirtIOBlockReq *req) 250 { 251 int status; 252 253 status = virtio_blk_handle_scsi_req(req->dev, &req->elem); 254 virtio_blk_req_complete(req, status); 255 virtio_blk_free_request(req); 256 } 257 258 void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb) 259 { 260 int i, ret; 261 262 if (!mrb->num_writes) { 263 return; 264 } 265 266 ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes); 267 if (ret != 0) { 268 for (i = 0; i < mrb->num_writes; i++) { 269 if (mrb->blkreq[i].error) { 270 virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO); 271 } 272 } 273 } 274 275 mrb->num_writes = 0; 276 } 277 278 static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) 279 { 280 block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, 0, 281 BLOCK_ACCT_FLUSH); 282 283 /* 284 * Make sure all outstanding writes are posted to the backing device. 285 */ 286 virtio_submit_multiwrite(req->dev->bs, mrb); 287 bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req); 288 } 289 290 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, 291 uint64_t sector, size_t size) 292 { 293 uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; 294 uint64_t total_sectors; 295 296 if (sector & dev->sector_mask) { 297 return false; 298 } 299 if (size % dev->conf->logical_block_size) { 300 return false; 301 } 302 bdrv_get_geometry(dev->bs, &total_sectors); 303 if (sector > total_sectors || nb_sectors > total_sectors - sector) { 304 return false; 305 } 306 return true; 307 } 308 309 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) 310 { 311 BlockRequest *blkreq; 312 uint64_t sector; 313 314 sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); 315 316 trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512); 317 318 if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { 319 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 320 virtio_blk_free_request(req); 321 return; 322 } 323 324 block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, req->qiov.size, 325 BLOCK_ACCT_WRITE); 326 327 if (mrb->num_writes == 32) { 328 virtio_submit_multiwrite(req->dev->bs, mrb); 329 } 330 331 blkreq = &mrb->blkreq[mrb->num_writes]; 332 blkreq->sector = sector; 333 blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE; 334 blkreq->qiov = &req->qiov; 335 blkreq->cb = virtio_blk_rw_complete; 336 blkreq->opaque = req; 337 blkreq->error = 0; 338 339 mrb->num_writes++; 340 } 341 342 static void virtio_blk_handle_read(VirtIOBlockReq *req) 343 { 344 uint64_t sector; 345 346 sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); 347 348 trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512); 349 350 if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { 351 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 352 virtio_blk_free_request(req); 353 return; 354 } 355 356 block_acct_start(bdrv_get_stats(req->dev->bs), &req->acct, req->qiov.size, 357 BLOCK_ACCT_READ); 358 bdrv_aio_readv(req->dev->bs, sector, &req->qiov, 359 req->qiov.size / BDRV_SECTOR_SIZE, 360 virtio_blk_rw_complete, req); 361 } 362 363 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) 364 { 365 uint32_t type; 366 struct iovec *in_iov = req->elem.in_sg; 367 struct iovec *iov = req->elem.out_sg; 368 unsigned in_num = req->elem.in_num; 369 unsigned out_num = req->elem.out_num; 370 371 if (req->elem.out_num < 1 || req->elem.in_num < 1) { 372 error_report("virtio-blk missing headers"); 373 exit(1); 374 } 375 376 if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, 377 sizeof(req->out)) != sizeof(req->out))) { 378 error_report("virtio-blk request outhdr too short"); 379 exit(1); 380 } 381 382 iov_discard_front(&iov, &out_num, sizeof(req->out)); 383 384 if (in_num < 1 || 385 in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 386 error_report("virtio-blk request inhdr too short"); 387 exit(1); 388 } 389 390 req->in = (void *)in_iov[in_num - 1].iov_base 391 + in_iov[in_num - 1].iov_len 392 - sizeof(struct virtio_blk_inhdr); 393 iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); 394 395 type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); 396 397 if (type & VIRTIO_BLK_T_FLUSH) { 398 virtio_blk_handle_flush(req, mrb); 399 } else if (type & VIRTIO_BLK_T_SCSI_CMD) { 400 virtio_blk_handle_scsi(req); 401 } else if (type & VIRTIO_BLK_T_GET_ID) { 402 VirtIOBlock *s = req->dev; 403 404 /* 405 * NB: per existing s/n string convention the string is 406 * terminated by '\0' only when shorter than buffer. 407 */ 408 const char *serial = s->blk.serial ? s->blk.serial : ""; 409 size_t size = MIN(strlen(serial) + 1, 410 MIN(iov_size(in_iov, in_num), 411 VIRTIO_BLK_ID_BYTES)); 412 iov_from_buf(in_iov, in_num, 0, serial, size); 413 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 414 virtio_blk_free_request(req); 415 } else if (type & VIRTIO_BLK_T_OUT) { 416 qemu_iovec_init_external(&req->qiov, iov, out_num); 417 virtio_blk_handle_write(req, mrb); 418 } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) { 419 /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */ 420 qemu_iovec_init_external(&req->qiov, in_iov, in_num); 421 virtio_blk_handle_read(req); 422 } else { 423 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); 424 virtio_blk_free_request(req); 425 } 426 } 427 428 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) 429 { 430 VirtIOBlock *s = VIRTIO_BLK(vdev); 431 VirtIOBlockReq *req; 432 MultiReqBuffer mrb = { 433 .num_writes = 0, 434 }; 435 436 /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start 437 * dataplane here instead of waiting for .set_status(). 438 */ 439 if (s->dataplane) { 440 virtio_blk_data_plane_start(s->dataplane); 441 return; 442 } 443 444 while ((req = virtio_blk_get_request(s))) { 445 virtio_blk_handle_request(req, &mrb); 446 } 447 448 virtio_submit_multiwrite(s->bs, &mrb); 449 450 /* 451 * FIXME: Want to check for completions before returning to guest mode, 452 * so cached reads and writes are reported as quickly as possible. But 453 * that should be done in the generic block layer. 454 */ 455 } 456 457 static void virtio_blk_dma_restart_bh(void *opaque) 458 { 459 VirtIOBlock *s = opaque; 460 VirtIOBlockReq *req = s->rq; 461 MultiReqBuffer mrb = { 462 .num_writes = 0, 463 }; 464 465 qemu_bh_delete(s->bh); 466 s->bh = NULL; 467 468 s->rq = NULL; 469 470 while (req) { 471 VirtIOBlockReq *next = req->next; 472 virtio_blk_handle_request(req, &mrb); 473 req = next; 474 } 475 476 virtio_submit_multiwrite(s->bs, &mrb); 477 } 478 479 static void virtio_blk_dma_restart_cb(void *opaque, int running, 480 RunState state) 481 { 482 VirtIOBlock *s = opaque; 483 484 if (!running) { 485 return; 486 } 487 488 if (!s->bh) { 489 s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs), 490 virtio_blk_dma_restart_bh, s); 491 qemu_bh_schedule(s->bh); 492 } 493 } 494 495 static void virtio_blk_reset(VirtIODevice *vdev) 496 { 497 VirtIOBlock *s = VIRTIO_BLK(vdev); 498 499 if (s->dataplane) { 500 virtio_blk_data_plane_stop(s->dataplane); 501 } 502 503 /* 504 * This should cancel pending requests, but can't do nicely until there 505 * are per-device request lists. 506 */ 507 bdrv_drain_all(); 508 bdrv_set_enable_write_cache(s->bs, s->original_wce); 509 } 510 511 /* coalesce internal state, copy to pci i/o region 0 512 */ 513 static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) 514 { 515 VirtIOBlock *s = VIRTIO_BLK(vdev); 516 struct virtio_blk_config blkcfg; 517 uint64_t capacity; 518 int blk_size = s->conf->logical_block_size; 519 520 bdrv_get_geometry(s->bs, &capacity); 521 memset(&blkcfg, 0, sizeof(blkcfg)); 522 virtio_stq_p(vdev, &blkcfg.capacity, capacity); 523 virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2); 524 virtio_stw_p(vdev, &blkcfg.cylinders, s->conf->cyls); 525 virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); 526 virtio_stw_p(vdev, &blkcfg.min_io_size, s->conf->min_io_size / blk_size); 527 virtio_stw_p(vdev, &blkcfg.opt_io_size, s->conf->opt_io_size / blk_size); 528 blkcfg.heads = s->conf->heads; 529 /* 530 * We must ensure that the block device capacity is a multiple of 531 * the logical block size. If that is not the case, let's use 532 * sector_mask to adopt the geometry to have a correct picture. 533 * For those devices where the capacity is ok for the given geometry 534 * we don't touch the sector value of the geometry, since some devices 535 * (like s390 dasd) need a specific value. Here the capacity is already 536 * cyls*heads*secs*blk_size and the sector value is not block size 537 * divided by 512 - instead it is the amount of blk_size blocks 538 * per track (cylinder). 539 */ 540 if (bdrv_getlength(s->bs) / s->conf->heads / s->conf->secs % blk_size) { 541 blkcfg.sectors = s->conf->secs & ~s->sector_mask; 542 } else { 543 blkcfg.sectors = s->conf->secs; 544 } 545 blkcfg.size_max = 0; 546 blkcfg.physical_block_exp = get_physical_block_exp(s->conf); 547 blkcfg.alignment_offset = 0; 548 blkcfg.wce = bdrv_enable_write_cache(s->bs); 549 memcpy(config, &blkcfg, sizeof(struct virtio_blk_config)); 550 } 551 552 static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) 553 { 554 VirtIOBlock *s = VIRTIO_BLK(vdev); 555 struct virtio_blk_config blkcfg; 556 557 memcpy(&blkcfg, config, sizeof(blkcfg)); 558 559 aio_context_acquire(bdrv_get_aio_context(s->bs)); 560 bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0); 561 aio_context_release(bdrv_get_aio_context(s->bs)); 562 } 563 564 static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) 565 { 566 VirtIOBlock *s = VIRTIO_BLK(vdev); 567 568 features |= (1 << VIRTIO_BLK_F_SEG_MAX); 569 features |= (1 << VIRTIO_BLK_F_GEOMETRY); 570 features |= (1 << VIRTIO_BLK_F_TOPOLOGY); 571 features |= (1 << VIRTIO_BLK_F_BLK_SIZE); 572 features |= (1 << VIRTIO_BLK_F_SCSI); 573 574 if (s->blk.config_wce) { 575 features |= (1 << VIRTIO_BLK_F_CONFIG_WCE); 576 } 577 if (bdrv_enable_write_cache(s->bs)) 578 features |= (1 << VIRTIO_BLK_F_WCE); 579 580 if (bdrv_is_read_only(s->bs)) 581 features |= 1 << VIRTIO_BLK_F_RO; 582 583 return features; 584 } 585 586 static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) 587 { 588 VirtIOBlock *s = VIRTIO_BLK(vdev); 589 uint32_t features; 590 591 if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | 592 VIRTIO_CONFIG_S_DRIVER_OK))) { 593 virtio_blk_data_plane_stop(s->dataplane); 594 } 595 596 if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { 597 return; 598 } 599 600 features = vdev->guest_features; 601 602 /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send 603 * cache flushes. Thus, the "auto writethrough" behavior is never 604 * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. 605 * Leaving it enabled would break the following sequence: 606 * 607 * Guest started with "-drive cache=writethrough" 608 * Guest sets status to 0 609 * Guest sets DRIVER bit in status field 610 * Guest reads host features (WCE=0, CONFIG_WCE=1) 611 * Guest writes guest features (WCE=0, CONFIG_WCE=1) 612 * Guest writes 1 to the WCE configuration field (writeback mode) 613 * Guest sets DRIVER_OK bit in status field 614 * 615 * s->bs would erroneously be placed in writethrough mode. 616 */ 617 if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) { 618 aio_context_acquire(bdrv_get_aio_context(s->bs)); 619 bdrv_set_enable_write_cache(s->bs, 620 !!(features & (1 << VIRTIO_BLK_F_WCE))); 621 aio_context_release(bdrv_get_aio_context(s->bs)); 622 } 623 } 624 625 static void virtio_blk_save(QEMUFile *f, void *opaque) 626 { 627 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 628 629 virtio_save(vdev, f); 630 } 631 632 static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) 633 { 634 VirtIOBlock *s = VIRTIO_BLK(vdev); 635 VirtIOBlockReq *req = s->rq; 636 637 while (req) { 638 qemu_put_sbyte(f, 1); 639 qemu_put_buffer(f, (unsigned char *)&req->elem, 640 sizeof(VirtQueueElement)); 641 req = req->next; 642 } 643 qemu_put_sbyte(f, 0); 644 } 645 646 static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) 647 { 648 VirtIOBlock *s = opaque; 649 VirtIODevice *vdev = VIRTIO_DEVICE(s); 650 651 if (version_id != 2) 652 return -EINVAL; 653 654 return virtio_load(vdev, f, version_id); 655 } 656 657 static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, 658 int version_id) 659 { 660 VirtIOBlock *s = VIRTIO_BLK(vdev); 661 662 while (qemu_get_sbyte(f)) { 663 VirtIOBlockReq *req = virtio_blk_alloc_request(s); 664 qemu_get_buffer(f, (unsigned char *)&req->elem, 665 sizeof(VirtQueueElement)); 666 req->next = s->rq; 667 s->rq = req; 668 669 virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, 670 req->elem.in_num, 1); 671 virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, 672 req->elem.out_num, 0); 673 } 674 675 return 0; 676 } 677 678 static void virtio_blk_resize(void *opaque) 679 { 680 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 681 682 virtio_notify_config(vdev); 683 } 684 685 static const BlockDevOps virtio_block_ops = { 686 .resize_cb = virtio_blk_resize, 687 }; 688 689 /* Disable dataplane thread during live migration since it does not 690 * update the dirty memory bitmap yet. 691 */ 692 static void virtio_blk_migration_state_changed(Notifier *notifier, void *data) 693 { 694 VirtIOBlock *s = container_of(notifier, VirtIOBlock, 695 migration_state_notifier); 696 MigrationState *mig = data; 697 Error *err = NULL; 698 699 if (migration_in_setup(mig)) { 700 if (!s->dataplane) { 701 return; 702 } 703 virtio_blk_data_plane_destroy(s->dataplane); 704 s->dataplane = NULL; 705 } else if (migration_has_finished(mig) || 706 migration_has_failed(mig)) { 707 if (s->dataplane) { 708 return; 709 } 710 bdrv_drain_all(); /* complete in-flight non-dataplane requests */ 711 virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->blk, 712 &s->dataplane, &err); 713 if (err != NULL) { 714 error_report("%s", error_get_pretty(err)); 715 error_free(err); 716 } 717 } 718 } 719 720 static void virtio_blk_device_realize(DeviceState *dev, Error **errp) 721 { 722 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 723 VirtIOBlock *s = VIRTIO_BLK(dev); 724 VirtIOBlkConf *blk = &(s->blk); 725 Error *err = NULL; 726 static int virtio_blk_id; 727 728 if (!blk->conf.bs) { 729 error_setg(errp, "drive property not set"); 730 return; 731 } 732 if (!bdrv_is_inserted(blk->conf.bs)) { 733 error_setg(errp, "Device needs media, but drive is empty"); 734 return; 735 } 736 737 blkconf_serial(&blk->conf, &blk->serial); 738 s->original_wce = bdrv_enable_write_cache(blk->conf.bs); 739 blkconf_geometry(&blk->conf, NULL, 65535, 255, 255, &err); 740 if (err) { 741 error_propagate(errp, err); 742 return; 743 } 744 745 virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, 746 sizeof(struct virtio_blk_config)); 747 748 s->bs = blk->conf.bs; 749 s->conf = &blk->conf; 750 s->rq = NULL; 751 s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1; 752 753 s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); 754 s->complete_request = virtio_blk_complete_request; 755 virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err); 756 if (err != NULL) { 757 error_propagate(errp, err); 758 virtio_cleanup(vdev); 759 return; 760 } 761 s->migration_state_notifier.notify = virtio_blk_migration_state_changed; 762 add_migration_state_change_notifier(&s->migration_state_notifier); 763 764 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); 765 register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, 766 virtio_blk_save, virtio_blk_load, s); 767 bdrv_set_dev_ops(s->bs, &virtio_block_ops, s); 768 bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size); 769 770 bdrv_iostatus_enable(s->bs); 771 772 add_boot_device_path(s->conf->bootindex, dev, "/disk@0,0"); 773 } 774 775 static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) 776 { 777 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 778 VirtIOBlock *s = VIRTIO_BLK(dev); 779 780 remove_migration_state_change_notifier(&s->migration_state_notifier); 781 virtio_blk_data_plane_destroy(s->dataplane); 782 s->dataplane = NULL; 783 qemu_del_vm_change_state_handler(s->change); 784 unregister_savevm(dev, "virtio-blk", s); 785 blockdev_mark_auto_del(s->bs); 786 virtio_cleanup(vdev); 787 } 788 789 static void virtio_blk_instance_init(Object *obj) 790 { 791 VirtIOBlock *s = VIRTIO_BLK(obj); 792 793 object_property_add_link(obj, "iothread", TYPE_IOTHREAD, 794 (Object **)&s->blk.iothread, 795 qdev_prop_allow_set_link_before_realize, 796 OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); 797 } 798 799 static Property virtio_blk_properties[] = { 800 DEFINE_BLOCK_PROPERTIES(VirtIOBlock, blk.conf), 801 DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, blk.conf), 802 DEFINE_PROP_STRING("serial", VirtIOBlock, blk.serial), 803 DEFINE_PROP_BIT("config-wce", VirtIOBlock, blk.config_wce, 0, true), 804 #ifdef __linux__ 805 DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true), 806 #endif 807 DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false), 808 DEFINE_PROP_END_OF_LIST(), 809 }; 810 811 static void virtio_blk_class_init(ObjectClass *klass, void *data) 812 { 813 DeviceClass *dc = DEVICE_CLASS(klass); 814 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 815 816 dc->props = virtio_blk_properties; 817 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 818 vdc->realize = virtio_blk_device_realize; 819 vdc->unrealize = virtio_blk_device_unrealize; 820 vdc->get_config = virtio_blk_update_config; 821 vdc->set_config = virtio_blk_set_config; 822 vdc->get_features = virtio_blk_get_features; 823 vdc->set_status = virtio_blk_set_status; 824 vdc->reset = virtio_blk_reset; 825 vdc->save = virtio_blk_save_device; 826 vdc->load = virtio_blk_load_device; 827 } 828 829 static const TypeInfo virtio_device_info = { 830 .name = TYPE_VIRTIO_BLK, 831 .parent = TYPE_VIRTIO_DEVICE, 832 .instance_size = sizeof(VirtIOBlock), 833 .instance_init = virtio_blk_instance_init, 834 .class_init = virtio_blk_class_init, 835 }; 836 837 static void virtio_register_types(void) 838 { 839 type_register_static(&virtio_device_info); 840 } 841 842 type_init(virtio_register_types) 843