/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "qemu/iov.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/block/block.h"
#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "hw/virtio/virtio-blk.h"
#include "dataplane/virtio-blk.h"
#include "migration/migration.h"
#include "block/scsi.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
    req->dev = s;
    req->qiov.size = 0;
    req->next = NULL;
    return req;
}

void virtio_blk_free_request(VirtIOBlockReq *req)
{
    if (req) {
        g_slice_free(VirtIOBlockReq, req);
    }
}

static void virtio_blk_complete_request(VirtIOBlockReq *req,
                                        unsigned char status)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    trace_virtio_blk_req_complete(req, status);

    stb_p(&req->in->status, status);
    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
    virtio_notify(vdev, s->vq);
}

static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
{
    req->dev->complete_request(req, status);
}

static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
                                      bool is_read)
{
    BlockErrorAction action = blk_get_error_action(req->dev->blk,
                                                   is_read, error);
    VirtIOBlock *s = req->dev;

    if (action == BLOCK_ERROR_ACTION_STOP) {
        req->next = s->rq;
        s->rq = req;
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        block_acct_done(blk_get_stats(s->blk), &req->acct);
        virtio_blk_free_request(req);
    }

    blk_error_action(s->blk, action, is_read, error);
    return action != BLOCK_ERROR_ACTION_IGNORE;
}

static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;

    trace_virtio_blk_rw_complete(req, ret);

    if (ret) {
        int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
        bool is_read = !(p & VIRTIO_BLK_T_OUT);
        if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
            return;
        }
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
    virtio_blk_free_request(req);
}

static void virtio_blk_flush_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;

    if (ret) {
        if (virtio_blk_handle_rw_error(req, -ret, 0)) {
            return;
        }
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
    virtio_blk_free_request(req);
}

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

    if (!virtqueue_pop(s->vq, &req->elem)) {
        virtio_blk_free_request(req);
        return NULL;
    }

    return req;
}
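/*
 * Execute a guest SCSI passthrough command (VIRTIO_BLK_T_SCSI_CMD) against
 * the host device via the SG_IO ioctl and return a VIRTIO_BLK_S_* status.
 * The passthrough path only exists on Linux; elsewhere it must be
 * unreachable and aborts.
 */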
int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
                               VirtQueueElement *elem)
{
    int status = VIRTIO_BLK_S_OK;
    struct virtio_scsi_inhdr *scsi = NULL;
    VirtIODevice *vdev = VIRTIO_DEVICE(blk);

#ifdef __linux__
    int i;
    struct sg_io_hdr hdr;
#endif

    /*
     * We require at least one output segment each for the virtio_blk_outhdr
     * and the SCSI command block.
     *
     * We also require at least the virtio_blk_inhdr, the virtio_scsi_inhdr
     * and the sense buffer pointer in the input segments.
     */
    if (elem->out_num < 2 || elem->in_num < 3) {
        status = VIRTIO_BLK_S_IOERR;
        goto fail;
    }

    /*
     * The scsi inhdr is placed in the second-to-last input segment, just
     * before the regular inhdr.
     */
    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;

    if (!blk->conf.scsi) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

    /*
     * No support for bidirectional commands yet.
     */
    if (elem->out_num > 2 && elem->in_num > 3) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

#ifdef __linux__
    memset(&hdr, 0, sizeof(struct sg_io_hdr));
    hdr.interface_id = 'S';
    hdr.cmd_len = elem->out_sg[1].iov_len;
    hdr.cmdp = elem->out_sg[1].iov_base;
    hdr.dxfer_len = 0;

    if (elem->out_num > 2) {
        /*
         * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
        hdr.dxfer_direction = SG_DXFER_TO_DEV;
        hdr.iovec_count = elem->out_num - 2;

        for (i = 0; i < hdr.iovec_count; i++) {
            hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
        }

        hdr.dxferp = elem->out_sg + 2;

    } else if (elem->in_num > 3) {
        /*
         * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        hdr.iovec_count = elem->in_num - 3;
        for (i = 0; i < hdr.iovec_count; i++) {
            hdr.dxfer_len += elem->in_sg[i].iov_len;
        }

        hdr.dxferp = elem->in_sg;
    } else {
        /*
         * Some SCSI commands don't actually transfer any data.
         */
        hdr.dxfer_direction = SG_DXFER_NONE;
    }

    hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
    hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;

    status = blk_ioctl(blk->blk, SG_IO, &hdr);
    if (status) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

    /*
     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
     * clear the masked_status field [hence status gets cleared too, see
     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
     * status has occurred.  However they do set DRIVER_SENSE in driver_status
     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer."
     */
    if (hdr.status == 0 && hdr.sb_len_wr > 0) {
        hdr.status = CHECK_CONDITION;
    }

    virtio_stl_p(vdev, &scsi->errors,
                 hdr.status | (hdr.msg_status << 8) |
                 (hdr.host_status << 16) | (hdr.driver_status << 24));
    virtio_stl_p(vdev, &scsi->residual, hdr.resid);
    virtio_stl_p(vdev, &scsi->sense_len, hdr.sb_len_wr);
    virtio_stl_p(vdev, &scsi->data_len, hdr.dxfer_len);

    return status;
#else
    abort();
#endif

fail:
    /* Just put anything nonzero so that the ioctl fails in the guest.  */
    if (scsi) {
        virtio_stl_p(vdev, &scsi->errors, 255);
    }
    return status;
}
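/*
 * Request-level wrapper around virtio_blk_handle_scsi_req(): run the
 * passthrough command, then complete and free the request.
 */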
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    int status;

    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
    virtio_blk_req_complete(req, status);
    virtio_blk_free_request(req);
}

void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb)
{
    int i, ret;

    if (!mrb->num_writes) {
        return;
    }

    ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes);
    if (ret != 0) {
        for (i = 0; i < mrb->num_writes; i++) {
            if (mrb->blkreq[i].error) {
                virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
            }
        }
    }

    mrb->num_writes = 0;
}

static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    block_acct_start(blk_get_stats(req->dev->blk), &req->acct, 0,
                     BLOCK_ACCT_FLUSH);

    /*
     * Make sure all outstanding writes are posted to the backing device.
     */
    virtio_submit_multiwrite(req->dev->blk, mrb);
    blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req);
}

static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
{
    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
    uint64_t total_sectors;

    if (sector & dev->sector_mask) {
        return false;
    }
    if (size % dev->conf.conf.logical_block_size) {
        return false;
    }
    blk_get_geometry(dev->blk, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
}

static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    BlockRequest *blkreq;
    uint64_t sector;

    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);

    trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);

    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        virtio_blk_free_request(req);
        return;
    }

    block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
                     BLOCK_ACCT_WRITE);

    if (mrb->num_writes == 32) {
        virtio_submit_multiwrite(req->dev->blk, mrb);
    }

    blkreq = &mrb->blkreq[mrb->num_writes];
    blkreq->sector = sector;
    blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
    blkreq->qiov = &req->qiov;
    blkreq->cb = virtio_blk_rw_complete;
    blkreq->opaque = req;
    blkreq->error = 0;

    mrb->num_writes++;
}

static void virtio_blk_handle_read(VirtIOBlockReq *req)
{
    uint64_t sector;

    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);

    trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);

    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        virtio_blk_free_request(req);
        return;
    }

    block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
                     BLOCK_ACCT_READ);
    blk_aio_readv(req->dev->blk, sector, &req->qiov,
                  req->qiov.size / BDRV_SECTOR_SIZE,
                  virtio_blk_rw_complete, req);
}
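/*
 * Parse the out/in headers of a single queued request and dispatch it by
 * type: flush, SCSI passthrough, serial-number query (GET_ID), write or
 * read.  Writes are only queued into mrb here; the caller submits the
 * batch with virtio_submit_multiwrite().
 */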
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    uint32_t type;
    struct iovec *in_iov = req->elem.in_sg;
    struct iovec *iov = req->elem.out_sg;
    unsigned in_num = req->elem.in_num;
    unsigned out_num = req->elem.out_num;

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        error_report("virtio-blk missing headers");
        exit(1);
    }

    if (unlikely(iov_to_buf(iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        error_report("virtio-blk request outhdr too short");
        exit(1);
    }

    iov_discard_front(&iov, &out_num, sizeof(req->out));

    if (in_num < 1 ||
        in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        error_report("virtio-blk request inhdr too short");
        exit(1);
    }

    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);
    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));

    type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);

    if (type & VIRTIO_BLK_T_FLUSH) {
        virtio_blk_handle_flush(req, mrb);
    } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
        virtio_blk_handle_scsi(req);
    } else if (type & VIRTIO_BLK_T_GET_ID) {
        VirtIOBlock *s = req->dev;

        /*
         * NB: per existing s/n string convention the string is
         * terminated by '\0' only when shorter than buffer.
         */
        const char *serial = s->conf.serial ? s->conf.serial : "";
        size_t size = MIN(strlen(serial) + 1,
                          MIN(iov_size(in_iov, in_num),
                              VIRTIO_BLK_ID_BYTES));
        iov_from_buf(in_iov, in_num, 0, serial, size);
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        virtio_blk_free_request(req);
    } else if (type & VIRTIO_BLK_T_OUT) {
        qemu_iovec_init_external(&req->qiov, iov, out_num);
        virtio_blk_handle_write(req, mrb);
    } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
        /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
        qemu_iovec_init_external(&req->qiov, in_iov, in_num);
        virtio_blk_handle_read(req);
    } else {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        virtio_blk_free_request(req);
    }
}
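/*
 * Virtqueue notification handler: pop and handle every pending request,
 * then submit the accumulated write batch in one multiwrite call.
 */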
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {
        .num_writes = 0,
    };

    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
     * dataplane here instead of waiting for .set_status().
     */
    if (s->dataplane) {
        virtio_blk_data_plane_start(s->dataplane);
        return;
    }

    while ((req = virtio_blk_get_request(s))) {
        virtio_blk_handle_request(req, &mrb);
    }

    virtio_submit_multiwrite(s->blk, &mrb);

    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible. But
     * that should be done in the generic block layer.
     */
}

static void virtio_blk_dma_restart_bh(void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req = s->rq;
    MultiReqBuffer mrb = {
        .num_writes = 0,
    };

    qemu_bh_delete(s->bh);
    s->bh = NULL;

    s->rq = NULL;

    while (req) {
        VirtIOBlockReq *next = req->next;
        virtio_blk_handle_request(req, &mrb);
        req = next;
    }

    virtio_submit_multiwrite(s->blk, &mrb);
}

static void virtio_blk_dma_restart_cb(void *opaque, int running,
                                      RunState state)
{
    VirtIOBlock *s = opaque;

    if (!running) {
        return;
    }

    if (!s->bh) {
        s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk),
                           virtio_blk_dma_restart_bh, s);
        qemu_bh_schedule(s->bh);
    }
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    if (s->dataplane) {
        virtio_blk_data_plane_stop(s->dataplane);
    }

    /*
     * This should cancel pending requests, but can't do nicely until there
     * are per-device request lists.
     */
    blk_drain_all();
    blk_set_enable_write_cache(s->blk, s->original_wce);
}

/* coalesce internal state, copy to pci i/o region 0
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    BlockConf *conf = &s->conf.conf;
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int blk_size = conf->logical_block_size;

    blk_get_geometry(s->blk, &capacity);
    memset(&blkcfg, 0, sizeof(blkcfg));
    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
    virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
    virtio_stw_p(vdev, &blkcfg.cylinders, conf->cyls);
    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
    virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
    virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
    blkcfg.heads = conf->heads;
    /*
     * We must ensure that the block device capacity is a multiple of
     * the logical block size. If that is not the case, let's use
     * sector_mask to adapt the geometry to have a correct picture.
     * For those devices where the capacity is ok for the given geometry
     * we don't touch the sector value of the geometry, since some devices
     * (like s390 dasd) need a specific value. Here the capacity is already
     * cyls*heads*secs*blk_size and the sector value is not block size
     * divided by 512; instead, it is the number of blk_size blocks
     * per track (cylinder).
     */
    if (blk_getlength(s->blk) / conf->heads / conf->secs % blk_size) {
        blkcfg.sectors = conf->secs & ~s->sector_mask;
    } else {
        blkcfg.sectors = conf->secs;
    }
    blkcfg.size_max = 0;
    blkcfg.physical_block_exp = get_physical_block_exp(conf);
    blkcfg.alignment_offset = 0;
    blkcfg.wce = blk_enable_write_cache(s->blk);
    memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
}
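/*
 * The guest wrote to the config space.  The only field acted upon here is
 * wce, which toggles the backend's writeback cache.
 */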
static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    struct virtio_blk_config blkcfg;

    memcpy(&blkcfg, config, sizeof(blkcfg));

    aio_context_acquire(blk_get_aio_context(s->blk));
    blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
    aio_context_release(blk_get_aio_context(s->blk));
}

static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    features |= (1 << VIRTIO_BLK_F_SEG_MAX);
    features |= (1 << VIRTIO_BLK_F_GEOMETRY);
    features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
    features |= (1 << VIRTIO_BLK_F_SCSI);

    if (s->conf.config_wce) {
        features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
    }
    if (blk_enable_write_cache(s->blk)) {
        features |= (1 << VIRTIO_BLK_F_WCE);
    }
    if (blk_is_read_only(s->blk)) {
        features |= 1 << VIRTIO_BLK_F_RO;
    }

    return features;
}
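/*
 * Track guest driver status changes: stop dataplane when the driver goes
 * away, and once DRIVER_OK is set, pick the cache mode matching the
 * negotiated feature bits (see the comment below on CONFIG_WCE).
 */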
static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    uint32_t features;

    if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
                                    VIRTIO_CONFIG_S_DRIVER_OK))) {
        virtio_blk_data_plane_stop(s->dataplane);
    }

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    features = vdev->guest_features;

    /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
     * cache flushes.  Thus, the "auto writethrough" behavior is never
     * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
     * Leaving it enabled would break the following sequence:
     *
     *     Guest started with "-drive cache=writethrough"
     *     Guest sets status to 0
     *     Guest sets DRIVER bit in status field
     *     Guest reads host features (WCE=0, CONFIG_WCE=1)
     *     Guest writes guest features (WCE=0, CONFIG_WCE=1)
     *     Guest writes 1 to the WCE configuration field (writeback mode)
     *     Guest sets DRIVER_OK bit in status field
     *
     * s->blk would erroneously be placed in writethrough mode.
     */
    if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
        aio_context_acquire(blk_get_aio_context(s->blk));
        blk_set_enable_write_cache(s->blk,
                                   !!(features & (1 << VIRTIO_BLK_F_WCE)));
        aio_context_release(blk_get_aio_context(s->blk));
    }
}

static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);

    virtio_save(vdev, f);
}

static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    VirtIOBlockReq *req = s->rq;

    while (req) {
        qemu_put_sbyte(f, 1);
        qemu_put_buffer(f, (unsigned char *)&req->elem,
                        sizeof(VirtQueueElement));
        req = req->next;
    }
    qemu_put_sbyte(f, 0);
}

static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (version_id != 2) {
        return -EINVAL;
    }

    return virtio_load(vdev, f, version_id);
}

static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
                                  int version_id)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
        qemu_get_buffer(f, (unsigned char *)&req->elem,
                        sizeof(VirtQueueElement));
        req->next = s->rq;
        s->rq = req;

        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
                         req->elem.in_num, 1);
        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
                         req->elem.out_num, 0);
    }

    return 0;
}
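/*
 * The backing device changed size; notify the guest so it re-reads the
 * capacity field from the config space.
 */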
static void virtio_blk_resize(void *opaque)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);

    virtio_notify_config(vdev);
}

static const BlockDevOps virtio_block_ops = {
    .resize_cb = virtio_blk_resize,
};

/* Disable dataplane thread during live migration since it does not
 * update the dirty memory bitmap yet.
 */
static void virtio_blk_migration_state_changed(Notifier *notifier, void *data)
{
    VirtIOBlock *s = container_of(notifier, VirtIOBlock,
                                  migration_state_notifier);
    MigrationState *mig = data;
    Error *err = NULL;

    if (migration_in_setup(mig)) {
        if (!s->dataplane) {
            return;
        }
        virtio_blk_data_plane_destroy(s->dataplane);
        s->dataplane = NULL;
    } else if (migration_has_finished(mig) ||
               migration_has_failed(mig)) {
        if (s->dataplane) {
            return;
        }
        blk_drain_all(); /* complete in-flight non-dataplane requests */
        virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->conf,
                                     &s->dataplane, &err);
        if (err != NULL) {
            error_report("%s", error_get_pretty(err));
            error_free(err);
        }
    }
}

static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBlock *s = VIRTIO_BLK(dev);
    VirtIOBlkConf *conf = &s->conf;
    Error *err = NULL;
    static int virtio_blk_id;

    if (!conf->conf.blk) {
        error_setg(errp, "drive property not set");
        return;
    }
    if (!blk_is_inserted(conf->conf.blk)) {
        error_setg(errp, "Device needs media, but drive is empty");
        return;
    }

    blkconf_serial(&conf->conf, &conf->serial);
    s->original_wce = blk_enable_write_cache(conf->conf.blk);
    blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
                sizeof(struct virtio_blk_config));

    s->blk = conf->conf.blk;
    s->rq = NULL;
    s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;

    s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
    s->complete_request = virtio_blk_complete_request;
    virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        virtio_cleanup(vdev);
        return;
    }
    s->migration_state_notifier.notify = virtio_blk_migration_state_changed;
    add_migration_state_change_notifier(&s->migration_state_notifier);

    s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
    register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
                    virtio_blk_save, virtio_blk_load, s);
    blk_set_dev_ops(s->blk, &virtio_block_ops, s);
    blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);

    blk_iostatus_enable(s->blk);
}

static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBlock *s = VIRTIO_BLK(dev);

    remove_migration_state_change_notifier(&s->migration_state_notifier);
    virtio_blk_data_plane_destroy(s->dataplane);
    s->dataplane = NULL;
    qemu_del_vm_change_state_handler(s->change);
    unregister_savevm(dev, "virtio-blk", s);
    blockdev_mark_auto_del(s->blk);
    virtio_cleanup(vdev);
}
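/*
 * Instance init runs before realize: expose the "iothread" link (used by
 * dataplane) and the "bootindex" property on the object.
 */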
static void virtio_blk_instance_init(Object *obj)
{
    VirtIOBlock *s = VIRTIO_BLK(obj);

    object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
                             (Object **)&s->conf.iothread,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
    device_add_bootindex_property(obj, &s->conf.conf.bootindex,
                                  "bootindex", "/disk@0,0",
                                  DEVICE(obj), NULL);
}

static Property virtio_blk_properties[] = {
    DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf),
    DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
    DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
#ifdef __linux__
    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
#endif
    DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_blk_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_blk_properties;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->realize = virtio_blk_device_realize;
    vdc->unrealize = virtio_blk_device_unrealize;
    vdc->get_config = virtio_blk_update_config;
    vdc->set_config = virtio_blk_set_config;
    vdc->get_features = virtio_blk_get_features;
    vdc->set_status = virtio_blk_set_status;
    vdc->reset = virtio_blk_reset;
    vdc->save = virtio_blk_save_device;
    vdc->load = virtio_blk_load_device;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_BLK,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBlock),
    .instance_init = virtio_blk_instance_init,
    .class_init = virtio_blk_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)