1 /* 2 * Virtio Block Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu-common.h" 15 #include "qemu/iov.h" 16 #include "qemu/error-report.h" 17 #include "trace.h" 18 #include "hw/block/block.h" 19 #include "sysemu/blockdev.h" 20 #include "hw/virtio/virtio-blk.h" 21 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 22 # include "dataplane/virtio-blk.h" 23 # include "migration/migration.h" 24 #endif 25 #include "block/scsi.h" 26 #ifdef __linux__ 27 # include <scsi/sg.h> 28 #endif 29 #include "hw/virtio/virtio-bus.h" 30 #include "hw/virtio/virtio-access.h" 31 32 VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) 33 { 34 VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); 35 req->dev = s; 36 req->qiov.size = 0; 37 req->next = NULL; 38 return req; 39 } 40 41 void virtio_blk_free_request(VirtIOBlockReq *req) 42 { 43 if (req) { 44 g_slice_free(VirtIOBlockReq, req); 45 } 46 } 47 48 static void virtio_blk_complete_request(VirtIOBlockReq *req, 49 unsigned char status) 50 { 51 VirtIOBlock *s = req->dev; 52 VirtIODevice *vdev = VIRTIO_DEVICE(s); 53 54 trace_virtio_blk_req_complete(req, status); 55 56 stb_p(&req->in->status, status); 57 virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); 58 virtio_notify(vdev, s->vq); 59 } 60 61 static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) 62 { 63 req->dev->complete_request(req, status); 64 } 65 66 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, 67 bool is_read) 68 { 69 BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error); 70 VirtIOBlock *s = req->dev; 71 72 if (action == BLOCK_ERROR_ACTION_STOP) { 73 req->next = s->rq; 74 s->rq = req; 75 } else if (action == BLOCK_ERROR_ACTION_REPORT) { 76 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 77 bdrv_acct_done(s->bs, &req->acct); 78 virtio_blk_free_request(req); 79 } 80 81 bdrv_error_action(s->bs, action, is_read, error); 82 return action != BLOCK_ERROR_ACTION_IGNORE; 83 } 84 85 static void virtio_blk_rw_complete(void *opaque, int ret) 86 { 87 VirtIOBlockReq *req = opaque; 88 89 trace_virtio_blk_rw_complete(req, ret); 90 91 if (ret) { 92 int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); 93 bool is_read = !(p & VIRTIO_BLK_T_OUT); 94 if (virtio_blk_handle_rw_error(req, -ret, is_read)) 95 return; 96 } 97 98 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 99 bdrv_acct_done(req->dev->bs, &req->acct); 100 virtio_blk_free_request(req); 101 } 102 103 static void virtio_blk_flush_complete(void *opaque, int ret) 104 { 105 VirtIOBlockReq *req = opaque; 106 107 if (ret) { 108 if (virtio_blk_handle_rw_error(req, -ret, 0)) { 109 return; 110 } 111 } 112 113 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 114 bdrv_acct_done(req->dev->bs, &req->acct); 115 virtio_blk_free_request(req); 116 } 117 118 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) 119 { 120 VirtIOBlockReq *req = virtio_blk_alloc_request(s); 121 122 if (!virtqueue_pop(s->vq, &req->elem)) { 123 virtio_blk_free_request(req); 124 return NULL; 125 } 126 127 return req; 128 } 129 130 int virtio_blk_handle_scsi_req(VirtIOBlock *blk, 131 VirtQueueElement *elem) 132 { 133 int status = VIRTIO_BLK_S_OK; 134 struct virtio_scsi_inhdr *scsi = NULL; 135 VirtIODevice *vdev = VIRTIO_DEVICE(blk); 136 137 #ifdef __linux__ 138 int i; 139 struct sg_io_hdr hdr; 140 #endif 141 142 /* 143 * We require at least one output segment each for the virtio_blk_outhdr 144 * and the SCSI command block. 145 * 146 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr 147 * and the sense buffer pointer in the input segments. 148 */ 149 if (elem->out_num < 2 || elem->in_num < 3) { 150 status = VIRTIO_BLK_S_IOERR; 151 goto fail; 152 } 153 154 /* 155 * The scsi inhdr is placed in the second-to-last input segment, just 156 * before the regular inhdr. 157 */ 158 scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; 159 160 if (!blk->blk.scsi) { 161 status = VIRTIO_BLK_S_UNSUPP; 162 goto fail; 163 } 164 165 /* 166 * No support for bidirection commands yet. 167 */ 168 if (elem->out_num > 2 && elem->in_num > 3) { 169 status = VIRTIO_BLK_S_UNSUPP; 170 goto fail; 171 } 172 173 #ifdef __linux__ 174 memset(&hdr, 0, sizeof(struct sg_io_hdr)); 175 hdr.interface_id = 'S'; 176 hdr.cmd_len = elem->out_sg[1].iov_len; 177 hdr.cmdp = elem->out_sg[1].iov_base; 178 hdr.dxfer_len = 0; 179 180 if (elem->out_num > 2) { 181 /* 182 * If there are more than the minimally required 2 output segments 183 * there is write payload starting from the third iovec. 184 */ 185 hdr.dxfer_direction = SG_DXFER_TO_DEV; 186 hdr.iovec_count = elem->out_num - 2; 187 188 for (i = 0; i < hdr.iovec_count; i++) 189 hdr.dxfer_len += elem->out_sg[i + 2].iov_len; 190 191 hdr.dxferp = elem->out_sg + 2; 192 193 } else if (elem->in_num > 3) { 194 /* 195 * If we have more than 3 input segments the guest wants to actually 196 * read data. 197 */ 198 hdr.dxfer_direction = SG_DXFER_FROM_DEV; 199 hdr.iovec_count = elem->in_num - 3; 200 for (i = 0; i < hdr.iovec_count; i++) 201 hdr.dxfer_len += elem->in_sg[i].iov_len; 202 203 hdr.dxferp = elem->in_sg; 204 } else { 205 /* 206 * Some SCSI commands don't actually transfer any data. 207 */ 208 hdr.dxfer_direction = SG_DXFER_NONE; 209 } 210 211 hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; 212 hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; 213 214 status = bdrv_ioctl(blk->bs, SG_IO, &hdr); 215 if (status) { 216 status = VIRTIO_BLK_S_UNSUPP; 217 goto fail; 218 } 219 220 /* 221 * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) 222 * clear the masked_status field [hence status gets cleared too, see 223 * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED 224 * status has occurred. However they do set DRIVER_SENSE in driver_status 225 * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. 226 */ 227 if (hdr.status == 0 && hdr.sb_len_wr > 0) { 228 hdr.status = CHECK_CONDITION; 229 } 230 231 virtio_stl_p(vdev, &scsi->errors, 232 hdr.status | (hdr.msg_status << 8) | 233 (hdr.host_status << 16) | (hdr.driver_status << 24)); 234 virtio_stl_p(vdev, &scsi->residual, hdr.resid); 235 virtio_stl_p(vdev, &scsi->sense_len, hdr.sb_len_wr); 236 virtio_stl_p(vdev, &scsi->data_len, hdr.dxfer_len); 237 238 return status; 239 #else 240 abort(); 241 #endif 242 243 fail: 244 /* Just put anything nonzero so that the ioctl fails in the guest. */ 245 if (scsi) { 246 virtio_stl_p(vdev, &scsi->errors, 255); 247 } 248 return status; 249 } 250 251 static void virtio_blk_handle_scsi(VirtIOBlockReq *req) 252 { 253 int status; 254 255 status = virtio_blk_handle_scsi_req(req->dev, &req->elem); 256 virtio_blk_req_complete(req, status); 257 virtio_blk_free_request(req); 258 } 259 260 void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb) 261 { 262 int i, ret; 263 264 if (!mrb->num_writes) { 265 return; 266 } 267 268 ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes); 269 if (ret != 0) { 270 for (i = 0; i < mrb->num_writes; i++) { 271 if (mrb->blkreq[i].error) { 272 virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO); 273 } 274 } 275 } 276 277 mrb->num_writes = 0; 278 } 279 280 static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) 281 { 282 bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH); 283 284 /* 285 * Make sure all outstanding writes are posted to the backing device. 286 */ 287 virtio_submit_multiwrite(req->dev->bs, mrb); 288 bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req); 289 } 290 291 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, 292 uint64_t sector, size_t size) 293 { 294 uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; 295 uint64_t total_sectors; 296 297 if (sector & dev->sector_mask) { 298 return false; 299 } 300 if (size % dev->conf->logical_block_size) { 301 return false; 302 } 303 bdrv_get_geometry(dev->bs, &total_sectors); 304 if (sector > total_sectors || nb_sectors > total_sectors - sector) { 305 return false; 306 } 307 return true; 308 } 309 310 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) 311 { 312 BlockRequest *blkreq; 313 uint64_t sector; 314 315 sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); 316 317 trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512); 318 319 if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { 320 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 321 virtio_blk_free_request(req); 322 return; 323 } 324 325 bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE); 326 327 if (mrb->num_writes == 32) { 328 virtio_submit_multiwrite(req->dev->bs, mrb); 329 } 330 331 blkreq = &mrb->blkreq[mrb->num_writes]; 332 blkreq->sector = sector; 333 blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE; 334 blkreq->qiov = &req->qiov; 335 blkreq->cb = virtio_blk_rw_complete; 336 blkreq->opaque = req; 337 blkreq->error = 0; 338 339 mrb->num_writes++; 340 } 341 342 static void virtio_blk_handle_read(VirtIOBlockReq *req) 343 { 344 uint64_t sector; 345 346 sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); 347 348 trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512); 349 350 if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { 351 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 352 virtio_blk_free_request(req); 353 return; 354 } 355 356 bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ); 357 bdrv_aio_readv(req->dev->bs, sector, &req->qiov, 358 req->qiov.size / BDRV_SECTOR_SIZE, 359 virtio_blk_rw_complete, req); 360 } 361 362 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) 363 { 364 uint32_t type; 365 struct iovec *in_iov = req->elem.in_sg; 366 struct iovec *iov = req->elem.out_sg; 367 unsigned in_num = req->elem.in_num; 368 unsigned out_num = req->elem.out_num; 369 370 if (req->elem.out_num < 1 || req->elem.in_num < 1) { 371 error_report("virtio-blk missing headers"); 372 exit(1); 373 } 374 375 if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, 376 sizeof(req->out)) != sizeof(req->out))) { 377 error_report("virtio-blk request outhdr too short"); 378 exit(1); 379 } 380 381 iov_discard_front(&iov, &out_num, sizeof(req->out)); 382 383 if (in_num < 1 || 384 in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 385 error_report("virtio-blk request inhdr too short"); 386 exit(1); 387 } 388 389 req->in = (void *)in_iov[in_num - 1].iov_base 390 + in_iov[in_num - 1].iov_len 391 - sizeof(struct virtio_blk_inhdr); 392 iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); 393 394 type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); 395 396 if (type & VIRTIO_BLK_T_FLUSH) { 397 virtio_blk_handle_flush(req, mrb); 398 } else if (type & VIRTIO_BLK_T_SCSI_CMD) { 399 virtio_blk_handle_scsi(req); 400 } else if (type & VIRTIO_BLK_T_GET_ID) { 401 VirtIOBlock *s = req->dev; 402 403 /* 404 * NB: per existing s/n string convention the string is 405 * terminated by '\0' only when shorter than buffer. 406 */ 407 strncpy(req->elem.in_sg[0].iov_base, 408 s->blk.serial ? s->blk.serial : "", 409 MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES)); 410 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 411 virtio_blk_free_request(req); 412 } else if (type & VIRTIO_BLK_T_OUT) { 413 qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1], 414 req->elem.out_num - 1); 415 virtio_blk_handle_write(req, mrb); 416 } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) { 417 /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */ 418 qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0], 419 req->elem.in_num - 1); 420 virtio_blk_handle_read(req); 421 } else { 422 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); 423 virtio_blk_free_request(req); 424 } 425 } 426 427 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) 428 { 429 VirtIOBlock *s = VIRTIO_BLK(vdev); 430 VirtIOBlockReq *req; 431 MultiReqBuffer mrb = { 432 .num_writes = 0, 433 }; 434 435 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 436 /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start 437 * dataplane here instead of waiting for .set_status(). 438 */ 439 if (s->dataplane) { 440 virtio_blk_data_plane_start(s->dataplane); 441 return; 442 } 443 #endif 444 445 while ((req = virtio_blk_get_request(s))) { 446 virtio_blk_handle_request(req, &mrb); 447 } 448 449 virtio_submit_multiwrite(s->bs, &mrb); 450 451 /* 452 * FIXME: Want to check for completions before returning to guest mode, 453 * so cached reads and writes are reported as quickly as possible. But 454 * that should be done in the generic block layer. 455 */ 456 } 457 458 static void virtio_blk_dma_restart_bh(void *opaque) 459 { 460 VirtIOBlock *s = opaque; 461 VirtIOBlockReq *req = s->rq; 462 MultiReqBuffer mrb = { 463 .num_writes = 0, 464 }; 465 466 qemu_bh_delete(s->bh); 467 s->bh = NULL; 468 469 s->rq = NULL; 470 471 while (req) { 472 virtio_blk_handle_request(req, &mrb); 473 req = req->next; 474 } 475 476 virtio_submit_multiwrite(s->bs, &mrb); 477 } 478 479 static void virtio_blk_dma_restart_cb(void *opaque, int running, 480 RunState state) 481 { 482 VirtIOBlock *s = opaque; 483 484 if (!running) { 485 return; 486 } 487 488 if (!s->bh) { 489 s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs), 490 virtio_blk_dma_restart_bh, s); 491 qemu_bh_schedule(s->bh); 492 } 493 } 494 495 static void virtio_blk_reset(VirtIODevice *vdev) 496 { 497 VirtIOBlock *s = VIRTIO_BLK(vdev); 498 499 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 500 if (s->dataplane) { 501 virtio_blk_data_plane_stop(s->dataplane); 502 } 503 #endif 504 505 /* 506 * This should cancel pending requests, but can't do nicely until there 507 * are per-device request lists. 508 */ 509 bdrv_drain_all(); 510 bdrv_set_enable_write_cache(s->bs, s->original_wce); 511 } 512 513 /* coalesce internal state, copy to pci i/o region 0 514 */ 515 static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) 516 { 517 VirtIOBlock *s = VIRTIO_BLK(vdev); 518 struct virtio_blk_config blkcfg; 519 uint64_t capacity; 520 int blk_size = s->conf->logical_block_size; 521 522 bdrv_get_geometry(s->bs, &capacity); 523 memset(&blkcfg, 0, sizeof(blkcfg)); 524 virtio_stq_p(vdev, &blkcfg.capacity, capacity); 525 virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2); 526 virtio_stw_p(vdev, &blkcfg.cylinders, s->conf->cyls); 527 virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); 528 virtio_stw_p(vdev, &blkcfg.min_io_size, s->conf->min_io_size / blk_size); 529 virtio_stw_p(vdev, &blkcfg.opt_io_size, s->conf->opt_io_size / blk_size); 530 blkcfg.heads = s->conf->heads; 531 /* 532 * We must ensure that the block device capacity is a multiple of 533 * the logical block size. If that is not the case, let's use 534 * sector_mask to adopt the geometry to have a correct picture. 535 * For those devices where the capacity is ok for the given geometry 536 * we don't touch the sector value of the geometry, since some devices 537 * (like s390 dasd) need a specific value. Here the capacity is already 538 * cyls*heads*secs*blk_size and the sector value is not block size 539 * divided by 512 - instead it is the amount of blk_size blocks 540 * per track (cylinder). 541 */ 542 if (bdrv_getlength(s->bs) / s->conf->heads / s->conf->secs % blk_size) { 543 blkcfg.sectors = s->conf->secs & ~s->sector_mask; 544 } else { 545 blkcfg.sectors = s->conf->secs; 546 } 547 blkcfg.size_max = 0; 548 blkcfg.physical_block_exp = get_physical_block_exp(s->conf); 549 blkcfg.alignment_offset = 0; 550 blkcfg.wce = bdrv_enable_write_cache(s->bs); 551 memcpy(config, &blkcfg, sizeof(struct virtio_blk_config)); 552 } 553 554 static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) 555 { 556 VirtIOBlock *s = VIRTIO_BLK(vdev); 557 struct virtio_blk_config blkcfg; 558 559 memcpy(&blkcfg, config, sizeof(blkcfg)); 560 561 aio_context_acquire(bdrv_get_aio_context(s->bs)); 562 bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0); 563 aio_context_release(bdrv_get_aio_context(s->bs)); 564 } 565 566 static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features) 567 { 568 VirtIOBlock *s = VIRTIO_BLK(vdev); 569 570 features |= (1 << VIRTIO_BLK_F_SEG_MAX); 571 features |= (1 << VIRTIO_BLK_F_GEOMETRY); 572 features |= (1 << VIRTIO_BLK_F_TOPOLOGY); 573 features |= (1 << VIRTIO_BLK_F_BLK_SIZE); 574 features |= (1 << VIRTIO_BLK_F_SCSI); 575 576 if (s->blk.config_wce) { 577 features |= (1 << VIRTIO_BLK_F_CONFIG_WCE); 578 } 579 if (bdrv_enable_write_cache(s->bs)) 580 features |= (1 << VIRTIO_BLK_F_WCE); 581 582 if (bdrv_is_read_only(s->bs)) 583 features |= 1 << VIRTIO_BLK_F_RO; 584 585 return features; 586 } 587 588 static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) 589 { 590 VirtIOBlock *s = VIRTIO_BLK(vdev); 591 uint32_t features; 592 593 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 594 if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | 595 VIRTIO_CONFIG_S_DRIVER_OK))) { 596 virtio_blk_data_plane_stop(s->dataplane); 597 } 598 #endif 599 600 if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { 601 return; 602 } 603 604 features = vdev->guest_features; 605 606 /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send 607 * cache flushes. Thus, the "auto writethrough" behavior is never 608 * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. 609 * Leaving it enabled would break the following sequence: 610 * 611 * Guest started with "-drive cache=writethrough" 612 * Guest sets status to 0 613 * Guest sets DRIVER bit in status field 614 * Guest reads host features (WCE=0, CONFIG_WCE=1) 615 * Guest writes guest features (WCE=0, CONFIG_WCE=1) 616 * Guest writes 1 to the WCE configuration field (writeback mode) 617 * Guest sets DRIVER_OK bit in status field 618 * 619 * s->bs would erroneously be placed in writethrough mode. 620 */ 621 if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) { 622 aio_context_acquire(bdrv_get_aio_context(s->bs)); 623 bdrv_set_enable_write_cache(s->bs, 624 !!(features & (1 << VIRTIO_BLK_F_WCE))); 625 aio_context_release(bdrv_get_aio_context(s->bs)); 626 } 627 } 628 629 static void virtio_blk_save(QEMUFile *f, void *opaque) 630 { 631 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 632 633 virtio_save(vdev, f); 634 } 635 636 static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) 637 { 638 VirtIOBlock *s = VIRTIO_BLK(vdev); 639 VirtIOBlockReq *req = s->rq; 640 641 while (req) { 642 qemu_put_sbyte(f, 1); 643 qemu_put_buffer(f, (unsigned char *)&req->elem, 644 sizeof(VirtQueueElement)); 645 req = req->next; 646 } 647 qemu_put_sbyte(f, 0); 648 } 649 650 static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) 651 { 652 VirtIOBlock *s = opaque; 653 VirtIODevice *vdev = VIRTIO_DEVICE(s); 654 655 if (version_id != 2) 656 return -EINVAL; 657 658 return virtio_load(vdev, f, version_id); 659 } 660 661 static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, 662 int version_id) 663 { 664 VirtIOBlock *s = VIRTIO_BLK(vdev); 665 666 while (qemu_get_sbyte(f)) { 667 VirtIOBlockReq *req = virtio_blk_alloc_request(s); 668 qemu_get_buffer(f, (unsigned char *)&req->elem, 669 sizeof(VirtQueueElement)); 670 req->next = s->rq; 671 s->rq = req; 672 673 virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, 674 req->elem.in_num, 1); 675 virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, 676 req->elem.out_num, 0); 677 } 678 679 return 0; 680 } 681 682 static void virtio_blk_resize(void *opaque) 683 { 684 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 685 686 virtio_notify_config(vdev); 687 } 688 689 static const BlockDevOps virtio_block_ops = { 690 .resize_cb = virtio_blk_resize, 691 }; 692 693 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 694 /* Disable dataplane thread during live migration since it does not 695 * update the dirty memory bitmap yet. 696 */ 697 static void virtio_blk_migration_state_changed(Notifier *notifier, void *data) 698 { 699 VirtIOBlock *s = container_of(notifier, VirtIOBlock, 700 migration_state_notifier); 701 MigrationState *mig = data; 702 Error *err = NULL; 703 704 if (migration_in_setup(mig)) { 705 if (!s->dataplane) { 706 return; 707 } 708 virtio_blk_data_plane_destroy(s->dataplane); 709 s->dataplane = NULL; 710 } else if (migration_has_finished(mig) || 711 migration_has_failed(mig)) { 712 if (s->dataplane) { 713 return; 714 } 715 bdrv_drain_all(); /* complete in-flight non-dataplane requests */ 716 virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->blk, 717 &s->dataplane, &err); 718 if (err != NULL) { 719 error_report("%s", error_get_pretty(err)); 720 error_free(err); 721 } 722 } 723 } 724 #endif /* CONFIG_VIRTIO_BLK_DATA_PLANE */ 725 726 static void virtio_blk_device_realize(DeviceState *dev, Error **errp) 727 { 728 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 729 VirtIOBlock *s = VIRTIO_BLK(dev); 730 VirtIOBlkConf *blk = &(s->blk); 731 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 732 Error *err = NULL; 733 #endif 734 static int virtio_blk_id; 735 736 if (!blk->conf.bs) { 737 error_setg(errp, "drive property not set"); 738 return; 739 } 740 if (!bdrv_is_inserted(blk->conf.bs)) { 741 error_setg(errp, "Device needs media, but drive is empty"); 742 return; 743 } 744 745 blkconf_serial(&blk->conf, &blk->serial); 746 s->original_wce = bdrv_enable_write_cache(blk->conf.bs); 747 if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) { 748 error_setg(errp, "Error setting geometry"); 749 return; 750 } 751 752 virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, 753 sizeof(struct virtio_blk_config)); 754 755 s->bs = blk->conf.bs; 756 s->conf = &blk->conf; 757 s->rq = NULL; 758 s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1; 759 760 s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); 761 s->complete_request = virtio_blk_complete_request; 762 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 763 virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err); 764 if (err != NULL) { 765 error_propagate(errp, err); 766 virtio_cleanup(vdev); 767 return; 768 } 769 s->migration_state_notifier.notify = virtio_blk_migration_state_changed; 770 add_migration_state_change_notifier(&s->migration_state_notifier); 771 #endif 772 773 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); 774 register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, 775 virtio_blk_save, virtio_blk_load, s); 776 bdrv_set_dev_ops(s->bs, &virtio_block_ops, s); 777 bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size); 778 779 bdrv_iostatus_enable(s->bs); 780 781 add_boot_device_path(s->conf->bootindex, dev, "/disk@0,0"); 782 } 783 784 static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) 785 { 786 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 787 VirtIOBlock *s = VIRTIO_BLK(dev); 788 789 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 790 remove_migration_state_change_notifier(&s->migration_state_notifier); 791 virtio_blk_data_plane_destroy(s->dataplane); 792 s->dataplane = NULL; 793 #endif 794 qemu_del_vm_change_state_handler(s->change); 795 unregister_savevm(dev, "virtio-blk", s); 796 blockdev_mark_auto_del(s->bs); 797 virtio_cleanup(vdev); 798 } 799 800 static void virtio_blk_instance_init(Object *obj) 801 { 802 VirtIOBlock *s = VIRTIO_BLK(obj); 803 804 object_property_add_link(obj, "iothread", TYPE_IOTHREAD, 805 (Object **)&s->blk.iothread, 806 qdev_prop_allow_set_link_before_realize, 807 OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); 808 } 809 810 static Property virtio_blk_properties[] = { 811 DEFINE_BLOCK_PROPERTIES(VirtIOBlock, blk.conf), 812 DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, blk.conf), 813 DEFINE_PROP_STRING("serial", VirtIOBlock, blk.serial), 814 DEFINE_PROP_BIT("config-wce", VirtIOBlock, blk.config_wce, 0, true), 815 #ifdef __linux__ 816 DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true), 817 #endif 818 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE 819 DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false), 820 #endif 821 DEFINE_PROP_END_OF_LIST(), 822 }; 823 824 static void virtio_blk_class_init(ObjectClass *klass, void *data) 825 { 826 DeviceClass *dc = DEVICE_CLASS(klass); 827 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 828 829 dc->props = virtio_blk_properties; 830 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 831 vdc->realize = virtio_blk_device_realize; 832 vdc->unrealize = virtio_blk_device_unrealize; 833 vdc->get_config = virtio_blk_update_config; 834 vdc->set_config = virtio_blk_set_config; 835 vdc->get_features = virtio_blk_get_features; 836 vdc->set_status = virtio_blk_set_status; 837 vdc->reset = virtio_blk_reset; 838 vdc->save = virtio_blk_save_device; 839 vdc->load = virtio_blk_load_device; 840 } 841 842 static const TypeInfo virtio_device_info = { 843 .name = TYPE_VIRTIO_BLK, 844 .parent = TYPE_VIRTIO_DEVICE, 845 .instance_size = sizeof(VirtIOBlock), 846 .instance_init = virtio_blk_instance_init, 847 .class_init = virtio_blk_class_init, 848 }; 849 850 static void virtio_register_types(void) 851 { 852 type_register_static(&virtio_device_info); 853 } 854 855 type_init(virtio_register_types) 856