1 /* 2 * QEMU Block backends 3 * 4 * Copyright (C) 2014-2016 Red Hat, Inc. 5 * 6 * Authors: 7 * Markus Armbruster <armbru@redhat.com>, 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 10 * or later. See the COPYING.LIB file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/block-backend.h" 15 #include "block/block_int.h" 16 #include "block/blockjob.h" 17 #include "block/coroutines.h" 18 #include "block/throttle-groups.h" 19 #include "hw/qdev-core.h" 20 #include "sysemu/blockdev.h" 21 #include "sysemu/runstate.h" 22 #include "sysemu/replay.h" 23 #include "qapi/error.h" 24 #include "qapi/qapi-events-block.h" 25 #include "qemu/id.h" 26 #include "qemu/main-loop.h" 27 #include "qemu/option.h" 28 #include "trace.h" 29 #include "migration/misc.h" 30 31 /* Number of coroutines to reserve per attached device model */ 32 #define COROUTINE_POOL_RESERVATION 64 33 34 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 35 36 typedef struct BlockBackendAioNotifier { 37 void (*attached_aio_context)(AioContext *new_context, void *opaque); 38 void (*detach_aio_context)(void *opaque); 39 void *opaque; 40 QLIST_ENTRY(BlockBackendAioNotifier) list; 41 } BlockBackendAioNotifier; 42 43 struct BlockBackend { 44 char *name; 45 int refcnt; 46 BdrvChild *root; 47 AioContext *ctx; /* access with atomic operations only */ 48 DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ 49 QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ 50 QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ 51 BlockBackendPublic public; 52 53 DeviceState *dev; /* attached device model, if any */ 54 const BlockDevOps *dev_ops; 55 void *dev_opaque; 56 57 /* If the BDS tree is removed, some of its options are stored here (which 58 * can be used to restore those options in the new BDS on insert) */ 59 BlockBackendRootState root_state; 60 61 bool enable_write_cache; 62 63 /* I/O stats (display with "info blockstats"). */ 64 BlockAcctStats stats; 65 66 BlockdevOnError on_read_error, on_write_error; 67 bool iostatus_enabled; 68 BlockDeviceIoStatus iostatus; 69 70 uint64_t perm; 71 uint64_t shared_perm; 72 bool disable_perm; 73 74 bool allow_aio_context_change; 75 bool allow_write_beyond_eof; 76 77 /* Protected by BQL */ 78 NotifierList remove_bs_notifiers, insert_bs_notifiers; 79 QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; 80 81 int quiesce_counter; /* atomic: written under BQL, read by other threads */ 82 QemuMutex queued_requests_lock; /* protects queued_requests */ 83 CoQueue queued_requests; 84 bool disable_request_queuing; /* atomic */ 85 86 VMChangeStateEntry *vmsh; 87 bool force_allow_inactivate; 88 89 /* Number of in-flight aio requests. BlockDriverState also counts 90 * in-flight requests but aio requests can exist even when blk->root is 91 * NULL, so we cannot rely on its counter for that case. 92 * Accessed with atomic ops. 93 */ 94 unsigned int in_flight; 95 }; 96 97 typedef struct BlockBackendAIOCB { 98 BlockAIOCB common; 99 BlockBackend *blk; 100 int ret; 101 } BlockBackendAIOCB; 102 103 static const AIOCBInfo block_backend_aiocb_info = { 104 .aiocb_size = sizeof(BlockBackendAIOCB), 105 }; 106 107 static void drive_info_del(DriveInfo *dinfo); 108 static BlockBackend *bdrv_first_blk(BlockDriverState *bs); 109 110 /* All BlockBackends. Protected by BQL. 
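 * Entries are added by blk_new() and removed again in blk_delete();
 * blk_all_next() iterates over this list, including backends that are
 * not referenced by the monitor.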
*/ 111 static QTAILQ_HEAD(, BlockBackend) block_backends = 112 QTAILQ_HEAD_INITIALIZER(block_backends); 113 114 /* 115 * All BlockBackends referenced by the monitor and which are iterated through by 116 * blk_next(). Protected by BQL. 117 */ 118 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = 119 QTAILQ_HEAD_INITIALIZER(monitor_block_backends); 120 121 static int coroutine_mixed_fn GRAPH_RDLOCK 122 blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, 123 Error **errp); 124 125 static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, 126 int *child_flags, QDict *child_options, 127 int parent_flags, QDict *parent_options) 128 { 129 /* We're not supposed to call this function for root nodes */ 130 abort(); 131 } 132 static void blk_root_drained_begin(BdrvChild *child); 133 static bool blk_root_drained_poll(BdrvChild *child); 134 static void blk_root_drained_end(BdrvChild *child); 135 136 static void blk_root_change_media(BdrvChild *child, bool load); 137 static void blk_root_resize(BdrvChild *child); 138 139 static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, 140 GHashTable *visited, Transaction *tran, 141 Error **errp); 142 143 static char *blk_root_get_parent_desc(BdrvChild *child) 144 { 145 BlockBackend *blk = child->opaque; 146 g_autofree char *dev_id = NULL; 147 148 if (blk->name) { 149 return g_strdup_printf("block device '%s'", blk->name); 150 } 151 152 dev_id = blk_get_attached_dev_id(blk); 153 if (*dev_id) { 154 return g_strdup_printf("block device '%s'", dev_id); 155 } else { 156 /* TODO Callback into the BB owner for something more detailed */ 157 return g_strdup("an unnamed block device"); 158 } 159 } 160 161 static const char *blk_root_get_name(BdrvChild *child) 162 { 163 return blk_name(child->opaque); 164 } 165 166 static void blk_vm_state_changed(void *opaque, bool running, RunState state) 167 { 168 Error *local_err = NULL; 169 BlockBackend *blk = opaque; 170 171 if (state == RUN_STATE_INMIGRATE) { 172 return; 173 } 174 175 qemu_del_vm_change_state_handler(blk->vmsh); 176 blk->vmsh = NULL; 177 blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); 178 if (local_err) { 179 error_report_err(local_err); 180 } 181 } 182 183 /* 184 * Notifies the user of the BlockBackend that migration has completed. qdev 185 * devices can tighten their permissions in response (specifically revoke 186 * shared write permissions that we needed for storage migration). 187 * 188 * If an error is returned, the VM cannot be allowed to be resumed. 189 */ 190 static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp) 191 { 192 BlockBackend *blk = child->opaque; 193 Error *local_err = NULL; 194 uint64_t saved_shared_perm; 195 196 if (!blk->disable_perm) { 197 return; 198 } 199 200 blk->disable_perm = false; 201 202 /* 203 * blk->shared_perm contains the permissions we want to share once 204 * migration is really completely done. For now, we need to share 205 * all; but we also need to retain blk->shared_perm, which is 206 * overwritten by a successful blk_set_perm() call. Save it and 207 * restore it below. 
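 * (A successful blk_set_perm_locked() call stores its arguments into
 * blk->perm and blk->shared_perm, hence the temporary copy.)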
208 */ 209 saved_shared_perm = blk->shared_perm; 210 211 blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err); 212 if (local_err) { 213 error_propagate(errp, local_err); 214 blk->disable_perm = true; 215 return; 216 } 217 blk->shared_perm = saved_shared_perm; 218 219 if (runstate_check(RUN_STATE_INMIGRATE)) { 220 /* Activation can happen when migration process is still active, for 221 * example when nbd_server_add is called during non-shared storage 222 * migration. Defer the shared_perm update to migration completion. */ 223 if (!blk->vmsh) { 224 blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed, 225 blk); 226 } 227 return; 228 } 229 230 blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err); 231 if (local_err) { 232 error_propagate(errp, local_err); 233 blk->disable_perm = true; 234 return; 235 } 236 } 237 238 void blk_set_force_allow_inactivate(BlockBackend *blk) 239 { 240 GLOBAL_STATE_CODE(); 241 blk->force_allow_inactivate = true; 242 } 243 244 static bool blk_can_inactivate(BlockBackend *blk) 245 { 246 /* If it is a guest device, inactivate is ok. */ 247 if (blk->dev || blk_name(blk)[0]) { 248 return true; 249 } 250 251 /* Inactivating means no more writes to the image can be done, 252 * even if those writes would be changes invisible to the 253 * guest. For block job BBs that satisfy this, we can just allow 254 * it. This is the case for mirror job source, which is required 255 * by libvirt non-shared block migration. */ 256 if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) { 257 return true; 258 } 259 260 return blk->force_allow_inactivate; 261 } 262 263 static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child) 264 { 265 BlockBackend *blk = child->opaque; 266 267 if (blk->disable_perm) { 268 return 0; 269 } 270 271 if (!blk_can_inactivate(blk)) { 272 return -EPERM; 273 } 274 275 blk->disable_perm = true; 276 if (blk->root) { 277 bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort); 278 } 279 280 return 0; 281 } 282 283 static void blk_root_attach(BdrvChild *child) 284 { 285 BlockBackend *blk = child->opaque; 286 BlockBackendAioNotifier *notifier; 287 288 trace_blk_root_attach(child, blk, child->bs); 289 290 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { 291 bdrv_add_aio_context_notifier(child->bs, 292 notifier->attached_aio_context, 293 notifier->detach_aio_context, 294 notifier->opaque); 295 } 296 } 297 298 static void blk_root_detach(BdrvChild *child) 299 { 300 BlockBackend *blk = child->opaque; 301 BlockBackendAioNotifier *notifier; 302 303 trace_blk_root_detach(child, blk, child->bs); 304 305 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { 306 bdrv_remove_aio_context_notifier(child->bs, 307 notifier->attached_aio_context, 308 notifier->detach_aio_context, 309 notifier->opaque); 310 } 311 } 312 313 static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) 314 { 315 BlockBackend *blk = c->opaque; 316 IO_CODE(); 317 318 return blk_get_aio_context(blk); 319 } 320 321 static const BdrvChildClass child_root = { 322 .inherit_options = blk_root_inherit_options, 323 324 .change_media = blk_root_change_media, 325 .resize = blk_root_resize, 326 .get_name = blk_root_get_name, 327 .get_parent_desc = blk_root_get_parent_desc, 328 329 .drained_begin = blk_root_drained_begin, 330 .drained_poll = blk_root_drained_poll, 331 .drained_end = blk_root_drained_end, 332 333 .activate = blk_root_activate, 334 .inactivate = blk_root_inactivate, 335 336 .attach = blk_root_attach, 337 .detach = blk_root_detach, 338 339 
.change_aio_ctx = blk_root_change_aio_ctx, 340 341 .get_parent_aio_context = blk_root_get_parent_aio_context, 342 }; 343 344 /* 345 * Create a new BlockBackend with a reference count of one. 346 * 347 * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions 348 * to request for a block driver node that is attached to this BlockBackend. 349 * @shared_perm is a bitmask which describes which permissions may be granted 350 * to other users of the attached node. 351 * Both sets of permissions can be changed later using blk_set_perm(). 352 * 353 * Return the new BlockBackend on success, null on failure. 354 */ 355 BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) 356 { 357 BlockBackend *blk; 358 359 GLOBAL_STATE_CODE(); 360 361 blk = g_new0(BlockBackend, 1); 362 blk->refcnt = 1; 363 blk->ctx = ctx; 364 blk->perm = perm; 365 blk->shared_perm = shared_perm; 366 blk_set_enable_write_cache(blk, true); 367 368 blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; 369 blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; 370 371 block_acct_init(&blk->stats); 372 373 qemu_mutex_init(&blk->queued_requests_lock); 374 qemu_co_queue_init(&blk->queued_requests); 375 notifier_list_init(&blk->remove_bs_notifiers); 376 notifier_list_init(&blk->insert_bs_notifiers); 377 QLIST_INIT(&blk->aio_notifiers); 378 379 QTAILQ_INSERT_TAIL(&block_backends, blk, link); 380 return blk; 381 } 382 383 /* 384 * Create a new BlockBackend connected to an existing BlockDriverState. 385 * 386 * @perm is a bitmasks of BLK_PERM_* constants which describes the 387 * permissions to request for @bs that is attached to this 388 * BlockBackend. @shared_perm is a bitmask which describes which 389 * permissions may be granted to other users of the attached node. 390 * Both sets of permissions can be changed later using blk_set_perm(). 391 * 392 * Return the new BlockBackend on success, null on failure. 393 */ 394 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, 395 uint64_t shared_perm, Error **errp) 396 { 397 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); 398 399 GLOBAL_STATE_CODE(); 400 401 if (blk_insert_bs(blk, bs, errp) < 0) { 402 blk_unref(blk); 403 return NULL; 404 } 405 return blk; 406 } 407 408 /* 409 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. 410 * By default, the new BlockBackend is in the main AioContext, but if the 411 * parameters connect it with any existing node in a different AioContext, it 412 * may end up there instead. 413 * 414 * Just as with bdrv_open(), after having called this function the reference to 415 * @options belongs to the block layer (even on failure). 416 * 417 * TODO: Remove @filename and @flags; it should be possible to specify a whole 418 * BDS tree just by specifying the @options QDict (or @reference, 419 * alternatively). At the time of adding this function, this is not possible, 420 * though, so callers of this function have to be able to specify @filename and 421 * @flags. 
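 *
 * A rough usage sketch (hypothetical caller, error handling elided):
 *
 *   blk = blk_new_open(filename, NULL, options, BDRV_O_RDWR, errp);
 *   if (!blk) {
 *       return -EINVAL;
 *   }
 *   ...
 *   blk_unref(blk);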
422 */ 423 BlockBackend *blk_new_open(const char *filename, const char *reference, 424 QDict *options, int flags, Error **errp) 425 { 426 BlockBackend *blk; 427 BlockDriverState *bs; 428 uint64_t perm = 0; 429 uint64_t shared = BLK_PERM_ALL; 430 431 GLOBAL_STATE_CODE(); 432 433 /* 434 * blk_new_open() is mainly used in .bdrv_create implementations and the 435 * tools where sharing isn't a major concern because the BDS stays private 436 * and the file is generally not supposed to be used by a second process, 437 * so we just request permission according to the flags. 438 * 439 * The exceptions are xen_disk and blockdev_init(); in these cases, the 440 * caller of blk_new_open() doesn't make use of the permissions, but they 441 * shouldn't hurt either. We can still share everything here because the 442 * guest devices will add their own blockers if they can't share. 443 */ 444 if ((flags & BDRV_O_NO_IO) == 0) { 445 perm |= BLK_PERM_CONSISTENT_READ; 446 if (flags & BDRV_O_RDWR) { 447 perm |= BLK_PERM_WRITE; 448 } 449 } 450 if (flags & BDRV_O_RESIZE) { 451 perm |= BLK_PERM_RESIZE; 452 } 453 if (flags & BDRV_O_NO_SHARE) { 454 shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; 455 } 456 457 bs = bdrv_open(filename, reference, options, flags, errp); 458 if (!bs) { 459 return NULL; 460 } 461 462 /* bdrv_open() could have moved bs to a different AioContext */ 463 blk = blk_new(bdrv_get_aio_context(bs), perm, shared); 464 blk->perm = perm; 465 blk->shared_perm = shared; 466 467 blk_insert_bs(blk, bs, errp); 468 bdrv_unref(bs); 469 470 if (!blk->root) { 471 blk_unref(blk); 472 return NULL; 473 } 474 475 return blk; 476 } 477 478 static void blk_delete(BlockBackend *blk) 479 { 480 assert(!blk->refcnt); 481 assert(!blk->name); 482 assert(!blk->dev); 483 if (blk->public.throttle_group_member.throttle_state) { 484 blk_io_limits_disable(blk); 485 } 486 if (blk->root) { 487 blk_remove_bs(blk); 488 } 489 if (blk->vmsh) { 490 qemu_del_vm_change_state_handler(blk->vmsh); 491 blk->vmsh = NULL; 492 } 493 assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); 494 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); 495 assert(QLIST_EMPTY(&blk->aio_notifiers)); 496 assert(qemu_co_queue_empty(&blk->queued_requests)); 497 qemu_mutex_destroy(&blk->queued_requests_lock); 498 QTAILQ_REMOVE(&block_backends, blk, link); 499 drive_info_del(blk->legacy_dinfo); 500 block_acct_cleanup(&blk->stats); 501 g_free(blk); 502 } 503 504 static void drive_info_del(DriveInfo *dinfo) 505 { 506 if (!dinfo) { 507 return; 508 } 509 qemu_opts_del(dinfo->opts); 510 g_free(dinfo); 511 } 512 513 int blk_get_refcnt(BlockBackend *blk) 514 { 515 GLOBAL_STATE_CODE(); 516 return blk ? blk->refcnt : 0; 517 } 518 519 /* 520 * Increment @blk's reference count. 521 * @blk must not be null. 522 */ 523 void blk_ref(BlockBackend *blk) 524 { 525 assert(blk->refcnt > 0); 526 GLOBAL_STATE_CODE(); 527 blk->refcnt++; 528 } 529 530 /* 531 * Decrement @blk's reference count. 532 * If this drops it to zero, destroy @blk. 533 * For convenience, do nothing if @blk is null. 
534 */ 535 void blk_unref(BlockBackend *blk) 536 { 537 GLOBAL_STATE_CODE(); 538 if (blk) { 539 assert(blk->refcnt > 0); 540 if (blk->refcnt > 1) { 541 blk->refcnt--; 542 } else { 543 blk_drain(blk); 544 /* blk_drain() cannot resurrect blk, nobody held a reference */ 545 assert(blk->refcnt == 1); 546 blk->refcnt = 0; 547 blk_delete(blk); 548 } 549 } 550 } 551 552 /* 553 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the 554 * ones which are hidden (i.e. are not referenced by the monitor). 555 */ 556 BlockBackend *blk_all_next(BlockBackend *blk) 557 { 558 GLOBAL_STATE_CODE(); 559 return blk ? QTAILQ_NEXT(blk, link) 560 : QTAILQ_FIRST(&block_backends); 561 } 562 563 void blk_remove_all_bs(void) 564 { 565 BlockBackend *blk = NULL; 566 567 GLOBAL_STATE_CODE(); 568 569 while ((blk = blk_all_next(blk)) != NULL) { 570 if (blk->root) { 571 blk_remove_bs(blk); 572 } 573 } 574 } 575 576 /* 577 * Return the monitor-owned BlockBackend after @blk. 578 * If @blk is null, return the first one. 579 * Else, return @blk's next sibling, which may be null. 580 * 581 * To iterate over all BlockBackends, do 582 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { 583 * ... 584 * } 585 */ 586 BlockBackend *blk_next(BlockBackend *blk) 587 { 588 GLOBAL_STATE_CODE(); 589 return blk ? QTAILQ_NEXT(blk, monitor_link) 590 : QTAILQ_FIRST(&monitor_block_backends); 591 } 592 593 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by 594 * the monitor or attached to a BlockBackend */ 595 BlockDriverState *bdrv_next(BdrvNextIterator *it) 596 { 597 BlockDriverState *bs, *old_bs; 598 599 /* Must be called from the main loop */ 600 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 601 602 /* First, return all root nodes of BlockBackends. In order to avoid 603 * returning a BDS twice when multiple BBs refer to it, we only return it 604 * if the BB is the first one in the parent list of the BDS. */ 605 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { 606 BlockBackend *old_blk = it->blk; 607 608 old_bs = old_blk ? blk_bs(old_blk) : NULL; 609 610 do { 611 it->blk = blk_all_next(it->blk); 612 bs = it->blk ? blk_bs(it->blk) : NULL; 613 } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk)); 614 615 if (it->blk) { 616 blk_ref(it->blk); 617 } 618 blk_unref(old_blk); 619 620 if (bs) { 621 bdrv_ref(bs); 622 bdrv_unref(old_bs); 623 return bs; 624 } 625 it->phase = BDRV_NEXT_MONITOR_OWNED; 626 } else { 627 old_bs = it->bs; 628 } 629 630 /* Then return the monitor-owned BDSes without a BB attached. 
Ignore all 631 * BDSes that are attached to a BlockBackend here; they have been handled 632 * by the above block already */ 633 do { 634 it->bs = bdrv_next_monitor_owned(it->bs); 635 bs = it->bs; 636 } while (bs && bdrv_has_blk(bs)); 637 638 if (bs) { 639 bdrv_ref(bs); 640 } 641 bdrv_unref(old_bs); 642 643 return bs; 644 } 645 646 static void bdrv_next_reset(BdrvNextIterator *it) 647 { 648 *it = (BdrvNextIterator) { 649 .phase = BDRV_NEXT_BACKEND_ROOTS, 650 }; 651 } 652 653 BlockDriverState *bdrv_first(BdrvNextIterator *it) 654 { 655 GLOBAL_STATE_CODE(); 656 bdrv_next_reset(it); 657 return bdrv_next(it); 658 } 659 660 /* Must be called when aborting a bdrv_next() iteration before 661 * bdrv_next() returns NULL */ 662 void bdrv_next_cleanup(BdrvNextIterator *it) 663 { 664 /* Must be called from the main loop */ 665 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 666 667 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { 668 if (it->blk) { 669 bdrv_unref(blk_bs(it->blk)); 670 blk_unref(it->blk); 671 } 672 } else { 673 bdrv_unref(it->bs); 674 } 675 676 bdrv_next_reset(it); 677 } 678 679 /* 680 * Add a BlockBackend into the list of backends referenced by the monitor, with 681 * the given @name acting as the handle for the monitor. 682 * Strictly for use by blockdev.c. 683 * 684 * @name must not be null or empty. 685 * 686 * Returns true on success and false on failure. In the latter case, an Error 687 * object is returned through @errp. 688 */ 689 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) 690 { 691 assert(!blk->name); 692 assert(name && name[0]); 693 GLOBAL_STATE_CODE(); 694 695 if (!id_wellformed(name)) { 696 error_setg(errp, "Invalid device name"); 697 return false; 698 } 699 if (blk_by_name(name)) { 700 error_setg(errp, "Device with id '%s' already exists", name); 701 return false; 702 } 703 if (bdrv_find_node(name)) { 704 error_setg(errp, 705 "Device name '%s' conflicts with an existing node name", 706 name); 707 return false; 708 } 709 710 blk->name = g_strdup(name); 711 QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link); 712 return true; 713 } 714 715 /* 716 * Remove a BlockBackend from the list of backends referenced by the monitor. 717 * Strictly for use by blockdev.c. 718 */ 719 void monitor_remove_blk(BlockBackend *blk) 720 { 721 GLOBAL_STATE_CODE(); 722 723 if (!blk->name) { 724 return; 725 } 726 727 QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link); 728 g_free(blk->name); 729 blk->name = NULL; 730 } 731 732 /* 733 * Return @blk's name, a non-null string. 734 * Returns an empty string iff @blk is not referenced by the monitor. 735 */ 736 const char *blk_name(const BlockBackend *blk) 737 { 738 IO_CODE(); 739 return blk->name ?: ""; 740 } 741 742 /* 743 * Return the BlockBackend with name @name if it exists, else null. 744 * @name must not be null. 745 */ 746 BlockBackend *blk_by_name(const char *name) 747 { 748 BlockBackend *blk = NULL; 749 750 GLOBAL_STATE_CODE(); 751 assert(name); 752 while ((blk = blk_next(blk)) != NULL) { 753 if (!strcmp(name, blk->name)) { 754 return blk; 755 } 756 } 757 return NULL; 758 } 759 760 /* 761 * Return the BlockDriverState attached to @blk if any, else null. 762 */ 763 BlockDriverState *blk_bs(BlockBackend *blk) 764 { 765 IO_CODE(); 766 return blk->root ? 
blk->root->bs : NULL; 767 } 768 769 static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs) 770 { 771 BdrvChild *child; 772 773 GLOBAL_STATE_CODE(); 774 assert_bdrv_graph_readable(); 775 776 QLIST_FOREACH(child, &bs->parents, next_parent) { 777 if (child->klass == &child_root) { 778 return child->opaque; 779 } 780 } 781 782 return NULL; 783 } 784 785 /* 786 * Returns true if @bs has an associated BlockBackend. 787 */ 788 bool bdrv_has_blk(BlockDriverState *bs) 789 { 790 GLOBAL_STATE_CODE(); 791 return bdrv_first_blk(bs) != NULL; 792 } 793 794 /* 795 * Returns true if @bs has only BlockBackends as parents. 796 */ 797 bool bdrv_is_root_node(BlockDriverState *bs) 798 { 799 BdrvChild *c; 800 801 GLOBAL_STATE_CODE(); 802 assert_bdrv_graph_readable(); 803 804 QLIST_FOREACH(c, &bs->parents, next_parent) { 805 if (c->klass != &child_root) { 806 return false; 807 } 808 } 809 810 return true; 811 } 812 813 /* 814 * Return @blk's DriveInfo if any, else null. 815 */ 816 DriveInfo *blk_legacy_dinfo(BlockBackend *blk) 817 { 818 GLOBAL_STATE_CODE(); 819 return blk->legacy_dinfo; 820 } 821 822 /* 823 * Set @blk's DriveInfo to @dinfo, and return it. 824 * @blk must not have a DriveInfo set already. 825 * No other BlockBackend may have the same DriveInfo set. 826 */ 827 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) 828 { 829 assert(!blk->legacy_dinfo); 830 GLOBAL_STATE_CODE(); 831 return blk->legacy_dinfo = dinfo; 832 } 833 834 /* 835 * Return the BlockBackend with DriveInfo @dinfo. 836 * It must exist. 837 */ 838 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) 839 { 840 BlockBackend *blk = NULL; 841 GLOBAL_STATE_CODE(); 842 843 while ((blk = blk_next(blk)) != NULL) { 844 if (blk->legacy_dinfo == dinfo) { 845 return blk; 846 } 847 } 848 abort(); 849 } 850 851 /* 852 * Returns a pointer to the publicly accessible fields of @blk. 853 */ 854 BlockBackendPublic *blk_get_public(BlockBackend *blk) 855 { 856 GLOBAL_STATE_CODE(); 857 return &blk->public; 858 } 859 860 /* 861 * Returns a BlockBackend given the associated @public fields. 862 */ 863 BlockBackend *blk_by_public(BlockBackendPublic *public) 864 { 865 GLOBAL_STATE_CODE(); 866 return container_of(public, BlockBackend, public); 867 } 868 869 /* 870 * Disassociates the currently associated BlockDriverState from @blk. 871 */ 872 void blk_remove_bs(BlockBackend *blk) 873 { 874 ThrottleGroupMember *tgm = &blk->public.throttle_group_member; 875 BdrvChild *root; 876 877 GLOBAL_STATE_CODE(); 878 879 notifier_list_notify(&blk->remove_bs_notifiers, blk); 880 if (tgm->throttle_state) { 881 BlockDriverState *bs = blk_bs(blk); 882 883 /* 884 * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for 885 * example, if a temporary filter node is removed by a blockjob. 886 */ 887 bdrv_ref(bs); 888 bdrv_drained_begin(bs); 889 throttle_group_detach_aio_context(tgm); 890 throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); 891 bdrv_drained_end(bs); 892 bdrv_unref(bs); 893 } 894 895 blk_update_root_state(blk); 896 897 /* bdrv_root_unref_child() will cause blk->root to become stale and may 898 * switch to a completion coroutine later on. Let's drain all I/O here 899 * to avoid that and a potential QEMU crash. 900 */ 901 blk_drain(blk); 902 root = blk->root; 903 blk->root = NULL; 904 905 bdrv_graph_wrlock(); 906 bdrv_root_unref_child(root); 907 bdrv_graph_wrunlock(); 908 } 909 910 /* 911 * Associates a new BlockDriverState with @blk. 
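 * Returns 0 on success; returns -EPERM and sets @errp if the root child
 * cannot be attached with blk->perm/blk->shared_perm.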
912 */ 913 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) 914 { 915 ThrottleGroupMember *tgm = &blk->public.throttle_group_member; 916 917 GLOBAL_STATE_CODE(); 918 bdrv_ref(bs); 919 bdrv_graph_wrlock(); 920 blk->root = bdrv_root_attach_child(bs, "root", &child_root, 921 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, 922 blk->perm, blk->shared_perm, 923 blk, errp); 924 bdrv_graph_wrunlock(); 925 if (blk->root == NULL) { 926 return -EPERM; 927 } 928 929 notifier_list_notify(&blk->insert_bs_notifiers, blk); 930 if (tgm->throttle_state) { 931 throttle_group_detach_aio_context(tgm); 932 throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs)); 933 } 934 935 return 0; 936 } 937 938 /* 939 * Change BlockDriverState associated with @blk. 940 */ 941 int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) 942 { 943 GLOBAL_STATE_CODE(); 944 return bdrv_replace_child_bs(blk->root, new_bs, errp); 945 } 946 947 /* 948 * Sets the permission bitmasks that the user of the BlockBackend needs. 949 */ 950 static int coroutine_mixed_fn GRAPH_RDLOCK 951 blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, 952 Error **errp) 953 { 954 int ret; 955 GLOBAL_STATE_CODE(); 956 957 if (blk->root && !blk->disable_perm) { 958 ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); 959 if (ret < 0) { 960 return ret; 961 } 962 } 963 964 blk->perm = perm; 965 blk->shared_perm = shared_perm; 966 967 return 0; 968 } 969 970 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, 971 Error **errp) 972 { 973 GLOBAL_STATE_CODE(); 974 GRAPH_RDLOCK_GUARD_MAINLOOP(); 975 976 return blk_set_perm_locked(blk, perm, shared_perm, errp); 977 } 978 979 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) 980 { 981 GLOBAL_STATE_CODE(); 982 *perm = blk->perm; 983 *shared_perm = blk->shared_perm; 984 } 985 986 /* 987 * Attach device model @dev to @blk. 988 * Return 0 on success, -EBUSY when a device model is attached already. 989 */ 990 int blk_attach_dev(BlockBackend *blk, DeviceState *dev) 991 { 992 GLOBAL_STATE_CODE(); 993 if (blk->dev) { 994 return -EBUSY; 995 } 996 997 /* While migration is still incoming, we don't need to apply the 998 * permissions of guest device BlockBackends. We might still have a block 999 * job or NBD server writing to the image for storage migration. */ 1000 if (runstate_check(RUN_STATE_INMIGRATE)) { 1001 blk->disable_perm = true; 1002 } 1003 1004 blk_ref(blk); 1005 blk->dev = dev; 1006 blk_iostatus_reset(blk); 1007 1008 return 0; 1009 } 1010 1011 /* 1012 * Detach device model @dev from @blk. 1013 * @dev must be currently attached to @blk. 1014 */ 1015 void blk_detach_dev(BlockBackend *blk, DeviceState *dev) 1016 { 1017 assert(blk->dev == dev); 1018 GLOBAL_STATE_CODE(); 1019 blk->dev = NULL; 1020 blk->dev_ops = NULL; 1021 blk->dev_opaque = NULL; 1022 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); 1023 blk_unref(blk); 1024 } 1025 1026 /* 1027 * Return the device model attached to @blk if any, else null. 1028 */ 1029 DeviceState *blk_get_attached_dev(BlockBackend *blk) 1030 { 1031 GLOBAL_STATE_CODE(); 1032 return blk->dev; 1033 } 1034 1035 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block 1036 * device attached to the BlockBackend. 
*/ 1037 char *blk_get_attached_dev_id(BlockBackend *blk) 1038 { 1039 DeviceState *dev = blk->dev; 1040 IO_CODE(); 1041 1042 if (!dev) { 1043 return g_strdup(""); 1044 } else if (dev->id) { 1045 return g_strdup(dev->id); 1046 } 1047 1048 return object_get_canonical_path(OBJECT(dev)) ?: g_strdup(""); 1049 } 1050 1051 /* 1052 * Return the BlockBackend which has the device model @dev attached if it 1053 * exists, else null. 1054 * 1055 * @dev must not be null. 1056 */ 1057 BlockBackend *blk_by_dev(void *dev) 1058 { 1059 BlockBackend *blk = NULL; 1060 1061 GLOBAL_STATE_CODE(); 1062 1063 assert(dev != NULL); 1064 while ((blk = blk_all_next(blk)) != NULL) { 1065 if (blk->dev == dev) { 1066 return blk; 1067 } 1068 } 1069 return NULL; 1070 } 1071 1072 /* 1073 * Set @blk's device model callbacks to @ops. 1074 * @opaque is the opaque argument to pass to the callbacks. 1075 * This is for use by device models. 1076 */ 1077 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, 1078 void *opaque) 1079 { 1080 GLOBAL_STATE_CODE(); 1081 blk->dev_ops = ops; 1082 blk->dev_opaque = opaque; 1083 1084 /* Are we currently quiesced? Should we enforce this right now? */ 1085 if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) { 1086 ops->drained_begin(opaque); 1087 } 1088 } 1089 1090 /* 1091 * Notify @blk's attached device model of media change. 1092 * 1093 * If @load is true, notify of media load. This action can fail, meaning that 1094 * the medium cannot be loaded. @errp is set then. 1095 * 1096 * If @load is false, notify of media eject. This can never fail. 1097 * 1098 * Also send DEVICE_TRAY_MOVED events as appropriate. 1099 */ 1100 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) 1101 { 1102 GLOBAL_STATE_CODE(); 1103 if (blk->dev_ops && blk->dev_ops->change_media_cb) { 1104 bool tray_was_open, tray_is_open; 1105 Error *local_err = NULL; 1106 1107 tray_was_open = blk_dev_is_tray_open(blk); 1108 blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); 1109 if (local_err) { 1110 assert(load == true); 1111 error_propagate(errp, local_err); 1112 return; 1113 } 1114 tray_is_open = blk_dev_is_tray_open(blk); 1115 1116 if (tray_was_open != tray_is_open) { 1117 char *id = blk_get_attached_dev_id(blk); 1118 qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open); 1119 g_free(id); 1120 } 1121 } 1122 } 1123 1124 static void blk_root_change_media(BdrvChild *child, bool load) 1125 { 1126 blk_dev_change_media_cb(child->opaque, load, NULL); 1127 } 1128 1129 /* 1130 * Does @blk's attached device model have removable media? 1131 * %true if no device model is attached. 1132 */ 1133 bool blk_dev_has_removable_media(BlockBackend *blk) 1134 { 1135 GLOBAL_STATE_CODE(); 1136 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); 1137 } 1138 1139 /* 1140 * Does @blk's attached device model have a tray? 1141 */ 1142 bool blk_dev_has_tray(BlockBackend *blk) 1143 { 1144 IO_CODE(); 1145 return blk->dev_ops && blk->dev_ops->is_tray_open; 1146 } 1147 1148 /* 1149 * Notify @blk's attached device model of a media eject request. 1150 * If @force is true, the medium is about to be yanked out forcefully. 1151 */ 1152 void blk_dev_eject_request(BlockBackend *blk, bool force) 1153 { 1154 GLOBAL_STATE_CODE(); 1155 if (blk->dev_ops && blk->dev_ops->eject_request_cb) { 1156 blk->dev_ops->eject_request_cb(blk->dev_opaque, force); 1157 } 1158 } 1159 1160 /* 1161 * Does @blk's attached device model have a tray, and is it open? 
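 * %false if there is no tray.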
1162 */ 1163 bool blk_dev_is_tray_open(BlockBackend *blk) 1164 { 1165 IO_CODE(); 1166 if (blk_dev_has_tray(blk)) { 1167 return blk->dev_ops->is_tray_open(blk->dev_opaque); 1168 } 1169 return false; 1170 } 1171 1172 /* 1173 * Does @blk's attached device model have the medium locked? 1174 * %false if the device model has no such lock. 1175 */ 1176 bool blk_dev_is_medium_locked(BlockBackend *blk) 1177 { 1178 GLOBAL_STATE_CODE(); 1179 if (blk->dev_ops && blk->dev_ops->is_medium_locked) { 1180 return blk->dev_ops->is_medium_locked(blk->dev_opaque); 1181 } 1182 return false; 1183 } 1184 1185 /* 1186 * Notify @blk's attached device model of a backend size change. 1187 */ 1188 static void blk_root_resize(BdrvChild *child) 1189 { 1190 BlockBackend *blk = child->opaque; 1191 1192 if (blk->dev_ops && blk->dev_ops->resize_cb) { 1193 blk->dev_ops->resize_cb(blk->dev_opaque); 1194 } 1195 } 1196 1197 void blk_iostatus_enable(BlockBackend *blk) 1198 { 1199 GLOBAL_STATE_CODE(); 1200 blk->iostatus_enabled = true; 1201 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 1202 } 1203 1204 /* The I/O status is only enabled if the drive explicitly 1205 * enables it _and_ the VM is configured to stop on errors */ 1206 bool blk_iostatus_is_enabled(const BlockBackend *blk) 1207 { 1208 IO_CODE(); 1209 return (blk->iostatus_enabled && 1210 (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 1211 blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || 1212 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 1213 } 1214 1215 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) 1216 { 1217 GLOBAL_STATE_CODE(); 1218 return blk->iostatus; 1219 } 1220 1221 void blk_iostatus_disable(BlockBackend *blk) 1222 { 1223 GLOBAL_STATE_CODE(); 1224 blk->iostatus_enabled = false; 1225 } 1226 1227 void blk_iostatus_reset(BlockBackend *blk) 1228 { 1229 GLOBAL_STATE_CODE(); 1230 if (blk_iostatus_is_enabled(blk)) { 1231 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 1232 } 1233 } 1234 1235 void blk_iostatus_set_err(BlockBackend *blk, int error) 1236 { 1237 IO_CODE(); 1238 assert(blk_iostatus_is_enabled(blk)); 1239 if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 1240 blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 1241 BLOCK_DEVICE_IO_STATUS_FAILED; 1242 } 1243 } 1244 1245 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) 1246 { 1247 IO_CODE(); 1248 blk->allow_write_beyond_eof = allow; 1249 } 1250 1251 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) 1252 { 1253 IO_CODE(); 1254 blk->allow_aio_context_change = allow; 1255 } 1256 1257 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) 1258 { 1259 IO_CODE(); 1260 qatomic_set(&blk->disable_request_queuing, disable); 1261 } 1262 1263 static int coroutine_fn GRAPH_RDLOCK 1264 blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes) 1265 { 1266 int64_t len; 1267 1268 if (bytes < 0) { 1269 return -EIO; 1270 } 1271 1272 if (!blk_co_is_available(blk)) { 1273 return -ENOMEDIUM; 1274 } 1275 1276 if (offset < 0) { 1277 return -EIO; 1278 } 1279 1280 if (!blk->allow_write_beyond_eof) { 1281 len = bdrv_co_getlength(blk_bs(blk)); 1282 if (len < 0) { 1283 return len; 1284 } 1285 1286 if (offset > len || len - offset < bytes) { 1287 return -EIO; 1288 } 1289 } 1290 1291 return 0; 1292 } 1293 1294 /* Are we currently in a drained section? 
*/ 1295 bool blk_in_drain(BlockBackend *blk) 1296 { 1297 GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */ 1298 return qatomic_read(&blk->quiesce_counter); 1299 } 1300 1301 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1302 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) 1303 { 1304 assert(blk->in_flight > 0); 1305 1306 if (qatomic_read(&blk->quiesce_counter) && 1307 !qatomic_read(&blk->disable_request_queuing)) { 1308 /* 1309 * Take lock before decrementing in flight counter so main loop thread 1310 * waits for us to enqueue ourselves before it can leave the drained 1311 * section. 1312 */ 1313 qemu_mutex_lock(&blk->queued_requests_lock); 1314 blk_dec_in_flight(blk); 1315 qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock); 1316 blk_inc_in_flight(blk); 1317 qemu_mutex_unlock(&blk->queued_requests_lock); 1318 } 1319 } 1320 1321 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1322 static int coroutine_fn 1323 blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, 1324 QEMUIOVector *qiov, size_t qiov_offset, 1325 BdrvRequestFlags flags) 1326 { 1327 int ret; 1328 BlockDriverState *bs; 1329 IO_CODE(); 1330 1331 blk_wait_while_drained(blk); 1332 GRAPH_RDLOCK_GUARD(); 1333 1334 /* Call blk_bs() only after waiting, the graph may have changed */ 1335 bs = blk_bs(blk); 1336 trace_blk_co_preadv(blk, bs, offset, bytes, flags); 1337 1338 ret = blk_check_byte_request(blk, offset, bytes); 1339 if (ret < 0) { 1340 return ret; 1341 } 1342 1343 bdrv_inc_in_flight(bs); 1344 1345 /* throttling disk I/O */ 1346 if (blk->public.throttle_group_member.throttle_state) { 1347 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, 1348 bytes, THROTTLE_READ); 1349 } 1350 1351 ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset, 1352 flags); 1353 bdrv_dec_in_flight(bs); 1354 return ret; 1355 } 1356 1357 int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, 1358 void *buf, BdrvRequestFlags flags) 1359 { 1360 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); 1361 IO_OR_GS_CODE(); 1362 1363 assert(bytes <= SIZE_MAX); 1364 1365 return blk_co_preadv(blk, offset, bytes, &qiov, flags); 1366 } 1367 1368 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, 1369 int64_t bytes, QEMUIOVector *qiov, 1370 BdrvRequestFlags flags) 1371 { 1372 int ret; 1373 IO_OR_GS_CODE(); 1374 1375 blk_inc_in_flight(blk); 1376 ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags); 1377 blk_dec_in_flight(blk); 1378 1379 return ret; 1380 } 1381 1382 int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, 1383 int64_t bytes, QEMUIOVector *qiov, 1384 size_t qiov_offset, BdrvRequestFlags flags) 1385 { 1386 int ret; 1387 IO_OR_GS_CODE(); 1388 1389 blk_inc_in_flight(blk); 1390 ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags); 1391 blk_dec_in_flight(blk); 1392 1393 return ret; 1394 } 1395 1396 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1397 static int coroutine_fn 1398 blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, 1399 QEMUIOVector *qiov, size_t qiov_offset, 1400 BdrvRequestFlags flags) 1401 { 1402 int ret; 1403 BlockDriverState *bs; 1404 IO_CODE(); 1405 1406 blk_wait_while_drained(blk); 1407 GRAPH_RDLOCK_GUARD(); 1408 1409 /* Call blk_bs() only after waiting, the graph may have changed */ 1410 bs = blk_bs(blk); 1411 trace_blk_co_pwritev(blk, bs, 
offset, bytes, flags); 1412 1413 ret = blk_check_byte_request(blk, offset, bytes); 1414 if (ret < 0) { 1415 return ret; 1416 } 1417 1418 bdrv_inc_in_flight(bs); 1419 /* throttling disk I/O */ 1420 if (blk->public.throttle_group_member.throttle_state) { 1421 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, 1422 bytes, THROTTLE_WRITE); 1423 } 1424 1425 if (!blk->enable_write_cache) { 1426 flags |= BDRV_REQ_FUA; 1427 } 1428 1429 ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset, 1430 flags); 1431 bdrv_dec_in_flight(bs); 1432 return ret; 1433 } 1434 1435 int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, 1436 int64_t bytes, 1437 QEMUIOVector *qiov, size_t qiov_offset, 1438 BdrvRequestFlags flags) 1439 { 1440 int ret; 1441 IO_OR_GS_CODE(); 1442 1443 blk_inc_in_flight(blk); 1444 ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); 1445 blk_dec_in_flight(blk); 1446 1447 return ret; 1448 } 1449 1450 int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, 1451 const void *buf, BdrvRequestFlags flags) 1452 { 1453 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); 1454 IO_OR_GS_CODE(); 1455 1456 assert(bytes <= SIZE_MAX); 1457 1458 return blk_co_pwritev(blk, offset, bytes, &qiov, flags); 1459 } 1460 1461 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, 1462 int64_t bytes, QEMUIOVector *qiov, 1463 BdrvRequestFlags flags) 1464 { 1465 IO_OR_GS_CODE(); 1466 return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); 1467 } 1468 1469 int coroutine_fn blk_co_block_status_above(BlockBackend *blk, 1470 BlockDriverState *base, 1471 int64_t offset, int64_t bytes, 1472 int64_t *pnum, int64_t *map, 1473 BlockDriverState **file) 1474 { 1475 IO_CODE(); 1476 GRAPH_RDLOCK_GUARD(); 1477 return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum, 1478 map, file); 1479 } 1480 1481 int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, 1482 BlockDriverState *base, 1483 bool include_base, int64_t offset, 1484 int64_t bytes, int64_t *pnum) 1485 { 1486 IO_CODE(); 1487 GRAPH_RDLOCK_GUARD(); 1488 return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset, 1489 bytes, pnum); 1490 } 1491 1492 typedef struct BlkRwCo { 1493 BlockBackend *blk; 1494 int64_t offset; 1495 void *iobuf; 1496 int ret; 1497 BdrvRequestFlags flags; 1498 } BlkRwCo; 1499 1500 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) 1501 { 1502 GLOBAL_STATE_CODE(); 1503 return bdrv_make_zero(blk->root, flags); 1504 } 1505 1506 void blk_inc_in_flight(BlockBackend *blk) 1507 { 1508 IO_CODE(); 1509 qatomic_inc(&blk->in_flight); 1510 } 1511 1512 void blk_dec_in_flight(BlockBackend *blk) 1513 { 1514 IO_CODE(); 1515 qatomic_dec(&blk->in_flight); 1516 aio_wait_kick(); 1517 } 1518 1519 static void error_callback_bh(void *opaque) 1520 { 1521 struct BlockBackendAIOCB *acb = opaque; 1522 1523 blk_dec_in_flight(acb->blk); 1524 acb->common.cb(acb->common.opaque, acb->ret); 1525 qemu_aio_unref(acb); 1526 } 1527 1528 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, 1529 BlockCompletionFunc *cb, 1530 void *opaque, int ret) 1531 { 1532 struct BlockBackendAIOCB *acb; 1533 IO_CODE(); 1534 1535 blk_inc_in_flight(blk); 1536 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); 1537 acb->blk = blk; 1538 acb->ret = ret; 1539 1540 replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), 1541 error_callback_bh, acb); 1542 return &acb->common; 1543 } 1544 1545 typedef struct 
BlkAioEmAIOCB { 1546 BlockAIOCB common; 1547 BlkRwCo rwco; 1548 int64_t bytes; 1549 bool has_returned; 1550 } BlkAioEmAIOCB; 1551 1552 static const AIOCBInfo blk_aio_em_aiocb_info = { 1553 .aiocb_size = sizeof(BlkAioEmAIOCB), 1554 }; 1555 1556 static void blk_aio_complete(BlkAioEmAIOCB *acb) 1557 { 1558 if (acb->has_returned) { 1559 acb->common.cb(acb->common.opaque, acb->rwco.ret); 1560 blk_dec_in_flight(acb->rwco.blk); 1561 qemu_aio_unref(acb); 1562 } 1563 } 1564 1565 static void blk_aio_complete_bh(void *opaque) 1566 { 1567 BlkAioEmAIOCB *acb = opaque; 1568 assert(acb->has_returned); 1569 blk_aio_complete(acb); 1570 } 1571 1572 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, 1573 int64_t bytes, 1574 void *iobuf, CoroutineEntry co_entry, 1575 BdrvRequestFlags flags, 1576 BlockCompletionFunc *cb, void *opaque) 1577 { 1578 BlkAioEmAIOCB *acb; 1579 Coroutine *co; 1580 1581 blk_inc_in_flight(blk); 1582 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); 1583 acb->rwco = (BlkRwCo) { 1584 .blk = blk, 1585 .offset = offset, 1586 .iobuf = iobuf, 1587 .flags = flags, 1588 .ret = NOT_DONE, 1589 }; 1590 acb->bytes = bytes; 1591 acb->has_returned = false; 1592 1593 co = qemu_coroutine_create(co_entry, acb); 1594 aio_co_enter(qemu_get_current_aio_context(), co); 1595 1596 acb->has_returned = true; 1597 if (acb->rwco.ret != NOT_DONE) { 1598 replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), 1599 blk_aio_complete_bh, acb); 1600 } 1601 1602 return &acb->common; 1603 } 1604 1605 static void coroutine_fn blk_aio_read_entry(void *opaque) 1606 { 1607 BlkAioEmAIOCB *acb = opaque; 1608 BlkRwCo *rwco = &acb->rwco; 1609 QEMUIOVector *qiov = rwco->iobuf; 1610 1611 assert(qiov->size == acb->bytes); 1612 rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov, 1613 0, rwco->flags); 1614 blk_aio_complete(acb); 1615 } 1616 1617 static void coroutine_fn blk_aio_write_entry(void *opaque) 1618 { 1619 BlkAioEmAIOCB *acb = opaque; 1620 BlkRwCo *rwco = &acb->rwco; 1621 QEMUIOVector *qiov = rwco->iobuf; 1622 1623 assert(!qiov || qiov->size == acb->bytes); 1624 rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, 1625 qiov, 0, rwco->flags); 1626 blk_aio_complete(acb); 1627 } 1628 1629 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, 1630 int64_t bytes, BdrvRequestFlags flags, 1631 BlockCompletionFunc *cb, void *opaque) 1632 { 1633 IO_CODE(); 1634 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, 1635 flags | BDRV_REQ_ZERO_WRITE, cb, opaque); 1636 } 1637 1638 int64_t coroutine_fn blk_co_getlength(BlockBackend *blk) 1639 { 1640 IO_CODE(); 1641 GRAPH_RDLOCK_GUARD(); 1642 1643 if (!blk_co_is_available(blk)) { 1644 return -ENOMEDIUM; 1645 } 1646 1647 return bdrv_co_getlength(blk_bs(blk)); 1648 } 1649 1650 int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk) 1651 { 1652 BlockDriverState *bs = blk_bs(blk); 1653 1654 IO_CODE(); 1655 GRAPH_RDLOCK_GUARD(); 1656 1657 if (!bs) { 1658 return -ENOMEDIUM; 1659 } else { 1660 return bdrv_co_nb_sectors(bs); 1661 } 1662 } 1663 1664 /* 1665 * This wrapper is written by hand because this function is in the hot I/O path, 1666 * via blk_get_geometry. 
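 * Returns the number of sectors, or -ENOMEDIUM if no BlockDriverState is
 * attached.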
1667 */ 1668 int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk) 1669 { 1670 BlockDriverState *bs = blk_bs(blk); 1671 1672 IO_CODE(); 1673 1674 if (!bs) { 1675 return -ENOMEDIUM; 1676 } else { 1677 return bdrv_nb_sectors(bs); 1678 } 1679 } 1680 1681 /* return 0 as number of sectors if no device present or error */ 1682 void coroutine_fn blk_co_get_geometry(BlockBackend *blk, 1683 uint64_t *nb_sectors_ptr) 1684 { 1685 int64_t ret = blk_co_nb_sectors(blk); 1686 *nb_sectors_ptr = ret < 0 ? 0 : ret; 1687 } 1688 1689 /* 1690 * This wrapper is written by hand because this function is in the hot I/O path. 1691 */ 1692 void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk, 1693 uint64_t *nb_sectors_ptr) 1694 { 1695 int64_t ret = blk_nb_sectors(blk); 1696 *nb_sectors_ptr = ret < 0 ? 0 : ret; 1697 } 1698 1699 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, 1700 QEMUIOVector *qiov, BdrvRequestFlags flags, 1701 BlockCompletionFunc *cb, void *opaque) 1702 { 1703 IO_CODE(); 1704 assert((uint64_t)qiov->size <= INT64_MAX); 1705 return blk_aio_prwv(blk, offset, qiov->size, qiov, 1706 blk_aio_read_entry, flags, cb, opaque); 1707 } 1708 1709 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, 1710 QEMUIOVector *qiov, BdrvRequestFlags flags, 1711 BlockCompletionFunc *cb, void *opaque) 1712 { 1713 IO_CODE(); 1714 assert((uint64_t)qiov->size <= INT64_MAX); 1715 return blk_aio_prwv(blk, offset, qiov->size, qiov, 1716 blk_aio_write_entry, flags, cb, opaque); 1717 } 1718 1719 void blk_aio_cancel(BlockAIOCB *acb) 1720 { 1721 GLOBAL_STATE_CODE(); 1722 bdrv_aio_cancel(acb); 1723 } 1724 1725 void blk_aio_cancel_async(BlockAIOCB *acb) 1726 { 1727 IO_CODE(); 1728 bdrv_aio_cancel_async(acb); 1729 } 1730 1731 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1732 static int coroutine_fn 1733 blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) 1734 { 1735 IO_CODE(); 1736 1737 blk_wait_while_drained(blk); 1738 GRAPH_RDLOCK_GUARD(); 1739 1740 if (!blk_co_is_available(blk)) { 1741 return -ENOMEDIUM; 1742 } 1743 1744 return bdrv_co_ioctl(blk_bs(blk), req, buf); 1745 } 1746 1747 int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, 1748 void *buf) 1749 { 1750 int ret; 1751 IO_OR_GS_CODE(); 1752 1753 blk_inc_in_flight(blk); 1754 ret = blk_co_do_ioctl(blk, req, buf); 1755 blk_dec_in_flight(blk); 1756 1757 return ret; 1758 } 1759 1760 static void coroutine_fn blk_aio_ioctl_entry(void *opaque) 1761 { 1762 BlkAioEmAIOCB *acb = opaque; 1763 BlkRwCo *rwco = &acb->rwco; 1764 1765 rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); 1766 1767 blk_aio_complete(acb); 1768 } 1769 1770 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, 1771 BlockCompletionFunc *cb, void *opaque) 1772 { 1773 IO_CODE(); 1774 return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); 1775 } 1776 1777 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1778 static int coroutine_fn 1779 blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) 1780 { 1781 int ret; 1782 IO_CODE(); 1783 1784 blk_wait_while_drained(blk); 1785 GRAPH_RDLOCK_GUARD(); 1786 1787 ret = blk_check_byte_request(blk, offset, bytes); 1788 if (ret < 0) { 1789 return ret; 1790 } 1791 1792 return bdrv_co_pdiscard(blk->root, offset, bytes); 1793 } 1794 1795 static void coroutine_fn blk_aio_pdiscard_entry(void *opaque) 1796 { 1797 BlkAioEmAIOCB *acb = opaque; 1798 BlkRwCo *rwco = &acb->rwco; 1799 1800 rwco->ret = 
blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); 1801 blk_aio_complete(acb); 1802 } 1803 1804 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, 1805 int64_t offset, int64_t bytes, 1806 BlockCompletionFunc *cb, void *opaque) 1807 { 1808 IO_CODE(); 1809 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, 1810 cb, opaque); 1811 } 1812 1813 int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, 1814 int64_t bytes) 1815 { 1816 int ret; 1817 IO_OR_GS_CODE(); 1818 1819 blk_inc_in_flight(blk); 1820 ret = blk_co_do_pdiscard(blk, offset, bytes); 1821 blk_dec_in_flight(blk); 1822 1823 return ret; 1824 } 1825 1826 /* To be called between exactly one pair of blk_inc/dec_in_flight() */ 1827 static int coroutine_fn blk_co_do_flush(BlockBackend *blk) 1828 { 1829 IO_CODE(); 1830 blk_wait_while_drained(blk); 1831 GRAPH_RDLOCK_GUARD(); 1832 1833 if (!blk_co_is_available(blk)) { 1834 return -ENOMEDIUM; 1835 } 1836 1837 return bdrv_co_flush(blk_bs(blk)); 1838 } 1839 1840 static void coroutine_fn blk_aio_flush_entry(void *opaque) 1841 { 1842 BlkAioEmAIOCB *acb = opaque; 1843 BlkRwCo *rwco = &acb->rwco; 1844 1845 rwco->ret = blk_co_do_flush(rwco->blk); 1846 blk_aio_complete(acb); 1847 } 1848 1849 BlockAIOCB *blk_aio_flush(BlockBackend *blk, 1850 BlockCompletionFunc *cb, void *opaque) 1851 { 1852 IO_CODE(); 1853 return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); 1854 } 1855 1856 int coroutine_fn blk_co_flush(BlockBackend *blk) 1857 { 1858 int ret; 1859 IO_OR_GS_CODE(); 1860 1861 blk_inc_in_flight(blk); 1862 ret = blk_co_do_flush(blk); 1863 blk_dec_in_flight(blk); 1864 1865 return ret; 1866 } 1867 1868 static void coroutine_fn blk_aio_zone_report_entry(void *opaque) 1869 { 1870 BlkAioEmAIOCB *acb = opaque; 1871 BlkRwCo *rwco = &acb->rwco; 1872 1873 rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset, 1874 (unsigned int*)(uintptr_t)acb->bytes, 1875 rwco->iobuf); 1876 blk_aio_complete(acb); 1877 } 1878 1879 BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, 1880 unsigned int *nr_zones, 1881 BlockZoneDescriptor *zones, 1882 BlockCompletionFunc *cb, void *opaque) 1883 { 1884 BlkAioEmAIOCB *acb; 1885 Coroutine *co; 1886 IO_CODE(); 1887 1888 blk_inc_in_flight(blk); 1889 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); 1890 acb->rwco = (BlkRwCo) { 1891 .blk = blk, 1892 .offset = offset, 1893 .iobuf = zones, 1894 .ret = NOT_DONE, 1895 }; 1896 acb->bytes = (int64_t)(uintptr_t)nr_zones, 1897 acb->has_returned = false; 1898 1899 co = qemu_coroutine_create(blk_aio_zone_report_entry, acb); 1900 aio_co_enter(qemu_get_current_aio_context(), co); 1901 1902 acb->has_returned = true; 1903 if (acb->rwco.ret != NOT_DONE) { 1904 replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), 1905 blk_aio_complete_bh, acb); 1906 } 1907 1908 return &acb->common; 1909 } 1910 1911 static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) 1912 { 1913 BlkAioEmAIOCB *acb = opaque; 1914 BlkRwCo *rwco = &acb->rwco; 1915 1916 rwco->ret = blk_co_zone_mgmt(rwco->blk, 1917 (BlockZoneOp)(uintptr_t)rwco->iobuf, 1918 rwco->offset, acb->bytes); 1919 blk_aio_complete(acb); 1920 } 1921 1922 BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, 1923 int64_t offset, int64_t len, 1924 BlockCompletionFunc *cb, void *opaque) { 1925 BlkAioEmAIOCB *acb; 1926 Coroutine *co; 1927 IO_CODE(); 1928 1929 blk_inc_in_flight(blk); 1930 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); 1931 acb->rwco = (BlkRwCo) { 1932 .blk = blk, 1933 .offset = 
offset, 1934 .iobuf = (void *)(uintptr_t)op, 1935 .ret = NOT_DONE, 1936 }; 1937 acb->bytes = len; 1938 acb->has_returned = false; 1939 1940 co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb); 1941 aio_co_enter(qemu_get_current_aio_context(), co); 1942 1943 acb->has_returned = true; 1944 if (acb->rwco.ret != NOT_DONE) { 1945 replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), 1946 blk_aio_complete_bh, acb); 1947 } 1948 1949 return &acb->common; 1950 } 1951 1952 static void coroutine_fn blk_aio_zone_append_entry(void *opaque) 1953 { 1954 BlkAioEmAIOCB *acb = opaque; 1955 BlkRwCo *rwco = &acb->rwco; 1956 1957 rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes, 1958 rwco->iobuf, rwco->flags); 1959 blk_aio_complete(acb); 1960 } 1961 1962 BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, 1963 QEMUIOVector *qiov, BdrvRequestFlags flags, 1964 BlockCompletionFunc *cb, void *opaque) { 1965 BlkAioEmAIOCB *acb; 1966 Coroutine *co; 1967 IO_CODE(); 1968 1969 blk_inc_in_flight(blk); 1970 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); 1971 acb->rwco = (BlkRwCo) { 1972 .blk = blk, 1973 .ret = NOT_DONE, 1974 .flags = flags, 1975 .iobuf = qiov, 1976 }; 1977 acb->bytes = (int64_t)(uintptr_t)offset; 1978 acb->has_returned = false; 1979 1980 co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); 1981 aio_co_enter(qemu_get_current_aio_context(), co); 1982 acb->has_returned = true; 1983 if (acb->rwco.ret != NOT_DONE) { 1984 replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), 1985 blk_aio_complete_bh, acb); 1986 } 1987 1988 return &acb->common; 1989 } 1990 1991 /* 1992 * Send a zone_report command. 1993 * offset is a byte offset from the start of the device. No alignment 1994 * required for offset. 1995 * nr_zones represents IN maximum and OUT actual. 1996 */ 1997 int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset, 1998 unsigned int *nr_zones, 1999 BlockZoneDescriptor *zones) 2000 { 2001 int ret; 2002 IO_CODE(); 2003 2004 blk_inc_in_flight(blk); /* increase before waiting */ 2005 blk_wait_while_drained(blk); 2006 GRAPH_RDLOCK_GUARD(); 2007 if (!blk_is_available(blk)) { 2008 blk_dec_in_flight(blk); 2009 return -ENOMEDIUM; 2010 } 2011 ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones); 2012 blk_dec_in_flight(blk); 2013 return ret; 2014 } 2015 2016 /* 2017 * Send a zone_management command. 2018 * op is the zone operation; 2019 * offset is the byte offset from the start of the zoned device; 2020 * len is the maximum number of bytes the command should operate on. It 2021 * should be aligned with the device zone size. 2022 */ 2023 int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, 2024 int64_t offset, int64_t len) 2025 { 2026 int ret; 2027 IO_CODE(); 2028 2029 blk_inc_in_flight(blk); 2030 blk_wait_while_drained(blk); 2031 GRAPH_RDLOCK_GUARD(); 2032 2033 ret = blk_check_byte_request(blk, offset, len); 2034 if (ret < 0) { 2035 blk_dec_in_flight(blk); 2036 return ret; 2037 } 2038 2039 ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len); 2040 blk_dec_in_flight(blk); 2041 return ret; 2042 } 2043 2044 /* 2045 * Send a zone_append command. 
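 * offset and qiov are forwarded to bdrv_co_zone_append() on the attached
 * node; returns -ENOMEDIUM if no medium is available.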
2046 */ 2047 int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, 2048 QEMUIOVector *qiov, BdrvRequestFlags flags) 2049 { 2050 int ret; 2051 IO_CODE(); 2052 2053 blk_inc_in_flight(blk); 2054 blk_wait_while_drained(blk); 2055 GRAPH_RDLOCK_GUARD(); 2056 if (!blk_is_available(blk)) { 2057 blk_dec_in_flight(blk); 2058 return -ENOMEDIUM; 2059 } 2060 2061 ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags); 2062 blk_dec_in_flight(blk); 2063 return ret; 2064 } 2065 2066 void blk_drain(BlockBackend *blk) 2067 { 2068 BlockDriverState *bs = blk_bs(blk); 2069 GLOBAL_STATE_CODE(); 2070 2071 if (bs) { 2072 bdrv_ref(bs); 2073 bdrv_drained_begin(bs); 2074 } 2075 2076 /* We may have -ENOMEDIUM completions in flight */ 2077 AIO_WAIT_WHILE(blk_get_aio_context(blk), 2078 qatomic_read(&blk->in_flight) > 0); 2079 2080 if (bs) { 2081 bdrv_drained_end(bs); 2082 bdrv_unref(bs); 2083 } 2084 } 2085 2086 void blk_drain_all(void) 2087 { 2088 BlockBackend *blk = NULL; 2089 2090 GLOBAL_STATE_CODE(); 2091 2092 bdrv_drain_all_begin(); 2093 2094 while ((blk = blk_all_next(blk)) != NULL) { 2095 /* We may have -ENOMEDIUM completions in flight */ 2096 AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0); 2097 } 2098 2099 bdrv_drain_all_end(); 2100 } 2101 2102 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, 2103 BlockdevOnError on_write_error) 2104 { 2105 GLOBAL_STATE_CODE(); 2106 blk->on_read_error = on_read_error; 2107 blk->on_write_error = on_write_error; 2108 } 2109 2110 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) 2111 { 2112 IO_CODE(); 2113 return is_read ? blk->on_read_error : blk->on_write_error; 2114 } 2115 2116 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, 2117 int error) 2118 { 2119 BlockdevOnError on_err = blk_get_on_error(blk, is_read); 2120 IO_CODE(); 2121 2122 switch (on_err) { 2123 case BLOCKDEV_ON_ERROR_ENOSPC: 2124 return (error == ENOSPC) ? 2125 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2126 case BLOCKDEV_ON_ERROR_STOP: 2127 return BLOCK_ERROR_ACTION_STOP; 2128 case BLOCKDEV_ON_ERROR_REPORT: 2129 return BLOCK_ERROR_ACTION_REPORT; 2130 case BLOCKDEV_ON_ERROR_IGNORE: 2131 return BLOCK_ERROR_ACTION_IGNORE; 2132 case BLOCKDEV_ON_ERROR_AUTO: 2133 default: 2134 abort(); 2135 } 2136 } 2137 2138 static void send_qmp_error_event(BlockBackend *blk, 2139 BlockErrorAction action, 2140 bool is_read, int error) 2141 { 2142 IoOperationType optype; 2143 BlockDriverState *bs = blk_bs(blk); 2144 2145 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2146 qapi_event_send_block_io_error(blk_name(blk), 2147 bs ? bdrv_get_node_name(bs) : NULL, optype, 2148 action, blk_iostatus_is_enabled(blk), 2149 error == ENOSPC, strerror(error)); 2150 } 2151 2152 /* This is done by device models because, while the block layer knows 2153 * about the error, it does not know whether an operation comes from 2154 * the device or the block layer (from a job, for example). 2155 */ 2156 void blk_error_action(BlockBackend *blk, BlockErrorAction action, 2157 bool is_read, int error) 2158 { 2159 assert(error >= 0); 2160 IO_CODE(); 2161 2162 if (action == BLOCK_ERROR_ACTION_STOP) { 2163 /* First set the iostatus, so that "info block" returns an iostatus 2164 * that matches the events raised so far (an additional error iostatus 2165 * is fine, but not a lost one). 2166 */ 2167 blk_iostatus_set_err(blk, error); 2168 2169 /* Then raise the request to stop the VM and the event. 
/*
 * Returns true if the BlockBackend can support taking write permissions
 * (because its root node is not read-only).
 */
bool blk_supports_write_perm(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (bs) {
        return !bdrv_is_read_only(bs);
    } else {
        return blk->root_state.open_flags & BDRV_O_RDWR;
    }
}

/*
 * Returns true if the BlockBackend can be written to in its current
 * configuration (i.e. if write permissions have been requested).
 */
bool blk_is_writable(BlockBackend *blk)
{
    IO_CODE();
    return blk->perm & BLK_PERM_WRITE;
}

bool blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (!bs) {
        return false;
    }

    return bdrv_is_sg(bs);
}

bool blk_enable_write_cache(BlockBackend *blk)
{
    IO_CODE();
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    IO_CODE();
    blk->enable_write_cache = wce;
}

void blk_activate(BlockBackend *blk, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (!bs) {
        error_setg(errp, "Device '%s' has no medium", blk->name);
        return;
    }

    /*
     * Migration code can call this function in coroutine context, so leave
     * coroutine context if necessary.
     */
    if (qemu_in_coroutine()) {
        bdrv_co_activate(bs, errp);
    } else {
        GRAPH_RDLOCK_GUARD_MAINLOOP();
        bdrv_activate(bs, errp);
    }
}

bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    assert_bdrv_graph_readable();

    return bs && bdrv_co_is_inserted(bs);
}

bool coroutine_fn blk_co_is_available(BlockBackend *blk)
{
    IO_CODE();
    return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    GRAPH_RDLOCK_GUARD();

    if (bs) {
        bdrv_co_lock_medium(bs, locked);
    }
}

void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);
    char *id;
    IO_CODE();
    GRAPH_RDLOCK_GUARD();

    if (bs) {
        bdrv_co_eject(bs, eject_flag);
    }

    /* Whether or not we ejected on the backend,
     * the frontend experienced a tray event.
     */
    id = blk_get_attached_dev_id(blk);
    qapi_event_send_device_tray_moved(blk_name(blk), id,
                                      eject_flag);
    g_free(id);
}
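/*
 * Illustrative sketch (hypothetical caller, not built here): the difference
 * between blk_supports_write_perm() and blk_is_writable().  The former says
 * whether write permission *could* be taken (the root node is not read-only);
 * the latter says whether this BlockBackend has *already* requested it.  A
 * device realize function might therefore do something like:
 *
 *   static int example_realize(BlockBackend *blk, bool read_only,
 *                              Error **errp)
 *   {
 *       uint64_t perm = BLK_PERM_CONSISTENT_READ;
 *
 *       if (!read_only) {
 *           if (!blk_supports_write_perm(blk)) {
 *               error_setg(errp, "Block node is read-only");
 *               return -EINVAL;
 *           }
 *           perm |= BLK_PERM_WRITE;
 *       }
 *       // After this succeeds, fast paths can simply check
 *       // blk_is_writable(blk).
 *       return blk_set_perm(blk, perm, BLK_PERM_ALL, errp);
 *   }
 */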
int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}

/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint64_t max = INT_MAX;
    IO_CODE();

    if (bs) {
        max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
    }
    return ROUND_DOWN(max, blk_get_request_alignment(blk));
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint32_t max = INT_MAX;
    IO_CODE();

    if (bs) {
        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
    }
    return ROUND_DOWN(max, blk_get_request_alignment(blk));
}

int blk_get_max_hw_iov(BlockBackend *blk)
{
    IO_CODE();
    return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
                        blk->root->bs->bl.max_iov);
}

int blk_get_max_iov(BlockBackend *blk)
{
    IO_CODE();
    return blk->root->bs->bl.max_iov;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    IO_CODE();
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    IO_CODE();
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    if (!bs) {
        return false;
    }

    return bdrv_op_is_blocked(bs, op, errp);
}

void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (bs) {
        bdrv_op_unblock(bs, op, reason);
    }
}

void blk_op_block_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (bs) {
        bdrv_op_block_all(bs, reason);
    }
}

void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    if (bs) {
        bdrv_op_unblock_all(bs, reason);
    }
}
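/*
 * Illustrative sketch (hypothetical helper, not built here): callers that
 * must honour the backend's transfer limits usually split large requests
 * into chunks of at most blk_get_max_transfer() bytes, which is already
 * rounded down to blk_get_request_alignment() and guaranteed nonzero.
 *
 *   static void example_split_request(BlockBackend *blk, int64_t offset,
 *                                     int64_t bytes)
 *   {
 *       uint32_t max = blk_get_max_transfer(blk);
 *
 *       while (bytes > 0) {
 *           int64_t chunk = MIN(bytes, (int64_t)max);
 *
 *           // Issue a read/write of 'chunk' bytes at 'offset' here.
 *           offset += chunk;
 *           bytes -= chunk;
 *       }
 *   }
 */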
/**
 * Return BB's current AioContext.  Note that this context may change
 * concurrently at any time, with one exception: If the BB has a root node
 * attached, its context will only change through
 * bdrv_try_change_aio_context(), which creates a drained section.
 * Therefore, incrementing such a BB's in-flight counter will prevent its
 * context from changing.
 */
AioContext *blk_get_aio_context(BlockBackend *blk)
{
    IO_CODE();

    if (!blk) {
        return qemu_get_aio_context();
    }

    return qatomic_read(&blk->ctx);
}

int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                        Error **errp)
{
    bool old_allow_change;
    BlockDriverState *bs = blk_bs(blk);
    int ret;

    GLOBAL_STATE_CODE();

    if (!bs) {
        qatomic_set(&blk->ctx, new_context);
        return 0;
    }

    bdrv_ref(bs);

    old_allow_change = blk->allow_aio_context_change;
    blk->allow_aio_context_change = true;

    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);

    blk->allow_aio_context_change = old_allow_change;

    bdrv_unref(bs);
    return ret;
}

typedef struct BdrvStateBlkRootContext {
    AioContext *new_ctx;
    BlockBackend *blk;
} BdrvStateBlkRootContext;

static void blk_root_set_aio_ctx_commit(void *opaque)
{
    BdrvStateBlkRootContext *s = opaque;
    BlockBackend *blk = s->blk;
    AioContext *new_context = s->new_ctx;
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;

    qatomic_set(&blk->ctx, new_context);
    if (tgm->throttle_state) {
        throttle_group_detach_aio_context(tgm);
        throttle_group_attach_aio_context(tgm, new_context);
    }
}

static TransactionActionDrv set_blk_root_context = {
    .commit = blk_root_set_aio_ctx_commit,
    .clean = g_free,
};

static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
                                    GHashTable *visited, Transaction *tran,
                                    Error **errp)
{
    BlockBackend *blk = child->opaque;
    BdrvStateBlkRootContext *s;

    if (!blk->allow_aio_context_change) {
        /*
         * Manually created BlockBackends (those with a name) that are not
         * attached to anything can change their AioContext without updating
         * their user; return an error for others.
         */
        if (!blk->name || blk->dev) {
            /* TODO Add BB name/QOM path */
            error_setg(errp, "Cannot change iothread of active block backend");
            return false;
        }
    }

    s = g_new(BdrvStateBlkRootContext, 1);
    *s = (BdrvStateBlkRootContext) {
        .new_ctx = ctx,
        .blk = blk,
    };

    tran_add(tran, &set_blk_root_context, s);
    return true;
}
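/*
 * Illustrative sketch (hypothetical caller, not built here): under the BQL,
 * a device that owns an IOThread might move its backend to that thread's
 * AioContext during realize and move it back on unrealize.  'iothread' is
 * assumed to be an IOThread the caller already holds a reference to.
 *
 *   static bool example_attach_to_iothread(BlockBackend *blk,
 *                                          IOThread *iothread, Error **errp)
 *   {
 *       AioContext *ctx = iothread_get_aio_context(iothread);
 *
 *       return blk_set_aio_context(blk, ctx, errp) == 0;
 *   }
 *
 *   // ... and on unrealize:
 *   //     blk_set_aio_context(blk, qemu_get_aio_context(), NULL);
 */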
void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockBackendAioNotifier *notifier;
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();

    notifier = g_new(BlockBackendAioNotifier, 1);
    notifier->attached_aio_context = attached_aio_context;
    notifier->detach_aio_context = detach_aio_context;
    notifier->opaque = opaque;
    QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);

    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockBackendAioNotifier *notifier;
    BlockDriverState *bs = blk_bs(blk);

    GLOBAL_STATE_CODE();

    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }

    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
        if (notifier->attached_aio_context == attached_aio_context &&
            notifier->detach_aio_context == detach_aio_context &&
            notifier->opaque == opaque) {
            QLIST_REMOVE(notifier, list);
            g_free(notifier);
            return;
        }
    }

    abort();
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    GLOBAL_STATE_CODE();
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    GLOBAL_STATE_CODE();
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    IO_CODE();
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                      int64_t bytes, BdrvRequestFlags flags)
{
    IO_OR_GS_CODE();
    return blk_co_pwritev(blk, offset, bytes, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
                                          int64_t bytes, const void *buf)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    IO_OR_GS_CODE();
    return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0,
                               BDRV_REQ_WRITE_COMPRESSED);
}

int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
                                 PreallocMode prealloc, BdrvRequestFlags flags,
                                 Error **errp)
{
    IO_OR_GS_CODE();
    GRAPH_RDLOCK_GUARD();
    if (!blk_co_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

    return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp);
}
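/*
 * Illustrative sketch (hypothetical device callbacks, not built here): a
 * user of the backend that keeps its own event sources in the backend's
 * AioContext can track context switches with the notifier pair above.
 *
 *   static void example_attached_ctx(AioContext *new_context, void *opaque)
 *   {
 *       // Re-arm timers/BHs for 'opaque' in new_context.
 *   }
 *
 *   static void example_detach_ctx(void *opaque)
 *   {
 *       // Quiesce and tear down event sources for 'opaque'.
 *   }
 *
 *   // During setup:
 *   //     blk_add_aio_context_notifier(blk, example_attached_ctx,
 *   //                                  example_detach_ctx, dev);
 *   // During teardown (callbacks and opaque must match exactly, otherwise
 *   // blk_remove_aio_context_notifier() aborts):
 *   //     blk_remove_aio_context_notifier(blk, example_attached_ctx,
 *   //                                     example_detach_ctx, dev);
 */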
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;
    GLOBAL_STATE_CODE();

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }

    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    GLOBAL_STATE_CODE();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    GLOBAL_STATE_CODE();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    assert(blk->root);

    blk->root_state.open_flags    = blk->root->bs->open_flags;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Returns the detect-zeroes setting to be used for bdrv_open() of a
 * BlockDriverState which is supposed to inherit the root state.
 */
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root_state.detect_zeroes;
}
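/*
 * Illustrative sketch (hypothetical caller, not built here): migration-style
 * code that appends a buffer to the vmstate area.  Note that on full success
 * blk_save_vmstate() returns 'size' and already flushes when writeback
 * caching is disabled.
 *
 *   static int example_append_vmstate(BlockBackend *blk, const uint8_t *buf,
 *                                     int64_t *pos, int size)
 *   {
 *       int ret = blk_save_vmstate(blk, buf, *pos, size);
 *
 *       if (ret < 0) {
 *           return ret;
 *       }
 *       *pos += ret;    // ret == size on success
 *       return 0;
 *   }
 */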
/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root_state.open_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return &blk->root_state;
}

int blk_commit_all(void)
{
    BlockBackend *blk = NULL;
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    while ((blk = blk_all_next(blk)) != NULL) {
        BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));

        if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
            int ret;

            ret = bdrv_commit(unfiltered_bs);
            if (ret < 0) {
                return ret;
            }
        }
    }
    return 0;
}


/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    GLOBAL_STATE_CODE();
    throttle_group_config(&blk->public.throttle_group_member, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    assert(tgm->throttle_state);
    GLOBAL_STATE_CODE();
    if (bs) {
        bdrv_ref(bs);
        bdrv_drained_begin(bs);
    }
    throttle_group_unregister_tgm(tgm);
    if (bs) {
        bdrv_drained_end(bs);
        bdrv_unref(bs);
    }
}

/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_group_member.throttle_state);
    GLOBAL_STATE_CODE();
    throttle_group_register_tgm(&blk->public.throttle_group_member,
                                group, blk_get_aio_context(blk));
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    GLOBAL_STATE_CODE();
    /* this BB is not part of any group */
    if (!blk->public.throttle_group_member.throttle_state) {
        return;
    }

    /* this BB is already part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
                   group)) {
        return;
    }

    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}
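/*
 * Illustrative sketch (hypothetical caller, not built here): applying I/O
 * limits to a backend.  The backend must first be registered with a throttle
 * group, then the actual limits are configured:
 *
 *   static void example_apply_limits(BlockBackend *blk, const char *group,
 *                                    uint64_t iops_total)
 *   {
 *       ThrottleConfig cfg;
 *
 *       throttle_config_init(&cfg);
 *       cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops_total;
 *
 *       blk_io_limits_enable(blk, group);   // register with the group first
 *       blk_set_io_limits(blk, &cfg);       // then install the limits
 *   }
 */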
static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;

    if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
        if (blk->dev_ops && blk->dev_ops->drained_begin) {
            blk->dev_ops->drained_begin(blk->dev_opaque);
        }
    }

    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead.
     */

    if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
        throttle_group_restart_tgm(tgm);
    }
}

static bool blk_root_drained_poll(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    bool busy = false;
    assert(qatomic_read(&blk->quiesce_counter));

    if (blk->dev_ops && blk->dev_ops->drained_poll) {
        busy = blk->dev_ops->drained_poll(blk->dev_opaque);
    }
    return busy || !!blk->in_flight;
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    assert(qatomic_read(&blk->quiesce_counter));

    assert(blk->public.throttle_group_member.io_limits_disabled);
    qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);

    if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
        if (blk->dev_ops && blk->dev_ops->drained_end) {
            blk->dev_ops->drained_end(blk->dev_opaque);
        }
        qemu_mutex_lock(&blk->queued_requests_lock);
        while (qemu_co_enter_next(&blk->queued_requests,
                                  &blk->queued_requests_lock)) {
            /* Resume all queued requests */
        }
        qemu_mutex_unlock(&blk->queued_requests_lock);
    }
}

bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    GLOBAL_STATE_CODE();

    if (bs) {
        return bdrv_register_buf(bs, host, size, errp);
    }
    return true;
}

void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
{
    BlockDriverState *bs = blk_bs(blk);

    GLOBAL_STATE_CODE();

    if (bs) {
        bdrv_unregister_buf(bs, host, size);
    }
}

int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
                                   BlockBackend *blk_out, int64_t off_out,
                                   int64_t bytes, BdrvRequestFlags read_flags,
                                   BdrvRequestFlags write_flags)
{
    int r;
    IO_CODE();
    GRAPH_RDLOCK_GUARD();

    r = blk_check_byte_request(blk_in, off_in, bytes);
    if (r) {
        return r;
    }
    r = blk_check_byte_request(blk_out, off_out, bytes);
    if (r) {
        return r;
    }

    return bdrv_co_copy_range(blk_in->root, off_in,
                              blk_out->root, off_out,
                              bytes, read_flags, write_flags);
}

const BdrvChild *blk_root(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root;
}

int blk_make_empty(BlockBackend *blk, Error **errp)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    if (!blk_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

    return bdrv_make_empty(blk->root, errp);
}
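/*
 * Illustrative sketch (hypothetical coroutine, not built here): copying a
 * range between two backends with blk_co_copy_range(), letting the block
 * layer use copy offloading where the drivers support it and falling back
 * to bounce buffers otherwise.
 *
 *   static int coroutine_fn example_clone_range(BlockBackend *src,
 *                                               BlockBackend *dst,
 *                                               int64_t offset, int64_t bytes)
 *   {
 *       return blk_co_copy_range(src, offset, dst, offset, bytes, 0, 0);
 *   }
 */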