1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2020 Virtuozzo International GmbH. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 
24 */ 25 26 #include "qemu/osdep.h" 27 #include "block/trace.h" 28 #include "block/block_int.h" 29 #include "block/blockjob.h" 30 #include "block/dirty-bitmap.h" 31 #include "block/fuse.h" 32 #include "block/nbd.h" 33 #include "block/qdict.h" 34 #include "qemu/error-report.h" 35 #include "block/module_block.h" 36 #include "qemu/main-loop.h" 37 #include "qemu/module.h" 38 #include "qapi/error.h" 39 #include "qapi/qmp/qdict.h" 40 #include "qapi/qmp/qjson.h" 41 #include "qapi/qmp/qnull.h" 42 #include "qapi/qmp/qstring.h" 43 #include "qapi/qobject-output-visitor.h" 44 #include "qapi/qapi-visit-block-core.h" 45 #include "sysemu/block-backend.h" 46 #include "qemu/notify.h" 47 #include "qemu/option.h" 48 #include "qemu/coroutine.h" 49 #include "block/qapi.h" 50 #include "qemu/timer.h" 51 #include "qemu/cutils.h" 52 #include "qemu/id.h" 53 #include "qemu/range.h" 54 #include "qemu/rcu.h" 55 #include "block/coroutines.h" 56 57 #ifdef CONFIG_BSD 58 #include <sys/ioctl.h> 59 #include <sys/queue.h> 60 #if defined(HAVE_SYS_DISK_H) 61 #include <sys/disk.h> 62 #endif 63 #endif 64 65 #ifdef _WIN32 66 #include <windows.h> 67 #endif 68 69 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 70 71 /* Protected by BQL */ 72 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 73 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 74 75 /* Protected by BQL */ 76 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 78 79 /* Protected by BQL */ 80 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 81 QLIST_HEAD_INITIALIZER(bdrv_drivers); 82 83 static BlockDriverState *bdrv_open_inherit(const char *filename, 84 const char *reference, 85 QDict *options, int flags, 86 BlockDriverState *parent, 87 const BdrvChildClass *child_class, 88 BdrvChildRole child_role, 89 bool parse_filename, 90 Error **errp); 91 92 static bool bdrv_recurse_has_child(BlockDriverState *bs, 93 BlockDriverState *child); 94 95 static void 
GRAPH_WRLOCK 96 bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); 97 98 static void GRAPH_WRLOCK 99 bdrv_remove_child(BdrvChild *child, Transaction *tran); 100 101 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 102 BlockReopenQueue *queue, 103 Transaction *change_child_tran, Error **errp); 104 static void bdrv_reopen_commit(BDRVReopenState *reopen_state); 105 static void bdrv_reopen_abort(BDRVReopenState *reopen_state); 106 107 static bool bdrv_backing_overridden(BlockDriverState *bs); 108 109 static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, 110 GHashTable *visited, Transaction *tran, 111 Error **errp); 112 113 /* If non-zero, use only whitelisted block drivers */ 114 static int use_bdrv_whitelist; 115 116 #ifdef _WIN32 117 static int is_windows_drive_prefix(const char *filename) 118 { 119 return (((filename[0] >= 'a' && filename[0] <= 'z') || 120 (filename[0] >= 'A' && filename[0] <= 'Z')) && 121 filename[1] == ':'); 122 } 123 124 int is_windows_drive(const char *filename) 125 { 126 if (is_windows_drive_prefix(filename) && 127 filename[2] == '\0') 128 return 1; 129 if (strstart(filename, "\\\\.\\", NULL) || 130 strstart(filename, "//./", NULL)) 131 return 1; 132 return 0; 133 } 134 #endif 135 136 size_t bdrv_opt_mem_align(BlockDriverState *bs) 137 { 138 if (!bs || !bs->drv) { 139 /* page size or 4k (hdd sector size) should be on the safe side */ 140 return MAX(4096, qemu_real_host_page_size()); 141 } 142 IO_CODE(); 143 144 return bs->bl.opt_mem_alignment; 145 } 146 147 size_t bdrv_min_mem_align(BlockDriverState *bs) 148 { 149 if (!bs || !bs->drv) { 150 /* page size or 4k (hdd sector size) should be on the safe side */ 151 return MAX(4096, qemu_real_host_page_size()); 152 } 153 IO_CODE(); 154 155 return bs->bl.min_mem_alignment; 156 } 157 158 /* check if the path starts with "<protocol>:" */ 159 int path_has_protocol(const char *path) 160 { 161 const char *p; 162 163 #ifdef _WIN32 164 if 
(is_windows_drive(path) || 165 is_windows_drive_prefix(path)) { 166 return 0; 167 } 168 p = path + strcspn(path, ":/\\"); 169 #else 170 p = path + strcspn(path, ":/"); 171 #endif 172 173 return *p == ':'; 174 } 175 176 int path_is_absolute(const char *path) 177 { 178 #ifdef _WIN32 179 /* specific case for names like: "\\.\d:" */ 180 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 181 return 1; 182 } 183 return (*path == '/' || *path == '\\'); 184 #else 185 return (*path == '/'); 186 #endif 187 } 188 189 /* if filename is absolute, just return its duplicate. Otherwise, build a 190 path to it by considering it is relative to base_path. URL are 191 supported. */ 192 char *path_combine(const char *base_path, const char *filename) 193 { 194 const char *protocol_stripped = NULL; 195 const char *p, *p1; 196 char *result; 197 int len; 198 199 if (path_is_absolute(filename)) { 200 return g_strdup(filename); 201 } 202 203 if (path_has_protocol(base_path)) { 204 protocol_stripped = strchr(base_path, ':'); 205 if (protocol_stripped) { 206 protocol_stripped++; 207 } 208 } 209 p = protocol_stripped ?: base_path; 210 211 p1 = strrchr(base_path, '/'); 212 #ifdef _WIN32 213 { 214 const char *p2; 215 p2 = strrchr(base_path, '\\'); 216 if (!p1 || p2 > p1) { 217 p1 = p2; 218 } 219 } 220 #endif 221 if (p1) { 222 p1++; 223 } else { 224 p1 = base_path; 225 } 226 if (p1 > p) { 227 p = p1; 228 } 229 len = p - base_path; 230 231 result = g_malloc(len + strlen(filename) + 1); 232 memcpy(result, base_path, len); 233 strcpy(result + len, filename); 234 235 return result; 236 } 237 238 /* 239 * Helper function for bdrv_parse_filename() implementations to remove optional 240 * protocol prefixes (especially "file:") from a filename and for putting the 241 * stripped filename into the options QDict if there is such a prefix. 
242 */ 243 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 244 QDict *options) 245 { 246 if (strstart(filename, prefix, &filename)) { 247 /* Stripping the explicit protocol prefix may result in a protocol 248 * prefix being (wrongly) detected (if the filename contains a colon) */ 249 if (path_has_protocol(filename)) { 250 GString *fat_filename; 251 252 /* This means there is some colon before the first slash; therefore, 253 * this cannot be an absolute path */ 254 assert(!path_is_absolute(filename)); 255 256 /* And we can thus fix the protocol detection issue by prefixing it 257 * by "./" */ 258 fat_filename = g_string_new("./"); 259 g_string_append(fat_filename, filename); 260 261 assert(!path_has_protocol(fat_filename->str)); 262 263 qdict_put(options, "filename", 264 qstring_from_gstring(fat_filename)); 265 } else { 266 /* If no protocol prefix was detected, we can use the shortened 267 * filename as-is */ 268 qdict_put_str(options, "filename", filename); 269 } 270 } 271 } 272 273 274 /* Returns whether the image file is opened as read-only. Note that this can 275 * return false and writing to the image file is still not possible because the 276 * image is inactivated. 
*/ 277 bool bdrv_is_read_only(BlockDriverState *bs) 278 { 279 IO_CODE(); 280 return !(bs->open_flags & BDRV_O_RDWR); 281 } 282 283 static int GRAPH_RDLOCK 284 bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 285 bool ignore_allow_rdw, Error **errp) 286 { 287 IO_CODE(); 288 289 /* Do not set read_only if copy_on_read is enabled */ 290 if (bs->copy_on_read && read_only) { 291 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 292 bdrv_get_device_or_node_name(bs)); 293 return -EINVAL; 294 } 295 296 /* Do not clear read_only if it is prohibited */ 297 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 298 !ignore_allow_rdw) 299 { 300 error_setg(errp, "Node '%s' is read only", 301 bdrv_get_device_or_node_name(bs)); 302 return -EPERM; 303 } 304 305 return 0; 306 } 307 308 /* 309 * Called by a driver that can only provide a read-only image. 310 * 311 * Returns 0 if the node is already read-only or it could switch the node to 312 * read-only because BDRV_O_AUTO_RDONLY is set. 313 * 314 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 315 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 316 * is not NULL, it is used as the error message for the Error object. 317 */ 318 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 319 Error **errp) 320 { 321 int ret = 0; 322 IO_CODE(); 323 324 if (!(bs->open_flags & BDRV_O_RDWR)) { 325 return 0; 326 } 327 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 328 goto fail; 329 } 330 331 ret = bdrv_can_set_read_only(bs, true, false, NULL); 332 if (ret < 0) { 333 goto fail; 334 } 335 336 bs->open_flags &= ~BDRV_O_RDWR; 337 338 return 0; 339 340 fail: 341 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 342 return -EACCES; 343 } 344 345 /* 346 * If @backing is empty, this function returns NULL without setting 347 * @errp. In all other cases, NULL will only be returned with @errp 348 * set. 
349 * 350 * Therefore, a return value of NULL without @errp set means that 351 * there is no backing file; if @errp is set, there is one but its 352 * absolute filename cannot be generated. 353 */ 354 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 355 const char *backing, 356 Error **errp) 357 { 358 if (backing[0] == '\0') { 359 return NULL; 360 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 361 return g_strdup(backing); 362 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 363 error_setg(errp, "Cannot use relative backing file names for '%s'", 364 backed); 365 return NULL; 366 } else { 367 return path_combine(backed, backing); 368 } 369 } 370 371 /* 372 * If @filename is empty or NULL, this function returns NULL without 373 * setting @errp. In all other cases, NULL will only be returned with 374 * @errp set. 375 */ 376 static char * GRAPH_RDLOCK 377 bdrv_make_absolute_filename(BlockDriverState *relative_to, 378 const char *filename, Error **errp) 379 { 380 char *dir, *full_name; 381 382 if (!filename || filename[0] == '\0') { 383 return NULL; 384 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 385 return g_strdup(filename); 386 } 387 388 dir = bdrv_dirname(relative_to, errp); 389 if (!dir) { 390 return NULL; 391 } 392 393 full_name = g_strconcat(dir, filename, NULL); 394 g_free(dir); 395 return full_name; 396 } 397 398 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 399 { 400 GLOBAL_STATE_CODE(); 401 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 402 } 403 404 void bdrv_register(BlockDriver *bdrv) 405 { 406 assert(bdrv->format_name); 407 GLOBAL_STATE_CODE(); 408 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 409 } 410 411 BlockDriverState *bdrv_new(void) 412 { 413 BlockDriverState *bs; 414 int i; 415 416 GLOBAL_STATE_CODE(); 417 418 bs = g_new0(BlockDriverState, 1); 419 QLIST_INIT(&bs->dirty_bitmaps); 420 for (i = 0; i < 
BLOCK_OP_TYPE_MAX; i++) { 421 QLIST_INIT(&bs->op_blockers[i]); 422 } 423 qemu_mutex_init(&bs->reqs_lock); 424 qemu_mutex_init(&bs->dirty_bitmap_mutex); 425 bs->refcnt = 1; 426 bs->aio_context = qemu_get_aio_context(); 427 428 qemu_co_queue_init(&bs->flush_queue); 429 430 qemu_co_mutex_init(&bs->bsc_modify_lock); 431 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); 432 433 for (i = 0; i < bdrv_drain_all_count; i++) { 434 bdrv_drained_begin(bs); 435 } 436 437 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 438 439 return bs; 440 } 441 442 static BlockDriver *bdrv_do_find_format(const char *format_name) 443 { 444 BlockDriver *drv1; 445 GLOBAL_STATE_CODE(); 446 447 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 448 if (!strcmp(drv1->format_name, format_name)) { 449 return drv1; 450 } 451 } 452 453 return NULL; 454 } 455 456 BlockDriver *bdrv_find_format(const char *format_name) 457 { 458 BlockDriver *drv1; 459 int i; 460 461 GLOBAL_STATE_CODE(); 462 463 drv1 = bdrv_do_find_format(format_name); 464 if (drv1) { 465 return drv1; 466 } 467 468 /* The driver isn't registered, maybe we need to load a module */ 469 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 470 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 471 Error *local_err = NULL; 472 int rv = block_module_load(block_driver_modules[i].library_name, 473 &local_err); 474 if (rv > 0) { 475 return bdrv_do_find_format(format_name); 476 } else if (rv < 0) { 477 error_report_err(local_err); 478 } 479 break; 480 } 481 } 482 return NULL; 483 } 484 485 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 486 { 487 static const char *whitelist_rw[] = { 488 CONFIG_BDRV_RW_WHITELIST 489 NULL 490 }; 491 static const char *whitelist_ro[] = { 492 CONFIG_BDRV_RO_WHITELIST 493 NULL 494 }; 495 const char **p; 496 497 if (!whitelist_rw[0] && !whitelist_ro[0]) { 498 return 1; /* no whitelist, anything goes */ 499 } 500 501 for (p = whitelist_rw; *p; p++) { 502 if 
(!strcmp(format_name, *p)) { 503 return 1; 504 } 505 } 506 if (read_only) { 507 for (p = whitelist_ro; *p; p++) { 508 if (!strcmp(format_name, *p)) { 509 return 1; 510 } 511 } 512 } 513 return 0; 514 } 515 516 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 517 { 518 GLOBAL_STATE_CODE(); 519 return bdrv_format_is_whitelisted(drv->format_name, read_only); 520 } 521 522 bool bdrv_uses_whitelist(void) 523 { 524 return use_bdrv_whitelist; 525 } 526 527 typedef struct CreateCo { 528 BlockDriver *drv; 529 char *filename; 530 QemuOpts *opts; 531 int ret; 532 Error *err; 533 } CreateCo; 534 535 int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, 536 QemuOpts *opts, Error **errp) 537 { 538 ERRP_GUARD(); 539 int ret; 540 GLOBAL_STATE_CODE(); 541 542 if (!drv->bdrv_co_create_opts) { 543 error_setg(errp, "Driver '%s' does not support image creation", 544 drv->format_name); 545 return -ENOTSUP; 546 } 547 548 ret = drv->bdrv_co_create_opts(drv, filename, opts, errp); 549 if (ret < 0 && !*errp) { 550 error_setg_errno(errp, -ret, "Could not create image"); 551 } 552 553 return ret; 554 } 555 556 /** 557 * Helper function for bdrv_create_file_fallback(): Resize @blk to at 558 * least the given @minimum_size. 559 * 560 * On success, return @blk's actual length. 561 * Otherwise, return -errno. 
562 */ 563 static int64_t coroutine_fn GRAPH_UNLOCKED 564 create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size, 565 Error **errp) 566 { 567 Error *local_err = NULL; 568 int64_t size; 569 int ret; 570 571 GLOBAL_STATE_CODE(); 572 573 ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 574 &local_err); 575 if (ret < 0 && ret != -ENOTSUP) { 576 error_propagate(errp, local_err); 577 return ret; 578 } 579 580 size = blk_co_getlength(blk); 581 if (size < 0) { 582 error_free(local_err); 583 error_setg_errno(errp, -size, 584 "Failed to inquire the new image file's length"); 585 return size; 586 } 587 588 if (size < minimum_size) { 589 /* Need to grow the image, but we failed to do that */ 590 error_propagate(errp, local_err); 591 return -ENOTSUP; 592 } 593 594 error_free(local_err); 595 local_err = NULL; 596 597 return size; 598 } 599 600 /** 601 * Helper function for bdrv_create_file_fallback(): Zero the first 602 * sector to remove any potentially pre-existing image header. 
603 */ 604 static int coroutine_fn 605 create_file_fallback_zero_first_sector(BlockBackend *blk, 606 int64_t current_size, 607 Error **errp) 608 { 609 int64_t bytes_to_clear; 610 int ret; 611 612 GLOBAL_STATE_CODE(); 613 614 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 615 if (bytes_to_clear) { 616 ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 617 if (ret < 0) { 618 error_setg_errno(errp, -ret, 619 "Failed to clear the new image's first sector"); 620 return ret; 621 } 622 } 623 624 return 0; 625 } 626 627 /** 628 * Simple implementation of bdrv_co_create_opts for protocol drivers 629 * which only support creation via opening a file 630 * (usually existing raw storage device) 631 */ 632 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 633 const char *filename, 634 QemuOpts *opts, 635 Error **errp) 636 { 637 ERRP_GUARD(); 638 BlockBackend *blk; 639 QDict *options; 640 int64_t size = 0; 641 char *buf = NULL; 642 PreallocMode prealloc; 643 Error *local_err = NULL; 644 int ret; 645 646 GLOBAL_STATE_CODE(); 647 648 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 649 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 650 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 651 PREALLOC_MODE_OFF, &local_err); 652 g_free(buf); 653 if (local_err) { 654 error_propagate(errp, local_err); 655 return -EINVAL; 656 } 657 658 if (prealloc != PREALLOC_MODE_OFF) { 659 error_setg(errp, "Unsupported preallocation mode '%s'", 660 PreallocMode_str(prealloc)); 661 return -ENOTSUP; 662 } 663 664 options = qdict_new(); 665 qdict_put_str(options, "driver", drv->format_name); 666 667 blk = blk_co_new_open(filename, NULL, options, 668 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 669 if (!blk) { 670 error_prepend(errp, "Protocol driver '%s' does not support creating " 671 "new images, so an existing image must be selected as " 672 "the target; however, opening the given target as an " 673 "existing image failed: ", 674 drv->format_name); 675 return 
-EINVAL; 676 } 677 678 size = create_file_fallback_truncate(blk, size, errp); 679 if (size < 0) { 680 ret = size; 681 goto out; 682 } 683 684 ret = create_file_fallback_zero_first_sector(blk, size, errp); 685 if (ret < 0) { 686 goto out; 687 } 688 689 ret = 0; 690 out: 691 blk_co_unref(blk); 692 return ret; 693 } 694 695 int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, 696 Error **errp) 697 { 698 QemuOpts *protocol_opts; 699 BlockDriver *drv; 700 QDict *qdict; 701 int ret; 702 703 GLOBAL_STATE_CODE(); 704 705 drv = bdrv_find_protocol(filename, true, errp); 706 if (drv == NULL) { 707 return -ENOENT; 708 } 709 710 if (!drv->create_opts) { 711 error_setg(errp, "Driver '%s' does not support image creation", 712 drv->format_name); 713 return -ENOTSUP; 714 } 715 716 /* 717 * 'opts' contains a QemuOptsList with a combination of format and protocol 718 * default values. 719 * 720 * The format properly removes its options, but the default values remain 721 * in 'opts->list'. So if the protocol has options with the same name 722 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 723 * of the format, since for overlapping options, the format wins. 724 * 725 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 726 * only the set options, and then convert it back to QemuOpts, using the 727 * create_opts of the protocol. So the new QemuOpts, will contain only the 728 * protocol defaults. 
729 */ 730 qdict = qemu_opts_to_qdict(opts, NULL); 731 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 732 if (protocol_opts == NULL) { 733 ret = -EINVAL; 734 goto out; 735 } 736 737 ret = bdrv_co_create(drv, filename, protocol_opts, errp); 738 out: 739 qemu_opts_del(protocol_opts); 740 qobject_unref(qdict); 741 return ret; 742 } 743 744 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 745 { 746 Error *local_err = NULL; 747 int ret; 748 749 IO_CODE(); 750 assert(bs != NULL); 751 assert_bdrv_graph_readable(); 752 753 if (!bs->drv) { 754 error_setg(errp, "Block node '%s' is not opened", bs->filename); 755 return -ENOMEDIUM; 756 } 757 758 if (!bs->drv->bdrv_co_delete_file) { 759 error_setg(errp, "Driver '%s' does not support image deletion", 760 bs->drv->format_name); 761 return -ENOTSUP; 762 } 763 764 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 765 if (ret < 0) { 766 error_propagate(errp, local_err); 767 } 768 769 return ret; 770 } 771 772 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 773 { 774 Error *local_err = NULL; 775 int ret; 776 IO_CODE(); 777 778 if (!bs) { 779 return; 780 } 781 782 ret = bdrv_co_delete_file(bs, &local_err); 783 /* 784 * ENOTSUP will happen if the block driver doesn't support 785 * the 'bdrv_co_delete_file' interface. This is a predictable 786 * scenario and shouldn't be reported back to the user. 787 */ 788 if (ret == -ENOTSUP) { 789 error_free(local_err); 790 } else if (ret < 0) { 791 error_report_err(local_err); 792 } 793 } 794 795 /** 796 * Try to get @bs's logical and physical block size. 797 * On success, store them in @bsz struct and return 0. 798 * On failure return -errno. 799 * @bs must not be empty. 
800 */ 801 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 802 { 803 BlockDriver *drv = bs->drv; 804 BlockDriverState *filtered = bdrv_filter_bs(bs); 805 GLOBAL_STATE_CODE(); 806 807 if (drv && drv->bdrv_probe_blocksizes) { 808 return drv->bdrv_probe_blocksizes(bs, bsz); 809 } else if (filtered) { 810 return bdrv_probe_blocksizes(filtered, bsz); 811 } 812 813 return -ENOTSUP; 814 } 815 816 /** 817 * Try to get @bs's geometry (cyls, heads, sectors). 818 * On success, store them in @geo struct and return 0. 819 * On failure return -errno. 820 * @bs must not be empty. 821 */ 822 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 823 { 824 BlockDriver *drv = bs->drv; 825 BlockDriverState *filtered; 826 827 GLOBAL_STATE_CODE(); 828 GRAPH_RDLOCK_GUARD_MAINLOOP(); 829 830 if (drv && drv->bdrv_probe_geometry) { 831 return drv->bdrv_probe_geometry(bs, geo); 832 } 833 834 filtered = bdrv_filter_bs(bs); 835 if (filtered) { 836 return bdrv_probe_geometry(filtered, geo); 837 } 838 839 return -ENOTSUP; 840 } 841 842 /* 843 * Create a uniquely-named empty temporary file. 844 * Return the actual file name used upon success, otherwise NULL. 845 * This string should be freed with g_free() when not needed any longer. 846 * 847 * Note: creating a temporary file for the caller to (re)open is 848 * inherently racy. Use g_file_open_tmp() instead whenever practical. 849 */ 850 char *create_tmp_file(Error **errp) 851 { 852 int fd; 853 const char *tmpdir; 854 g_autofree char *filename = NULL; 855 856 tmpdir = g_get_tmp_dir(); 857 #ifndef _WIN32 858 /* 859 * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot") 860 * 861 * This function is used to create temporary disk images (like -snapshot), 862 * so the files can become very large. /tmp is often a tmpfs where as 863 * /var/tmp is usually on a disk, so more appropriate for disk images. 
864 */ 865 if (!g_strcmp0(tmpdir, "/tmp")) { 866 tmpdir = "/var/tmp"; 867 } 868 #endif 869 870 filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir); 871 fd = g_mkstemp(filename); 872 if (fd < 0) { 873 error_setg_errno(errp, errno, "Could not open temporary file '%s'", 874 filename); 875 return NULL; 876 } 877 close(fd); 878 879 return g_steal_pointer(&filename); 880 } 881 882 /* 883 * Detect host devices. By convention, /dev/cdrom[N] is always 884 * recognized as a host CDROM. 885 */ 886 static BlockDriver *find_hdev_driver(const char *filename) 887 { 888 int score_max = 0, score; 889 BlockDriver *drv = NULL, *d; 890 GLOBAL_STATE_CODE(); 891 892 QLIST_FOREACH(d, &bdrv_drivers, list) { 893 if (d->bdrv_probe_device) { 894 score = d->bdrv_probe_device(filename); 895 if (score > score_max) { 896 score_max = score; 897 drv = d; 898 } 899 } 900 } 901 902 return drv; 903 } 904 905 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 906 { 907 BlockDriver *drv1; 908 GLOBAL_STATE_CODE(); 909 910 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 911 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 912 return drv1; 913 } 914 } 915 916 return NULL; 917 } 918 919 BlockDriver *bdrv_find_protocol(const char *filename, 920 bool allow_protocol_prefix, 921 Error **errp) 922 { 923 BlockDriver *drv1; 924 char protocol[128]; 925 int len; 926 const char *p; 927 int i; 928 929 GLOBAL_STATE_CODE(); 930 931 /* 932 * XXX(hch): we really should not let host device detection 933 * override an explicit protocol specification, but moving this 934 * later breaks access to device names with colons in them. 935 * Thanks to the brain-dead persistent naming schemes on udev- 936 * based Linux systems those actually are quite common. 
937 */ 938 drv1 = find_hdev_driver(filename); 939 if (drv1) { 940 return drv1; 941 } 942 943 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 944 return &bdrv_file; 945 } 946 947 p = strchr(filename, ':'); 948 assert(p != NULL); 949 len = p - filename; 950 if (len > sizeof(protocol) - 1) 951 len = sizeof(protocol) - 1; 952 memcpy(protocol, filename, len); 953 protocol[len] = '\0'; 954 955 drv1 = bdrv_do_find_protocol(protocol); 956 if (drv1) { 957 return drv1; 958 } 959 960 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 961 if (block_driver_modules[i].protocol_name && 962 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 963 int rv = block_module_load(block_driver_modules[i].library_name, errp); 964 if (rv > 0) { 965 drv1 = bdrv_do_find_protocol(protocol); 966 } else if (rv < 0) { 967 return NULL; 968 } 969 break; 970 } 971 } 972 973 if (!drv1) { 974 error_setg(errp, "Unknown protocol '%s'", protocol); 975 } 976 return drv1; 977 } 978 979 /* 980 * Guess image format by probing its contents. 981 * This is not a good idea when your image is raw (CVE-2008-2004), but 982 * we do it anyway for backward compatibility. 983 * 984 * @buf contains the image's first @buf_size bytes. 985 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 986 * but can be smaller if the image file is smaller) 987 * @filename is its filename. 988 * 989 * For all block drivers, call the bdrv_probe() method to get its 990 * probing score. 991 * Return the first block driver with the highest probing score. 
992 */ 993 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 994 const char *filename) 995 { 996 int score_max = 0, score; 997 BlockDriver *drv = NULL, *d; 998 IO_CODE(); 999 1000 QLIST_FOREACH(d, &bdrv_drivers, list) { 1001 if (d->bdrv_probe) { 1002 score = d->bdrv_probe(buf, buf_size, filename); 1003 if (score > score_max) { 1004 score_max = score; 1005 drv = d; 1006 } 1007 } 1008 } 1009 1010 return drv; 1011 } 1012 1013 static int find_image_format(BlockBackend *file, const char *filename, 1014 BlockDriver **pdrv, Error **errp) 1015 { 1016 BlockDriver *drv; 1017 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 1018 int ret = 0; 1019 1020 GLOBAL_STATE_CODE(); 1021 1022 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 1023 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 1024 *pdrv = &bdrv_raw; 1025 return ret; 1026 } 1027 1028 ret = blk_pread(file, 0, sizeof(buf), buf, 0); 1029 if (ret < 0) { 1030 error_setg_errno(errp, -ret, "Could not read image for determining its " 1031 "format"); 1032 *pdrv = NULL; 1033 return ret; 1034 } 1035 1036 drv = bdrv_probe_all(buf, sizeof(buf), filename); 1037 if (!drv) { 1038 error_setg(errp, "Could not determine image format: No compatible " 1039 "driver found"); 1040 *pdrv = NULL; 1041 return -ENOENT; 1042 } 1043 1044 *pdrv = drv; 1045 return 0; 1046 } 1047 1048 /** 1049 * Set the current 'total_sectors' value 1050 * Return 0 on success, -errno on error. 
1051 */ 1052 int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs, 1053 int64_t hint) 1054 { 1055 BlockDriver *drv = bs->drv; 1056 IO_CODE(); 1057 assert_bdrv_graph_readable(); 1058 1059 if (!drv) { 1060 return -ENOMEDIUM; 1061 } 1062 1063 /* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */ 1064 if (bdrv_is_sg(bs)) 1065 return 0; 1066 1067 /* query actual device if possible, otherwise just trust the hint */ 1068 if (drv->bdrv_co_getlength) { 1069 int64_t length = drv->bdrv_co_getlength(bs); 1070 if (length < 0) { 1071 return length; 1072 } 1073 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1074 } 1075 1076 bs->total_sectors = hint; 1077 1078 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1079 return -EFBIG; 1080 } 1081 1082 return 0; 1083 } 1084 1085 /** 1086 * Combines a QDict of new block driver @options with any missing options taken 1087 * from @old_options, so that leaving out an option defaults to its old value. 1088 */ 1089 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1090 QDict *old_options) 1091 { 1092 GLOBAL_STATE_CODE(); 1093 if (bs->drv && bs->drv->bdrv_join_options) { 1094 bs->drv->bdrv_join_options(options, old_options); 1095 } else { 1096 qdict_join(options, old_options, false); 1097 } 1098 } 1099 1100 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1101 int open_flags, 1102 Error **errp) 1103 { 1104 Error *local_err = NULL; 1105 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1106 BlockdevDetectZeroesOptions detect_zeroes = 1107 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1108 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1109 GLOBAL_STATE_CODE(); 1110 g_free(value); 1111 if (local_err) { 1112 error_propagate(errp, local_err); 1113 return detect_zeroes; 1114 } 1115 1116 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1117 !(open_flags & BDRV_O_UNMAP)) 1118 { 1119 error_setg(errp, "setting detect-zeroes to unmap 
is not allowed " 1120 "without setting discard operation to unmap"); 1121 } 1122 1123 return detect_zeroes; 1124 } 1125 1126 /** 1127 * Set open flags for aio engine 1128 * 1129 * Return 0 on success, -1 if the engine specified is invalid 1130 */ 1131 int bdrv_parse_aio(const char *mode, int *flags) 1132 { 1133 if (!strcmp(mode, "threads")) { 1134 /* do nothing, default */ 1135 } else if (!strcmp(mode, "native")) { 1136 *flags |= BDRV_O_NATIVE_AIO; 1137 #ifdef CONFIG_LINUX_IO_URING 1138 } else if (!strcmp(mode, "io_uring")) { 1139 *flags |= BDRV_O_IO_URING; 1140 #endif 1141 } else { 1142 return -1; 1143 } 1144 1145 return 0; 1146 } 1147 1148 /** 1149 * Set open flags for a given discard mode 1150 * 1151 * Return 0 on success, -1 if the discard mode was invalid. 1152 */ 1153 int bdrv_parse_discard_flags(const char *mode, int *flags) 1154 { 1155 *flags &= ~BDRV_O_UNMAP; 1156 1157 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1158 /* do nothing */ 1159 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1160 *flags |= BDRV_O_UNMAP; 1161 } else { 1162 return -1; 1163 } 1164 1165 return 0; 1166 } 1167 1168 /** 1169 * Set open flags for a given cache mode 1170 * 1171 * Return 0 on success, -1 if the cache mode was invalid. 
1172 */ 1173 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1174 { 1175 *flags &= ~BDRV_O_CACHE_MASK; 1176 1177 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1178 *writethrough = false; 1179 *flags |= BDRV_O_NOCACHE; 1180 } else if (!strcmp(mode, "directsync")) { 1181 *writethrough = true; 1182 *flags |= BDRV_O_NOCACHE; 1183 } else if (!strcmp(mode, "writeback")) { 1184 *writethrough = false; 1185 } else if (!strcmp(mode, "unsafe")) { 1186 *writethrough = false; 1187 *flags |= BDRV_O_NO_FLUSH; 1188 } else if (!strcmp(mode, "writethrough")) { 1189 *writethrough = true; 1190 } else { 1191 return -1; 1192 } 1193 1194 return 0; 1195 } 1196 1197 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1198 { 1199 BlockDriverState *parent = c->opaque; 1200 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); 1201 } 1202 1203 static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child) 1204 { 1205 BlockDriverState *bs = child->opaque; 1206 bdrv_do_drained_begin_quiesce(bs, NULL); 1207 } 1208 1209 static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child) 1210 { 1211 BlockDriverState *bs = child->opaque; 1212 return bdrv_drain_poll(bs, NULL, false); 1213 } 1214 1215 static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child) 1216 { 1217 BlockDriverState *bs = child->opaque; 1218 bdrv_drained_end(bs); 1219 } 1220 1221 static int bdrv_child_cb_inactivate(BdrvChild *child) 1222 { 1223 BlockDriverState *bs = child->opaque; 1224 GLOBAL_STATE_CODE(); 1225 assert(bs->open_flags & BDRV_O_INACTIVE); 1226 return 0; 1227 } 1228 1229 static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, 1230 GHashTable *visited, Transaction *tran, 1231 Error **errp) 1232 { 1233 BlockDriverState *bs = child->opaque; 1234 return bdrv_change_aio_context(bs, ctx, visited, tran, errp); 1235 } 1236 1237 /* 1238 * Returns the options and flags that a temporary snapshot should get, based on 1239 * the originally 
/*
 * Returns the options and flags that a temporary snapshot should get, based on
 * the originally requested flags (the originally requested image will have
 * flags like a backing file)
 *
 * @child_flags is an output; @child_options only receives defaults for keys
 * that are not set yet.
 */
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
                                       int parent_flags, QDict *parent_options)
{
    GLOBAL_STATE_CODE();
    *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;

    /* For temporary files, unconditional cache=unsafe is fine */
    qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
    qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");

    /* Copy the read-only and discard options from the parent */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);

    /* aio=native doesn't work for cache.direct=off, so disable it for the
     * temporary snapshot */
    *child_flags &= ~BDRV_O_NATIVE_AIO;
}

/*
 * Set up the op blockers for a newly attached backing child @c: create
 * parent->backing_blocker, block all operations on the backing node with it
 * and then unblock the operations listed below.
 */
static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
{
    BlockDriverState *parent = c->opaque;
    BlockDriverState *backing_hd = c->bs;

    GLOBAL_STATE_CODE();
    assert(!parent->backing_blocker);
    error_setg(&parent->backing_blocker,
               "node is used as backing hd of '%s'",
               bdrv_get_device_or_node_name(parent));

    bdrv_refresh_filename(backing_hd);

    parent->open_flags &= ~BDRV_O_NO_BACKING;

    bdrv_op_block_all(backing_hd, parent->backing_blocker);
    /* Otherwise we won't be able to commit or stream */
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    parent->backing_blocker);
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
                    parent->backing_blocker);
    /*
     * We do backup in 3 ways:
     * 1. drive backup
     *    The target bs is newly opened, and the source is the top BDS
     * 2. blockdev backup
     *    Both the source and the target are top BDSes.
     * 3. internal backup (used for block replication)
     *    Both the source and the target are backing files
     *
     * In case 1 and 2, neither the source nor the target is the backing file.
     * In case 3, we will block the top BDS, so there is only one block job
     * for the top BDS and its backing chain.
     */
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
                    parent->backing_blocker);
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
                    parent->backing_blocker);
}

/*
 * Undo bdrv_backing_attach(): lift every block that was installed with
 * parent->backing_blocker from the backing node and free the blocker.
 */
static void bdrv_backing_detach(BdrvChild *c)
{
    BlockDriverState *parent = c->opaque;

    GLOBAL_STATE_CODE();
    assert(parent->backing_blocker);
    bdrv_op_unblock_all(c->bs, parent->backing_blocker);
    error_free(parent->backing_blocker);
    parent->backing_blocker = NULL;
}

/*
 * Record @filename (and a format name derived from @base) as the backing
 * file link of the parent of @c.
 *
 * A read-only parent is reopened read-write for the update and reopened
 * read-only afterwards.
 *
 * Returns the result of bdrv_change_backing_file(), or the negative result of
 * the initial read-write reopen if that failed.
 */
static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
                                        const char *filename,
                                        bool backing_mask_protocol,
                                        Error **errp)
{
    BlockDriverState *parent = c->opaque;
    bool read_only = bdrv_is_read_only(parent);
    int ret;
    const char *format_name;
    GLOBAL_STATE_CODE();

    if (read_only) {
        ret = bdrv_reopen_set_read_only(parent, false, errp);
        if (ret < 0) {
            return ret;
        }
    }

    if (base->drv) {
        /*
         * If the new base image doesn't have a format driver layer, which we
         * detect by the fact that @base is a protocol driver, we record
         * 'raw' as the format instead of putting the protocol name as the
         * backing format
         */
        if (backing_mask_protocol && base->drv->protocol_name) {
            format_name = "raw";
        } else {
            format_name = base->drv->format_name;
        }
    } else {
        format_name = "";
    }

    ret = bdrv_change_backing_file(parent, filename, format_name, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update backing file link");
    }

    if (read_only) {
        /*
         * NOTE(review): the result of switching back to read-only is
         * ignored (no errp, return value unused) - presumably best effort;
         * confirm.
         */
        bdrv_reopen_set_read_only(parent, true, NULL);
    }

    return ret;
}

/*
 * Returns the options and flags that a generic child of a BDS should
 * get, based on the given options and flags for the parent BDS.
 */
static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
                                   int *child_flags, QDict *child_options,
                                   int parent_flags, QDict *parent_options)
{
    int flags = parent_flags;
    GLOBAL_STATE_CODE();

    /*
     * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
     * Generally, the question to answer is: Should this child be
     * format-probed by default?
     */

    /*
     * Pure and non-filtered data children of non-format nodes should
     * be probed by default (even when the node itself has BDRV_O_PROTOCOL
     * set).  This only affects a very limited set of drivers (namely
     * quorum and blkverify when this comment was written).
     * Force-clear BDRV_O_PROTOCOL then.
     */
    if (!parent_is_format &&
        (role & BDRV_CHILD_DATA) &&
        !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
    {
        flags &= ~BDRV_O_PROTOCOL;
    }

    /*
     * All children of format nodes (except for COW children) and all
     * metadata children in general should never be format-probed.
     * Force-set BDRV_O_PROTOCOL then.
     */
    if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
        (role & BDRV_CHILD_METADATA))
    {
        flags |= BDRV_O_PROTOCOL;
    }

    /*
     * If the cache mode isn't explicitly set, inherit direct and no-flush from
     * the parent. The force-share option is inherited the same way.
     */
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);

    if (role & BDRV_CHILD_COW) {
        /* backing files are opened read-only by default */
        qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
        qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
    } else {
        /* Inherit the read-only option from the parent if it's not set */
        qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
        qdict_copy_default(child_options, parent_options,
                           BDRV_OPT_AUTO_READ_ONLY);
    }

    /*
     * bdrv_co_pdiscard() respects unmap policy for the parent, so we
     * can default to enable it on lower layers regardless of the
     * parent option.
     */
    qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    if (role & BDRV_CHILD_METADATA) {
        flags &= ~BDRV_O_NO_IO;
    }
    if (role & BDRV_CHILD_COW) {
        flags &= ~BDRV_O_TEMPORARY;
    }

    *child_flags = flags;
}

/*
 * Attach callback for children whose parent is a BDS: link @child into the
 * parent's children list and, depending on the child role, store it in
 * bs->file or bs->backing.
 */
static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
{
    BlockDriverState *bs = child->opaque;

    assert_bdrv_graph_writable();
    QLIST_INSERT_HEAD(&bs->children, child, next);
    if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
        /*
         * Here we handle filters and block/raw-format.c when it behaves like
         * a filter. They generally have a single PRIMARY child, which is also
         * the FILTERED child, and they may have multiple more children, which
         * are neither PRIMARY nor FILTERED. And we never have a COW child
         * here. So bs->file will be the PRIMARY child, unless the PRIMARY
         * child goes into bs->backing in exceptional cases; and bs->backing
         * will be nothing else.
         */
        assert(!(child->role & BDRV_CHILD_COW));
        if (child->role & BDRV_CHILD_PRIMARY) {
            assert(child->role & BDRV_CHILD_FILTERED);
            assert(!bs->backing);
            assert(!bs->file);

            if (bs->drv->filtered_child_is_backing) {
                bs->backing = child;
            } else {
                bs->file = child;
            }
        } else {
            assert(!(child->role & BDRV_CHILD_FILTERED));
        }
    } else if (child->role & BDRV_CHILD_COW) {
        assert(bs->drv->supports_backing);
        assert(!(child->role & BDRV_CHILD_PRIMARY));
        assert(!bs->backing);
        bs->backing = child;
        bdrv_backing_attach(child);
    } else if (child->role & BDRV_CHILD_PRIMARY) {
        assert(!bs->file);
        bs->file = child;
    }
}

/*
 * Detach callback for children whose parent is a BDS: undo the backing
 * blockers for COW children, unlink @child from the parent's children list
 * and clear bs->backing or bs->file if it pointed to @child.
 */
static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
{
    BlockDriverState *bs = child->opaque;

    if (child->role & BDRV_CHILD_COW) {
        bdrv_backing_detach(child);
    }

    assert_bdrv_graph_writable();
    QLIST_REMOVE(child, next);
    if (child == bs->backing) {
        assert(child != bs->file);
        bs->backing = NULL;
    } else if (child == bs->file) {
        bs->file = NULL;
    }
}

/*
 * Only COW children carry a backing file link that needs updating; for every
 * other role this does nothing and returns 0.
 */
static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
                                         const char *filename,
                                         bool backing_mask_protocol,
                                         Error **errp)
{
    if (c->role & BDRV_CHILD_COW) {
        return bdrv_backing_update_filename(c, base, filename,
                                            backing_mask_protocol,
                                            errp);
    }
    return 0;
}

/* Return the AioContext of the parent node (c->opaque) of @c. */
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
    BlockDriverState *bs = c->opaque;
    IO_CODE();

    return bdrv_get_aio_context(bs);
}

/* Child class wiring up the callbacks above for parents that are a BDS. */
const BdrvChildClass child_of_bds = {
    .parent_is_bds   = true,
    .get_parent_desc = bdrv_child_get_parent_desc,
    .inherit_options = bdrv_inherited_options,
    .drained_begin   = bdrv_child_cb_drained_begin,
    .drained_poll    = bdrv_child_cb_drained_poll,
    .drained_end     = bdrv_child_cb_drained_end,
    .attach          = bdrv_child_cb_attach,
    .detach          = bdrv_child_cb_detach,
    .inactivate      = bdrv_child_cb_inactivate,
    .change_aio_ctx  = bdrv_child_cb_change_aio_ctx,
    .update_filename = bdrv_child_cb_update_filename,
    .get_parent_aio_context = child_of_bds_get_parent_aio_context,
};

/*
 * Return the AioContext of the parent of @c by calling the child class'
 * get_parent_aio_context callback (called unconditionally, so the class must
 * provide it).
 */
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
{
    IO_CODE();
    return c->klass->get_parent_aio_context(c);
}

/*
 * Return @flags with the flags that are internal to the block layer removed.
 * @bs is not used here.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags;
    GLOBAL_STATE_CODE();

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    return open_flags;
}

/*
 * Recompute the cache, read-write and auto-read-only bits of *@flags from
 * @opts. The four options read here are removed from @opts
 * (qemu_opt_get_bool_del()); each defaults to false when absent, so a missing
 * read-only option results in BDRV_O_RDWR being set.
 */
static void update_flags_from_options(int *flags, QemuOpts *opts)
{
    GLOBAL_STATE_CODE();

    *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
        *flags |= BDRV_O_NO_FLUSH;
    }

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
        *flags |= BDRV_O_NOCACHE;
    }

    if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
        *flags |= BDRV_O_RDWR;
    }

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
        *flags |= BDRV_O_AUTO_RDONLY;
    }
}

/*
 * Inverse of update_flags_from_options(): for each of the cache, read-only
 * and auto-read-only options that is not present in @options yet, add an
 * entry derived from @flags. Keys that already exist are left alone.
 */
static void update_options_from_flags(QDict *options, int flags)
{
    GLOBAL_STATE_CODE();
    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
        qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
    }
    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
        qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
                       flags & BDRV_O_NO_FLUSH);
    }
    if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
        qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
    }
    if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
        qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
                       flags & BDRV_O_AUTO_RDONLY);
    }
}

/*
 * Give @bs its node name and insert it into graph_bdrv_states.
 *
 * With @node_name == NULL a name is generated with id_generate(). A
 * user-supplied name is rejected (error set in @errp, @bs left untouched) if
 * it is not well-formed, matches a BlockBackend name, matches an existing
 * node name, or does not fit into bs->node_name.
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    char *gen_node_name = NULL;
    GLOBAL_STATE_CODE();

    if (!node_name) {
        node_name = gen_node_name = id_generate(ID_BLOCK);
    } else if (!id_wellformed(node_name)) {
        /*
         * Check for empty string or invalid characters, but not if it is
         * generated (generated names use characters not available to the user)
         */
        error_setg(errp, "Invalid node-name: '%s'", node_name);
        /* gen_node_name is still NULL on this path, nothing to free */
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        goto out;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
        goto out;
    }

    /* Make sure that the node name isn't truncated */
    if (strlen(node_name) >= sizeof(bs->node_name)) {
        error_setg(errp, "Node name too long");
        goto out;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
out:
    g_free(gen_node_name);
}

/*
 * Bind @drv to @bs and open it.
 *
 * Assigns the node name, allocates the zeroed driver state (bs->opaque,
 * drv->instance_size bytes), calls drv->bdrv_open() if the driver has one,
 * then refreshes the sector count and the limits. Finally
 * drv->bdrv_drain_begin() is called once per bs->quiesce_counter.
 *
 * Returns 0 on success and a negative errno value on failure, with @errp set.
 * Only a failing drv->bdrv_open() goes through the open_failed cleanup.
 */
static int no_coroutine_fn GRAPH_UNLOCKED
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
                 QDict *options, int open_flags, Error **errp)
{
    Error *local_err = NULL;
    int i, ret;
    GLOBAL_STATE_CODE();

    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    assert(!drv->bdrv_needs_filename || bs->filename[0]);
    if (drv->bdrv_open) {
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    } else {
        ret = 0;
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto open_failed;
    }

    assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
    assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));

    /*
     * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
     * drivers that pass read/write requests through to a child the trouble of
     * declaring support explicitly.
     *
     * Drivers must not propagate this flag accidentally when they initiate I/O
     * to a bounce buffer. That case should be rare though.
     */
    bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
    bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;

    ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        /*
         * NOTE(review): returns without the open_failed cleanup, so bs->drv
         * and bs->opaque stay set - presumably the caller tears the node
         * down; confirm.
         */
        return ret;
    }

    bdrv_graph_rdlock_main_loop();
    bdrv_refresh_limits(bs, NULL, &local_err);
    bdrv_graph_rdunlock_main_loop();

    if (local_err) {
        error_propagate(errp, local_err);
        /* NOTE(review): same as above, no open_failed cleanup here */
        return -EINVAL;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert(is_power_of_2(bs->bl.request_alignment));

    /* Replay the drained sections @bs is already in on the new driver */
    for (i = 0; i < bs->quiesce_counter; i++) {
        if (drv->bdrv_drain_begin) {
            drv->bdrv_drain_begin(bs);
        }
    }

    return 0;
open_failed:
    bs->drv = NULL;

    /* Drop a bs->file child that may still be attached */
    bdrv_graph_wrlock();
    if (bs->file != NULL) {
        bdrv_unref_child(bs, bs->file);
        assert(!bs->file);
    }
    bdrv_graph_wrunlock();

    g_free(bs->opaque);
    bs->opaque = NULL;
    return ret;
}
1738 */ 1739 BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, 1740 const char *node_name, 1741 QDict *options, int flags, 1742 Error **errp) 1743 { 1744 BlockDriverState *bs; 1745 int ret; 1746 1747 GLOBAL_STATE_CODE(); 1748 1749 bs = bdrv_new(); 1750 bs->open_flags = flags; 1751 bs->options = options ?: qdict_new(); 1752 bs->explicit_options = qdict_clone_shallow(bs->options); 1753 bs->opaque = NULL; 1754 1755 update_options_from_flags(bs->options, flags); 1756 1757 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1758 if (ret < 0) { 1759 qobject_unref(bs->explicit_options); 1760 bs->explicit_options = NULL; 1761 qobject_unref(bs->options); 1762 bs->options = NULL; 1763 bdrv_unref(bs); 1764 return NULL; 1765 } 1766 1767 return bs; 1768 } 1769 1770 /* Create and open a block node. */ 1771 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1772 int flags, Error **errp) 1773 { 1774 GLOBAL_STATE_CODE(); 1775 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); 1776 } 1777 1778 QemuOptsList bdrv_runtime_opts = { 1779 .name = "bdrv_common", 1780 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1781 .desc = { 1782 { 1783 .name = "node-name", 1784 .type = QEMU_OPT_STRING, 1785 .help = "Node name of the block device node", 1786 }, 1787 { 1788 .name = "driver", 1789 .type = QEMU_OPT_STRING, 1790 .help = "Block driver to use for the node", 1791 }, 1792 { 1793 .name = BDRV_OPT_CACHE_DIRECT, 1794 .type = QEMU_OPT_BOOL, 1795 .help = "Bypass software writeback cache on the host", 1796 }, 1797 { 1798 .name = BDRV_OPT_CACHE_NO_FLUSH, 1799 .type = QEMU_OPT_BOOL, 1800 .help = "Ignore flush requests", 1801 }, 1802 { 1803 .name = BDRV_OPT_READ_ONLY, 1804 .type = QEMU_OPT_BOOL, 1805 .help = "Node is opened in read-only mode", 1806 }, 1807 { 1808 .name = BDRV_OPT_AUTO_READ_ONLY, 1809 .type = QEMU_OPT_BOOL, 1810 .help = "Node can become read-only if opening read-write fails", 1811 }, 1812 { 1813 .name 
= "detect-zeroes", 1814 .type = QEMU_OPT_STRING, 1815 .help = "try to optimize zero writes (off, on, unmap)", 1816 }, 1817 { 1818 .name = BDRV_OPT_DISCARD, 1819 .type = QEMU_OPT_STRING, 1820 .help = "discard operation (ignore/off, unmap/on)", 1821 }, 1822 { 1823 .name = BDRV_OPT_FORCE_SHARE, 1824 .type = QEMU_OPT_BOOL, 1825 .help = "always accept other writers (default: off)", 1826 }, 1827 { /* end of list */ } 1828 }, 1829 }; 1830 1831 QemuOptsList bdrv_create_opts_simple = { 1832 .name = "simple-create-opts", 1833 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1834 .desc = { 1835 { 1836 .name = BLOCK_OPT_SIZE, 1837 .type = QEMU_OPT_SIZE, 1838 .help = "Virtual disk size" 1839 }, 1840 { 1841 .name = BLOCK_OPT_PREALLOC, 1842 .type = QEMU_OPT_STRING, 1843 .help = "Preallocation mode (allowed values: off)" 1844 }, 1845 { /* end of list */ } 1846 } 1847 }; 1848 1849 /* 1850 * Common part for opening disk images and files 1851 * 1852 * Removes all processed options from *options. 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * The runtime options declared in bdrv_runtime_opts are absorbed from
 * @options, bs->open_flags is updated from them, and the driver named by the
 * "driver" option is opened through bdrv_open_driver(). The "driver" option
 * must name an existing driver (asserted).
 *
 * Returns 0 on success, a negative errno value with @errp set otherwise.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    const char *discard;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;
    bool ro;

    GLOBAL_STATE_CODE();

    bdrv_graph_rdlock_main_loop();
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);
    bdrv_graph_rdunlock_main_loop();

    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
        ret = -EINVAL;
        goto fail_opts;
    }

    update_flags_from_options(&bs->open_flags, opts);

    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);

    if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
        error_setg(errp,
                   BDRV_OPT_FORCE_SHARE
                   "=on can only be used with read-only images");
        ret = -EINVAL;
        goto fail_opts;
    }

    /* The file name comes from @file if given, else from @options */
    if (file != NULL) {
        bdrv_graph_rdlock_main_loop();
        bdrv_refresh_filename(blk_bs(file));
        bdrv_graph_rdunlock_main_loop();

        filename = blk_bs(file)->filename;
    } else {
        /*
         * Caution: while qdict_get_try_str() is fine, getting
         * non-string types would require more care.  When @options
         * come from -blockdev or blockdev_add, its members are typed
         * according to the QAPI schema, but when they come from
         * -drive, they're all QString.
         */
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    ro = bdrv_is_read_only(bs);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
        /*
         * A driver that is only whitelisted for read-only use gets a chance
         * to fall back to read-only through bdrv_apply_auto_read_only().
         */
        if (!ro && bdrv_is_whitelisted(drv, true)) {
            bdrv_graph_rdlock_main_loop();
            ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
            bdrv_graph_rdunlock_main_loop();
        } else {
            ret = -ENOTSUP;
        }
        if (ret < 0) {
            error_setg(errp,
                       !ro && bdrv_is_whitelisted(drv, true)
                       ? "Driver '%s' can only be used for read-only devices"
                       : "Driver '%s' is not whitelisted",
                       drv->format_name);
            goto fail_opts;
        }
    }

    /* bdrv_new() and bdrv_close() make it so */
    assert(qatomic_read(&bs->copy_on_read) == 0);

    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        /* @ro was sampled before the whitelist fallback above */
        if (!ro) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    bs->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    node_name = qemu_opt_get(opts, "node-name");

    assert(!drv->protocol_name || file == NULL);
    ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
    if (ret < 0) {
        goto fail_opts;
    }

    qemu_opts_del(opts);
    return 0;

fail_opts:
    /* @opts is freed on every exit path; @ret holds the error code */
    qemu_opts_del(opts);
    return ret;
}

/*
 * Parse a "json:" pseudo-filename into a flattened options QDict.
 *
 * @filename must start with "json:" (asserted). Returns the new QDict, or
 * NULL with @errp set if the remainder is not valid JSON or is not a JSON
 * object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    ERRP_GUARD();
    QObject *options_obj;
    QDict *options;
    int ret;
    GLOBAL_STATE_CODE();

    /* Skip the prefix; @filename now points at the JSON text */
    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename, errp);
    if (!options_obj) {
        error_prepend(errp, "Could not parse the JSON options: ");
        return NULL;
    }

    options = qobject_to(QDict, options_obj);
    if (!options) {
        qobject_unref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    qdict_flatten(options);

    return options;
}

/*
 * If *@pfilename is a "json:" pseudo-filename, merge the options it encodes
 * into @options and set *@pfilename to NULL. Otherwise (including a NULL
 * *@pfilename) nothing is changed. On a parse error @errp is set and both
 * @options and *@pfilename are left as they were.
 */
static void parse_json_protocol(QDict *options, const char **pfilename,
                                Error **errp)
{
    QDict *json_options;
    Error *local_err = NULL;
    GLOBAL_STATE_CODE();

    /* Parse json: pseudo-protocol */
    if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
        return;
    }

    json_options = parse_json_filename(*pfilename, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* Options given in the filename have lower priority than options
     * specified directly */
    qdict_join(options, json_options, false);
    qobject_unref(json_options);
    *pfilename = NULL;
}
2055 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 2056 * block driver has been specified explicitly. 2057 */ 2058 static int bdrv_fill_options(QDict **options, const char *filename, 2059 int *flags, bool allow_parse_filename, 2060 Error **errp) 2061 { 2062 const char *drvname; 2063 bool protocol = *flags & BDRV_O_PROTOCOL; 2064 bool parse_filename = false; 2065 BlockDriver *drv = NULL; 2066 Error *local_err = NULL; 2067 2068 GLOBAL_STATE_CODE(); 2069 2070 /* 2071 * Caution: while qdict_get_try_str() is fine, getting non-string 2072 * types would require more care. When @options come from 2073 * -blockdev or blockdev_add, its members are typed according to 2074 * the QAPI schema, but when they come from -drive, they're all 2075 * QString. 2076 */ 2077 drvname = qdict_get_try_str(*options, "driver"); 2078 if (drvname) { 2079 drv = bdrv_find_format(drvname); 2080 if (!drv) { 2081 error_setg(errp, "Unknown driver '%s'", drvname); 2082 return -ENOENT; 2083 } 2084 /* If the user has explicitly specified the driver, this choice should 2085 * override the BDRV_O_PROTOCOL flag */ 2086 protocol = drv->protocol_name; 2087 } 2088 2089 if (protocol) { 2090 *flags |= BDRV_O_PROTOCOL; 2091 } else { 2092 *flags &= ~BDRV_O_PROTOCOL; 2093 } 2094 2095 /* Translate cache options from flags into options */ 2096 update_options_from_flags(*options, *flags); 2097 2098 /* Fetch the file name from the options QDict if necessary */ 2099 if (protocol && filename) { 2100 if (!qdict_haskey(*options, "filename")) { 2101 qdict_put_str(*options, "filename", filename); 2102 parse_filename = allow_parse_filename; 2103 } else { 2104 error_setg(errp, "Can't specify 'file' and 'filename' options at " 2105 "the same time"); 2106 return -EINVAL; 2107 } 2108 } 2109 2110 /* Find the right block driver */ 2111 /* See cautionary note on accessing @options above */ 2112 filename = qdict_get_try_str(*options, "filename"); 2113 2114 if (!drvname && protocol) { 2115 if (filename) 
{ 2116 drv = bdrv_find_protocol(filename, parse_filename, errp); 2117 if (!drv) { 2118 return -EINVAL; 2119 } 2120 2121 drvname = drv->format_name; 2122 qdict_put_str(*options, "driver", drvname); 2123 } else { 2124 error_setg(errp, "Must specify either driver or file"); 2125 return -EINVAL; 2126 } 2127 } 2128 2129 assert(drv || !protocol); 2130 2131 /* Driver-specific filename parsing */ 2132 if (drv && drv->bdrv_parse_filename && parse_filename) { 2133 drv->bdrv_parse_filename(filename, *options, &local_err); 2134 if (local_err) { 2135 error_propagate(errp, local_err); 2136 return -EINVAL; 2137 } 2138 2139 if (!drv->bdrv_needs_filename) { 2140 qdict_del(*options, "filename"); 2141 } 2142 } 2143 2144 return 0; 2145 } 2146 2147 typedef struct BlockReopenQueueEntry { 2148 bool prepared; 2149 BDRVReopenState state; 2150 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 2151 } BlockReopenQueueEntry; 2152 2153 /* 2154 * Return the flags that @bs will have after the reopens in @q have 2155 * successfully completed. If @q is NULL (or @bs is not contained in @q), 2156 * return the current flags. 2157 */ 2158 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 2159 { 2160 BlockReopenQueueEntry *entry; 2161 2162 if (q != NULL) { 2163 QTAILQ_FOREACH(entry, q, entry) { 2164 if (entry->state.bs == bs) { 2165 return entry->state.flags; 2166 } 2167 } 2168 } 2169 2170 return bs->open_flags; 2171 } 2172 2173 /* Returns whether the image file can be written to after the reopen queue @q 2174 * has been successfully applied, or right now if @q is NULL. */ 2175 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 2176 BlockReopenQueue *q) 2177 { 2178 int flags = bdrv_reopen_get_flags(q, bs); 2179 2180 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 2181 } 2182 2183 /* 2184 * Return whether the BDS can be written to. 
This is not necessarily 2185 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2186 * be written to but do not count as read-only images. 2187 */ 2188 bool bdrv_is_writable(BlockDriverState *bs) 2189 { 2190 IO_CODE(); 2191 return bdrv_is_writable_after_reopen(bs, NULL); 2192 } 2193 2194 static char *bdrv_child_user_desc(BdrvChild *c) 2195 { 2196 GLOBAL_STATE_CODE(); 2197 return c->klass->get_parent_desc(c); 2198 } 2199 2200 /* 2201 * Check that @a allows everything that @b needs. @a and @b must reference same 2202 * child node. 2203 */ 2204 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) 2205 { 2206 const char *child_bs_name; 2207 g_autofree char *a_user = NULL; 2208 g_autofree char *b_user = NULL; 2209 g_autofree char *perms = NULL; 2210 2211 assert(a->bs); 2212 assert(a->bs == b->bs); 2213 GLOBAL_STATE_CODE(); 2214 2215 if ((b->perm & a->shared_perm) == b->perm) { 2216 return true; 2217 } 2218 2219 child_bs_name = bdrv_get_node_name(b->bs); 2220 a_user = bdrv_child_user_desc(a); 2221 b_user = bdrv_child_user_desc(b); 2222 perms = bdrv_perm_names(b->perm & ~a->shared_perm); 2223 2224 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " 2225 "both required by %s (uses node '%s' as '%s' child) and " 2226 "unshared by %s (uses node '%s' as '%s' child).", 2227 child_bs_name, perms, 2228 b_user, child_bs_name, b->name, 2229 a_user, child_bs_name, a->name); 2230 2231 return false; 2232 } 2233 2234 static bool GRAPH_RDLOCK 2235 bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) 2236 { 2237 BdrvChild *a, *b; 2238 GLOBAL_STATE_CODE(); 2239 2240 /* 2241 * During the loop we'll look at each pair twice. That's correct because 2242 * bdrv_a_allow_b() is asymmetric and we should check each pair in both 2243 * directions. 
2244 */ 2245 QLIST_FOREACH(a, &bs->parents, next_parent) { 2246 QLIST_FOREACH(b, &bs->parents, next_parent) { 2247 if (a == b) { 2248 continue; 2249 } 2250 2251 if (!bdrv_a_allow_b(a, b, errp)) { 2252 return true; 2253 } 2254 } 2255 } 2256 2257 return false; 2258 } 2259 2260 static void GRAPH_RDLOCK 2261 bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2262 BdrvChild *c, BdrvChildRole role, 2263 BlockReopenQueue *reopen_queue, 2264 uint64_t parent_perm, uint64_t parent_shared, 2265 uint64_t *nperm, uint64_t *nshared) 2266 { 2267 assert(bs->drv && bs->drv->bdrv_child_perm); 2268 GLOBAL_STATE_CODE(); 2269 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2270 parent_perm, parent_shared, 2271 nperm, nshared); 2272 /* TODO Take force_share from reopen_queue */ 2273 if (child_bs && child_bs->force_share) { 2274 *nshared = BLK_PERM_ALL; 2275 } 2276 } 2277 2278 /* 2279 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for 2280 * nodes that are already in the @list, of course) so that final list is 2281 * topologically sorted. Return the result (GSList @list object is updated, so 2282 * don't use old reference after function call). 2283 * 2284 * On function start @list must be already topologically sorted and for any node 2285 * in the @list the whole subtree of the node must be in the @list as well. The 2286 * simplest way to satisfy this criteria: use only result of 2287 * bdrv_topological_dfs() or NULL as @list parameter. 
/*
 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
 * nodes that are already in the @list, of course) so that final list is
 * topologically sorted. Return the result (GSList @list object is updated, so
 * don't use old reference after function call).
 *
 * On function start @list must be already topologically sorted and for any node
 * in the @list the whole subtree of the node must be in the @list as well. The
 * simplest way to satisfy this criteria: use only result of
 * bdrv_topological_dfs() or NULL as @list parameter.
 *
 * @found is the set of nodes visited so far; pass NULL (together with a NULL
 * @list) to have a temporary set created for the duration of the call.
 */
static GSList * GRAPH_RDLOCK
bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs)
{
    BdrvChild *child;
    g_autoptr(GHashTable) local_found = NULL;

    GLOBAL_STATE_CODE();

    if (!found) {
        assert(!list);
        found = local_found = g_hash_table_new(NULL, NULL);
    }

    if (g_hash_table_contains(found, bs)) {
        return list;
    }
    g_hash_table_add(found, bs);

    QLIST_FOREACH(child, &bs->children, next) {
        list = bdrv_topological_dfs(list, found, child->bs);
    }

    /* Prepend after the children, so @bs ends up in front of its subtree */
    return g_slist_prepend(list, bs);
}

/* Undo record for bdrv_child_set_perm(): the child and its old permissions */
typedef struct BdrvChildSetPermState {
    BdrvChild *child;
    uint64_t old_perm;
    uint64_t old_shared_perm;
} BdrvChildSetPermState;

/* Transaction abort handler: restore the permissions saved in @opaque */
static void bdrv_child_set_perm_abort(void *opaque)
{
    BdrvChildSetPermState *s = opaque;

    GLOBAL_STATE_CODE();

    s->child->perm = s->old_perm;
    s->child->shared_perm = s->old_shared_perm;
}

/* NOTE(review): "pem" in the name looks like a typo for "perm" */
static TransactionActionDrv bdrv_child_set_pem_drv = {
    .abort = bdrv_child_set_perm_abort,
    .clean = g_free,
};

/*
 * Set the permissions of @c to @perm/@shared right away and register an
 * action in @tran that restores the previous values on abort.
 */
static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
                                uint64_t shared, Transaction *tran)
{
    BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
    GLOBAL_STATE_CODE();

    *s = (BdrvChildSetPermState) {
        .child = c,
        .old_perm = c->perm,
        .old_shared_perm = c->shared_perm,
    };

    c->perm = perm;
    c->shared_perm = shared;

    tran_add(tran, &bdrv_child_set_pem_drv, s);
}

/*
 * Transaction commit handler: pass the cumulative permissions of the node
 * in @opaque to the driver's bdrv_set_perm callback, if it has one.
 */
static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque)
{
    BlockDriverState *bs = opaque;
    uint64_t cumulative_perms, cumulative_shared_perms;
    GLOBAL_STATE_CODE();

    if (bs->drv->bdrv_set_perm) {
        bdrv_get_cumulative_perm(bs, &cumulative_perms,
                                 &cumulative_shared_perms);
        bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
    }
}

/*
 * Transaction abort handler: call the driver's bdrv_abort_perm_update
 * callback for the node in @opaque, if it has one.
 */
static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque)
{
    BlockDriverState *bs = opaque;
    GLOBAL_STATE_CODE();

    if (bs->drv->bdrv_abort_perm_update) {
        bs->drv->bdrv_abort_perm_update(bs);
    }
}

/* The opaque is the BlockDriverState itself, hence no .clean handler */
TransactionActionDrv bdrv_drv_set_perm_drv = {
    .abort = bdrv_drv_set_perm_abort,
    .commit = bdrv_drv_set_perm_commit,
};

/*
 * Run the driver's bdrv_check_perm callback (if any) for @perm/@shared_perm
 * and, when @tran is given, register the commit/abort actions above.
 *
 * Returns 0 on success (also for a node without driver), or the negative
 * value returned by bdrv_check_perm.
 *
 * After calling this function, the transaction @tran may only be completed
 * while holding a reader lock for the graph.
 */
static int GRAPH_RDLOCK
bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
                  Transaction *tran, Error **errp)
{
    GLOBAL_STATE_CODE();
    if (!bs->drv) {
        return 0;
    }

    if (bs->drv->bdrv_check_perm) {
        int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
        if (ret < 0) {
            return ret;
        }
    }

    if (tran) {
        tran_add(tran, &bdrv_drv_set_perm_drv, bs);
    }

    return 0;
}

/* Undo record for bdrv_replace_child_tran(): the child and its old node */
typedef struct BdrvReplaceChildState {
    BdrvChild *child;
    BlockDriverState *old_bs;
} BdrvReplaceChildState;

/* Transaction commit handler: schedule the unref of the replaced node */
static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque)
{
    BdrvReplaceChildState *s = opaque;
    GLOBAL_STATE_CODE();

    bdrv_schedule_unref(s->old_bs);
}

/*
 * Transaction abort handler: point the child back at the old node and drop
 * the reference that was taken on the new one.
 */
static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque)
{
    BdrvReplaceChildState *s = opaque;
    BlockDriverState *new_bs = s->child->bs;

    GLOBAL_STATE_CODE();
    assert_bdrv_graph_writable();

    /* old_bs reference is transparently moved from @s to @s->child */
    if (!s->child->bs) {
        /*
         * The parents were undrained when removing old_bs from the child. New
         * requests can't have been made, though, because the child was empty.
         *
         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
         * undraining the parent in the first place. Once this is done, having
         * new_bs drained when calling bdrv_replace_child_tran() is not a
         * requirement any more.
         */
        bdrv_parent_drained_begin_single(s->child);
        assert(!bdrv_parent_drained_poll_single(s->child));
    }
    assert(s->child->quiesced_parent);
    bdrv_replace_child_noperm(s->child, s->old_bs);

    bdrv_unref(new_bs);
}

static TransactionActionDrv bdrv_replace_child_drv = {
    .commit = bdrv_replace_child_commit,
    .abort = bdrv_replace_child_abort,
    .clean = g_free,
};

/*
 * bdrv_replace_child_tran
 *
 * Note: real unref of old_bs is done only on commit.
 *
 * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
 * kept drained until the transaction is completed.
 *
 * After calling this function, the transaction @tran may only be completed
 * while holding a writer lock for the graph.
 *
 * The function doesn't update permissions, caller is responsible for this.
 */
static void GRAPH_WRLOCK
bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
                        Transaction *tran)
{
    BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);

    assert(child->quiesced_parent);
    assert(!new_bs || new_bs->quiesce_counter);

    /* Save the old node before the child is pointed at @new_bs below */
    *s = (BdrvReplaceChildState) {
        .child = child,
        .old_bs = child->bs,
    };
    tran_add(tran, &bdrv_replace_child_drv, s);

    if (new_bs) {
        bdrv_ref(new_bs);
    }

    bdrv_replace_child_noperm(child, new_bs);
    /* old_bs reference is transparently moved from @child to @s */
}
2494 * 2495 * After calling this function, the transaction @tran may only be completed 2496 * while holding a reader lock for the graph. 2497 */ 2498 static int GRAPH_RDLOCK 2499 bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, 2500 Transaction *tran, Error **errp) 2501 { 2502 BlockDriver *drv = bs->drv; 2503 BdrvChild *c; 2504 int ret; 2505 uint64_t cumulative_perms, cumulative_shared_perms; 2506 GLOBAL_STATE_CODE(); 2507 2508 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); 2509 2510 /* Write permissions never work with read-only images */ 2511 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2512 !bdrv_is_writable_after_reopen(bs, q)) 2513 { 2514 if (!bdrv_is_writable_after_reopen(bs, NULL)) { 2515 error_setg(errp, "Block node is read-only"); 2516 } else { 2517 error_setg(errp, "Read-only block node '%s' cannot support " 2518 "read-write users", bdrv_get_node_name(bs)); 2519 } 2520 2521 return -EPERM; 2522 } 2523 2524 /* 2525 * Unaligned requests will automatically be aligned to bl.request_alignment 2526 * and without RESIZE we can't extend requests to write to space beyond the 2527 * end of the image, so it's required that the image size is aligned. 
2528 */ 2529 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2530 !(cumulative_perms & BLK_PERM_RESIZE)) 2531 { 2532 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { 2533 error_setg(errp, "Cannot get 'write' permission without 'resize': " 2534 "Image size is not a multiple of request " 2535 "alignment"); 2536 return -EPERM; 2537 } 2538 } 2539 2540 /* Check this node */ 2541 if (!drv) { 2542 return 0; 2543 } 2544 2545 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, 2546 errp); 2547 if (ret < 0) { 2548 return ret; 2549 } 2550 2551 /* Drivers that never have children can omit .bdrv_child_perm() */ 2552 if (!drv->bdrv_child_perm) { 2553 assert(QLIST_EMPTY(&bs->children)); 2554 return 0; 2555 } 2556 2557 /* Check all children */ 2558 QLIST_FOREACH(c, &bs->children, next) { 2559 uint64_t cur_perm, cur_shared; 2560 2561 bdrv_child_perm(bs, c->bs, c, c->role, q, 2562 cumulative_perms, cumulative_shared_perms, 2563 &cur_perm, &cur_shared); 2564 bdrv_child_set_perm(c, cur_perm, cur_shared, tran); 2565 } 2566 2567 return 0; 2568 } 2569 2570 /* 2571 * @list is a product of bdrv_topological_dfs() (may be called several times) - 2572 * a topologically sorted subgraph. 2573 * 2574 * After calling this function, the transaction @tran may only be completed 2575 * while holding a reader lock for the graph. 2576 */ 2577 static int GRAPH_RDLOCK 2578 bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran, 2579 Error **errp) 2580 { 2581 int ret; 2582 BlockDriverState *bs; 2583 GLOBAL_STATE_CODE(); 2584 2585 for ( ; list; list = list->next) { 2586 bs = list->data; 2587 2588 if (bdrv_parent_perms_conflict(bs, errp)) { 2589 return -EINVAL; 2590 } 2591 2592 ret = bdrv_node_refresh_perm(bs, q, tran, errp); 2593 if (ret < 0) { 2594 return ret; 2595 } 2596 } 2597 2598 return 0; 2599 } 2600 2601 /* 2602 * @list is any list of nodes. List is completed by all subtrees and 2603 * topologically sorted. 
It's not a problem if some node occurs in the @list 2604 * several times. 2605 * 2606 * After calling this function, the transaction @tran may only be completed 2607 * while holding a reader lock for the graph. 2608 */ 2609 static int GRAPH_RDLOCK 2610 bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran, 2611 Error **errp) 2612 { 2613 g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL); 2614 g_autoptr(GSList) refresh_list = NULL; 2615 2616 for ( ; list; list = list->next) { 2617 refresh_list = bdrv_topological_dfs(refresh_list, found, list->data); 2618 } 2619 2620 return bdrv_do_refresh_perms(refresh_list, q, tran, errp); 2621 } 2622 2623 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 2624 uint64_t *shared_perm) 2625 { 2626 BdrvChild *c; 2627 uint64_t cumulative_perms = 0; 2628 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2629 2630 GLOBAL_STATE_CODE(); 2631 2632 QLIST_FOREACH(c, &bs->parents, next_parent) { 2633 cumulative_perms |= c->perm; 2634 cumulative_shared_perms &= c->shared_perm; 2635 } 2636 2637 *perm = cumulative_perms; 2638 *shared_perm = cumulative_shared_perms; 2639 } 2640 2641 char *bdrv_perm_names(uint64_t perm) 2642 { 2643 struct perm_name { 2644 uint64_t perm; 2645 const char *name; 2646 } permissions[] = { 2647 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 2648 { BLK_PERM_WRITE, "write" }, 2649 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 2650 { BLK_PERM_RESIZE, "resize" }, 2651 { 0, NULL } 2652 }; 2653 2654 GString *result = g_string_sized_new(30); 2655 struct perm_name *p; 2656 2657 for (p = permissions; p->name; p++) { 2658 if (perm & p->perm) { 2659 if (result->len > 0) { 2660 g_string_append(result, ", "); 2661 } 2662 g_string_append(result, p->name); 2663 } 2664 } 2665 2666 return g_string_free(result, FALSE); 2667 } 2668 2669 2670 /* 2671 * @tran is allowed to be NULL. In this case no rollback is possible. 
2672 * 2673 * After calling this function, the transaction @tran may only be completed 2674 * while holding a reader lock for the graph. 2675 */ 2676 static int GRAPH_RDLOCK 2677 bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp) 2678 { 2679 int ret; 2680 Transaction *local_tran = NULL; 2681 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2682 GLOBAL_STATE_CODE(); 2683 2684 if (!tran) { 2685 tran = local_tran = tran_new(); 2686 } 2687 2688 ret = bdrv_do_refresh_perms(list, NULL, tran, errp); 2689 2690 if (local_tran) { 2691 tran_finalize(local_tran, ret); 2692 } 2693 2694 return ret; 2695 } 2696 2697 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 2698 Error **errp) 2699 { 2700 Error *local_err = NULL; 2701 Transaction *tran = tran_new(); 2702 int ret; 2703 2704 GLOBAL_STATE_CODE(); 2705 2706 bdrv_child_set_perm(c, perm, shared, tran); 2707 2708 ret = bdrv_refresh_perms(c->bs, tran, &local_err); 2709 2710 tran_finalize(tran, ret); 2711 2712 if (ret < 0) { 2713 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { 2714 /* tighten permissions */ 2715 error_propagate(errp, local_err); 2716 } else { 2717 /* 2718 * Our caller may intend to only loosen restrictions and 2719 * does not expect this function to fail. Errors are not 2720 * fatal in such a case, so we can just hide them from our 2721 * caller. 
2722 */ 2723 error_free(local_err); 2724 ret = 0; 2725 } 2726 } 2727 2728 return ret; 2729 } 2730 2731 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) 2732 { 2733 uint64_t parent_perms, parent_shared; 2734 uint64_t perms, shared; 2735 2736 GLOBAL_STATE_CODE(); 2737 2738 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); 2739 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 2740 parent_perms, parent_shared, &perms, &shared); 2741 2742 return bdrv_child_try_set_perm(c, perms, shared, errp); 2743 } 2744 2745 /* 2746 * Default implementation for .bdrv_child_perm() for block filters: 2747 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the 2748 * filtered child. 2749 */ 2750 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 2751 BdrvChildRole role, 2752 BlockReopenQueue *reopen_queue, 2753 uint64_t perm, uint64_t shared, 2754 uint64_t *nperm, uint64_t *nshared) 2755 { 2756 GLOBAL_STATE_CODE(); 2757 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 2758 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 2759 } 2760 2761 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, 2762 BdrvChildRole role, 2763 BlockReopenQueue *reopen_queue, 2764 uint64_t perm, uint64_t shared, 2765 uint64_t *nperm, uint64_t *nshared) 2766 { 2767 assert(role & BDRV_CHILD_COW); 2768 GLOBAL_STATE_CODE(); 2769 2770 /* 2771 * We want consistent read from backing files if the parent needs it. 2772 * No other operations are performed on backing files. 2773 */ 2774 perm &= BLK_PERM_CONSISTENT_READ; 2775 2776 /* 2777 * If the parent can deal with changing data, we're okay with a 2778 * writable and resizable backing file. 2779 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
2780 */ 2781 if (shared & BLK_PERM_WRITE) { 2782 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 2783 } else { 2784 shared = 0; 2785 } 2786 2787 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; 2788 2789 if (bs->open_flags & BDRV_O_INACTIVE) { 2790 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2791 } 2792 2793 *nperm = perm; 2794 *nshared = shared; 2795 } 2796 2797 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, 2798 BdrvChildRole role, 2799 BlockReopenQueue *reopen_queue, 2800 uint64_t perm, uint64_t shared, 2801 uint64_t *nperm, uint64_t *nshared) 2802 { 2803 int flags; 2804 2805 GLOBAL_STATE_CODE(); 2806 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); 2807 2808 flags = bdrv_reopen_get_flags(reopen_queue, bs); 2809 2810 /* 2811 * Apart from the modifications below, the same permissions are 2812 * forwarded and left alone as for filters 2813 */ 2814 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2815 perm, shared, &perm, &shared); 2816 2817 if (role & BDRV_CHILD_METADATA) { 2818 /* Format drivers may touch metadata even if the guest doesn't write */ 2819 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { 2820 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2821 } 2822 2823 /* 2824 * bs->file always needs to be consistent because of the 2825 * metadata. We can never allow other users to resize or write 2826 * to it. 2827 */ 2828 if (!(flags & BDRV_O_NO_IO)) { 2829 perm |= BLK_PERM_CONSISTENT_READ; 2830 } 2831 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 2832 } 2833 2834 if (role & BDRV_CHILD_DATA) { 2835 /* 2836 * Technically, everything in this block is a subset of the 2837 * BDRV_CHILD_METADATA path taken above, and so this could 2838 * be an "else if" branch. However, that is not obvious, and 2839 * this function is not performance critical, therefore we let 2840 * this be an independent "if". 
2841 */ 2842 2843 /* 2844 * We cannot allow other users to resize the file because the 2845 * format driver might have some assumptions about the size 2846 * (e.g. because it is stored in metadata, or because the file 2847 * is split into fixed-size data files). 2848 */ 2849 shared &= ~BLK_PERM_RESIZE; 2850 2851 /* 2852 * WRITE_UNCHANGED often cannot be performed as such on the 2853 * data file. For example, the qcow2 driver may still need to 2854 * write copied clusters on copy-on-read. 2855 */ 2856 if (perm & BLK_PERM_WRITE_UNCHANGED) { 2857 perm |= BLK_PERM_WRITE; 2858 } 2859 2860 /* 2861 * If the data file is written to, the format driver may 2862 * expect to be able to resize it by writing beyond the EOF. 2863 */ 2864 if (perm & BLK_PERM_WRITE) { 2865 perm |= BLK_PERM_RESIZE; 2866 } 2867 } 2868 2869 if (bs->open_flags & BDRV_O_INACTIVE) { 2870 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2871 } 2872 2873 *nperm = perm; 2874 *nshared = shared; 2875 } 2876 2877 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, 2878 BdrvChildRole role, BlockReopenQueue *reopen_queue, 2879 uint64_t perm, uint64_t shared, 2880 uint64_t *nperm, uint64_t *nshared) 2881 { 2882 GLOBAL_STATE_CODE(); 2883 if (role & BDRV_CHILD_FILTERED) { 2884 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 2885 BDRV_CHILD_COW))); 2886 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2887 perm, shared, nperm, nshared); 2888 } else if (role & BDRV_CHILD_COW) { 2889 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); 2890 bdrv_default_perms_for_cow(bs, c, role, reopen_queue, 2891 perm, shared, nperm, nshared); 2892 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { 2893 bdrv_default_perms_for_storage(bs, c, role, reopen_queue, 2894 perm, shared, nperm, nshared); 2895 } else { 2896 g_assert_not_reached(); 2897 } 2898 } 2899 2900 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) 2901 { 2902 static const uint64_t permissions[] = { 2903 
[BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ, 2904 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, 2905 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, 2906 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, 2907 }; 2908 2909 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); 2910 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); 2911 2912 assert(qapi_perm < BLOCK_PERMISSION__MAX); 2913 2914 return permissions[qapi_perm]; 2915 } 2916 2917 /* 2918 * Replaces the node that a BdrvChild points to without updating permissions. 2919 * 2920 * If @new_bs is non-NULL, the parent of @child must already be drained through 2921 * @child. 2922 */ 2923 static void GRAPH_WRLOCK 2924 bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) 2925 { 2926 BlockDriverState *old_bs = child->bs; 2927 int new_bs_quiesce_counter; 2928 2929 assert(!child->frozen); 2930 2931 /* 2932 * If we want to change the BdrvChild to point to a drained node as its new 2933 * child->bs, we need to make sure that its new parent is drained, too. In 2934 * other words, either child->quiesce_parent must already be true or we must 2935 * be able to set it and keep the parent's quiesce_counter consistent with 2936 * that, but without polling or starting new requests (this function 2937 * guarantees that it doesn't poll, and starting new requests would be 2938 * against the invariants of drain sections). 2939 * 2940 * To keep things simple, we pick the first option (child->quiesce_parent 2941 * must already be true). We also generalise the rule a bit to make it 2942 * easier to verify in callers and more likely to be covered in test cases: 2943 * The parent must be quiesced through this child even if new_bs isn't 2944 * currently drained. 2945 * 2946 * The only exception is for callers that always pass new_bs == NULL. 
In 2947 * this case, we obviously never need to consider the case of a drained 2948 * new_bs, so we can keep the callers simpler by allowing them not to drain 2949 * the parent. 2950 */ 2951 assert(!new_bs || child->quiesced_parent); 2952 assert(old_bs != new_bs); 2953 GLOBAL_STATE_CODE(); 2954 2955 if (old_bs && new_bs) { 2956 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2957 } 2958 2959 if (old_bs) { 2960 if (child->klass->detach) { 2961 child->klass->detach(child); 2962 } 2963 QLIST_REMOVE(child, next_parent); 2964 } 2965 2966 child->bs = new_bs; 2967 2968 if (new_bs) { 2969 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2970 if (child->klass->attach) { 2971 child->klass->attach(child); 2972 } 2973 } 2974 2975 /* 2976 * If the parent was drained through this BdrvChild previously, but new_bs 2977 * is not drained, allow requests to come in only after the new node has 2978 * been attached. 2979 */ 2980 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); 2981 if (!new_bs_quiesce_counter && child->quiesced_parent) { 2982 bdrv_parent_drained_end_single(child); 2983 } 2984 } 2985 2986 /** 2987 * Free the given @child. 2988 * 2989 * The child must be empty (i.e. `child->bs == NULL`) and it must be 2990 * unused (i.e. not in a children list). 
2991 */ 2992 static void bdrv_child_free(BdrvChild *child) 2993 { 2994 assert(!child->bs); 2995 GLOBAL_STATE_CODE(); 2996 GRAPH_RDLOCK_GUARD_MAINLOOP(); 2997 2998 assert(!child->next.le_prev); /* not in children list */ 2999 3000 g_free(child->name); 3001 g_free(child); 3002 } 3003 3004 typedef struct BdrvAttachChildCommonState { 3005 BdrvChild *child; 3006 AioContext *old_parent_ctx; 3007 AioContext *old_child_ctx; 3008 } BdrvAttachChildCommonState; 3009 3010 static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) 3011 { 3012 BdrvAttachChildCommonState *s = opaque; 3013 BlockDriverState *bs = s->child->bs; 3014 3015 GLOBAL_STATE_CODE(); 3016 assert_bdrv_graph_writable(); 3017 3018 bdrv_replace_child_noperm(s->child, NULL); 3019 3020 if (bdrv_get_aio_context(bs) != s->old_child_ctx) { 3021 bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); 3022 } 3023 3024 if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { 3025 Transaction *tran; 3026 GHashTable *visited; 3027 bool ret; 3028 3029 tran = tran_new(); 3030 3031 /* No need to visit `child`, because it has been detached already */ 3032 visited = g_hash_table_new(NULL, NULL); 3033 ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx, 3034 visited, tran, &error_abort); 3035 g_hash_table_destroy(visited); 3036 3037 /* transaction is supposed to always succeed */ 3038 assert(ret == true); 3039 tran_commit(tran); 3040 } 3041 3042 bdrv_schedule_unref(bs); 3043 bdrv_child_free(s->child); 3044 } 3045 3046 static TransactionActionDrv bdrv_attach_child_common_drv = { 3047 .abort = bdrv_attach_child_common_abort, 3048 .clean = g_free, 3049 }; 3050 3051 /* 3052 * Common part of attaching bdrv child to bs or to blk or to job 3053 * 3054 * Function doesn't update permissions, caller is responsible for this. 3055 * 3056 * After calling this function, the transaction @tran may only be completed 3057 * while holding a writer lock for the graph. 
3058 * 3059 * Returns new created child. 3060 * 3061 * Both @parent_bs and @child_bs can move to a different AioContext in this 3062 * function. 3063 */ 3064 static BdrvChild * GRAPH_WRLOCK 3065 bdrv_attach_child_common(BlockDriverState *child_bs, 3066 const char *child_name, 3067 const BdrvChildClass *child_class, 3068 BdrvChildRole child_role, 3069 uint64_t perm, uint64_t shared_perm, 3070 void *opaque, 3071 Transaction *tran, Error **errp) 3072 { 3073 BdrvChild *new_child; 3074 AioContext *parent_ctx; 3075 AioContext *child_ctx = bdrv_get_aio_context(child_bs); 3076 3077 assert(child_class->get_parent_desc); 3078 GLOBAL_STATE_CODE(); 3079 3080 new_child = g_new(BdrvChild, 1); 3081 *new_child = (BdrvChild) { 3082 .bs = NULL, 3083 .name = g_strdup(child_name), 3084 .klass = child_class, 3085 .role = child_role, 3086 .perm = perm, 3087 .shared_perm = shared_perm, 3088 .opaque = opaque, 3089 }; 3090 3091 /* 3092 * If the AioContexts don't match, first try to move the subtree of 3093 * child_bs into the AioContext of the new parent. If this doesn't work, 3094 * try moving the parent into the AioContext of child_bs instead. 3095 */ 3096 parent_ctx = bdrv_child_get_parent_aio_context(new_child); 3097 if (child_ctx != parent_ctx) { 3098 Error *local_err = NULL; 3099 int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, 3100 &local_err); 3101 3102 if (ret < 0 && child_class->change_aio_ctx) { 3103 Transaction *aio_ctx_tran = tran_new(); 3104 GHashTable *visited = g_hash_table_new(NULL, NULL); 3105 bool ret_child; 3106 3107 g_hash_table_add(visited, new_child); 3108 ret_child = child_class->change_aio_ctx(new_child, child_ctx, 3109 visited, aio_ctx_tran, 3110 NULL); 3111 if (ret_child == true) { 3112 error_free(local_err); 3113 ret = 0; 3114 } 3115 tran_finalize(aio_ctx_tran, ret_child == true ? 
0 : -1); 3116 g_hash_table_destroy(visited); 3117 } 3118 3119 if (ret < 0) { 3120 error_propagate(errp, local_err); 3121 bdrv_child_free(new_child); 3122 return NULL; 3123 } 3124 } 3125 3126 bdrv_ref(child_bs); 3127 /* 3128 * Let every new BdrvChild start with a drained parent. Inserting the child 3129 * in the graph with bdrv_replace_child_noperm() will undrain it if 3130 * @child_bs is not drained. 3131 * 3132 * The child was only just created and is not yet visible in global state 3133 * until bdrv_replace_child_noperm() inserts it into the graph, so nobody 3134 * could have sent requests and polling is not necessary. 3135 * 3136 * Note that this means that the parent isn't fully drained yet, we only 3137 * stop new requests from coming in. This is fine, we don't care about the 3138 * old requests here, they are not for this child. If another place enters a 3139 * drain section for the same parent, but wants it to be fully quiesced, it 3140 * will not run most of the the code in .drained_begin() again (which is not 3141 * a problem, we already did this), but it will still poll until the parent 3142 * is fully quiesced, so it will not be negatively affected either. 3143 */ 3144 bdrv_parent_drained_begin_single(new_child); 3145 bdrv_replace_child_noperm(new_child, child_bs); 3146 3147 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); 3148 *s = (BdrvAttachChildCommonState) { 3149 .child = new_child, 3150 .old_parent_ctx = parent_ctx, 3151 .old_child_ctx = child_ctx, 3152 }; 3153 tran_add(tran, &bdrv_attach_child_common_drv, s); 3154 3155 return new_child; 3156 } 3157 3158 /* 3159 * Function doesn't update permissions, caller is responsible for this. 3160 * 3161 * Both @parent_bs and @child_bs can move to a different AioContext in this 3162 * function. 3163 * 3164 * After calling this function, the transaction @tran may only be completed 3165 * while holding a writer lock for the graph. 
3166 */ 3167 static BdrvChild * GRAPH_WRLOCK 3168 bdrv_attach_child_noperm(BlockDriverState *parent_bs, 3169 BlockDriverState *child_bs, 3170 const char *child_name, 3171 const BdrvChildClass *child_class, 3172 BdrvChildRole child_role, 3173 Transaction *tran, 3174 Error **errp) 3175 { 3176 uint64_t perm, shared_perm; 3177 3178 assert(parent_bs->drv); 3179 GLOBAL_STATE_CODE(); 3180 3181 if (bdrv_recurse_has_child(child_bs, parent_bs)) { 3182 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", 3183 child_bs->node_name, child_name, parent_bs->node_name); 3184 return NULL; 3185 } 3186 3187 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 3188 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 3189 perm, shared_perm, &perm, &shared_perm); 3190 3191 return bdrv_attach_child_common(child_bs, child_name, child_class, 3192 child_role, perm, shared_perm, parent_bs, 3193 tran, errp); 3194 } 3195 3196 /* 3197 * This function steals the reference to child_bs from the caller. 3198 * That reference is later dropped by bdrv_root_unref_child(). 3199 * 3200 * On failure NULL is returned, errp is set and the reference to 3201 * child_bs is also dropped. 3202 */ 3203 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 3204 const char *child_name, 3205 const BdrvChildClass *child_class, 3206 BdrvChildRole child_role, 3207 uint64_t perm, uint64_t shared_perm, 3208 void *opaque, Error **errp) 3209 { 3210 int ret; 3211 BdrvChild *child; 3212 Transaction *tran = tran_new(); 3213 3214 GLOBAL_STATE_CODE(); 3215 3216 child = bdrv_attach_child_common(child_bs, child_name, child_class, 3217 child_role, perm, shared_perm, opaque, 3218 tran, errp); 3219 if (!child) { 3220 ret = -EINVAL; 3221 goto out; 3222 } 3223 3224 ret = bdrv_refresh_perms(child_bs, tran, errp); 3225 3226 out: 3227 tran_finalize(tran, ret); 3228 3229 bdrv_schedule_unref(child_bs); 3230 3231 return ret < 0 ? 
NULL : child; 3232 } 3233 3234 /* 3235 * This function transfers the reference to child_bs from the caller 3236 * to parent_bs. That reference is later dropped by parent_bs on 3237 * bdrv_close() or if someone calls bdrv_unref_child(). 3238 * 3239 * On failure NULL is returned, errp is set and the reference to 3240 * child_bs is also dropped. 3241 */ 3242 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 3243 BlockDriverState *child_bs, 3244 const char *child_name, 3245 const BdrvChildClass *child_class, 3246 BdrvChildRole child_role, 3247 Error **errp) 3248 { 3249 int ret; 3250 BdrvChild *child; 3251 Transaction *tran = tran_new(); 3252 3253 GLOBAL_STATE_CODE(); 3254 3255 child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, 3256 child_class, child_role, tran, errp); 3257 if (!child) { 3258 ret = -EINVAL; 3259 goto out; 3260 } 3261 3262 ret = bdrv_refresh_perms(parent_bs, tran, errp); 3263 if (ret < 0) { 3264 goto out; 3265 } 3266 3267 out: 3268 tran_finalize(tran, ret); 3269 3270 bdrv_schedule_unref(child_bs); 3271 3272 return ret < 0 ? NULL : child; 3273 } 3274 3275 /* Callers must ensure that child->frozen is false. */ 3276 void bdrv_root_unref_child(BdrvChild *child) 3277 { 3278 BlockDriverState *child_bs = child->bs; 3279 3280 GLOBAL_STATE_CODE(); 3281 bdrv_replace_child_noperm(child, NULL); 3282 bdrv_child_free(child); 3283 3284 if (child_bs) { 3285 /* 3286 * Update permissions for old node. We're just taking a parent away, so 3287 * we're loosening restrictions. Errors of permission update are not 3288 * fatal in this case, ignore them. 
3289 */ 3290 bdrv_refresh_perms(child_bs, NULL, NULL); 3291 3292 /* 3293 * When the parent requiring a non-default AioContext is removed, the 3294 * node moves back to the main AioContext 3295 */ 3296 bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, 3297 NULL); 3298 } 3299 3300 bdrv_schedule_unref(child_bs); 3301 } 3302 3303 typedef struct BdrvSetInheritsFrom { 3304 BlockDriverState *bs; 3305 BlockDriverState *old_inherits_from; 3306 } BdrvSetInheritsFrom; 3307 3308 static void bdrv_set_inherits_from_abort(void *opaque) 3309 { 3310 BdrvSetInheritsFrom *s = opaque; 3311 3312 s->bs->inherits_from = s->old_inherits_from; 3313 } 3314 3315 static TransactionActionDrv bdrv_set_inherits_from_drv = { 3316 .abort = bdrv_set_inherits_from_abort, 3317 .clean = g_free, 3318 }; 3319 3320 /* @tran is allowed to be NULL. In this case no rollback is possible */ 3321 static void bdrv_set_inherits_from(BlockDriverState *bs, 3322 BlockDriverState *new_inherits_from, 3323 Transaction *tran) 3324 { 3325 if (tran) { 3326 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); 3327 3328 *s = (BdrvSetInheritsFrom) { 3329 .bs = bs, 3330 .old_inherits_from = bs->inherits_from, 3331 }; 3332 3333 tran_add(tran, &bdrv_set_inherits_from_drv, s); 3334 } 3335 3336 bs->inherits_from = new_inherits_from; 3337 } 3338 3339 /** 3340 * Clear all inherits_from pointers from children and grandchildren of 3341 * @root that point to @root, where necessary. 3342 * @tran is allowed to be NULL. In this case no rollback is possible 3343 */ 3344 static void GRAPH_WRLOCK 3345 bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, 3346 Transaction *tran) 3347 { 3348 BdrvChild *c; 3349 3350 if (child->bs->inherits_from == root) { 3351 /* 3352 * Remove inherits_from only when the last reference between root and 3353 * child->bs goes away. 
3354 */ 3355 QLIST_FOREACH(c, &root->children, next) { 3356 if (c != child && c->bs == child->bs) { 3357 break; 3358 } 3359 } 3360 if (c == NULL) { 3361 bdrv_set_inherits_from(child->bs, NULL, tran); 3362 } 3363 } 3364 3365 QLIST_FOREACH(c, &child->bs->children, next) { 3366 bdrv_unset_inherits_from(root, c, tran); 3367 } 3368 } 3369 3370 /* Callers must ensure that child->frozen is false. */ 3371 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 3372 { 3373 GLOBAL_STATE_CODE(); 3374 if (child == NULL) { 3375 return; 3376 } 3377 3378 bdrv_unset_inherits_from(parent, child, NULL); 3379 bdrv_root_unref_child(child); 3380 } 3381 3382 3383 static void GRAPH_RDLOCK 3384 bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 3385 { 3386 BdrvChild *c; 3387 GLOBAL_STATE_CODE(); 3388 QLIST_FOREACH(c, &bs->parents, next_parent) { 3389 if (c->klass->change_media) { 3390 c->klass->change_media(c, load); 3391 } 3392 } 3393 } 3394 3395 /* Return true if you can reach parent going through child->inherits_from 3396 * recursively. If parent or child are NULL, return false */ 3397 static bool bdrv_inherits_from_recursive(BlockDriverState *child, 3398 BlockDriverState *parent) 3399 { 3400 while (child && child != parent) { 3401 child = child->inherits_from; 3402 } 3403 3404 return child != NULL; 3405 } 3406 3407 /* 3408 * Return the BdrvChildRole for @bs's backing child. bs->backing is 3409 * mostly used for COW backing children (role = COW), but also for 3410 * filtered children (role = FILTERED | PRIMARY). 3411 */ 3412 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) 3413 { 3414 if (bs->drv && bs->drv->is_filter) { 3415 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3416 } else { 3417 return BDRV_CHILD_COW; 3418 } 3419 } 3420 3421 /* 3422 * Sets the bs->backing or bs->file link of a BDS. A new reference is created; 3423 * callers which don't need their own reference any more must call bdrv_unref(). 
3424 * 3425 * If the respective child is already present (i.e. we're detaching a node), 3426 * that child node must be drained. 3427 * 3428 * Function doesn't update permissions, caller is responsible for this. 3429 * 3430 * Both @parent_bs and @child_bs can move to a different AioContext in this 3431 * function. 3432 * 3433 * After calling this function, the transaction @tran may only be completed 3434 * while holding a writer lock for the graph. 3435 */ 3436 static int GRAPH_WRLOCK 3437 bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, 3438 BlockDriverState *child_bs, 3439 bool is_backing, 3440 Transaction *tran, Error **errp) 3441 { 3442 bool update_inherits_from = 3443 bdrv_inherits_from_recursive(child_bs, parent_bs); 3444 BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; 3445 BdrvChildRole role; 3446 3447 GLOBAL_STATE_CODE(); 3448 3449 if (!parent_bs->drv) { 3450 /* 3451 * Node without drv is an object without a class :/. TODO: finally fix 3452 * qcow2 driver to never clear bs->drv and implement format corruption 3453 * handling in other way. 3454 */ 3455 error_setg(errp, "Node corrupted"); 3456 return -EINVAL; 3457 } 3458 3459 if (child && child->frozen) { 3460 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", 3461 child->name, parent_bs->node_name, child->bs->node_name); 3462 return -EPERM; 3463 } 3464 3465 if (is_backing && !parent_bs->drv->is_filter && 3466 !parent_bs->drv->supports_backing) 3467 { 3468 error_setg(errp, "Driver '%s' of node '%s' does not support backing " 3469 "files", parent_bs->drv->format_name, parent_bs->node_name); 3470 return -EINVAL; 3471 } 3472 3473 if (parent_bs->drv->is_filter) { 3474 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3475 } else if (is_backing) { 3476 role = BDRV_CHILD_COW; 3477 } else { 3478 /* 3479 * We only can use same role as it is in existing child. 
We don't have 3480 * infrastructure to determine role of file child in generic way 3481 */ 3482 if (!child) { 3483 error_setg(errp, "Cannot set file child to format node without " 3484 "file child"); 3485 return -EINVAL; 3486 } 3487 role = child->role; 3488 } 3489 3490 if (child) { 3491 assert(child->bs->quiesce_counter); 3492 bdrv_unset_inherits_from(parent_bs, child, tran); 3493 bdrv_remove_child(child, tran); 3494 } 3495 3496 if (!child_bs) { 3497 goto out; 3498 } 3499 3500 child = bdrv_attach_child_noperm(parent_bs, child_bs, 3501 is_backing ? "backing" : "file", 3502 &child_of_bds, role, 3503 tran, errp); 3504 if (!child) { 3505 return -EINVAL; 3506 } 3507 3508 3509 /* 3510 * If inherits_from pointed recursively to bs then let's update it to 3511 * point directly to bs (else it will become NULL). 3512 */ 3513 if (update_inherits_from) { 3514 bdrv_set_inherits_from(child_bs, parent_bs, tran); 3515 } 3516 3517 out: 3518 bdrv_refresh_limits(parent_bs, tran, NULL); 3519 3520 return 0; 3521 } 3522 3523 /* 3524 * Both @bs and @backing_hd can move to a different AioContext in this 3525 * function. 3526 * 3527 * If a backing child is already present (i.e. we're detaching a node), that 3528 * child node must be drained. 
3529 */ 3530 int bdrv_set_backing_hd_drained(BlockDriverState *bs, 3531 BlockDriverState *backing_hd, 3532 Error **errp) 3533 { 3534 int ret; 3535 Transaction *tran = tran_new(); 3536 3537 GLOBAL_STATE_CODE(); 3538 assert(bs->quiesce_counter > 0); 3539 if (bs->backing) { 3540 assert(bs->backing->bs->quiesce_counter > 0); 3541 } 3542 3543 ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); 3544 if (ret < 0) { 3545 goto out; 3546 } 3547 3548 ret = bdrv_refresh_perms(bs, tran, errp); 3549 out: 3550 tran_finalize(tran, ret); 3551 return ret; 3552 } 3553 3554 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 3555 Error **errp) 3556 { 3557 BlockDriverState *drain_bs; 3558 int ret; 3559 GLOBAL_STATE_CODE(); 3560 3561 bdrv_graph_rdlock_main_loop(); 3562 drain_bs = bs->backing ? bs->backing->bs : bs; 3563 bdrv_graph_rdunlock_main_loop(); 3564 3565 bdrv_ref(drain_bs); 3566 bdrv_drained_begin(drain_bs); 3567 bdrv_graph_wrlock(); 3568 ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); 3569 bdrv_graph_wrunlock(); 3570 bdrv_drained_end(drain_bs); 3571 bdrv_unref(drain_bs); 3572 3573 return ret; 3574 } 3575 3576 /* 3577 * Opens the backing file for a BlockDriverState if not yet open 3578 * 3579 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 3580 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3581 * itself, all options starting with "${bdref_key}." are considered part of the 3582 * BlockdevRef. 3583 * 3584 * TODO Can this be unified with bdrv_open_image()? 
3585 */ 3586 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 3587 const char *bdref_key, Error **errp) 3588 { 3589 ERRP_GUARD(); 3590 char *backing_filename = NULL; 3591 char *bdref_key_dot; 3592 const char *reference = NULL; 3593 int ret = 0; 3594 bool implicit_backing = false; 3595 BlockDriverState *backing_hd; 3596 QDict *options; 3597 QDict *tmp_parent_options = NULL; 3598 Error *local_err = NULL; 3599 3600 GLOBAL_STATE_CODE(); 3601 GRAPH_RDLOCK_GUARD_MAINLOOP(); 3602 3603 if (bs->backing != NULL) { 3604 goto free_exit; 3605 } 3606 3607 /* NULL means an empty set of options */ 3608 if (parent_options == NULL) { 3609 tmp_parent_options = qdict_new(); 3610 parent_options = tmp_parent_options; 3611 } 3612 3613 bs->open_flags &= ~BDRV_O_NO_BACKING; 3614 3615 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 3616 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 3617 g_free(bdref_key_dot); 3618 3619 /* 3620 * Caution: while qdict_get_try_str() is fine, getting non-string 3621 * types would require more care. When @parent_options come from 3622 * -blockdev or blockdev_add, its members are typed according to 3623 * the QAPI schema, but when they come from -drive, they're all 3624 * QString. 3625 */ 3626 reference = qdict_get_try_str(parent_options, bdref_key); 3627 if (reference || qdict_haskey(options, "file.filename")) { 3628 /* keep backing_filename NULL */ 3629 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 3630 qobject_unref(options); 3631 goto free_exit; 3632 } else { 3633 if (qdict_size(options) == 0) { 3634 /* If the user specifies options that do not modify the 3635 * backing file's behavior, we might still consider it the 3636 * implicit backing file. But it's easier this way, and 3637 * just specifying some of the backing BDS's options is 3638 * only possible with -drive anyway (otherwise the QAPI 3639 * schema forces the user to specify everything). 
*/ 3640 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); 3641 } 3642 3643 backing_filename = bdrv_get_full_backing_filename(bs, &local_err); 3644 if (local_err) { 3645 ret = -EINVAL; 3646 error_propagate(errp, local_err); 3647 qobject_unref(options); 3648 goto free_exit; 3649 } 3650 } 3651 3652 if (!bs->drv || !bs->drv->supports_backing) { 3653 ret = -EINVAL; 3654 error_setg(errp, "Driver doesn't support backing files"); 3655 qobject_unref(options); 3656 goto free_exit; 3657 } 3658 3659 if (!reference && 3660 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 3661 qdict_put_str(options, "driver", bs->backing_format); 3662 } 3663 3664 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, 3665 &child_of_bds, bdrv_backing_role(bs), true, 3666 errp); 3667 if (!backing_hd) { 3668 bs->open_flags |= BDRV_O_NO_BACKING; 3669 error_prepend(errp, "Could not open backing file: "); 3670 ret = -EINVAL; 3671 goto free_exit; 3672 } 3673 3674 if (implicit_backing) { 3675 bdrv_refresh_filename(backing_hd); 3676 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3677 backing_hd->filename); 3678 } 3679 3680 /* Hook up the backing file link; drop our reference, bs owns the 3681 * backing_hd reference now */ 3682 ret = bdrv_set_backing_hd(bs, backing_hd, errp); 3683 bdrv_unref(backing_hd); 3684 3685 if (ret < 0) { 3686 goto free_exit; 3687 } 3688 3689 qdict_del(parent_options, bdref_key); 3690 3691 free_exit: 3692 g_free(backing_filename); 3693 qobject_unref(tmp_parent_options); 3694 return ret; 3695 } 3696 3697 static BlockDriverState * 3698 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 3699 BlockDriverState *parent, const BdrvChildClass *child_class, 3700 BdrvChildRole child_role, bool allow_none, 3701 bool parse_filename, Error **errp) 3702 { 3703 BlockDriverState *bs = NULL; 3704 QDict *image_options; 3705 char *bdref_key_dot; 3706 const char *reference; 3707 3708 
assert(child_class != NULL); 3709 3710 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 3711 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 3712 g_free(bdref_key_dot); 3713 3714 /* 3715 * Caution: while qdict_get_try_str() is fine, getting non-string 3716 * types would require more care. When @options come from 3717 * -blockdev or blockdev_add, its members are typed according to 3718 * the QAPI schema, but when they come from -drive, they're all 3719 * QString. 3720 */ 3721 reference = qdict_get_try_str(options, bdref_key); 3722 if (!filename && !reference && !qdict_size(image_options)) { 3723 if (!allow_none) { 3724 error_setg(errp, "A block device must be specified for \"%s\"", 3725 bdref_key); 3726 } 3727 qobject_unref(image_options); 3728 goto done; 3729 } 3730 3731 bs = bdrv_open_inherit(filename, reference, image_options, 0, 3732 parent, child_class, child_role, parse_filename, 3733 errp); 3734 if (!bs) { 3735 goto done; 3736 } 3737 3738 done: 3739 qdict_del(options, bdref_key); 3740 return bs; 3741 } 3742 3743 static BdrvChild *bdrv_open_child_common(const char *filename, 3744 QDict *options, const char *bdref_key, 3745 BlockDriverState *parent, 3746 const BdrvChildClass *child_class, 3747 BdrvChildRole child_role, 3748 bool allow_none, bool parse_filename, 3749 Error **errp) 3750 { 3751 BlockDriverState *bs; 3752 BdrvChild *child; 3753 3754 GLOBAL_STATE_CODE(); 3755 3756 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3757 child_role, allow_none, parse_filename, errp); 3758 if (bs == NULL) { 3759 return NULL; 3760 } 3761 3762 bdrv_graph_wrlock(); 3763 child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3764 errp); 3765 bdrv_graph_wrunlock(); 3766 3767 return child; 3768 } 3769 3770 /* 3771 * Opens a disk image whose options are given as BlockdevRef in another block 3772 * device's options. 
3773 * 3774 * If allow_none is true, no image will be opened if filename is false and no 3775 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3776 * 3777 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3778 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3779 * itself, all options starting with "${bdref_key}." are considered part of the 3780 * BlockdevRef. 3781 * 3782 * The BlockdevRef will be removed from the options QDict. 3783 * 3784 * @parent can move to a different AioContext in this function. 3785 */ 3786 BdrvChild *bdrv_open_child(const char *filename, 3787 QDict *options, const char *bdref_key, 3788 BlockDriverState *parent, 3789 const BdrvChildClass *child_class, 3790 BdrvChildRole child_role, 3791 bool allow_none, Error **errp) 3792 { 3793 return bdrv_open_child_common(filename, options, bdref_key, parent, 3794 child_class, child_role, allow_none, false, 3795 errp); 3796 } 3797 3798 /* 3799 * This does mostly the same as bdrv_open_child(), but for opening the primary 3800 * child of a node. A notable difference from bdrv_open_child() is that it 3801 * enables filename parsing for protocol names (including json:). 3802 * 3803 * @parent can move to a different AioContext in this function. 3804 */ 3805 int bdrv_open_file_child(const char *filename, 3806 QDict *options, const char *bdref_key, 3807 BlockDriverState *parent, Error **errp) 3808 { 3809 BdrvChildRole role; 3810 3811 /* commit_top and mirror_top don't use this function */ 3812 assert(!parent->drv->filtered_child_is_backing); 3813 role = parent->drv->is_filter ? 
3814 (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; 3815 3816 if (!bdrv_open_child_common(filename, options, bdref_key, parent, 3817 &child_of_bds, role, false, true, errp)) 3818 { 3819 return -EINVAL; 3820 } 3821 3822 return 0; 3823 } 3824 3825 /* 3826 * TODO Future callers may need to specify parent/child_class in order for 3827 * option inheritance to work. Existing callers use it for the root node. 3828 */ 3829 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3830 { 3831 BlockDriverState *bs = NULL; 3832 QObject *obj = NULL; 3833 QDict *qdict = NULL; 3834 const char *reference = NULL; 3835 Visitor *v = NULL; 3836 3837 GLOBAL_STATE_CODE(); 3838 3839 if (ref->type == QTYPE_QSTRING) { 3840 reference = ref->u.reference; 3841 } else { 3842 BlockdevOptions *options = &ref->u.definition; 3843 assert(ref->type == QTYPE_QDICT); 3844 3845 v = qobject_output_visitor_new(&obj); 3846 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3847 visit_complete(v, &obj); 3848 3849 qdict = qobject_to(QDict, obj); 3850 qdict_flatten(qdict); 3851 3852 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3853 * compatibility with other callers) rather than what we want as the 3854 * real defaults. Apply the defaults here instead. 
*/ 3855 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3856 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3857 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3858 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3859 3860 } 3861 3862 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false, 3863 errp); 3864 obj = NULL; 3865 qobject_unref(obj); 3866 visit_free(v); 3867 return bs; 3868 } 3869 3870 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3871 int flags, 3872 QDict *snapshot_options, 3873 Error **errp) 3874 { 3875 ERRP_GUARD(); 3876 g_autofree char *tmp_filename = NULL; 3877 int64_t total_size; 3878 QemuOpts *opts = NULL; 3879 BlockDriverState *bs_snapshot = NULL; 3880 int ret; 3881 3882 GLOBAL_STATE_CODE(); 3883 3884 /* if snapshot, we create a temporary backing file and open it 3885 instead of opening 'filename' directly */ 3886 3887 /* Get the required size from the image */ 3888 total_size = bdrv_getlength(bs); 3889 3890 if (total_size < 0) { 3891 error_setg_errno(errp, -total_size, "Could not get image size"); 3892 goto out; 3893 } 3894 3895 /* Create the temporary image */ 3896 tmp_filename = create_tmp_file(errp); 3897 if (!tmp_filename) { 3898 goto out; 3899 } 3900 3901 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3902 &error_abort); 3903 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3904 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3905 qemu_opts_del(opts); 3906 if (ret < 0) { 3907 error_prepend(errp, "Could not create temporary overlay '%s': ", 3908 tmp_filename); 3909 goto out; 3910 } 3911 3912 /* Prepare options QDict for the temporary file */ 3913 qdict_put_str(snapshot_options, "file.driver", "file"); 3914 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3915 qdict_put_str(snapshot_options, "driver", "qcow2"); 3916 3917 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3918 
snapshot_options = NULL; 3919 if (!bs_snapshot) { 3920 goto out; 3921 } 3922 3923 ret = bdrv_append(bs_snapshot, bs, errp); 3924 if (ret < 0) { 3925 bs_snapshot = NULL; 3926 goto out; 3927 } 3928 3929 out: 3930 qobject_unref(snapshot_options); 3931 return bs_snapshot; 3932 } 3933 3934 /* 3935 * Opens a disk image (raw, qcow2, vmdk, ...) 3936 * 3937 * options is a QDict of options to pass to the block drivers, or NULL for an 3938 * empty set of options. The reference to the QDict belongs to the block layer 3939 * after the call (even on failure), so if the caller intends to reuse the 3940 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3941 * 3942 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3943 * If it is not NULL, the referenced BDS will be reused. 3944 * 3945 * The reference parameter may be used to specify an existing block device which 3946 * should be opened. If specified, neither options nor a filename may be given, 3947 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 3948 */ 3949 static BlockDriverState * no_coroutine_fn 3950 bdrv_open_inherit(const char *filename, const char *reference, QDict *options, 3951 int flags, BlockDriverState *parent, 3952 const BdrvChildClass *child_class, BdrvChildRole child_role, 3953 bool parse_filename, Error **errp) 3954 { 3955 int ret; 3956 BlockBackend *file = NULL; 3957 BlockDriverState *bs; 3958 BlockDriver *drv = NULL; 3959 BdrvChild *child; 3960 const char *drvname; 3961 const char *backing; 3962 Error *local_err = NULL; 3963 QDict *snapshot_options = NULL; 3964 int snapshot_flags = 0; 3965 3966 assert(!child_class || !flags); 3967 assert(!child_class == !parent); 3968 GLOBAL_STATE_CODE(); 3969 assert(!qemu_in_coroutine()); 3970 3971 /* TODO We'll eventually have to take a writer lock in this function */ 3972 GRAPH_RDLOCK_GUARD_MAINLOOP(); 3973 3974 if (reference) { 3975 bool options_non_empty = options ? 
qdict_size(options) : false; 3976 qobject_unref(options); 3977 3978 if (filename || options_non_empty) { 3979 error_setg(errp, "Cannot reference an existing block device with " 3980 "additional options or a new filename"); 3981 return NULL; 3982 } 3983 3984 bs = bdrv_lookup_bs(reference, reference, errp); 3985 if (!bs) { 3986 return NULL; 3987 } 3988 3989 bdrv_ref(bs); 3990 return bs; 3991 } 3992 3993 bs = bdrv_new(); 3994 3995 /* NULL means an empty set of options */ 3996 if (options == NULL) { 3997 options = qdict_new(); 3998 } 3999 4000 /* json: syntax counts as explicit options, as if in the QDict */ 4001 if (parse_filename) { 4002 parse_json_protocol(options, &filename, &local_err); 4003 if (local_err) { 4004 goto fail; 4005 } 4006 } 4007 4008 bs->explicit_options = qdict_clone_shallow(options); 4009 4010 if (child_class) { 4011 bool parent_is_format; 4012 4013 if (parent->drv) { 4014 parent_is_format = parent->drv->is_format; 4015 } else { 4016 /* 4017 * parent->drv is not set yet because this node is opened for 4018 * (potential) format probing. That means that @parent is going 4019 * to be a format node. 4020 */ 4021 parent_is_format = true; 4022 } 4023 4024 bs->inherits_from = parent; 4025 child_class->inherit_options(child_role, parent_is_format, 4026 &flags, options, 4027 parent->open_flags, parent->options); 4028 } 4029 4030 ret = bdrv_fill_options(&options, filename, &flags, parse_filename, 4031 &local_err); 4032 if (ret < 0) { 4033 goto fail; 4034 } 4035 4036 /* 4037 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 4038 * Caution: getting a boolean member of @options requires care. 4039 * When @options come from -blockdev or blockdev_add, members are 4040 * typed according to the QAPI schema, but when they come from 4041 * -drive, they're all QString. 
4042 */ 4043 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 4044 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 4045 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 4046 } else { 4047 flags &= ~BDRV_O_RDWR; 4048 } 4049 4050 if (flags & BDRV_O_SNAPSHOT) { 4051 snapshot_options = qdict_new(); 4052 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 4053 flags, options); 4054 /* Let bdrv_backing_options() override "read-only" */ 4055 qdict_del(options, BDRV_OPT_READ_ONLY); 4056 bdrv_inherited_options(BDRV_CHILD_COW, true, 4057 &flags, options, flags, options); 4058 } 4059 4060 bs->open_flags = flags; 4061 bs->options = options; 4062 options = qdict_clone_shallow(options); 4063 4064 /* Find the right image format driver */ 4065 /* See cautionary note on accessing @options above */ 4066 drvname = qdict_get_try_str(options, "driver"); 4067 if (drvname) { 4068 drv = bdrv_find_format(drvname); 4069 if (!drv) { 4070 error_setg(errp, "Unknown driver: '%s'", drvname); 4071 goto fail; 4072 } 4073 } 4074 4075 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 4076 4077 /* See cautionary note on accessing @options above */ 4078 backing = qdict_get_try_str(options, "backing"); 4079 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || 4080 (backing && *backing == '\0')) 4081 { 4082 if (backing) { 4083 warn_report("Use of \"backing\": \"\" is deprecated; " 4084 "use \"backing\": null instead"); 4085 } 4086 flags |= BDRV_O_NO_BACKING; 4087 qdict_del(bs->explicit_options, "backing"); 4088 qdict_del(bs->options, "backing"); 4089 qdict_del(options, "backing"); 4090 } 4091 4092 /* Open image file without format layer. This BlockBackend is only used for 4093 * probing, the block drivers will do their own bdrv_open_child() for the 4094 * same BDS, which is why we put the node name back into options. 
*/ 4095 if ((flags & BDRV_O_PROTOCOL) == 0) { 4096 BlockDriverState *file_bs; 4097 4098 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 4099 &child_of_bds, BDRV_CHILD_IMAGE, 4100 true, true, &local_err); 4101 if (local_err) { 4102 goto fail; 4103 } 4104 if (file_bs != NULL) { 4105 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 4106 * looking at the header to guess the image format. This works even 4107 * in cases where a guest would not see a consistent state. */ 4108 AioContext *ctx = bdrv_get_aio_context(file_bs); 4109 file = blk_new(ctx, 0, BLK_PERM_ALL); 4110 blk_insert_bs(file, file_bs, &local_err); 4111 bdrv_unref(file_bs); 4112 4113 if (local_err) { 4114 goto fail; 4115 } 4116 4117 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 4118 } 4119 } 4120 4121 /* Image format probing */ 4122 bs->probed = !drv; 4123 if (!drv && file) { 4124 ret = find_image_format(file, filename, &drv, &local_err); 4125 if (ret < 0) { 4126 goto fail; 4127 } 4128 /* 4129 * This option update would logically belong in bdrv_fill_options(), 4130 * but we first need to open bs->file for the probing to work, while 4131 * opening bs->file already requires the (mostly) final set of options 4132 * so that cache mode etc. can be inherited. 4133 * 4134 * Adding the driver later is somewhat ugly, but it's not an option 4135 * that would ever be inherited, so it's correct. We just need to make 4136 * sure to update both bs->options (which has the full effective 4137 * options for bs) and options (which has file.* already removed). 
4138 */ 4139 qdict_put_str(bs->options, "driver", drv->format_name); 4140 qdict_put_str(options, "driver", drv->format_name); 4141 } else if (!drv) { 4142 error_setg(errp, "Must specify either driver or file"); 4143 goto fail; 4144 } 4145 4146 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 4147 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->protocol_name); 4148 /* file must be NULL if a protocol BDS is about to be created 4149 * (the inverse results in an error message from bdrv_open_common()) */ 4150 assert(!(flags & BDRV_O_PROTOCOL) || !file); 4151 4152 /* Open the image */ 4153 ret = bdrv_open_common(bs, file, options, &local_err); 4154 if (ret < 0) { 4155 goto fail; 4156 } 4157 4158 if (file) { 4159 blk_unref(file); 4160 file = NULL; 4161 } 4162 4163 /* If there is a backing file, use it */ 4164 if ((flags & BDRV_O_NO_BACKING) == 0) { 4165 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 4166 if (ret < 0) { 4167 goto close_and_fail; 4168 } 4169 } 4170 4171 /* Remove all children options and references 4172 * from bs->options and bs->explicit_options */ 4173 QLIST_FOREACH(child, &bs->children, next) { 4174 char *child_key_dot; 4175 child_key_dot = g_strdup_printf("%s.", child->name); 4176 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); 4177 qdict_extract_subqdict(bs->options, NULL, child_key_dot); 4178 qdict_del(bs->explicit_options, child->name); 4179 qdict_del(bs->options, child->name); 4180 g_free(child_key_dot); 4181 } 4182 4183 /* Check if any unknown options were used */ 4184 if (qdict_size(options) != 0) { 4185 const QDictEntry *entry = qdict_first(options); 4186 if (flags & BDRV_O_PROTOCOL) { 4187 error_setg(errp, "Block protocol '%s' doesn't support the option " 4188 "'%s'", drv->format_name, entry->key); 4189 } else { 4190 error_setg(errp, 4191 "Block format '%s' does not support the option '%s'", 4192 drv->format_name, entry->key); 4193 } 4194 4195 goto close_and_fail; 4196 } 4197 4198 
bdrv_parent_cb_change_media(bs, true); 4199 4200 qobject_unref(options); 4201 options = NULL; 4202 4203 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 4204 * temporary snapshot afterwards. */ 4205 if (snapshot_flags) { 4206 BlockDriverState *snapshot_bs; 4207 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 4208 snapshot_options, &local_err); 4209 snapshot_options = NULL; 4210 if (local_err) { 4211 goto close_and_fail; 4212 } 4213 /* We are not going to return bs but the overlay on top of it 4214 * (snapshot_bs); thus, we have to drop the strong reference to bs 4215 * (which we obtained by calling bdrv_new()). bs will not be deleted, 4216 * though, because the overlay still has a reference to it. */ 4217 bdrv_unref(bs); 4218 bs = snapshot_bs; 4219 } 4220 4221 return bs; 4222 4223 fail: 4224 blk_unref(file); 4225 qobject_unref(snapshot_options); 4226 qobject_unref(bs->explicit_options); 4227 qobject_unref(bs->options); 4228 qobject_unref(options); 4229 bs->options = NULL; 4230 bs->explicit_options = NULL; 4231 bdrv_unref(bs); 4232 error_propagate(errp, local_err); 4233 return NULL; 4234 4235 close_and_fail: 4236 bdrv_unref(bs); 4237 qobject_unref(snapshot_options); 4238 qobject_unref(options); 4239 error_propagate(errp, local_err); 4240 return NULL; 4241 } 4242 4243 BlockDriverState *bdrv_open(const char *filename, const char *reference, 4244 QDict *options, int flags, Error **errp) 4245 { 4246 GLOBAL_STATE_CODE(); 4247 4248 return bdrv_open_inherit(filename, reference, options, flags, NULL, 4249 NULL, 0, true, errp); 4250 } 4251 4252 /* Return true if the NULL-terminated @list contains @str */ 4253 static bool is_str_in_list(const char *str, const char *const *list) 4254 { 4255 if (str && list) { 4256 int i; 4257 for (i = 0; list[i] != NULL; i++) { 4258 if (!strcmp(str, list[i])) { 4259 return true; 4260 } 4261 } 4262 } 4263 return false; 4264 } 4265 4266 /* 4267 * Check that every option set in @bs->options is also set in 4268 * 
@new_opts. 4269 * 4270 * Options listed in the common_options list and in 4271 * @bs->drv->mutable_opts are skipped. 4272 * 4273 * Return 0 on success, otherwise return -EINVAL and set @errp. 4274 */ 4275 static int bdrv_reset_options_allowed(BlockDriverState *bs, 4276 const QDict *new_opts, Error **errp) 4277 { 4278 const QDictEntry *e; 4279 /* These options are common to all block drivers and are handled 4280 * in bdrv_reopen_prepare() so they can be left out of @new_opts */ 4281 const char *const common_options[] = { 4282 "node-name", "discard", "cache.direct", "cache.no-flush", 4283 "read-only", "auto-read-only", "detect-zeroes", NULL 4284 }; 4285 4286 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { 4287 if (!qdict_haskey(new_opts, e->key) && 4288 !is_str_in_list(e->key, common_options) && 4289 !is_str_in_list(e->key, bs->drv->mutable_opts)) { 4290 error_setg(errp, "Option '%s' cannot be reset " 4291 "to its default value", e->key); 4292 return -EINVAL; 4293 } 4294 } 4295 4296 return 0; 4297 } 4298 4299 /* 4300 * Returns true if @child can be reached recursively from @bs 4301 */ 4302 static bool GRAPH_RDLOCK 4303 bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child) 4304 { 4305 BdrvChild *c; 4306 4307 if (bs == child) { 4308 return true; 4309 } 4310 4311 QLIST_FOREACH(c, &bs->children, next) { 4312 if (bdrv_recurse_has_child(c->bs, child)) { 4313 return true; 4314 } 4315 } 4316 4317 return false; 4318 } 4319 4320 /* 4321 * Adds a BlockDriverState to a simple queue for an atomic, transactional 4322 * reopen of multiple devices. 4323 * 4324 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT 4325 * already performed, or alternatively may be NULL a new BlockReopenQueue will 4326 * be created and initialized. This newly created BlockReopenQueue should be 4327 * passed back in for subsequent calls that are intended to be of the same 4328 * atomic 'set'. 
*
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * options contains the changed options for the associated bs
 * (the BlockReopenQueue takes ownership). If it is NULL, an empty QDict is
 * created instead.
 *
 * klass, role and parent_is_format are only used when parent_options is
 * non-NULL: they are handed to klass->inherit_options() together with
 * parent_flags and parent_options to compute the inherited flags and
 * options. When parent_options is NULL, bdrv_get_flags(bs) is used as the
 * starting value for the flags instead.
 *
 * keep_old_opts: if true, previously set options of bs are merged into
 * options with bdrv_join_options() (explicit options first, bs->options
 * after inheriting from the parent). The explicit options are also merged
 * when bs already has an entry in the queue; in that case they come from
 * that entry.
 *
 * Children whose inherits_from points to bs are queued recursively.
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 * bs is drained here and undrained by bdrv_reopen_queue_free().
 *
 * To be called with bs->aio_context locked.
 */
static BlockReopenQueue * GRAPH_RDLOCK
bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
                        QDict *options, const BdrvChildClass *klass,
                        BdrvChildRole role, bool parent_is_format,
                        QDict *parent_options, int parent_flags,
                        bool keep_old_opts)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    BdrvChild *child;
    QDict *old_options, *explicit_options, *options_copy;
    int flags;
    QemuOpts *opts;

    GLOBAL_STATE_CODE();

    /*
     * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that
     * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok
     * in practice.
     */
    bdrv_drained_begin(bs);

    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QTAILQ_INIT(bs_queue);
    }

    if (!options) {
        options = qdict_new();
    }

    /* Check if this BlockDriverState is already in the queue; bs_entry is
     * left NULL by QTAILQ_FOREACH when no entry matches */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bs == bs_entry->state.bs) {
            break;
        }
    }

    /*
     * Precedence of options:
     * 1. Explicitly passed in options (highest)
     * 2. Retained from explicitly set options of bs
     * 3. Inherited from parent node
     * 4. Retained from effective options of bs
     */

    /* Old explicitly set values (don't overwrite by inherited value) */
    if (bs_entry || keep_old_opts) {
        old_options = qdict_clone_shallow(bs_entry ?
                                          bs_entry->state.explicit_options :
                                          bs->explicit_options);
        bdrv_join_options(bs, options, old_options);
        qobject_unref(old_options);
    }

    /* Snapshot taken before inherited and old effective options are added */
    explicit_options = qdict_clone_shallow(options);

    /* Inherit from parent node */
    if (parent_options) {
        flags = 0;
        klass->inherit_options(role, parent_is_format, &flags, options,
                               parent_flags, parent_options);
    } else {
        flags = bdrv_get_flags(bs);
    }

    if (keep_old_opts) {
        /* Old values are used for options that aren't set yet */
        old_options = qdict_clone_shallow(bs->options);
        bdrv_join_options(bs, options, old_options);
        qobject_unref(old_options);
    }

    /* We have the final set of options so let's update the flags.
     * A copy is absorbed so that options itself is left untouched. */
    options_copy = qdict_clone_shallow(options);
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options_copy, NULL);
    update_flags_from_options(&flags, opts);
    qemu_opts_del(opts);
    qobject_unref(options_copy);

    /* bdrv_open_inherit() sets and clears some additional flags internally */
    flags &= ~BDRV_O_PROTOCOL;
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

    if (!bs_entry) {
        bs_entry = g_new0(BlockReopenQueueEntry, 1);
        QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
    } else {
        /* Reusing an existing entry: release the QDicts it held so far */
        qobject_unref(bs_entry->state.options);
        qobject_unref(bs_entry->state.explicit_options);
    }

    bs_entry->state.bs = bs;
    bs_entry->state.options = options;
    bs_entry->state.explicit_options = explicit_options;
    bs_entry->state.flags = flags;

    /*
     * If keep_old_opts is false then it means that unspecified
     * options must be reset to their original value. We don't allow
     * resetting 'backing' but we need to know if the option is
     * missing in order to decide if we have to return an error.
     */
    if (!keep_old_opts) {
        bs_entry->state.backing_missing =
                !qdict_haskey(options, "backing") &&
                !qdict_haskey(options, "backing.driver");
    }

    QLIST_FOREACH(child, &bs->children, next) {
        QDict *new_child_options = NULL;
        bool child_keep_old = keep_old_opts;

        /* reopen can only change the options of block devices that were
         * implicitly created and inherited options. For other (referenced)
         * block devices, a syntax like "backing.foo" results in an error. */
        if (child->bs->inherits_from != bs) {
            continue;
        }

        /* Check if the options contain a child reference */
        if (qdict_haskey(options, child->name)) {
            const char *childref = qdict_get_try_str(options, child->name);
            /*
             * The current child must not be reopened if the child
             * reference is null or points to a different node.
             */
            if (g_strcmp0(childref, child->bs->node_name)) {
                continue;
            }
            /*
             * If the child reference points to the current child then
             * reopen it with its existing set of options (note that
             * it can still inherit new options from the parent).
             */
            child_keep_old = true;
        } else {
            /* Extract child options ("child-name.*") */
            char *child_key_dot = g_strdup_printf("%s.", child->name);
            qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
            qdict_extract_subqdict(options, &new_child_options, child_key_dot);
            g_free(child_key_dot);
        }

        bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
                                child->klass, child->role, bs->drv->is_format,
                                options, flags, child_keep_old);
    }

    return bs_queue;
}

/*
 * Queue @bs for reopening as a top-level entry: calls
 * bdrv_reopen_queue_child() with a NULL child class, role 0,
 * parent_is_format false and no parent options or flags.
 *
 * Returns the queue returned by bdrv_reopen_queue_child().
 *
 * To be called with bs->aio_context locked
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                    BlockDriverState *bs,
                                    QDict *options, bool keep_old_opts)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
                                   NULL, 0, keep_old_opts);
}

/*
 * Free a reopen queue. For every entry this ends the drained section on the
 * entry's bs (the counterpart of the bdrv_drained_begin() call in
 * bdrv_reopen_queue_child()), drops the references to its explicit_options
 * and options QDicts and frees the entry; finally the queue itself is freed.
 *
 * Nothing is freed if @bs_queue is NULL.
 */
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
{
    GLOBAL_STATE_CODE();
    if (bs_queue) {
        BlockReopenQueueEntry *bs_entry, *next;
        QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
            bdrv_drained_end(bs_entry->state.bs);
            qobject_unref(bs_entry->state.explicit_options);
            qobject_unref(bs_entry->state.options);
            g_free(bs_entry);
        }
        g_free(bs_queue);
    }
}

/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
*
 * All affected nodes must be drained between bdrv_reopen_queue() and
 * bdrv_reopen_multiple().
 *
 * @bs_queue must not be NULL; it is freed with bdrv_reopen_queue_free()
 * before returning, on success as well as on failure.
 *
 * Returns 0 on success and a negative value on failure.
 *
 * To be called from the main thread, with all other AioContexts unlocked.
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Transaction *tran = tran_new();
    g_autoptr(GSList) refresh_list = NULL;

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bs_queue != NULL);
    GLOBAL_STATE_CODE();

    /* Flush every queued node before anything is changed */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        ret = bdrv_flush(bs_entry->state.bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Error flushing drive");
            goto abort;
        }
    }

    /* Prepare each entry; 'prepared' marks the entries that the abort path
     * below has to hand to bdrv_reopen_abort() */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        assert(bs_entry->state.bs->quiesce_counter > 0);
        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
        if (ret < 0) {
            goto abort;
        }
        bs_entry->prepared = true;
    }

    /* Collect the nodes whose permissions are refreshed: every queued node
     * plus any previous backing/file child recorded during prepare */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        BDRVReopenState *state = &bs_entry->state;

        refresh_list = g_slist_prepend(refresh_list, state->bs);
        if (state->old_backing_bs) {
            refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs);
        }
        if (state->old_file_bs) {
            refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
        }
    }

    /*
     * Note that the file-posix driver relies on the permission update done
     * during reopen (even if no permission changed), because it wants "new"
     * permissions for reconfiguring the fd and that's why it does it in
     * raw_check_perm(), not in raw_reopen_prepare() which is called with
     * "old" permissions.
     */
    bdrv_graph_rdlock_main_loop();
    ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
    bdrv_graph_rdunlock_main_loop();

    if (ret < 0) {
        goto abort;
    }

    /*
     * If we reach this point, we have success and just need to apply the
     * changes.
     *
     * Reverse order is used to comfort the qcow2 driver: on commit it needs
     * to write the IN_USE flag to the image, to mark bitmaps in the image as
     * invalid. But children usually come after parents in the reopen queue,
     * so go from the last to the first element.
     */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    /* The transaction is completed while holding the graph writer lock */
    bdrv_graph_wrlock();
    tran_commit(tran);
    bdrv_graph_wrunlock();

    /* Optional driver hook, run after the transaction has been committed */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        BlockDriverState *bs = bs_entry->state.bs;

        if (bs->drv->bdrv_reopen_commit_post) {
            bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
        }
    }

    ret = 0;
    goto cleanup;

abort:
    /* Roll back the transaction first, then abort the prepared entries */
    bdrv_graph_wrlock();
    tran_abort(tran);
    bdrv_graph_wrunlock();

    QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
    }

cleanup:
    bdrv_reopen_queue_free(bs_queue);

    return ret;
}

/*
 * Reopen a single node: builds a new queue containing @bs with @opts and
 * @keep_old_opts through bdrv_reopen_queue(), then runs
 * bdrv_reopen_multiple() on that queue.
 *
 * Returns the result of bdrv_reopen_multiple().
 */
int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
                Error **errp)
{
    BlockReopenQueue *queue;

    GLOBAL_STATE_CODE();

    queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);

    return bdrv_reopen_multiple(queue, errp);
}

/*
 * Reopen @bs with the BDRV_OPT_READ_ONLY option set to @read_only. The
 * option is put into a new QDict that is passed to bdrv_reopen() with
 * keep_old_opts = true.
 *
 * Returns the result of bdrv_reopen().
 */
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
                              Error **errp)
{
    QDict *opts = qdict_new();

    GLOBAL_STATE_CODE();

    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);

    return bdrv_reopen(bs, opts, true, errp);
}

/*
 * Take a
* BDRVReopenState and check if the value of 'backing' (@is_backing is
 * true) or 'file' (@is_backing is false) in the reopen_state->options QDict
 * is valid or not.
 *
 * If the option is missing from the QDict then return 0.
 *
 * If the option contains the node name of the current child of
 * reopen_state->bs, or of the node that bdrv_skip_implicit_filters()
 * returns for that child, then return 0.
 *
 * If the option contains a different node name (or, for 'backing' only, is
 * null) then check whether the current child can be replaced with the new
 * one. The request is rejected if the named node cannot be found, if the
 * change would create a cycle, if the current child is an implicit node, or
 * if reopen_state->bs is a filter that has no current child of this kind.
 * Otherwise the previous child is recorded in reopen_state->old_backing_bs
 * or reopen_state->old_file_bs and the new child is set with
 * bdrv_set_file_or_backing_noperm(), which is given @tran.
 *
 * After calling this function, the transaction @tran may only be completed
 * while holding a writer lock for the graph.
 *
 * Return 0 on success, otherwise return < 0 and set @errp.
 *
 * @reopen_state->bs can move to a different AioContext in this function.
 */
static int GRAPH_UNLOCKED
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
                                  bool is_backing, Transaction *tran,
                                  Error **errp)
{
    BlockDriverState *bs = reopen_state->bs;
    BlockDriverState *new_child_bs;
    BlockDriverState *old_child_bs;

    const char *child_name = is_backing ? "backing" : "file";
    QObject *value;
    const char *str;
    bool has_child;
    int ret;

    GLOBAL_STATE_CODE();

    value = qdict_get(reopen_state->options, child_name);
    if (value == NULL) {
        return 0;
    }

    /* Reader lock is held from here until either out_rdlock or the switch
     * to the writer lock further down */
    bdrv_graph_rdlock_main_loop();

    switch (qobject_type(value)) {
    case QTYPE_QNULL:
        assert(is_backing); /* The 'file' option does not allow a null value */
        new_child_bs = NULL;
        break;
    case QTYPE_QSTRING:
        str = qstring_get_str(qobject_to(QString, value));
        new_child_bs = bdrv_lookup_bs(NULL, str, errp);
        if (new_child_bs == NULL) {
            ret = -EINVAL;
            goto out_rdlock;
        }

        /* bs must not be reachable from the node that would become its
         * child */
        has_child = bdrv_recurse_has_child(new_child_bs, bs);
        if (has_child) {
            error_setg(errp, "Making '%s' a %s child of '%s' would create a "
                       "cycle", str, child_name, bs->node_name);
            ret = -EINVAL;
            goto out_rdlock;
        }
        break;
    default:
        /*
         * The options QDict has been flattened, so 'backing' and 'file'
         * do not allow any other data type here.
         */
        g_assert_not_reached();
    }

    old_child_bs = is_backing ? child_bs(bs->backing) : child_bs(bs->file);
    if (old_child_bs == new_child_bs) {
        /* Nothing to change */
        ret = 0;
        goto out_rdlock;
    }

    if (old_child_bs) {
        if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
            ret = 0;
            goto out_rdlock;
        }

        if (old_child_bs->implicit) {
            error_setg(errp, "Cannot replace implicit %s child of %s",
                       child_name, bs->node_name);
            ret = -EPERM;
            goto out_rdlock;
        }
    }

    if (bs->drv->is_filter && !old_child_bs) {
        /*
         * Filters always have a file or a backing child, so we are trying to
         * change the wrong child
         */
        error_setg(errp, "'%s' is a %s filter node that does not support a "
                   "%s child", bs->node_name, bs->drv->format_name, child_name);
        ret = -EINVAL;
        goto out_rdlock;
    }

    /* Remember the previous child in the reopen state */
    if (is_backing) {
        reopen_state->old_backing_bs = old_child_bs;
    } else {
        reopen_state->old_file_bs = old_child_bs;
    }

    /* Hold a reference and a drained section on the old child across the
     * graph change; both are released again right after it */
    if (old_child_bs) {
        bdrv_ref(old_child_bs);
        bdrv_drained_begin(old_child_bs);
    }

    bdrv_graph_rdunlock_main_loop();
    bdrv_graph_wrlock();

    ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
                                          tran, errp);

    bdrv_graph_wrunlock();

    if (old_child_bs) {
        bdrv_drained_end(old_child_bs);
        bdrv_unref(old_child_bs);
    }

    return ret;

out_rdlock:
    /* Exit used by every path that still holds the reader lock */
    bdrv_graph_rdunlock_main_loop();
    return ret;
}

/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state->bs is the BlockDriverState to reopen
 * reopen_state->flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
*
 * If an error occurs after the driver's .bdrv_reopen_prepare() has already
 * succeeded, the driver's .bdrv_reopen_abort() is called here to clean up
 * any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 * After calling this function, the transaction @change_child_tran may only be
 * completed while holding a writer lock for the graph.
 */
static int GRAPH_UNLOCKED
bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                    Transaction *change_child_tran, Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    bool drv_prepared = false;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    GLOBAL_STATE_CODE();
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* discard is owned by this function from here on; freed at "error" */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);

    bdrv_graph_rdlock_main_loop();
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    bdrv_graph_rdunlock_main_loop();
    /* NOTE(review): failure is detected through local_err here; ret keeps
     * whatever bdrv_can_set_read_only() returned */
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver failed without an error object: report a
                 * generic message that names the image file */
                bdrv_graph_rdlock_main_loop();
                bdrv_refresh_filename(reopen_state->bs);
                bdrv_graph_rdunlock_main_loop();
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        bdrv_graph_rdlock_main_loop();
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        bdrv_graph_rdunlock_main_loop();
        ret = -1;
        goto error;
    }

    /* From now on a failure must undo the driver's prepare (see "error") */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    bdrv_graph_rdlock_main_loop();
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        bdrv_graph_rdunlock_main_loop();
        ret = -EINVAL;
        goto error;
    }
    bdrv_graph_rdunlock_main_loop();

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Allow changing the 'file' option. In this case NULL is not allowed */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "file");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        GRAPH_RDLOCK_GUARD_MAINLOOP();

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                /* child is left NULL when no child has this name */
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type. In the future, this should be remedied by correctly typing
             * all options. For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

error:
    /* Common exit, also reached on success (ret == 0) */
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
*/
static void GRAPH_UNLOCKED bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;
    BlockDriverState *bs;
    BdrvChild *child;

    assert(reopen_state != NULL);
    bs = reopen_state->bs;
    drv = bs->drv;
    assert(drv != NULL);
    GLOBAL_STATE_CODE();

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    GRAPH_RDLOCK_GUARD_MAINLOOP();

    /* set BDS specific flags now */
    /* Drop bs's old QDicts and take an additional reference on the new
     * ones, so that bs and reopen_state each hold their own reference */
    qobject_unref(bs->explicit_options);
    qobject_unref(bs->options);
    qobject_ref(reopen_state->explicit_options);
    qobject_ref(reopen_state->options);

    bs->explicit_options = reopen_state->explicit_options;
    bs->options = reopen_state->options;
    bs->open_flags = reopen_state->flags;
    bs->detect_zeroes = reopen_state->detect_zeroes;

    /* Remove child references from bs->options and bs->explicit_options.
     * Child options were already removed in bdrv_reopen_queue_child() */
    QLIST_FOREACH(child, &bs->children, next) {
        qdict_del(bs->explicit_options, child->name);
        qdict_del(bs->options, child->name);
    }
    /* backing is probably removed, so it's not handled by previous loop */
    qdict_del(bs->explicit_options, "backing");
    qdict_del(bs->options, "backing");

    bdrv_refresh_limits(bs, NULL, NULL);
    bdrv_refresh_total_sectors(bs, bs->total_sectors);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 *
 * Only forwards to the driver's .bdrv_reopen_abort() callback, if the
 * driver provides one.
 */
static void GRAPH_UNLOCKED bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);
    GLOBAL_STATE_CODE();

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}


/*
 * Tear down @bs, whose reference count must already be zero: drain and
 * flush it, call the driver's .bdrv_close() (if any) and clear bs->drv,
 * unref all children under the graph writer lock, then free or reset the
 * per-node state handled below (opaque, options QDicts, block status cache,
 * named dirty bitmaps, AIO notifiers).
 */
static void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;
    BdrvChild *child, *next;

    GLOBAL_STATE_CODE();
    assert(!bs->refcnt);

    bdrv_drained_begin(bs); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */

    if (bs->drv) {
        if (bs->drv->bdrv_close) {
            /* Must unfreeze all children, so bdrv_unref_child() works */
            bs->drv->bdrv_close(bs);
        }
        bs->drv = NULL;
    }

    bdrv_graph_wrlock();
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_unref_child(bs, child);
    }

    /* With all children gone, neither link may still be set */
    assert(!bs->backing);
    assert(!bs->file);
    bdrv_graph_wrunlock();

    g_free(bs->opaque);
    bs->opaque = NULL;
    qatomic_set(&bs->copy_on_read, 0);
    bs->backing_file[0] = '\0';
    bs->backing_format[0] = '\0';
    bs->total_sectors = 0;
    bs->encrypted = false;
    bs->sg = false;
    qobject_unref(bs->options);
    qobject_unref(bs->explicit_options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    qobject_unref(bs->full_open_options);
    bs->full_open_options = NULL;
    g_free(bs->block_status_cache);
    bs->block_status_cache = NULL;

    bdrv_release_named_dirty_bitmaps(bs);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    /* Free every registered notifier, then reset the list head */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
    bdrv_drained_end(bs);

    /*
     * If we're still inside some bdrv_drain_all_begin()/end() sections, end
     * them now since this BDS won't exist anymore when bdrv_drain_all_end()
     * gets called.
     */
    if (bs->quiesce_counter) {
        bdrv_drain_all_end_quiesce(bs);
    }
}

/*
 * Asserts that no job exists (job_next(NULL) == NULL), drains all nodes,
 * then calls blk_remove_all_bs() and blockdev_close_all_bdrv_states().
 * Afterwards the all_bdrv_states list must be empty.
 */
void bdrv_close_all(void)
{
    GLOBAL_STATE_CODE();
    assert(job_next(NULL) == NULL);

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
    bdrv_drain_all();

    blk_remove_all_bs();
    blockdev_close_all_bdrv_states();

    assert(QTAILQ_EMPTY(&all_bdrv_states));
}

/*
 * Decide whether the parent link @c may be switched to point to @to.
 *
 * Returns false if c->klass->stay_at_node is set, or if @c is one of the
 * child links found while walking the subtree of @to; returns true
 * otherwise.
 */
static bool GRAPH_RDLOCK should_update_child(BdrvChild *c, BlockDriverState *to)
{
    GQueue *queue;
    GHashTable *found;
    bool ret;

    if (c->klass->stay_at_node) {
        return false;
    }

    /* If the child @c belongs to the BDS @to, replacing the current
     * c->bs by @to would mean to create a loop.
     *
     * Such a case occurs when appending a BDS to a backing chain.
     * For instance, imagine the following chain:
     *
     *   guest device -> node A -> further backing chain...
     *
     * Now we create a new BDS B which we want to put on top of this
     * chain, so we first attach A as its backing node:
     *
     *                   node B
     *                     |
     *                     v
     *   guest device -> node A -> further backing chain...
     *
     * Finally we want to replace A by B. When doing that, we want to
     * replace all pointers to A by pointers to B -- except for the
     * pointer from B because (1) that would create a loop, and (2)
     * that pointer should simply stay intact:
     *
     *   guest device -> node B
     *                     |
     *                     v
     *                   node A -> further backing chain...
     *
     * In general, when replacing a node A (c->bs) by a node B (@to),
     * if A is a child of B, that means we cannot replace A by B there
     * because that would create a loop. Silently detaching A from B
     * is also not really an option. So overall just leaving A in
     * place there is the most sensible choice.
     *
     * We would also create a loop in any cases where @c is only
     * indirectly referenced by @to. Prevent this by returning false
     * if @c is found (by breadth-first search) anywhere in the whole
     * subtree of @to.
     */

    ret = true;
    /* found holds the nodes that were already pushed onto the queue */
    found = g_hash_table_new(NULL, NULL);
    g_hash_table_add(found, to);
    queue = g_queue_new();
    g_queue_push_tail(queue, to);

    while (!g_queue_is_empty(queue)) {
        BlockDriverState *v = g_queue_pop_head(queue);
        BdrvChild *c2;

        QLIST_FOREACH(c2, &v->children, next) {
            if (c2 == c) {
                ret = false;
                /* NOTE(review): this only leaves the QLIST_FOREACH; the
                 * outer while loop keeps running until the queue is empty */
                break;
            }

            if (g_hash_table_contains(found, c2->bs)) {
                continue;
            }

            g_queue_push_tail(queue, c2->bs);
            g_hash_table_add(found, c2->bs);
        }
    }

    g_queue_free(queue);
    g_hash_table_destroy(found);

    return ret;
}

/* Transaction commit callback: frees the BdrvChild passed in @opaque */
static void bdrv_remove_child_commit(void *opaque)
{
    GLOBAL_STATE_CODE();
    bdrv_child_free(opaque);
}

/* Action registered by bdrv_remove_child(); only .commit is set */
static TransactionActionDrv bdrv_remove_child_drv = {
    .commit = bdrv_remove_child_commit,
};

/*
 * Function doesn't update permissions, caller is responsible for this.
 *
 * Does nothing if @child is NULL.
 *
 * @child->bs (if non-NULL) must be drained.
 *
 * After calling this function, the transaction @tran may only be completed
 * while holding a writer lock for the graph.
 */
static void GRAPH_WRLOCK bdrv_remove_child(BdrvChild *child, Transaction *tran)
{
    if (!child) {
        return;
    }

    if (child->bs) {
        assert(child->quiesced_parent);
        /* Detach the node as part of @tran */
        bdrv_replace_child_tran(child, NULL, tran);
    }

    /* The BdrvChild itself is freed by the .commit callback of the action */
    tran_add(tran, &bdrv_remove_child_drv, child);
}

/*
 * Both @from and @to (if non-NULL) must be drained. @to must be kept drained
 * until the transaction is completed.
5312 * 5313 * After calling this function, the transaction @tran may only be completed 5314 * while holding a writer lock for the graph. 5315 */ 5316 static int GRAPH_WRLOCK 5317 bdrv_replace_node_noperm(BlockDriverState *from, 5318 BlockDriverState *to, 5319 bool auto_skip, Transaction *tran, 5320 Error **errp) 5321 { 5322 BdrvChild *c, *next; 5323 5324 GLOBAL_STATE_CODE(); 5325 5326 assert(from->quiesce_counter); 5327 assert(to->quiesce_counter); 5328 5329 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 5330 assert(c->bs == from); 5331 if (!should_update_child(c, to)) { 5332 if (auto_skip) { 5333 continue; 5334 } 5335 error_setg(errp, "Should not change '%s' link to '%s'", 5336 c->name, from->node_name); 5337 return -EINVAL; 5338 } 5339 if (c->frozen) { 5340 error_setg(errp, "Cannot change '%s' link to '%s'", 5341 c->name, from->node_name); 5342 return -EPERM; 5343 } 5344 bdrv_replace_child_tran(c, to, tran); 5345 } 5346 5347 return 0; 5348 } 5349 5350 /* 5351 * Switch all parents of @from to point to @to instead. @from and @to must be in 5352 * the same AioContext and both must be drained. 5353 * 5354 * With auto_skip=true bdrv_replace_node_common skips updating from parents 5355 * if it creates a parent-child relation loop or if parent is block-job. 5356 * 5357 * With auto_skip=false the error is returned if from has a parent which should 5358 * not be updated. 5359 * 5360 * With @detach_subchain=true @to must be in a backing chain of @from. In this 5361 * case backing link of the cow-parent of @to is removed. 
*/
static int GRAPH_WRLOCK
bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
                         bool auto_skip, bool detach_subchain, Error **errp)
{
    Transaction *tran = tran_new();
    g_autoptr(GSList) refresh_list = NULL;
    BlockDriverState *to_cow_parent = NULL;
    int ret;

    GLOBAL_STATE_CODE();

    assert(from->quiesce_counter);
    assert(to->quiesce_counter);
    assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));

    if (detach_subchain) {
        assert(bdrv_chain_contains(from, to));
        assert(from != to);
        /*
         * Walk down from @from until we reach the node whose filtered/COW
         * child is @to; the loop body is intentionally empty.
         */
        for (to_cow_parent = from;
             bdrv_filter_or_cow_bs(to_cow_parent) != to;
             to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
        {
            ;
        }
    }

    /*
     * Do the replacement without permission update.
     * Replacement may influence the permissions, we should calculate new
     * permissions based on new graph. If we fail, we'll roll-back the
     * replacement.
     */
    ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
    if (ret < 0) {
        goto out;
    }

    if (detach_subchain) {
        /* to_cow_parent is already drained because from is drained */
        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
    }

    /* Permissions are recalculated for both ends of the replacement */
    refresh_list = g_slist_prepend(refresh_list, to);
    refresh_list = g_slist_prepend(refresh_list, from);

    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
    if (ret < 0) {
        goto out;
    }

    ret = 0;

out:
    /* @ret is handed to tran_finalize() together with the transaction */
    tran_finalize(tran, ret);
    return ret;
}

/*
 * Replace @from by @to in all parents of @from.  Thin wrapper around
 * bdrv_replace_node_common() with auto_skip=true and detach_subchain=false.
 */
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                      Error **errp)
{
    return bdrv_replace_node_common(from, to, true, false, errp);
}

/*
 * Replace @bs by its filtered/COW child in all parents of @bs, passing
 * detach_subchain=true to bdrv_replace_node_common().
 *
 * The child is drained and the graph writer lock is held around the
 * replacement.  Returns the result of bdrv_replace_node_common().
 */
int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
{
    BlockDriverState *child_bs;
    int ret;

    GLOBAL_STATE_CODE();

    bdrv_graph_rdlock_main_loop();
    child_bs = bdrv_filter_or_cow_bs(bs);
    bdrv_graph_rdunlock_main_loop();

    bdrv_drained_begin(child_bs);
    bdrv_graph_wrlock();
    ret = bdrv_replace_node_common(bs, child_bs, true, true, errp);
    bdrv_graph_wrunlock();
    bdrv_drained_end(child_bs);

    return ret;
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend and must not have backing
 * child.
 *
 * This function does not create any image files.
 *
 * Returns 0 on success; on failure a negative value is returned (-EINVAL if
 * attaching @bs_top as "backing" child fails, otherwise the error of the
 * failing step).
 */
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
                Error **errp)
{
    int ret;
    BdrvChild *child;
    Transaction *tran = tran_new();

    GLOBAL_STATE_CODE();

    bdrv_graph_rdlock_main_loop();
    assert(!bs_new->backing);
    bdrv_graph_rdunlock_main_loop();

    bdrv_drained_begin(bs_top);
    bdrv_drained_begin(bs_new);

    bdrv_graph_wrlock();

    /* Step 1: bs_new -> bs_top becomes a "backing" edge */
    child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
                                     &child_of_bds, bdrv_backing_role(bs_new),
                                     tran, errp);
    if (!child) {
        ret = -EINVAL;
        goto out;
    }

    /* Step 2: redirect the parents of bs_top to bs_new (auto_skip=true) */
    ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
    if (ret < 0) {
        goto out;
    }

    /* Step 3: recompute permissions for the new graph */
    ret = bdrv_refresh_perms(bs_new, tran, errp);
out:
    tran_finalize(tran, ret);

    /* Reached on both the success and the failure path */
    bdrv_refresh_limits(bs_top, NULL, NULL);
    bdrv_graph_wrunlock();

    bdrv_drained_end(bs_top);
    bdrv_drained_end(bs_new);

    return ret;
}

/*
 * Make @child point to @new_bs instead of its current node and refresh the
 * permissions of both nodes.
 *
 * Not for empty child
 */
int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
                          Error **errp)
{
    int ret;
    Transaction *tran = tran_new();
    g_autoptr(GSList) refresh_list = NULL;
    BlockDriverState *old_bs = child->bs;

    GLOBAL_STATE_CODE();

    /* Hold a reference on the old node until it has been undrained below */
    bdrv_ref(old_bs);
    bdrv_drained_begin(old_bs);
    bdrv_drained_begin(new_bs);
    bdrv_graph_wrlock();

    bdrv_replace_child_tran(child, new_bs, tran);

    refresh_list = g_slist_prepend(refresh_list, old_bs);
    refresh_list = g_slist_prepend(refresh_list, new_bs);

    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);

    tran_finalize(tran, ret);

    bdrv_graph_wrunlock();
    bdrv_drained_end(old_bs);
    bdrv_drained_end(new_bs);
    bdrv_unref(old_bs);

    return ret;
}

/*
 * Destroy @bs: unlink it from the global lists, close it and free it.
 * The node must have no op blockers and a reference count of zero.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    GLOBAL_STATE_CODE();

    /* remove from list, if necessary */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);

    bdrv_close(bs);

    qemu_mutex_destroy(&bs->reqs_lock);

    g_free(bs);
}


/*
 * Replace @bs by newly created block node.
 *
 * @options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use qobject_ref() before calling
 * bdrv_insert_node().
 *
 * The caller must make sure that @bs stays in the same AioContext, i.e.
 * @options must not refer to nodes in a different AioContext.
5565 */ 5566 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, 5567 int flags, Error **errp) 5568 { 5569 ERRP_GUARD(); 5570 int ret; 5571 AioContext *ctx = bdrv_get_aio_context(bs); 5572 BlockDriverState *new_node_bs = NULL; 5573 const char *drvname, *node_name; 5574 BlockDriver *drv; 5575 5576 drvname = qdict_get_try_str(options, "driver"); 5577 if (!drvname) { 5578 error_setg(errp, "driver is not specified"); 5579 goto fail; 5580 } 5581 5582 drv = bdrv_find_format(drvname); 5583 if (!drv) { 5584 error_setg(errp, "Unknown driver: '%s'", drvname); 5585 goto fail; 5586 } 5587 5588 node_name = qdict_get_try_str(options, "node-name"); 5589 5590 GLOBAL_STATE_CODE(); 5591 5592 new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, 5593 errp); 5594 assert(bdrv_get_aio_context(bs) == ctx); 5595 5596 options = NULL; /* bdrv_new_open_driver() eats options */ 5597 if (!new_node_bs) { 5598 error_prepend(errp, "Could not create node: "); 5599 goto fail; 5600 } 5601 5602 /* 5603 * Make sure that @bs doesn't go away until we have successfully attached 5604 * all of its parents to @new_node_bs and undrained it again. 5605 */ 5606 bdrv_ref(bs); 5607 bdrv_drained_begin(bs); 5608 bdrv_drained_begin(new_node_bs); 5609 bdrv_graph_wrlock(); 5610 ret = bdrv_replace_node(bs, new_node_bs, errp); 5611 bdrv_graph_wrunlock(); 5612 bdrv_drained_end(new_node_bs); 5613 bdrv_drained_end(bs); 5614 bdrv_unref(bs); 5615 5616 if (ret < 0) { 5617 error_prepend(errp, "Could not replace node: "); 5618 goto fail; 5619 } 5620 5621 return new_node_bs; 5622 5623 fail: 5624 qobject_unref(options); 5625 bdrv_unref(new_node_bs); 5626 return NULL; 5627 } 5628 5629 /* 5630 * Run consistency checks on an image 5631 * 5632 * Returns 0 if the check could be completed (it doesn't mean that the image is 5633 * free of errors) or -errno when an internal error occurred. The results of the 5634 * check are stored in res. 
5635 */ 5636 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 5637 BdrvCheckResult *res, BdrvCheckMode fix) 5638 { 5639 IO_CODE(); 5640 assert_bdrv_graph_readable(); 5641 if (bs->drv == NULL) { 5642 return -ENOMEDIUM; 5643 } 5644 if (bs->drv->bdrv_co_check == NULL) { 5645 return -ENOTSUP; 5646 } 5647 5648 memset(res, 0, sizeof(*res)); 5649 return bs->drv->bdrv_co_check(bs, res, fix); 5650 } 5651 5652 /* 5653 * Return values: 5654 * 0 - success 5655 * -EINVAL - backing format specified, but no file 5656 * -ENOSPC - can't update the backing file because no space is left in the 5657 * image file header 5658 * -ENOTSUP - format driver doesn't support changing the backing file 5659 */ 5660 int coroutine_fn 5661 bdrv_co_change_backing_file(BlockDriverState *bs, const char *backing_file, 5662 const char *backing_fmt, bool require) 5663 { 5664 BlockDriver *drv = bs->drv; 5665 int ret; 5666 5667 IO_CODE(); 5668 5669 if (!drv) { 5670 return -ENOMEDIUM; 5671 } 5672 5673 /* Backing file format doesn't make sense without a backing file */ 5674 if (backing_fmt && !backing_file) { 5675 return -EINVAL; 5676 } 5677 5678 if (require && backing_file && !backing_fmt) { 5679 return -EINVAL; 5680 } 5681 5682 if (drv->bdrv_co_change_backing_file != NULL) { 5683 ret = drv->bdrv_co_change_backing_file(bs, backing_file, backing_fmt); 5684 } else { 5685 ret = -ENOTSUP; 5686 } 5687 5688 if (ret == 0) { 5689 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 5690 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 5691 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 5692 backing_file ?: ""); 5693 } 5694 return ret; 5695 } 5696 5697 /* 5698 * Finds the first non-filter node above bs in the chain between 5699 * active and bs. The returned node is either an immediate parent of 5700 * bs, or there are only filter nodes between the two. 5701 * 5702 * Returns NULL if bs is not found in active's image chain, 5703 * or if active == bs. 
5704 * 5705 * Returns the bottommost base image if bs == NULL. 5706 */ 5707 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 5708 BlockDriverState *bs) 5709 { 5710 5711 GLOBAL_STATE_CODE(); 5712 5713 bs = bdrv_skip_filters(bs); 5714 active = bdrv_skip_filters(active); 5715 5716 while (active) { 5717 BlockDriverState *next = bdrv_backing_chain_next(active); 5718 if (bs == next) { 5719 return active; 5720 } 5721 active = next; 5722 } 5723 5724 return NULL; 5725 } 5726 5727 /* Given a BDS, searches for the base layer. */ 5728 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 5729 { 5730 GLOBAL_STATE_CODE(); 5731 5732 return bdrv_find_overlay(bs, NULL); 5733 } 5734 5735 /* 5736 * Return true if at least one of the COW (backing) and filter links 5737 * between @bs and @base is frozen. @errp is set if that's the case. 5738 * @base must be reachable from @bs, or NULL. 5739 */ 5740 static bool GRAPH_RDLOCK 5741 bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 5742 Error **errp) 5743 { 5744 BlockDriverState *i; 5745 BdrvChild *child; 5746 5747 GLOBAL_STATE_CODE(); 5748 5749 for (i = bs; i != base; i = child_bs(child)) { 5750 child = bdrv_filter_or_cow_child(i); 5751 5752 if (child && child->frozen) { 5753 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 5754 child->name, i->node_name, child->bs->node_name); 5755 return true; 5756 } 5757 } 5758 5759 return false; 5760 } 5761 5762 /* 5763 * Freeze all COW (backing) and filter links between @bs and @base. 5764 * If any of the links is already frozen the operation is aborted and 5765 * none of the links are modified. 5766 * @base must be reachable from @bs, or NULL. 5767 * Returns 0 on success. On failure returns < 0 and sets @errp. 
5768 */ 5769 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 5770 Error **errp) 5771 { 5772 BlockDriverState *i; 5773 BdrvChild *child; 5774 5775 GLOBAL_STATE_CODE(); 5776 5777 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 5778 return -EPERM; 5779 } 5780 5781 for (i = bs; i != base; i = child_bs(child)) { 5782 child = bdrv_filter_or_cow_child(i); 5783 if (child && child->bs->never_freeze) { 5784 error_setg(errp, "Cannot freeze '%s' link to '%s'", 5785 child->name, child->bs->node_name); 5786 return -EPERM; 5787 } 5788 } 5789 5790 for (i = bs; i != base; i = child_bs(child)) { 5791 child = bdrv_filter_or_cow_child(i); 5792 if (child) { 5793 child->frozen = true; 5794 } 5795 } 5796 5797 return 0; 5798 } 5799 5800 /* 5801 * Unfreeze all COW (backing) and filter links between @bs and @base. 5802 * The caller must ensure that all links are frozen before using this 5803 * function. 5804 * @base must be reachable from @bs, or NULL. 5805 */ 5806 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 5807 { 5808 BlockDriverState *i; 5809 BdrvChild *child; 5810 5811 GLOBAL_STATE_CODE(); 5812 5813 for (i = bs; i != base; i = child_bs(child)) { 5814 child = bdrv_filter_or_cow_child(i); 5815 if (child) { 5816 assert(child->frozen); 5817 child->frozen = false; 5818 } 5819 } 5820 } 5821 5822 /* 5823 * Drops images above 'base' up to and including 'top', and sets the image 5824 * above 'top' to have base as its backing file. 5825 * 5826 * Requires that the overlay to 'top' is opened r/w, so that the backing file 5827 * information in 'bs' can be properly updated. 
*
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success.  -EIO is returned if either node has no driver, if
 * @base is not in the chain of @top, or if replacing the node fails; a
 * failing update_filename callback returns its own negative value.
 */
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
                           const char *backing_file_str,
                           bool backing_mask_protocol)
{
    BlockDriverState *explicit_top = top;
    bool update_inherits_from;
    BdrvChild *c;
    Error *local_err = NULL;
    int ret = -EIO;
    g_autoptr(GSList) updated_children = NULL;
    GSList *p;

    GLOBAL_STATE_CODE();

    /* Keep @top alive and @base drained until the exit label */
    bdrv_ref(top);
    bdrv_drained_begin(base);
    bdrv_graph_wrlock();

    if (!top->drv || !base->drv) {
        goto exit_wrlock;
    }

    /* Make sure that base is in the backing chain of top */
    if (!bdrv_chain_contains(top, base)) {
        goto exit_wrlock;
    }

    /* If 'base' recursively inherits from 'top' then we should set
     * base->inherits_from to top->inherits_from after 'top' and all
     * other intermediate nodes have been dropped.
     * If 'top' is an implicit node (e.g. "commit_top") we should skip
     * it because no one inherits from it. We use explicit_top for that. */
    explicit_top = bdrv_skip_implicit_filters(explicit_top);
    update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);

    /* success - we can delete the intermediate states, and link top->base */
    if (!backing_file_str) {
        bdrv_refresh_filename(base);
        backing_file_str = base->filename;
    }

    /* Remember the parents of @top before they are moved over to @base */
    QLIST_FOREACH(c, &top->parents, next_parent) {
        updated_children = g_slist_prepend(updated_children, c);
    }

    /*
     * It seems correct to pass detach_subchain=true here, but it triggers
     * one more bug that is not fixed yet: due to a nested aio_poll loop we
     * switch to another drained section, which modifies the graph (for
     * example, removing the child, which we keep in updated_children list).
     * So, it's a TODO.
     *
     * Note, bug triggered if pass detach_subchain=true here and run
     * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
     * That's a FIXME.
     */
    /* The return value is not used; failure is detected through local_err */
    bdrv_replace_node_common(top, base, false, false, &local_err);
    bdrv_graph_wrunlock();

    if (local_err) {
        error_report_err(local_err);
        goto exit;
    }

    /* Let each former parent of @top record its new backing file */
    for (p = updated_children; p; p = p->next) {
        c = p->data;

        if (c->klass->update_filename) {
            ret = c->klass->update_filename(c, base, backing_file_str,
                                            backing_mask_protocol,
                                            &local_err);
            if (ret < 0) {
                /*
                 * TODO: Actually, we want to rollback all previous iterations
                 * of this loop, and (which is almost impossible) previous
                 * bdrv_replace_node()...
                 *
                 * Note, that c->klass->update_filename may lead to permission
                 * update, so it's a bad idea to call it inside permission
                 * update transaction of bdrv_replace_node.
                 */
                error_report_err(local_err);
                goto exit;
            }
        }
    }

    if (update_inherits_from) {
        base->inherits_from = explicit_top->inherits_from;
    }

    ret = 0;
    goto exit;

exit_wrlock:
    /* Early failures still hold the writer lock */
    bdrv_graph_wrunlock();
exit:
    bdrv_drained_end(base);
    bdrv_unref(top);
    return ret;
}

/**
 * Implementation of BlockDriver.bdrv_co_get_allocated_file_size() that
 * sums the size of all data-bearing children.  (This excludes backing
 * children.)
 *
 * Only children whose role contains BDRV_CHILD_DATA, BDRV_CHILD_METADATA or
 * BDRV_CHILD_FILTERED are counted.  The first negative child result is
 * returned unchanged.
 */
static int64_t coroutine_fn GRAPH_RDLOCK
bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
    BdrvChild *child;
    int64_t child_size, sum = 0;

    QLIST_FOREACH(child, &bs->children, next) {
        if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                           BDRV_CHILD_FILTERED))
        {
            child_size = bdrv_co_get_allocated_file_size(child->bs);
            if (child_size < 0) {
                return child_size;
            }
            sum += child_size;
        }
    }

    return sum;
}

/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 *
 * Returns -ENOMEDIUM if @bs has no driver.  A driver callback takes
 * precedence; without one the default depends on the kind of driver, see
 * the comments below.
 */
int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    IO_CODE();
    assert_bdrv_graph_readable();

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_co_get_allocated_file_size) {
        return drv->bdrv_co_get_allocated_file_size(bs);
    }

    if (drv->protocol_name) {
        /*
         * Protocol drivers default to -ENOTSUP (most of their data is
         * not stored in any of their children (if they even have any),
         * so there is no generic way to figure it out).
         */
        return -ENOTSUP;
    } else if (drv->is_filter) {
        /* Filter drivers default to the size of their filtered child */
        return bdrv_co_get_allocated_file_size(bdrv_filter_bs(bs));
    } else {
        /* Other drivers default to summing their children's sizes */
        return bdrv_sum_allocated_file_size(bs);
    }
}

/*
 * bdrv_measure:
 * @drv: Format driver
 * @opts: Creation options for new image
 * @in_bs: Existing image containing data for new image (may be NULL)
 * @errp: Error object
 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
 *          or NULL on error
 *
 * Calculate file size required to create a new image.
 *
 * If @in_bs is given then space for allocated clusters and zero clusters
 * from that image are included in the calculation.  If @opts contains a
 * backing file that is shared by @in_bs then backing clusters may be omitted
 * from the calculation.
 *
 * If @in_bs is NULL then the calculation includes no allocated clusters
 * unless a preallocation option is given in @opts.
 *
 * Note that @in_bs may use a different BlockDriver from @drv.
 *
 * If an error occurs the @errp pointer is set.
 */
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
                               BlockDriverState *in_bs, Error **errp)
{
    IO_CODE();
    /* Drivers without a bdrv_measure callback cannot be measured */
    if (!drv->bdrv_measure) {
        error_setg(errp, "Block driver '%s' does not support size measurement",
                   drv->format_name);
        return NULL;
    }

    return drv->bdrv_measure(opts, in_bs, errp);
}

/**
 * Return number of sectors on success, -errno on error.
6049 */ 6050 int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs) 6051 { 6052 BlockDriver *drv = bs->drv; 6053 IO_CODE(); 6054 assert_bdrv_graph_readable(); 6055 6056 if (!drv) 6057 return -ENOMEDIUM; 6058 6059 if (bs->bl.has_variable_length) { 6060 int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors); 6061 if (ret < 0) { 6062 return ret; 6063 } 6064 } 6065 return bs->total_sectors; 6066 } 6067 6068 /* 6069 * This wrapper is written by hand because this function is in the hot I/O path, 6070 * via blk_get_geometry. 6071 */ 6072 int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs) 6073 { 6074 BlockDriver *drv = bs->drv; 6075 IO_CODE(); 6076 6077 if (!drv) 6078 return -ENOMEDIUM; 6079 6080 if (bs->bl.has_variable_length) { 6081 int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); 6082 if (ret < 0) { 6083 return ret; 6084 } 6085 } 6086 6087 return bs->total_sectors; 6088 } 6089 6090 /** 6091 * Return length in bytes on success, -errno on error. 6092 * The length is always a multiple of BDRV_SECTOR_SIZE. 6093 */ 6094 int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs) 6095 { 6096 int64_t ret; 6097 IO_CODE(); 6098 assert_bdrv_graph_readable(); 6099 6100 ret = bdrv_co_nb_sectors(bs); 6101 if (ret < 0) { 6102 return ret; 6103 } 6104 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 6105 return -EFBIG; 6106 } 6107 return ret * BDRV_SECTOR_SIZE; 6108 } 6109 6110 bool bdrv_is_sg(BlockDriverState *bs) 6111 { 6112 IO_CODE(); 6113 return bs->sg; 6114 } 6115 6116 /** 6117 * Return whether the given node supports compressed writes. 6118 */ 6119 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 6120 { 6121 BlockDriverState *filtered; 6122 IO_CODE(); 6123 6124 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 6125 return false; 6126 } 6127 6128 filtered = bdrv_filter_bs(bs); 6129 if (filtered) { 6130 /* 6131 * Filters can only forward compressed writes, so we have to 6132 * check the child. 
6133 */ 6134 return bdrv_supports_compressed_writes(filtered); 6135 } 6136 6137 return true; 6138 } 6139 6140 const char *bdrv_get_format_name(BlockDriverState *bs) 6141 { 6142 IO_CODE(); 6143 return bs->drv ? bs->drv->format_name : NULL; 6144 } 6145 6146 static int qsort_strcmp(const void *a, const void *b) 6147 { 6148 return strcmp(*(char *const *)a, *(char *const *)b); 6149 } 6150 6151 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 6152 void *opaque, bool read_only) 6153 { 6154 BlockDriver *drv; 6155 int count = 0; 6156 int i; 6157 const char **formats = NULL; 6158 6159 GLOBAL_STATE_CODE(); 6160 6161 QLIST_FOREACH(drv, &bdrv_drivers, list) { 6162 if (drv->format_name) { 6163 bool found = false; 6164 6165 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 6166 continue; 6167 } 6168 6169 i = count; 6170 while (formats && i && !found) { 6171 found = !strcmp(formats[--i], drv->format_name); 6172 } 6173 6174 if (!found) { 6175 formats = g_renew(const char *, formats, count + 1); 6176 formats[count++] = drv->format_name; 6177 } 6178 } 6179 } 6180 6181 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 6182 const char *format_name = block_driver_modules[i].format_name; 6183 6184 if (format_name) { 6185 bool found = false; 6186 int j = count; 6187 6188 if (use_bdrv_whitelist && 6189 !bdrv_format_is_whitelisted(format_name, read_only)) { 6190 continue; 6191 } 6192 6193 while (formats && j && !found) { 6194 found = !strcmp(formats[--j], format_name); 6195 } 6196 6197 if (!found) { 6198 formats = g_renew(const char *, formats, count + 1); 6199 formats[count++] = format_name; 6200 } 6201 } 6202 } 6203 6204 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 6205 6206 for (i = 0; i < count; i++) { 6207 it(opaque, formats[i]); 6208 } 6209 6210 g_free(formats); 6211 } 6212 6213 /* This function is to find a node in the bs graph */ 6214 BlockDriverState *bdrv_find_node(const char *node_name) 6215 { 6216 BlockDriverState *bs; 
6217 6218 assert(node_name); 6219 GLOBAL_STATE_CODE(); 6220 6221 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 6222 if (!strcmp(node_name, bs->node_name)) { 6223 return bs; 6224 } 6225 } 6226 return NULL; 6227 } 6228 6229 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 6230 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 6231 Error **errp) 6232 { 6233 BlockDeviceInfoList *list; 6234 BlockDriverState *bs; 6235 6236 GLOBAL_STATE_CODE(); 6237 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6238 6239 list = NULL; 6240 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 6241 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 6242 if (!info) { 6243 qapi_free_BlockDeviceInfoList(list); 6244 return NULL; 6245 } 6246 QAPI_LIST_PREPEND(list, info); 6247 } 6248 6249 return list; 6250 } 6251 6252 typedef struct XDbgBlockGraphConstructor { 6253 XDbgBlockGraph *graph; 6254 GHashTable *graph_nodes; 6255 } XDbgBlockGraphConstructor; 6256 6257 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 6258 { 6259 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 6260 6261 gr->graph = g_new0(XDbgBlockGraph, 1); 6262 gr->graph_nodes = g_hash_table_new(NULL, NULL); 6263 6264 return gr; 6265 } 6266 6267 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 6268 { 6269 XDbgBlockGraph *graph = gr->graph; 6270 6271 g_hash_table_destroy(gr->graph_nodes); 6272 g_free(gr); 6273 6274 return graph; 6275 } 6276 6277 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 6278 { 6279 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 6280 6281 if (ret != 0) { 6282 return ret; 6283 } 6284 6285 /* 6286 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 6287 * answer of g_hash_table_lookup. 
6288 */ 6289 ret = g_hash_table_size(gr->graph_nodes) + 1; 6290 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 6291 6292 return ret; 6293 } 6294 6295 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 6296 XDbgBlockGraphNodeType type, const char *name) 6297 { 6298 XDbgBlockGraphNode *n; 6299 6300 n = g_new0(XDbgBlockGraphNode, 1); 6301 6302 n->id = xdbg_graph_node_num(gr, node); 6303 n->type = type; 6304 n->name = g_strdup(name); 6305 6306 QAPI_LIST_PREPEND(gr->graph->nodes, n); 6307 } 6308 6309 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 6310 const BdrvChild *child) 6311 { 6312 BlockPermission qapi_perm; 6313 XDbgBlockGraphEdge *edge; 6314 GLOBAL_STATE_CODE(); 6315 6316 edge = g_new0(XDbgBlockGraphEdge, 1); 6317 6318 edge->parent = xdbg_graph_node_num(gr, parent); 6319 edge->child = xdbg_graph_node_num(gr, child->bs); 6320 edge->name = g_strdup(child->name); 6321 6322 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 6323 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 6324 6325 if (flag & child->perm) { 6326 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 6327 } 6328 if (flag & child->shared_perm) { 6329 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 6330 } 6331 } 6332 6333 QAPI_LIST_PREPEND(gr->graph->edges, edge); 6334 } 6335 6336 6337 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 6338 { 6339 BlockBackend *blk; 6340 BlockJob *job; 6341 BlockDriverState *bs; 6342 BdrvChild *child; 6343 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 6344 6345 GLOBAL_STATE_CODE(); 6346 6347 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 6348 char *allocated_name = NULL; 6349 const char *name = blk_name(blk); 6350 6351 if (!*name) { 6352 name = allocated_name = blk_get_attached_dev_id(blk); 6353 } 6354 xdbg_graph_add_node(gr, blk, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 6355 name); 6356 g_free(allocated_name); 6357 if (blk_root(blk)) { 6358 
xdbg_graph_add_edge(gr, blk, blk_root(blk)); 6359 } 6360 } 6361 6362 WITH_JOB_LOCK_GUARD() { 6363 for (job = block_job_next_locked(NULL); job; 6364 job = block_job_next_locked(job)) { 6365 GSList *el; 6366 6367 xdbg_graph_add_node(gr, job, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 6368 job->job.id); 6369 for (el = job->nodes; el; el = el->next) { 6370 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 6371 } 6372 } 6373 } 6374 6375 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 6376 xdbg_graph_add_node(gr, bs, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 6377 bs->node_name); 6378 QLIST_FOREACH(child, &bs->children, next) { 6379 xdbg_graph_add_edge(gr, bs, child); 6380 } 6381 } 6382 6383 return xdbg_graph_finalize(gr); 6384 } 6385 6386 BlockDriverState *bdrv_lookup_bs(const char *device, 6387 const char *node_name, 6388 Error **errp) 6389 { 6390 BlockBackend *blk; 6391 BlockDriverState *bs; 6392 6393 GLOBAL_STATE_CODE(); 6394 6395 if (device) { 6396 blk = blk_by_name(device); 6397 6398 if (blk) { 6399 bs = blk_bs(blk); 6400 if (!bs) { 6401 error_setg(errp, "Device '%s' has no medium", device); 6402 } 6403 6404 return bs; 6405 } 6406 } 6407 6408 if (node_name) { 6409 bs = bdrv_find_node(node_name); 6410 6411 if (bs) { 6412 return bs; 6413 } 6414 } 6415 6416 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 6417 device ? device : "", 6418 node_name ? node_name : ""); 6419 return NULL; 6420 } 6421 6422 /* If 'base' is in the same chain as 'top', return true. Otherwise, 6423 * return false. If either argument is NULL, return false. 
*/ 6424 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 6425 { 6426 6427 GLOBAL_STATE_CODE(); 6428 6429 while (top && top != base) { 6430 top = bdrv_filter_or_cow_bs(top); 6431 } 6432 6433 return top != NULL; 6434 } 6435 6436 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 6437 { 6438 GLOBAL_STATE_CODE(); 6439 if (!bs) { 6440 return QTAILQ_FIRST(&graph_bdrv_states); 6441 } 6442 return QTAILQ_NEXT(bs, node_list); 6443 } 6444 6445 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 6446 { 6447 GLOBAL_STATE_CODE(); 6448 if (!bs) { 6449 return QTAILQ_FIRST(&all_bdrv_states); 6450 } 6451 return QTAILQ_NEXT(bs, bs_list); 6452 } 6453 6454 const char *bdrv_get_node_name(const BlockDriverState *bs) 6455 { 6456 IO_CODE(); 6457 return bs->node_name; 6458 } 6459 6460 const char *bdrv_get_parent_name(const BlockDriverState *bs) 6461 { 6462 BdrvChild *c; 6463 const char *name; 6464 IO_CODE(); 6465 6466 /* If multiple parents have a name, just pick the first one. */ 6467 QLIST_FOREACH(c, &bs->parents, next_parent) { 6468 if (c->klass->get_name) { 6469 name = c->klass->get_name(c); 6470 if (name && *name) { 6471 return name; 6472 } 6473 } 6474 } 6475 6476 return NULL; 6477 } 6478 6479 /* TODO check what callers really want: bs->node_name or blk_name() */ 6480 const char *bdrv_get_device_name(const BlockDriverState *bs) 6481 { 6482 IO_CODE(); 6483 return bdrv_get_parent_name(bs) ?: ""; 6484 } 6485 6486 /* This can be used to identify nodes that might not have a device 6487 * name associated. Since node and device names live in the same 6488 * namespace, the result is unambiguous. The exception is if both are 6489 * absent, then this returns an empty (non-null) string. 
*/ 6490 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 6491 { 6492 IO_CODE(); 6493 return bdrv_get_parent_name(bs) ?: bs->node_name; 6494 } 6495 6496 int bdrv_get_flags(BlockDriverState *bs) 6497 { 6498 IO_CODE(); 6499 return bs->open_flags; 6500 } 6501 6502 int bdrv_has_zero_init_1(BlockDriverState *bs) 6503 { 6504 GLOBAL_STATE_CODE(); 6505 return 1; 6506 } 6507 6508 int coroutine_mixed_fn bdrv_has_zero_init(BlockDriverState *bs) 6509 { 6510 BlockDriverState *filtered; 6511 GLOBAL_STATE_CODE(); 6512 6513 if (!bs->drv) { 6514 return 0; 6515 } 6516 6517 /* If BS is a copy on write image, it is initialized to 6518 the contents of the base image, which may not be zeroes. */ 6519 if (bdrv_cow_child(bs)) { 6520 return 0; 6521 } 6522 if (bs->drv->bdrv_has_zero_init) { 6523 return bs->drv->bdrv_has_zero_init(bs); 6524 } 6525 6526 filtered = bdrv_filter_bs(bs); 6527 if (filtered) { 6528 return bdrv_has_zero_init(filtered); 6529 } 6530 6531 /* safe default */ 6532 return 0; 6533 } 6534 6535 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 6536 { 6537 IO_CODE(); 6538 if (!(bs->open_flags & BDRV_O_UNMAP)) { 6539 return false; 6540 } 6541 6542 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; 6543 } 6544 6545 void bdrv_get_backing_filename(BlockDriverState *bs, 6546 char *filename, int filename_size) 6547 { 6548 IO_CODE(); 6549 pstrcpy(filename, filename_size, bs->backing_file); 6550 } 6551 6552 int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 6553 { 6554 int ret; 6555 BlockDriver *drv = bs->drv; 6556 IO_CODE(); 6557 assert_bdrv_graph_readable(); 6558 6559 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 6560 if (!drv) { 6561 return -ENOMEDIUM; 6562 } 6563 if (!drv->bdrv_co_get_info) { 6564 BlockDriverState *filtered = bdrv_filter_bs(bs); 6565 if (filtered) { 6566 return bdrv_co_get_info(filtered, bdi); 6567 } 6568 return -ENOTSUP; 6569 } 6570 memset(bdi, 0, sizeof(*bdi)); 6571 ret = 
drv->bdrv_co_get_info(bs, bdi); 6572 if (bdi->subcluster_size == 0) { 6573 /* 6574 * If the driver left this unset, subclusters are not supported. 6575 * Then it is safe to treat each cluster as having only one subcluster. 6576 */ 6577 bdi->subcluster_size = bdi->cluster_size; 6578 } 6579 if (ret < 0) { 6580 return ret; 6581 } 6582 6583 if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) { 6584 return -EINVAL; 6585 } 6586 6587 return 0; 6588 } 6589 6590 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, 6591 Error **errp) 6592 { 6593 BlockDriver *drv = bs->drv; 6594 IO_CODE(); 6595 if (drv && drv->bdrv_get_specific_info) { 6596 return drv->bdrv_get_specific_info(bs, errp); 6597 } 6598 return NULL; 6599 } 6600 6601 BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) 6602 { 6603 BlockDriver *drv = bs->drv; 6604 IO_CODE(); 6605 if (!drv || !drv->bdrv_get_specific_stats) { 6606 return NULL; 6607 } 6608 return drv->bdrv_get_specific_stats(bs); 6609 } 6610 6611 void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event) 6612 { 6613 IO_CODE(); 6614 assert_bdrv_graph_readable(); 6615 6616 if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) { 6617 return; 6618 } 6619 6620 bs->drv->bdrv_co_debug_event(bs, event); 6621 } 6622 6623 static BlockDriverState * GRAPH_RDLOCK 6624 bdrv_find_debug_node(BlockDriverState *bs) 6625 { 6626 GLOBAL_STATE_CODE(); 6627 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 6628 bs = bdrv_primary_bs(bs); 6629 } 6630 6631 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 6632 assert(bs->drv->bdrv_debug_remove_breakpoint); 6633 return bs; 6634 } 6635 6636 return NULL; 6637 } 6638 6639 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 6640 const char *tag) 6641 { 6642 GLOBAL_STATE_CODE(); 6643 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6644 6645 bs = bdrv_find_debug_node(bs); 6646 if (bs) { 6647 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 6648 } 6649 6650 return -ENOTSUP; 6651 } 
6652 6653 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 6654 { 6655 GLOBAL_STATE_CODE(); 6656 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6657 6658 bs = bdrv_find_debug_node(bs); 6659 if (bs) { 6660 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 6661 } 6662 6663 return -ENOTSUP; 6664 } 6665 6666 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 6667 { 6668 GLOBAL_STATE_CODE(); 6669 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6670 6671 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 6672 bs = bdrv_primary_bs(bs); 6673 } 6674 6675 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 6676 return bs->drv->bdrv_debug_resume(bs, tag); 6677 } 6678 6679 return -ENOTSUP; 6680 } 6681 6682 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 6683 { 6684 GLOBAL_STATE_CODE(); 6685 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6686 6687 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 6688 bs = bdrv_primary_bs(bs); 6689 } 6690 6691 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 6692 return bs->drv->bdrv_debug_is_suspended(bs, tag); 6693 } 6694 6695 return false; 6696 } 6697 6698 /* backing_file can either be relative, or absolute, or a protocol. If it is 6699 * relative, it must be relative to the chain. So, passing in bs->filename 6700 * from a BDS as backing_file should not be done, as that may be relative to 6701 * the CWD rather than the chain. 
*/ 6702 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 6703 const char *backing_file) 6704 { 6705 char *filename_full = NULL; 6706 char *backing_file_full = NULL; 6707 char *filename_tmp = NULL; 6708 int is_protocol = 0; 6709 bool filenames_refreshed = false; 6710 BlockDriverState *curr_bs = NULL; 6711 BlockDriverState *retval = NULL; 6712 BlockDriverState *bs_below; 6713 6714 GLOBAL_STATE_CODE(); 6715 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6716 6717 if (!bs || !bs->drv || !backing_file) { 6718 return NULL; 6719 } 6720 6721 filename_full = g_malloc(PATH_MAX); 6722 backing_file_full = g_malloc(PATH_MAX); 6723 6724 is_protocol = path_has_protocol(backing_file); 6725 6726 /* 6727 * Being largely a legacy function, skip any filters here 6728 * (because filters do not have normal filenames, so they cannot 6729 * match anyway; and allowing json:{} filenames is a bit out of 6730 * scope). 6731 */ 6732 for (curr_bs = bdrv_skip_filters(bs); 6733 bdrv_cow_child(curr_bs) != NULL; 6734 curr_bs = bs_below) 6735 { 6736 bs_below = bdrv_backing_chain_next(curr_bs); 6737 6738 if (bdrv_backing_overridden(curr_bs)) { 6739 /* 6740 * If the backing file was overridden, we can only compare 6741 * directly against the backing node's filename. 6742 */ 6743 6744 if (!filenames_refreshed) { 6745 /* 6746 * This will automatically refresh all of the 6747 * filenames in the rest of the backing chain, so we 6748 * only need to do this once. 6749 */ 6750 bdrv_refresh_filename(bs_below); 6751 filenames_refreshed = true; 6752 } 6753 6754 if (strcmp(backing_file, bs_below->filename) == 0) { 6755 retval = bs_below; 6756 break; 6757 } 6758 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 6759 /* 6760 * If either of the filename paths is actually a protocol, then 6761 * compare unmodified paths; otherwise make paths relative. 
6762 */ 6763 char *backing_file_full_ret; 6764 6765 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 6766 retval = bs_below; 6767 break; 6768 } 6769 /* Also check against the full backing filename for the image */ 6770 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs, 6771 NULL); 6772 if (backing_file_full_ret) { 6773 bool equal = strcmp(backing_file, backing_file_full_ret) == 0; 6774 g_free(backing_file_full_ret); 6775 if (equal) { 6776 retval = bs_below; 6777 break; 6778 } 6779 } 6780 } else { 6781 /* If not an absolute filename path, make it relative to the current 6782 * image's filename path */ 6783 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file, 6784 NULL); 6785 /* We are going to compare canonicalized absolute pathnames */ 6786 if (!filename_tmp || !realpath(filename_tmp, filename_full)) { 6787 g_free(filename_tmp); 6788 continue; 6789 } 6790 g_free(filename_tmp); 6791 6792 /* We need to make sure the backing filename we are comparing against 6793 * is relative to the current image filename (or absolute) */ 6794 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL); 6795 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) { 6796 g_free(filename_tmp); 6797 continue; 6798 } 6799 g_free(filename_tmp); 6800 6801 if (strcmp(backing_file_full, filename_full) == 0) { 6802 retval = bs_below; 6803 break; 6804 } 6805 } 6806 } 6807 6808 g_free(filename_full); 6809 g_free(backing_file_full); 6810 return retval; 6811 } 6812 6813 void bdrv_init(void) 6814 { 6815 #ifdef CONFIG_BDRV_WHITELIST_TOOLS 6816 use_bdrv_whitelist = 1; 6817 #endif 6818 module_call_init(MODULE_INIT_BLOCK); 6819 } 6820 6821 void bdrv_init_with_whitelist(void) 6822 { 6823 use_bdrv_whitelist = 1; 6824 bdrv_init(); 6825 } 6826 6827 int bdrv_activate(BlockDriverState *bs, Error **errp) 6828 { 6829 BdrvChild *child, *parent; 6830 Error *local_err = NULL; 6831 int ret; 6832 BdrvDirtyBitmap *bm; 6833 6834 GLOBAL_STATE_CODE(); 6835 
GRAPH_RDLOCK_GUARD_MAINLOOP(); 6836 6837 if (!bs->drv) { 6838 return -ENOMEDIUM; 6839 } 6840 6841 QLIST_FOREACH(child, &bs->children, next) { 6842 bdrv_activate(child->bs, &local_err); 6843 if (local_err) { 6844 error_propagate(errp, local_err); 6845 return -EINVAL; 6846 } 6847 } 6848 6849 /* 6850 * Update permissions, they may differ for inactive nodes. 6851 * 6852 * Note that the required permissions of inactive images are always a 6853 * subset of the permissions required after activating the image. This 6854 * allows us to just get the permissions upfront without restricting 6855 * bdrv_co_invalidate_cache(). 6856 * 6857 * It also means that in error cases, we don't have to try and revert to 6858 * the old permissions (which is an operation that could fail, too). We can 6859 * just keep the extended permissions for the next time that an activation 6860 * of the image is tried. 6861 */ 6862 if (bs->open_flags & BDRV_O_INACTIVE) { 6863 bs->open_flags &= ~BDRV_O_INACTIVE; 6864 ret = bdrv_refresh_perms(bs, NULL, errp); 6865 if (ret < 0) { 6866 bs->open_flags |= BDRV_O_INACTIVE; 6867 return ret; 6868 } 6869 6870 ret = bdrv_invalidate_cache(bs, errp); 6871 if (ret < 0) { 6872 bs->open_flags |= BDRV_O_INACTIVE; 6873 return ret; 6874 } 6875 6876 FOR_EACH_DIRTY_BITMAP(bs, bm) { 6877 bdrv_dirty_bitmap_skip_store(bm, false); 6878 } 6879 6880 ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); 6881 if (ret < 0) { 6882 bs->open_flags |= BDRV_O_INACTIVE; 6883 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 6884 return ret; 6885 } 6886 } 6887 6888 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6889 if (parent->klass->activate) { 6890 parent->klass->activate(parent, &local_err); 6891 if (local_err) { 6892 bs->open_flags |= BDRV_O_INACTIVE; 6893 error_propagate(errp, local_err); 6894 return -EINVAL; 6895 } 6896 } 6897 } 6898 6899 return 0; 6900 } 6901 6902 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) 6903 { 6904 
Error *local_err = NULL; 6905 IO_CODE(); 6906 6907 assert(!(bs->open_flags & BDRV_O_INACTIVE)); 6908 assert_bdrv_graph_readable(); 6909 6910 if (bs->drv->bdrv_co_invalidate_cache) { 6911 bs->drv->bdrv_co_invalidate_cache(bs, &local_err); 6912 if (local_err) { 6913 error_propagate(errp, local_err); 6914 return -EINVAL; 6915 } 6916 } 6917 6918 return 0; 6919 } 6920 6921 void bdrv_activate_all(Error **errp) 6922 { 6923 BlockDriverState *bs; 6924 BdrvNextIterator it; 6925 6926 GLOBAL_STATE_CODE(); 6927 GRAPH_RDLOCK_GUARD_MAINLOOP(); 6928 6929 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6930 int ret; 6931 6932 ret = bdrv_activate(bs, errp); 6933 if (ret < 0) { 6934 bdrv_next_cleanup(&it); 6935 return; 6936 } 6937 } 6938 } 6939 6940 static bool GRAPH_RDLOCK 6941 bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) 6942 { 6943 BdrvChild *parent; 6944 GLOBAL_STATE_CODE(); 6945 6946 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6947 if (parent->klass->parent_is_bds) { 6948 BlockDriverState *parent_bs = parent->opaque; 6949 if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) { 6950 return true; 6951 } 6952 } 6953 } 6954 6955 return false; 6956 } 6957 6958 static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) 6959 { 6960 BdrvChild *child, *parent; 6961 int ret; 6962 uint64_t cumulative_perms, cumulative_shared_perms; 6963 6964 GLOBAL_STATE_CODE(); 6965 6966 if (!bs->drv) { 6967 return -ENOMEDIUM; 6968 } 6969 6970 /* Make sure that we don't inactivate a child before its parent. 6971 * It will be covered by recursion from the yet active parent. 
*/ 6972 if (bdrv_has_bds_parent(bs, true)) { 6973 return 0; 6974 } 6975 6976 assert(!(bs->open_flags & BDRV_O_INACTIVE)); 6977 6978 /* Inactivate this node */ 6979 if (bs->drv->bdrv_inactivate) { 6980 ret = bs->drv->bdrv_inactivate(bs); 6981 if (ret < 0) { 6982 return ret; 6983 } 6984 } 6985 6986 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6987 if (parent->klass->inactivate) { 6988 ret = parent->klass->inactivate(parent); 6989 if (ret < 0) { 6990 return ret; 6991 } 6992 } 6993 } 6994 6995 bdrv_get_cumulative_perm(bs, &cumulative_perms, 6996 &cumulative_shared_perms); 6997 if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { 6998 /* Our inactive parents still need write access. Inactivation failed. */ 6999 return -EPERM; 7000 } 7001 7002 bs->open_flags |= BDRV_O_INACTIVE; 7003 7004 /* 7005 * Update permissions, they may differ for inactive nodes. 7006 * We only tried to loosen restrictions, so errors are not fatal, ignore 7007 * them. 7008 */ 7009 bdrv_refresh_perms(bs, NULL, NULL); 7010 7011 /* Recursively inactivate children */ 7012 QLIST_FOREACH(child, &bs->children, next) { 7013 ret = bdrv_inactivate_recurse(child->bs); 7014 if (ret < 0) { 7015 return ret; 7016 } 7017 } 7018 7019 return 0; 7020 } 7021 7022 int bdrv_inactivate_all(void) 7023 { 7024 BlockDriverState *bs = NULL; 7025 BdrvNextIterator it; 7026 int ret = 0; 7027 7028 GLOBAL_STATE_CODE(); 7029 GRAPH_RDLOCK_GUARD_MAINLOOP(); 7030 7031 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 7032 /* Nodes with BDS parents are covered by recursion from the last 7033 * parent that gets inactivated. Don't inactivate them a second 7034 * time if that has already happened. 
*/ 7035 if (bdrv_has_bds_parent(bs, false)) { 7036 continue; 7037 } 7038 ret = bdrv_inactivate_recurse(bs); 7039 if (ret < 0) { 7040 bdrv_next_cleanup(&it); 7041 break; 7042 } 7043 } 7044 7045 return ret; 7046 } 7047 7048 /**************************************************************/ 7049 /* removable device support */ 7050 7051 /** 7052 * Return TRUE if the media is present 7053 */ 7054 bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs) 7055 { 7056 BlockDriver *drv = bs->drv; 7057 BdrvChild *child; 7058 IO_CODE(); 7059 assert_bdrv_graph_readable(); 7060 7061 if (!drv) { 7062 return false; 7063 } 7064 if (drv->bdrv_co_is_inserted) { 7065 return drv->bdrv_co_is_inserted(bs); 7066 } 7067 QLIST_FOREACH(child, &bs->children, next) { 7068 if (!bdrv_co_is_inserted(child->bs)) { 7069 return false; 7070 } 7071 } 7072 return true; 7073 } 7074 7075 /** 7076 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 7077 */ 7078 void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag) 7079 { 7080 BlockDriver *drv = bs->drv; 7081 IO_CODE(); 7082 assert_bdrv_graph_readable(); 7083 7084 if (drv && drv->bdrv_co_eject) { 7085 drv->bdrv_co_eject(bs, eject_flag); 7086 } 7087 } 7088 7089 /** 7090 * Lock or unlock the media (if it is locked, the user won't be able 7091 * to eject it manually). 7092 */ 7093 void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked) 7094 { 7095 BlockDriver *drv = bs->drv; 7096 IO_CODE(); 7097 assert_bdrv_graph_readable(); 7098 trace_bdrv_lock_medium(bs, locked); 7099 7100 if (drv && drv->bdrv_co_lock_medium) { 7101 drv->bdrv_co_lock_medium(bs, locked); 7102 } 7103 } 7104 7105 /* Get a reference to bs */ 7106 void bdrv_ref(BlockDriverState *bs) 7107 { 7108 GLOBAL_STATE_CODE(); 7109 bs->refcnt++; 7110 } 7111 7112 /* Release a previously grabbed reference to bs. 7113 * If after releasing, reference count is zero, the BlockDriverState is 7114 * deleted. 
*/ 7115 void bdrv_unref(BlockDriverState *bs) 7116 { 7117 GLOBAL_STATE_CODE(); 7118 if (!bs) { 7119 return; 7120 } 7121 assert(bs->refcnt > 0); 7122 if (--bs->refcnt == 0) { 7123 bdrv_delete(bs); 7124 } 7125 } 7126 7127 static void bdrv_schedule_unref_bh(void *opaque) 7128 { 7129 BlockDriverState *bs = opaque; 7130 7131 bdrv_unref(bs); 7132 } 7133 7134 /* 7135 * Release a BlockDriverState reference while holding the graph write lock. 7136 * 7137 * Calling bdrv_unref() directly is forbidden while holding the graph lock 7138 * because bdrv_close() both involves polling and taking the graph lock 7139 * internally. bdrv_schedule_unref() instead delays decreasing the refcount and 7140 * possibly closing @bs until the graph lock is released. 7141 */ 7142 void bdrv_schedule_unref(BlockDriverState *bs) 7143 { 7144 if (!bs) { 7145 return; 7146 } 7147 aio_bh_schedule_oneshot(qemu_get_aio_context(), bdrv_schedule_unref_bh, bs); 7148 } 7149 7150 struct BdrvOpBlocker { 7151 Error *reason; 7152 QLIST_ENTRY(BdrvOpBlocker) list; 7153 }; 7154 7155 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 7156 { 7157 BdrvOpBlocker *blocker; 7158 GLOBAL_STATE_CODE(); 7159 7160 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 7161 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 7162 blocker = QLIST_FIRST(&bs->op_blockers[op]); 7163 error_propagate_prepend(errp, error_copy(blocker->reason), 7164 "Node '%s' is busy: ", 7165 bdrv_get_device_or_node_name(bs)); 7166 return true; 7167 } 7168 return false; 7169 } 7170 7171 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 7172 { 7173 BdrvOpBlocker *blocker; 7174 GLOBAL_STATE_CODE(); 7175 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 7176 7177 blocker = g_new0(BdrvOpBlocker, 1); 7178 blocker->reason = reason; 7179 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 7180 } 7181 7182 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 7183 { 7184 BdrvOpBlocker *blocker, *next; 7185 
GLOBAL_STATE_CODE(); 7186 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 7187 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 7188 if (blocker->reason == reason) { 7189 QLIST_REMOVE(blocker, list); 7190 g_free(blocker); 7191 } 7192 } 7193 } 7194 7195 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 7196 { 7197 int i; 7198 GLOBAL_STATE_CODE(); 7199 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 7200 bdrv_op_block(bs, i, reason); 7201 } 7202 } 7203 7204 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 7205 { 7206 int i; 7207 GLOBAL_STATE_CODE(); 7208 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 7209 bdrv_op_unblock(bs, i, reason); 7210 } 7211 } 7212 7213 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 7214 { 7215 int i; 7216 GLOBAL_STATE_CODE(); 7217 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 7218 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 7219 return false; 7220 } 7221 } 7222 return true; 7223 } 7224 7225 /* 7226 * Must not be called while holding the lock of an AioContext other than the 7227 * current one. 
7228 */ 7229 void bdrv_img_create(const char *filename, const char *fmt, 7230 const char *base_filename, const char *base_fmt, 7231 char *options, uint64_t img_size, int flags, bool quiet, 7232 Error **errp) 7233 { 7234 QemuOptsList *create_opts = NULL; 7235 QemuOpts *opts = NULL; 7236 const char *backing_fmt, *backing_file; 7237 int64_t size; 7238 BlockDriver *drv, *proto_drv; 7239 Error *local_err = NULL; 7240 int ret = 0; 7241 7242 GLOBAL_STATE_CODE(); 7243 7244 /* Find driver and parse its options */ 7245 drv = bdrv_find_format(fmt); 7246 if (!drv) { 7247 error_setg(errp, "Unknown file format '%s'", fmt); 7248 return; 7249 } 7250 7251 proto_drv = bdrv_find_protocol(filename, true, errp); 7252 if (!proto_drv) { 7253 return; 7254 } 7255 7256 if (!drv->create_opts) { 7257 error_setg(errp, "Format driver '%s' does not support image creation", 7258 drv->format_name); 7259 return; 7260 } 7261 7262 if (!proto_drv->create_opts) { 7263 error_setg(errp, "Protocol driver '%s' does not support image creation", 7264 proto_drv->format_name); 7265 return; 7266 } 7267 7268 /* Create parameter list */ 7269 create_opts = qemu_opts_append(create_opts, drv->create_opts); 7270 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 7271 7272 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 7273 7274 /* Parse -o options */ 7275 if (options) { 7276 if (!qemu_opts_do_parse(opts, options, NULL, errp)) { 7277 goto out; 7278 } 7279 } 7280 7281 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { 7282 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 7283 } else if (img_size != UINT64_C(-1)) { 7284 error_setg(errp, "The image size must be specified only once"); 7285 goto out; 7286 } 7287 7288 if (base_filename) { 7289 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, 7290 NULL)) { 7291 error_setg(errp, "Backing file not supported for file format '%s'", 7292 fmt); 7293 goto out; 7294 } 7295 } 7296 7297 if (base_fmt) { 7298 if 
(!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { 7299 error_setg(errp, "Backing file format not supported for file " 7300 "format '%s'", fmt); 7301 goto out; 7302 } 7303 } 7304 7305 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 7306 if (backing_file) { 7307 if (!strcmp(filename, backing_file)) { 7308 error_setg(errp, "Error: Trying to create an image with the " 7309 "same filename as the backing file"); 7310 goto out; 7311 } 7312 if (backing_file[0] == '\0') { 7313 error_setg(errp, "Expected backing file name, got empty string"); 7314 goto out; 7315 } 7316 } 7317 7318 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 7319 7320 /* The size for the image must always be specified, unless we have a backing 7321 * file and we have not been forbidden from opening it. */ 7322 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 7323 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 7324 BlockDriverState *bs; 7325 char *full_backing; 7326 int back_flags; 7327 QDict *backing_options = NULL; 7328 7329 full_backing = 7330 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 7331 &local_err); 7332 if (local_err) { 7333 goto out; 7334 } 7335 assert(full_backing); 7336 7337 /* 7338 * No need to do I/O here, which allows us to open encrypted 7339 * backing images without needing the secret 7340 */ 7341 back_flags = flags; 7342 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 7343 back_flags |= BDRV_O_NO_IO; 7344 7345 backing_options = qdict_new(); 7346 if (backing_fmt) { 7347 qdict_put_str(backing_options, "driver", backing_fmt); 7348 } 7349 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 7350 7351 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 7352 &local_err); 7353 g_free(full_backing); 7354 if (!bs) { 7355 error_append_hint(&local_err, "Could not open backing image.\n"); 7356 goto out; 7357 } else { 7358 if (!backing_fmt) { 7359 error_setg(&local_err, 7360 "Backing file 
specified without backing format"); 7361 error_append_hint(&local_err, "Detected format of %s.\n", 7362 bs->drv->format_name); 7363 goto out; 7364 } 7365 if (size == -1) { 7366 /* Opened BS, have no size */ 7367 size = bdrv_getlength(bs); 7368 if (size < 0) { 7369 error_setg_errno(errp, -size, "Could not get size of '%s'", 7370 backing_file); 7371 bdrv_unref(bs); 7372 goto out; 7373 } 7374 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 7375 } 7376 bdrv_unref(bs); 7377 } 7378 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 7379 } else if (backing_file && !backing_fmt) { 7380 error_setg(&local_err, 7381 "Backing file specified without backing format"); 7382 goto out; 7383 } 7384 7385 /* Parameter 'size' is not needed for detached LUKS header */ 7386 if (size == -1 && 7387 !(!strcmp(fmt, "luks") && 7388 qemu_opt_get_bool(opts, "detached-header", false))) { 7389 error_setg(errp, "Image creation needs a size parameter"); 7390 goto out; 7391 } 7392 7393 if (!quiet) { 7394 printf("Formatting '%s', fmt=%s ", filename, fmt); 7395 qemu_opts_print(opts, " "); 7396 puts(""); 7397 fflush(stdout); 7398 } 7399 7400 ret = bdrv_create(drv, filename, opts, &local_err); 7401 7402 if (ret == -EFBIG) { 7403 /* This is generally a better message than whatever the driver would 7404 * deliver (especially because of the cluster_size_hint), since that 7405 * is most probably not much different from "image too large". 
*/ 7406 const char *cluster_size_hint = ""; 7407 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 7408 cluster_size_hint = " (try using a larger cluster size)"; 7409 } 7410 error_setg(errp, "The image size is too large for file format '%s'" 7411 "%s", fmt, cluster_size_hint); 7412 error_free(local_err); 7413 local_err = NULL; 7414 } 7415 7416 out: 7417 qemu_opts_del(opts); 7418 qemu_opts_free(create_opts); 7419 error_propagate(errp, local_err); 7420 } 7421 7422 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 7423 { 7424 IO_CODE(); 7425 return bs ? bs->aio_context : qemu_get_aio_context(); 7426 } 7427 7428 AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) 7429 { 7430 Coroutine *self = qemu_coroutine_self(); 7431 AioContext *old_ctx = qemu_coroutine_get_aio_context(self); 7432 AioContext *new_ctx; 7433 IO_CODE(); 7434 7435 /* 7436 * Increase bs->in_flight to ensure that this operation is completed before 7437 * moving the node to a different AioContext. Read new_ctx only afterwards. 
7438 */ 7439 bdrv_inc_in_flight(bs); 7440 7441 new_ctx = bdrv_get_aio_context(bs); 7442 aio_co_reschedule_self(new_ctx); 7443 return old_ctx; 7444 } 7445 7446 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) 7447 { 7448 IO_CODE(); 7449 aio_co_reschedule_self(old_ctx); 7450 bdrv_dec_in_flight(bs); 7451 } 7452 7453 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) 7454 { 7455 GLOBAL_STATE_CODE(); 7456 QLIST_REMOVE(ban, list); 7457 g_free(ban); 7458 } 7459 7460 static void bdrv_detach_aio_context(BlockDriverState *bs) 7461 { 7462 BdrvAioNotifier *baf, *baf_tmp; 7463 7464 assert(!bs->walking_aio_notifiers); 7465 GLOBAL_STATE_CODE(); 7466 bs->walking_aio_notifiers = true; 7467 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { 7468 if (baf->deleted) { 7469 bdrv_do_remove_aio_context_notifier(baf); 7470 } else { 7471 baf->detach_aio_context(baf->opaque); 7472 } 7473 } 7474 /* Never mind iterating again to check for ->deleted. bdrv_close() will 7475 * remove remaining aio notifiers if we aren't called again. 
7476 */ 7477 bs->walking_aio_notifiers = false; 7478 7479 if (bs->drv && bs->drv->bdrv_detach_aio_context) { 7480 bs->drv->bdrv_detach_aio_context(bs); 7481 } 7482 7483 bs->aio_context = NULL; 7484 } 7485 7486 static void bdrv_attach_aio_context(BlockDriverState *bs, 7487 AioContext *new_context) 7488 { 7489 BdrvAioNotifier *ban, *ban_tmp; 7490 GLOBAL_STATE_CODE(); 7491 7492 bs->aio_context = new_context; 7493 7494 if (bs->drv && bs->drv->bdrv_attach_aio_context) { 7495 bs->drv->bdrv_attach_aio_context(bs, new_context); 7496 } 7497 7498 assert(!bs->walking_aio_notifiers); 7499 bs->walking_aio_notifiers = true; 7500 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { 7501 if (ban->deleted) { 7502 bdrv_do_remove_aio_context_notifier(ban); 7503 } else { 7504 ban->attached_aio_context(new_context, ban->opaque); 7505 } 7506 } 7507 bs->walking_aio_notifiers = false; 7508 } 7509 7510 typedef struct BdrvStateSetAioContext { 7511 AioContext *new_ctx; 7512 BlockDriverState *bs; 7513 } BdrvStateSetAioContext; 7514 7515 static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, 7516 GHashTable *visited, 7517 Transaction *tran, 7518 Error **errp) 7519 { 7520 GLOBAL_STATE_CODE(); 7521 if (g_hash_table_contains(visited, c)) { 7522 return true; 7523 } 7524 g_hash_table_add(visited, c); 7525 7526 /* 7527 * A BdrvChildClass that doesn't handle AioContext changes cannot 7528 * tolerate any AioContext changes 7529 */ 7530 if (!c->klass->change_aio_ctx) { 7531 char *user = bdrv_child_user_desc(c); 7532 error_setg(errp, "Changing iothreads is not supported by %s", user); 7533 g_free(user); 7534 return false; 7535 } 7536 if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) { 7537 assert(!errp || *errp); 7538 return false; 7539 } 7540 return true; 7541 } 7542 7543 bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, 7544 GHashTable *visited, Transaction *tran, 7545 Error **errp) 7546 { 7547 GLOBAL_STATE_CODE(); 7548 if 
(g_hash_table_contains(visited, c)) { 7549 return true; 7550 } 7551 g_hash_table_add(visited, c); 7552 return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp); 7553 } 7554 7555 static void bdrv_set_aio_context_clean(void *opaque) 7556 { 7557 BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; 7558 BlockDriverState *bs = (BlockDriverState *) state->bs; 7559 7560 /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */ 7561 bdrv_drained_end(bs); 7562 7563 g_free(state); 7564 } 7565 7566 static void bdrv_set_aio_context_commit(void *opaque) 7567 { 7568 BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; 7569 BlockDriverState *bs = (BlockDriverState *) state->bs; 7570 AioContext *new_context = state->new_ctx; 7571 7572 bdrv_detach_aio_context(bs); 7573 bdrv_attach_aio_context(bs, new_context); 7574 } 7575 7576 static TransactionActionDrv set_aio_context = { 7577 .commit = bdrv_set_aio_context_commit, 7578 .clean = bdrv_set_aio_context_clean, 7579 }; 7580 7581 /* 7582 * Changes the AioContext used for fd handlers, timers, and BHs by this 7583 * BlockDriverState and all its children and parents. 7584 * 7585 * Must be called from the main AioContext. 7586 * 7587 * @visited will accumulate all visited BdrvChild objects. The caller is 7588 * responsible for freeing the list afterwards. 
7589 */ 7590 static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, 7591 GHashTable *visited, Transaction *tran, 7592 Error **errp) 7593 { 7594 BdrvChild *c; 7595 BdrvStateSetAioContext *state; 7596 7597 GLOBAL_STATE_CODE(); 7598 7599 if (bdrv_get_aio_context(bs) == ctx) { 7600 return true; 7601 } 7602 7603 bdrv_graph_rdlock_main_loop(); 7604 QLIST_FOREACH(c, &bs->parents, next_parent) { 7605 if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { 7606 bdrv_graph_rdunlock_main_loop(); 7607 return false; 7608 } 7609 } 7610 7611 QLIST_FOREACH(c, &bs->children, next) { 7612 if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { 7613 bdrv_graph_rdunlock_main_loop(); 7614 return false; 7615 } 7616 } 7617 bdrv_graph_rdunlock_main_loop(); 7618 7619 state = g_new(BdrvStateSetAioContext, 1); 7620 *state = (BdrvStateSetAioContext) { 7621 .new_ctx = ctx, 7622 .bs = bs, 7623 }; 7624 7625 /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */ 7626 bdrv_drained_begin(bs); 7627 7628 tran_add(tran, &set_aio_context, state); 7629 7630 return true; 7631 } 7632 7633 /* 7634 * Change bs's and recursively all of its parents' and children's AioContext 7635 * to the given new context, returning an error if that isn't possible. 7636 * 7637 * If ignore_child is not NULL, that child (and its subgraph) will not 7638 * be touched. 7639 */ 7640 int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, 7641 BdrvChild *ignore_child, Error **errp) 7642 { 7643 Transaction *tran; 7644 GHashTable *visited; 7645 int ret; 7646 GLOBAL_STATE_CODE(); 7647 7648 /* 7649 * Recursion phase: go through all nodes of the graph. 7650 * Take care of checking that all nodes support changing AioContext 7651 * and drain them, building a linear list of callbacks to run if everything 7652 * is successful (the transaction itself). 
7653 */ 7654 tran = tran_new(); 7655 visited = g_hash_table_new(NULL, NULL); 7656 if (ignore_child) { 7657 g_hash_table_add(visited, ignore_child); 7658 } 7659 ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp); 7660 g_hash_table_destroy(visited); 7661 7662 /* 7663 * Linear phase: go through all callbacks collected in the transaction. 7664 * Run all callbacks collected in the recursion to switch every node's 7665 * AioContext (transaction commit), or undo all changes done in the 7666 * recursion (transaction abort). 7667 */ 7668 7669 if (!ret) { 7670 /* Just run clean() callbacks. No AioContext changed. */ 7671 tran_abort(tran); 7672 return -EPERM; 7673 } 7674 7675 tran_commit(tran); 7676 return 0; 7677 } 7678 7679 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 7680 void (*attached_aio_context)(AioContext *new_context, void *opaque), 7681 void (*detach_aio_context)(void *opaque), void *opaque) 7682 { 7683 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 7684 *ban = (BdrvAioNotifier){ 7685 .attached_aio_context = attached_aio_context, 7686 .detach_aio_context = detach_aio_context, 7687 .opaque = opaque 7688 }; 7689 GLOBAL_STATE_CODE(); 7690 7691 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 7692 } 7693 7694 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 7695 void (*attached_aio_context)(AioContext *, 7696 void *), 7697 void (*detach_aio_context)(void *), 7698 void *opaque) 7699 { 7700 BdrvAioNotifier *ban, *ban_next; 7701 GLOBAL_STATE_CODE(); 7702 7703 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 7704 if (ban->attached_aio_context == attached_aio_context && 7705 ban->detach_aio_context == detach_aio_context && 7706 ban->opaque == opaque && 7707 ban->deleted == false) 7708 { 7709 if (bs->walking_aio_notifiers) { 7710 ban->deleted = true; 7711 } else { 7712 bdrv_do_remove_aio_context_notifier(ban); 7713 } 7714 return; 7715 } 7716 } 7717 7718 abort(); 7719 } 7720 7721 int bdrv_amend_options(BlockDriverState *bs, 
QemuOpts *opts, 7722 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 7723 bool force, 7724 Error **errp) 7725 { 7726 GLOBAL_STATE_CODE(); 7727 if (!bs->drv) { 7728 error_setg(errp, "Node is ejected"); 7729 return -ENOMEDIUM; 7730 } 7731 if (!bs->drv->bdrv_amend_options) { 7732 error_setg(errp, "Block driver '%s' does not support option amendment", 7733 bs->drv->format_name); 7734 return -ENOTSUP; 7735 } 7736 return bs->drv->bdrv_amend_options(bs, opts, status_cb, 7737 cb_opaque, force, errp); 7738 } 7739 7740 /* 7741 * This function checks whether the given @to_replace is allowed to be 7742 * replaced by a node that always shows the same data as @bs. This is 7743 * used for example to verify whether the mirror job can replace 7744 * @to_replace by the target mirrored from @bs. 7745 * To be replaceable, @bs and @to_replace may either be guaranteed to 7746 * always show the same data (because they are only connected through 7747 * filters), or some driver may allow replacing one of its children 7748 * because it can guarantee that this child's data is not visible at 7749 * all (for example, for dissenting quorum children that have no other 7750 * parents). 
7751 */ 7752 bool bdrv_recurse_can_replace(BlockDriverState *bs, 7753 BlockDriverState *to_replace) 7754 { 7755 BlockDriverState *filtered; 7756 7757 GLOBAL_STATE_CODE(); 7758 7759 if (!bs || !bs->drv) { 7760 return false; 7761 } 7762 7763 if (bs == to_replace) { 7764 return true; 7765 } 7766 7767 /* See what the driver can do */ 7768 if (bs->drv->bdrv_recurse_can_replace) { 7769 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 7770 } 7771 7772 /* For filters without an own implementation, we can recurse on our own */ 7773 filtered = bdrv_filter_bs(bs); 7774 if (filtered) { 7775 return bdrv_recurse_can_replace(filtered, to_replace); 7776 } 7777 7778 /* Safe default */ 7779 return false; 7780 } 7781 7782 /* 7783 * Check whether the given @node_name can be replaced by a node that 7784 * has the same data as @parent_bs. If so, return @node_name's BDS; 7785 * NULL otherwise. 7786 * 7787 * @node_name must be a (recursive) *child of @parent_bs (or this 7788 * function will return NULL). 7789 * 7790 * The result (whether the node can be replaced or not) is only valid 7791 * for as long as no graph or permission changes occur. 7792 */ 7793 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 7794 const char *node_name, Error **errp) 7795 { 7796 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 7797 7798 GLOBAL_STATE_CODE(); 7799 7800 if (!to_replace_bs) { 7801 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 7802 return NULL; 7803 } 7804 7805 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 7806 return NULL; 7807 } 7808 7809 /* We don't want arbitrary node of the BDS chain to be replaced only the top 7810 * most non filter in order to prevent data corruption. 7811 * Another benefit is that this tests exclude backing files which are 7812 * blocked by the backing blockers. 
7813 */ 7814 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 7815 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 7816 "because it cannot be guaranteed that doing so would not " 7817 "lead to an abrupt change of visible data", 7818 node_name, parent_bs->node_name); 7819 return NULL; 7820 } 7821 7822 return to_replace_bs; 7823 } 7824 7825 /** 7826 * Iterates through the list of runtime option keys that are said to 7827 * be "strong" for a BDS. An option is called "strong" if it changes 7828 * a BDS's data. For example, the null block driver's "size" and 7829 * "read-zeroes" options are strong, but its "latency-ns" option is 7830 * not. 7831 * 7832 * If a key returned by this function ends with a dot, all options 7833 * starting with that prefix are strong. 7834 */ 7835 static const char *const *strong_options(BlockDriverState *bs, 7836 const char *const *curopt) 7837 { 7838 static const char *const global_options[] = { 7839 "driver", "filename", NULL 7840 }; 7841 7842 if (!curopt) { 7843 return &global_options[0]; 7844 } 7845 7846 curopt++; 7847 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 7848 curopt = bs->drv->strong_runtime_opts; 7849 } 7850 7851 return (curopt && *curopt) ? curopt : NULL; 7852 } 7853 7854 /** 7855 * Copies all strong runtime options from bs->options to the given 7856 * QDict. The set of strong option keys is determined by invoking 7857 * strong_options(). 7858 * 7859 * Returns true iff any strong option was present in bs->options (and 7860 * thus copied to the target QDict) with the exception of "filename" 7861 * and "driver". The caller is expected to use this value to decide 7862 * whether the existence of strong options prevents the generation of 7863 * a plain filename. 
7864 */ 7865 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 7866 { 7867 bool found_any = false; 7868 const char *const *option_name = NULL; 7869 7870 if (!bs->drv) { 7871 return false; 7872 } 7873 7874 while ((option_name = strong_options(bs, option_name))) { 7875 bool option_given = false; 7876 7877 assert(strlen(*option_name) > 0); 7878 if ((*option_name)[strlen(*option_name) - 1] != '.') { 7879 QObject *entry = qdict_get(bs->options, *option_name); 7880 if (!entry) { 7881 continue; 7882 } 7883 7884 qdict_put_obj(d, *option_name, qobject_ref(entry)); 7885 option_given = true; 7886 } else { 7887 const QDictEntry *entry; 7888 for (entry = qdict_first(bs->options); entry; 7889 entry = qdict_next(bs->options, entry)) 7890 { 7891 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 7892 qdict_put_obj(d, qdict_entry_key(entry), 7893 qobject_ref(qdict_entry_value(entry))); 7894 option_given = true; 7895 } 7896 } 7897 } 7898 7899 /* While "driver" and "filename" need to be included in a JSON filename, 7900 * their existence does not prohibit generation of a plain filename. */ 7901 if (!found_any && option_given && 7902 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 7903 { 7904 found_any = true; 7905 } 7906 } 7907 7908 if (!qdict_haskey(d, "driver")) { 7909 /* Drivers created with bdrv_new_open_driver() may not have a 7910 * @driver option. Add it here. */ 7911 qdict_put_str(d, "driver", bs->drv->format_name); 7912 } 7913 7914 return found_any; 7915 } 7916 7917 /* Note: This function may return false positives; it may return true 7918 * even if opening the backing file specified by bs's image header 7919 * would result in exactly bs->backing. 
*/
static bool GRAPH_RDLOCK bdrv_backing_overridden(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();

    if (!bs->backing) {
        /* No backing BDS, so if the image header reports any backing
         * file, it must have been suppressed */
        return bs->auto_backing_file[0] != '\0';
    }

    /* Overridden if the attached backing node's filename differs from
     * bs->auto_backing_file */
    return strcmp(bs->auto_backing_file, bs->backing->bs->filename) != 0;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
7944 */ 7945 void bdrv_refresh_filename(BlockDriverState *bs) 7946 { 7947 BlockDriver *drv = bs->drv; 7948 BdrvChild *child; 7949 BlockDriverState *primary_child_bs; 7950 QDict *opts; 7951 bool backing_overridden; 7952 bool generate_json_filename; /* Whether our default implementation should 7953 fill exact_filename (false) or not (true) */ 7954 7955 GLOBAL_STATE_CODE(); 7956 7957 if (!drv) { 7958 return; 7959 } 7960 7961 /* This BDS's file name may depend on any of its children's file names, so 7962 * refresh those first */ 7963 QLIST_FOREACH(child, &bs->children, next) { 7964 bdrv_refresh_filename(child->bs); 7965 } 7966 7967 if (bs->implicit) { 7968 /* For implicit nodes, just copy everything from the single child */ 7969 child = QLIST_FIRST(&bs->children); 7970 assert(QLIST_NEXT(child, next) == NULL); 7971 7972 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 7973 child->bs->exact_filename); 7974 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 7975 7976 qobject_unref(bs->full_open_options); 7977 bs->full_open_options = qobject_ref(child->bs->full_open_options); 7978 7979 return; 7980 } 7981 7982 backing_overridden = bdrv_backing_overridden(bs); 7983 7984 if (bs->open_flags & BDRV_O_NO_IO) { 7985 /* Without I/O, the backing file does not change anything. 7986 * Therefore, in such a case (primarily qemu-img), we can 7987 * pretend the backing file has not been overridden even if 7988 * it technically has been. 
*/ 7989 backing_overridden = false; 7990 } 7991 7992 /* Gather the options QDict */ 7993 opts = qdict_new(); 7994 generate_json_filename = append_strong_runtime_options(opts, bs); 7995 generate_json_filename |= backing_overridden; 7996 7997 if (drv->bdrv_gather_child_options) { 7998 /* Some block drivers may not want to present all of their children's 7999 * options, or name them differently from BdrvChild.name */ 8000 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 8001 } else { 8002 QLIST_FOREACH(child, &bs->children, next) { 8003 if (child == bs->backing && !backing_overridden) { 8004 /* We can skip the backing BDS if it has not been overridden */ 8005 continue; 8006 } 8007 8008 qdict_put(opts, child->name, 8009 qobject_ref(child->bs->full_open_options)); 8010 } 8011 8012 if (backing_overridden && !bs->backing) { 8013 /* Force no backing file */ 8014 qdict_put_null(opts, "backing"); 8015 } 8016 } 8017 8018 qobject_unref(bs->full_open_options); 8019 bs->full_open_options = opts; 8020 8021 primary_child_bs = bdrv_primary_bs(bs); 8022 8023 if (drv->bdrv_refresh_filename) { 8024 /* Obsolete information is of no use here, so drop the old file name 8025 * information before refreshing it */ 8026 bs->exact_filename[0] = '\0'; 8027 8028 drv->bdrv_refresh_filename(bs); 8029 } else if (primary_child_bs) { 8030 /* 8031 * Try to reconstruct valid information from the underlying 8032 * file -- this only works for format nodes (filter nodes 8033 * cannot be probed and as such must be selected by the user 8034 * either through an options dict, or through a special 8035 * filename which the filter driver must construct in its 8036 * .bdrv_refresh_filename() implementation). 
8037 */ 8038 8039 bs->exact_filename[0] = '\0'; 8040 8041 /* 8042 * We can use the underlying file's filename if: 8043 * - it has a filename, 8044 * - the current BDS is not a filter, 8045 * - the file is a protocol BDS, and 8046 * - opening that file (as this BDS's format) will automatically create 8047 * the BDS tree we have right now, that is: 8048 * - the user did not significantly change this BDS's behavior with 8049 * some explicit (strong) options 8050 * - no non-file child of this BDS has been overridden by the user 8051 * Both of these conditions are represented by generate_json_filename. 8052 */ 8053 if (primary_child_bs->exact_filename[0] && 8054 primary_child_bs->drv->protocol_name && 8055 !drv->is_filter && !generate_json_filename) 8056 { 8057 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 8058 } 8059 } 8060 8061 if (bs->exact_filename[0]) { 8062 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 8063 } else { 8064 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 8065 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 8066 json->str) >= sizeof(bs->filename)) { 8067 /* Give user a hint if we truncated things. 
*/ 8068 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 8069 } 8070 g_string_free(json, true); 8071 } 8072 } 8073 8074 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 8075 { 8076 BlockDriver *drv = bs->drv; 8077 BlockDriverState *child_bs; 8078 8079 GLOBAL_STATE_CODE(); 8080 8081 if (!drv) { 8082 error_setg(errp, "Node '%s' is ejected", bs->node_name); 8083 return NULL; 8084 } 8085 8086 if (drv->bdrv_dirname) { 8087 return drv->bdrv_dirname(bs, errp); 8088 } 8089 8090 child_bs = bdrv_primary_bs(bs); 8091 if (child_bs) { 8092 return bdrv_dirname(child_bs, errp); 8093 } 8094 8095 bdrv_refresh_filename(bs); 8096 if (bs->exact_filename[0] != '\0') { 8097 return path_combine(bs->exact_filename, ""); 8098 } 8099 8100 error_setg(errp, "Cannot generate a base directory for %s nodes", 8101 drv->format_name); 8102 return NULL; 8103 } 8104 8105 /* 8106 * Hot add/remove a BDS's child. So the user can take a child offline when 8107 * it is broken and take a new child online 8108 */ 8109 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 8110 Error **errp) 8111 { 8112 GLOBAL_STATE_CODE(); 8113 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 8114 error_setg(errp, "The node %s does not support adding a child", 8115 bdrv_get_device_or_node_name(parent_bs)); 8116 return; 8117 } 8118 8119 /* 8120 * Non-zoned block drivers do not follow zoned storage constraints 8121 * (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned 8122 * drivers in a graph. 8123 */ 8124 if (!parent_bs->drv->supports_zoned_children && 8125 child_bs->bl.zoned == BLK_Z_HM) { 8126 /* 8127 * The host-aware model allows zoned storage constraints and random 8128 * write. Allow mixing host-aware and non-zoned drivers. Using 8129 * host-aware device as a regular device. 8130 */ 8131 error_setg(errp, "Cannot add a %s child to a %s parent", 8132 child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned", 8133 parent_bs->drv->supports_zoned_children ? 
8134 "support zoned children" : "not support zoned children"); 8135 return; 8136 } 8137 8138 if (!QLIST_EMPTY(&child_bs->parents)) { 8139 error_setg(errp, "The node %s already has a parent", 8140 child_bs->node_name); 8141 return; 8142 } 8143 8144 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 8145 } 8146 8147 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 8148 { 8149 BdrvChild *tmp; 8150 8151 GLOBAL_STATE_CODE(); 8152 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 8153 error_setg(errp, "The node %s does not support removing a child", 8154 bdrv_get_device_or_node_name(parent_bs)); 8155 return; 8156 } 8157 8158 QLIST_FOREACH(tmp, &parent_bs->children, next) { 8159 if (tmp == child) { 8160 break; 8161 } 8162 } 8163 8164 if (!tmp) { 8165 error_setg(errp, "The node %s does not have a child named %s", 8166 bdrv_get_device_or_node_name(parent_bs), 8167 bdrv_get_device_or_node_name(child->bs)); 8168 return; 8169 } 8170 8171 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 8172 } 8173 8174 int bdrv_make_empty(BdrvChild *c, Error **errp) 8175 { 8176 BlockDriver *drv = c->bs->drv; 8177 int ret; 8178 8179 GLOBAL_STATE_CODE(); 8180 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 8181 8182 if (!drv->bdrv_make_empty) { 8183 error_setg(errp, "%s does not support emptying nodes", 8184 drv->format_name); 8185 return -ENOTSUP; 8186 } 8187 8188 ret = drv->bdrv_make_empty(c->bs); 8189 if (ret < 0) { 8190 error_setg_errno(errp, -ret, "Failed to empty %s", 8191 c->bs->filename); 8192 return ret; 8193 } 8194 8195 return 0; 8196 } 8197 8198 /* 8199 * Return the child that @bs acts as an overlay for, and from which data may be 8200 * copied in COW or COR operations. Usually this is the backing file. 
8201 */ 8202 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 8203 { 8204 IO_CODE(); 8205 8206 if (!bs || !bs->drv) { 8207 return NULL; 8208 } 8209 8210 if (bs->drv->is_filter) { 8211 return NULL; 8212 } 8213 8214 if (!bs->backing) { 8215 return NULL; 8216 } 8217 8218 assert(bs->backing->role & BDRV_CHILD_COW); 8219 return bs->backing; 8220 } 8221 8222 /* 8223 * If @bs acts as a filter for exactly one of its children, return 8224 * that child. 8225 */ 8226 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 8227 { 8228 BdrvChild *c; 8229 IO_CODE(); 8230 8231 if (!bs || !bs->drv) { 8232 return NULL; 8233 } 8234 8235 if (!bs->drv->is_filter) { 8236 return NULL; 8237 } 8238 8239 /* Only one of @backing or @file may be used */ 8240 assert(!(bs->backing && bs->file)); 8241 8242 c = bs->backing ?: bs->file; 8243 if (!c) { 8244 return NULL; 8245 } 8246 8247 assert(c->role & BDRV_CHILD_FILTERED); 8248 return c; 8249 } 8250 8251 /* 8252 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 8253 * whichever is non-NULL. 8254 * 8255 * Return NULL if both are NULL. 8256 */ 8257 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 8258 { 8259 BdrvChild *cow_child = bdrv_cow_child(bs); 8260 BdrvChild *filter_child = bdrv_filter_child(bs); 8261 IO_CODE(); 8262 8263 /* Filter nodes cannot have COW backing files */ 8264 assert(!(cow_child && filter_child)); 8265 8266 return cow_child ?: filter_child; 8267 } 8268 8269 /* 8270 * Return the primary child of this node: For filters, that is the 8271 * filtered child. For other nodes, that is usually the child storing 8272 * metadata. 8273 * (A generally more helpful description is that this is (usually) the 8274 * child that has the same filename as @bs.) 8275 * 8276 * Drivers do not necessarily have a primary child; for example quorum 8277 * does not. 
8278 */ 8279 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 8280 { 8281 BdrvChild *c, *found = NULL; 8282 IO_CODE(); 8283 8284 QLIST_FOREACH(c, &bs->children, next) { 8285 if (c->role & BDRV_CHILD_PRIMARY) { 8286 assert(!found); 8287 found = c; 8288 } 8289 } 8290 8291 return found; 8292 } 8293 8294 static BlockDriverState * GRAPH_RDLOCK 8295 bdrv_do_skip_filters(BlockDriverState *bs, bool stop_on_explicit_filter) 8296 { 8297 BdrvChild *c; 8298 8299 if (!bs) { 8300 return NULL; 8301 } 8302 8303 while (!(stop_on_explicit_filter && !bs->implicit)) { 8304 c = bdrv_filter_child(bs); 8305 if (!c) { 8306 /* 8307 * A filter that is embedded in a working block graph must 8308 * have a child. Assert this here so this function does 8309 * not return a filter node that is not expected by the 8310 * caller. 8311 */ 8312 assert(!bs->drv || !bs->drv->is_filter); 8313 break; 8314 } 8315 bs = c->bs; 8316 } 8317 /* 8318 * Note that this treats nodes with bs->drv == NULL as not being 8319 * filters (bs->drv == NULL should be replaced by something else 8320 * anyway). 8321 * The advantage of this behavior is that this function will thus 8322 * always return a non-NULL value (given a non-NULL @bs). 8323 */ 8324 8325 return bs; 8326 } 8327 8328 /* 8329 * Return the first BDS that has not been added implicitly or that 8330 * does not have a filtered child down the chain starting from @bs 8331 * (including @bs itself). 8332 */ 8333 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 8334 { 8335 GLOBAL_STATE_CODE(); 8336 return bdrv_do_skip_filters(bs, true); 8337 } 8338 8339 /* 8340 * Return the first BDS that does not have a filtered child down the 8341 * chain starting from @bs (including @bs itself). 8342 */ 8343 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 8344 { 8345 IO_CODE(); 8346 return bdrv_do_skip_filters(bs, false); 8347 } 8348 8349 /* 8350 * For a backing chain, return the first non-filter backing image of 8351 * the first non-filter image. 
8352 */ 8353 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 8354 { 8355 IO_CODE(); 8356 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 8357 } 8358 8359 /** 8360 * Check whether [offset, offset + bytes) overlaps with the cached 8361 * block-status data region. 8362 * 8363 * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, 8364 * which is what bdrv_bsc_is_data()'s interface needs. 8365 * Otherwise, *pnum is not touched. 8366 */ 8367 static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, 8368 int64_t offset, int64_t bytes, 8369 int64_t *pnum) 8370 { 8371 BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); 8372 bool overlaps; 8373 8374 overlaps = 8375 qatomic_read(&bsc->valid) && 8376 ranges_overlap(offset, bytes, bsc->data_start, 8377 bsc->data_end - bsc->data_start); 8378 8379 if (overlaps && pnum) { 8380 *pnum = bsc->data_end - offset; 8381 } 8382 8383 return overlaps; 8384 } 8385 8386 /** 8387 * See block_int.h for this function's documentation. 8388 */ 8389 bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) 8390 { 8391 IO_CODE(); 8392 RCU_READ_LOCK_GUARD(); 8393 return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); 8394 } 8395 8396 /** 8397 * See block_int.h for this function's documentation. 8398 */ 8399 void bdrv_bsc_invalidate_range(BlockDriverState *bs, 8400 int64_t offset, int64_t bytes) 8401 { 8402 IO_CODE(); 8403 RCU_READ_LOCK_GUARD(); 8404 8405 if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { 8406 qatomic_set(&bs->block_status_cache->valid, false); 8407 } 8408 } 8409 8410 /** 8411 * See block_int.h for this function's documentation. 
8412 */ 8413 void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) 8414 { 8415 BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); 8416 BdrvBlockStatusCache *old_bsc; 8417 IO_CODE(); 8418 8419 *new_bsc = (BdrvBlockStatusCache) { 8420 .valid = true, 8421 .data_start = offset, 8422 .data_end = offset + bytes, 8423 }; 8424 8425 QEMU_LOCK_GUARD(&bs->bsc_modify_lock); 8426 8427 old_bsc = qatomic_rcu_read(&bs->block_status_cache); 8428 qatomic_rcu_set(&bs->block_status_cache, new_bsc); 8429 if (old_bsc) { 8430 g_free_rcu(old_bsc, rcu); 8431 } 8432 } 8433