1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2020 Virtuozzo International GmbH. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 
24 */ 25 26 #include "qemu/osdep.h" 27 #include "block/trace.h" 28 #include "block/block_int.h" 29 #include "block/blockjob.h" 30 #include "block/fuse.h" 31 #include "block/nbd.h" 32 #include "block/qdict.h" 33 #include "qemu/error-report.h" 34 #include "block/module_block.h" 35 #include "qemu/main-loop.h" 36 #include "qemu/module.h" 37 #include "qapi/error.h" 38 #include "qapi/qmp/qdict.h" 39 #include "qapi/qmp/qjson.h" 40 #include "qapi/qmp/qnull.h" 41 #include "qapi/qmp/qstring.h" 42 #include "qapi/qobject-output-visitor.h" 43 #include "qapi/qapi-visit-block-core.h" 44 #include "sysemu/block-backend.h" 45 #include "qemu/notify.h" 46 #include "qemu/option.h" 47 #include "qemu/coroutine.h" 48 #include "block/qapi.h" 49 #include "qemu/timer.h" 50 #include "qemu/cutils.h" 51 #include "qemu/id.h" 52 #include "qemu/range.h" 53 #include "qemu/rcu.h" 54 #include "block/coroutines.h" 55 56 #ifdef CONFIG_BSD 57 #include <sys/ioctl.h> 58 #include <sys/queue.h> 59 #if defined(HAVE_SYS_DISK_H) 60 #include <sys/disk.h> 61 #endif 62 #endif 63 64 #ifdef _WIN32 65 #include <windows.h> 66 #endif 67 68 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 69 70 /* Protected by BQL */ 71 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 72 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 73 74 /* Protected by BQL */ 75 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 76 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 77 78 /* Protected by BQL */ 79 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 80 QLIST_HEAD_INITIALIZER(bdrv_drivers); 81 82 static BlockDriverState *bdrv_open_inherit(const char *filename, 83 const char *reference, 84 QDict *options, int flags, 85 BlockDriverState *parent, 86 const BdrvChildClass *child_class, 87 BdrvChildRole child_role, 88 Error **errp); 89 90 static bool bdrv_recurse_has_child(BlockDriverState *bs, 91 BlockDriverState *child); 92 93 static void bdrv_replace_child_noperm(BdrvChild *child, 94 BlockDriverState 
*new_bs); 95 static void bdrv_remove_child(BdrvChild *child, Transaction *tran); 96 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 97 Transaction *tran); 98 99 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 100 BlockReopenQueue *queue, 101 Transaction *change_child_tran, Error **errp); 102 static void bdrv_reopen_commit(BDRVReopenState *reopen_state); 103 static void bdrv_reopen_abort(BDRVReopenState *reopen_state); 104 105 static bool bdrv_backing_overridden(BlockDriverState *bs); 106 107 static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, 108 GHashTable *visited, Transaction *tran, 109 Error **errp); 110 111 /* If non-zero, use only whitelisted block drivers */ 112 static int use_bdrv_whitelist; 113 114 #ifdef _WIN32 115 static int is_windows_drive_prefix(const char *filename) 116 { 117 return (((filename[0] >= 'a' && filename[0] <= 'z') || 118 (filename[0] >= 'A' && filename[0] <= 'Z')) && 119 filename[1] == ':'); 120 } 121 122 int is_windows_drive(const char *filename) 123 { 124 if (is_windows_drive_prefix(filename) && 125 filename[2] == '\0') 126 return 1; 127 if (strstart(filename, "\\\\.\\", NULL) || 128 strstart(filename, "//./", NULL)) 129 return 1; 130 return 0; 131 } 132 #endif 133 134 size_t bdrv_opt_mem_align(BlockDriverState *bs) 135 { 136 if (!bs || !bs->drv) { 137 /* page size or 4k (hdd sector size) should be on the safe side */ 138 return MAX(4096, qemu_real_host_page_size()); 139 } 140 IO_CODE(); 141 142 return bs->bl.opt_mem_alignment; 143 } 144 145 size_t bdrv_min_mem_align(BlockDriverState *bs) 146 { 147 if (!bs || !bs->drv) { 148 /* page size or 4k (hdd sector size) should be on the safe side */ 149 return MAX(4096, qemu_real_host_page_size()); 150 } 151 IO_CODE(); 152 153 return bs->bl.min_mem_alignment; 154 } 155 156 /* check if the path starts with "<protocol>:" */ 157 int path_has_protocol(const char *path) 158 { 159 const char *p; 160 161 #ifdef _WIN32 162 if 
(is_windows_drive(path) || 163 is_windows_drive_prefix(path)) { 164 return 0; 165 } 166 p = path + strcspn(path, ":/\\"); 167 #else 168 p = path + strcspn(path, ":/"); 169 #endif 170 171 return *p == ':'; 172 } 173 174 int path_is_absolute(const char *path) 175 { 176 #ifdef _WIN32 177 /* specific case for names like: "\\.\d:" */ 178 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 179 return 1; 180 } 181 return (*path == '/' || *path == '\\'); 182 #else 183 return (*path == '/'); 184 #endif 185 } 186 187 /* if filename is absolute, just return its duplicate. Otherwise, build a 188 path to it by considering it is relative to base_path. URL are 189 supported. */ 190 char *path_combine(const char *base_path, const char *filename) 191 { 192 const char *protocol_stripped = NULL; 193 const char *p, *p1; 194 char *result; 195 int len; 196 197 if (path_is_absolute(filename)) { 198 return g_strdup(filename); 199 } 200 201 if (path_has_protocol(base_path)) { 202 protocol_stripped = strchr(base_path, ':'); 203 if (protocol_stripped) { 204 protocol_stripped++; 205 } 206 } 207 p = protocol_stripped ?: base_path; 208 209 p1 = strrchr(base_path, '/'); 210 #ifdef _WIN32 211 { 212 const char *p2; 213 p2 = strrchr(base_path, '\\'); 214 if (!p1 || p2 > p1) { 215 p1 = p2; 216 } 217 } 218 #endif 219 if (p1) { 220 p1++; 221 } else { 222 p1 = base_path; 223 } 224 if (p1 > p) { 225 p = p1; 226 } 227 len = p - base_path; 228 229 result = g_malloc(len + strlen(filename) + 1); 230 memcpy(result, base_path, len); 231 strcpy(result + len, filename); 232 233 return result; 234 } 235 236 /* 237 * Helper function for bdrv_parse_filename() implementations to remove optional 238 * protocol prefixes (especially "file:") from a filename and for putting the 239 * stripped filename into the options QDict if there is such a prefix. 
240 */ 241 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 242 QDict *options) 243 { 244 if (strstart(filename, prefix, &filename)) { 245 /* Stripping the explicit protocol prefix may result in a protocol 246 * prefix being (wrongly) detected (if the filename contains a colon) */ 247 if (path_has_protocol(filename)) { 248 GString *fat_filename; 249 250 /* This means there is some colon before the first slash; therefore, 251 * this cannot be an absolute path */ 252 assert(!path_is_absolute(filename)); 253 254 /* And we can thus fix the protocol detection issue by prefixing it 255 * by "./" */ 256 fat_filename = g_string_new("./"); 257 g_string_append(fat_filename, filename); 258 259 assert(!path_has_protocol(fat_filename->str)); 260 261 qdict_put(options, "filename", 262 qstring_from_gstring(fat_filename)); 263 } else { 264 /* If no protocol prefix was detected, we can use the shortened 265 * filename as-is */ 266 qdict_put_str(options, "filename", filename); 267 } 268 } 269 } 270 271 272 /* Returns whether the image file is opened as read-only. Note that this can 273 * return false and writing to the image file is still not possible because the 274 * image is inactivated. 
*/ 275 bool bdrv_is_read_only(BlockDriverState *bs) 276 { 277 IO_CODE(); 278 return !(bs->open_flags & BDRV_O_RDWR); 279 } 280 281 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 282 bool ignore_allow_rdw, Error **errp) 283 { 284 IO_CODE(); 285 286 /* Do not set read_only if copy_on_read is enabled */ 287 if (bs->copy_on_read && read_only) { 288 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 289 bdrv_get_device_or_node_name(bs)); 290 return -EINVAL; 291 } 292 293 /* Do not clear read_only if it is prohibited */ 294 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 295 !ignore_allow_rdw) 296 { 297 error_setg(errp, "Node '%s' is read only", 298 bdrv_get_device_or_node_name(bs)); 299 return -EPERM; 300 } 301 302 return 0; 303 } 304 305 /* 306 * Called by a driver that can only provide a read-only image. 307 * 308 * Returns 0 if the node is already read-only or it could switch the node to 309 * read-only because BDRV_O_AUTO_RDONLY is set. 310 * 311 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 312 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 313 * is not NULL, it is used as the error message for the Error object. 314 */ 315 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 316 Error **errp) 317 { 318 int ret = 0; 319 IO_CODE(); 320 321 if (!(bs->open_flags & BDRV_O_RDWR)) { 322 return 0; 323 } 324 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 325 goto fail; 326 } 327 328 ret = bdrv_can_set_read_only(bs, true, false, NULL); 329 if (ret < 0) { 330 goto fail; 331 } 332 333 bs->open_flags &= ~BDRV_O_RDWR; 334 335 return 0; 336 337 fail: 338 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 339 return -EACCES; 340 } 341 342 /* 343 * If @backing is empty, this function returns NULL without setting 344 * @errp. In all other cases, NULL will only be returned with @errp 345 * set. 
346 * 347 * Therefore, a return value of NULL without @errp set means that 348 * there is no backing file; if @errp is set, there is one but its 349 * absolute filename cannot be generated. 350 */ 351 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 352 const char *backing, 353 Error **errp) 354 { 355 if (backing[0] == '\0') { 356 return NULL; 357 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 358 return g_strdup(backing); 359 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 360 error_setg(errp, "Cannot use relative backing file names for '%s'", 361 backed); 362 return NULL; 363 } else { 364 return path_combine(backed, backing); 365 } 366 } 367 368 /* 369 * If @filename is empty or NULL, this function returns NULL without 370 * setting @errp. In all other cases, NULL will only be returned with 371 * @errp set. 372 */ 373 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, 374 const char *filename, Error **errp) 375 { 376 char *dir, *full_name; 377 378 if (!filename || filename[0] == '\0') { 379 return NULL; 380 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 381 return g_strdup(filename); 382 } 383 384 dir = bdrv_dirname(relative_to, errp); 385 if (!dir) { 386 return NULL; 387 } 388 389 full_name = g_strconcat(dir, filename, NULL); 390 g_free(dir); 391 return full_name; 392 } 393 394 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 395 { 396 GLOBAL_STATE_CODE(); 397 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 398 } 399 400 void bdrv_register(BlockDriver *bdrv) 401 { 402 assert(bdrv->format_name); 403 GLOBAL_STATE_CODE(); 404 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 405 } 406 407 BlockDriverState *bdrv_new(void) 408 { 409 BlockDriverState *bs; 410 int i; 411 412 GLOBAL_STATE_CODE(); 413 414 bs = g_new0(BlockDriverState, 1); 415 QLIST_INIT(&bs->dirty_bitmaps); 416 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 417 
QLIST_INIT(&bs->op_blockers[i]); 418 } 419 qemu_co_mutex_init(&bs->reqs_lock); 420 qemu_mutex_init(&bs->dirty_bitmap_mutex); 421 bs->refcnt = 1; 422 bs->aio_context = qemu_get_aio_context(); 423 424 qemu_co_queue_init(&bs->flush_queue); 425 426 qemu_co_mutex_init(&bs->bsc_modify_lock); 427 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); 428 429 for (i = 0; i < bdrv_drain_all_count; i++) { 430 bdrv_drained_begin(bs); 431 } 432 433 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 434 435 return bs; 436 } 437 438 static BlockDriver *bdrv_do_find_format(const char *format_name) 439 { 440 BlockDriver *drv1; 441 GLOBAL_STATE_CODE(); 442 443 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 444 if (!strcmp(drv1->format_name, format_name)) { 445 return drv1; 446 } 447 } 448 449 return NULL; 450 } 451 452 BlockDriver *bdrv_find_format(const char *format_name) 453 { 454 BlockDriver *drv1; 455 int i; 456 457 GLOBAL_STATE_CODE(); 458 459 drv1 = bdrv_do_find_format(format_name); 460 if (drv1) { 461 return drv1; 462 } 463 464 /* The driver isn't registered, maybe we need to load a module */ 465 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 466 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 467 Error *local_err = NULL; 468 int rv = block_module_load(block_driver_modules[i].library_name, 469 &local_err); 470 if (rv > 0) { 471 return bdrv_do_find_format(format_name); 472 } else if (rv < 0) { 473 error_report_err(local_err); 474 } 475 break; 476 } 477 } 478 return NULL; 479 } 480 481 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 482 { 483 static const char *whitelist_rw[] = { 484 CONFIG_BDRV_RW_WHITELIST 485 NULL 486 }; 487 static const char *whitelist_ro[] = { 488 CONFIG_BDRV_RO_WHITELIST 489 NULL 490 }; 491 const char **p; 492 493 if (!whitelist_rw[0] && !whitelist_ro[0]) { 494 return 1; /* no whitelist, anything goes */ 495 } 496 497 for (p = whitelist_rw; *p; p++) { 498 if (!strcmp(format_name, *p)) { 499 
return 1; 500 } 501 } 502 if (read_only) { 503 for (p = whitelist_ro; *p; p++) { 504 if (!strcmp(format_name, *p)) { 505 return 1; 506 } 507 } 508 } 509 return 0; 510 } 511 512 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 513 { 514 GLOBAL_STATE_CODE(); 515 return bdrv_format_is_whitelisted(drv->format_name, read_only); 516 } 517 518 bool bdrv_uses_whitelist(void) 519 { 520 return use_bdrv_whitelist; 521 } 522 523 typedef struct CreateCo { 524 BlockDriver *drv; 525 char *filename; 526 QemuOpts *opts; 527 int ret; 528 Error *err; 529 } CreateCo; 530 531 static void coroutine_fn bdrv_create_co_entry(void *opaque) 532 { 533 Error *local_err = NULL; 534 int ret; 535 536 CreateCo *cco = opaque; 537 assert(cco->drv); 538 GLOBAL_STATE_CODE(); 539 540 ret = cco->drv->bdrv_co_create_opts(cco->drv, 541 cco->filename, cco->opts, &local_err); 542 error_propagate(&cco->err, local_err); 543 cco->ret = ret; 544 } 545 546 int bdrv_create(BlockDriver *drv, const char* filename, 547 QemuOpts *opts, Error **errp) 548 { 549 int ret; 550 551 GLOBAL_STATE_CODE(); 552 553 Coroutine *co; 554 CreateCo cco = { 555 .drv = drv, 556 .filename = g_strdup(filename), 557 .opts = opts, 558 .ret = NOT_DONE, 559 .err = NULL, 560 }; 561 562 if (!drv->bdrv_co_create_opts) { 563 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 564 ret = -ENOTSUP; 565 goto out; 566 } 567 568 if (qemu_in_coroutine()) { 569 /* Fast-path if already in coroutine context */ 570 bdrv_create_co_entry(&cco); 571 } else { 572 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 573 qemu_coroutine_enter(co); 574 while (cco.ret == NOT_DONE) { 575 aio_poll(qemu_get_aio_context(), true); 576 } 577 } 578 579 ret = cco.ret; 580 if (ret < 0) { 581 if (cco.err) { 582 error_propagate(errp, cco.err); 583 } else { 584 error_setg_errno(errp, -ret, "Could not create image"); 585 } 586 } 587 588 out: 589 g_free(cco.filename); 590 return ret; 591 } 592 593 /** 594 * Helper function for 
bdrv_create_file_fallback(): Resize @blk to at 595 * least the given @minimum_size. 596 * 597 * On success, return @blk's actual length. 598 * Otherwise, return -errno. 599 */ 600 static int64_t create_file_fallback_truncate(BlockBackend *blk, 601 int64_t minimum_size, Error **errp) 602 { 603 Error *local_err = NULL; 604 int64_t size; 605 int ret; 606 607 GLOBAL_STATE_CODE(); 608 609 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 610 &local_err); 611 if (ret < 0 && ret != -ENOTSUP) { 612 error_propagate(errp, local_err); 613 return ret; 614 } 615 616 size = blk_getlength(blk); 617 if (size < 0) { 618 error_free(local_err); 619 error_setg_errno(errp, -size, 620 "Failed to inquire the new image file's length"); 621 return size; 622 } 623 624 if (size < minimum_size) { 625 /* Need to grow the image, but we failed to do that */ 626 error_propagate(errp, local_err); 627 return -ENOTSUP; 628 } 629 630 error_free(local_err); 631 local_err = NULL; 632 633 return size; 634 } 635 636 /** 637 * Helper function for bdrv_create_file_fallback(): Zero the first 638 * sector to remove any potentially pre-existing image header. 
639 */ 640 static int coroutine_fn 641 create_file_fallback_zero_first_sector(BlockBackend *blk, 642 int64_t current_size, 643 Error **errp) 644 { 645 int64_t bytes_to_clear; 646 int ret; 647 648 GLOBAL_STATE_CODE(); 649 650 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 651 if (bytes_to_clear) { 652 ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 653 if (ret < 0) { 654 error_setg_errno(errp, -ret, 655 "Failed to clear the new image's first sector"); 656 return ret; 657 } 658 } 659 660 return 0; 661 } 662 663 /** 664 * Simple implementation of bdrv_co_create_opts for protocol drivers 665 * which only support creation via opening a file 666 * (usually existing raw storage device) 667 */ 668 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 669 const char *filename, 670 QemuOpts *opts, 671 Error **errp) 672 { 673 BlockBackend *blk; 674 QDict *options; 675 int64_t size = 0; 676 char *buf = NULL; 677 PreallocMode prealloc; 678 Error *local_err = NULL; 679 int ret; 680 681 GLOBAL_STATE_CODE(); 682 683 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 684 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 685 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 686 PREALLOC_MODE_OFF, &local_err); 687 g_free(buf); 688 if (local_err) { 689 error_propagate(errp, local_err); 690 return -EINVAL; 691 } 692 693 if (prealloc != PREALLOC_MODE_OFF) { 694 error_setg(errp, "Unsupported preallocation mode '%s'", 695 PreallocMode_str(prealloc)); 696 return -ENOTSUP; 697 } 698 699 options = qdict_new(); 700 qdict_put_str(options, "driver", drv->format_name); 701 702 blk = blk_new_open(filename, NULL, options, 703 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 704 if (!blk) { 705 error_prepend(errp, "Protocol driver '%s' does not support image " 706 "creation, and opening the image failed: ", 707 drv->format_name); 708 return -EINVAL; 709 } 710 711 size = create_file_fallback_truncate(blk, size, errp); 712 if (size < 0) { 713 ret = size; 714 goto 
out; 715 } 716 717 ret = create_file_fallback_zero_first_sector(blk, size, errp); 718 if (ret < 0) { 719 goto out; 720 } 721 722 ret = 0; 723 out: 724 blk_unref(blk); 725 return ret; 726 } 727 728 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 729 { 730 QemuOpts *protocol_opts; 731 BlockDriver *drv; 732 QDict *qdict; 733 int ret; 734 735 GLOBAL_STATE_CODE(); 736 737 drv = bdrv_find_protocol(filename, true, errp); 738 if (drv == NULL) { 739 return -ENOENT; 740 } 741 742 if (!drv->create_opts) { 743 error_setg(errp, "Driver '%s' does not support image creation", 744 drv->format_name); 745 return -ENOTSUP; 746 } 747 748 /* 749 * 'opts' contains a QemuOptsList with a combination of format and protocol 750 * default values. 751 * 752 * The format properly removes its options, but the default values remain 753 * in 'opts->list'. So if the protocol has options with the same name 754 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 755 * of the format, since for overlapping options, the format wins. 756 * 757 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 758 * only the set options, and then convert it back to QemuOpts, using the 759 * create_opts of the protocol. So the new QemuOpts, will contain only the 760 * protocol defaults. 
761 */ 762 qdict = qemu_opts_to_qdict(opts, NULL); 763 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 764 if (protocol_opts == NULL) { 765 ret = -EINVAL; 766 goto out; 767 } 768 769 ret = bdrv_create(drv, filename, protocol_opts, errp); 770 out: 771 qemu_opts_del(protocol_opts); 772 qobject_unref(qdict); 773 return ret; 774 } 775 776 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 777 { 778 Error *local_err = NULL; 779 int ret; 780 781 IO_CODE(); 782 assert(bs != NULL); 783 784 if (!bs->drv) { 785 error_setg(errp, "Block node '%s' is not opened", bs->filename); 786 return -ENOMEDIUM; 787 } 788 789 if (!bs->drv->bdrv_co_delete_file) { 790 error_setg(errp, "Driver '%s' does not support image deletion", 791 bs->drv->format_name); 792 return -ENOTSUP; 793 } 794 795 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 796 if (ret < 0) { 797 error_propagate(errp, local_err); 798 } 799 800 return ret; 801 } 802 803 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 804 { 805 Error *local_err = NULL; 806 int ret; 807 IO_CODE(); 808 809 if (!bs) { 810 return; 811 } 812 813 ret = bdrv_co_delete_file(bs, &local_err); 814 /* 815 * ENOTSUP will happen if the block driver doesn't support 816 * the 'bdrv_co_delete_file' interface. This is a predictable 817 * scenario and shouldn't be reported back to the user. 818 */ 819 if (ret == -ENOTSUP) { 820 error_free(local_err); 821 } else if (ret < 0) { 822 error_report_err(local_err); 823 } 824 } 825 826 /** 827 * Try to get @bs's logical and physical block size. 828 * On success, store them in @bsz struct and return 0. 829 * On failure return -errno. 830 * @bs must not be empty. 
831 */ 832 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 833 { 834 BlockDriver *drv = bs->drv; 835 BlockDriverState *filtered = bdrv_filter_bs(bs); 836 GLOBAL_STATE_CODE(); 837 838 if (drv && drv->bdrv_probe_blocksizes) { 839 return drv->bdrv_probe_blocksizes(bs, bsz); 840 } else if (filtered) { 841 return bdrv_probe_blocksizes(filtered, bsz); 842 } 843 844 return -ENOTSUP; 845 } 846 847 /** 848 * Try to get @bs's geometry (cyls, heads, sectors). 849 * On success, store them in @geo struct and return 0. 850 * On failure return -errno. 851 * @bs must not be empty. 852 */ 853 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 854 { 855 BlockDriver *drv = bs->drv; 856 BlockDriverState *filtered = bdrv_filter_bs(bs); 857 GLOBAL_STATE_CODE(); 858 859 if (drv && drv->bdrv_probe_geometry) { 860 return drv->bdrv_probe_geometry(bs, geo); 861 } else if (filtered) { 862 return bdrv_probe_geometry(filtered, geo); 863 } 864 865 return -ENOTSUP; 866 } 867 868 /* 869 * Create a uniquely-named empty temporary file. 870 * Return the actual file name used upon success, otherwise NULL. 871 * This string should be freed with g_free() when not needed any longer. 872 * 873 * Note: creating a temporary file for the caller to (re)open is 874 * inherently racy. Use g_file_open_tmp() instead whenever practical. 875 */ 876 char *create_tmp_file(Error **errp) 877 { 878 int fd; 879 const char *tmpdir; 880 g_autofree char *filename = NULL; 881 882 tmpdir = g_get_tmp_dir(); 883 #ifndef _WIN32 884 /* 885 * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot") 886 * 887 * This function is used to create temporary disk images (like -snapshot), 888 * so the files can become very large. /tmp is often a tmpfs where as 889 * /var/tmp is usually on a disk, so more appropriate for disk images. 
890 */ 891 if (!g_strcmp0(tmpdir, "/tmp")) { 892 tmpdir = "/var/tmp"; 893 } 894 #endif 895 896 filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir); 897 fd = g_mkstemp(filename); 898 if (fd < 0) { 899 error_setg_errno(errp, errno, "Could not open temporary file '%s'", 900 filename); 901 return NULL; 902 } 903 close(fd); 904 905 return g_steal_pointer(&filename); 906 } 907 908 /* 909 * Detect host devices. By convention, /dev/cdrom[N] is always 910 * recognized as a host CDROM. 911 */ 912 static BlockDriver *find_hdev_driver(const char *filename) 913 { 914 int score_max = 0, score; 915 BlockDriver *drv = NULL, *d; 916 GLOBAL_STATE_CODE(); 917 918 QLIST_FOREACH(d, &bdrv_drivers, list) { 919 if (d->bdrv_probe_device) { 920 score = d->bdrv_probe_device(filename); 921 if (score > score_max) { 922 score_max = score; 923 drv = d; 924 } 925 } 926 } 927 928 return drv; 929 } 930 931 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 932 { 933 BlockDriver *drv1; 934 GLOBAL_STATE_CODE(); 935 936 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 937 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 938 return drv1; 939 } 940 } 941 942 return NULL; 943 } 944 945 BlockDriver *bdrv_find_protocol(const char *filename, 946 bool allow_protocol_prefix, 947 Error **errp) 948 { 949 BlockDriver *drv1; 950 char protocol[128]; 951 int len; 952 const char *p; 953 int i; 954 955 GLOBAL_STATE_CODE(); 956 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 957 958 /* 959 * XXX(hch): we really should not let host device detection 960 * override an explicit protocol specification, but moving this 961 * later breaks access to device names with colons in them. 962 * Thanks to the brain-dead persistent naming schemes on udev- 963 * based Linux systems those actually are quite common. 
964 */ 965 drv1 = find_hdev_driver(filename); 966 if (drv1) { 967 return drv1; 968 } 969 970 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 971 return &bdrv_file; 972 } 973 974 p = strchr(filename, ':'); 975 assert(p != NULL); 976 len = p - filename; 977 if (len > sizeof(protocol) - 1) 978 len = sizeof(protocol) - 1; 979 memcpy(protocol, filename, len); 980 protocol[len] = '\0'; 981 982 drv1 = bdrv_do_find_protocol(protocol); 983 if (drv1) { 984 return drv1; 985 } 986 987 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 988 if (block_driver_modules[i].protocol_name && 989 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 990 int rv = block_module_load(block_driver_modules[i].library_name, errp); 991 if (rv > 0) { 992 drv1 = bdrv_do_find_protocol(protocol); 993 } else if (rv < 0) { 994 return NULL; 995 } 996 break; 997 } 998 } 999 1000 if (!drv1) { 1001 error_setg(errp, "Unknown protocol '%s'", protocol); 1002 } 1003 return drv1; 1004 } 1005 1006 /* 1007 * Guess image format by probing its contents. 1008 * This is not a good idea when your image is raw (CVE-2008-2004), but 1009 * we do it anyway for backward compatibility. 1010 * 1011 * @buf contains the image's first @buf_size bytes. 1012 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 1013 * but can be smaller if the image file is smaller) 1014 * @filename is its filename. 1015 * 1016 * For all block drivers, call the bdrv_probe() method to get its 1017 * probing score. 1018 * Return the first block driver with the highest probing score. 
1019 */ 1020 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 1021 const char *filename) 1022 { 1023 int score_max = 0, score; 1024 BlockDriver *drv = NULL, *d; 1025 IO_CODE(); 1026 1027 QLIST_FOREACH(d, &bdrv_drivers, list) { 1028 if (d->bdrv_probe) { 1029 score = d->bdrv_probe(buf, buf_size, filename); 1030 if (score > score_max) { 1031 score_max = score; 1032 drv = d; 1033 } 1034 } 1035 } 1036 1037 return drv; 1038 } 1039 1040 static int find_image_format(BlockBackend *file, const char *filename, 1041 BlockDriver **pdrv, Error **errp) 1042 { 1043 BlockDriver *drv; 1044 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 1045 int ret = 0; 1046 1047 GLOBAL_STATE_CODE(); 1048 1049 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 1050 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 1051 *pdrv = &bdrv_raw; 1052 return ret; 1053 } 1054 1055 ret = blk_pread(file, 0, sizeof(buf), buf, 0); 1056 if (ret < 0) { 1057 error_setg_errno(errp, -ret, "Could not read image for determining its " 1058 "format"); 1059 *pdrv = NULL; 1060 return ret; 1061 } 1062 1063 drv = bdrv_probe_all(buf, sizeof(buf), filename); 1064 if (!drv) { 1065 error_setg(errp, "Could not determine image format: No compatible " 1066 "driver found"); 1067 *pdrv = NULL; 1068 return -ENOENT; 1069 } 1070 1071 *pdrv = drv; 1072 return 0; 1073 } 1074 1075 /** 1076 * Set the current 'total_sectors' value 1077 * Return 0 on success, -errno on error. 
1078 */ 1079 int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 1080 { 1081 BlockDriver *drv = bs->drv; 1082 IO_CODE(); 1083 1084 if (!drv) { 1085 return -ENOMEDIUM; 1086 } 1087 1088 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 1089 if (bdrv_is_sg(bs)) 1090 return 0; 1091 1092 /* query actual device if possible, otherwise just trust the hint */ 1093 if (drv->bdrv_getlength) { 1094 int64_t length = drv->bdrv_getlength(bs); 1095 if (length < 0) { 1096 return length; 1097 } 1098 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1099 } 1100 1101 bs->total_sectors = hint; 1102 1103 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1104 return -EFBIG; 1105 } 1106 1107 return 0; 1108 } 1109 1110 /** 1111 * Combines a QDict of new block driver @options with any missing options taken 1112 * from @old_options, so that leaving out an option defaults to its old value. 1113 */ 1114 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1115 QDict *old_options) 1116 { 1117 GLOBAL_STATE_CODE(); 1118 if (bs->drv && bs->drv->bdrv_join_options) { 1119 bs->drv->bdrv_join_options(options, old_options); 1120 } else { 1121 qdict_join(options, old_options, false); 1122 } 1123 } 1124 1125 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1126 int open_flags, 1127 Error **errp) 1128 { 1129 Error *local_err = NULL; 1130 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1131 BlockdevDetectZeroesOptions detect_zeroes = 1132 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1133 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1134 GLOBAL_STATE_CODE(); 1135 g_free(value); 1136 if (local_err) { 1137 error_propagate(errp, local_err); 1138 return detect_zeroes; 1139 } 1140 1141 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1142 !(open_flags & BDRV_O_UNMAP)) 1143 { 1144 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1145 "without setting discard operation to unmap"); 
1146 } 1147 1148 return detect_zeroes; 1149 } 1150 1151 /** 1152 * Set open flags for aio engine 1153 * 1154 * Return 0 on success, -1 if the engine specified is invalid 1155 */ 1156 int bdrv_parse_aio(const char *mode, int *flags) 1157 { 1158 if (!strcmp(mode, "threads")) { 1159 /* do nothing, default */ 1160 } else if (!strcmp(mode, "native")) { 1161 *flags |= BDRV_O_NATIVE_AIO; 1162 #ifdef CONFIG_LINUX_IO_URING 1163 } else if (!strcmp(mode, "io_uring")) { 1164 *flags |= BDRV_O_IO_URING; 1165 #endif 1166 } else { 1167 return -1; 1168 } 1169 1170 return 0; 1171 } 1172 1173 /** 1174 * Set open flags for a given discard mode 1175 * 1176 * Return 0 on success, -1 if the discard mode was invalid. 1177 */ 1178 int bdrv_parse_discard_flags(const char *mode, int *flags) 1179 { 1180 *flags &= ~BDRV_O_UNMAP; 1181 1182 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1183 /* do nothing */ 1184 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1185 *flags |= BDRV_O_UNMAP; 1186 } else { 1187 return -1; 1188 } 1189 1190 return 0; 1191 } 1192 1193 /** 1194 * Set open flags for a given cache mode 1195 * 1196 * Return 0 on success, -1 if the cache mode was invalid. 
1197 */ 1198 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1199 { 1200 *flags &= ~BDRV_O_CACHE_MASK; 1201 1202 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1203 *writethrough = false; 1204 *flags |= BDRV_O_NOCACHE; 1205 } else if (!strcmp(mode, "directsync")) { 1206 *writethrough = true; 1207 *flags |= BDRV_O_NOCACHE; 1208 } else if (!strcmp(mode, "writeback")) { 1209 *writethrough = false; 1210 } else if (!strcmp(mode, "unsafe")) { 1211 *writethrough = false; 1212 *flags |= BDRV_O_NO_FLUSH; 1213 } else if (!strcmp(mode, "writethrough")) { 1214 *writethrough = true; 1215 } else { 1216 return -1; 1217 } 1218 1219 return 0; 1220 } 1221 1222 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1223 { 1224 BlockDriverState *parent = c->opaque; 1225 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); 1226 } 1227 1228 static void bdrv_child_cb_drained_begin(BdrvChild *child) 1229 { 1230 BlockDriverState *bs = child->opaque; 1231 bdrv_do_drained_begin_quiesce(bs, NULL, false); 1232 } 1233 1234 static bool bdrv_child_cb_drained_poll(BdrvChild *child) 1235 { 1236 BlockDriverState *bs = child->opaque; 1237 return bdrv_drain_poll(bs, false, NULL, false); 1238 } 1239 1240 static void bdrv_child_cb_drained_end(BdrvChild *child, 1241 int *drained_end_counter) 1242 { 1243 BlockDriverState *bs = child->opaque; 1244 bdrv_drained_end_no_poll(bs, drained_end_counter); 1245 } 1246 1247 static int bdrv_child_cb_inactivate(BdrvChild *child) 1248 { 1249 BlockDriverState *bs = child->opaque; 1250 GLOBAL_STATE_CODE(); 1251 assert(bs->open_flags & BDRV_O_INACTIVE); 1252 return 0; 1253 } 1254 1255 static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, 1256 GHashTable *visited, Transaction *tran, 1257 Error **errp) 1258 { 1259 BlockDriverState *bs = child->opaque; 1260 return bdrv_change_aio_context(bs, ctx, visited, tran, errp); 1261 } 1262 1263 /* 1264 * Returns the options and flags that a temporary snapshot should 
get, based on 1265 * the originally requested flags (the originally requested image will have 1266 * flags like a backing file) 1267 */ 1268 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 1269 int parent_flags, QDict *parent_options) 1270 { 1271 GLOBAL_STATE_CODE(); 1272 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 1273 1274 /* For temporary files, unconditional cache=unsafe is fine */ 1275 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 1276 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 1277 1278 /* Copy the read-only and discard options from the parent */ 1279 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1280 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); 1281 1282 /* aio=native doesn't work for cache.direct=off, so disable it for the 1283 * temporary snapshot */ 1284 *child_flags &= ~BDRV_O_NATIVE_AIO; 1285 } 1286 1287 static void bdrv_backing_attach(BdrvChild *c) 1288 { 1289 BlockDriverState *parent = c->opaque; 1290 BlockDriverState *backing_hd = c->bs; 1291 1292 GLOBAL_STATE_CODE(); 1293 assert(!parent->backing_blocker); 1294 error_setg(&parent->backing_blocker, 1295 "node is used as backing hd of '%s'", 1296 bdrv_get_device_or_node_name(parent)); 1297 1298 bdrv_refresh_filename(backing_hd); 1299 1300 parent->open_flags &= ~BDRV_O_NO_BACKING; 1301 1302 bdrv_op_block_all(backing_hd, parent->backing_blocker); 1303 /* Otherwise we won't be able to commit or stream */ 1304 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1305 parent->backing_blocker); 1306 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 1307 parent->backing_blocker); 1308 /* 1309 * We do backup in 3 ways: 1310 * 1. drive backup 1311 * The target bs is new opened, and the source is top BDS 1312 * 2. blockdev backup 1313 * Both the source and the target are top BDSes. 1314 * 3. 
internal backup(used for block replication) 1315 * Both the source and the target are backing file 1316 * 1317 * In case 1 and 2, neither the source nor the target is the backing file. 1318 * In case 3, we will block the top BDS, so there is only one block job 1319 * for the top BDS and its backing chain. 1320 */ 1321 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 1322 parent->backing_blocker); 1323 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 1324 parent->backing_blocker); 1325 } 1326 1327 static void bdrv_backing_detach(BdrvChild *c) 1328 { 1329 BlockDriverState *parent = c->opaque; 1330 1331 GLOBAL_STATE_CODE(); 1332 assert(parent->backing_blocker); 1333 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 1334 error_free(parent->backing_blocker); 1335 parent->backing_blocker = NULL; 1336 } 1337 1338 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1339 const char *filename, Error **errp) 1340 { 1341 BlockDriverState *parent = c->opaque; 1342 bool read_only = bdrv_is_read_only(parent); 1343 int ret; 1344 GLOBAL_STATE_CODE(); 1345 1346 if (read_only) { 1347 ret = bdrv_reopen_set_read_only(parent, false, errp); 1348 if (ret < 0) { 1349 return ret; 1350 } 1351 } 1352 1353 ret = bdrv_change_backing_file(parent, filename, 1354 base->drv ? base->drv->format_name : "", 1355 false); 1356 if (ret < 0) { 1357 error_setg_errno(errp, -ret, "Could not update backing file link"); 1358 } 1359 1360 if (read_only) { 1361 bdrv_reopen_set_read_only(parent, true, NULL); 1362 } 1363 1364 return ret; 1365 } 1366 1367 /* 1368 * Returns the options and flags that a generic child of a BDS should 1369 * get, based on the given options and flags for the parent BDS. 
1370 */ 1371 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, 1372 int *child_flags, QDict *child_options, 1373 int parent_flags, QDict *parent_options) 1374 { 1375 int flags = parent_flags; 1376 GLOBAL_STATE_CODE(); 1377 1378 /* 1379 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. 1380 * Generally, the question to answer is: Should this child be 1381 * format-probed by default? 1382 */ 1383 1384 /* 1385 * Pure and non-filtered data children of non-format nodes should 1386 * be probed by default (even when the node itself has BDRV_O_PROTOCOL 1387 * set). This only affects a very limited set of drivers (namely 1388 * quorum and blkverify when this comment was written). 1389 * Force-clear BDRV_O_PROTOCOL then. 1390 */ 1391 if (!parent_is_format && 1392 (role & BDRV_CHILD_DATA) && 1393 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) 1394 { 1395 flags &= ~BDRV_O_PROTOCOL; 1396 } 1397 1398 /* 1399 * All children of format nodes (except for COW children) and all 1400 * metadata children in general should never be format-probed. 1401 * Force-set BDRV_O_PROTOCOL then. 1402 */ 1403 if ((parent_is_format && !(role & BDRV_CHILD_COW)) || 1404 (role & BDRV_CHILD_METADATA)) 1405 { 1406 flags |= BDRV_O_PROTOCOL; 1407 } 1408 1409 /* 1410 * If the cache mode isn't explicitly set, inherit direct and no-flush from 1411 * the parent. 
1412 */ 1413 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1414 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1415 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1416 1417 if (role & BDRV_CHILD_COW) { 1418 /* backing files are opened read-only by default */ 1419 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1420 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); 1421 } else { 1422 /* Inherit the read-only option from the parent if it's not set */ 1423 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1424 qdict_copy_default(child_options, parent_options, 1425 BDRV_OPT_AUTO_READ_ONLY); 1426 } 1427 1428 /* 1429 * bdrv_co_pdiscard() respects unmap policy for the parent, so we 1430 * can default to enable it on lower layers regardless of the 1431 * parent option. 1432 */ 1433 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 1434 1435 /* Clear flags that only apply to the top layer */ 1436 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 1437 1438 if (role & BDRV_CHILD_METADATA) { 1439 flags &= ~BDRV_O_NO_IO; 1440 } 1441 if (role & BDRV_CHILD_COW) { 1442 flags &= ~BDRV_O_TEMPORARY; 1443 } 1444 1445 *child_flags = flags; 1446 } 1447 1448 static void bdrv_child_cb_attach(BdrvChild *child) 1449 { 1450 BlockDriverState *bs = child->opaque; 1451 1452 assert_bdrv_graph_writable(bs); 1453 QLIST_INSERT_HEAD(&bs->children, child, next); 1454 if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { 1455 /* 1456 * Here we handle filters and block/raw-format.c when it behave like 1457 * filter. They generally have a single PRIMARY child, which is also the 1458 * FILTERED child, and that they may have multiple more children, which 1459 * are neither PRIMARY nor FILTERED. And never we have a COW child here. 
1460 * So bs->file will be the PRIMARY child, unless the PRIMARY child goes 1461 * into bs->backing on exceptional cases; and bs->backing will be 1462 * nothing else. 1463 */ 1464 assert(!(child->role & BDRV_CHILD_COW)); 1465 if (child->role & BDRV_CHILD_PRIMARY) { 1466 assert(child->role & BDRV_CHILD_FILTERED); 1467 assert(!bs->backing); 1468 assert(!bs->file); 1469 1470 if (bs->drv->filtered_child_is_backing) { 1471 bs->backing = child; 1472 } else { 1473 bs->file = child; 1474 } 1475 } else { 1476 assert(!(child->role & BDRV_CHILD_FILTERED)); 1477 } 1478 } else if (child->role & BDRV_CHILD_COW) { 1479 assert(bs->drv->supports_backing); 1480 assert(!(child->role & BDRV_CHILD_PRIMARY)); 1481 assert(!bs->backing); 1482 bs->backing = child; 1483 bdrv_backing_attach(child); 1484 } else if (child->role & BDRV_CHILD_PRIMARY) { 1485 assert(!bs->file); 1486 bs->file = child; 1487 } 1488 1489 bdrv_apply_subtree_drain(child, bs); 1490 } 1491 1492 static void bdrv_child_cb_detach(BdrvChild *child) 1493 { 1494 BlockDriverState *bs = child->opaque; 1495 1496 if (child->role & BDRV_CHILD_COW) { 1497 bdrv_backing_detach(child); 1498 } 1499 1500 bdrv_unapply_subtree_drain(child, bs); 1501 1502 assert_bdrv_graph_writable(bs); 1503 QLIST_REMOVE(child, next); 1504 if (child == bs->backing) { 1505 assert(child != bs->file); 1506 bs->backing = NULL; 1507 } else if (child == bs->file) { 1508 bs->file = NULL; 1509 } 1510 } 1511 1512 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, 1513 const char *filename, Error **errp) 1514 { 1515 if (c->role & BDRV_CHILD_COW) { 1516 return bdrv_backing_update_filename(c, base, filename, errp); 1517 } 1518 return 0; 1519 } 1520 1521 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) 1522 { 1523 BlockDriverState *bs = c->opaque; 1524 IO_CODE(); 1525 1526 return bdrv_get_aio_context(bs); 1527 } 1528 1529 const BdrvChildClass child_of_bds = { 1530 .parent_is_bds = true, 1531 .get_parent_desc = 
bdrv_child_get_parent_desc, 1532 .inherit_options = bdrv_inherited_options, 1533 .drained_begin = bdrv_child_cb_drained_begin, 1534 .drained_poll = bdrv_child_cb_drained_poll, 1535 .drained_end = bdrv_child_cb_drained_end, 1536 .attach = bdrv_child_cb_attach, 1537 .detach = bdrv_child_cb_detach, 1538 .inactivate = bdrv_child_cb_inactivate, 1539 .change_aio_ctx = bdrv_child_cb_change_aio_ctx, 1540 .update_filename = bdrv_child_cb_update_filename, 1541 .get_parent_aio_context = child_of_bds_get_parent_aio_context, 1542 }; 1543 1544 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) 1545 { 1546 GLOBAL_STATE_CODE(); 1547 return c->klass->get_parent_aio_context(c); 1548 } 1549 1550 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1551 { 1552 int open_flags = flags; 1553 GLOBAL_STATE_CODE(); 1554 1555 /* 1556 * Clear flags that are internal to the block layer before opening the 1557 * image. 1558 */ 1559 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1560 1561 return open_flags; 1562 } 1563 1564 static void update_flags_from_options(int *flags, QemuOpts *opts) 1565 { 1566 GLOBAL_STATE_CODE(); 1567 1568 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); 1569 1570 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1571 *flags |= BDRV_O_NO_FLUSH; 1572 } 1573 1574 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1575 *flags |= BDRV_O_NOCACHE; 1576 } 1577 1578 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { 1579 *flags |= BDRV_O_RDWR; 1580 } 1581 1582 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { 1583 *flags |= BDRV_O_AUTO_RDONLY; 1584 } 1585 } 1586 1587 static void update_options_from_flags(QDict *options, int flags) 1588 { 1589 GLOBAL_STATE_CODE(); 1590 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1591 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1592 } 1593 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 
1594 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1595 flags & BDRV_O_NO_FLUSH); 1596 } 1597 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1598 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1599 } 1600 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { 1601 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, 1602 flags & BDRV_O_AUTO_RDONLY); 1603 } 1604 } 1605 1606 static void bdrv_assign_node_name(BlockDriverState *bs, 1607 const char *node_name, 1608 Error **errp) 1609 { 1610 char *gen_node_name = NULL; 1611 GLOBAL_STATE_CODE(); 1612 1613 if (!node_name) { 1614 node_name = gen_node_name = id_generate(ID_BLOCK); 1615 } else if (!id_wellformed(node_name)) { 1616 /* 1617 * Check for empty string or invalid characters, but not if it is 1618 * generated (generated names use characters not available to the user) 1619 */ 1620 error_setg(errp, "Invalid node-name: '%s'", node_name); 1621 return; 1622 } 1623 1624 /* takes care of avoiding namespaces collisions */ 1625 if (blk_by_name(node_name)) { 1626 error_setg(errp, "node-name=%s is conflicting with a device id", 1627 node_name); 1628 goto out; 1629 } 1630 1631 /* takes care of avoiding duplicates node names */ 1632 if (bdrv_find_node(node_name)) { 1633 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name); 1634 goto out; 1635 } 1636 1637 /* Make sure that the node name isn't truncated */ 1638 if (strlen(node_name) >= sizeof(bs->node_name)) { 1639 error_setg(errp, "Node name too long"); 1640 goto out; 1641 } 1642 1643 /* copy node name into the bs and insert it into the graph list */ 1644 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1645 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1646 out: 1647 g_free(gen_node_name); 1648 } 1649 1650 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1651 const char *node_name, QDict *options, 1652 int open_flags, Error **errp) 1653 { 1654 Error *local_err = NULL; 1655 int i, ret; 1656 
GLOBAL_STATE_CODE(); 1657 1658 bdrv_assign_node_name(bs, node_name, &local_err); 1659 if (local_err) { 1660 error_propagate(errp, local_err); 1661 return -EINVAL; 1662 } 1663 1664 bs->drv = drv; 1665 bs->opaque = g_malloc0(drv->instance_size); 1666 1667 if (drv->bdrv_file_open) { 1668 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1669 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1670 } else if (drv->bdrv_open) { 1671 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1672 } else { 1673 ret = 0; 1674 } 1675 1676 if (ret < 0) { 1677 if (local_err) { 1678 error_propagate(errp, local_err); 1679 } else if (bs->filename[0]) { 1680 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1681 } else { 1682 error_setg_errno(errp, -ret, "Could not open image"); 1683 } 1684 goto open_failed; 1685 } 1686 1687 assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK)); 1688 assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK)); 1689 1690 /* 1691 * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves 1692 * drivers that pass read/write requests through to a child the trouble of 1693 * declaring support explicitly. 1694 * 1695 * Drivers must not propagate this flag accidentally when they initiate I/O 1696 * to a bounce buffer. That case should be rare though. 
1697 */ 1698 bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; 1699 bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; 1700 1701 ret = refresh_total_sectors(bs, bs->total_sectors); 1702 if (ret < 0) { 1703 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 1704 return ret; 1705 } 1706 1707 bdrv_refresh_limits(bs, NULL, &local_err); 1708 if (local_err) { 1709 error_propagate(errp, local_err); 1710 return -EINVAL; 1711 } 1712 1713 assert(bdrv_opt_mem_align(bs) != 0); 1714 assert(bdrv_min_mem_align(bs) != 0); 1715 assert(is_power_of_2(bs->bl.request_alignment)); 1716 1717 for (i = 0; i < bs->quiesce_counter; i++) { 1718 if (drv->bdrv_co_drain_begin) { 1719 drv->bdrv_co_drain_begin(bs); 1720 } 1721 } 1722 1723 return 0; 1724 open_failed: 1725 bs->drv = NULL; 1726 if (bs->file != NULL) { 1727 bdrv_unref_child(bs, bs->file); 1728 assert(!bs->file); 1729 } 1730 g_free(bs->opaque); 1731 bs->opaque = NULL; 1732 return ret; 1733 } 1734 1735 /* 1736 * Create and open a block node. 1737 * 1738 * @options is a QDict of options to pass to the block drivers, or NULL for an 1739 * empty set of options. The reference to the QDict belongs to the block layer 1740 * after the call (even on failure), so if the caller intends to reuse the 1741 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 
1742 */ 1743 BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, 1744 const char *node_name, 1745 QDict *options, int flags, 1746 Error **errp) 1747 { 1748 BlockDriverState *bs; 1749 int ret; 1750 1751 GLOBAL_STATE_CODE(); 1752 1753 bs = bdrv_new(); 1754 bs->open_flags = flags; 1755 bs->options = options ?: qdict_new(); 1756 bs->explicit_options = qdict_clone_shallow(bs->options); 1757 bs->opaque = NULL; 1758 1759 update_options_from_flags(bs->options, flags); 1760 1761 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1762 if (ret < 0) { 1763 qobject_unref(bs->explicit_options); 1764 bs->explicit_options = NULL; 1765 qobject_unref(bs->options); 1766 bs->options = NULL; 1767 bdrv_unref(bs); 1768 return NULL; 1769 } 1770 1771 return bs; 1772 } 1773 1774 /* Create and open a block node. */ 1775 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1776 int flags, Error **errp) 1777 { 1778 GLOBAL_STATE_CODE(); 1779 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); 1780 } 1781 1782 QemuOptsList bdrv_runtime_opts = { 1783 .name = "bdrv_common", 1784 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1785 .desc = { 1786 { 1787 .name = "node-name", 1788 .type = QEMU_OPT_STRING, 1789 .help = "Node name of the block device node", 1790 }, 1791 { 1792 .name = "driver", 1793 .type = QEMU_OPT_STRING, 1794 .help = "Block driver to use for the node", 1795 }, 1796 { 1797 .name = BDRV_OPT_CACHE_DIRECT, 1798 .type = QEMU_OPT_BOOL, 1799 .help = "Bypass software writeback cache on the host", 1800 }, 1801 { 1802 .name = BDRV_OPT_CACHE_NO_FLUSH, 1803 .type = QEMU_OPT_BOOL, 1804 .help = "Ignore flush requests", 1805 }, 1806 { 1807 .name = BDRV_OPT_READ_ONLY, 1808 .type = QEMU_OPT_BOOL, 1809 .help = "Node is opened in read-only mode", 1810 }, 1811 { 1812 .name = BDRV_OPT_AUTO_READ_ONLY, 1813 .type = QEMU_OPT_BOOL, 1814 .help = "Node can become read-only if opening read-write fails", 1815 }, 1816 { 1817 .name 
= "detect-zeroes", 1818 .type = QEMU_OPT_STRING, 1819 .help = "try to optimize zero writes (off, on, unmap)", 1820 }, 1821 { 1822 .name = BDRV_OPT_DISCARD, 1823 .type = QEMU_OPT_STRING, 1824 .help = "discard operation (ignore/off, unmap/on)", 1825 }, 1826 { 1827 .name = BDRV_OPT_FORCE_SHARE, 1828 .type = QEMU_OPT_BOOL, 1829 .help = "always accept other writers (default: off)", 1830 }, 1831 { /* end of list */ } 1832 }, 1833 }; 1834 1835 QemuOptsList bdrv_create_opts_simple = { 1836 .name = "simple-create-opts", 1837 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1838 .desc = { 1839 { 1840 .name = BLOCK_OPT_SIZE, 1841 .type = QEMU_OPT_SIZE, 1842 .help = "Virtual disk size" 1843 }, 1844 { 1845 .name = BLOCK_OPT_PREALLOC, 1846 .type = QEMU_OPT_STRING, 1847 .help = "Preallocation mode (allowed values: off)" 1848 }, 1849 { /* end of list */ } 1850 } 1851 }; 1852 1853 /* 1854 * Common part for opening disk images and files 1855 * 1856 * Removes all processed options from *options. 
1857 */ 1858 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1859 QDict *options, Error **errp) 1860 { 1861 int ret, open_flags; 1862 const char *filename; 1863 const char *driver_name = NULL; 1864 const char *node_name = NULL; 1865 const char *discard; 1866 QemuOpts *opts; 1867 BlockDriver *drv; 1868 Error *local_err = NULL; 1869 bool ro; 1870 1871 assert(bs->file == NULL); 1872 assert(options != NULL && bs->options != options); 1873 GLOBAL_STATE_CODE(); 1874 1875 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1876 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1877 ret = -EINVAL; 1878 goto fail_opts; 1879 } 1880 1881 update_flags_from_options(&bs->open_flags, opts); 1882 1883 driver_name = qemu_opt_get(opts, "driver"); 1884 drv = bdrv_find_format(driver_name); 1885 assert(drv != NULL); 1886 1887 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1888 1889 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1890 error_setg(errp, 1891 BDRV_OPT_FORCE_SHARE 1892 "=on can only be used with read-only images"); 1893 ret = -EINVAL; 1894 goto fail_opts; 1895 } 1896 1897 if (file != NULL) { 1898 bdrv_refresh_filename(blk_bs(file)); 1899 filename = blk_bs(file)->filename; 1900 } else { 1901 /* 1902 * Caution: while qdict_get_try_str() is fine, getting 1903 * non-string types would require more care. When @options 1904 * come from -blockdev or blockdev_add, its members are typed 1905 * according to the QAPI schema, but when they come from 1906 * -drive, they're all QString. 
1907 */ 1908 filename = qdict_get_try_str(options, "filename"); 1909 } 1910 1911 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1912 error_setg(errp, "The '%s' block driver requires a file name", 1913 drv->format_name); 1914 ret = -EINVAL; 1915 goto fail_opts; 1916 } 1917 1918 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1919 drv->format_name); 1920 1921 ro = bdrv_is_read_only(bs); 1922 1923 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { 1924 if (!ro && bdrv_is_whitelisted(drv, true)) { 1925 ret = bdrv_apply_auto_read_only(bs, NULL, NULL); 1926 } else { 1927 ret = -ENOTSUP; 1928 } 1929 if (ret < 0) { 1930 error_setg(errp, 1931 !ro && bdrv_is_whitelisted(drv, true) 1932 ? "Driver '%s' can only be used for read-only devices" 1933 : "Driver '%s' is not whitelisted", 1934 drv->format_name); 1935 goto fail_opts; 1936 } 1937 } 1938 1939 /* bdrv_new() and bdrv_close() make it so */ 1940 assert(qatomic_read(&bs->copy_on_read) == 0); 1941 1942 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1943 if (!ro) { 1944 bdrv_enable_copy_on_read(bs); 1945 } else { 1946 error_setg(errp, "Can't use copy-on-read on read-only device"); 1947 ret = -EINVAL; 1948 goto fail_opts; 1949 } 1950 } 1951 1952 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); 1953 if (discard != NULL) { 1954 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1955 error_setg(errp, "Invalid discard option"); 1956 ret = -EINVAL; 1957 goto fail_opts; 1958 } 1959 } 1960 1961 bs->detect_zeroes = 1962 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); 1963 if (local_err) { 1964 error_propagate(errp, local_err); 1965 ret = -EINVAL; 1966 goto fail_opts; 1967 } 1968 1969 if (filename != NULL) { 1970 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1971 } else { 1972 bs->filename[0] = '\0'; 1973 } 1974 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1975 1976 /* Open the image, either directly or using a protocol */ 1977 open_flags = 
bdrv_open_flags(bs, bs->open_flags); 1978 node_name = qemu_opt_get(opts, "node-name"); 1979 1980 assert(!drv->bdrv_file_open || file == NULL); 1981 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1982 if (ret < 0) { 1983 goto fail_opts; 1984 } 1985 1986 qemu_opts_del(opts); 1987 return 0; 1988 1989 fail_opts: 1990 qemu_opts_del(opts); 1991 return ret; 1992 } 1993 1994 static QDict *parse_json_filename(const char *filename, Error **errp) 1995 { 1996 QObject *options_obj; 1997 QDict *options; 1998 int ret; 1999 GLOBAL_STATE_CODE(); 2000 2001 ret = strstart(filename, "json:", &filename); 2002 assert(ret); 2003 2004 options_obj = qobject_from_json(filename, errp); 2005 if (!options_obj) { 2006 error_prepend(errp, "Could not parse the JSON options: "); 2007 return NULL; 2008 } 2009 2010 options = qobject_to(QDict, options_obj); 2011 if (!options) { 2012 qobject_unref(options_obj); 2013 error_setg(errp, "Invalid JSON object given"); 2014 return NULL; 2015 } 2016 2017 qdict_flatten(options); 2018 2019 return options; 2020 } 2021 2022 static void parse_json_protocol(QDict *options, const char **pfilename, 2023 Error **errp) 2024 { 2025 QDict *json_options; 2026 Error *local_err = NULL; 2027 GLOBAL_STATE_CODE(); 2028 2029 /* Parse json: pseudo-protocol */ 2030 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 2031 return; 2032 } 2033 2034 json_options = parse_json_filename(*pfilename, &local_err); 2035 if (local_err) { 2036 error_propagate(errp, local_err); 2037 return; 2038 } 2039 2040 /* Options given in the filename have lower priority than options 2041 * specified directly */ 2042 qdict_join(options, json_options, false); 2043 qobject_unref(json_options); 2044 *pfilename = NULL; 2045 } 2046 2047 /* 2048 * Fills in default options for opening images and converts the legacy 2049 * filename/flags pair to option QDict entries. 
2050 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 2051 * block driver has been specified explicitly. 2052 */ 2053 static int bdrv_fill_options(QDict **options, const char *filename, 2054 int *flags, Error **errp) 2055 { 2056 const char *drvname; 2057 bool protocol = *flags & BDRV_O_PROTOCOL; 2058 bool parse_filename = false; 2059 BlockDriver *drv = NULL; 2060 Error *local_err = NULL; 2061 2062 GLOBAL_STATE_CODE(); 2063 2064 /* 2065 * Caution: while qdict_get_try_str() is fine, getting non-string 2066 * types would require more care. When @options come from 2067 * -blockdev or blockdev_add, its members are typed according to 2068 * the QAPI schema, but when they come from -drive, they're all 2069 * QString. 2070 */ 2071 drvname = qdict_get_try_str(*options, "driver"); 2072 if (drvname) { 2073 drv = bdrv_find_format(drvname); 2074 if (!drv) { 2075 error_setg(errp, "Unknown driver '%s'", drvname); 2076 return -ENOENT; 2077 } 2078 /* If the user has explicitly specified the driver, this choice should 2079 * override the BDRV_O_PROTOCOL flag */ 2080 protocol = drv->bdrv_file_open; 2081 } 2082 2083 if (protocol) { 2084 *flags |= BDRV_O_PROTOCOL; 2085 } else { 2086 *flags &= ~BDRV_O_PROTOCOL; 2087 } 2088 2089 /* Translate cache options from flags into options */ 2090 update_options_from_flags(*options, *flags); 2091 2092 /* Fetch the file name from the options QDict if necessary */ 2093 if (protocol && filename) { 2094 if (!qdict_haskey(*options, "filename")) { 2095 qdict_put_str(*options, "filename", filename); 2096 parse_filename = true; 2097 } else { 2098 error_setg(errp, "Can't specify 'file' and 'filename' options at " 2099 "the same time"); 2100 return -EINVAL; 2101 } 2102 } 2103 2104 /* Find the right block driver */ 2105 /* See cautionary note on accessing @options above */ 2106 filename = qdict_get_try_str(*options, "filename"); 2107 2108 if (!drvname && protocol) { 2109 if (filename) { 2110 drv = bdrv_find_protocol(filename, 
parse_filename, errp); 2111 if (!drv) { 2112 return -EINVAL; 2113 } 2114 2115 drvname = drv->format_name; 2116 qdict_put_str(*options, "driver", drvname); 2117 } else { 2118 error_setg(errp, "Must specify either driver or file"); 2119 return -EINVAL; 2120 } 2121 } 2122 2123 assert(drv || !protocol); 2124 2125 /* Driver-specific filename parsing */ 2126 if (drv && drv->bdrv_parse_filename && parse_filename) { 2127 drv->bdrv_parse_filename(filename, *options, &local_err); 2128 if (local_err) { 2129 error_propagate(errp, local_err); 2130 return -EINVAL; 2131 } 2132 2133 if (!drv->bdrv_needs_filename) { 2134 qdict_del(*options, "filename"); 2135 } 2136 } 2137 2138 return 0; 2139 } 2140 2141 typedef struct BlockReopenQueueEntry { 2142 bool prepared; 2143 bool perms_checked; 2144 BDRVReopenState state; 2145 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 2146 } BlockReopenQueueEntry; 2147 2148 /* 2149 * Return the flags that @bs will have after the reopens in @q have 2150 * successfully completed. If @q is NULL (or @bs is not contained in @q), 2151 * return the current flags. 2152 */ 2153 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 2154 { 2155 BlockReopenQueueEntry *entry; 2156 2157 if (q != NULL) { 2158 QTAILQ_FOREACH(entry, q, entry) { 2159 if (entry->state.bs == bs) { 2160 return entry->state.flags; 2161 } 2162 } 2163 } 2164 2165 return bs->open_flags; 2166 } 2167 2168 /* Returns whether the image file can be written to after the reopen queue @q 2169 * has been successfully applied, or right now if @q is NULL. */ 2170 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 2171 BlockReopenQueue *q) 2172 { 2173 int flags = bdrv_reopen_get_flags(q, bs); 2174 2175 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 2176 } 2177 2178 /* 2179 * Return whether the BDS can be written to. 
This is not necessarily 2180 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2181 * be written to but do not count as read-only images. 2182 */ 2183 bool bdrv_is_writable(BlockDriverState *bs) 2184 { 2185 IO_CODE(); 2186 return bdrv_is_writable_after_reopen(bs, NULL); 2187 } 2188 2189 static char *bdrv_child_user_desc(BdrvChild *c) 2190 { 2191 GLOBAL_STATE_CODE(); 2192 return c->klass->get_parent_desc(c); 2193 } 2194 2195 /* 2196 * Check that @a allows everything that @b needs. @a and @b must reference same 2197 * child node. 2198 */ 2199 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) 2200 { 2201 const char *child_bs_name; 2202 g_autofree char *a_user = NULL; 2203 g_autofree char *b_user = NULL; 2204 g_autofree char *perms = NULL; 2205 2206 assert(a->bs); 2207 assert(a->bs == b->bs); 2208 GLOBAL_STATE_CODE(); 2209 2210 if ((b->perm & a->shared_perm) == b->perm) { 2211 return true; 2212 } 2213 2214 child_bs_name = bdrv_get_node_name(b->bs); 2215 a_user = bdrv_child_user_desc(a); 2216 b_user = bdrv_child_user_desc(b); 2217 perms = bdrv_perm_names(b->perm & ~a->shared_perm); 2218 2219 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " 2220 "both required by %s (uses node '%s' as '%s' child) and " 2221 "unshared by %s (uses node '%s' as '%s' child).", 2222 child_bs_name, perms, 2223 b_user, child_bs_name, b->name, 2224 a_user, child_bs_name, a->name); 2225 2226 return false; 2227 } 2228 2229 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) 2230 { 2231 BdrvChild *a, *b; 2232 GLOBAL_STATE_CODE(); 2233 2234 /* 2235 * During the loop we'll look at each pair twice. That's correct because 2236 * bdrv_a_allow_b() is asymmetric and we should check each pair in both 2237 * directions. 
2238 */ 2239 QLIST_FOREACH(a, &bs->parents, next_parent) { 2240 QLIST_FOREACH(b, &bs->parents, next_parent) { 2241 if (a == b) { 2242 continue; 2243 } 2244 2245 if (!bdrv_a_allow_b(a, b, errp)) { 2246 return true; 2247 } 2248 } 2249 } 2250 2251 return false; 2252 } 2253 2254 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2255 BdrvChild *c, BdrvChildRole role, 2256 BlockReopenQueue *reopen_queue, 2257 uint64_t parent_perm, uint64_t parent_shared, 2258 uint64_t *nperm, uint64_t *nshared) 2259 { 2260 assert(bs->drv && bs->drv->bdrv_child_perm); 2261 GLOBAL_STATE_CODE(); 2262 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2263 parent_perm, parent_shared, 2264 nperm, nshared); 2265 /* TODO Take force_share from reopen_queue */ 2266 if (child_bs && child_bs->force_share) { 2267 *nshared = BLK_PERM_ALL; 2268 } 2269 } 2270 2271 /* 2272 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for 2273 * nodes that are already in the @list, of course) so that final list is 2274 * topologically sorted. Return the result (GSList @list object is updated, so 2275 * don't use old reference after function call). 2276 * 2277 * On function start @list must be already topologically sorted and for any node 2278 * in the @list the whole subtree of the node must be in the @list as well. The 2279 * simplest way to satisfy this criteria: use only result of 2280 * bdrv_topological_dfs() or NULL as @list parameter. 
2281 */ 2282 static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, 2283 BlockDriverState *bs) 2284 { 2285 BdrvChild *child; 2286 g_autoptr(GHashTable) local_found = NULL; 2287 2288 GLOBAL_STATE_CODE(); 2289 2290 if (!found) { 2291 assert(!list); 2292 found = local_found = g_hash_table_new(NULL, NULL); 2293 } 2294 2295 if (g_hash_table_contains(found, bs)) { 2296 return list; 2297 } 2298 g_hash_table_add(found, bs); 2299 2300 QLIST_FOREACH(child, &bs->children, next) { 2301 list = bdrv_topological_dfs(list, found, child->bs); 2302 } 2303 2304 return g_slist_prepend(list, bs); 2305 } 2306 2307 typedef struct BdrvChildSetPermState { 2308 BdrvChild *child; 2309 uint64_t old_perm; 2310 uint64_t old_shared_perm; 2311 } BdrvChildSetPermState; 2312 2313 static void bdrv_child_set_perm_abort(void *opaque) 2314 { 2315 BdrvChildSetPermState *s = opaque; 2316 2317 GLOBAL_STATE_CODE(); 2318 2319 s->child->perm = s->old_perm; 2320 s->child->shared_perm = s->old_shared_perm; 2321 } 2322 2323 static TransactionActionDrv bdrv_child_set_pem_drv = { 2324 .abort = bdrv_child_set_perm_abort, 2325 .clean = g_free, 2326 }; 2327 2328 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, 2329 uint64_t shared, Transaction *tran) 2330 { 2331 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); 2332 GLOBAL_STATE_CODE(); 2333 2334 *s = (BdrvChildSetPermState) { 2335 .child = c, 2336 .old_perm = c->perm, 2337 .old_shared_perm = c->shared_perm, 2338 }; 2339 2340 c->perm = perm; 2341 c->shared_perm = shared; 2342 2343 tran_add(tran, &bdrv_child_set_pem_drv, s); 2344 } 2345 2346 static void bdrv_drv_set_perm_commit(void *opaque) 2347 { 2348 BlockDriverState *bs = opaque; 2349 uint64_t cumulative_perms, cumulative_shared_perms; 2350 GLOBAL_STATE_CODE(); 2351 2352 if (bs->drv->bdrv_set_perm) { 2353 bdrv_get_cumulative_perm(bs, &cumulative_perms, 2354 &cumulative_shared_perms); 2355 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 2356 } 2357 } 2358 
2359 static void bdrv_drv_set_perm_abort(void *opaque) 2360 { 2361 BlockDriverState *bs = opaque; 2362 GLOBAL_STATE_CODE(); 2363 2364 if (bs->drv->bdrv_abort_perm_update) { 2365 bs->drv->bdrv_abort_perm_update(bs); 2366 } 2367 } 2368 2369 TransactionActionDrv bdrv_drv_set_perm_drv = { 2370 .abort = bdrv_drv_set_perm_abort, 2371 .commit = bdrv_drv_set_perm_commit, 2372 }; 2373 2374 static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, 2375 uint64_t shared_perm, Transaction *tran, 2376 Error **errp) 2377 { 2378 GLOBAL_STATE_CODE(); 2379 if (!bs->drv) { 2380 return 0; 2381 } 2382 2383 if (bs->drv->bdrv_check_perm) { 2384 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); 2385 if (ret < 0) { 2386 return ret; 2387 } 2388 } 2389 2390 if (tran) { 2391 tran_add(tran, &bdrv_drv_set_perm_drv, bs); 2392 } 2393 2394 return 0; 2395 } 2396 2397 typedef struct BdrvReplaceChildState { 2398 BdrvChild *child; 2399 BlockDriverState *old_bs; 2400 } BdrvReplaceChildState; 2401 2402 static void bdrv_replace_child_commit(void *opaque) 2403 { 2404 BdrvReplaceChildState *s = opaque; 2405 GLOBAL_STATE_CODE(); 2406 2407 bdrv_unref(s->old_bs); 2408 } 2409 2410 static void bdrv_replace_child_abort(void *opaque) 2411 { 2412 BdrvReplaceChildState *s = opaque; 2413 BlockDriverState *new_bs = s->child->bs; 2414 2415 GLOBAL_STATE_CODE(); 2416 /* old_bs reference is transparently moved from @s to @s->child */ 2417 bdrv_replace_child_noperm(s->child, s->old_bs); 2418 bdrv_unref(new_bs); 2419 } 2420 2421 static TransactionActionDrv bdrv_replace_child_drv = { 2422 .commit = bdrv_replace_child_commit, 2423 .abort = bdrv_replace_child_abort, 2424 .clean = g_free, 2425 }; 2426 2427 /* 2428 * bdrv_replace_child_tran 2429 * 2430 * Note: real unref of old_bs is done only on commit. 2431 * 2432 * The function doesn't update permissions, caller is responsible for this. 
2433 */ 2434 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, 2435 Transaction *tran) 2436 { 2437 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); 2438 *s = (BdrvReplaceChildState) { 2439 .child = child, 2440 .old_bs = child->bs, 2441 }; 2442 tran_add(tran, &bdrv_replace_child_drv, s); 2443 2444 if (new_bs) { 2445 bdrv_ref(new_bs); 2446 } 2447 bdrv_replace_child_noperm(child, new_bs); 2448 /* old_bs reference is transparently moved from @child to @s */ 2449 } 2450 2451 /* 2452 * Refresh permissions in @bs subtree. The function is intended to be called 2453 * after some graph modification that was done without permission update. 2454 */ 2455 static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, 2456 Transaction *tran, Error **errp) 2457 { 2458 BlockDriver *drv = bs->drv; 2459 BdrvChild *c; 2460 int ret; 2461 uint64_t cumulative_perms, cumulative_shared_perms; 2462 GLOBAL_STATE_CODE(); 2463 2464 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); 2465 2466 /* Write permissions never work with read-only images */ 2467 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2468 !bdrv_is_writable_after_reopen(bs, q)) 2469 { 2470 if (!bdrv_is_writable_after_reopen(bs, NULL)) { 2471 error_setg(errp, "Block node is read-only"); 2472 } else { 2473 error_setg(errp, "Read-only block node '%s' cannot support " 2474 "read-write users", bdrv_get_node_name(bs)); 2475 } 2476 2477 return -EPERM; 2478 } 2479 2480 /* 2481 * Unaligned requests will automatically be aligned to bl.request_alignment 2482 * and without RESIZE we can't extend requests to write to space beyond the 2483 * end of the image, so it's required that the image size is aligned. 
2484 */ 2485 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2486 !(cumulative_perms & BLK_PERM_RESIZE)) 2487 { 2488 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { 2489 error_setg(errp, "Cannot get 'write' permission without 'resize': " 2490 "Image size is not a multiple of request " 2491 "alignment"); 2492 return -EPERM; 2493 } 2494 } 2495 2496 /* Check this node */ 2497 if (!drv) { 2498 return 0; 2499 } 2500 2501 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, 2502 errp); 2503 if (ret < 0) { 2504 return ret; 2505 } 2506 2507 /* Drivers that never have children can omit .bdrv_child_perm() */ 2508 if (!drv->bdrv_child_perm) { 2509 assert(QLIST_EMPTY(&bs->children)); 2510 return 0; 2511 } 2512 2513 /* Check all children */ 2514 QLIST_FOREACH(c, &bs->children, next) { 2515 uint64_t cur_perm, cur_shared; 2516 2517 bdrv_child_perm(bs, c->bs, c, c->role, q, 2518 cumulative_perms, cumulative_shared_perms, 2519 &cur_perm, &cur_shared); 2520 bdrv_child_set_perm(c, cur_perm, cur_shared, tran); 2521 } 2522 2523 return 0; 2524 } 2525 2526 static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, 2527 Transaction *tran, Error **errp) 2528 { 2529 int ret; 2530 BlockDriverState *bs; 2531 GLOBAL_STATE_CODE(); 2532 2533 for ( ; list; list = list->next) { 2534 bs = list->data; 2535 2536 if (bdrv_parent_perms_conflict(bs, errp)) { 2537 return -EINVAL; 2538 } 2539 2540 ret = bdrv_node_refresh_perm(bs, q, tran, errp); 2541 if (ret < 0) { 2542 return ret; 2543 } 2544 } 2545 2546 return 0; 2547 } 2548 2549 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 2550 uint64_t *shared_perm) 2551 { 2552 BdrvChild *c; 2553 uint64_t cumulative_perms = 0; 2554 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2555 2556 GLOBAL_STATE_CODE(); 2557 2558 QLIST_FOREACH(c, &bs->parents, next_parent) { 2559 cumulative_perms |= c->perm; 2560 cumulative_shared_perms &= c->shared_perm; 2561 } 2562 2563 
*perm = cumulative_perms; 2564 *shared_perm = cumulative_shared_perms; 2565 } 2566 2567 char *bdrv_perm_names(uint64_t perm) 2568 { 2569 struct perm_name { 2570 uint64_t perm; 2571 const char *name; 2572 } permissions[] = { 2573 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 2574 { BLK_PERM_WRITE, "write" }, 2575 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 2576 { BLK_PERM_RESIZE, "resize" }, 2577 { 0, NULL } 2578 }; 2579 2580 GString *result = g_string_sized_new(30); 2581 struct perm_name *p; 2582 2583 for (p = permissions; p->name; p++) { 2584 if (perm & p->perm) { 2585 if (result->len > 0) { 2586 g_string_append(result, ", "); 2587 } 2588 g_string_append(result, p->name); 2589 } 2590 } 2591 2592 return g_string_free(result, FALSE); 2593 } 2594 2595 2596 static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) 2597 { 2598 int ret; 2599 Transaction *tran = tran_new(); 2600 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2601 GLOBAL_STATE_CODE(); 2602 2603 ret = bdrv_list_refresh_perms(list, NULL, tran, errp); 2604 tran_finalize(tran, ret); 2605 2606 return ret; 2607 } 2608 2609 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 2610 Error **errp) 2611 { 2612 Error *local_err = NULL; 2613 Transaction *tran = tran_new(); 2614 int ret; 2615 2616 GLOBAL_STATE_CODE(); 2617 2618 bdrv_child_set_perm(c, perm, shared, tran); 2619 2620 ret = bdrv_refresh_perms(c->bs, &local_err); 2621 2622 tran_finalize(tran, ret); 2623 2624 if (ret < 0) { 2625 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { 2626 /* tighten permissions */ 2627 error_propagate(errp, local_err); 2628 } else { 2629 /* 2630 * Our caller may intend to only loosen restrictions and 2631 * does not expect this function to fail. Errors are not 2632 * fatal in such a case, so we can just hide them from our 2633 * caller. 
2634 */ 2635 error_free(local_err); 2636 ret = 0; 2637 } 2638 } 2639 2640 return ret; 2641 } 2642 2643 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) 2644 { 2645 uint64_t parent_perms, parent_shared; 2646 uint64_t perms, shared; 2647 2648 GLOBAL_STATE_CODE(); 2649 2650 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); 2651 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 2652 parent_perms, parent_shared, &perms, &shared); 2653 2654 return bdrv_child_try_set_perm(c, perms, shared, errp); 2655 } 2656 2657 /* 2658 * Default implementation for .bdrv_child_perm() for block filters: 2659 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the 2660 * filtered child. 2661 */ 2662 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 2663 BdrvChildRole role, 2664 BlockReopenQueue *reopen_queue, 2665 uint64_t perm, uint64_t shared, 2666 uint64_t *nperm, uint64_t *nshared) 2667 { 2668 GLOBAL_STATE_CODE(); 2669 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 2670 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 2671 } 2672 2673 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, 2674 BdrvChildRole role, 2675 BlockReopenQueue *reopen_queue, 2676 uint64_t perm, uint64_t shared, 2677 uint64_t *nperm, uint64_t *nshared) 2678 { 2679 assert(role & BDRV_CHILD_COW); 2680 GLOBAL_STATE_CODE(); 2681 2682 /* 2683 * We want consistent read from backing files if the parent needs it. 2684 * No other operations are performed on backing files. 2685 */ 2686 perm &= BLK_PERM_CONSISTENT_READ; 2687 2688 /* 2689 * If the parent can deal with changing data, we're okay with a 2690 * writable and resizable backing file. 2691 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
2692 */ 2693 if (shared & BLK_PERM_WRITE) { 2694 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 2695 } else { 2696 shared = 0; 2697 } 2698 2699 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; 2700 2701 if (bs->open_flags & BDRV_O_INACTIVE) { 2702 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2703 } 2704 2705 *nperm = perm; 2706 *nshared = shared; 2707 } 2708 2709 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, 2710 BdrvChildRole role, 2711 BlockReopenQueue *reopen_queue, 2712 uint64_t perm, uint64_t shared, 2713 uint64_t *nperm, uint64_t *nshared) 2714 { 2715 int flags; 2716 2717 GLOBAL_STATE_CODE(); 2718 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); 2719 2720 flags = bdrv_reopen_get_flags(reopen_queue, bs); 2721 2722 /* 2723 * Apart from the modifications below, the same permissions are 2724 * forwarded and left alone as for filters 2725 */ 2726 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2727 perm, shared, &perm, &shared); 2728 2729 if (role & BDRV_CHILD_METADATA) { 2730 /* Format drivers may touch metadata even if the guest doesn't write */ 2731 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { 2732 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2733 } 2734 2735 /* 2736 * bs->file always needs to be consistent because of the 2737 * metadata. We can never allow other users to resize or write 2738 * to it. 2739 */ 2740 if (!(flags & BDRV_O_NO_IO)) { 2741 perm |= BLK_PERM_CONSISTENT_READ; 2742 } 2743 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 2744 } 2745 2746 if (role & BDRV_CHILD_DATA) { 2747 /* 2748 * Technically, everything in this block is a subset of the 2749 * BDRV_CHILD_METADATA path taken above, and so this could 2750 * be an "else if" branch. However, that is not obvious, and 2751 * this function is not performance critical, therefore we let 2752 * this be an independent "if". 
2753 */ 2754 2755 /* 2756 * We cannot allow other users to resize the file because the 2757 * format driver might have some assumptions about the size 2758 * (e.g. because it is stored in metadata, or because the file 2759 * is split into fixed-size data files). 2760 */ 2761 shared &= ~BLK_PERM_RESIZE; 2762 2763 /* 2764 * WRITE_UNCHANGED often cannot be performed as such on the 2765 * data file. For example, the qcow2 driver may still need to 2766 * write copied clusters on copy-on-read. 2767 */ 2768 if (perm & BLK_PERM_WRITE_UNCHANGED) { 2769 perm |= BLK_PERM_WRITE; 2770 } 2771 2772 /* 2773 * If the data file is written to, the format driver may 2774 * expect to be able to resize it by writing beyond the EOF. 2775 */ 2776 if (perm & BLK_PERM_WRITE) { 2777 perm |= BLK_PERM_RESIZE; 2778 } 2779 } 2780 2781 if (bs->open_flags & BDRV_O_INACTIVE) { 2782 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2783 } 2784 2785 *nperm = perm; 2786 *nshared = shared; 2787 } 2788 2789 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, 2790 BdrvChildRole role, BlockReopenQueue *reopen_queue, 2791 uint64_t perm, uint64_t shared, 2792 uint64_t *nperm, uint64_t *nshared) 2793 { 2794 GLOBAL_STATE_CODE(); 2795 if (role & BDRV_CHILD_FILTERED) { 2796 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 2797 BDRV_CHILD_COW))); 2798 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2799 perm, shared, nperm, nshared); 2800 } else if (role & BDRV_CHILD_COW) { 2801 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); 2802 bdrv_default_perms_for_cow(bs, c, role, reopen_queue, 2803 perm, shared, nperm, nshared); 2804 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { 2805 bdrv_default_perms_for_storage(bs, c, role, reopen_queue, 2806 perm, shared, nperm, nshared); 2807 } else { 2808 g_assert_not_reached(); 2809 } 2810 } 2811 2812 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) 2813 { 2814 static const uint64_t permissions[] = { 2815 
[BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ, 2816 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, 2817 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, 2818 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, 2819 }; 2820 2821 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); 2822 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); 2823 2824 assert(qapi_perm < BLOCK_PERMISSION__MAX); 2825 2826 return permissions[qapi_perm]; 2827 } 2828 2829 static void bdrv_replace_child_noperm(BdrvChild *child, 2830 BlockDriverState *new_bs) 2831 { 2832 BlockDriverState *old_bs = child->bs; 2833 int new_bs_quiesce_counter; 2834 int drain_saldo; 2835 2836 assert(!child->frozen); 2837 assert(old_bs != new_bs); 2838 GLOBAL_STATE_CODE(); 2839 2840 if (old_bs && new_bs) { 2841 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2842 } 2843 2844 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); 2845 drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; 2846 2847 /* 2848 * If the new child node is drained but the old one was not, flush 2849 * all outstanding requests to the old child node. 2850 */ 2851 while (drain_saldo > 0 && child->klass->drained_begin) { 2852 bdrv_parent_drained_begin_single(child, true); 2853 drain_saldo--; 2854 } 2855 2856 if (old_bs) { 2857 /* Detach first so that the recursive drain sections coming from @child 2858 * are already gone and we only end the drain sections that came from 2859 * elsewhere. */ 2860 if (child->klass->detach) { 2861 child->klass->detach(child); 2862 } 2863 assert_bdrv_graph_writable(old_bs); 2864 QLIST_REMOVE(child, next_parent); 2865 } 2866 2867 child->bs = new_bs; 2868 2869 if (new_bs) { 2870 assert_bdrv_graph_writable(new_bs); 2871 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2872 2873 /* 2874 * Detaching the old node may have led to the new node's 2875 * quiesce_counter having been decreased. 
Not a problem, we 2876 * just need to recognize this here and then invoke 2877 * drained_end appropriately more often. 2878 */ 2879 assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); 2880 drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; 2881 2882 /* Attach only after starting new drained sections, so that recursive 2883 * drain sections coming from @child don't get an extra .drained_begin 2884 * callback. */ 2885 if (child->klass->attach) { 2886 child->klass->attach(child); 2887 } 2888 } 2889 2890 /* 2891 * If the old child node was drained but the new one is not, allow 2892 * requests to come in only after the new node has been attached. 2893 */ 2894 while (drain_saldo < 0 && child->klass->drained_end) { 2895 bdrv_parent_drained_end_single(child); 2896 drain_saldo++; 2897 } 2898 } 2899 2900 /** 2901 * Free the given @child. 2902 * 2903 * The child must be empty (i.e. `child->bs == NULL`) and it must be 2904 * unused (i.e. not in a children list). 2905 */ 2906 static void bdrv_child_free(BdrvChild *child) 2907 { 2908 assert(!child->bs); 2909 GLOBAL_STATE_CODE(); 2910 assert(!child->next.le_prev); /* not in children list */ 2911 2912 g_free(child->name); 2913 g_free(child); 2914 } 2915 2916 typedef struct BdrvAttachChildCommonState { 2917 BdrvChild *child; 2918 AioContext *old_parent_ctx; 2919 AioContext *old_child_ctx; 2920 } BdrvAttachChildCommonState; 2921 2922 static void bdrv_attach_child_common_abort(void *opaque) 2923 { 2924 BdrvAttachChildCommonState *s = opaque; 2925 BlockDriverState *bs = s->child->bs; 2926 2927 GLOBAL_STATE_CODE(); 2928 bdrv_replace_child_noperm(s->child, NULL); 2929 2930 if (bdrv_get_aio_context(bs) != s->old_child_ctx) { 2931 bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); 2932 } 2933 2934 if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { 2935 Transaction *tran; 2936 GHashTable *visited; 2937 bool ret; 2938 2939 tran = tran_new(); 2940 2941 /* No need to visit `child`, 
because it has been detached already */ 2942 visited = g_hash_table_new(NULL, NULL); 2943 ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx, 2944 visited, tran, &error_abort); 2945 g_hash_table_destroy(visited); 2946 2947 /* transaction is supposed to always succeed */ 2948 assert(ret == true); 2949 tran_commit(tran); 2950 } 2951 2952 bdrv_unref(bs); 2953 bdrv_child_free(s->child); 2954 } 2955 2956 static TransactionActionDrv bdrv_attach_child_common_drv = { 2957 .abort = bdrv_attach_child_common_abort, 2958 .clean = g_free, 2959 }; 2960 2961 /* 2962 * Common part of attaching bdrv child to bs or to blk or to job 2963 * 2964 * Function doesn't update permissions, caller is responsible for this. 2965 * 2966 * Returns new created child. 2967 */ 2968 static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, 2969 const char *child_name, 2970 const BdrvChildClass *child_class, 2971 BdrvChildRole child_role, 2972 uint64_t perm, uint64_t shared_perm, 2973 void *opaque, 2974 Transaction *tran, Error **errp) 2975 { 2976 BdrvChild *new_child; 2977 AioContext *parent_ctx; 2978 AioContext *child_ctx = bdrv_get_aio_context(child_bs); 2979 2980 assert(child_class->get_parent_desc); 2981 GLOBAL_STATE_CODE(); 2982 2983 new_child = g_new(BdrvChild, 1); 2984 *new_child = (BdrvChild) { 2985 .bs = NULL, 2986 .name = g_strdup(child_name), 2987 .klass = child_class, 2988 .role = child_role, 2989 .perm = perm, 2990 .shared_perm = shared_perm, 2991 .opaque = opaque, 2992 }; 2993 2994 /* 2995 * If the AioContexts don't match, first try to move the subtree of 2996 * child_bs into the AioContext of the new parent. If this doesn't work, 2997 * try moving the parent into the AioContext of child_bs instead. 
2998 */ 2999 parent_ctx = bdrv_child_get_parent_aio_context(new_child); 3000 if (child_ctx != parent_ctx) { 3001 Error *local_err = NULL; 3002 int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, 3003 &local_err); 3004 3005 if (ret < 0 && child_class->change_aio_ctx) { 3006 Transaction *tran = tran_new(); 3007 GHashTable *visited = g_hash_table_new(NULL, NULL); 3008 bool ret_child; 3009 3010 g_hash_table_add(visited, new_child); 3011 ret_child = child_class->change_aio_ctx(new_child, child_ctx, 3012 visited, tran, NULL); 3013 if (ret_child == true) { 3014 error_free(local_err); 3015 ret = 0; 3016 } 3017 tran_finalize(tran, ret_child == true ? 0 : -1); 3018 g_hash_table_destroy(visited); 3019 } 3020 3021 if (ret < 0) { 3022 error_propagate(errp, local_err); 3023 bdrv_child_free(new_child); 3024 return NULL; 3025 } 3026 } 3027 3028 bdrv_ref(child_bs); 3029 bdrv_replace_child_noperm(new_child, child_bs); 3030 3031 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); 3032 *s = (BdrvAttachChildCommonState) { 3033 .child = new_child, 3034 .old_parent_ctx = parent_ctx, 3035 .old_child_ctx = child_ctx, 3036 }; 3037 tran_add(tran, &bdrv_attach_child_common_drv, s); 3038 3039 return new_child; 3040 } 3041 3042 /* 3043 * Function doesn't update permissions, caller is responsible for this. 
3044 */ 3045 static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, 3046 BlockDriverState *child_bs, 3047 const char *child_name, 3048 const BdrvChildClass *child_class, 3049 BdrvChildRole child_role, 3050 Transaction *tran, 3051 Error **errp) 3052 { 3053 uint64_t perm, shared_perm; 3054 3055 assert(parent_bs->drv); 3056 GLOBAL_STATE_CODE(); 3057 3058 if (bdrv_recurse_has_child(child_bs, parent_bs)) { 3059 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", 3060 child_bs->node_name, child_name, parent_bs->node_name); 3061 return NULL; 3062 } 3063 3064 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 3065 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 3066 perm, shared_perm, &perm, &shared_perm); 3067 3068 return bdrv_attach_child_common(child_bs, child_name, child_class, 3069 child_role, perm, shared_perm, parent_bs, 3070 tran, errp); 3071 } 3072 3073 static void bdrv_detach_child(BdrvChild *child) 3074 { 3075 BlockDriverState *old_bs = child->bs; 3076 3077 GLOBAL_STATE_CODE(); 3078 bdrv_replace_child_noperm(child, NULL); 3079 bdrv_child_free(child); 3080 3081 if (old_bs) { 3082 /* 3083 * Update permissions for old node. We're just taking a parent away, so 3084 * we're loosening restrictions. Errors of permission update are not 3085 * fatal in this case, ignore them. 3086 */ 3087 bdrv_refresh_perms(old_bs, NULL); 3088 3089 /* 3090 * When the parent requiring a non-default AioContext is removed, the 3091 * node moves back to the main AioContext 3092 */ 3093 bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL); 3094 } 3095 } 3096 3097 /* 3098 * This function steals the reference to child_bs from the caller. 3099 * That reference is later dropped by bdrv_root_unref_child(). 3100 * 3101 * On failure NULL is returned, errp is set and the reference to 3102 * child_bs is also dropped. 
3103 * 3104 * The caller must hold the AioContext lock @child_bs, but not that of @ctx 3105 * (unless @child_bs is already in @ctx). 3106 */ 3107 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 3108 const char *child_name, 3109 const BdrvChildClass *child_class, 3110 BdrvChildRole child_role, 3111 uint64_t perm, uint64_t shared_perm, 3112 void *opaque, Error **errp) 3113 { 3114 int ret; 3115 BdrvChild *child; 3116 Transaction *tran = tran_new(); 3117 3118 GLOBAL_STATE_CODE(); 3119 3120 child = bdrv_attach_child_common(child_bs, child_name, child_class, 3121 child_role, perm, shared_perm, opaque, 3122 tran, errp); 3123 if (!child) { 3124 ret = -EINVAL; 3125 goto out; 3126 } 3127 3128 ret = bdrv_refresh_perms(child_bs, errp); 3129 3130 out: 3131 tran_finalize(tran, ret); 3132 3133 bdrv_unref(child_bs); 3134 3135 return ret < 0 ? NULL : child; 3136 } 3137 3138 /* 3139 * This function transfers the reference to child_bs from the caller 3140 * to parent_bs. That reference is later dropped by parent_bs on 3141 * bdrv_close() or if someone calls bdrv_unref_child(). 3142 * 3143 * On failure NULL is returned, errp is set and the reference to 3144 * child_bs is also dropped. 3145 * 3146 * If @parent_bs and @child_bs are in different AioContexts, the caller must 3147 * hold the AioContext lock for @child_bs, but not for @parent_bs. 
3148 */ 3149 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 3150 BlockDriverState *child_bs, 3151 const char *child_name, 3152 const BdrvChildClass *child_class, 3153 BdrvChildRole child_role, 3154 Error **errp) 3155 { 3156 int ret; 3157 BdrvChild *child; 3158 Transaction *tran = tran_new(); 3159 3160 GLOBAL_STATE_CODE(); 3161 3162 child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, 3163 child_class, child_role, tran, errp); 3164 if (!child) { 3165 ret = -EINVAL; 3166 goto out; 3167 } 3168 3169 ret = bdrv_refresh_perms(parent_bs, errp); 3170 if (ret < 0) { 3171 goto out; 3172 } 3173 3174 out: 3175 tran_finalize(tran, ret); 3176 3177 bdrv_unref(child_bs); 3178 3179 return ret < 0 ? NULL : child; 3180 } 3181 3182 /* Callers must ensure that child->frozen is false. */ 3183 void bdrv_root_unref_child(BdrvChild *child) 3184 { 3185 BlockDriverState *child_bs; 3186 3187 GLOBAL_STATE_CODE(); 3188 3189 child_bs = child->bs; 3190 bdrv_detach_child(child); 3191 bdrv_unref(child_bs); 3192 } 3193 3194 typedef struct BdrvSetInheritsFrom { 3195 BlockDriverState *bs; 3196 BlockDriverState *old_inherits_from; 3197 } BdrvSetInheritsFrom; 3198 3199 static void bdrv_set_inherits_from_abort(void *opaque) 3200 { 3201 BdrvSetInheritsFrom *s = opaque; 3202 3203 s->bs->inherits_from = s->old_inherits_from; 3204 } 3205 3206 static TransactionActionDrv bdrv_set_inherits_from_drv = { 3207 .abort = bdrv_set_inherits_from_abort, 3208 .clean = g_free, 3209 }; 3210 3211 /* @tran is allowed to be NULL. 
In this case no rollback is possible */ 3212 static void bdrv_set_inherits_from(BlockDriverState *bs, 3213 BlockDriverState *new_inherits_from, 3214 Transaction *tran) 3215 { 3216 if (tran) { 3217 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); 3218 3219 *s = (BdrvSetInheritsFrom) { 3220 .bs = bs, 3221 .old_inherits_from = bs->inherits_from, 3222 }; 3223 3224 tran_add(tran, &bdrv_set_inherits_from_drv, s); 3225 } 3226 3227 bs->inherits_from = new_inherits_from; 3228 } 3229 3230 /** 3231 * Clear all inherits_from pointers from children and grandchildren of 3232 * @root that point to @root, where necessary. 3233 * @tran is allowed to be NULL. In this case no rollback is possible 3234 */ 3235 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, 3236 Transaction *tran) 3237 { 3238 BdrvChild *c; 3239 3240 if (child->bs->inherits_from == root) { 3241 /* 3242 * Remove inherits_from only when the last reference between root and 3243 * child->bs goes away. 3244 */ 3245 QLIST_FOREACH(c, &root->children, next) { 3246 if (c != child && c->bs == child->bs) { 3247 break; 3248 } 3249 } 3250 if (c == NULL) { 3251 bdrv_set_inherits_from(child->bs, NULL, tran); 3252 } 3253 } 3254 3255 QLIST_FOREACH(c, &child->bs->children, next) { 3256 bdrv_unset_inherits_from(root, c, tran); 3257 } 3258 } 3259 3260 /* Callers must ensure that child->frozen is false. 
*/ 3261 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 3262 { 3263 GLOBAL_STATE_CODE(); 3264 if (child == NULL) { 3265 return; 3266 } 3267 3268 bdrv_unset_inherits_from(parent, child, NULL); 3269 bdrv_root_unref_child(child); 3270 } 3271 3272 3273 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 3274 { 3275 BdrvChild *c; 3276 GLOBAL_STATE_CODE(); 3277 QLIST_FOREACH(c, &bs->parents, next_parent) { 3278 if (c->klass->change_media) { 3279 c->klass->change_media(c, load); 3280 } 3281 } 3282 } 3283 3284 /* Return true if you can reach parent going through child->inherits_from 3285 * recursively. If parent or child are NULL, return false */ 3286 static bool bdrv_inherits_from_recursive(BlockDriverState *child, 3287 BlockDriverState *parent) 3288 { 3289 while (child && child != parent) { 3290 child = child->inherits_from; 3291 } 3292 3293 return child != NULL; 3294 } 3295 3296 /* 3297 * Return the BdrvChildRole for @bs's backing child. bs->backing is 3298 * mostly used for COW backing children (role = COW), but also for 3299 * filtered children (role = FILTERED | PRIMARY). 3300 */ 3301 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) 3302 { 3303 if (bs->drv && bs->drv->is_filter) { 3304 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3305 } else { 3306 return BDRV_CHILD_COW; 3307 } 3308 } 3309 3310 /* 3311 * Sets the bs->backing or bs->file link of a BDS. A new reference is created; 3312 * callers which don't need their own reference any more must call bdrv_unref(). 3313 * 3314 * Function doesn't update permissions, caller is responsible for this. 3315 */ 3316 static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, 3317 BlockDriverState *child_bs, 3318 bool is_backing, 3319 Transaction *tran, Error **errp) 3320 { 3321 bool update_inherits_from = 3322 bdrv_inherits_from_recursive(child_bs, parent_bs); 3323 BdrvChild *child = is_backing ? 
parent_bs->backing : parent_bs->file; 3324 BdrvChildRole role; 3325 3326 GLOBAL_STATE_CODE(); 3327 3328 if (!parent_bs->drv) { 3329 /* 3330 * Node without drv is an object without a class :/. TODO: finally fix 3331 * qcow2 driver to never clear bs->drv and implement format corruption 3332 * handling in other way. 3333 */ 3334 error_setg(errp, "Node corrupted"); 3335 return -EINVAL; 3336 } 3337 3338 if (child && child->frozen) { 3339 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", 3340 child->name, parent_bs->node_name, child->bs->node_name); 3341 return -EPERM; 3342 } 3343 3344 if (is_backing && !parent_bs->drv->is_filter && 3345 !parent_bs->drv->supports_backing) 3346 { 3347 error_setg(errp, "Driver '%s' of node '%s' does not support backing " 3348 "files", parent_bs->drv->format_name, parent_bs->node_name); 3349 return -EINVAL; 3350 } 3351 3352 if (parent_bs->drv->is_filter) { 3353 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3354 } else if (is_backing) { 3355 role = BDRV_CHILD_COW; 3356 } else { 3357 /* 3358 * We only can use same role as it is in existing child. We don't have 3359 * infrastructure to determine role of file child in generic way 3360 */ 3361 if (!child) { 3362 error_setg(errp, "Cannot set file child to format node without " 3363 "file child"); 3364 return -EINVAL; 3365 } 3366 role = child->role; 3367 } 3368 3369 if (child) { 3370 bdrv_unset_inherits_from(parent_bs, child, tran); 3371 bdrv_remove_child(child, tran); 3372 } 3373 3374 if (!child_bs) { 3375 goto out; 3376 } 3377 3378 child = bdrv_attach_child_noperm(parent_bs, child_bs, 3379 is_backing ? "backing" : "file", 3380 &child_of_bds, role, 3381 tran, errp); 3382 if (!child) { 3383 return -EINVAL; 3384 } 3385 3386 3387 /* 3388 * If inherits_from pointed recursively to bs then let's update it to 3389 * point directly to bs (else it will become NULL). 
3390 */ 3391 if (update_inherits_from) { 3392 bdrv_set_inherits_from(child_bs, parent_bs, tran); 3393 } 3394 3395 out: 3396 bdrv_refresh_limits(parent_bs, tran, NULL); 3397 3398 return 0; 3399 } 3400 3401 static int bdrv_set_backing_noperm(BlockDriverState *bs, 3402 BlockDriverState *backing_hd, 3403 Transaction *tran, Error **errp) 3404 { 3405 GLOBAL_STATE_CODE(); 3406 return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); 3407 } 3408 3409 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 3410 Error **errp) 3411 { 3412 int ret; 3413 Transaction *tran = tran_new(); 3414 3415 GLOBAL_STATE_CODE(); 3416 bdrv_drained_begin(bs); 3417 3418 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); 3419 if (ret < 0) { 3420 goto out; 3421 } 3422 3423 ret = bdrv_refresh_perms(bs, errp); 3424 out: 3425 tran_finalize(tran, ret); 3426 3427 bdrv_drained_end(bs); 3428 3429 return ret; 3430 } 3431 3432 /* 3433 * Opens the backing file for a BlockDriverState if not yet open 3434 * 3435 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 3436 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3437 * itself, all options starting with "${bdref_key}." are considered part of the 3438 * BlockdevRef. 3439 * 3440 * TODO Can this be unified with bdrv_open_image()? 
3441 */ 3442 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 3443 const char *bdref_key, Error **errp) 3444 { 3445 char *backing_filename = NULL; 3446 char *bdref_key_dot; 3447 const char *reference = NULL; 3448 int ret = 0; 3449 bool implicit_backing = false; 3450 BlockDriverState *backing_hd; 3451 QDict *options; 3452 QDict *tmp_parent_options = NULL; 3453 Error *local_err = NULL; 3454 3455 GLOBAL_STATE_CODE(); 3456 3457 if (bs->backing != NULL) { 3458 goto free_exit; 3459 } 3460 3461 /* NULL means an empty set of options */ 3462 if (parent_options == NULL) { 3463 tmp_parent_options = qdict_new(); 3464 parent_options = tmp_parent_options; 3465 } 3466 3467 bs->open_flags &= ~BDRV_O_NO_BACKING; 3468 3469 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 3470 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 3471 g_free(bdref_key_dot); 3472 3473 /* 3474 * Caution: while qdict_get_try_str() is fine, getting non-string 3475 * types would require more care. When @parent_options come from 3476 * -blockdev or blockdev_add, its members are typed according to 3477 * the QAPI schema, but when they come from -drive, they're all 3478 * QString. 3479 */ 3480 reference = qdict_get_try_str(parent_options, bdref_key); 3481 if (reference || qdict_haskey(options, "file.filename")) { 3482 /* keep backing_filename NULL */ 3483 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 3484 qobject_unref(options); 3485 goto free_exit; 3486 } else { 3487 if (qdict_size(options) == 0) { 3488 /* If the user specifies options that do not modify the 3489 * backing file's behavior, we might still consider it the 3490 * implicit backing file. But it's easier this way, and 3491 * just specifying some of the backing BDS's options is 3492 * only possible with -drive anyway (otherwise the QAPI 3493 * schema forces the user to specify everything). 
*/ 3494 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); 3495 } 3496 3497 backing_filename = bdrv_get_full_backing_filename(bs, &local_err); 3498 if (local_err) { 3499 ret = -EINVAL; 3500 error_propagate(errp, local_err); 3501 qobject_unref(options); 3502 goto free_exit; 3503 } 3504 } 3505 3506 if (!bs->drv || !bs->drv->supports_backing) { 3507 ret = -EINVAL; 3508 error_setg(errp, "Driver doesn't support backing files"); 3509 qobject_unref(options); 3510 goto free_exit; 3511 } 3512 3513 if (!reference && 3514 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 3515 qdict_put_str(options, "driver", bs->backing_format); 3516 } 3517 3518 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, 3519 &child_of_bds, bdrv_backing_role(bs), errp); 3520 if (!backing_hd) { 3521 bs->open_flags |= BDRV_O_NO_BACKING; 3522 error_prepend(errp, "Could not open backing file: "); 3523 ret = -EINVAL; 3524 goto free_exit; 3525 } 3526 3527 if (implicit_backing) { 3528 bdrv_refresh_filename(backing_hd); 3529 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3530 backing_hd->filename); 3531 } 3532 3533 /* Hook up the backing file link; drop our reference, bs owns the 3534 * backing_hd reference now */ 3535 ret = bdrv_set_backing_hd(bs, backing_hd, errp); 3536 bdrv_unref(backing_hd); 3537 if (ret < 0) { 3538 goto free_exit; 3539 } 3540 3541 qdict_del(parent_options, bdref_key); 3542 3543 free_exit: 3544 g_free(backing_filename); 3545 qobject_unref(tmp_parent_options); 3546 return ret; 3547 } 3548 3549 static BlockDriverState * 3550 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 3551 BlockDriverState *parent, const BdrvChildClass *child_class, 3552 BdrvChildRole child_role, bool allow_none, Error **errp) 3553 { 3554 BlockDriverState *bs = NULL; 3555 QDict *image_options; 3556 char *bdref_key_dot; 3557 const char *reference; 3558 3559 assert(child_class != NULL); 3560 3561 bdref_key_dot = 
g_strdup_printf("%s.", bdref_key); 3562 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 3563 g_free(bdref_key_dot); 3564 3565 /* 3566 * Caution: while qdict_get_try_str() is fine, getting non-string 3567 * types would require more care. When @options come from 3568 * -blockdev or blockdev_add, its members are typed according to 3569 * the QAPI schema, but when they come from -drive, they're all 3570 * QString. 3571 */ 3572 reference = qdict_get_try_str(options, bdref_key); 3573 if (!filename && !reference && !qdict_size(image_options)) { 3574 if (!allow_none) { 3575 error_setg(errp, "A block device must be specified for \"%s\"", 3576 bdref_key); 3577 } 3578 qobject_unref(image_options); 3579 goto done; 3580 } 3581 3582 bs = bdrv_open_inherit(filename, reference, image_options, 0, 3583 parent, child_class, child_role, errp); 3584 if (!bs) { 3585 goto done; 3586 } 3587 3588 done: 3589 qdict_del(options, bdref_key); 3590 return bs; 3591 } 3592 3593 /* 3594 * Opens a disk image whose options are given as BlockdevRef in another block 3595 * device's options. 3596 * 3597 * If allow_none is true, no image will be opened if filename is false and no 3598 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3599 * 3600 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3601 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3602 * itself, all options starting with "${bdref_key}." are considered part of the 3603 * BlockdevRef. 3604 * 3605 * The BlockdevRef will be removed from the options QDict. 
3606 */ 3607 BdrvChild *bdrv_open_child(const char *filename, 3608 QDict *options, const char *bdref_key, 3609 BlockDriverState *parent, 3610 const BdrvChildClass *child_class, 3611 BdrvChildRole child_role, 3612 bool allow_none, Error **errp) 3613 { 3614 BlockDriverState *bs; 3615 3616 GLOBAL_STATE_CODE(); 3617 3618 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3619 child_role, allow_none, errp); 3620 if (bs == NULL) { 3621 return NULL; 3622 } 3623 3624 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3625 errp); 3626 } 3627 3628 /* 3629 * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. 3630 */ 3631 int bdrv_open_file_child(const char *filename, 3632 QDict *options, const char *bdref_key, 3633 BlockDriverState *parent, Error **errp) 3634 { 3635 BdrvChildRole role; 3636 3637 /* commit_top and mirror_top don't use this function */ 3638 assert(!parent->drv->filtered_child_is_backing); 3639 role = parent->drv->is_filter ? 3640 (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; 3641 3642 if (!bdrv_open_child(filename, options, bdref_key, parent, 3643 &child_of_bds, role, false, errp)) 3644 { 3645 return -EINVAL; 3646 } 3647 3648 return 0; 3649 } 3650 3651 /* 3652 * TODO Future callers may need to specify parent/child_class in order for 3653 * option inheritance to work. Existing callers use it for the root node. 
3654 */ 3655 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3656 { 3657 BlockDriverState *bs = NULL; 3658 QObject *obj = NULL; 3659 QDict *qdict = NULL; 3660 const char *reference = NULL; 3661 Visitor *v = NULL; 3662 3663 GLOBAL_STATE_CODE(); 3664 3665 if (ref->type == QTYPE_QSTRING) { 3666 reference = ref->u.reference; 3667 } else { 3668 BlockdevOptions *options = &ref->u.definition; 3669 assert(ref->type == QTYPE_QDICT); 3670 3671 v = qobject_output_visitor_new(&obj); 3672 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3673 visit_complete(v, &obj); 3674 3675 qdict = qobject_to(QDict, obj); 3676 qdict_flatten(qdict); 3677 3678 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3679 * compatibility with other callers) rather than what we want as the 3680 * real defaults. Apply the defaults here instead. */ 3681 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3682 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3683 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3684 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3685 3686 } 3687 3688 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); 3689 obj = NULL; 3690 qobject_unref(obj); 3691 visit_free(v); 3692 return bs; 3693 } 3694 3695 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3696 int flags, 3697 QDict *snapshot_options, 3698 Error **errp) 3699 { 3700 g_autofree char *tmp_filename = NULL; 3701 int64_t total_size; 3702 QemuOpts *opts = NULL; 3703 BlockDriverState *bs_snapshot = NULL; 3704 int ret; 3705 3706 GLOBAL_STATE_CODE(); 3707 3708 /* if snapshot, we create a temporary backing file and open it 3709 instead of opening 'filename' directly */ 3710 3711 /* Get the required size from the image */ 3712 total_size = bdrv_getlength(bs); 3713 if (total_size < 0) { 3714 error_setg_errno(errp, -total_size, "Could not get image size"); 3715 goto out; 3716 } 3717 3718 /* 
Create the temporary image */ 3719 tmp_filename = create_tmp_file(errp); 3720 if (!tmp_filename) { 3721 goto out; 3722 } 3723 3724 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3725 &error_abort); 3726 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3727 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3728 qemu_opts_del(opts); 3729 if (ret < 0) { 3730 error_prepend(errp, "Could not create temporary overlay '%s': ", 3731 tmp_filename); 3732 goto out; 3733 } 3734 3735 /* Prepare options QDict for the temporary file */ 3736 qdict_put_str(snapshot_options, "file.driver", "file"); 3737 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3738 qdict_put_str(snapshot_options, "driver", "qcow2"); 3739 3740 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3741 snapshot_options = NULL; 3742 if (!bs_snapshot) { 3743 goto out; 3744 } 3745 3746 ret = bdrv_append(bs_snapshot, bs, errp); 3747 if (ret < 0) { 3748 bs_snapshot = NULL; 3749 goto out; 3750 } 3751 3752 out: 3753 qobject_unref(snapshot_options); 3754 return bs_snapshot; 3755 } 3756 3757 /* 3758 * Opens a disk image (raw, qcow2, vmdk, ...) 3759 * 3760 * options is a QDict of options to pass to the block drivers, or NULL for an 3761 * empty set of options. The reference to the QDict belongs to the block layer 3762 * after the call (even on failure), so if the caller intends to reuse the 3763 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3764 * 3765 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3766 * If it is not NULL, the referenced BDS will be reused. 3767 * 3768 * The reference parameter may be used to specify an existing block device which 3769 * should be opened. If specified, neither options nor a filename may be given, 3770 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 
3771 */ 3772 static BlockDriverState *bdrv_open_inherit(const char *filename, 3773 const char *reference, 3774 QDict *options, int flags, 3775 BlockDriverState *parent, 3776 const BdrvChildClass *child_class, 3777 BdrvChildRole child_role, 3778 Error **errp) 3779 { 3780 int ret; 3781 BlockBackend *file = NULL; 3782 BlockDriverState *bs; 3783 BlockDriver *drv = NULL; 3784 BdrvChild *child; 3785 const char *drvname; 3786 const char *backing; 3787 Error *local_err = NULL; 3788 QDict *snapshot_options = NULL; 3789 int snapshot_flags = 0; 3790 3791 assert(!child_class || !flags); 3792 assert(!child_class == !parent); 3793 GLOBAL_STATE_CODE(); 3794 3795 if (reference) { 3796 bool options_non_empty = options ? qdict_size(options) : false; 3797 qobject_unref(options); 3798 3799 if (filename || options_non_empty) { 3800 error_setg(errp, "Cannot reference an existing block device with " 3801 "additional options or a new filename"); 3802 return NULL; 3803 } 3804 3805 bs = bdrv_lookup_bs(reference, reference, errp); 3806 if (!bs) { 3807 return NULL; 3808 } 3809 3810 bdrv_ref(bs); 3811 return bs; 3812 } 3813 3814 bs = bdrv_new(); 3815 3816 /* NULL means an empty set of options */ 3817 if (options == NULL) { 3818 options = qdict_new(); 3819 } 3820 3821 /* json: syntax counts as explicit options, as if in the QDict */ 3822 parse_json_protocol(options, &filename, &local_err); 3823 if (local_err) { 3824 goto fail; 3825 } 3826 3827 bs->explicit_options = qdict_clone_shallow(options); 3828 3829 if (child_class) { 3830 bool parent_is_format; 3831 3832 if (parent->drv) { 3833 parent_is_format = parent->drv->is_format; 3834 } else { 3835 /* 3836 * parent->drv is not set yet because this node is opened for 3837 * (potential) format probing. That means that @parent is going 3838 * to be a format node. 
3839 */ 3840 parent_is_format = true; 3841 } 3842 3843 bs->inherits_from = parent; 3844 child_class->inherit_options(child_role, parent_is_format, 3845 &flags, options, 3846 parent->open_flags, parent->options); 3847 } 3848 3849 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 3850 if (ret < 0) { 3851 goto fail; 3852 } 3853 3854 /* 3855 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 3856 * Caution: getting a boolean member of @options requires care. 3857 * When @options come from -blockdev or blockdev_add, members are 3858 * typed according to the QAPI schema, but when they come from 3859 * -drive, they're all QString. 3860 */ 3861 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 3862 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 3863 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 3864 } else { 3865 flags &= ~BDRV_O_RDWR; 3866 } 3867 3868 if (flags & BDRV_O_SNAPSHOT) { 3869 snapshot_options = qdict_new(); 3870 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 3871 flags, options); 3872 /* Let bdrv_backing_options() override "read-only" */ 3873 qdict_del(options, BDRV_OPT_READ_ONLY); 3874 bdrv_inherited_options(BDRV_CHILD_COW, true, 3875 &flags, options, flags, options); 3876 } 3877 3878 bs->open_flags = flags; 3879 bs->options = options; 3880 options = qdict_clone_shallow(options); 3881 3882 /* Find the right image format driver */ 3883 /* See cautionary note on accessing @options above */ 3884 drvname = qdict_get_try_str(options, "driver"); 3885 if (drvname) { 3886 drv = bdrv_find_format(drvname); 3887 if (!drv) { 3888 error_setg(errp, "Unknown driver: '%s'", drvname); 3889 goto fail; 3890 } 3891 } 3892 3893 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 3894 3895 /* See cautionary note on accessing @options above */ 3896 backing = qdict_get_try_str(options, "backing"); 3897 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || 3898 (backing && *backing == '\0')) 3899 { 3900 if (backing) { 
3901 warn_report("Use of \"backing\": \"\" is deprecated; " 3902 "use \"backing\": null instead"); 3903 } 3904 flags |= BDRV_O_NO_BACKING; 3905 qdict_del(bs->explicit_options, "backing"); 3906 qdict_del(bs->options, "backing"); 3907 qdict_del(options, "backing"); 3908 } 3909 3910 /* Open image file without format layer. This BlockBackend is only used for 3911 * probing, the block drivers will do their own bdrv_open_child() for the 3912 * same BDS, which is why we put the node name back into options. */ 3913 if ((flags & BDRV_O_PROTOCOL) == 0) { 3914 BlockDriverState *file_bs; 3915 3916 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 3917 &child_of_bds, BDRV_CHILD_IMAGE, 3918 true, &local_err); 3919 if (local_err) { 3920 goto fail; 3921 } 3922 if (file_bs != NULL) { 3923 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 3924 * looking at the header to guess the image format. This works even 3925 * in cases where a guest would not see a consistent state. */ 3926 file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); 3927 blk_insert_bs(file, file_bs, &local_err); 3928 bdrv_unref(file_bs); 3929 if (local_err) { 3930 goto fail; 3931 } 3932 3933 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 3934 } 3935 } 3936 3937 /* Image format probing */ 3938 bs->probed = !drv; 3939 if (!drv && file) { 3940 ret = find_image_format(file, filename, &drv, &local_err); 3941 if (ret < 0) { 3942 goto fail; 3943 } 3944 /* 3945 * This option update would logically belong in bdrv_fill_options(), 3946 * but we first need to open bs->file for the probing to work, while 3947 * opening bs->file already requires the (mostly) final set of options 3948 * so that cache mode etc. can be inherited. 3949 * 3950 * Adding the driver later is somewhat ugly, but it's not an option 3951 * that would ever be inherited, so it's correct. 
We just need to make 3952 * sure to update both bs->options (which has the full effective 3953 * options for bs) and options (which has file.* already removed). 3954 */ 3955 qdict_put_str(bs->options, "driver", drv->format_name); 3956 qdict_put_str(options, "driver", drv->format_name); 3957 } else if (!drv) { 3958 error_setg(errp, "Must specify either driver or file"); 3959 goto fail; 3960 } 3961 3962 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 3963 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 3964 /* file must be NULL if a protocol BDS is about to be created 3965 * (the inverse results in an error message from bdrv_open_common()) */ 3966 assert(!(flags & BDRV_O_PROTOCOL) || !file); 3967 3968 /* Open the image */ 3969 ret = bdrv_open_common(bs, file, options, &local_err); 3970 if (ret < 0) { 3971 goto fail; 3972 } 3973 3974 if (file) { 3975 blk_unref(file); 3976 file = NULL; 3977 } 3978 3979 /* If there is a backing file, use it */ 3980 if ((flags & BDRV_O_NO_BACKING) == 0) { 3981 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 3982 if (ret < 0) { 3983 goto close_and_fail; 3984 } 3985 } 3986 3987 /* Remove all children options and references 3988 * from bs->options and bs->explicit_options */ 3989 QLIST_FOREACH(child, &bs->children, next) { 3990 char *child_key_dot; 3991 child_key_dot = g_strdup_printf("%s.", child->name); 3992 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); 3993 qdict_extract_subqdict(bs->options, NULL, child_key_dot); 3994 qdict_del(bs->explicit_options, child->name); 3995 qdict_del(bs->options, child->name); 3996 g_free(child_key_dot); 3997 } 3998 3999 /* Check if any unknown options were used */ 4000 if (qdict_size(options) != 0) { 4001 const QDictEntry *entry = qdict_first(options); 4002 if (flags & BDRV_O_PROTOCOL) { 4003 error_setg(errp, "Block protocol '%s' doesn't support the option " 4004 "'%s'", drv->format_name, entry->key); 4005 } else { 4006 
error_setg(errp, 4007 "Block format '%s' does not support the option '%s'", 4008 drv->format_name, entry->key); 4009 } 4010 4011 goto close_and_fail; 4012 } 4013 4014 bdrv_parent_cb_change_media(bs, true); 4015 4016 qobject_unref(options); 4017 options = NULL; 4018 4019 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 4020 * temporary snapshot afterwards. */ 4021 if (snapshot_flags) { 4022 BlockDriverState *snapshot_bs; 4023 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 4024 snapshot_options, &local_err); 4025 snapshot_options = NULL; 4026 if (local_err) { 4027 goto close_and_fail; 4028 } 4029 /* We are not going to return bs but the overlay on top of it 4030 * (snapshot_bs); thus, we have to drop the strong reference to bs 4031 * (which we obtained by calling bdrv_new()). bs will not be deleted, 4032 * though, because the overlay still has a reference to it. */ 4033 bdrv_unref(bs); 4034 bs = snapshot_bs; 4035 } 4036 4037 return bs; 4038 4039 fail: 4040 blk_unref(file); 4041 qobject_unref(snapshot_options); 4042 qobject_unref(bs->explicit_options); 4043 qobject_unref(bs->options); 4044 qobject_unref(options); 4045 bs->options = NULL; 4046 bs->explicit_options = NULL; 4047 bdrv_unref(bs); 4048 error_propagate(errp, local_err); 4049 return NULL; 4050 4051 close_and_fail: 4052 bdrv_unref(bs); 4053 qobject_unref(snapshot_options); 4054 qobject_unref(options); 4055 error_propagate(errp, local_err); 4056 return NULL; 4057 } 4058 4059 BlockDriverState *bdrv_open(const char *filename, const char *reference, 4060 QDict *options, int flags, Error **errp) 4061 { 4062 GLOBAL_STATE_CODE(); 4063 4064 return bdrv_open_inherit(filename, reference, options, flags, NULL, 4065 NULL, 0, errp); 4066 } 4067 4068 /* Return true if the NULL-terminated @list contains @str */ 4069 static bool is_str_in_list(const char *str, const char *const *list) 4070 { 4071 if (str && list) { 4072 int i; 4073 for (i = 0; list[i] != NULL; i++) { 4074 if (!strcmp(str, 
list[i])) { 4075 return true; 4076 } 4077 } 4078 } 4079 return false; 4080 } 4081 4082 /* 4083 * Check that every option set in @bs->options is also set in 4084 * @new_opts. 4085 * 4086 * Options listed in the common_options list and in 4087 * @bs->drv->mutable_opts are skipped. 4088 * 4089 * Return 0 on success, otherwise return -EINVAL and set @errp. 4090 */ 4091 static int bdrv_reset_options_allowed(BlockDriverState *bs, 4092 const QDict *new_opts, Error **errp) 4093 { 4094 const QDictEntry *e; 4095 /* These options are common to all block drivers and are handled 4096 * in bdrv_reopen_prepare() so they can be left out of @new_opts */ 4097 const char *const common_options[] = { 4098 "node-name", "discard", "cache.direct", "cache.no-flush", 4099 "read-only", "auto-read-only", "detect-zeroes", NULL 4100 }; 4101 4102 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { 4103 if (!qdict_haskey(new_opts, e->key) && 4104 !is_str_in_list(e->key, common_options) && 4105 !is_str_in_list(e->key, bs->drv->mutable_opts)) { 4106 error_setg(errp, "Option '%s' cannot be reset " 4107 "to its default value", e->key); 4108 return -EINVAL; 4109 } 4110 } 4111 4112 return 0; 4113 } 4114 4115 /* 4116 * Returns true if @child can be reached recursively from @bs 4117 */ 4118 static bool bdrv_recurse_has_child(BlockDriverState *bs, 4119 BlockDriverState *child) 4120 { 4121 BdrvChild *c; 4122 4123 if (bs == child) { 4124 return true; 4125 } 4126 4127 QLIST_FOREACH(c, &bs->children, next) { 4128 if (bdrv_recurse_has_child(c->bs, child)) { 4129 return true; 4130 } 4131 } 4132 4133 return false; 4134 } 4135 4136 /* 4137 * Adds a BlockDriverState to a simple queue for an atomic, transactional 4138 * reopen of multiple devices. 4139 * 4140 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT 4141 * already performed, or alternatively may be NULL a new BlockReopenQueue will 4142 * be created and initialized. 
This newly created BlockReopenQueue should be 4143 * passed back in for subsequent calls that are intended to be of the same 4144 * atomic 'set'. 4145 * 4146 * bs is the BlockDriverState to add to the reopen queue. 4147 * 4148 * options contains the changed options for the associated bs 4149 * (the BlockReopenQueue takes ownership) 4150 * 4151 * flags contains the open flags for the associated bs 4152 * 4153 * returns a pointer to bs_queue, which is either the newly allocated 4154 * bs_queue, or the existing bs_queue being used. 4155 * 4156 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 4157 */ 4158 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 4159 BlockDriverState *bs, 4160 QDict *options, 4161 const BdrvChildClass *klass, 4162 BdrvChildRole role, 4163 bool parent_is_format, 4164 QDict *parent_options, 4165 int parent_flags, 4166 bool keep_old_opts) 4167 { 4168 assert(bs != NULL); 4169 4170 BlockReopenQueueEntry *bs_entry; 4171 BdrvChild *child; 4172 QDict *old_options, *explicit_options, *options_copy; 4173 int flags; 4174 QemuOpts *opts; 4175 4176 /* Make sure that the caller remembered to use a drained section. This is 4177 * important to avoid graph changes between the recursive queuing here and 4178 * bdrv_reopen_multiple(). */ 4179 assert(bs->quiesce_counter > 0); 4180 GLOBAL_STATE_CODE(); 4181 4182 if (bs_queue == NULL) { 4183 bs_queue = g_new0(BlockReopenQueue, 1); 4184 QTAILQ_INIT(bs_queue); 4185 } 4186 4187 if (!options) { 4188 options = qdict_new(); 4189 } 4190 4191 /* Check if this BlockDriverState is already in the queue */ 4192 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4193 if (bs == bs_entry->state.bs) { 4194 break; 4195 } 4196 } 4197 4198 /* 4199 * Precedence of options: 4200 * 1. Explicitly passed in options (highest) 4201 * 2. Retained from explicitly set options of bs 4202 * 3. Inherited from parent node 4203 * 4. 
Retained from effective options of bs 4204 */ 4205 4206 /* Old explicitly set values (don't overwrite by inherited value) */ 4207 if (bs_entry || keep_old_opts) { 4208 old_options = qdict_clone_shallow(bs_entry ? 4209 bs_entry->state.explicit_options : 4210 bs->explicit_options); 4211 bdrv_join_options(bs, options, old_options); 4212 qobject_unref(old_options); 4213 } 4214 4215 explicit_options = qdict_clone_shallow(options); 4216 4217 /* Inherit from parent node */ 4218 if (parent_options) { 4219 flags = 0; 4220 klass->inherit_options(role, parent_is_format, &flags, options, 4221 parent_flags, parent_options); 4222 } else { 4223 flags = bdrv_get_flags(bs); 4224 } 4225 4226 if (keep_old_opts) { 4227 /* Old values are used for options that aren't set yet */ 4228 old_options = qdict_clone_shallow(bs->options); 4229 bdrv_join_options(bs, options, old_options); 4230 qobject_unref(old_options); 4231 } 4232 4233 /* We have the final set of options so let's update the flags */ 4234 options_copy = qdict_clone_shallow(options); 4235 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 4236 qemu_opts_absorb_qdict(opts, options_copy, NULL); 4237 update_flags_from_options(&flags, opts); 4238 qemu_opts_del(opts); 4239 qobject_unref(options_copy); 4240 4241 /* bdrv_open_inherit() sets and clears some additional flags internally */ 4242 flags &= ~BDRV_O_PROTOCOL; 4243 if (flags & BDRV_O_RDWR) { 4244 flags |= BDRV_O_ALLOW_RDWR; 4245 } 4246 4247 if (!bs_entry) { 4248 bs_entry = g_new0(BlockReopenQueueEntry, 1); 4249 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); 4250 } else { 4251 qobject_unref(bs_entry->state.options); 4252 qobject_unref(bs_entry->state.explicit_options); 4253 } 4254 4255 bs_entry->state.bs = bs; 4256 bs_entry->state.options = options; 4257 bs_entry->state.explicit_options = explicit_options; 4258 bs_entry->state.flags = flags; 4259 4260 /* 4261 * If keep_old_opts is false then it means that unspecified 4262 * options must be reset to their original 
value. We don't allow 4263 * resetting 'backing' but we need to know if the option is 4264 * missing in order to decide if we have to return an error. 4265 */ 4266 if (!keep_old_opts) { 4267 bs_entry->state.backing_missing = 4268 !qdict_haskey(options, "backing") && 4269 !qdict_haskey(options, "backing.driver"); 4270 } 4271 4272 QLIST_FOREACH(child, &bs->children, next) { 4273 QDict *new_child_options = NULL; 4274 bool child_keep_old = keep_old_opts; 4275 4276 /* reopen can only change the options of block devices that were 4277 * implicitly created and inherited options. For other (referenced) 4278 * block devices, a syntax like "backing.foo" results in an error. */ 4279 if (child->bs->inherits_from != bs) { 4280 continue; 4281 } 4282 4283 /* Check if the options contain a child reference */ 4284 if (qdict_haskey(options, child->name)) { 4285 const char *childref = qdict_get_try_str(options, child->name); 4286 /* 4287 * The current child must not be reopened if the child 4288 * reference is null or points to a different node. 4289 */ 4290 if (g_strcmp0(childref, child->bs->node_name)) { 4291 continue; 4292 } 4293 /* 4294 * If the child reference points to the current child then 4295 * reopen it with its existing set of options (note that 4296 * it can still inherit new options from the parent). 
4297 */ 4298 child_keep_old = true; 4299 } else { 4300 /* Extract child options ("child-name.*") */ 4301 char *child_key_dot = g_strdup_printf("%s.", child->name); 4302 qdict_extract_subqdict(explicit_options, NULL, child_key_dot); 4303 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 4304 g_free(child_key_dot); 4305 } 4306 4307 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 4308 child->klass, child->role, bs->drv->is_format, 4309 options, flags, child_keep_old); 4310 } 4311 4312 return bs_queue; 4313 } 4314 4315 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 4316 BlockDriverState *bs, 4317 QDict *options, bool keep_old_opts) 4318 { 4319 GLOBAL_STATE_CODE(); 4320 4321 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, 4322 NULL, 0, keep_old_opts); 4323 } 4324 4325 void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) 4326 { 4327 GLOBAL_STATE_CODE(); 4328 if (bs_queue) { 4329 BlockReopenQueueEntry *bs_entry, *next; 4330 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4331 qobject_unref(bs_entry->state.explicit_options); 4332 qobject_unref(bs_entry->state.options); 4333 g_free(bs_entry); 4334 } 4335 g_free(bs_queue); 4336 } 4337 } 4338 4339 /* 4340 * Reopen multiple BlockDriverStates atomically & transactionally. 4341 * 4342 * The queue passed in (bs_queue) must have been built up previous 4343 * via bdrv_reopen_queue(). 4344 * 4345 * Reopens all BDS specified in the queue, with the appropriate 4346 * flags. All devices are prepared for reopen, and failure of any 4347 * device will cause all device changes to be abandoned, and intermediate 4348 * data cleaned up. 4349 * 4350 * If all devices prepare successfully, then the changes are committed 4351 * to all devices. 4352 * 4353 * All affected nodes must be drained between bdrv_reopen_queue() and 4354 * bdrv_reopen_multiple(). 4355 * 4356 * To be called from the main thread, with all other AioContexts unlocked. 
*/
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    AioContext *ctx;
    Transaction *tran = tran_new();
    g_autoptr(GHashTable) found = NULL;
    g_autoptr(GSList) refresh_list = NULL;

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bs_queue != NULL);
    GLOBAL_STATE_CODE();

    /* Step 1: flush every queued node, each under its own AioContext */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        ret = bdrv_flush(bs_entry->state.bs);
        aio_context_release(ctx);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Error flushing drive");
            goto abort;
        }
    }

    /*
     * Step 2: prepare every node.  Entries are marked as prepared one by
     * one so that the abort path only rolls back those that got this far.
     */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        assert(bs_entry->state.bs->quiesce_counter > 0);
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
        aio_context_release(ctx);
        if (ret < 0) {
            goto abort;
        }
        bs_entry->prepared = true;
    }

    /*
     * Step 3: collect the nodes whose permissions have to be refreshed:
     * each reopened node plus any old backing/file child recorded during
     * prepare.
     */
    found = g_hash_table_new(NULL, NULL);
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        BDRVReopenState *state = &bs_entry->state;

        refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
        if (state->old_backing_bs) {
            refresh_list = bdrv_topological_dfs(refresh_list, found,
                                                state->old_backing_bs);
        }
        if (state->old_file_bs) {
            refresh_list = bdrv_topological_dfs(refresh_list, found,
                                                state->old_file_bs);
        }
    }

    /*
     * Note that the file-posix driver relies on the permission update done
     * during reopen (even if no permission changed), because it wants "new"
     * permissions for reconfiguring the fd and that's why it does it in
     * raw_check_perm(), not in raw_reopen_prepare() which is called with
     * "old" permissions.
     */
    ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
    if (ret < 0) {
        goto abort;
    }

    /*
     * If we reach this point, we have success and just need to apply the
     * changes.
     *
     * Reverse order is used to accommodate the qcow2 driver: on commit it
     * needs to write the IN_USE flag to the image, to mark bitmaps in the
     * image as invalid.  But children usually come after parents in the
     * reopen queue, so go from the last to the first element.
     */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        bdrv_reopen_commit(&bs_entry->state);
        aio_context_release(ctx);
    }

    tran_commit(tran);

    /* Optional per-driver hook, run only after the transaction is committed */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        BlockDriverState *bs = bs_entry->state.bs;

        if (bs->drv->bdrv_reopen_commit_post) {
            ctx = bdrv_get_aio_context(bs);
            aio_context_acquire(ctx);
            bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
            aio_context_release(ctx);
        }
    }

    ret = 0;
    goto cleanup;

abort:
    /* Roll back the transaction, then abort every entry that was prepared */
    tran_abort(tran);
    QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (bs_entry->prepared) {
            ctx = bdrv_get_aio_context(bs_entry->state.bs);
            aio_context_acquire(ctx);
            bdrv_reopen_abort(&bs_entry->state);
            aio_context_release(ctx);
        }
    }

cleanup:
    /* The queue is consumed on both the success and the failure path */
    bdrv_reopen_queue_free(bs_queue);

    return ret;
}

/*
 * Reopen the single node @bs with @opts.
 *
 * The subtree of @bs is drained for the duration of the operation, a
 * one-element reopen queue is built and handed to bdrv_reopen_multiple().
 * If @bs lives in an AioContext other than the main one, that context is
 * released before queueing and re-acquired afterwards (so the caller
 * presumably holds it on entry -- NOTE(review): confirm against callers).
 *
 * Returns the result of bdrv_reopen_multiple().
 */
int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
                Error **errp)
{
    AioContext *ctx = bdrv_get_aio_context(bs);
    BlockReopenQueue *queue;
    int ret;

    GLOBAL_STATE_CODE();

    bdrv_subtree_drained_begin(bs);
    if (ctx != qemu_get_aio_context()) {
        aio_context_release(ctx);
    }

    queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
    ret = bdrv_reopen_multiple(queue, errp);

    if (ctx != qemu_get_aio_context()) {
        aio_context_acquire(ctx);
    }
    bdrv_subtree_drained_end(bs);

    return ret;
}

/*
 * Reopen @bs changing only its read-only state: builds a fresh options
 * QDict containing just BDRV_OPT_READ_ONLY = @read_only and calls
 * bdrv_reopen() with keep_old_opts = true.
 */
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
                              Error **errp)
{
    QDict *opts = qdict_new();

    GLOBAL_STATE_CODE();

    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);

    return bdrv_reopen(bs, opts, true, errp);
}

/*
 * Take a BDRVReopenState and check if the value of 'backing' (if
 * @is_backing is true) or 'file' (otherwise) in the
 * reopen_state->options QDict is valid or not.
 *
 * If the option is missing from the QDict then return 0.
 *
 * If the option contains the node name of the current child of
 * reopen_state->bs (possibly reached by skipping implicit filters)
 * then return 0.
 *
 * If the option contains a different node name (or, for 'backing' only,
 * is null) then check whether the current child can be replaced with
 * the new one.  If that's the case then the old child node is recorded
 * in reopen_state->old_backing_bs or reopen_state->old_file_bs and the
 * replacement is performed through bdrv_set_file_or_backing_noperm()
 * using @tran.
 *
 * Return 0 on success, otherwise return < 0 and set @errp.
 */
static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
                                             bool is_backing, Transaction *tran,
                                             Error **errp)
{
    BlockDriverState *bs = reopen_state->bs;
    BlockDriverState *new_child_bs;
    BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) :
                                                  child_bs(bs->file);
    const char *child_name = is_backing ? "backing" : "file";
    QObject *value;
    const char *str;

    GLOBAL_STATE_CODE();

    value = qdict_get(reopen_state->options, child_name);
    if (value == NULL) {
        return 0;
    }

    switch (qobject_type(value)) {
    case QTYPE_QNULL:
        assert(is_backing); /* The 'file' option does not allow a null value */
        new_child_bs = NULL;
        break;
    case QTYPE_QSTRING:
        str = qstring_get_str(qobject_to(QString, value));
        new_child_bs = bdrv_lookup_bs(NULL, str, errp);
        if (new_child_bs == NULL) {
            return -EINVAL;
        } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
            error_setg(errp, "Making '%s' a %s child of '%s' would create a "
                       "cycle", str, child_name, bs->node_name);
            return -EINVAL;
        }
        break;
    default:
        /*
         * The options QDict has been flattened, so 'backing' and 'file'
         * do not allow any other data type here.
         */
        g_assert_not_reached();
    }

    /* Nothing to do if the requested child is the one already attached */
    if (old_child_bs == new_child_bs) {
        return 0;
    }

    if (old_child_bs) {
        /* Naming the node below the implicit filters counts as "unchanged" */
        if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
            return 0;
        }

        if (old_child_bs->implicit) {
            error_setg(errp, "Cannot replace implicit %s child of %s",
                       child_name, bs->node_name);
            return -EPERM;
        }
    }

    if (bs->drv->is_filter && !old_child_bs) {
        /*
         * Filters always have a file or a backing child, so we are trying to
         * change the wrong child
         */
        error_setg(errp, "'%s' is a %s filter node that does not support a "
                   "%s child", bs->node_name, bs->drv->format_name, child_name);
        return -EINVAL;
    }

    /* Remember the old child; bdrv_reopen_multiple() refreshes its perms */
    if (is_backing) {
        reopen_state->old_backing_bs = old_child_bs;
    } else {
        reopen_state->old_file_bs = old_child_bs;
    }

    return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
                                           tran, errp);
}

/*
 * Prepares a BlockDriverState for reopen.
All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state carries the node to reopen (reopen_state->bs) together
 *              with the new flags and options
 * queue is the reopen queue; it is passed on to the driver's
 *       .bdrv_reopen_prepare()
 * change_child_tran is the transaction handed to
 *                   bdrv_reopen_parse_file_or_backing() when the
 *                   'backing' or 'file' child is changed
 *
 * Returns 0 on success, non-zero on error.  On error errp will be set
 * as well.
 *
 * If the failure happens after the driver's .bdrv_reopen_prepare() has
 * succeeded, the driver's .bdrv_reopen_abort() (if any) is called here to
 * clean up the staged data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue,
                               Transaction *change_child_tran, Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    bool drv_prepared = false;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    GLOBAL_STATE_CODE();
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* 'discard' is taken out of opts; the string is freed at the end */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver failed without a message: supply a generic one */
                bdrv_refresh_filename(reopen_state->bs);
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* From here on a failure must undo the driver's prepare (see 'error:') */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Allow changing the 'file' option. In this case NULL is not allowed */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "file");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                /* 'child' is NULL after the loop if no child has this name */
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type.  In the future, this should be remedied by correctly typing
             * all options.  For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

error:
    /* Reached on success as well: the cleanup below is common to both paths */
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
4835 */ 4836 static void bdrv_reopen_commit(BDRVReopenState *reopen_state) 4837 { 4838 BlockDriver *drv; 4839 BlockDriverState *bs; 4840 BdrvChild *child; 4841 4842 assert(reopen_state != NULL); 4843 bs = reopen_state->bs; 4844 drv = bs->drv; 4845 assert(drv != NULL); 4846 GLOBAL_STATE_CODE(); 4847 4848 /* If there are any driver level actions to take */ 4849 if (drv->bdrv_reopen_commit) { 4850 drv->bdrv_reopen_commit(reopen_state); 4851 } 4852 4853 /* set BDS specific flags now */ 4854 qobject_unref(bs->explicit_options); 4855 qobject_unref(bs->options); 4856 qobject_ref(reopen_state->explicit_options); 4857 qobject_ref(reopen_state->options); 4858 4859 bs->explicit_options = reopen_state->explicit_options; 4860 bs->options = reopen_state->options; 4861 bs->open_flags = reopen_state->flags; 4862 bs->detect_zeroes = reopen_state->detect_zeroes; 4863 4864 /* Remove child references from bs->options and bs->explicit_options. 4865 * Child options were already removed in bdrv_reopen_queue_child() */ 4866 QLIST_FOREACH(child, &bs->children, next) { 4867 qdict_del(bs->explicit_options, child->name); 4868 qdict_del(bs->options, child->name); 4869 } 4870 /* backing is probably removed, so it's not handled by previous loop */ 4871 qdict_del(bs->explicit_options, "backing"); 4872 qdict_del(bs->options, "backing"); 4873 4874 bdrv_refresh_limits(bs, NULL, NULL); 4875 } 4876 4877 /* 4878 * Abort the reopen, and delete and free the staged changes in 4879 * reopen_state 4880 */ 4881 static void bdrv_reopen_abort(BDRVReopenState *reopen_state) 4882 { 4883 BlockDriver *drv; 4884 4885 assert(reopen_state != NULL); 4886 drv = reopen_state->bs->drv; 4887 assert(drv != NULL); 4888 GLOBAL_STATE_CODE(); 4889 4890 if (drv->bdrv_reopen_abort) { 4891 drv->bdrv_reopen_abort(reopen_state); 4892 } 4893 } 4894 4895 4896 static void bdrv_close(BlockDriverState *bs) 4897 { 4898 BdrvAioNotifier *ban, *ban_next; 4899 BdrvChild *child, *next; 4900 4901 GLOBAL_STATE_CODE(); 4902 assert(!bs->refcnt); 
4903 4904 bdrv_drained_begin(bs); /* complete I/O */ 4905 bdrv_flush(bs); 4906 bdrv_drain(bs); /* in case flush left pending I/O */ 4907 4908 if (bs->drv) { 4909 if (bs->drv->bdrv_close) { 4910 /* Must unfreeze all children, so bdrv_unref_child() works */ 4911 bs->drv->bdrv_close(bs); 4912 } 4913 bs->drv = NULL; 4914 } 4915 4916 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 4917 bdrv_unref_child(bs, child); 4918 } 4919 4920 assert(!bs->backing); 4921 assert(!bs->file); 4922 g_free(bs->opaque); 4923 bs->opaque = NULL; 4924 qatomic_set(&bs->copy_on_read, 0); 4925 bs->backing_file[0] = '\0'; 4926 bs->backing_format[0] = '\0'; 4927 bs->total_sectors = 0; 4928 bs->encrypted = false; 4929 bs->sg = false; 4930 qobject_unref(bs->options); 4931 qobject_unref(bs->explicit_options); 4932 bs->options = NULL; 4933 bs->explicit_options = NULL; 4934 qobject_unref(bs->full_open_options); 4935 bs->full_open_options = NULL; 4936 g_free(bs->block_status_cache); 4937 bs->block_status_cache = NULL; 4938 4939 bdrv_release_named_dirty_bitmaps(bs); 4940 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 4941 4942 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 4943 g_free(ban); 4944 } 4945 QLIST_INIT(&bs->aio_notifiers); 4946 bdrv_drained_end(bs); 4947 4948 /* 4949 * If we're still inside some bdrv_drain_all_begin()/end() sections, end 4950 * them now since this BDS won't exist anymore when bdrv_drain_all_end() 4951 * gets called. 
4952 */ 4953 if (bs->quiesce_counter) { 4954 bdrv_drain_all_end_quiesce(bs); 4955 } 4956 } 4957 4958 void bdrv_close_all(void) 4959 { 4960 GLOBAL_STATE_CODE(); 4961 assert(job_next(NULL) == NULL); 4962 4963 /* Drop references from requests still in flight, such as canceled block 4964 * jobs whose AIO context has not been polled yet */ 4965 bdrv_drain_all(); 4966 4967 blk_remove_all_bs(); 4968 blockdev_close_all_bdrv_states(); 4969 4970 assert(QTAILQ_EMPTY(&all_bdrv_states)); 4971 } 4972 4973 static bool should_update_child(BdrvChild *c, BlockDriverState *to) 4974 { 4975 GQueue *queue; 4976 GHashTable *found; 4977 bool ret; 4978 4979 if (c->klass->stay_at_node) { 4980 return false; 4981 } 4982 4983 /* If the child @c belongs to the BDS @to, replacing the current 4984 * c->bs by @to would mean to create a loop. 4985 * 4986 * Such a case occurs when appending a BDS to a backing chain. 4987 * For instance, imagine the following chain: 4988 * 4989 * guest device -> node A -> further backing chain... 4990 * 4991 * Now we create a new BDS B which we want to put on top of this 4992 * chain, so we first attach A as its backing node: 4993 * 4994 * node B 4995 * | 4996 * v 4997 * guest device -> node A -> further backing chain... 4998 * 4999 * Finally we want to replace A by B. When doing that, we want to 5000 * replace all pointers to A by pointers to B -- except for the 5001 * pointer from B because (1) that would create a loop, and (2) 5002 * that pointer should simply stay intact: 5003 * 5004 * guest device -> node B 5005 * | 5006 * v 5007 * node A -> further backing chain... 5008 * 5009 * In general, when replacing a node A (c->bs) by a node B (@to), 5010 * if A is a child of B, that means we cannot replace A by B there 5011 * because that would create a loop. Silently detaching A from B 5012 * is also not really an option. So overall just leaving A in 5013 * place there is the most sensible choice. 
5014 * 5015 * We would also create a loop in any cases where @c is only 5016 * indirectly referenced by @to. Prevent this by returning false 5017 * if @c is found (by breadth-first search) anywhere in the whole 5018 * subtree of @to. 5019 */ 5020 5021 ret = true; 5022 found = g_hash_table_new(NULL, NULL); 5023 g_hash_table_add(found, to); 5024 queue = g_queue_new(); 5025 g_queue_push_tail(queue, to); 5026 5027 while (!g_queue_is_empty(queue)) { 5028 BlockDriverState *v = g_queue_pop_head(queue); 5029 BdrvChild *c2; 5030 5031 QLIST_FOREACH(c2, &v->children, next) { 5032 if (c2 == c) { 5033 ret = false; 5034 break; 5035 } 5036 5037 if (g_hash_table_contains(found, c2->bs)) { 5038 continue; 5039 } 5040 5041 g_queue_push_tail(queue, c2->bs); 5042 g_hash_table_add(found, c2->bs); 5043 } 5044 } 5045 5046 g_queue_free(queue); 5047 g_hash_table_destroy(found); 5048 5049 return ret; 5050 } 5051 5052 static void bdrv_remove_child_commit(void *opaque) 5053 { 5054 GLOBAL_STATE_CODE(); 5055 bdrv_child_free(opaque); 5056 } 5057 5058 static TransactionActionDrv bdrv_remove_child_drv = { 5059 .commit = bdrv_remove_child_commit, 5060 }; 5061 5062 /* Function doesn't update permissions, caller is responsible for this. 
*/ 5063 static void bdrv_remove_child(BdrvChild *child, Transaction *tran) 5064 { 5065 if (!child) { 5066 return; 5067 } 5068 5069 if (child->bs) { 5070 bdrv_replace_child_tran(child, NULL, tran); 5071 } 5072 5073 tran_add(tran, &bdrv_remove_child_drv, child); 5074 } 5075 5076 /* 5077 * A function to remove backing-chain child of @bs if exists: cow child for 5078 * format nodes (always .backing) and filter child for filters (may be .file or 5079 * .backing) 5080 */ 5081 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 5082 Transaction *tran) 5083 { 5084 bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); 5085 } 5086 5087 static int bdrv_replace_node_noperm(BlockDriverState *from, 5088 BlockDriverState *to, 5089 bool auto_skip, Transaction *tran, 5090 Error **errp) 5091 { 5092 BdrvChild *c, *next; 5093 5094 GLOBAL_STATE_CODE(); 5095 5096 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 5097 assert(c->bs == from); 5098 if (!should_update_child(c, to)) { 5099 if (auto_skip) { 5100 continue; 5101 } 5102 error_setg(errp, "Should not change '%s' link to '%s'", 5103 c->name, from->node_name); 5104 return -EINVAL; 5105 } 5106 if (c->frozen) { 5107 error_setg(errp, "Cannot change '%s' link to '%s'", 5108 c->name, from->node_name); 5109 return -EPERM; 5110 } 5111 bdrv_replace_child_tran(c, to, tran); 5112 } 5113 5114 return 0; 5115 } 5116 5117 /* 5118 * With auto_skip=true bdrv_replace_node_common skips updating from parents 5119 * if it creates a parent-child relation loop or if parent is block-job. 5120 * 5121 * With auto_skip=false the error is returned if from has a parent which should 5122 * not be updated. 5123 * 5124 * With @detach_subchain=true @to must be in a backing chain of @from. In this 5125 * case backing link of the cow-parent of @to is removed. 
5126 */ 5127 static int bdrv_replace_node_common(BlockDriverState *from, 5128 BlockDriverState *to, 5129 bool auto_skip, bool detach_subchain, 5130 Error **errp) 5131 { 5132 Transaction *tran = tran_new(); 5133 g_autoptr(GHashTable) found = NULL; 5134 g_autoptr(GSList) refresh_list = NULL; 5135 BlockDriverState *to_cow_parent = NULL; 5136 int ret; 5137 5138 GLOBAL_STATE_CODE(); 5139 5140 if (detach_subchain) { 5141 assert(bdrv_chain_contains(from, to)); 5142 assert(from != to); 5143 for (to_cow_parent = from; 5144 bdrv_filter_or_cow_bs(to_cow_parent) != to; 5145 to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent)) 5146 { 5147 ; 5148 } 5149 } 5150 5151 /* Make sure that @from doesn't go away until we have successfully attached 5152 * all of its parents to @to. */ 5153 bdrv_ref(from); 5154 5155 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 5156 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); 5157 bdrv_drained_begin(from); 5158 5159 /* 5160 * Do the replacement without permission update. 5161 * Replacement may influence the permissions, we should calculate new 5162 * permissions based on new graph. If we fail, we'll roll-back the 5163 * replacement. 
5164 */ 5165 ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp); 5166 if (ret < 0) { 5167 goto out; 5168 } 5169 5170 if (detach_subchain) { 5171 bdrv_remove_filter_or_cow_child(to_cow_parent, tran); 5172 } 5173 5174 found = g_hash_table_new(NULL, NULL); 5175 5176 refresh_list = bdrv_topological_dfs(refresh_list, found, to); 5177 refresh_list = bdrv_topological_dfs(refresh_list, found, from); 5178 5179 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); 5180 if (ret < 0) { 5181 goto out; 5182 } 5183 5184 ret = 0; 5185 5186 out: 5187 tran_finalize(tran, ret); 5188 5189 bdrv_drained_end(from); 5190 bdrv_unref(from); 5191 5192 return ret; 5193 } 5194 5195 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, 5196 Error **errp) 5197 { 5198 GLOBAL_STATE_CODE(); 5199 5200 return bdrv_replace_node_common(from, to, true, false, errp); 5201 } 5202 5203 int bdrv_drop_filter(BlockDriverState *bs, Error **errp) 5204 { 5205 GLOBAL_STATE_CODE(); 5206 5207 return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, 5208 errp); 5209 } 5210 5211 /* 5212 * Add new bs contents at the top of an image chain while the chain is 5213 * live, while keeping required fields on the top layer. 5214 * 5215 * This will modify the BlockDriverState fields, and swap contents 5216 * between bs_new and bs_top. Both bs_new and bs_top are modified. 5217 * 5218 * bs_new must not be attached to a BlockBackend and must not have backing 5219 * child. 5220 * 5221 * This function does not create any image files. 
5222 */ 5223 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 5224 Error **errp) 5225 { 5226 int ret; 5227 BdrvChild *child; 5228 Transaction *tran = tran_new(); 5229 5230 GLOBAL_STATE_CODE(); 5231 5232 assert(!bs_new->backing); 5233 5234 child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", 5235 &child_of_bds, bdrv_backing_role(bs_new), 5236 tran, errp); 5237 if (!child) { 5238 ret = -EINVAL; 5239 goto out; 5240 } 5241 5242 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); 5243 if (ret < 0) { 5244 goto out; 5245 } 5246 5247 ret = bdrv_refresh_perms(bs_new, errp); 5248 out: 5249 tran_finalize(tran, ret); 5250 5251 bdrv_refresh_limits(bs_top, NULL, NULL); 5252 5253 return ret; 5254 } 5255 5256 /* Not for empty child */ 5257 int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, 5258 Error **errp) 5259 { 5260 int ret; 5261 Transaction *tran = tran_new(); 5262 g_autoptr(GHashTable) found = NULL; 5263 g_autoptr(GSList) refresh_list = NULL; 5264 BlockDriverState *old_bs = child->bs; 5265 5266 GLOBAL_STATE_CODE(); 5267 5268 bdrv_ref(old_bs); 5269 bdrv_drained_begin(old_bs); 5270 bdrv_drained_begin(new_bs); 5271 5272 bdrv_replace_child_tran(child, new_bs, tran); 5273 5274 found = g_hash_table_new(NULL, NULL); 5275 refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); 5276 refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); 5277 5278 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); 5279 5280 tran_finalize(tran, ret); 5281 5282 bdrv_drained_end(old_bs); 5283 bdrv_drained_end(new_bs); 5284 bdrv_unref(old_bs); 5285 5286 return ret; 5287 } 5288 5289 static void bdrv_delete(BlockDriverState *bs) 5290 { 5291 assert(bdrv_op_blocker_is_empty(bs)); 5292 assert(!bs->refcnt); 5293 GLOBAL_STATE_CODE(); 5294 5295 /* remove from list, if necessary */ 5296 if (bs->node_name[0] != '\0') { 5297 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 5298 } 5299 QTAILQ_REMOVE(&all_bdrv_states, bs, 
bs_list); 5300 5301 bdrv_close(bs); 5302 5303 g_free(bs); 5304 } 5305 5306 5307 /* 5308 * Replace @bs by newly created block node. 5309 * 5310 * @options is a QDict of options to pass to the block drivers, or NULL for an 5311 * empty set of options. The reference to the QDict belongs to the block layer 5312 * after the call (even on failure), so if the caller intends to reuse the 5313 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 5314 */ 5315 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, 5316 int flags, Error **errp) 5317 { 5318 ERRP_GUARD(); 5319 int ret; 5320 BlockDriverState *new_node_bs = NULL; 5321 const char *drvname, *node_name; 5322 BlockDriver *drv; 5323 5324 drvname = qdict_get_try_str(options, "driver"); 5325 if (!drvname) { 5326 error_setg(errp, "driver is not specified"); 5327 goto fail; 5328 } 5329 5330 drv = bdrv_find_format(drvname); 5331 if (!drv) { 5332 error_setg(errp, "Unknown driver: '%s'", drvname); 5333 goto fail; 5334 } 5335 5336 node_name = qdict_get_try_str(options, "node-name"); 5337 5338 GLOBAL_STATE_CODE(); 5339 5340 new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, 5341 errp); 5342 options = NULL; /* bdrv_new_open_driver() eats options */ 5343 if (!new_node_bs) { 5344 error_prepend(errp, "Could not create node: "); 5345 goto fail; 5346 } 5347 5348 bdrv_drained_begin(bs); 5349 ret = bdrv_replace_node(bs, new_node_bs, errp); 5350 bdrv_drained_end(bs); 5351 5352 if (ret < 0) { 5353 error_prepend(errp, "Could not replace node: "); 5354 goto fail; 5355 } 5356 5357 return new_node_bs; 5358 5359 fail: 5360 qobject_unref(options); 5361 bdrv_unref(new_node_bs); 5362 return NULL; 5363 } 5364 5365 /* 5366 * Run consistency checks on an image 5367 * 5368 * Returns 0 if the check could be completed (it doesn't mean that the image is 5369 * free of errors) or -errno when an internal error occurred. The results of the 5370 * check are stored in res. 
5371 */ 5372 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 5373 BdrvCheckResult *res, BdrvCheckMode fix) 5374 { 5375 IO_CODE(); 5376 if (bs->drv == NULL) { 5377 return -ENOMEDIUM; 5378 } 5379 if (bs->drv->bdrv_co_check == NULL) { 5380 return -ENOTSUP; 5381 } 5382 5383 memset(res, 0, sizeof(*res)); 5384 return bs->drv->bdrv_co_check(bs, res, fix); 5385 } 5386 5387 /* 5388 * Return values: 5389 * 0 - success 5390 * -EINVAL - backing format specified, but no file 5391 * -ENOSPC - can't update the backing file because no space is left in the 5392 * image file header 5393 * -ENOTSUP - format driver doesn't support changing the backing file 5394 */ 5395 int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, 5396 const char *backing_fmt, bool require) 5397 { 5398 BlockDriver *drv = bs->drv; 5399 int ret; 5400 5401 GLOBAL_STATE_CODE(); 5402 5403 if (!drv) { 5404 return -ENOMEDIUM; 5405 } 5406 5407 /* Backing file format doesn't make sense without a backing file */ 5408 if (backing_fmt && !backing_file) { 5409 return -EINVAL; 5410 } 5411 5412 if (require && backing_file && !backing_fmt) { 5413 return -EINVAL; 5414 } 5415 5416 if (drv->bdrv_change_backing_file != NULL) { 5417 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 5418 } else { 5419 ret = -ENOTSUP; 5420 } 5421 5422 if (ret == 0) { 5423 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 5424 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 5425 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 5426 backing_file ?: ""); 5427 } 5428 return ret; 5429 } 5430 5431 /* 5432 * Finds the first non-filter node above bs in the chain between 5433 * active and bs. The returned node is either an immediate parent of 5434 * bs, or there are only filter nodes between the two. 5435 * 5436 * Returns NULL if bs is not found in active's image chain, 5437 * or if active == bs. 
5438 * 5439 * Returns the bottommost base image if bs == NULL. 5440 */ 5441 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 5442 BlockDriverState *bs) 5443 { 5444 5445 GLOBAL_STATE_CODE(); 5446 5447 bs = bdrv_skip_filters(bs); 5448 active = bdrv_skip_filters(active); 5449 5450 while (active) { 5451 BlockDriverState *next = bdrv_backing_chain_next(active); 5452 if (bs == next) { 5453 return active; 5454 } 5455 active = next; 5456 } 5457 5458 return NULL; 5459 } 5460 5461 /* Given a BDS, searches for the base layer. */ 5462 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 5463 { 5464 GLOBAL_STATE_CODE(); 5465 5466 return bdrv_find_overlay(bs, NULL); 5467 } 5468 5469 /* 5470 * Return true if at least one of the COW (backing) and filter links 5471 * between @bs and @base is frozen. @errp is set if that's the case. 5472 * @base must be reachable from @bs, or NULL. 5473 */ 5474 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 5475 Error **errp) 5476 { 5477 BlockDriverState *i; 5478 BdrvChild *child; 5479 5480 GLOBAL_STATE_CODE(); 5481 5482 for (i = bs; i != base; i = child_bs(child)) { 5483 child = bdrv_filter_or_cow_child(i); 5484 5485 if (child && child->frozen) { 5486 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 5487 child->name, i->node_name, child->bs->node_name); 5488 return true; 5489 } 5490 } 5491 5492 return false; 5493 } 5494 5495 /* 5496 * Freeze all COW (backing) and filter links between @bs and @base. 5497 * If any of the links is already frozen the operation is aborted and 5498 * none of the links are modified. 5499 * @base must be reachable from @bs, or NULL. 5500 * Returns 0 on success. On failure returns < 0 and sets @errp. 
5501 */ 5502 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 5503 Error **errp) 5504 { 5505 BlockDriverState *i; 5506 BdrvChild *child; 5507 5508 GLOBAL_STATE_CODE(); 5509 5510 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 5511 return -EPERM; 5512 } 5513 5514 for (i = bs; i != base; i = child_bs(child)) { 5515 child = bdrv_filter_or_cow_child(i); 5516 if (child && child->bs->never_freeze) { 5517 error_setg(errp, "Cannot freeze '%s' link to '%s'", 5518 child->name, child->bs->node_name); 5519 return -EPERM; 5520 } 5521 } 5522 5523 for (i = bs; i != base; i = child_bs(child)) { 5524 child = bdrv_filter_or_cow_child(i); 5525 if (child) { 5526 child->frozen = true; 5527 } 5528 } 5529 5530 return 0; 5531 } 5532 5533 /* 5534 * Unfreeze all COW (backing) and filter links between @bs and @base. 5535 * The caller must ensure that all links are frozen before using this 5536 * function. 5537 * @base must be reachable from @bs, or NULL. 5538 */ 5539 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 5540 { 5541 BlockDriverState *i; 5542 BdrvChild *child; 5543 5544 GLOBAL_STATE_CODE(); 5545 5546 for (i = bs; i != base; i = child_bs(child)) { 5547 child = bdrv_filter_or_cow_child(i); 5548 if (child) { 5549 assert(child->frozen); 5550 child->frozen = false; 5551 } 5552 } 5553 } 5554 5555 /* 5556 * Drops images above 'base' up to and including 'top', and sets the image 5557 * above 'top' to have base as its backing file. 5558 * 5559 * Requires that the overlay to 'top' is opened r/w, so that the backing file 5560 * information in 'bs' can be properly updated. 
5561 * 5562 * E.g., this will convert the following chain: 5563 * bottom <- base <- intermediate <- top <- active 5564 * 5565 * to 5566 * 5567 * bottom <- base <- active 5568 * 5569 * It is allowed for bottom==base, in which case it converts: 5570 * 5571 * base <- intermediate <- top <- active 5572 * 5573 * to 5574 * 5575 * base <- active 5576 * 5577 * If backing_file_str is non-NULL, it will be used when modifying top's 5578 * overlay image metadata. 5579 * 5580 * Error conditions: 5581 * if active == top, that is considered an error 5582 * 5583 */ 5584 int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, 5585 const char *backing_file_str) 5586 { 5587 BlockDriverState *explicit_top = top; 5588 bool update_inherits_from; 5589 BdrvChild *c; 5590 Error *local_err = NULL; 5591 int ret = -EIO; 5592 g_autoptr(GSList) updated_children = NULL; 5593 GSList *p; 5594 5595 GLOBAL_STATE_CODE(); 5596 5597 bdrv_ref(top); 5598 bdrv_subtree_drained_begin(top); 5599 5600 if (!top->drv || !base->drv) { 5601 goto exit; 5602 } 5603 5604 /* Make sure that base is in the backing chain of top */ 5605 if (!bdrv_chain_contains(top, base)) { 5606 goto exit; 5607 } 5608 5609 /* If 'base' recursively inherits from 'top' then we should set 5610 * base->inherits_from to top->inherits_from after 'top' and all 5611 * other intermediate nodes have been dropped. 5612 * If 'top' is an implicit node (e.g. "commit_top") we should skip 5613 * it because no one inherits from it. We use explicit_top for that. 
*/ 5614 explicit_top = bdrv_skip_implicit_filters(explicit_top); 5615 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top); 5616 5617 /* success - we can delete the intermediate states, and link top->base */ 5618 if (!backing_file_str) { 5619 bdrv_refresh_filename(base); 5620 backing_file_str = base->filename; 5621 } 5622 5623 QLIST_FOREACH(c, &top->parents, next_parent) { 5624 updated_children = g_slist_prepend(updated_children, c); 5625 } 5626 5627 /* 5628 * It seems correct to pass detach_subchain=true here, but it triggers 5629 * one more yet not fixed bug, when due to nested aio_poll loop we switch to 5630 * another drained section, which modify the graph (for example, removing 5631 * the child, which we keep in updated_children list). So, it's a TODO. 5632 * 5633 * Note, bug triggered if pass detach_subchain=true here and run 5634 * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash. 5635 * That's a FIXME. 5636 */ 5637 bdrv_replace_node_common(top, base, false, false, &local_err); 5638 if (local_err) { 5639 error_report_err(local_err); 5640 goto exit; 5641 } 5642 5643 for (p = updated_children; p; p = p->next) { 5644 c = p->data; 5645 5646 if (c->klass->update_filename) { 5647 ret = c->klass->update_filename(c, base, backing_file_str, 5648 &local_err); 5649 if (ret < 0) { 5650 /* 5651 * TODO: Actually, we want to rollback all previous iterations 5652 * of this loop, and (which is almost impossible) previous 5653 * bdrv_replace_node()... 5654 * 5655 * Note, that c->klass->update_filename may lead to permission 5656 * update, so it's a bad idea to call it inside permission 5657 * update transaction of bdrv_replace_node. 
5658 */ 5659 error_report_err(local_err); 5660 goto exit; 5661 } 5662 } 5663 } 5664 5665 if (update_inherits_from) { 5666 base->inherits_from = explicit_top->inherits_from; 5667 } 5668 5669 ret = 0; 5670 exit: 5671 bdrv_subtree_drained_end(top); 5672 bdrv_unref(top); 5673 return ret; 5674 } 5675 5676 /** 5677 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that 5678 * sums the size of all data-bearing children. (This excludes backing 5679 * children.) 5680 */ 5681 static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) 5682 { 5683 BdrvChild *child; 5684 int64_t child_size, sum = 0; 5685 5686 QLIST_FOREACH(child, &bs->children, next) { 5687 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 5688 BDRV_CHILD_FILTERED)) 5689 { 5690 child_size = bdrv_get_allocated_file_size(child->bs); 5691 if (child_size < 0) { 5692 return child_size; 5693 } 5694 sum += child_size; 5695 } 5696 } 5697 5698 return sum; 5699 } 5700 5701 /** 5702 * Length of a allocated file in bytes. Sparse files are counted by actual 5703 * allocated space. Return < 0 if error or unknown. 5704 */ 5705 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 5706 { 5707 BlockDriver *drv = bs->drv; 5708 IO_CODE(); 5709 5710 if (!drv) { 5711 return -ENOMEDIUM; 5712 } 5713 if (drv->bdrv_get_allocated_file_size) { 5714 return drv->bdrv_get_allocated_file_size(bs); 5715 } 5716 5717 if (drv->bdrv_file_open) { 5718 /* 5719 * Protocol drivers default to -ENOTSUP (most of their data is 5720 * not stored in any of their children (if they even have any), 5721 * so there is no generic way to figure it out). 
5722 */ 5723 return -ENOTSUP; 5724 } else if (drv->is_filter) { 5725 /* Filter drivers default to the size of their filtered child */ 5726 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); 5727 } else { 5728 /* Other drivers default to summing their children's sizes */ 5729 return bdrv_sum_allocated_file_size(bs); 5730 } 5731 } 5732 5733 /* 5734 * bdrv_measure: 5735 * @drv: Format driver 5736 * @opts: Creation options for new image 5737 * @in_bs: Existing image containing data for new image (may be NULL) 5738 * @errp: Error object 5739 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 5740 * or NULL on error 5741 * 5742 * Calculate file size required to create a new image. 5743 * 5744 * If @in_bs is given then space for allocated clusters and zero clusters 5745 * from that image are included in the calculation. If @opts contains a 5746 * backing file that is shared by @in_bs then backing clusters may be omitted 5747 * from the calculation. 5748 * 5749 * If @in_bs is NULL then the calculation includes no allocated clusters 5750 * unless a preallocation option is given in @opts. 5751 * 5752 * Note that @in_bs may use a different BlockDriver from @drv. 5753 * 5754 * If an error occurs the @errp pointer is set. 5755 */ 5756 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 5757 BlockDriverState *in_bs, Error **errp) 5758 { 5759 IO_CODE(); 5760 if (!drv->bdrv_measure) { 5761 error_setg(errp, "Block driver '%s' does not support size measurement", 5762 drv->format_name); 5763 return NULL; 5764 } 5765 5766 return drv->bdrv_measure(opts, in_bs, errp); 5767 } 5768 5769 /** 5770 * Return number of sectors on success, -errno on error. 
5771 */ 5772 int64_t bdrv_nb_sectors(BlockDriverState *bs) 5773 { 5774 BlockDriver *drv = bs->drv; 5775 IO_CODE(); 5776 5777 if (!drv) 5778 return -ENOMEDIUM; 5779 5780 if (drv->has_variable_length) { 5781 int ret = refresh_total_sectors(bs, bs->total_sectors); 5782 if (ret < 0) { 5783 return ret; 5784 } 5785 } 5786 return bs->total_sectors; 5787 } 5788 5789 /** 5790 * Return length in bytes on success, -errno on error. 5791 * The length is always a multiple of BDRV_SECTOR_SIZE. 5792 */ 5793 int64_t bdrv_getlength(BlockDriverState *bs) 5794 { 5795 int64_t ret = bdrv_nb_sectors(bs); 5796 IO_CODE(); 5797 5798 if (ret < 0) { 5799 return ret; 5800 } 5801 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 5802 return -EFBIG; 5803 } 5804 return ret * BDRV_SECTOR_SIZE; 5805 } 5806 5807 /* return 0 as number of sectors if no device present or error */ 5808 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 5809 { 5810 int64_t nb_sectors = bdrv_nb_sectors(bs); 5811 IO_CODE(); 5812 5813 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 5814 } 5815 5816 bool bdrv_is_sg(BlockDriverState *bs) 5817 { 5818 IO_CODE(); 5819 return bs->sg; 5820 } 5821 5822 /** 5823 * Return whether the given node supports compressed writes. 5824 */ 5825 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 5826 { 5827 BlockDriverState *filtered; 5828 IO_CODE(); 5829 5830 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 5831 return false; 5832 } 5833 5834 filtered = bdrv_filter_bs(bs); 5835 if (filtered) { 5836 /* 5837 * Filters can only forward compressed writes, so we have to 5838 * check the child. 5839 */ 5840 return bdrv_supports_compressed_writes(filtered); 5841 } 5842 5843 return true; 5844 } 5845 5846 const char *bdrv_get_format_name(BlockDriverState *bs) 5847 { 5848 IO_CODE(); 5849 return bs->drv ? 
bs->drv->format_name : NULL; 5850 } 5851 5852 static int qsort_strcmp(const void *a, const void *b) 5853 { 5854 return strcmp(*(char *const *)a, *(char *const *)b); 5855 } 5856 5857 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 5858 void *opaque, bool read_only) 5859 { 5860 BlockDriver *drv; 5861 int count = 0; 5862 int i; 5863 const char **formats = NULL; 5864 5865 GLOBAL_STATE_CODE(); 5866 5867 QLIST_FOREACH(drv, &bdrv_drivers, list) { 5868 if (drv->format_name) { 5869 bool found = false; 5870 int i = count; 5871 5872 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 5873 continue; 5874 } 5875 5876 while (formats && i && !found) { 5877 found = !strcmp(formats[--i], drv->format_name); 5878 } 5879 5880 if (!found) { 5881 formats = g_renew(const char *, formats, count + 1); 5882 formats[count++] = drv->format_name; 5883 } 5884 } 5885 } 5886 5887 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 5888 const char *format_name = block_driver_modules[i].format_name; 5889 5890 if (format_name) { 5891 bool found = false; 5892 int j = count; 5893 5894 if (use_bdrv_whitelist && 5895 !bdrv_format_is_whitelisted(format_name, read_only)) { 5896 continue; 5897 } 5898 5899 while (formats && j && !found) { 5900 found = !strcmp(formats[--j], format_name); 5901 } 5902 5903 if (!found) { 5904 formats = g_renew(const char *, formats, count + 1); 5905 formats[count++] = format_name; 5906 } 5907 } 5908 } 5909 5910 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 5911 5912 for (i = 0; i < count; i++) { 5913 it(opaque, formats[i]); 5914 } 5915 5916 g_free(formats); 5917 } 5918 5919 /* This function is to find a node in the bs graph */ 5920 BlockDriverState *bdrv_find_node(const char *node_name) 5921 { 5922 BlockDriverState *bs; 5923 5924 assert(node_name); 5925 GLOBAL_STATE_CODE(); 5926 5927 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5928 if (!strcmp(node_name, bs->node_name)) { 5929 return bs; 5930 } 5931 } 5932 return 
NULL; 5933 } 5934 5935 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 5936 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 5937 Error **errp) 5938 { 5939 BlockDeviceInfoList *list; 5940 BlockDriverState *bs; 5941 5942 GLOBAL_STATE_CODE(); 5943 5944 list = NULL; 5945 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5946 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 5947 if (!info) { 5948 qapi_free_BlockDeviceInfoList(list); 5949 return NULL; 5950 } 5951 QAPI_LIST_PREPEND(list, info); 5952 } 5953 5954 return list; 5955 } 5956 5957 typedef struct XDbgBlockGraphConstructor { 5958 XDbgBlockGraph *graph; 5959 GHashTable *graph_nodes; 5960 } XDbgBlockGraphConstructor; 5961 5962 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 5963 { 5964 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 5965 5966 gr->graph = g_new0(XDbgBlockGraph, 1); 5967 gr->graph_nodes = g_hash_table_new(NULL, NULL); 5968 5969 return gr; 5970 } 5971 5972 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 5973 { 5974 XDbgBlockGraph *graph = gr->graph; 5975 5976 g_hash_table_destroy(gr->graph_nodes); 5977 g_free(gr); 5978 5979 return graph; 5980 } 5981 5982 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 5983 { 5984 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 5985 5986 if (ret != 0) { 5987 return ret; 5988 } 5989 5990 /* 5991 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 5992 * answer of g_hash_table_lookup. 
5993 */ 5994 ret = g_hash_table_size(gr->graph_nodes) + 1; 5995 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 5996 5997 return ret; 5998 } 5999 6000 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 6001 XDbgBlockGraphNodeType type, const char *name) 6002 { 6003 XDbgBlockGraphNode *n; 6004 6005 n = g_new0(XDbgBlockGraphNode, 1); 6006 6007 n->id = xdbg_graph_node_num(gr, node); 6008 n->type = type; 6009 n->name = g_strdup(name); 6010 6011 QAPI_LIST_PREPEND(gr->graph->nodes, n); 6012 } 6013 6014 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 6015 const BdrvChild *child) 6016 { 6017 BlockPermission qapi_perm; 6018 XDbgBlockGraphEdge *edge; 6019 GLOBAL_STATE_CODE(); 6020 6021 edge = g_new0(XDbgBlockGraphEdge, 1); 6022 6023 edge->parent = xdbg_graph_node_num(gr, parent); 6024 edge->child = xdbg_graph_node_num(gr, child->bs); 6025 edge->name = g_strdup(child->name); 6026 6027 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 6028 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 6029 6030 if (flag & child->perm) { 6031 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 6032 } 6033 if (flag & child->shared_perm) { 6034 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 6035 } 6036 } 6037 6038 QAPI_LIST_PREPEND(gr->graph->edges, edge); 6039 } 6040 6041 6042 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 6043 { 6044 BlockBackend *blk; 6045 BlockJob *job; 6046 BlockDriverState *bs; 6047 BdrvChild *child; 6048 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 6049 6050 GLOBAL_STATE_CODE(); 6051 6052 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 6053 char *allocated_name = NULL; 6054 const char *name = blk_name(blk); 6055 6056 if (!*name) { 6057 name = allocated_name = blk_get_attached_dev_id(blk); 6058 } 6059 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 6060 name); 6061 g_free(allocated_name); 6062 if (blk_root(blk)) { 6063 
xdbg_graph_add_edge(gr, blk, blk_root(blk)); 6064 } 6065 } 6066 6067 WITH_JOB_LOCK_GUARD() { 6068 for (job = block_job_next_locked(NULL); job; 6069 job = block_job_next_locked(job)) { 6070 GSList *el; 6071 6072 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 6073 job->job.id); 6074 for (el = job->nodes; el; el = el->next) { 6075 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 6076 } 6077 } 6078 } 6079 6080 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 6081 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 6082 bs->node_name); 6083 QLIST_FOREACH(child, &bs->children, next) { 6084 xdbg_graph_add_edge(gr, bs, child); 6085 } 6086 } 6087 6088 return xdbg_graph_finalize(gr); 6089 } 6090 6091 BlockDriverState *bdrv_lookup_bs(const char *device, 6092 const char *node_name, 6093 Error **errp) 6094 { 6095 BlockBackend *blk; 6096 BlockDriverState *bs; 6097 6098 GLOBAL_STATE_CODE(); 6099 6100 if (device) { 6101 blk = blk_by_name(device); 6102 6103 if (blk) { 6104 bs = blk_bs(blk); 6105 if (!bs) { 6106 error_setg(errp, "Device '%s' has no medium", device); 6107 } 6108 6109 return bs; 6110 } 6111 } 6112 6113 if (node_name) { 6114 bs = bdrv_find_node(node_name); 6115 6116 if (bs) { 6117 return bs; 6118 } 6119 } 6120 6121 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 6122 device ? device : "", 6123 node_name ? node_name : ""); 6124 return NULL; 6125 } 6126 6127 /* If 'base' is in the same chain as 'top', return true. Otherwise, 6128 * return false. If either argument is NULL, return false. 
*/ 6129 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 6130 { 6131 6132 GLOBAL_STATE_CODE(); 6133 6134 while (top && top != base) { 6135 top = bdrv_filter_or_cow_bs(top); 6136 } 6137 6138 return top != NULL; 6139 } 6140 6141 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 6142 { 6143 GLOBAL_STATE_CODE(); 6144 if (!bs) { 6145 return QTAILQ_FIRST(&graph_bdrv_states); 6146 } 6147 return QTAILQ_NEXT(bs, node_list); 6148 } 6149 6150 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 6151 { 6152 GLOBAL_STATE_CODE(); 6153 if (!bs) { 6154 return QTAILQ_FIRST(&all_bdrv_states); 6155 } 6156 return QTAILQ_NEXT(bs, bs_list); 6157 } 6158 6159 const char *bdrv_get_node_name(const BlockDriverState *bs) 6160 { 6161 IO_CODE(); 6162 return bs->node_name; 6163 } 6164 6165 const char *bdrv_get_parent_name(const BlockDriverState *bs) 6166 { 6167 BdrvChild *c; 6168 const char *name; 6169 IO_CODE(); 6170 6171 /* If multiple parents have a name, just pick the first one. */ 6172 QLIST_FOREACH(c, &bs->parents, next_parent) { 6173 if (c->klass->get_name) { 6174 name = c->klass->get_name(c); 6175 if (name && *name) { 6176 return name; 6177 } 6178 } 6179 } 6180 6181 return NULL; 6182 } 6183 6184 /* TODO check what callers really want: bs->node_name or blk_name() */ 6185 const char *bdrv_get_device_name(const BlockDriverState *bs) 6186 { 6187 IO_CODE(); 6188 return bdrv_get_parent_name(bs) ?: ""; 6189 } 6190 6191 /* This can be used to identify nodes that might not have a device 6192 * name associated. Since node and device names live in the same 6193 * namespace, the result is unambiguous. The exception is if both are 6194 * absent, then this returns an empty (non-null) string. 
*/ 6195 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 6196 { 6197 IO_CODE(); 6198 return bdrv_get_parent_name(bs) ?: bs->node_name; 6199 } 6200 6201 int bdrv_get_flags(BlockDriverState *bs) 6202 { 6203 IO_CODE(); 6204 return bs->open_flags; 6205 } 6206 6207 int bdrv_has_zero_init_1(BlockDriverState *bs) 6208 { 6209 GLOBAL_STATE_CODE(); 6210 return 1; 6211 } 6212 6213 int bdrv_has_zero_init(BlockDriverState *bs) 6214 { 6215 BlockDriverState *filtered; 6216 GLOBAL_STATE_CODE(); 6217 6218 if (!bs->drv) { 6219 return 0; 6220 } 6221 6222 /* If BS is a copy on write image, it is initialized to 6223 the contents of the base image, which may not be zeroes. */ 6224 if (bdrv_cow_child(bs)) { 6225 return 0; 6226 } 6227 if (bs->drv->bdrv_has_zero_init) { 6228 return bs->drv->bdrv_has_zero_init(bs); 6229 } 6230 6231 filtered = bdrv_filter_bs(bs); 6232 if (filtered) { 6233 return bdrv_has_zero_init(filtered); 6234 } 6235 6236 /* safe default */ 6237 return 0; 6238 } 6239 6240 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 6241 { 6242 IO_CODE(); 6243 if (!(bs->open_flags & BDRV_O_UNMAP)) { 6244 return false; 6245 } 6246 6247 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; 6248 } 6249 6250 void bdrv_get_backing_filename(BlockDriverState *bs, 6251 char *filename, int filename_size) 6252 { 6253 IO_CODE(); 6254 pstrcpy(filename, filename_size, bs->backing_file); 6255 } 6256 6257 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 6258 { 6259 int ret; 6260 BlockDriver *drv = bs->drv; 6261 IO_CODE(); 6262 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 6263 if (!drv) { 6264 return -ENOMEDIUM; 6265 } 6266 if (!drv->bdrv_get_info) { 6267 BlockDriverState *filtered = bdrv_filter_bs(bs); 6268 if (filtered) { 6269 return bdrv_get_info(filtered, bdi); 6270 } 6271 return -ENOTSUP; 6272 } 6273 memset(bdi, 0, sizeof(*bdi)); 6274 ret = drv->bdrv_get_info(bs, bdi); 6275 if (ret < 0) { 6276 return ret; 6277 } 6278 6279 if 
(bdi->cluster_size > BDRV_MAX_ALIGNMENT) { 6280 return -EINVAL; 6281 } 6282 6283 return 0; 6284 } 6285 6286 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, 6287 Error **errp) 6288 { 6289 BlockDriver *drv = bs->drv; 6290 IO_CODE(); 6291 if (drv && drv->bdrv_get_specific_info) { 6292 return drv->bdrv_get_specific_info(bs, errp); 6293 } 6294 return NULL; 6295 } 6296 6297 BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) 6298 { 6299 BlockDriver *drv = bs->drv; 6300 IO_CODE(); 6301 if (!drv || !drv->bdrv_get_specific_stats) { 6302 return NULL; 6303 } 6304 return drv->bdrv_get_specific_stats(bs); 6305 } 6306 6307 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) 6308 { 6309 IO_CODE(); 6310 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 6311 return; 6312 } 6313 6314 bs->drv->bdrv_debug_event(bs, event); 6315 } 6316 6317 static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) 6318 { 6319 GLOBAL_STATE_CODE(); 6320 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 6321 bs = bdrv_primary_bs(bs); 6322 } 6323 6324 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 6325 assert(bs->drv->bdrv_debug_remove_breakpoint); 6326 return bs; 6327 } 6328 6329 return NULL; 6330 } 6331 6332 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 6333 const char *tag) 6334 { 6335 GLOBAL_STATE_CODE(); 6336 bs = bdrv_find_debug_node(bs); 6337 if (bs) { 6338 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 6339 } 6340 6341 return -ENOTSUP; 6342 } 6343 6344 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 6345 { 6346 GLOBAL_STATE_CODE(); 6347 bs = bdrv_find_debug_node(bs); 6348 if (bs) { 6349 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 6350 } 6351 6352 return -ENOTSUP; 6353 } 6354 6355 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 6356 { 6357 GLOBAL_STATE_CODE(); 6358 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 6359 bs = bdrv_primary_bs(bs); 
6360 } 6361 6362 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 6363 return bs->drv->bdrv_debug_resume(bs, tag); 6364 } 6365 6366 return -ENOTSUP; 6367 } 6368 6369 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 6370 { 6371 GLOBAL_STATE_CODE(); 6372 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 6373 bs = bdrv_primary_bs(bs); 6374 } 6375 6376 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 6377 return bs->drv->bdrv_debug_is_suspended(bs, tag); 6378 } 6379 6380 return false; 6381 } 6382 6383 /* backing_file can either be relative, or absolute, or a protocol. If it is 6384 * relative, it must be relative to the chain. So, passing in bs->filename 6385 * from a BDS as backing_file should not be done, as that may be relative to 6386 * the CWD rather than the chain. */ 6387 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 6388 const char *backing_file) 6389 { 6390 char *filename_full = NULL; 6391 char *backing_file_full = NULL; 6392 char *filename_tmp = NULL; 6393 int is_protocol = 0; 6394 bool filenames_refreshed = false; 6395 BlockDriverState *curr_bs = NULL; 6396 BlockDriverState *retval = NULL; 6397 BlockDriverState *bs_below; 6398 6399 GLOBAL_STATE_CODE(); 6400 6401 if (!bs || !bs->drv || !backing_file) { 6402 return NULL; 6403 } 6404 6405 filename_full = g_malloc(PATH_MAX); 6406 backing_file_full = g_malloc(PATH_MAX); 6407 6408 is_protocol = path_has_protocol(backing_file); 6409 6410 /* 6411 * Being largely a legacy function, skip any filters here 6412 * (because filters do not have normal filenames, so they cannot 6413 * match anyway; and allowing json:{} filenames is a bit out of 6414 * scope). 
6415 */ 6416 for (curr_bs = bdrv_skip_filters(bs); 6417 bdrv_cow_child(curr_bs) != NULL; 6418 curr_bs = bs_below) 6419 { 6420 bs_below = bdrv_backing_chain_next(curr_bs); 6421 6422 if (bdrv_backing_overridden(curr_bs)) { 6423 /* 6424 * If the backing file was overridden, we can only compare 6425 * directly against the backing node's filename. 6426 */ 6427 6428 if (!filenames_refreshed) { 6429 /* 6430 * This will automatically refresh all of the 6431 * filenames in the rest of the backing chain, so we 6432 * only need to do this once. 6433 */ 6434 bdrv_refresh_filename(bs_below); 6435 filenames_refreshed = true; 6436 } 6437 6438 if (strcmp(backing_file, bs_below->filename) == 0) { 6439 retval = bs_below; 6440 break; 6441 } 6442 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 6443 /* 6444 * If either of the filename paths is actually a protocol, then 6445 * compare unmodified paths; otherwise make paths relative. 6446 */ 6447 char *backing_file_full_ret; 6448 6449 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 6450 retval = bs_below; 6451 break; 6452 } 6453 /* Also check against the full backing filename for the image */ 6454 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs, 6455 NULL); 6456 if (backing_file_full_ret) { 6457 bool equal = strcmp(backing_file, backing_file_full_ret) == 0; 6458 g_free(backing_file_full_ret); 6459 if (equal) { 6460 retval = bs_below; 6461 break; 6462 } 6463 } 6464 } else { 6465 /* If not an absolute filename path, make it relative to the current 6466 * image's filename path */ 6467 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file, 6468 NULL); 6469 /* We are going to compare canonicalized absolute pathnames */ 6470 if (!filename_tmp || !realpath(filename_tmp, filename_full)) { 6471 g_free(filename_tmp); 6472 continue; 6473 } 6474 g_free(filename_tmp); 6475 6476 /* We need to make sure the backing filename we are comparing against 6477 * is relative to the current image filename 
(or absolute) */ 6478 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL); 6479 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) { 6480 g_free(filename_tmp); 6481 continue; 6482 } 6483 g_free(filename_tmp); 6484 6485 if (strcmp(backing_file_full, filename_full) == 0) { 6486 retval = bs_below; 6487 break; 6488 } 6489 } 6490 } 6491 6492 g_free(filename_full); 6493 g_free(backing_file_full); 6494 return retval; 6495 } 6496 6497 void bdrv_init(void) 6498 { 6499 #ifdef CONFIG_BDRV_WHITELIST_TOOLS 6500 use_bdrv_whitelist = 1; 6501 #endif 6502 module_call_init(MODULE_INIT_BLOCK); 6503 } 6504 6505 void bdrv_init_with_whitelist(void) 6506 { 6507 use_bdrv_whitelist = 1; 6508 bdrv_init(); 6509 } 6510 6511 int bdrv_activate(BlockDriverState *bs, Error **errp) 6512 { 6513 BdrvChild *child, *parent; 6514 Error *local_err = NULL; 6515 int ret; 6516 BdrvDirtyBitmap *bm; 6517 6518 GLOBAL_STATE_CODE(); 6519 6520 if (!bs->drv) { 6521 return -ENOMEDIUM; 6522 } 6523 6524 QLIST_FOREACH(child, &bs->children, next) { 6525 bdrv_activate(child->bs, &local_err); 6526 if (local_err) { 6527 error_propagate(errp, local_err); 6528 return -EINVAL; 6529 } 6530 } 6531 6532 /* 6533 * Update permissions, they may differ for inactive nodes. 6534 * 6535 * Note that the required permissions of inactive images are always a 6536 * subset of the permissions required after activating the image. This 6537 * allows us to just get the permissions upfront without restricting 6538 * bdrv_co_invalidate_cache(). 6539 * 6540 * It also means that in error cases, we don't have to try and revert to 6541 * the old permissions (which is an operation that could fail, too). We can 6542 * just keep the extended permissions for the next time that an activation 6543 * of the image is tried. 
*/
    if (bs->open_flags & BDRV_O_INACTIVE) {
        bs->open_flags &= ~BDRV_O_INACTIVE;
        ret = bdrv_refresh_perms(bs, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        ret = bdrv_invalidate_cache(bs, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        FOR_EACH_DIRTY_BITMAP(bs, bm) {
            bdrv_dirty_bitmap_skip_store(bm, false);
        }

        ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_setg_errno(errp, -ret, "Could not refresh total sector count");
            return ret;
        }
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->activate) {
            parent->klass->activate(parent, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }
    }

    return 0;
}

/*
 * Call the driver's .bdrv_co_invalidate_cache() callback, if it has one.
 *
 * The node must already be marked active (BDRV_O_INACTIVE cleared); this is
 * asserted.
 *
 * Returns 0 on success, including when the driver provides no callback.
 * Returns -EINVAL if the callback reported an error, which is then
 * propagated to @errp.
 */
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    IO_CODE();

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    if (bs->drv->bdrv_co_invalidate_cache) {
        bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }
    }

    return 0;
}

/*
 * Call bdrv_activate() on every node returned by the bdrv_first()/
 * bdrv_next() iteration, holding the node's AioContext lock around each
 * call.
 *
 * Stops at the first node whose activation fails; @errp is passed through to
 * bdrv_activate() unchanged.
 */
void bdrv_activate_all(Error **errp)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    GLOBAL_STATE_CODE();

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_activate(bs, errp);
        aio_context_release(aio_context);
        if (ret < 0) {
            /* Leaving the loop early: let the iterator clean up its state */
            bdrv_next_cleanup(&it);
            return;
        }
    }
}

/*
 * Return true if @bs has at least one parent whose child class sets
 * parent_is_bds.
 *
 * If @only_active is true, only parents that do not have BDRV_O_INACTIVE set
 * in their open_flags are counted.
 */
static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
    BdrvChild *parent;
    GLOBAL_STATE_CODE();

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->parent_is_bds) {
            BlockDriverState *parent_bs = parent->opaque;
            if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
                return true;
            }
        }
    }

    return false;
}

/*
 * Inactivate @bs and then, recursively, its children.
 *
 * Order of operations: the driver's .bdrv_inactivate() callback, then every
 * parent's .inactivate() callback, then a check that no parent still holds
 * write permissions, and only then is BDRV_O_INACTIVE set and the
 * permissions refreshed.
 *
 * Returns 0 on success, or when the node is skipped because it still has an
 * active BDS parent. Returns -ENOMEDIUM if the node has no driver, -EPERM if
 * a parent still needs write access, or the negative value returned by a
 * failing callback or a failing child.
 */
static int bdrv_inactivate_recurse(BlockDriverState *bs)
{
    BdrvChild *child, *parent;
    int ret;
    uint64_t cumulative_perms, cumulative_shared_perms;

    GLOBAL_STATE_CODE();

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /* Make sure that we don't inactivate a child before its parent.
     * It will be covered by recursion from the yet active parent. */
    if (bdrv_has_bds_parent(bs, true)) {
        return 0;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Inactivate this node */
    if (bs->drv->bdrv_inactivate) {
        ret = bs->drv->bdrv_inactivate(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* Let every parent that implements .inactivate() react */
    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->inactivate) {
            ret = parent->klass->inactivate(parent);
            if (ret < 0) {
                return ret;
            }
        }
    }

    bdrv_get_cumulative_perm(bs, &cumulative_perms,
                             &cumulative_shared_perms);
    if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
        /* Our inactive parents still need write access. Inactivation failed. */
        return -EPERM;
    }

    bs->open_flags |= BDRV_O_INACTIVE;

    /*
     * Update permissions, they may differ for inactive nodes.
     * We only tried to loosen restrictions, so errors are not fatal, ignore
     * them.
     */
    bdrv_refresh_perms(bs, NULL);

    /* Recursively inactivate children */
    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * Inactivate all nodes reachable through the bdrv_first()/bdrv_next()
 * iteration.
 *
 * A first pass acquires every distinct AioContext exactly once (tracked in a
 * GSList); a second pass inactivates the nodes; all collected contexts are
 * released again on every exit path.
 *
 * Returns 0 on success or the first negative value returned by
 * bdrv_inactivate_recurse().
 */
int bdrv_inactivate_all(void)
{
    BlockDriverState *bs = NULL;
    BdrvNextIterator it;
    int ret = 0;
    GSList *aio_ctxs = NULL, *ctx;

    GLOBAL_STATE_CODE();

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* Acquire each context only once, however many nodes share it */
        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
            aio_context_acquire(aio_context);
        }
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        /* Nodes with BDS parents are covered by recursion from the last
         * parent that gets inactivated. Don't inactivate them a second
         * time if that has already happened. */
        if (bdrv_has_bds_parent(bs, false)) {
            continue;
        }
        ret = bdrv_inactivate_recurse(bs);
        if (ret < 0) {
            bdrv_next_cleanup(&it);
            goto out;
        }
    }

out:
    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
        AioContext *aio_context = ctx->data;
        aio_context_release(aio_context);
    }
    g_slist_free(aio_ctxs);

    return ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present.
 *
 * A node without a driver is never inserted. If the driver implements
 * .bdrv_is_inserted(), its answer is returned; otherwise the node counts as
 * inserted only if all of its children are (a node without children is
 * therefore inserted).
 */
bool bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BdrvChild *child;
    IO_CODE();

    if (!drv) {
        return false;
    }
    if (drv->bdrv_is_inserted) {
        return drv->bdrv_is_inserted(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_is_inserted(child->bs)) {
            return false;
        }
    }
    return true;
}

/**
 * If
eject_flag is TRUE, eject the media. Otherwise, close the tray 6775 */ 6776 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 6777 { 6778 BlockDriver *drv = bs->drv; 6779 IO_CODE(); 6780 6781 if (drv && drv->bdrv_eject) { 6782 drv->bdrv_eject(bs, eject_flag); 6783 } 6784 } 6785 6786 /** 6787 * Lock or unlock the media (if it is locked, the user won't be able 6788 * to eject it manually). 6789 */ 6790 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 6791 { 6792 BlockDriver *drv = bs->drv; 6793 IO_CODE(); 6794 trace_bdrv_lock_medium(bs, locked); 6795 6796 if (drv && drv->bdrv_lock_medium) { 6797 drv->bdrv_lock_medium(bs, locked); 6798 } 6799 } 6800 6801 /* Get a reference to bs */ 6802 void bdrv_ref(BlockDriverState *bs) 6803 { 6804 GLOBAL_STATE_CODE(); 6805 bs->refcnt++; 6806 } 6807 6808 /* Release a previously grabbed reference to bs. 6809 * If after releasing, reference count is zero, the BlockDriverState is 6810 * deleted. */ 6811 void bdrv_unref(BlockDriverState *bs) 6812 { 6813 GLOBAL_STATE_CODE(); 6814 if (!bs) { 6815 return; 6816 } 6817 assert(bs->refcnt > 0); 6818 if (--bs->refcnt == 0) { 6819 bdrv_delete(bs); 6820 } 6821 } 6822 6823 struct BdrvOpBlocker { 6824 Error *reason; 6825 QLIST_ENTRY(BdrvOpBlocker) list; 6826 }; 6827 6828 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 6829 { 6830 BdrvOpBlocker *blocker; 6831 GLOBAL_STATE_CODE(); 6832 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6833 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 6834 blocker = QLIST_FIRST(&bs->op_blockers[op]); 6835 error_propagate_prepend(errp, error_copy(blocker->reason), 6836 "Node '%s' is busy: ", 6837 bdrv_get_device_or_node_name(bs)); 6838 return true; 6839 } 6840 return false; 6841 } 6842 6843 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 6844 { 6845 BdrvOpBlocker *blocker; 6846 GLOBAL_STATE_CODE(); 6847 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6848 6849 blocker = g_new0(BdrvOpBlocker, 1); 6850 
blocker->reason = reason; 6851 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 6852 } 6853 6854 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 6855 { 6856 BdrvOpBlocker *blocker, *next; 6857 GLOBAL_STATE_CODE(); 6858 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6859 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 6860 if (blocker->reason == reason) { 6861 QLIST_REMOVE(blocker, list); 6862 g_free(blocker); 6863 } 6864 } 6865 } 6866 6867 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 6868 { 6869 int i; 6870 GLOBAL_STATE_CODE(); 6871 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6872 bdrv_op_block(bs, i, reason); 6873 } 6874 } 6875 6876 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 6877 { 6878 int i; 6879 GLOBAL_STATE_CODE(); 6880 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6881 bdrv_op_unblock(bs, i, reason); 6882 } 6883 } 6884 6885 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 6886 { 6887 int i; 6888 GLOBAL_STATE_CODE(); 6889 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6890 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 6891 return false; 6892 } 6893 } 6894 return true; 6895 } 6896 6897 void bdrv_img_create(const char *filename, const char *fmt, 6898 const char *base_filename, const char *base_fmt, 6899 char *options, uint64_t img_size, int flags, bool quiet, 6900 Error **errp) 6901 { 6902 QemuOptsList *create_opts = NULL; 6903 QemuOpts *opts = NULL; 6904 const char *backing_fmt, *backing_file; 6905 int64_t size; 6906 BlockDriver *drv, *proto_drv; 6907 Error *local_err = NULL; 6908 int ret = 0; 6909 6910 GLOBAL_STATE_CODE(); 6911 6912 /* Find driver and parse its options */ 6913 drv = bdrv_find_format(fmt); 6914 if (!drv) { 6915 error_setg(errp, "Unknown file format '%s'", fmt); 6916 return; 6917 } 6918 6919 proto_drv = bdrv_find_protocol(filename, true, errp); 6920 if (!proto_drv) { 6921 return; 6922 } 6923 6924 if (!drv->create_opts) { 6925 error_setg(errp, "Format driver '%s' does not 
support image creation", 6926 drv->format_name); 6927 return; 6928 } 6929 6930 if (!proto_drv->create_opts) { 6931 error_setg(errp, "Protocol driver '%s' does not support image creation", 6932 proto_drv->format_name); 6933 return; 6934 } 6935 6936 /* Create parameter list */ 6937 create_opts = qemu_opts_append(create_opts, drv->create_opts); 6938 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 6939 6940 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 6941 6942 /* Parse -o options */ 6943 if (options) { 6944 if (!qemu_opts_do_parse(opts, options, NULL, errp)) { 6945 goto out; 6946 } 6947 } 6948 6949 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { 6950 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 6951 } else if (img_size != UINT64_C(-1)) { 6952 error_setg(errp, "The image size must be specified only once"); 6953 goto out; 6954 } 6955 6956 if (base_filename) { 6957 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, 6958 NULL)) { 6959 error_setg(errp, "Backing file not supported for file format '%s'", 6960 fmt); 6961 goto out; 6962 } 6963 } 6964 6965 if (base_fmt) { 6966 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { 6967 error_setg(errp, "Backing file format not supported for file " 6968 "format '%s'", fmt); 6969 goto out; 6970 } 6971 } 6972 6973 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 6974 if (backing_file) { 6975 if (!strcmp(filename, backing_file)) { 6976 error_setg(errp, "Error: Trying to create an image with the " 6977 "same filename as the backing file"); 6978 goto out; 6979 } 6980 if (backing_file[0] == '\0') { 6981 error_setg(errp, "Expected backing file name, got empty string"); 6982 goto out; 6983 } 6984 } 6985 6986 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 6987 6988 /* The size for the image must always be specified, unless we have a backing 6989 * file and we have not been forbidden from opening it. 
*/ 6990 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 6991 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 6992 BlockDriverState *bs; 6993 char *full_backing; 6994 int back_flags; 6995 QDict *backing_options = NULL; 6996 6997 full_backing = 6998 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 6999 &local_err); 7000 if (local_err) { 7001 goto out; 7002 } 7003 assert(full_backing); 7004 7005 /* 7006 * No need to do I/O here, which allows us to open encrypted 7007 * backing images without needing the secret 7008 */ 7009 back_flags = flags; 7010 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 7011 back_flags |= BDRV_O_NO_IO; 7012 7013 backing_options = qdict_new(); 7014 if (backing_fmt) { 7015 qdict_put_str(backing_options, "driver", backing_fmt); 7016 } 7017 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 7018 7019 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 7020 &local_err); 7021 g_free(full_backing); 7022 if (!bs) { 7023 error_append_hint(&local_err, "Could not open backing image.\n"); 7024 goto out; 7025 } else { 7026 if (!backing_fmt) { 7027 error_setg(&local_err, 7028 "Backing file specified without backing format"); 7029 error_append_hint(&local_err, "Detected format of %s.", 7030 bs->drv->format_name); 7031 goto out; 7032 } 7033 if (size == -1) { 7034 /* Opened BS, have no size */ 7035 size = bdrv_getlength(bs); 7036 if (size < 0) { 7037 error_setg_errno(errp, -size, "Could not get size of '%s'", 7038 backing_file); 7039 bdrv_unref(bs); 7040 goto out; 7041 } 7042 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 7043 } 7044 bdrv_unref(bs); 7045 } 7046 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 7047 } else if (backing_file && !backing_fmt) { 7048 error_setg(&local_err, 7049 "Backing file specified without backing format"); 7050 goto out; 7051 } 7052 7053 if (size == -1) { 7054 error_setg(errp, "Image creation needs a size parameter"); 7055 goto out; 
7056 } 7057 7058 if (!quiet) { 7059 printf("Formatting '%s', fmt=%s ", filename, fmt); 7060 qemu_opts_print(opts, " "); 7061 puts(""); 7062 fflush(stdout); 7063 } 7064 7065 ret = bdrv_create(drv, filename, opts, &local_err); 7066 7067 if (ret == -EFBIG) { 7068 /* This is generally a better message than whatever the driver would 7069 * deliver (especially because of the cluster_size_hint), since that 7070 * is most probably not much different from "image too large". */ 7071 const char *cluster_size_hint = ""; 7072 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 7073 cluster_size_hint = " (try using a larger cluster size)"; 7074 } 7075 error_setg(errp, "The image size is too large for file format '%s'" 7076 "%s", fmt, cluster_size_hint); 7077 error_free(local_err); 7078 local_err = NULL; 7079 } 7080 7081 out: 7082 qemu_opts_del(opts); 7083 qemu_opts_free(create_opts); 7084 error_propagate(errp, local_err); 7085 } 7086 7087 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 7088 { 7089 IO_CODE(); 7090 return bs ? bs->aio_context : qemu_get_aio_context(); 7091 } 7092 7093 AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) 7094 { 7095 Coroutine *self = qemu_coroutine_self(); 7096 AioContext *old_ctx = qemu_coroutine_get_aio_context(self); 7097 AioContext *new_ctx; 7098 IO_CODE(); 7099 7100 /* 7101 * Increase bs->in_flight to ensure that this operation is completed before 7102 * moving the node to a different AioContext. Read new_ctx only afterwards. 
*/
    bdrv_inc_in_flight(bs);

    new_ctx = bdrv_get_aio_context(bs);
    aio_co_reschedule_self(new_ctx);
    return old_ctx;
}

/*
 * Counterpart of bdrv_co_enter(): reschedule the current coroutine into
 * @old_ctx and drop the in-flight reference on @bs again.
 */
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
    IO_CODE();
    aio_co_reschedule_self(old_ctx);
    bdrv_dec_in_flight(bs);
}

/*
 * Acquire the AioContext lock of @bs from coroutine context in the main
 * thread. Nothing is acquired when @bs lives in the main loop AioContext.
 */
void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    /* In the main thread, bs->aio_context won't change concurrently */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());

    /*
     * We're in coroutine context, so we already hold the lock of the main
     * loop AioContext. Don't lock it twice to avoid deadlocks.
     */
    assert(qemu_in_coroutine());
    if (ctx != qemu_get_aio_context()) {
        aio_context_acquire(ctx);
    }
}

/*
 * Release the AioContext lock of @bs from coroutine context. Nothing is
 * released when @bs lives in the main loop AioContext, mirroring
 * bdrv_co_lock().
 */
void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    assert(qemu_in_coroutine());
    if (ctx != qemu_get_aio_context()) {
        aio_context_release(ctx);
    }
}

/* Enter coroutine @co in the AioContext of @bs */
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
    IO_CODE();
    aio_co_enter(bdrv_get_aio_context(bs), co);
}

/* Unlink @ban from its notifier list and free it */
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
    GLOBAL_STATE_CODE();
    QLIST_REMOVE(ban, list);
    g_free(ban);
}

/*
 * Detach @bs from its current AioContext.
 *
 * Runs the detach callback of every registered notifier (removing the ones
 * already marked deleted), then the driver's .bdrv_detach_aio_context()
 * callback, and finally clears bs->aio_context.
 */
static void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf, *baf_tmp;

    assert(!bs->walking_aio_notifiers);
    GLOBAL_STATE_CODE();
    /*
     * While this flag is set, bdrv_remove_aio_context_notifier() only marks
     * entries as deleted instead of unlinking them.
     */
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
        if (baf->deleted) {
            bdrv_do_remove_aio_context_notifier(baf);
        } else {
            baf->detach_aio_context(baf->opaque);
        }
    }
    /* Never mind iterating again to check for ->deleted.  bdrv_close() will
     * remove remaining aio notifiers if we aren't called again.
     */
    bs->walking_aio_notifiers = false;

    if (bs->drv && bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }

    /* Counterpart of the aio_disable_external() in bdrv_attach_aio_context() */
    if (bs->quiesce_counter) {
        aio_enable_external(bs->aio_context);
    }
    assert_bdrv_graph_writable(bs);
    bs->aio_context = NULL;
}

/*
 * Attach @bs to @new_context.
 *
 * Sets bs->aio_context, then runs the driver's .bdrv_attach_aio_context()
 * callback and the attach callback of every registered notifier (removing
 * the ones already marked deleted).
 */
static void bdrv_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
{
    BdrvAioNotifier *ban, *ban_tmp;
    GLOBAL_STATE_CODE();

    /* Counterpart of the aio_enable_external() in bdrv_detach_aio_context() */
    if (bs->quiesce_counter) {
        aio_disable_external(new_context);
    }

    assert_bdrv_graph_writable(bs);
    bs->aio_context = new_context;

    if (bs->drv && bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }

    assert(!bs->walking_aio_notifiers);
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
        if (ban->deleted) {
            bdrv_do_remove_aio_context_notifier(ban);
        } else {
            ban->attached_aio_context(new_context, ban->opaque);
        }
    }
    bs->walking_aio_notifiers = false;
}

/* Per-node state carried by the set_aio_context transaction action */
typedef struct BdrvStateSetAioContext {
    AioContext *new_ctx;    /* context the node is moved to on commit */
    BlockDriverState *bs;   /* node being moved */
} BdrvStateSetAioContext;

/*
 * Ask the parent behind @c to change to AioContext @ctx.
 *
 * A child that is already in @visited is skipped. Returns false with @errp
 * set if the parent's child class has no .change_aio_ctx() callback or if
 * that callback fails.
 */
static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
                                           GHashTable *visited,
                                           Transaction *tran,
                                           Error **errp)
{
    GLOBAL_STATE_CODE();
    if (g_hash_table_contains(visited, c)) {
        return true;
    }
    g_hash_table_add(visited, c);

    /*
     * A BdrvChildClass that doesn't handle AioContext changes cannot
     * tolerate any AioContext changes
     */
    if (!c->klass->change_aio_ctx) {
        char *user = bdrv_child_user_desc(c);
        error_setg(errp, "Changing iothreads is not supported by %s", user);
        g_free(user);
        return false;
    }
    if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) {
        /* A failing callback must have set an error */
        assert(!errp || *errp);
        return false;
    }
    return true;
}

/*
 * Change the AioContext of the node below @c to @ctx, unless @c has already
 * been visited. The work is delegated to bdrv_change_aio_context().
 */
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
                                   GHashTable *visited, Transaction *tran,
                                   Error **errp)
{
    GLOBAL_STATE_CODE();
    if (g_hash_table_contains(visited, c)) {
        return true;
    }
    g_hash_table_add(visited, c);
    return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp);
}

/* .clean callback of set_aio_context: end the drained section, free state */
static void bdrv_set_aio_context_clean(void *opaque)
{
    BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
    BlockDriverState *bs = (BlockDriverState *) state->bs;

    /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */
    bdrv_drained_end(bs);

    g_free(state);
}

/*
 * .commit callback of set_aio_context: detach the node from its old
 * AioContext and attach it to the new one.
 */
static void bdrv_set_aio_context_commit(void *opaque)
{
    BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
    BlockDriverState *bs = (BlockDriverState *) state->bs;
    AioContext *new_context = state->new_ctx;
    AioContext *old_context = bdrv_get_aio_context(bs);
    assert_bdrv_graph_writable(bs);

    /*
     * Take the old AioContext when detaching it from bs.
     * At this point, new_context lock is already acquired, and we are now
     * also taking old_context. This is safe as long as bdrv_detach_aio_context
     * does not call AIO_POLL_WHILE().
     */
    if (old_context != qemu_get_aio_context()) {
        aio_context_acquire(old_context);
    }
    bdrv_detach_aio_context(bs);
    if (old_context != qemu_get_aio_context()) {
        aio_context_release(old_context);
    }
    bdrv_attach_aio_context(bs, new_context);
}

/* Transaction action added once per node by bdrv_change_aio_context() */
static TransactionActionDrv set_aio_context = {
    .commit = bdrv_set_aio_context_commit,
    .clean = bdrv_set_aio_context_clean,
};

/*
 * Changes the AioContext used for fd handlers, timers, and BHs by this
 * BlockDriverState and all its children and parents.
 *
 * Must be called from the main AioContext.
 *
 * The caller must own the AioContext lock for the old AioContext of bs, but it
 * must not own the AioContext lock for new_context (unless new_context is the
 * same as the current context of bs).
 *
 * @visited will accumulate all visited BdrvChild objects. The caller is
 * responsible for freeing the hash table afterwards.
 *
 * On success the node is left drained and a set_aio_context action is added
 * to @tran; the actual switch happens when the transaction is committed.
 * Returns false, with @errp set, as soon as a parent or child refuses the
 * change.
 */
static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
                                    GHashTable *visited, Transaction *tran,
                                    Error **errp)
{
    BdrvChild *c;
    BdrvStateSetAioContext *state;

    GLOBAL_STATE_CODE();

    /* Already in the requested context: nothing to do */
    if (bdrv_get_aio_context(bs) == ctx) {
        return true;
    }

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
            return false;
        }
    }

    QLIST_FOREACH(c, &bs->children, next) {
        if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
            return false;
        }
    }

    state = g_new(BdrvStateSetAioContext, 1);
    *state = (BdrvStateSetAioContext) {
        .new_ctx = ctx,
        .bs = bs,
    };

    /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */
    bdrv_drained_begin(bs);

    tran_add(tran, &set_aio_context, state);

    return true;
}

/*
 * Change bs's and
recursively all of its parents' and children's AioContext
 * to the given new context, returning an error if that isn't possible.
 *
 * If ignore_child is not NULL, that child (and its subgraph) will not
 * be touched.
 *
 * This function still requires the caller to take the bs current
 * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
 * assumes the lock is always held if bs is in another AioContext.
 * For the same reason, it temporarily also holds the new AioContext, since
 * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
 * Therefore the new AioContext lock must not be taken by the caller.
 *
 * Returns 0 on success and -EPERM (with @errp set by the failing node) if
 * some node in the graph cannot change its AioContext; in that case no
 * AioContext has been changed.
 */
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
                                BdrvChild *ignore_child, Error **errp)
{
    Transaction *tran;
    GHashTable *visited;
    int ret;
    AioContext *old_context = bdrv_get_aio_context(bs);
    GLOBAL_STATE_CODE();

    /*
     * Recursion phase: go through all nodes of the graph.
     * Take care of checking that all nodes support changing AioContext
     * and drain them, building a linear list of callbacks to run if everything
     * is successful (the transaction itself).
     */
    tran = tran_new();
    visited = g_hash_table_new(NULL, NULL);
    if (ignore_child) {
        /* Pre-marking the child as visited keeps the recursion away from it */
        g_hash_table_add(visited, ignore_child);
    }
    ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp);
    g_hash_table_destroy(visited);

    /*
     * Linear phase: go through all callbacks collected in the transaction.
     * Run all callbacks collected in the recursion to switch all nodes
     * AioContext lock (transaction commit), or undo all changes done in the
     * recursion (transaction abort).
     */

    /* ret holds the boolean result of bdrv_change_aio_context() here */
    if (!ret) {
        /* Just run clean() callbacks. No AioContext changed. */
        tran_abort(tran);
        return -EPERM;
    }

    /*
     * Release old AioContext, it won't be needed anymore, as all
     * bdrv_drained_begin() have been called already.
     */
    if (qemu_get_aio_context() != old_context) {
        aio_context_release(old_context);
    }

    /*
     * Acquire new AioContext since bdrv_drained_end() is going to be called
     * after we switched all nodes in the new AioContext, and the function
     * assumes that the lock of the bs is always taken.
     */
    if (qemu_get_aio_context() != ctx) {
        aio_context_acquire(ctx);
    }

    tran_commit(tran);

    if (qemu_get_aio_context() != ctx) {
        aio_context_release(ctx);
    }

    /* Re-acquire the old AioContext, since the caller takes and releases it. */
    if (qemu_get_aio_context() != old_context) {
        aio_context_acquire(old_context);
    }

    return 0;
}

/*
 * Register a pair of callbacks on @bs: @attached_aio_context is invoked with
 * the new context and @opaque when the node is attached to an AioContext,
 * @detach_aio_context with @opaque when it is detached.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
    /* Fields not named here (such as ->deleted) are zero-initialized */
    *ban = (BdrvAioNotifier){
        .attached_aio_context = attached_aio_context,
        .detach_aio_context   = detach_aio_context,
        .opaque               = opaque
    };
    GLOBAL_STATE_CODE();

    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
}

/*
 * Remove the first not-yet-deleted notifier on @bs that matches all three of
 * @attached_aio_context, @detach_aio_context and @opaque.
 *
 * While the notifier list is being walked, the entry is only marked as
 * deleted and is unlinked later by the walker. Aborts if no matching
 * notifier is registered.
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*attached_aio_context)(AioContext *,
                                                                   void *),
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
{
    BdrvAioNotifier *ban, *ban_next;
    GLOBAL_STATE_CODE();

    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        if (ban->attached_aio_context == attached_aio_context &&
            ban->detach_aio_context   == detach_aio_context   &&
            ban->opaque               == opaque               &&
            ban->deleted              == false)
        {
            if (bs->walking_aio_notifiers) {
                ban->deleted = true;
            } else {
                bdrv_do_remove_aio_context_notifier(ban);
            }
            return;
        }
    }

    /* Removing a notifier that was never added is a caller bug */
    abort();
}

/*
 * Forward an option amendment request to the driver of @bs.
 *
 * Returns -ENOMEDIUM if the node has no driver and -ENOTSUP if the driver
 * does not implement .bdrv_amend_options(); @errp is set in both cases.
 * Otherwise returns whatever the driver callback returns.
 */
int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
                       bool force,
                       Error **errp)
{
    GLOBAL_STATE_CODE();
    if (!bs->drv) {
        error_setg(errp, "Node is ejected");
        return -ENOMEDIUM;
    }
    if (!bs->drv->bdrv_amend_options) {
        error_setg(errp, "Block driver '%s' does not support option amendment",
                   bs->drv->format_name);
        return -ENOTSUP;
    }
    return bs->drv->bdrv_amend_options(bs, opts, status_cb,
                                       cb_opaque, force, errp);
}

/*
 * This function checks whether the given @to_replace is allowed to be
 * replaced by a node that always shows the same data as @bs.  This is
 * used for example to verify whether the mirror job can replace
 * @to_replace by the target mirrored from @bs.
 * To be replaceable, @bs and @to_replace may either be guaranteed to
 * always show the same data (because they are only connected through
 * filters), or some driver may allow replacing one of its children
 * because it can guarantee that this child's data is not visible at
 * all (for example, for dissenting quorum children that have no other
 * parents).
7507 */ 7508 bool bdrv_recurse_can_replace(BlockDriverState *bs, 7509 BlockDriverState *to_replace) 7510 { 7511 BlockDriverState *filtered; 7512 7513 GLOBAL_STATE_CODE(); 7514 7515 if (!bs || !bs->drv) { 7516 return false; 7517 } 7518 7519 if (bs == to_replace) { 7520 return true; 7521 } 7522 7523 /* See what the driver can do */ 7524 if (bs->drv->bdrv_recurse_can_replace) { 7525 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 7526 } 7527 7528 /* For filters without an own implementation, we can recurse on our own */ 7529 filtered = bdrv_filter_bs(bs); 7530 if (filtered) { 7531 return bdrv_recurse_can_replace(filtered, to_replace); 7532 } 7533 7534 /* Safe default */ 7535 return false; 7536 } 7537 7538 /* 7539 * Check whether the given @node_name can be replaced by a node that 7540 * has the same data as @parent_bs. If so, return @node_name's BDS; 7541 * NULL otherwise. 7542 * 7543 * @node_name must be a (recursive) *child of @parent_bs (or this 7544 * function will return NULL). 7545 * 7546 * The result (whether the node can be replaced or not) is only valid 7547 * for as long as no graph or permission changes occur. 7548 */ 7549 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 7550 const char *node_name, Error **errp) 7551 { 7552 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 7553 AioContext *aio_context; 7554 7555 GLOBAL_STATE_CODE(); 7556 7557 if (!to_replace_bs) { 7558 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 7559 return NULL; 7560 } 7561 7562 aio_context = bdrv_get_aio_context(to_replace_bs); 7563 aio_context_acquire(aio_context); 7564 7565 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 7566 to_replace_bs = NULL; 7567 goto out; 7568 } 7569 7570 /* We don't want arbitrary node of the BDS chain to be replaced only the top 7571 * most non filter in order to prevent data corruption. 
7572 * Another benefit is that this tests exclude backing files which are 7573 * blocked by the backing blockers. 7574 */ 7575 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 7576 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 7577 "because it cannot be guaranteed that doing so would not " 7578 "lead to an abrupt change of visible data", 7579 node_name, parent_bs->node_name); 7580 to_replace_bs = NULL; 7581 goto out; 7582 } 7583 7584 out: 7585 aio_context_release(aio_context); 7586 return to_replace_bs; 7587 } 7588 7589 /** 7590 * Iterates through the list of runtime option keys that are said to 7591 * be "strong" for a BDS. An option is called "strong" if it changes 7592 * a BDS's data. For example, the null block driver's "size" and 7593 * "read-zeroes" options are strong, but its "latency-ns" option is 7594 * not. 7595 * 7596 * If a key returned by this function ends with a dot, all options 7597 * starting with that prefix are strong. 7598 */ 7599 static const char *const *strong_options(BlockDriverState *bs, 7600 const char *const *curopt) 7601 { 7602 static const char *const global_options[] = { 7603 "driver", "filename", NULL 7604 }; 7605 7606 if (!curopt) { 7607 return &global_options[0]; 7608 } 7609 7610 curopt++; 7611 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 7612 curopt = bs->drv->strong_runtime_opts; 7613 } 7614 7615 return (curopt && *curopt) ? curopt : NULL; 7616 } 7617 7618 /** 7619 * Copies all strong runtime options from bs->options to the given 7620 * QDict. The set of strong option keys is determined by invoking 7621 * strong_options(). 7622 * 7623 * Returns true iff any strong option was present in bs->options (and 7624 * thus copied to the target QDict) with the exception of "filename" 7625 * and "driver". The caller is expected to use this value to decide 7626 * whether the existence of strong options prevents the generation of 7627 * a plain filename. 
7628 */ 7629 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 7630 { 7631 bool found_any = false; 7632 const char *const *option_name = NULL; 7633 7634 if (!bs->drv) { 7635 return false; 7636 } 7637 7638 while ((option_name = strong_options(bs, option_name))) { 7639 bool option_given = false; 7640 7641 assert(strlen(*option_name) > 0); 7642 if ((*option_name)[strlen(*option_name) - 1] != '.') { 7643 QObject *entry = qdict_get(bs->options, *option_name); 7644 if (!entry) { 7645 continue; 7646 } 7647 7648 qdict_put_obj(d, *option_name, qobject_ref(entry)); 7649 option_given = true; 7650 } else { 7651 const QDictEntry *entry; 7652 for (entry = qdict_first(bs->options); entry; 7653 entry = qdict_next(bs->options, entry)) 7654 { 7655 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 7656 qdict_put_obj(d, qdict_entry_key(entry), 7657 qobject_ref(qdict_entry_value(entry))); 7658 option_given = true; 7659 } 7660 } 7661 } 7662 7663 /* While "driver" and "filename" need to be included in a JSON filename, 7664 * their existence does not prohibit generation of a plain filename. */ 7665 if (!found_any && option_given && 7666 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 7667 { 7668 found_any = true; 7669 } 7670 } 7671 7672 if (!qdict_haskey(d, "driver")) { 7673 /* Drivers created with bdrv_new_open_driver() may not have a 7674 * @driver option. Add it here. */ 7675 qdict_put_str(d, "driver", bs->drv->format_name); 7676 } 7677 7678 return found_any; 7679 } 7680 7681 /* Note: This function may return false positives; it may return true 7682 * even if opening the backing file specified by bs's image header 7683 * would result in exactly bs->backing. 
*/ 7684 static bool bdrv_backing_overridden(BlockDriverState *bs) 7685 { 7686 GLOBAL_STATE_CODE(); 7687 if (bs->backing) { 7688 return strcmp(bs->auto_backing_file, 7689 bs->backing->bs->filename); 7690 } else { 7691 /* No backing BDS, so if the image header reports any backing 7692 * file, it must have been suppressed */ 7693 return bs->auto_backing_file[0] != '\0'; 7694 } 7695 } 7696 7697 /* Updates the following BDS fields: 7698 * - exact_filename: A filename which may be used for opening a block device 7699 * which (mostly) equals the given BDS (even without any 7700 * other options; so reading and writing must return the same 7701 * results, but caching etc. may be different) 7702 * - full_open_options: Options which, when given when opening a block device 7703 * (without a filename), result in a BDS (mostly) 7704 * equalling the given one 7705 * - filename: If exact_filename is set, it is copied here. Otherwise, 7706 * full_open_options is converted to a JSON object, prefixed with 7707 * "json:" (for use through the JSON pseudo protocol) and put here. 
7708 */ 7709 void bdrv_refresh_filename(BlockDriverState *bs) 7710 { 7711 BlockDriver *drv = bs->drv; 7712 BdrvChild *child; 7713 BlockDriverState *primary_child_bs; 7714 QDict *opts; 7715 bool backing_overridden; 7716 bool generate_json_filename; /* Whether our default implementation should 7717 fill exact_filename (false) or not (true) */ 7718 7719 GLOBAL_STATE_CODE(); 7720 7721 if (!drv) { 7722 return; 7723 } 7724 7725 /* This BDS's file name may depend on any of its children's file names, so 7726 * refresh those first */ 7727 QLIST_FOREACH(child, &bs->children, next) { 7728 bdrv_refresh_filename(child->bs); 7729 } 7730 7731 if (bs->implicit) { 7732 /* For implicit nodes, just copy everything from the single child */ 7733 child = QLIST_FIRST(&bs->children); 7734 assert(QLIST_NEXT(child, next) == NULL); 7735 7736 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 7737 child->bs->exact_filename); 7738 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 7739 7740 qobject_unref(bs->full_open_options); 7741 bs->full_open_options = qobject_ref(child->bs->full_open_options); 7742 7743 return; 7744 } 7745 7746 backing_overridden = bdrv_backing_overridden(bs); 7747 7748 if (bs->open_flags & BDRV_O_NO_IO) { 7749 /* Without I/O, the backing file does not change anything. 7750 * Therefore, in such a case (primarily qemu-img), we can 7751 * pretend the backing file has not been overridden even if 7752 * it technically has been. 
*/ 7753 backing_overridden = false; 7754 } 7755 7756 /* Gather the options QDict */ 7757 opts = qdict_new(); 7758 generate_json_filename = append_strong_runtime_options(opts, bs); 7759 generate_json_filename |= backing_overridden; 7760 7761 if (drv->bdrv_gather_child_options) { 7762 /* Some block drivers may not want to present all of their children's 7763 * options, or name them differently from BdrvChild.name */ 7764 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 7765 } else { 7766 QLIST_FOREACH(child, &bs->children, next) { 7767 if (child == bs->backing && !backing_overridden) { 7768 /* We can skip the backing BDS if it has not been overridden */ 7769 continue; 7770 } 7771 7772 qdict_put(opts, child->name, 7773 qobject_ref(child->bs->full_open_options)); 7774 } 7775 7776 if (backing_overridden && !bs->backing) { 7777 /* Force no backing file */ 7778 qdict_put_null(opts, "backing"); 7779 } 7780 } 7781 7782 qobject_unref(bs->full_open_options); 7783 bs->full_open_options = opts; 7784 7785 primary_child_bs = bdrv_primary_bs(bs); 7786 7787 if (drv->bdrv_refresh_filename) { 7788 /* Obsolete information is of no use here, so drop the old file name 7789 * information before refreshing it */ 7790 bs->exact_filename[0] = '\0'; 7791 7792 drv->bdrv_refresh_filename(bs); 7793 } else if (primary_child_bs) { 7794 /* 7795 * Try to reconstruct valid information from the underlying 7796 * file -- this only works for format nodes (filter nodes 7797 * cannot be probed and as such must be selected by the user 7798 * either through an options dict, or through a special 7799 * filename which the filter driver must construct in its 7800 * .bdrv_refresh_filename() implementation). 
7801 */ 7802 7803 bs->exact_filename[0] = '\0'; 7804 7805 /* 7806 * We can use the underlying file's filename if: 7807 * - it has a filename, 7808 * - the current BDS is not a filter, 7809 * - the file is a protocol BDS, and 7810 * - opening that file (as this BDS's format) will automatically create 7811 * the BDS tree we have right now, that is: 7812 * - the user did not significantly change this BDS's behavior with 7813 * some explicit (strong) options 7814 * - no non-file child of this BDS has been overridden by the user 7815 * Both of these conditions are represented by generate_json_filename. 7816 */ 7817 if (primary_child_bs->exact_filename[0] && 7818 primary_child_bs->drv->bdrv_file_open && 7819 !drv->is_filter && !generate_json_filename) 7820 { 7821 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 7822 } 7823 } 7824 7825 if (bs->exact_filename[0]) { 7826 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 7827 } else { 7828 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 7829 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 7830 json->str) >= sizeof(bs->filename)) { 7831 /* Give user a hint if we truncated things. 
*/ 7832 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 7833 } 7834 g_string_free(json, true); 7835 } 7836 } 7837 7838 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 7839 { 7840 BlockDriver *drv = bs->drv; 7841 BlockDriverState *child_bs; 7842 7843 GLOBAL_STATE_CODE(); 7844 7845 if (!drv) { 7846 error_setg(errp, "Node '%s' is ejected", bs->node_name); 7847 return NULL; 7848 } 7849 7850 if (drv->bdrv_dirname) { 7851 return drv->bdrv_dirname(bs, errp); 7852 } 7853 7854 child_bs = bdrv_primary_bs(bs); 7855 if (child_bs) { 7856 return bdrv_dirname(child_bs, errp); 7857 } 7858 7859 bdrv_refresh_filename(bs); 7860 if (bs->exact_filename[0] != '\0') { 7861 return path_combine(bs->exact_filename, ""); 7862 } 7863 7864 error_setg(errp, "Cannot generate a base directory for %s nodes", 7865 drv->format_name); 7866 return NULL; 7867 } 7868 7869 /* 7870 * Hot add/remove a BDS's child. So the user can take a child offline when 7871 * it is broken and take a new child online 7872 */ 7873 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 7874 Error **errp) 7875 { 7876 GLOBAL_STATE_CODE(); 7877 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 7878 error_setg(errp, "The node %s does not support adding a child", 7879 bdrv_get_device_or_node_name(parent_bs)); 7880 return; 7881 } 7882 7883 if (!QLIST_EMPTY(&child_bs->parents)) { 7884 error_setg(errp, "The node %s already has a parent", 7885 child_bs->node_name); 7886 return; 7887 } 7888 7889 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 7890 } 7891 7892 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 7893 { 7894 BdrvChild *tmp; 7895 7896 GLOBAL_STATE_CODE(); 7897 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 7898 error_setg(errp, "The node %s does not support removing a child", 7899 bdrv_get_device_or_node_name(parent_bs)); 7900 return; 7901 } 7902 7903 QLIST_FOREACH(tmp, &parent_bs->children, next) { 7904 if (tmp == child) { 
7905 break; 7906 } 7907 } 7908 7909 if (!tmp) { 7910 error_setg(errp, "The node %s does not have a child named %s", 7911 bdrv_get_device_or_node_name(parent_bs), 7912 bdrv_get_device_or_node_name(child->bs)); 7913 return; 7914 } 7915 7916 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 7917 } 7918 7919 int bdrv_make_empty(BdrvChild *c, Error **errp) 7920 { 7921 BlockDriver *drv = c->bs->drv; 7922 int ret; 7923 7924 GLOBAL_STATE_CODE(); 7925 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 7926 7927 if (!drv->bdrv_make_empty) { 7928 error_setg(errp, "%s does not support emptying nodes", 7929 drv->format_name); 7930 return -ENOTSUP; 7931 } 7932 7933 ret = drv->bdrv_make_empty(c->bs); 7934 if (ret < 0) { 7935 error_setg_errno(errp, -ret, "Failed to empty %s", 7936 c->bs->filename); 7937 return ret; 7938 } 7939 7940 return 0; 7941 } 7942 7943 /* 7944 * Return the child that @bs acts as an overlay for, and from which data may be 7945 * copied in COW or COR operations. Usually this is the backing file. 7946 */ 7947 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 7948 { 7949 IO_CODE(); 7950 7951 if (!bs || !bs->drv) { 7952 return NULL; 7953 } 7954 7955 if (bs->drv->is_filter) { 7956 return NULL; 7957 } 7958 7959 if (!bs->backing) { 7960 return NULL; 7961 } 7962 7963 assert(bs->backing->role & BDRV_CHILD_COW); 7964 return bs->backing; 7965 } 7966 7967 /* 7968 * If @bs acts as a filter for exactly one of its children, return 7969 * that child. 
7970 */ 7971 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 7972 { 7973 BdrvChild *c; 7974 IO_CODE(); 7975 7976 if (!bs || !bs->drv) { 7977 return NULL; 7978 } 7979 7980 if (!bs->drv->is_filter) { 7981 return NULL; 7982 } 7983 7984 /* Only one of @backing or @file may be used */ 7985 assert(!(bs->backing && bs->file)); 7986 7987 c = bs->backing ?: bs->file; 7988 if (!c) { 7989 return NULL; 7990 } 7991 7992 assert(c->role & BDRV_CHILD_FILTERED); 7993 return c; 7994 } 7995 7996 /* 7997 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 7998 * whichever is non-NULL. 7999 * 8000 * Return NULL if both are NULL. 8001 */ 8002 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 8003 { 8004 BdrvChild *cow_child = bdrv_cow_child(bs); 8005 BdrvChild *filter_child = bdrv_filter_child(bs); 8006 IO_CODE(); 8007 8008 /* Filter nodes cannot have COW backing files */ 8009 assert(!(cow_child && filter_child)); 8010 8011 return cow_child ?: filter_child; 8012 } 8013 8014 /* 8015 * Return the primary child of this node: For filters, that is the 8016 * filtered child. For other nodes, that is usually the child storing 8017 * metadata. 8018 * (A generally more helpful description is that this is (usually) the 8019 * child that has the same filename as @bs.) 8020 * 8021 * Drivers do not necessarily have a primary child; for example quorum 8022 * does not. 
8023 */ 8024 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 8025 { 8026 BdrvChild *c, *found = NULL; 8027 IO_CODE(); 8028 8029 QLIST_FOREACH(c, &bs->children, next) { 8030 if (c->role & BDRV_CHILD_PRIMARY) { 8031 assert(!found); 8032 found = c; 8033 } 8034 } 8035 8036 return found; 8037 } 8038 8039 static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, 8040 bool stop_on_explicit_filter) 8041 { 8042 BdrvChild *c; 8043 8044 if (!bs) { 8045 return NULL; 8046 } 8047 8048 while (!(stop_on_explicit_filter && !bs->implicit)) { 8049 c = bdrv_filter_child(bs); 8050 if (!c) { 8051 /* 8052 * A filter that is embedded in a working block graph must 8053 * have a child. Assert this here so this function does 8054 * not return a filter node that is not expected by the 8055 * caller. 8056 */ 8057 assert(!bs->drv || !bs->drv->is_filter); 8058 break; 8059 } 8060 bs = c->bs; 8061 } 8062 /* 8063 * Note that this treats nodes with bs->drv == NULL as not being 8064 * filters (bs->drv == NULL should be replaced by something else 8065 * anyway). 8066 * The advantage of this behavior is that this function will thus 8067 * always return a non-NULL value (given a non-NULL @bs). 8068 */ 8069 8070 return bs; 8071 } 8072 8073 /* 8074 * Return the first BDS that has not been added implicitly or that 8075 * does not have a filtered child down the chain starting from @bs 8076 * (including @bs itself). 8077 */ 8078 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 8079 { 8080 GLOBAL_STATE_CODE(); 8081 return bdrv_do_skip_filters(bs, true); 8082 } 8083 8084 /* 8085 * Return the first BDS that does not have a filtered child down the 8086 * chain starting from @bs (including @bs itself). 8087 */ 8088 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 8089 { 8090 IO_CODE(); 8091 return bdrv_do_skip_filters(bs, false); 8092 } 8093 8094 /* 8095 * For a backing chain, return the first non-filter backing image of 8096 * the first non-filter image. 
8097 */ 8098 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 8099 { 8100 IO_CODE(); 8101 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 8102 } 8103 8104 /** 8105 * Check whether [offset, offset + bytes) overlaps with the cached 8106 * block-status data region. 8107 * 8108 * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, 8109 * which is what bdrv_bsc_is_data()'s interface needs. 8110 * Otherwise, *pnum is not touched. 8111 */ 8112 static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, 8113 int64_t offset, int64_t bytes, 8114 int64_t *pnum) 8115 { 8116 BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); 8117 bool overlaps; 8118 8119 overlaps = 8120 qatomic_read(&bsc->valid) && 8121 ranges_overlap(offset, bytes, bsc->data_start, 8122 bsc->data_end - bsc->data_start); 8123 8124 if (overlaps && pnum) { 8125 *pnum = bsc->data_end - offset; 8126 } 8127 8128 return overlaps; 8129 } 8130 8131 /** 8132 * See block_int.h for this function's documentation. 8133 */ 8134 bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) 8135 { 8136 IO_CODE(); 8137 RCU_READ_LOCK_GUARD(); 8138 return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); 8139 } 8140 8141 /** 8142 * See block_int.h for this function's documentation. 8143 */ 8144 void bdrv_bsc_invalidate_range(BlockDriverState *bs, 8145 int64_t offset, int64_t bytes) 8146 { 8147 IO_CODE(); 8148 RCU_READ_LOCK_GUARD(); 8149 8150 if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { 8151 qatomic_set(&bs->block_status_cache->valid, false); 8152 } 8153 } 8154 8155 /** 8156 * See block_int.h for this function's documentation. 
8157 */ 8158 void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) 8159 { 8160 BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); 8161 BdrvBlockStatusCache *old_bsc; 8162 IO_CODE(); 8163 8164 *new_bsc = (BdrvBlockStatusCache) { 8165 .valid = true, 8166 .data_start = offset, 8167 .data_end = offset + bytes, 8168 }; 8169 8170 QEMU_LOCK_GUARD(&bs->bsc_modify_lock); 8171 8172 old_bsc = qatomic_rcu_read(&bs->block_status_cache); 8173 qatomic_rcu_set(&bs->block_status_cache, new_bsc); 8174 if (old_bsc) { 8175 g_free_rcu(old_bsc, rcu); 8176 } 8177 } 8178