1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include "block/trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "block/fuse.h" 30 #include "block/nbd.h" 31 #include "block/qdict.h" 32 #include "qemu/error-report.h" 33 #include "block/module_block.h" 34 #include "qemu/main-loop.h" 35 #include "qemu/module.h" 36 #include "qapi/error.h" 37 #include "qapi/qmp/qdict.h" 38 #include "qapi/qmp/qjson.h" 39 #include "qapi/qmp/qnull.h" 40 #include "qapi/qmp/qstring.h" 41 #include "qapi/qobject-output-visitor.h" 42 #include "qapi/qapi-visit-block-core.h" 43 #include "sysemu/block-backend.h" 44 #include "sysemu/sysemu.h" 45 #include "qemu/notify.h" 46 #include "qemu/option.h" 47 #include "qemu/coroutine.h" 48 #include "block/qapi.h" 49 #include "qemu/timer.h" 50 #include "qemu/cutils.h" 51 #include "qemu/id.h" 52 #include "block/coroutines.h" 53 54 #ifdef CONFIG_BSD 55 #include <sys/ioctl.h> 56 #include <sys/queue.h> 57 #ifndef __DragonFly__ 58 #include <sys/disk.h> 59 #endif 60 #endif 61 62 #ifdef _WIN32 63 #include <windows.h> 64 #endif 65 66 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 67 68 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 69 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 70 71 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 72 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 73 74 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 75 QLIST_HEAD_INITIALIZER(bdrv_drivers); 76 77 static BlockDriverState *bdrv_open_inherit(const char *filename, 78 const char *reference, 79 QDict *options, int flags, 80 BlockDriverState *parent, 81 const BdrvChildClass *child_class, 82 BdrvChildRole child_role, 83 Error **errp); 84 85 static void bdrv_replace_child_noperm(BdrvChild *child, 86 BlockDriverState *new_bs); 87 static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, 88 BlockDriverState *child_bs, 89 const char *child_name, 90 const BdrvChildClass *child_class, 91 BdrvChildRole child_role, 92 
BdrvChild **child, 93 Transaction *tran, 94 Error **errp); 95 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 96 Transaction *tran); 97 98 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 99 BlockReopenQueue *queue, 100 Transaction *set_backings_tran, Error **errp); 101 static void bdrv_reopen_commit(BDRVReopenState *reopen_state); 102 static void bdrv_reopen_abort(BDRVReopenState *reopen_state); 103 104 /* If non-zero, use only whitelisted block drivers */ 105 static int use_bdrv_whitelist; 106 107 #ifdef _WIN32 108 static int is_windows_drive_prefix(const char *filename) 109 { 110 return (((filename[0] >= 'a' && filename[0] <= 'z') || 111 (filename[0] >= 'A' && filename[0] <= 'Z')) && 112 filename[1] == ':'); 113 } 114 115 int is_windows_drive(const char *filename) 116 { 117 if (is_windows_drive_prefix(filename) && 118 filename[2] == '\0') 119 return 1; 120 if (strstart(filename, "\\\\.\\", NULL) || 121 strstart(filename, "//./", NULL)) 122 return 1; 123 return 0; 124 } 125 #endif 126 127 size_t bdrv_opt_mem_align(BlockDriverState *bs) 128 { 129 if (!bs || !bs->drv) { 130 /* page size or 4k (hdd sector size) should be on the safe side */ 131 return MAX(4096, qemu_real_host_page_size); 132 } 133 134 return bs->bl.opt_mem_alignment; 135 } 136 137 size_t bdrv_min_mem_align(BlockDriverState *bs) 138 { 139 if (!bs || !bs->drv) { 140 /* page size or 4k (hdd sector size) should be on the safe side */ 141 return MAX(4096, qemu_real_host_page_size); 142 } 143 144 return bs->bl.min_mem_alignment; 145 } 146 147 /* check if the path starts with "<protocol>:" */ 148 int path_has_protocol(const char *path) 149 { 150 const char *p; 151 152 #ifdef _WIN32 153 if (is_windows_drive(path) || 154 is_windows_drive_prefix(path)) { 155 return 0; 156 } 157 p = path + strcspn(path, ":/\\"); 158 #else 159 p = path + strcspn(path, ":/"); 160 #endif 161 162 return *p == ':'; 163 } 164 165 int path_is_absolute(const char *path) 166 { 167 #ifdef _WIN32 168 /* 
specific case for names like: "\\.\d:" */ 169 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 170 return 1; 171 } 172 return (*path == '/' || *path == '\\'); 173 #else 174 return (*path == '/'); 175 #endif 176 } 177 178 /* if filename is absolute, just return its duplicate. Otherwise, build a 179 path to it by considering it is relative to base_path. URL are 180 supported. */ 181 char *path_combine(const char *base_path, const char *filename) 182 { 183 const char *protocol_stripped = NULL; 184 const char *p, *p1; 185 char *result; 186 int len; 187 188 if (path_is_absolute(filename)) { 189 return g_strdup(filename); 190 } 191 192 if (path_has_protocol(base_path)) { 193 protocol_stripped = strchr(base_path, ':'); 194 if (protocol_stripped) { 195 protocol_stripped++; 196 } 197 } 198 p = protocol_stripped ?: base_path; 199 200 p1 = strrchr(base_path, '/'); 201 #ifdef _WIN32 202 { 203 const char *p2; 204 p2 = strrchr(base_path, '\\'); 205 if (!p1 || p2 > p1) { 206 p1 = p2; 207 } 208 } 209 #endif 210 if (p1) { 211 p1++; 212 } else { 213 p1 = base_path; 214 } 215 if (p1 > p) { 216 p = p1; 217 } 218 len = p - base_path; 219 220 result = g_malloc(len + strlen(filename) + 1); 221 memcpy(result, base_path, len); 222 strcpy(result + len, filename); 223 224 return result; 225 } 226 227 /* 228 * Helper function for bdrv_parse_filename() implementations to remove optional 229 * protocol prefixes (especially "file:") from a filename and for putting the 230 * stripped filename into the options QDict if there is such a prefix. 
231 */ 232 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 233 QDict *options) 234 { 235 if (strstart(filename, prefix, &filename)) { 236 /* Stripping the explicit protocol prefix may result in a protocol 237 * prefix being (wrongly) detected (if the filename contains a colon) */ 238 if (path_has_protocol(filename)) { 239 GString *fat_filename; 240 241 /* This means there is some colon before the first slash; therefore, 242 * this cannot be an absolute path */ 243 assert(!path_is_absolute(filename)); 244 245 /* And we can thus fix the protocol detection issue by prefixing it 246 * by "./" */ 247 fat_filename = g_string_new("./"); 248 g_string_append(fat_filename, filename); 249 250 assert(!path_has_protocol(fat_filename->str)); 251 252 qdict_put(options, "filename", 253 qstring_from_gstring(fat_filename)); 254 } else { 255 /* If no protocol prefix was detected, we can use the shortened 256 * filename as-is */ 257 qdict_put_str(options, "filename", filename); 258 } 259 } 260 } 261 262 263 /* Returns whether the image file is opened as read-only. Note that this can 264 * return false and writing to the image file is still not possible because the 265 * image is inactivated. 
*/ 266 bool bdrv_is_read_only(BlockDriverState *bs) 267 { 268 return bs->read_only; 269 } 270 271 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 272 bool ignore_allow_rdw, Error **errp) 273 { 274 /* Do not set read_only if copy_on_read is enabled */ 275 if (bs->copy_on_read && read_only) { 276 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 277 bdrv_get_device_or_node_name(bs)); 278 return -EINVAL; 279 } 280 281 /* Do not clear read_only if it is prohibited */ 282 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 283 !ignore_allow_rdw) 284 { 285 error_setg(errp, "Node '%s' is read only", 286 bdrv_get_device_or_node_name(bs)); 287 return -EPERM; 288 } 289 290 return 0; 291 } 292 293 /* 294 * Called by a driver that can only provide a read-only image. 295 * 296 * Returns 0 if the node is already read-only or it could switch the node to 297 * read-only because BDRV_O_AUTO_RDONLY is set. 298 * 299 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 300 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 301 * is not NULL, it is used as the error message for the Error object. 302 */ 303 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 304 Error **errp) 305 { 306 int ret = 0; 307 308 if (!(bs->open_flags & BDRV_O_RDWR)) { 309 return 0; 310 } 311 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 312 goto fail; 313 } 314 315 ret = bdrv_can_set_read_only(bs, true, false, NULL); 316 if (ret < 0) { 317 goto fail; 318 } 319 320 bs->read_only = true; 321 bs->open_flags &= ~BDRV_O_RDWR; 322 323 return 0; 324 325 fail: 326 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 327 return -EACCES; 328 } 329 330 /* 331 * If @backing is empty, this function returns NULL without setting 332 * @errp. In all other cases, NULL will only be returned with @errp 333 * set. 
334 * 335 * Therefore, a return value of NULL without @errp set means that 336 * there is no backing file; if @errp is set, there is one but its 337 * absolute filename cannot be generated. 338 */ 339 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 340 const char *backing, 341 Error **errp) 342 { 343 if (backing[0] == '\0') { 344 return NULL; 345 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 346 return g_strdup(backing); 347 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 348 error_setg(errp, "Cannot use relative backing file names for '%s'", 349 backed); 350 return NULL; 351 } else { 352 return path_combine(backed, backing); 353 } 354 } 355 356 /* 357 * If @filename is empty or NULL, this function returns NULL without 358 * setting @errp. In all other cases, NULL will only be returned with 359 * @errp set. 360 */ 361 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, 362 const char *filename, Error **errp) 363 { 364 char *dir, *full_name; 365 366 if (!filename || filename[0] == '\0') { 367 return NULL; 368 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 369 return g_strdup(filename); 370 } 371 372 dir = bdrv_dirname(relative_to, errp); 373 if (!dir) { 374 return NULL; 375 } 376 377 full_name = g_strconcat(dir, filename, NULL); 378 g_free(dir); 379 return full_name; 380 } 381 382 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 383 { 384 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 385 } 386 387 void bdrv_register(BlockDriver *bdrv) 388 { 389 assert(bdrv->format_name); 390 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 391 } 392 393 BlockDriverState *bdrv_new(void) 394 { 395 BlockDriverState *bs; 396 int i; 397 398 bs = g_new0(BlockDriverState, 1); 399 QLIST_INIT(&bs->dirty_bitmaps); 400 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 401 QLIST_INIT(&bs->op_blockers[i]); 402 } 403 
notifier_with_return_list_init(&bs->before_write_notifiers); 404 qemu_co_mutex_init(&bs->reqs_lock); 405 qemu_mutex_init(&bs->dirty_bitmap_mutex); 406 bs->refcnt = 1; 407 bs->aio_context = qemu_get_aio_context(); 408 409 qemu_co_queue_init(&bs->flush_queue); 410 411 for (i = 0; i < bdrv_drain_all_count; i++) { 412 bdrv_drained_begin(bs); 413 } 414 415 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 416 417 return bs; 418 } 419 420 static BlockDriver *bdrv_do_find_format(const char *format_name) 421 { 422 BlockDriver *drv1; 423 424 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 425 if (!strcmp(drv1->format_name, format_name)) { 426 return drv1; 427 } 428 } 429 430 return NULL; 431 } 432 433 BlockDriver *bdrv_find_format(const char *format_name) 434 { 435 BlockDriver *drv1; 436 int i; 437 438 drv1 = bdrv_do_find_format(format_name); 439 if (drv1) { 440 return drv1; 441 } 442 443 /* The driver isn't registered, maybe we need to load a module */ 444 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 445 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 446 block_module_load_one(block_driver_modules[i].library_name); 447 break; 448 } 449 } 450 451 return bdrv_do_find_format(format_name); 452 } 453 454 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 455 { 456 static const char *whitelist_rw[] = { 457 CONFIG_BDRV_RW_WHITELIST 458 NULL 459 }; 460 static const char *whitelist_ro[] = { 461 CONFIG_BDRV_RO_WHITELIST 462 NULL 463 }; 464 const char **p; 465 466 if (!whitelist_rw[0] && !whitelist_ro[0]) { 467 return 1; /* no whitelist, anything goes */ 468 } 469 470 for (p = whitelist_rw; *p; p++) { 471 if (!strcmp(format_name, *p)) { 472 return 1; 473 } 474 } 475 if (read_only) { 476 for (p = whitelist_ro; *p; p++) { 477 if (!strcmp(format_name, *p)) { 478 return 1; 479 } 480 } 481 } 482 return 0; 483 } 484 485 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 486 { 487 return 
bdrv_format_is_whitelisted(drv->format_name, read_only); 488 } 489 490 bool bdrv_uses_whitelist(void) 491 { 492 return use_bdrv_whitelist; 493 } 494 495 typedef struct CreateCo { 496 BlockDriver *drv; 497 char *filename; 498 QemuOpts *opts; 499 int ret; 500 Error *err; 501 } CreateCo; 502 503 static void coroutine_fn bdrv_create_co_entry(void *opaque) 504 { 505 Error *local_err = NULL; 506 int ret; 507 508 CreateCo *cco = opaque; 509 assert(cco->drv); 510 511 ret = cco->drv->bdrv_co_create_opts(cco->drv, 512 cco->filename, cco->opts, &local_err); 513 error_propagate(&cco->err, local_err); 514 cco->ret = ret; 515 } 516 517 int bdrv_create(BlockDriver *drv, const char* filename, 518 QemuOpts *opts, Error **errp) 519 { 520 int ret; 521 522 Coroutine *co; 523 CreateCo cco = { 524 .drv = drv, 525 .filename = g_strdup(filename), 526 .opts = opts, 527 .ret = NOT_DONE, 528 .err = NULL, 529 }; 530 531 if (!drv->bdrv_co_create_opts) { 532 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 533 ret = -ENOTSUP; 534 goto out; 535 } 536 537 if (qemu_in_coroutine()) { 538 /* Fast-path if already in coroutine context */ 539 bdrv_create_co_entry(&cco); 540 } else { 541 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 542 qemu_coroutine_enter(co); 543 while (cco.ret == NOT_DONE) { 544 aio_poll(qemu_get_aio_context(), true); 545 } 546 } 547 548 ret = cco.ret; 549 if (ret < 0) { 550 if (cco.err) { 551 error_propagate(errp, cco.err); 552 } else { 553 error_setg_errno(errp, -ret, "Could not create image"); 554 } 555 } 556 557 out: 558 g_free(cco.filename); 559 return ret; 560 } 561 562 /** 563 * Helper function for bdrv_create_file_fallback(): Resize @blk to at 564 * least the given @minimum_size. 565 * 566 * On success, return @blk's actual length. 567 * Otherwise, return -errno. 
568 */ 569 static int64_t create_file_fallback_truncate(BlockBackend *blk, 570 int64_t minimum_size, Error **errp) 571 { 572 Error *local_err = NULL; 573 int64_t size; 574 int ret; 575 576 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 577 &local_err); 578 if (ret < 0 && ret != -ENOTSUP) { 579 error_propagate(errp, local_err); 580 return ret; 581 } 582 583 size = blk_getlength(blk); 584 if (size < 0) { 585 error_free(local_err); 586 error_setg_errno(errp, -size, 587 "Failed to inquire the new image file's length"); 588 return size; 589 } 590 591 if (size < minimum_size) { 592 /* Need to grow the image, but we failed to do that */ 593 error_propagate(errp, local_err); 594 return -ENOTSUP; 595 } 596 597 error_free(local_err); 598 local_err = NULL; 599 600 return size; 601 } 602 603 /** 604 * Helper function for bdrv_create_file_fallback(): Zero the first 605 * sector to remove any potentially pre-existing image header. 606 */ 607 static int create_file_fallback_zero_first_sector(BlockBackend *blk, 608 int64_t current_size, 609 Error **errp) 610 { 611 int64_t bytes_to_clear; 612 int ret; 613 614 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 615 if (bytes_to_clear) { 616 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 617 if (ret < 0) { 618 error_setg_errno(errp, -ret, 619 "Failed to clear the new image's first sector"); 620 return ret; 621 } 622 } 623 624 return 0; 625 } 626 627 /** 628 * Simple implementation of bdrv_co_create_opts for protocol drivers 629 * which only support creation via opening a file 630 * (usually existing raw storage device) 631 */ 632 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 633 const char *filename, 634 QemuOpts *opts, 635 Error **errp) 636 { 637 BlockBackend *blk; 638 QDict *options; 639 int64_t size = 0; 640 char *buf = NULL; 641 PreallocMode prealloc; 642 Error *local_err = NULL; 643 int ret; 644 645 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 646 buf = 
qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 647 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 648 PREALLOC_MODE_OFF, &local_err); 649 g_free(buf); 650 if (local_err) { 651 error_propagate(errp, local_err); 652 return -EINVAL; 653 } 654 655 if (prealloc != PREALLOC_MODE_OFF) { 656 error_setg(errp, "Unsupported preallocation mode '%s'", 657 PreallocMode_str(prealloc)); 658 return -ENOTSUP; 659 } 660 661 options = qdict_new(); 662 qdict_put_str(options, "driver", drv->format_name); 663 664 blk = blk_new_open(filename, NULL, options, 665 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 666 if (!blk) { 667 error_prepend(errp, "Protocol driver '%s' does not support image " 668 "creation, and opening the image failed: ", 669 drv->format_name); 670 return -EINVAL; 671 } 672 673 size = create_file_fallback_truncate(blk, size, errp); 674 if (size < 0) { 675 ret = size; 676 goto out; 677 } 678 679 ret = create_file_fallback_zero_first_sector(blk, size, errp); 680 if (ret < 0) { 681 goto out; 682 } 683 684 ret = 0; 685 out: 686 blk_unref(blk); 687 return ret; 688 } 689 690 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 691 { 692 QemuOpts *protocol_opts; 693 BlockDriver *drv; 694 QDict *qdict; 695 int ret; 696 697 drv = bdrv_find_protocol(filename, true, errp); 698 if (drv == NULL) { 699 return -ENOENT; 700 } 701 702 if (!drv->create_opts) { 703 error_setg(errp, "Driver '%s' does not support image creation", 704 drv->format_name); 705 return -ENOTSUP; 706 } 707 708 /* 709 * 'opts' contains a QemuOptsList with a combination of format and protocol 710 * default values. 711 * 712 * The format properly removes its options, but the default values remain 713 * in 'opts->list'. So if the protocol has options with the same name 714 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 715 * of the format, since for overlapping options, the format wins. 
716 * 717 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 718 * only the set options, and then convert it back to QemuOpts, using the 719 * create_opts of the protocol. So the new QemuOpts, will contain only the 720 * protocol defaults. 721 */ 722 qdict = qemu_opts_to_qdict(opts, NULL); 723 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 724 if (protocol_opts == NULL) { 725 ret = -EINVAL; 726 goto out; 727 } 728 729 ret = bdrv_create(drv, filename, protocol_opts, errp); 730 out: 731 qemu_opts_del(protocol_opts); 732 qobject_unref(qdict); 733 return ret; 734 } 735 736 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 737 { 738 Error *local_err = NULL; 739 int ret; 740 741 assert(bs != NULL); 742 743 if (!bs->drv) { 744 error_setg(errp, "Block node '%s' is not opened", bs->filename); 745 return -ENOMEDIUM; 746 } 747 748 if (!bs->drv->bdrv_co_delete_file) { 749 error_setg(errp, "Driver '%s' does not support image deletion", 750 bs->drv->format_name); 751 return -ENOTSUP; 752 } 753 754 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 755 if (ret < 0) { 756 error_propagate(errp, local_err); 757 } 758 759 return ret; 760 } 761 762 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 763 { 764 Error *local_err = NULL; 765 int ret; 766 767 if (!bs) { 768 return; 769 } 770 771 ret = bdrv_co_delete_file(bs, &local_err); 772 /* 773 * ENOTSUP will happen if the block driver doesn't support 774 * the 'bdrv_co_delete_file' interface. This is a predictable 775 * scenario and shouldn't be reported back to the user. 776 */ 777 if (ret == -ENOTSUP) { 778 error_free(local_err); 779 } else if (ret < 0) { 780 error_report_err(local_err); 781 } 782 } 783 784 /** 785 * Try to get @bs's logical and physical block size. 786 * On success, store them in @bsz struct and return 0. 787 * On failure return -errno. 788 * @bs must not be empty. 
789 */ 790 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 791 { 792 BlockDriver *drv = bs->drv; 793 BlockDriverState *filtered = bdrv_filter_bs(bs); 794 795 if (drv && drv->bdrv_probe_blocksizes) { 796 return drv->bdrv_probe_blocksizes(bs, bsz); 797 } else if (filtered) { 798 return bdrv_probe_blocksizes(filtered, bsz); 799 } 800 801 return -ENOTSUP; 802 } 803 804 /** 805 * Try to get @bs's geometry (cyls, heads, sectors). 806 * On success, store them in @geo struct and return 0. 807 * On failure return -errno. 808 * @bs must not be empty. 809 */ 810 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 811 { 812 BlockDriver *drv = bs->drv; 813 BlockDriverState *filtered = bdrv_filter_bs(bs); 814 815 if (drv && drv->bdrv_probe_geometry) { 816 return drv->bdrv_probe_geometry(bs, geo); 817 } else if (filtered) { 818 return bdrv_probe_geometry(filtered, geo); 819 } 820 821 return -ENOTSUP; 822 } 823 824 /* 825 * Create a uniquely-named empty temporary file. 826 * Return 0 upon success, otherwise a negative errno value. 827 */ 828 int get_tmp_filename(char *filename, int size) 829 { 830 #ifdef _WIN32 831 char temp_dir[MAX_PATH]; 832 /* GetTempFileName requires that its output buffer (4th param) 833 have length MAX_PATH or greater. */ 834 assert(size >= MAX_PATH); 835 return (GetTempPath(MAX_PATH, temp_dir) 836 && GetTempFileName(temp_dir, "qem", 0, filename) 837 ? 0 : -GetLastError()); 838 #else 839 int fd; 840 const char *tmpdir; 841 tmpdir = getenv("TMPDIR"); 842 if (!tmpdir) { 843 tmpdir = "/var/tmp"; 844 } 845 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 846 return -EOVERFLOW; 847 } 848 fd = mkstemp(filename); 849 if (fd < 0) { 850 return -errno; 851 } 852 if (close(fd) != 0) { 853 unlink(filename); 854 return -errno; 855 } 856 return 0; 857 #endif 858 } 859 860 /* 861 * Detect host devices. By convention, /dev/cdrom[N] is always 862 * recognized as a host CDROM. 
863 */ 864 static BlockDriver *find_hdev_driver(const char *filename) 865 { 866 int score_max = 0, score; 867 BlockDriver *drv = NULL, *d; 868 869 QLIST_FOREACH(d, &bdrv_drivers, list) { 870 if (d->bdrv_probe_device) { 871 score = d->bdrv_probe_device(filename); 872 if (score > score_max) { 873 score_max = score; 874 drv = d; 875 } 876 } 877 } 878 879 return drv; 880 } 881 882 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 883 { 884 BlockDriver *drv1; 885 886 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 887 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 888 return drv1; 889 } 890 } 891 892 return NULL; 893 } 894 895 BlockDriver *bdrv_find_protocol(const char *filename, 896 bool allow_protocol_prefix, 897 Error **errp) 898 { 899 BlockDriver *drv1; 900 char protocol[128]; 901 int len; 902 const char *p; 903 int i; 904 905 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 906 907 /* 908 * XXX(hch): we really should not let host device detection 909 * override an explicit protocol specification, but moving this 910 * later breaks access to device names with colons in them. 911 * Thanks to the brain-dead persistent naming schemes on udev- 912 * based Linux systems those actually are quite common. 
913 */ 914 drv1 = find_hdev_driver(filename); 915 if (drv1) { 916 return drv1; 917 } 918 919 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 920 return &bdrv_file; 921 } 922 923 p = strchr(filename, ':'); 924 assert(p != NULL); 925 len = p - filename; 926 if (len > sizeof(protocol) - 1) 927 len = sizeof(protocol) - 1; 928 memcpy(protocol, filename, len); 929 protocol[len] = '\0'; 930 931 drv1 = bdrv_do_find_protocol(protocol); 932 if (drv1) { 933 return drv1; 934 } 935 936 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 937 if (block_driver_modules[i].protocol_name && 938 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 939 block_module_load_one(block_driver_modules[i].library_name); 940 break; 941 } 942 } 943 944 drv1 = bdrv_do_find_protocol(protocol); 945 if (!drv1) { 946 error_setg(errp, "Unknown protocol '%s'", protocol); 947 } 948 return drv1; 949 } 950 951 /* 952 * Guess image format by probing its contents. 953 * This is not a good idea when your image is raw (CVE-2008-2004), but 954 * we do it anyway for backward compatibility. 955 * 956 * @buf contains the image's first @buf_size bytes. 957 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 958 * but can be smaller if the image file is smaller) 959 * @filename is its filename. 960 * 961 * For all block drivers, call the bdrv_probe() method to get its 962 * probing score. 963 * Return the first block driver with the highest probing score. 
964 */ 965 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 966 const char *filename) 967 { 968 int score_max = 0, score; 969 BlockDriver *drv = NULL, *d; 970 971 QLIST_FOREACH(d, &bdrv_drivers, list) { 972 if (d->bdrv_probe) { 973 score = d->bdrv_probe(buf, buf_size, filename); 974 if (score > score_max) { 975 score_max = score; 976 drv = d; 977 } 978 } 979 } 980 981 return drv; 982 } 983 984 static int find_image_format(BlockBackend *file, const char *filename, 985 BlockDriver **pdrv, Error **errp) 986 { 987 BlockDriver *drv; 988 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 989 int ret = 0; 990 991 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 992 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 993 *pdrv = &bdrv_raw; 994 return ret; 995 } 996 997 ret = blk_pread(file, 0, buf, sizeof(buf)); 998 if (ret < 0) { 999 error_setg_errno(errp, -ret, "Could not read image for determining its " 1000 "format"); 1001 *pdrv = NULL; 1002 return ret; 1003 } 1004 1005 drv = bdrv_probe_all(buf, ret, filename); 1006 if (!drv) { 1007 error_setg(errp, "Could not determine image format: No compatible " 1008 "driver found"); 1009 ret = -ENOENT; 1010 } 1011 *pdrv = drv; 1012 return ret; 1013 } 1014 1015 /** 1016 * Set the current 'total_sectors' value 1017 * Return 0 on success, -errno on error. 
1018 */ 1019 int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 1020 { 1021 BlockDriver *drv = bs->drv; 1022 1023 if (!drv) { 1024 return -ENOMEDIUM; 1025 } 1026 1027 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 1028 if (bdrv_is_sg(bs)) 1029 return 0; 1030 1031 /* query actual device if possible, otherwise just trust the hint */ 1032 if (drv->bdrv_getlength) { 1033 int64_t length = drv->bdrv_getlength(bs); 1034 if (length < 0) { 1035 return length; 1036 } 1037 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1038 } 1039 1040 bs->total_sectors = hint; 1041 1042 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1043 return -EFBIG; 1044 } 1045 1046 return 0; 1047 } 1048 1049 /** 1050 * Combines a QDict of new block driver @options with any missing options taken 1051 * from @old_options, so that leaving out an option defaults to its old value. 1052 */ 1053 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1054 QDict *old_options) 1055 { 1056 if (bs->drv && bs->drv->bdrv_join_options) { 1057 bs->drv->bdrv_join_options(options, old_options); 1058 } else { 1059 qdict_join(options, old_options, false); 1060 } 1061 } 1062 1063 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1064 int open_flags, 1065 Error **errp) 1066 { 1067 Error *local_err = NULL; 1068 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1069 BlockdevDetectZeroesOptions detect_zeroes = 1070 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1071 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1072 g_free(value); 1073 if (local_err) { 1074 error_propagate(errp, local_err); 1075 return detect_zeroes; 1076 } 1077 1078 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1079 !(open_flags & BDRV_O_UNMAP)) 1080 { 1081 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1082 "without setting discard operation to unmap"); 1083 } 1084 1085 return detect_zeroes; 1086 } 1087 1088 /** 1089 * Set 
open flags for aio engine 1090 * 1091 * Return 0 on success, -1 if the engine specified is invalid 1092 */ 1093 int bdrv_parse_aio(const char *mode, int *flags) 1094 { 1095 if (!strcmp(mode, "threads")) { 1096 /* do nothing, default */ 1097 } else if (!strcmp(mode, "native")) { 1098 *flags |= BDRV_O_NATIVE_AIO; 1099 #ifdef CONFIG_LINUX_IO_URING 1100 } else if (!strcmp(mode, "io_uring")) { 1101 *flags |= BDRV_O_IO_URING; 1102 #endif 1103 } else { 1104 return -1; 1105 } 1106 1107 return 0; 1108 } 1109 1110 /** 1111 * Set open flags for a given discard mode 1112 * 1113 * Return 0 on success, -1 if the discard mode was invalid. 1114 */ 1115 int bdrv_parse_discard_flags(const char *mode, int *flags) 1116 { 1117 *flags &= ~BDRV_O_UNMAP; 1118 1119 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1120 /* do nothing */ 1121 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1122 *flags |= BDRV_O_UNMAP; 1123 } else { 1124 return -1; 1125 } 1126 1127 return 0; 1128 } 1129 1130 /** 1131 * Set open flags for a given cache mode 1132 * 1133 * Return 0 on success, -1 if the cache mode was invalid. 
1134 */ 1135 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1136 { 1137 *flags &= ~BDRV_O_CACHE_MASK; 1138 1139 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1140 *writethrough = false; 1141 *flags |= BDRV_O_NOCACHE; 1142 } else if (!strcmp(mode, "directsync")) { 1143 *writethrough = true; 1144 *flags |= BDRV_O_NOCACHE; 1145 } else if (!strcmp(mode, "writeback")) { 1146 *writethrough = false; 1147 } else if (!strcmp(mode, "unsafe")) { 1148 *writethrough = false; 1149 *flags |= BDRV_O_NO_FLUSH; 1150 } else if (!strcmp(mode, "writethrough")) { 1151 *writethrough = true; 1152 } else { 1153 return -1; 1154 } 1155 1156 return 0; 1157 } 1158 1159 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1160 { 1161 BlockDriverState *parent = c->opaque; 1162 return g_strdup(bdrv_get_device_or_node_name(parent)); 1163 } 1164 1165 static void bdrv_child_cb_drained_begin(BdrvChild *child) 1166 { 1167 BlockDriverState *bs = child->opaque; 1168 bdrv_do_drained_begin_quiesce(bs, NULL, false); 1169 } 1170 1171 static bool bdrv_child_cb_drained_poll(BdrvChild *child) 1172 { 1173 BlockDriverState *bs = child->opaque; 1174 return bdrv_drain_poll(bs, false, NULL, false); 1175 } 1176 1177 static void bdrv_child_cb_drained_end(BdrvChild *child, 1178 int *drained_end_counter) 1179 { 1180 BlockDriverState *bs = child->opaque; 1181 bdrv_drained_end_no_poll(bs, drained_end_counter); 1182 } 1183 1184 static int bdrv_child_cb_inactivate(BdrvChild *child) 1185 { 1186 BlockDriverState *bs = child->opaque; 1187 assert(bs->open_flags & BDRV_O_INACTIVE); 1188 return 0; 1189 } 1190 1191 static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1192 GSList **ignore, Error **errp) 1193 { 1194 BlockDriverState *bs = child->opaque; 1195 return bdrv_can_set_aio_context(bs, ctx, ignore, errp); 1196 } 1197 1198 static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1199 GSList **ignore) 1200 { 1201 BlockDriverState *bs = child->opaque; 
1202 return bdrv_set_aio_context_ignore(bs, ctx, ignore); 1203 } 1204 1205 /* 1206 * Returns the options and flags that a temporary snapshot should get, based on 1207 * the originally requested flags (the originally requested image will have 1208 * flags like a backing file) 1209 */ 1210 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 1211 int parent_flags, QDict *parent_options) 1212 { 1213 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 1214 1215 /* For temporary files, unconditional cache=unsafe is fine */ 1216 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 1217 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 1218 1219 /* Copy the read-only and discard options from the parent */ 1220 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1221 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); 1222 1223 /* aio=native doesn't work for cache.direct=off, so disable it for the 1224 * temporary snapshot */ 1225 *child_flags &= ~BDRV_O_NATIVE_AIO; 1226 } 1227 1228 static void bdrv_backing_attach(BdrvChild *c) 1229 { 1230 BlockDriverState *parent = c->opaque; 1231 BlockDriverState *backing_hd = c->bs; 1232 1233 assert(!parent->backing_blocker); 1234 error_setg(&parent->backing_blocker, 1235 "node is used as backing hd of '%s'", 1236 bdrv_get_device_or_node_name(parent)); 1237 1238 bdrv_refresh_filename(backing_hd); 1239 1240 parent->open_flags &= ~BDRV_O_NO_BACKING; 1241 1242 bdrv_op_block_all(backing_hd, parent->backing_blocker); 1243 /* Otherwise we won't be able to commit or stream */ 1244 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1245 parent->backing_blocker); 1246 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 1247 parent->backing_blocker); 1248 /* 1249 * We do backup in 3 ways: 1250 * 1. drive backup 1251 * The target bs is new opened, and the source is top BDS 1252 * 2. 
blockdev backup 1253 * Both the source and the target are top BDSes. 1254 * 3. internal backup(used for block replication) 1255 * Both the source and the target are backing file 1256 * 1257 * In case 1 and 2, neither the source nor the target is the backing file. 1258 * In case 3, we will block the top BDS, so there is only one block job 1259 * for the top BDS and its backing chain. 1260 */ 1261 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 1262 parent->backing_blocker); 1263 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 1264 parent->backing_blocker); 1265 } 1266 1267 static void bdrv_backing_detach(BdrvChild *c) 1268 { 1269 BlockDriverState *parent = c->opaque; 1270 1271 assert(parent->backing_blocker); 1272 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 1273 error_free(parent->backing_blocker); 1274 parent->backing_blocker = NULL; 1275 } 1276 1277 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1278 const char *filename, Error **errp) 1279 { 1280 BlockDriverState *parent = c->opaque; 1281 bool read_only = bdrv_is_read_only(parent); 1282 int ret; 1283 1284 if (read_only) { 1285 ret = bdrv_reopen_set_read_only(parent, false, errp); 1286 if (ret < 0) { 1287 return ret; 1288 } 1289 } 1290 1291 ret = bdrv_change_backing_file(parent, filename, 1292 base->drv ? base->drv->format_name : "", 1293 false); 1294 if (ret < 0) { 1295 error_setg_errno(errp, -ret, "Could not update backing file link"); 1296 } 1297 1298 if (read_only) { 1299 bdrv_reopen_set_read_only(parent, true, NULL); 1300 } 1301 1302 return ret; 1303 } 1304 1305 /* 1306 * Returns the options and flags that a generic child of a BDS should 1307 * get, based on the given options and flags for the parent BDS. 
1308 */ 1309 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, 1310 int *child_flags, QDict *child_options, 1311 int parent_flags, QDict *parent_options) 1312 { 1313 int flags = parent_flags; 1314 1315 /* 1316 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. 1317 * Generally, the question to answer is: Should this child be 1318 * format-probed by default? 1319 */ 1320 1321 /* 1322 * Pure and non-filtered data children of non-format nodes should 1323 * be probed by default (even when the node itself has BDRV_O_PROTOCOL 1324 * set). This only affects a very limited set of drivers (namely 1325 * quorum and blkverify when this comment was written). 1326 * Force-clear BDRV_O_PROTOCOL then. 1327 */ 1328 if (!parent_is_format && 1329 (role & BDRV_CHILD_DATA) && 1330 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) 1331 { 1332 flags &= ~BDRV_O_PROTOCOL; 1333 } 1334 1335 /* 1336 * All children of format nodes (except for COW children) and all 1337 * metadata children in general should never be format-probed. 1338 * Force-set BDRV_O_PROTOCOL then. 1339 */ 1340 if ((parent_is_format && !(role & BDRV_CHILD_COW)) || 1341 (role & BDRV_CHILD_METADATA)) 1342 { 1343 flags |= BDRV_O_PROTOCOL; 1344 } 1345 1346 /* 1347 * If the cache mode isn't explicitly set, inherit direct and no-flush from 1348 * the parent. 
1349 */ 1350 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1351 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1352 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1353 1354 if (role & BDRV_CHILD_COW) { 1355 /* backing files are opened read-only by default */ 1356 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1357 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); 1358 } else { 1359 /* Inherit the read-only option from the parent if it's not set */ 1360 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1361 qdict_copy_default(child_options, parent_options, 1362 BDRV_OPT_AUTO_READ_ONLY); 1363 } 1364 1365 /* 1366 * bdrv_co_pdiscard() respects unmap policy for the parent, so we 1367 * can default to enable it on lower layers regardless of the 1368 * parent option. 1369 */ 1370 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 1371 1372 /* Clear flags that only apply to the top layer */ 1373 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 1374 1375 if (role & BDRV_CHILD_METADATA) { 1376 flags &= ~BDRV_O_NO_IO; 1377 } 1378 if (role & BDRV_CHILD_COW) { 1379 flags &= ~BDRV_O_TEMPORARY; 1380 } 1381 1382 *child_flags = flags; 1383 } 1384 1385 static void bdrv_child_cb_attach(BdrvChild *child) 1386 { 1387 BlockDriverState *bs = child->opaque; 1388 1389 if (child->role & BDRV_CHILD_COW) { 1390 bdrv_backing_attach(child); 1391 } 1392 1393 bdrv_apply_subtree_drain(child, bs); 1394 } 1395 1396 static void bdrv_child_cb_detach(BdrvChild *child) 1397 { 1398 BlockDriverState *bs = child->opaque; 1399 1400 if (child->role & BDRV_CHILD_COW) { 1401 bdrv_backing_detach(child); 1402 } 1403 1404 bdrv_unapply_subtree_drain(child, bs); 1405 } 1406 1407 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, 1408 const char *filename, Error **errp) 1409 { 1410 if (c->role & BDRV_CHILD_COW) { 
1411 return bdrv_backing_update_filename(c, base, filename, errp); 1412 } 1413 return 0; 1414 } 1415 1416 static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) 1417 { 1418 BlockDriverState *bs = c->opaque; 1419 1420 return bdrv_get_aio_context(bs); 1421 } 1422 1423 const BdrvChildClass child_of_bds = { 1424 .parent_is_bds = true, 1425 .get_parent_desc = bdrv_child_get_parent_desc, 1426 .inherit_options = bdrv_inherited_options, 1427 .drained_begin = bdrv_child_cb_drained_begin, 1428 .drained_poll = bdrv_child_cb_drained_poll, 1429 .drained_end = bdrv_child_cb_drained_end, 1430 .attach = bdrv_child_cb_attach, 1431 .detach = bdrv_child_cb_detach, 1432 .inactivate = bdrv_child_cb_inactivate, 1433 .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, 1434 .set_aio_ctx = bdrv_child_cb_set_aio_ctx, 1435 .update_filename = bdrv_child_cb_update_filename, 1436 .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, 1437 }; 1438 1439 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) 1440 { 1441 return c->klass->get_parent_aio_context(c); 1442 } 1443 1444 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1445 { 1446 int open_flags = flags; 1447 1448 /* 1449 * Clear flags that are internal to the block layer before opening the 1450 * image. 
1451 */ 1452 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1453 1454 return open_flags; 1455 } 1456 1457 static void update_flags_from_options(int *flags, QemuOpts *opts) 1458 { 1459 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); 1460 1461 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1462 *flags |= BDRV_O_NO_FLUSH; 1463 } 1464 1465 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1466 *flags |= BDRV_O_NOCACHE; 1467 } 1468 1469 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { 1470 *flags |= BDRV_O_RDWR; 1471 } 1472 1473 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { 1474 *flags |= BDRV_O_AUTO_RDONLY; 1475 } 1476 } 1477 1478 static void update_options_from_flags(QDict *options, int flags) 1479 { 1480 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1481 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1482 } 1483 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 1484 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1485 flags & BDRV_O_NO_FLUSH); 1486 } 1487 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1488 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1489 } 1490 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { 1491 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, 1492 flags & BDRV_O_AUTO_RDONLY); 1493 } 1494 } 1495 1496 static void bdrv_assign_node_name(BlockDriverState *bs, 1497 const char *node_name, 1498 Error **errp) 1499 { 1500 char *gen_node_name = NULL; 1501 1502 if (!node_name) { 1503 node_name = gen_node_name = id_generate(ID_BLOCK); 1504 } else if (!id_wellformed(node_name)) { 1505 /* 1506 * Check for empty string or invalid characters, but not if it is 1507 * generated (generated names use characters not available to the user) 1508 */ 1509 error_setg(errp, "Invalid node-name: '%s'", node_name); 1510 return; 1511 } 1512 1513 /* takes care of avoiding namespaces collisions */ 
1514 if (blk_by_name(node_name)) { 1515 error_setg(errp, "node-name=%s is conflicting with a device id", 1516 node_name); 1517 goto out; 1518 } 1519 1520 /* takes care of avoiding duplicates node names */ 1521 if (bdrv_find_node(node_name)) { 1522 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name); 1523 goto out; 1524 } 1525 1526 /* Make sure that the node name isn't truncated */ 1527 if (strlen(node_name) >= sizeof(bs->node_name)) { 1528 error_setg(errp, "Node name too long"); 1529 goto out; 1530 } 1531 1532 /* copy node name into the bs and insert it into the graph list */ 1533 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1534 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1535 out: 1536 g_free(gen_node_name); 1537 } 1538 1539 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1540 const char *node_name, QDict *options, 1541 int open_flags, Error **errp) 1542 { 1543 Error *local_err = NULL; 1544 int i, ret; 1545 1546 bdrv_assign_node_name(bs, node_name, &local_err); 1547 if (local_err) { 1548 error_propagate(errp, local_err); 1549 return -EINVAL; 1550 } 1551 1552 bs->drv = drv; 1553 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1554 bs->opaque = g_malloc0(drv->instance_size); 1555 1556 if (drv->bdrv_file_open) { 1557 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1558 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1559 } else if (drv->bdrv_open) { 1560 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1561 } else { 1562 ret = 0; 1563 } 1564 1565 if (ret < 0) { 1566 if (local_err) { 1567 error_propagate(errp, local_err); 1568 } else if (bs->filename[0]) { 1569 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1570 } else { 1571 error_setg_errno(errp, -ret, "Could not open image"); 1572 } 1573 goto open_failed; 1574 } 1575 1576 ret = refresh_total_sectors(bs, bs->total_sectors); 1577 if (ret < 0) { 1578 error_setg_errno(errp, -ret, "Could not refresh total 
sector count"); 1579 return ret; 1580 } 1581 1582 bdrv_refresh_limits(bs, NULL, &local_err); 1583 if (local_err) { 1584 error_propagate(errp, local_err); 1585 return -EINVAL; 1586 } 1587 1588 assert(bdrv_opt_mem_align(bs) != 0); 1589 assert(bdrv_min_mem_align(bs) != 0); 1590 assert(is_power_of_2(bs->bl.request_alignment)); 1591 1592 for (i = 0; i < bs->quiesce_counter; i++) { 1593 if (drv->bdrv_co_drain_begin) { 1594 drv->bdrv_co_drain_begin(bs); 1595 } 1596 } 1597 1598 return 0; 1599 open_failed: 1600 bs->drv = NULL; 1601 if (bs->file != NULL) { 1602 bdrv_unref_child(bs, bs->file); 1603 bs->file = NULL; 1604 } 1605 g_free(bs->opaque); 1606 bs->opaque = NULL; 1607 return ret; 1608 } 1609 1610 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1611 int flags, Error **errp) 1612 { 1613 BlockDriverState *bs; 1614 int ret; 1615 1616 bs = bdrv_new(); 1617 bs->open_flags = flags; 1618 bs->explicit_options = qdict_new(); 1619 bs->options = qdict_new(); 1620 bs->opaque = NULL; 1621 1622 update_options_from_flags(bs->options, flags); 1623 1624 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1625 if (ret < 0) { 1626 qobject_unref(bs->explicit_options); 1627 bs->explicit_options = NULL; 1628 qobject_unref(bs->options); 1629 bs->options = NULL; 1630 bdrv_unref(bs); 1631 return NULL; 1632 } 1633 1634 return bs; 1635 } 1636 1637 QemuOptsList bdrv_runtime_opts = { 1638 .name = "bdrv_common", 1639 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1640 .desc = { 1641 { 1642 .name = "node-name", 1643 .type = QEMU_OPT_STRING, 1644 .help = "Node name of the block device node", 1645 }, 1646 { 1647 .name = "driver", 1648 .type = QEMU_OPT_STRING, 1649 .help = "Block driver to use for the node", 1650 }, 1651 { 1652 .name = BDRV_OPT_CACHE_DIRECT, 1653 .type = QEMU_OPT_BOOL, 1654 .help = "Bypass software writeback cache on the host", 1655 }, 1656 { 1657 .name = BDRV_OPT_CACHE_NO_FLUSH, 1658 .type = QEMU_OPT_BOOL, 1659 .help = "Ignore 
flush requests", 1660 }, 1661 { 1662 .name = BDRV_OPT_READ_ONLY, 1663 .type = QEMU_OPT_BOOL, 1664 .help = "Node is opened in read-only mode", 1665 }, 1666 { 1667 .name = BDRV_OPT_AUTO_READ_ONLY, 1668 .type = QEMU_OPT_BOOL, 1669 .help = "Node can become read-only if opening read-write fails", 1670 }, 1671 { 1672 .name = "detect-zeroes", 1673 .type = QEMU_OPT_STRING, 1674 .help = "try to optimize zero writes (off, on, unmap)", 1675 }, 1676 { 1677 .name = BDRV_OPT_DISCARD, 1678 .type = QEMU_OPT_STRING, 1679 .help = "discard operation (ignore/off, unmap/on)", 1680 }, 1681 { 1682 .name = BDRV_OPT_FORCE_SHARE, 1683 .type = QEMU_OPT_BOOL, 1684 .help = "always accept other writers (default: off)", 1685 }, 1686 { /* end of list */ } 1687 }, 1688 }; 1689 1690 QemuOptsList bdrv_create_opts_simple = { 1691 .name = "simple-create-opts", 1692 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1693 .desc = { 1694 { 1695 .name = BLOCK_OPT_SIZE, 1696 .type = QEMU_OPT_SIZE, 1697 .help = "Virtual disk size" 1698 }, 1699 { 1700 .name = BLOCK_OPT_PREALLOC, 1701 .type = QEMU_OPT_STRING, 1702 .help = "Preallocation mode (allowed values: off)" 1703 }, 1704 { /* end of list */ } 1705 } 1706 }; 1707 1708 /* 1709 * Common part for opening disk images and files 1710 * 1711 * Removes all processed options from *options. 
1712 */ 1713 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1714 QDict *options, Error **errp) 1715 { 1716 int ret, open_flags; 1717 const char *filename; 1718 const char *driver_name = NULL; 1719 const char *node_name = NULL; 1720 const char *discard; 1721 QemuOpts *opts; 1722 BlockDriver *drv; 1723 Error *local_err = NULL; 1724 1725 assert(bs->file == NULL); 1726 assert(options != NULL && bs->options != options); 1727 1728 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1729 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1730 ret = -EINVAL; 1731 goto fail_opts; 1732 } 1733 1734 update_flags_from_options(&bs->open_flags, opts); 1735 1736 driver_name = qemu_opt_get(opts, "driver"); 1737 drv = bdrv_find_format(driver_name); 1738 assert(drv != NULL); 1739 1740 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1741 1742 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1743 error_setg(errp, 1744 BDRV_OPT_FORCE_SHARE 1745 "=on can only be used with read-only images"); 1746 ret = -EINVAL; 1747 goto fail_opts; 1748 } 1749 1750 if (file != NULL) { 1751 bdrv_refresh_filename(blk_bs(file)); 1752 filename = blk_bs(file)->filename; 1753 } else { 1754 /* 1755 * Caution: while qdict_get_try_str() is fine, getting 1756 * non-string types would require more care. When @options 1757 * come from -blockdev or blockdev_add, its members are typed 1758 * according to the QAPI schema, but when they come from 1759 * -drive, they're all QString. 
1760 */ 1761 filename = qdict_get_try_str(options, "filename"); 1762 } 1763 1764 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1765 error_setg(errp, "The '%s' block driver requires a file name", 1766 drv->format_name); 1767 ret = -EINVAL; 1768 goto fail_opts; 1769 } 1770 1771 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1772 drv->format_name); 1773 1774 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1775 1776 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 1777 if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { 1778 ret = bdrv_apply_auto_read_only(bs, NULL, NULL); 1779 } else { 1780 ret = -ENOTSUP; 1781 } 1782 if (ret < 0) { 1783 error_setg(errp, 1784 !bs->read_only && bdrv_is_whitelisted(drv, true) 1785 ? "Driver '%s' can only be used for read-only devices" 1786 : "Driver '%s' is not whitelisted", 1787 drv->format_name); 1788 goto fail_opts; 1789 } 1790 } 1791 1792 /* bdrv_new() and bdrv_close() make it so */ 1793 assert(qatomic_read(&bs->copy_on_read) == 0); 1794 1795 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1796 if (!bs->read_only) { 1797 bdrv_enable_copy_on_read(bs); 1798 } else { 1799 error_setg(errp, "Can't use copy-on-read on read-only device"); 1800 ret = -EINVAL; 1801 goto fail_opts; 1802 } 1803 } 1804 1805 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); 1806 if (discard != NULL) { 1807 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1808 error_setg(errp, "Invalid discard option"); 1809 ret = -EINVAL; 1810 goto fail_opts; 1811 } 1812 } 1813 1814 bs->detect_zeroes = 1815 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); 1816 if (local_err) { 1817 error_propagate(errp, local_err); 1818 ret = -EINVAL; 1819 goto fail_opts; 1820 } 1821 1822 if (filename != NULL) { 1823 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1824 } else { 1825 bs->filename[0] = '\0'; 1826 } 1827 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1828 1829 /* Open the image, 
either directly or using a protocol */ 1830 open_flags = bdrv_open_flags(bs, bs->open_flags); 1831 node_name = qemu_opt_get(opts, "node-name"); 1832 1833 assert(!drv->bdrv_file_open || file == NULL); 1834 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1835 if (ret < 0) { 1836 goto fail_opts; 1837 } 1838 1839 qemu_opts_del(opts); 1840 return 0; 1841 1842 fail_opts: 1843 qemu_opts_del(opts); 1844 return ret; 1845 } 1846 1847 static QDict *parse_json_filename(const char *filename, Error **errp) 1848 { 1849 QObject *options_obj; 1850 QDict *options; 1851 int ret; 1852 1853 ret = strstart(filename, "json:", &filename); 1854 assert(ret); 1855 1856 options_obj = qobject_from_json(filename, errp); 1857 if (!options_obj) { 1858 error_prepend(errp, "Could not parse the JSON options: "); 1859 return NULL; 1860 } 1861 1862 options = qobject_to(QDict, options_obj); 1863 if (!options) { 1864 qobject_unref(options_obj); 1865 error_setg(errp, "Invalid JSON object given"); 1866 return NULL; 1867 } 1868 1869 qdict_flatten(options); 1870 1871 return options; 1872 } 1873 1874 static void parse_json_protocol(QDict *options, const char **pfilename, 1875 Error **errp) 1876 { 1877 QDict *json_options; 1878 Error *local_err = NULL; 1879 1880 /* Parse json: pseudo-protocol */ 1881 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1882 return; 1883 } 1884 1885 json_options = parse_json_filename(*pfilename, &local_err); 1886 if (local_err) { 1887 error_propagate(errp, local_err); 1888 return; 1889 } 1890 1891 /* Options given in the filename have lower priority than options 1892 * specified directly */ 1893 qdict_join(options, json_options, false); 1894 qobject_unref(json_options); 1895 *pfilename = NULL; 1896 } 1897 1898 /* 1899 * Fills in default options for opening images and converts the legacy 1900 * filename/flags pair to option QDict entries. 
1901 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1902 * block driver has been specified explicitly. 1903 */ 1904 static int bdrv_fill_options(QDict **options, const char *filename, 1905 int *flags, Error **errp) 1906 { 1907 const char *drvname; 1908 bool protocol = *flags & BDRV_O_PROTOCOL; 1909 bool parse_filename = false; 1910 BlockDriver *drv = NULL; 1911 Error *local_err = NULL; 1912 1913 /* 1914 * Caution: while qdict_get_try_str() is fine, getting non-string 1915 * types would require more care. When @options come from 1916 * -blockdev or blockdev_add, its members are typed according to 1917 * the QAPI schema, but when they come from -drive, they're all 1918 * QString. 1919 */ 1920 drvname = qdict_get_try_str(*options, "driver"); 1921 if (drvname) { 1922 drv = bdrv_find_format(drvname); 1923 if (!drv) { 1924 error_setg(errp, "Unknown driver '%s'", drvname); 1925 return -ENOENT; 1926 } 1927 /* If the user has explicitly specified the driver, this choice should 1928 * override the BDRV_O_PROTOCOL flag */ 1929 protocol = drv->bdrv_file_open; 1930 } 1931 1932 if (protocol) { 1933 *flags |= BDRV_O_PROTOCOL; 1934 } else { 1935 *flags &= ~BDRV_O_PROTOCOL; 1936 } 1937 1938 /* Translate cache options from flags into options */ 1939 update_options_from_flags(*options, *flags); 1940 1941 /* Fetch the file name from the options QDict if necessary */ 1942 if (protocol && filename) { 1943 if (!qdict_haskey(*options, "filename")) { 1944 qdict_put_str(*options, "filename", filename); 1945 parse_filename = true; 1946 } else { 1947 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1948 "the same time"); 1949 return -EINVAL; 1950 } 1951 } 1952 1953 /* Find the right block driver */ 1954 /* See cautionary note on accessing @options above */ 1955 filename = qdict_get_try_str(*options, "filename"); 1956 1957 if (!drvname && protocol) { 1958 if (filename) { 1959 drv = bdrv_find_protocol(filename, parse_filename, errp); 1960 if 
(!drv) { 1961 return -EINVAL; 1962 } 1963 1964 drvname = drv->format_name; 1965 qdict_put_str(*options, "driver", drvname); 1966 } else { 1967 error_setg(errp, "Must specify either driver or file"); 1968 return -EINVAL; 1969 } 1970 } 1971 1972 assert(drv || !protocol); 1973 1974 /* Driver-specific filename parsing */ 1975 if (drv && drv->bdrv_parse_filename && parse_filename) { 1976 drv->bdrv_parse_filename(filename, *options, &local_err); 1977 if (local_err) { 1978 error_propagate(errp, local_err); 1979 return -EINVAL; 1980 } 1981 1982 if (!drv->bdrv_needs_filename) { 1983 qdict_del(*options, "filename"); 1984 } 1985 } 1986 1987 return 0; 1988 } 1989 1990 static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, 1991 uint64_t new_used_perm, 1992 uint64_t new_shared_perm, 1993 Error **errp); 1994 1995 typedef struct BlockReopenQueueEntry { 1996 bool prepared; 1997 bool perms_checked; 1998 BDRVReopenState state; 1999 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 2000 } BlockReopenQueueEntry; 2001 2002 /* 2003 * Return the flags that @bs will have after the reopens in @q have 2004 * successfully completed. If @q is NULL (or @bs is not contained in @q), 2005 * return the current flags. 2006 */ 2007 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 2008 { 2009 BlockReopenQueueEntry *entry; 2010 2011 if (q != NULL) { 2012 QTAILQ_FOREACH(entry, q, entry) { 2013 if (entry->state.bs == bs) { 2014 return entry->state.flags; 2015 } 2016 } 2017 } 2018 2019 return bs->open_flags; 2020 } 2021 2022 /* Returns whether the image file can be written to after the reopen queue @q 2023 * has been successfully applied, or right now if @q is NULL. */ 2024 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 2025 BlockReopenQueue *q) 2026 { 2027 int flags = bdrv_reopen_get_flags(q, bs); 2028 2029 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 2030 } 2031 2032 /* 2033 * Return whether the BDS can be written to. 
This is not necessarily 2034 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2035 * be written to but do not count as read-only images. 2036 */ 2037 bool bdrv_is_writable(BlockDriverState *bs) 2038 { 2039 return bdrv_is_writable_after_reopen(bs, NULL); 2040 } 2041 2042 static char *bdrv_child_user_desc(BdrvChild *c) 2043 { 2044 if (c->klass->get_parent_desc) { 2045 return c->klass->get_parent_desc(c); 2046 } 2047 2048 return g_strdup("another user"); 2049 } 2050 2051 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) 2052 { 2053 g_autofree char *user = NULL; 2054 g_autofree char *perm_names = NULL; 2055 2056 if ((b->perm & a->shared_perm) == b->perm) { 2057 return true; 2058 } 2059 2060 perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); 2061 user = bdrv_child_user_desc(a); 2062 error_setg(errp, "Conflicts with use by %s as '%s', which does not " 2063 "allow '%s' on %s", 2064 user, a->name, perm_names, bdrv_get_node_name(b->bs)); 2065 2066 return false; 2067 } 2068 2069 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) 2070 { 2071 BdrvChild *a, *b; 2072 2073 /* 2074 * During the loop we'll look at each pair twice. That's correct because 2075 * bdrv_a_allow_b() is asymmetric and we should check each pair in both 2076 * directions. 
2077 */ 2078 QLIST_FOREACH(a, &bs->parents, next_parent) { 2079 QLIST_FOREACH(b, &bs->parents, next_parent) { 2080 if (a == b) { 2081 continue; 2082 } 2083 2084 if (!bdrv_a_allow_b(a, b, errp)) { 2085 return true; 2086 } 2087 } 2088 } 2089 2090 return false; 2091 } 2092 2093 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2094 BdrvChild *c, BdrvChildRole role, 2095 BlockReopenQueue *reopen_queue, 2096 uint64_t parent_perm, uint64_t parent_shared, 2097 uint64_t *nperm, uint64_t *nshared) 2098 { 2099 assert(bs->drv && bs->drv->bdrv_child_perm); 2100 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2101 parent_perm, parent_shared, 2102 nperm, nshared); 2103 /* TODO Take force_share from reopen_queue */ 2104 if (child_bs && child_bs->force_share) { 2105 *nshared = BLK_PERM_ALL; 2106 } 2107 } 2108 2109 /* 2110 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for 2111 * nodes that are already in the @list, of course) so that final list is 2112 * topologically sorted. Return the result (GSList @list object is updated, so 2113 * don't use old reference after function call). 2114 * 2115 * On function start @list must be already topologically sorted and for any node 2116 * in the @list the whole subtree of the node must be in the @list as well. The 2117 * simplest way to satisfy this criteria: use only result of 2118 * bdrv_topological_dfs() or NULL as @list parameter. 
2119 */ 2120 static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, 2121 BlockDriverState *bs) 2122 { 2123 BdrvChild *child; 2124 g_autoptr(GHashTable) local_found = NULL; 2125 2126 if (!found) { 2127 assert(!list); 2128 found = local_found = g_hash_table_new(NULL, NULL); 2129 } 2130 2131 if (g_hash_table_contains(found, bs)) { 2132 return list; 2133 } 2134 g_hash_table_add(found, bs); 2135 2136 QLIST_FOREACH(child, &bs->children, next) { 2137 list = bdrv_topological_dfs(list, found, child->bs); 2138 } 2139 2140 return g_slist_prepend(list, bs); 2141 } 2142 2143 static void bdrv_child_set_perm_commit(void *opaque) 2144 { 2145 BdrvChild *c = opaque; 2146 2147 c->has_backup_perm = false; 2148 } 2149 2150 static void bdrv_child_set_perm_abort(void *opaque) 2151 { 2152 BdrvChild *c = opaque; 2153 /* 2154 * We may have child->has_backup_perm unset at this point, as in case of 2155 * _check_ stage of permission update failure we may _check_ not the whole 2156 * subtree. Still, _abort_ is called on the whole subtree anyway. 2157 */ 2158 if (c->has_backup_perm) { 2159 c->perm = c->backup_perm; 2160 c->shared_perm = c->backup_shared_perm; 2161 c->has_backup_perm = false; 2162 } 2163 } 2164 2165 static TransactionActionDrv bdrv_child_set_pem_drv = { 2166 .abort = bdrv_child_set_perm_abort, 2167 .commit = bdrv_child_set_perm_commit, 2168 }; 2169 2170 /* 2171 * With tran=NULL needs to be followed by direct call to either 2172 * bdrv_child_set_perm_commit() or bdrv_child_set_perm_abort(). 2173 * 2174 * With non-NULL tran needs to be followed by tran_abort() or tran_commit() 2175 * instead. 
2176 */ 2177 static void bdrv_child_set_perm_safe(BdrvChild *c, uint64_t perm, 2178 uint64_t shared, Transaction *tran) 2179 { 2180 if (!c->has_backup_perm) { 2181 c->has_backup_perm = true; 2182 c->backup_perm = c->perm; 2183 c->backup_shared_perm = c->shared_perm; 2184 } 2185 /* 2186 * Note: it's OK if c->has_backup_perm was already set, as we can find the 2187 * same c twice during check_perm procedure 2188 */ 2189 2190 c->perm = perm; 2191 c->shared_perm = shared; 2192 2193 if (tran) { 2194 tran_add(tran, &bdrv_child_set_pem_drv, c); 2195 } 2196 } 2197 2198 static void bdrv_drv_set_perm_commit(void *opaque) 2199 { 2200 BlockDriverState *bs = opaque; 2201 uint64_t cumulative_perms, cumulative_shared_perms; 2202 2203 if (bs->drv->bdrv_set_perm) { 2204 bdrv_get_cumulative_perm(bs, &cumulative_perms, 2205 &cumulative_shared_perms); 2206 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 2207 } 2208 } 2209 2210 static void bdrv_drv_set_perm_abort(void *opaque) 2211 { 2212 BlockDriverState *bs = opaque; 2213 2214 if (bs->drv->bdrv_abort_perm_update) { 2215 bs->drv->bdrv_abort_perm_update(bs); 2216 } 2217 } 2218 2219 TransactionActionDrv bdrv_drv_set_perm_drv = { 2220 .abort = bdrv_drv_set_perm_abort, 2221 .commit = bdrv_drv_set_perm_commit, 2222 }; 2223 2224 static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, 2225 uint64_t shared_perm, Transaction *tran, 2226 Error **errp) 2227 { 2228 if (!bs->drv) { 2229 return 0; 2230 } 2231 2232 if (bs->drv->bdrv_check_perm) { 2233 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); 2234 if (ret < 0) { 2235 return ret; 2236 } 2237 } 2238 2239 if (tran) { 2240 tran_add(tran, &bdrv_drv_set_perm_drv, bs); 2241 } 2242 2243 return 0; 2244 } 2245 2246 typedef struct BdrvReplaceChildState { 2247 BdrvChild *child; 2248 BlockDriverState *old_bs; 2249 } BdrvReplaceChildState; 2250 2251 static void bdrv_replace_child_commit(void *opaque) 2252 { 2253 BdrvReplaceChildState *s = opaque; 2254 2255 
bdrv_unref(s->old_bs); 2256 } 2257 2258 static void bdrv_replace_child_abort(void *opaque) 2259 { 2260 BdrvReplaceChildState *s = opaque; 2261 BlockDriverState *new_bs = s->child->bs; 2262 2263 /* old_bs reference is transparently moved from @s to @s->child */ 2264 bdrv_replace_child_noperm(s->child, s->old_bs); 2265 bdrv_unref(new_bs); 2266 } 2267 2268 static TransactionActionDrv bdrv_replace_child_drv = { 2269 .commit = bdrv_replace_child_commit, 2270 .abort = bdrv_replace_child_abort, 2271 .clean = g_free, 2272 }; 2273 2274 /* 2275 * bdrv_replace_child_safe 2276 * 2277 * Note: real unref of old_bs is done only on commit. 2278 */ 2279 static void bdrv_replace_child_safe(BdrvChild *child, BlockDriverState *new_bs, 2280 Transaction *tran) 2281 { 2282 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); 2283 *s = (BdrvReplaceChildState) { 2284 .child = child, 2285 .old_bs = child->bs, 2286 }; 2287 tran_add(tran, &bdrv_replace_child_drv, s); 2288 2289 if (new_bs) { 2290 bdrv_ref(new_bs); 2291 } 2292 bdrv_replace_child_noperm(child, new_bs); 2293 /* old_bs reference is transparently moved from @child to @s */ 2294 } 2295 2296 /* 2297 * Check whether permissions on this node can be changed in a way that 2298 * @cumulative_perms and @cumulative_shared_perms are the new cumulative 2299 * permissions of all its parents. This involves checking whether all necessary 2300 * permission changes to child nodes can be performed. 2301 * 2302 * A call to this function must always be followed by a call to bdrv_set_perm() 2303 * or bdrv_abort_perm_update(). 
2304 */ 2305 static int bdrv_node_check_perm(BlockDriverState *bs, BlockReopenQueue *q, 2306 uint64_t cumulative_perms, 2307 uint64_t cumulative_shared_perms, 2308 Transaction *tran, Error **errp) 2309 { 2310 BlockDriver *drv = bs->drv; 2311 BdrvChild *c; 2312 int ret; 2313 2314 /* Write permissions never work with read-only images */ 2315 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2316 !bdrv_is_writable_after_reopen(bs, q)) 2317 { 2318 if (!bdrv_is_writable_after_reopen(bs, NULL)) { 2319 error_setg(errp, "Block node is read-only"); 2320 } else { 2321 uint64_t current_perms, current_shared; 2322 bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); 2323 if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { 2324 error_setg(errp, "Cannot make block node read-only, there is " 2325 "a writer on it"); 2326 } else { 2327 error_setg(errp, "Cannot make block node read-only and create " 2328 "a writer on it"); 2329 } 2330 } 2331 2332 return -EPERM; 2333 } 2334 2335 /* 2336 * Unaligned requests will automatically be aligned to bl.request_alignment 2337 * and without RESIZE we can't extend requests to write to space beyond the 2338 * end of the image, so it's required that the image size is aligned. 
2339 */ 2340 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2341 !(cumulative_perms & BLK_PERM_RESIZE)) 2342 { 2343 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { 2344 error_setg(errp, "Cannot get 'write' permission without 'resize': " 2345 "Image size is not a multiple of request " 2346 "alignment"); 2347 return -EPERM; 2348 } 2349 } 2350 2351 /* Check this node */ 2352 if (!drv) { 2353 return 0; 2354 } 2355 2356 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, 2357 errp); 2358 if (ret < 0) { 2359 return ret; 2360 } 2361 2362 /* Drivers that never have children can omit .bdrv_child_perm() */ 2363 if (!drv->bdrv_child_perm) { 2364 assert(QLIST_EMPTY(&bs->children)); 2365 return 0; 2366 } 2367 2368 /* Check all children */ 2369 QLIST_FOREACH(c, &bs->children, next) { 2370 uint64_t cur_perm, cur_shared; 2371 2372 bdrv_child_perm(bs, c->bs, c, c->role, q, 2373 cumulative_perms, cumulative_shared_perms, 2374 &cur_perm, &cur_shared); 2375 bdrv_child_set_perm_safe(c, cur_perm, cur_shared, tran); 2376 } 2377 2378 return 0; 2379 } 2380 2381 /* 2382 * If use_cumulative_perms is true, use cumulative_perms and 2383 * cumulative_shared_perms for first element of the list. Otherwise just refresh 2384 * all permissions. 
2385 */ 2386 static int bdrv_check_perm_common(GSList *list, BlockReopenQueue *q, 2387 bool use_cumulative_perms, 2388 uint64_t cumulative_perms, 2389 uint64_t cumulative_shared_perms, 2390 Transaction *tran, Error **errp) 2391 { 2392 int ret; 2393 BlockDriverState *bs; 2394 2395 if (use_cumulative_perms) { 2396 bs = list->data; 2397 2398 ret = bdrv_node_check_perm(bs, q, cumulative_perms, 2399 cumulative_shared_perms, 2400 tran, errp); 2401 if (ret < 0) { 2402 return ret; 2403 } 2404 2405 list = list->next; 2406 } 2407 2408 for ( ; list; list = list->next) { 2409 bs = list->data; 2410 2411 if (bdrv_parent_perms_conflict(bs, errp)) { 2412 return -EINVAL; 2413 } 2414 2415 bdrv_get_cumulative_perm(bs, &cumulative_perms, 2416 &cumulative_shared_perms); 2417 2418 ret = bdrv_node_check_perm(bs, q, cumulative_perms, 2419 cumulative_shared_perms, 2420 tran, errp); 2421 if (ret < 0) { 2422 return ret; 2423 } 2424 } 2425 2426 return 0; 2427 } 2428 2429 static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, 2430 uint64_t cumulative_perms, 2431 uint64_t cumulative_shared_perms, Error **errp) 2432 { 2433 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2434 return bdrv_check_perm_common(list, q, true, cumulative_perms, 2435 cumulative_shared_perms, NULL, errp); 2436 } 2437 2438 static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, 2439 Transaction *tran, Error **errp) 2440 { 2441 return bdrv_check_perm_common(list, q, false, 0, 0, tran, errp); 2442 } 2443 2444 /* 2445 * Notifies drivers that after a previous bdrv_check_perm() call, the 2446 * permission update is not performed and any preparations made for it (e.g. 2447 * taken file locks) need to be undone. 
2448 */ 2449 static void bdrv_node_abort_perm_update(BlockDriverState *bs) 2450 { 2451 BlockDriver *drv = bs->drv; 2452 BdrvChild *c; 2453 2454 if (!drv) { 2455 return; 2456 } 2457 2458 bdrv_drv_set_perm_abort(bs); 2459 2460 QLIST_FOREACH(c, &bs->children, next) { 2461 bdrv_child_set_perm_abort(c); 2462 } 2463 } 2464 2465 static void bdrv_list_abort_perm_update(GSList *list) 2466 { 2467 for ( ; list; list = list->next) { 2468 bdrv_node_abort_perm_update((BlockDriverState *)list->data); 2469 } 2470 } 2471 2472 __attribute__((unused)) 2473 static void bdrv_abort_perm_update(BlockDriverState *bs) 2474 { 2475 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2476 return bdrv_list_abort_perm_update(list); 2477 } 2478 2479 static void bdrv_node_set_perm(BlockDriverState *bs) 2480 { 2481 BlockDriver *drv = bs->drv; 2482 BdrvChild *c; 2483 2484 if (!drv) { 2485 return; 2486 } 2487 2488 bdrv_drv_set_perm_commit(bs); 2489 2490 /* Drivers that never have children can omit .bdrv_child_perm() */ 2491 if (!drv->bdrv_child_perm) { 2492 assert(QLIST_EMPTY(&bs->children)); 2493 return; 2494 } 2495 2496 /* Update all children */ 2497 QLIST_FOREACH(c, &bs->children, next) { 2498 bdrv_child_set_perm_commit(c); 2499 } 2500 } 2501 2502 static void bdrv_list_set_perm(GSList *list) 2503 { 2504 for ( ; list; list = list->next) { 2505 bdrv_node_set_perm((BlockDriverState *)list->data); 2506 } 2507 } 2508 2509 static void bdrv_set_perm(BlockDriverState *bs) 2510 { 2511 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2512 return bdrv_list_set_perm(list); 2513 } 2514 2515 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 2516 uint64_t *shared_perm) 2517 { 2518 BdrvChild *c; 2519 uint64_t cumulative_perms = 0; 2520 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2521 2522 QLIST_FOREACH(c, &bs->parents, next_parent) { 2523 cumulative_perms |= c->perm; 2524 cumulative_shared_perms &= c->shared_perm; 2525 } 2526 2527 *perm = cumulative_perms; 2528 
*shared_perm = cumulative_shared_perms; 2529 } 2530 2531 char *bdrv_perm_names(uint64_t perm) 2532 { 2533 struct perm_name { 2534 uint64_t perm; 2535 const char *name; 2536 } permissions[] = { 2537 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 2538 { BLK_PERM_WRITE, "write" }, 2539 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 2540 { BLK_PERM_RESIZE, "resize" }, 2541 { BLK_PERM_GRAPH_MOD, "change children" }, 2542 { 0, NULL } 2543 }; 2544 2545 GString *result = g_string_sized_new(30); 2546 struct perm_name *p; 2547 2548 for (p = permissions; p->name; p++) { 2549 if (perm & p->perm) { 2550 if (result->len > 0) { 2551 g_string_append(result, ", "); 2552 } 2553 g_string_append(result, p->name); 2554 } 2555 } 2556 2557 return g_string_free(result, FALSE); 2558 } 2559 2560 /* 2561 * Checks whether a new reference to @bs can be added if the new user requires 2562 * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is 2563 * set, the BdrvChild objects in this list are ignored in the calculations; 2564 * this allows checking permission updates for an existing reference. 2565 * 2566 * Needs to be followed by a call to either bdrv_set_perm() or 2567 * bdrv_abort_perm_update(). 
*/ 2568 __attribute__((unused)) 2569 static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, 2570 uint64_t new_used_perm, 2571 uint64_t new_shared_perm, 2572 Error **errp) 2573 { 2574 BdrvChild *c; 2575 uint64_t cumulative_perms = new_used_perm; 2576 uint64_t cumulative_shared_perms = new_shared_perm; 2577 2578 2579 /* There is no reason why anyone couldn't tolerate write_unchanged */ 2580 assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); 2581 2582 QLIST_FOREACH(c, &bs->parents, next_parent) { 2583 if ((new_used_perm & c->shared_perm) != new_used_perm) { 2584 char *user = bdrv_child_user_desc(c); 2585 char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); 2586 2587 error_setg(errp, "Conflicts with use by %s as '%s', which does not " 2588 "allow '%s' on %s", 2589 user, c->name, perm_names, bdrv_get_node_name(c->bs)); 2590 g_free(user); 2591 g_free(perm_names); 2592 return -EPERM; 2593 } 2594 2595 if ((c->perm & new_shared_perm) != c->perm) { 2596 char *user = bdrv_child_user_desc(c); 2597 char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); 2598 2599 error_setg(errp, "Conflicts with use by %s as '%s', which uses " 2600 "'%s' on %s", 2601 user, c->name, perm_names, bdrv_get_node_name(c->bs)); 2602 g_free(user); 2603 g_free(perm_names); 2604 return -EPERM; 2605 } 2606 2607 cumulative_perms |= c->perm; 2608 cumulative_shared_perms &= c->shared_perm; 2609 } 2610 2611 return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, 2612 errp); 2613 } 2614 2615 static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) 2616 { 2617 int ret; 2618 Transaction *tran = tran_new(); 2619 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2620 2621 ret = bdrv_list_refresh_perms(list, NULL, tran, errp); 2622 tran_finalize(tran, ret); 2623 2624 return ret; 2625 } 2626 2627 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 2628 Error **errp) 2629 { 2630 Error *local_err = NULL; 2631 
Transaction *tran = tran_new(); 2632 int ret; 2633 2634 bdrv_child_set_perm_safe(c, perm, shared, tran); 2635 2636 ret = bdrv_refresh_perms(c->bs, &local_err); 2637 2638 tran_finalize(tran, ret); 2639 2640 if (ret < 0) { 2641 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { 2642 /* tighten permissions */ 2643 error_propagate(errp, local_err); 2644 } else { 2645 /* 2646 * Our caller may intend to only loosen restrictions and 2647 * does not expect this function to fail. Errors are not 2648 * fatal in such a case, so we can just hide them from our 2649 * caller. 2650 */ 2651 error_free(local_err); 2652 ret = 0; 2653 } 2654 } 2655 2656 return ret; 2657 } 2658 2659 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) 2660 { 2661 uint64_t parent_perms, parent_shared; 2662 uint64_t perms, shared; 2663 2664 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); 2665 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 2666 parent_perms, parent_shared, &perms, &shared); 2667 2668 return bdrv_child_try_set_perm(c, perms, shared, errp); 2669 } 2670 2671 /* 2672 * Default implementation for .bdrv_child_perm() for block filters: 2673 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the 2674 * filtered child. 2675 */ 2676 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 2677 BdrvChildRole role, 2678 BlockReopenQueue *reopen_queue, 2679 uint64_t perm, uint64_t shared, 2680 uint64_t *nperm, uint64_t *nshared) 2681 { 2682 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 2683 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 2684 } 2685 2686 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, 2687 BdrvChildRole role, 2688 BlockReopenQueue *reopen_queue, 2689 uint64_t perm, uint64_t shared, 2690 uint64_t *nperm, uint64_t *nshared) 2691 { 2692 assert(role & BDRV_CHILD_COW); 2693 2694 /* 2695 * We want consistent read from backing files if the parent needs it. 
2696 * No other operations are performed on backing files. 2697 */ 2698 perm &= BLK_PERM_CONSISTENT_READ; 2699 2700 /* 2701 * If the parent can deal with changing data, we're okay with a 2702 * writable and resizable backing file. 2703 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 2704 */ 2705 if (shared & BLK_PERM_WRITE) { 2706 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 2707 } else { 2708 shared = 0; 2709 } 2710 2711 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | 2712 BLK_PERM_WRITE_UNCHANGED; 2713 2714 if (bs->open_flags & BDRV_O_INACTIVE) { 2715 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2716 } 2717 2718 *nperm = perm; 2719 *nshared = shared; 2720 } 2721 2722 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, 2723 BdrvChildRole role, 2724 BlockReopenQueue *reopen_queue, 2725 uint64_t perm, uint64_t shared, 2726 uint64_t *nperm, uint64_t *nshared) 2727 { 2728 int flags; 2729 2730 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); 2731 2732 flags = bdrv_reopen_get_flags(reopen_queue, bs); 2733 2734 /* 2735 * Apart from the modifications below, the same permissions are 2736 * forwarded and left alone as for filters 2737 */ 2738 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2739 perm, shared, &perm, &shared); 2740 2741 if (role & BDRV_CHILD_METADATA) { 2742 /* Format drivers may touch metadata even if the guest doesn't write */ 2743 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { 2744 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2745 } 2746 2747 /* 2748 * bs->file always needs to be consistent because of the 2749 * metadata. We can never allow other users to resize or write 2750 * to it. 
2751 */ 2752 if (!(flags & BDRV_O_NO_IO)) { 2753 perm |= BLK_PERM_CONSISTENT_READ; 2754 } 2755 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 2756 } 2757 2758 if (role & BDRV_CHILD_DATA) { 2759 /* 2760 * Technically, everything in this block is a subset of the 2761 * BDRV_CHILD_METADATA path taken above, and so this could 2762 * be an "else if" branch. However, that is not obvious, and 2763 * this function is not performance critical, therefore we let 2764 * this be an independent "if". 2765 */ 2766 2767 /* 2768 * We cannot allow other users to resize the file because the 2769 * format driver might have some assumptions about the size 2770 * (e.g. because it is stored in metadata, or because the file 2771 * is split into fixed-size data files). 2772 */ 2773 shared &= ~BLK_PERM_RESIZE; 2774 2775 /* 2776 * WRITE_UNCHANGED often cannot be performed as such on the 2777 * data file. For example, the qcow2 driver may still need to 2778 * write copied clusters on copy-on-read. 2779 */ 2780 if (perm & BLK_PERM_WRITE_UNCHANGED) { 2781 perm |= BLK_PERM_WRITE; 2782 } 2783 2784 /* 2785 * If the data file is written to, the format driver may 2786 * expect to be able to resize it by writing beyond the EOF. 
2787 */ 2788 if (perm & BLK_PERM_WRITE) { 2789 perm |= BLK_PERM_RESIZE; 2790 } 2791 } 2792 2793 if (bs->open_flags & BDRV_O_INACTIVE) { 2794 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2795 } 2796 2797 *nperm = perm; 2798 *nshared = shared; 2799 } 2800 2801 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, 2802 BdrvChildRole role, BlockReopenQueue *reopen_queue, 2803 uint64_t perm, uint64_t shared, 2804 uint64_t *nperm, uint64_t *nshared) 2805 { 2806 if (role & BDRV_CHILD_FILTERED) { 2807 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 2808 BDRV_CHILD_COW))); 2809 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2810 perm, shared, nperm, nshared); 2811 } else if (role & BDRV_CHILD_COW) { 2812 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); 2813 bdrv_default_perms_for_cow(bs, c, role, reopen_queue, 2814 perm, shared, nperm, nshared); 2815 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { 2816 bdrv_default_perms_for_storage(bs, c, role, reopen_queue, 2817 perm, shared, nperm, nshared); 2818 } else { 2819 g_assert_not_reached(); 2820 } 2821 } 2822 2823 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) 2824 { 2825 static const uint64_t permissions[] = { 2826 [BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ, 2827 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, 2828 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, 2829 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, 2830 [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD, 2831 }; 2832 2833 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); 2834 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); 2835 2836 assert(qapi_perm < BLOCK_PERMISSION__MAX); 2837 2838 return permissions[qapi_perm]; 2839 } 2840 2841 static void bdrv_replace_child_noperm(BdrvChild *child, 2842 BlockDriverState *new_bs) 2843 { 2844 BlockDriverState *old_bs = child->bs; 2845 int new_bs_quiesce_counter; 2846 int drain_saldo; 2847 2848 
assert(!child->frozen); 2849 2850 if (old_bs && new_bs) { 2851 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2852 } 2853 2854 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); 2855 drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; 2856 2857 /* 2858 * If the new child node is drained but the old one was not, flush 2859 * all outstanding requests to the old child node. 2860 */ 2861 while (drain_saldo > 0 && child->klass->drained_begin) { 2862 bdrv_parent_drained_begin_single(child, true); 2863 drain_saldo--; 2864 } 2865 2866 if (old_bs) { 2867 /* Detach first so that the recursive drain sections coming from @child 2868 * are already gone and we only end the drain sections that came from 2869 * elsewhere. */ 2870 if (child->klass->detach) { 2871 child->klass->detach(child); 2872 } 2873 QLIST_REMOVE(child, next_parent); 2874 } 2875 2876 child->bs = new_bs; 2877 2878 if (new_bs) { 2879 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2880 2881 /* 2882 * Detaching the old node may have led to the new node's 2883 * quiesce_counter having been decreased. Not a problem, we 2884 * just need to recognize this here and then invoke 2885 * drained_end appropriately more often. 2886 */ 2887 assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); 2888 drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; 2889 2890 /* Attach only after starting new drained sections, so that recursive 2891 * drain sections coming from @child don't get an extra .drained_begin 2892 * callback. */ 2893 if (child->klass->attach) { 2894 child->klass->attach(child); 2895 } 2896 } 2897 2898 /* 2899 * If the old child node was drained but the new one is not, allow 2900 * requests to come in only after the new node has been attached. 
2901 */ 2902 while (drain_saldo < 0 && child->klass->drained_end) { 2903 bdrv_parent_drained_end_single(child); 2904 drain_saldo++; 2905 } 2906 } 2907 2908 /* 2909 * Updates @child to change its reference to point to @new_bs, including 2910 * checking and applying the necessary permission updates both to the old node 2911 * and to @new_bs. 2912 * 2913 * NULL is passed as @new_bs for removing the reference before freeing @child. 2914 * 2915 * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this 2916 * function uses bdrv_set_perm() to update the permissions according to the new 2917 * reference that @new_bs gets. 2918 * 2919 * Callers must ensure that child->frozen is false. 2920 */ 2921 static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) 2922 { 2923 BlockDriverState *old_bs = child->bs; 2924 2925 /* Asserts that child->frozen == false */ 2926 bdrv_replace_child_noperm(child, new_bs); 2927 2928 /* 2929 * Start with the new node's permissions. If @new_bs is a (direct 2930 * or indirect) child of @old_bs, we must complete the permission 2931 * update on @new_bs before we loosen the restrictions on @old_bs. 2932 * Otherwise, bdrv_check_perm() on @old_bs would re-initiate 2933 * updating the permissions of @new_bs, and thus not purely loosen 2934 * restrictions. 2935 */ 2936 if (new_bs) { 2937 bdrv_set_perm(new_bs); 2938 } 2939 2940 if (old_bs) { 2941 /* 2942 * Update permissions for old node. We're just taking a parent away, so 2943 * we're loosening restrictions. Errors of permission update are not 2944 * fatal in this case, ignore them. 
2945 */ 2946 bdrv_refresh_perms(old_bs, NULL); 2947 2948 /* When the parent requiring a non-default AioContext is removed, the 2949 * node moves back to the main AioContext */ 2950 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); 2951 } 2952 } 2953 2954 static void bdrv_child_free(void *opaque) 2955 { 2956 BdrvChild *c = opaque; 2957 2958 g_free(c->name); 2959 g_free(c); 2960 } 2961 2962 static void bdrv_remove_empty_child(BdrvChild *child) 2963 { 2964 assert(!child->bs); 2965 QLIST_SAFE_REMOVE(child, next); 2966 bdrv_child_free(child); 2967 } 2968 2969 typedef struct BdrvAttachChildCommonState { 2970 BdrvChild **child; 2971 AioContext *old_parent_ctx; 2972 AioContext *old_child_ctx; 2973 } BdrvAttachChildCommonState; 2974 2975 static void bdrv_attach_child_common_abort(void *opaque) 2976 { 2977 BdrvAttachChildCommonState *s = opaque; 2978 BdrvChild *child = *s->child; 2979 BlockDriverState *bs = child->bs; 2980 2981 bdrv_replace_child_noperm(child, NULL); 2982 2983 if (bdrv_get_aio_context(bs) != s->old_child_ctx) { 2984 bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); 2985 } 2986 2987 if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { 2988 GSList *ignore = g_slist_prepend(NULL, child); 2989 2990 child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, 2991 &error_abort); 2992 g_slist_free(ignore); 2993 ignore = g_slist_prepend(NULL, child); 2994 child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); 2995 2996 g_slist_free(ignore); 2997 } 2998 2999 bdrv_unref(bs); 3000 bdrv_remove_empty_child(child); 3001 *s->child = NULL; 3002 } 3003 3004 static TransactionActionDrv bdrv_attach_child_common_drv = { 3005 .abort = bdrv_attach_child_common_abort, 3006 .clean = g_free, 3007 }; 3008 3009 /* 3010 * Common part of attaching bdrv child to bs or to blk or to job 3011 */ 3012 static int bdrv_attach_child_common(BlockDriverState *child_bs, 3013 const char *child_name, 3014 const BdrvChildClass *child_class, 3015 
BdrvChildRole child_role, 3016 uint64_t perm, uint64_t shared_perm, 3017 void *opaque, BdrvChild **child, 3018 Transaction *tran, Error **errp) 3019 { 3020 BdrvChild *new_child; 3021 AioContext *parent_ctx; 3022 AioContext *child_ctx = bdrv_get_aio_context(child_bs); 3023 3024 assert(child); 3025 assert(*child == NULL); 3026 3027 new_child = g_new(BdrvChild, 1); 3028 *new_child = (BdrvChild) { 3029 .bs = NULL, 3030 .name = g_strdup(child_name), 3031 .klass = child_class, 3032 .role = child_role, 3033 .perm = perm, 3034 .shared_perm = shared_perm, 3035 .opaque = opaque, 3036 }; 3037 3038 /* 3039 * If the AioContexts don't match, first try to move the subtree of 3040 * child_bs into the AioContext of the new parent. If this doesn't work, 3041 * try moving the parent into the AioContext of child_bs instead. 3042 */ 3043 parent_ctx = bdrv_child_get_parent_aio_context(new_child); 3044 if (child_ctx != parent_ctx) { 3045 Error *local_err = NULL; 3046 int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err); 3047 3048 if (ret < 0 && child_class->can_set_aio_ctx) { 3049 GSList *ignore = g_slist_prepend(NULL, new_child); 3050 if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore, 3051 NULL)) 3052 { 3053 error_free(local_err); 3054 ret = 0; 3055 g_slist_free(ignore); 3056 ignore = g_slist_prepend(NULL, new_child); 3057 child_class->set_aio_ctx(new_child, child_ctx, &ignore); 3058 } 3059 g_slist_free(ignore); 3060 } 3061 3062 if (ret < 0) { 3063 error_propagate(errp, local_err); 3064 bdrv_remove_empty_child(new_child); 3065 return ret; 3066 } 3067 } 3068 3069 bdrv_ref(child_bs); 3070 bdrv_replace_child_noperm(new_child, child_bs); 3071 3072 *child = new_child; 3073 3074 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); 3075 *s = (BdrvAttachChildCommonState) { 3076 .child = child, 3077 .old_parent_ctx = parent_ctx, 3078 .old_child_ctx = child_ctx, 3079 }; 3080 tran_add(tran, &bdrv_attach_child_common_drv, s); 3081 3082 return 0; 3083 } 
3084 3085 static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, 3086 BlockDriverState *child_bs, 3087 const char *child_name, 3088 const BdrvChildClass *child_class, 3089 BdrvChildRole child_role, 3090 BdrvChild **child, 3091 Transaction *tran, 3092 Error **errp) 3093 { 3094 int ret; 3095 uint64_t perm, shared_perm; 3096 3097 assert(parent_bs->drv); 3098 3099 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 3100 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 3101 perm, shared_perm, &perm, &shared_perm); 3102 3103 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 3104 child_role, perm, shared_perm, parent_bs, 3105 child, tran, errp); 3106 if (ret < 0) { 3107 return ret; 3108 } 3109 3110 QLIST_INSERT_HEAD(&parent_bs->children, *child, next); 3111 /* 3112 * child is removed in bdrv_attach_child_common_abort(), so don't care to 3113 * abort this change separately. 3114 */ 3115 3116 return 0; 3117 } 3118 3119 static void bdrv_detach_child(BdrvChild *child) 3120 { 3121 bdrv_replace_child(child, NULL); 3122 bdrv_remove_empty_child(child); 3123 } 3124 3125 /* 3126 * This function steals the reference to child_bs from the caller. 3127 * That reference is later dropped by bdrv_root_unref_child(). 3128 * 3129 * On failure NULL is returned, errp is set and the reference to 3130 * child_bs is also dropped. 3131 * 3132 * The caller must hold the AioContext lock @child_bs, but not that of @ctx 3133 * (unless @child_bs is already in @ctx). 
3134 */ 3135 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 3136 const char *child_name, 3137 const BdrvChildClass *child_class, 3138 BdrvChildRole child_role, 3139 uint64_t perm, uint64_t shared_perm, 3140 void *opaque, Error **errp) 3141 { 3142 int ret; 3143 BdrvChild *child = NULL; 3144 Transaction *tran = tran_new(); 3145 3146 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 3147 child_role, perm, shared_perm, opaque, 3148 &child, tran, errp); 3149 if (ret < 0) { 3150 bdrv_unref(child_bs); 3151 return NULL; 3152 } 3153 3154 ret = bdrv_refresh_perms(child_bs, errp); 3155 tran_finalize(tran, ret); 3156 3157 bdrv_unref(child_bs); 3158 return child; 3159 } 3160 3161 /* 3162 * This function transfers the reference to child_bs from the caller 3163 * to parent_bs. That reference is later dropped by parent_bs on 3164 * bdrv_close() or if someone calls bdrv_unref_child(). 3165 * 3166 * On failure NULL is returned, errp is set and the reference to 3167 * child_bs is also dropped. 3168 * 3169 * If @parent_bs and @child_bs are in different AioContexts, the caller must 3170 * hold the AioContext lock for @child_bs, but not for @parent_bs. 3171 */ 3172 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 3173 BlockDriverState *child_bs, 3174 const char *child_name, 3175 const BdrvChildClass *child_class, 3176 BdrvChildRole child_role, 3177 Error **errp) 3178 { 3179 int ret; 3180 BdrvChild *child = NULL; 3181 Transaction *tran = tran_new(); 3182 3183 ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, 3184 child_role, &child, tran, errp); 3185 if (ret < 0) { 3186 goto out; 3187 } 3188 3189 ret = bdrv_refresh_perms(parent_bs, errp); 3190 if (ret < 0) { 3191 goto out; 3192 } 3193 3194 out: 3195 tran_finalize(tran, ret); 3196 3197 bdrv_unref(child_bs); 3198 3199 return child; 3200 } 3201 3202 /* Callers must ensure that child->frozen is false. 
*/ 3203 void bdrv_root_unref_child(BdrvChild *child) 3204 { 3205 BlockDriverState *child_bs; 3206 3207 child_bs = child->bs; 3208 bdrv_detach_child(child); 3209 bdrv_unref(child_bs); 3210 } 3211 3212 typedef struct BdrvSetInheritsFrom { 3213 BlockDriverState *bs; 3214 BlockDriverState *old_inherits_from; 3215 } BdrvSetInheritsFrom; 3216 3217 static void bdrv_set_inherits_from_abort(void *opaque) 3218 { 3219 BdrvSetInheritsFrom *s = opaque; 3220 3221 s->bs->inherits_from = s->old_inherits_from; 3222 } 3223 3224 static TransactionActionDrv bdrv_set_inherits_from_drv = { 3225 .abort = bdrv_set_inherits_from_abort, 3226 .clean = g_free, 3227 }; 3228 3229 /* @tran is allowed to be NULL. In this case no rollback is possible */ 3230 static void bdrv_set_inherits_from(BlockDriverState *bs, 3231 BlockDriverState *new_inherits_from, 3232 Transaction *tran) 3233 { 3234 if (tran) { 3235 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); 3236 3237 *s = (BdrvSetInheritsFrom) { 3238 .bs = bs, 3239 .old_inherits_from = bs->inherits_from, 3240 }; 3241 3242 tran_add(tran, &bdrv_set_inherits_from_drv, s); 3243 } 3244 3245 bs->inherits_from = new_inherits_from; 3246 } 3247 3248 /** 3249 * Clear all inherits_from pointers from children and grandchildren of 3250 * @root that point to @root, where necessary. 3251 * @tran is allowed to be NULL. In this case no rollback is possible 3252 */ 3253 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, 3254 Transaction *tran) 3255 { 3256 BdrvChild *c; 3257 3258 if (child->bs->inherits_from == root) { 3259 /* 3260 * Remove inherits_from only when the last reference between root and 3261 * child->bs goes away. 
3262 */ 3263 QLIST_FOREACH(c, &root->children, next) { 3264 if (c != child && c->bs == child->bs) { 3265 break; 3266 } 3267 } 3268 if (c == NULL) { 3269 bdrv_set_inherits_from(child->bs, NULL, tran); 3270 } 3271 } 3272 3273 QLIST_FOREACH(c, &child->bs->children, next) { 3274 bdrv_unset_inherits_from(root, c, tran); 3275 } 3276 } 3277 3278 /* Callers must ensure that child->frozen is false. */ 3279 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 3280 { 3281 if (child == NULL) { 3282 return; 3283 } 3284 3285 bdrv_unset_inherits_from(parent, child, NULL); 3286 bdrv_root_unref_child(child); 3287 } 3288 3289 3290 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 3291 { 3292 BdrvChild *c; 3293 QLIST_FOREACH(c, &bs->parents, next_parent) { 3294 if (c->klass->change_media) { 3295 c->klass->change_media(c, load); 3296 } 3297 } 3298 } 3299 3300 /* Return true if you can reach parent going through child->inherits_from 3301 * recursively. If parent or child are NULL, return false */ 3302 static bool bdrv_inherits_from_recursive(BlockDriverState *child, 3303 BlockDriverState *parent) 3304 { 3305 while (child && child != parent) { 3306 child = child->inherits_from; 3307 } 3308 3309 return child != NULL; 3310 } 3311 3312 /* 3313 * Return the BdrvChildRole for @bs's backing child. bs->backing is 3314 * mostly used for COW backing children (role = COW), but also for 3315 * filtered children (role = FILTERED | PRIMARY). 3316 */ 3317 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) 3318 { 3319 if (bs->drv && bs->drv->is_filter) { 3320 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3321 } else { 3322 return BDRV_CHILD_COW; 3323 } 3324 } 3325 3326 /* 3327 * Sets the bs->backing link of a BDS. A new reference is created; callers 3328 * which don't need their own reference any more must call bdrv_unref(). 
3329 */ 3330 static int bdrv_set_backing_noperm(BlockDriverState *bs, 3331 BlockDriverState *backing_hd, 3332 Transaction *tran, Error **errp) 3333 { 3334 int ret = 0; 3335 bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && 3336 bdrv_inherits_from_recursive(backing_hd, bs); 3337 3338 if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { 3339 return -EPERM; 3340 } 3341 3342 if (bs->backing) { 3343 /* Cannot be frozen, we checked that above */ 3344 bdrv_unset_inherits_from(bs, bs->backing, tran); 3345 bdrv_remove_filter_or_cow_child(bs, tran); 3346 } 3347 3348 if (!backing_hd) { 3349 goto out; 3350 } 3351 3352 ret = bdrv_attach_child_noperm(bs, backing_hd, "backing", 3353 &child_of_bds, bdrv_backing_role(bs), 3354 &bs->backing, tran, errp); 3355 if (ret < 0) { 3356 return ret; 3357 } 3358 3359 3360 /* 3361 * If backing_hd was already part of bs's backing chain, and 3362 * inherits_from pointed recursively to bs then let's update it to 3363 * point directly to bs (else it will become NULL). 3364 */ 3365 if (update_inherits_from) { 3366 bdrv_set_inherits_from(backing_hd, bs, tran); 3367 } 3368 3369 out: 3370 bdrv_refresh_limits(bs, tran, NULL); 3371 3372 return 0; 3373 } 3374 3375 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 3376 Error **errp) 3377 { 3378 int ret; 3379 Transaction *tran = tran_new(); 3380 3381 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); 3382 if (ret < 0) { 3383 goto out; 3384 } 3385 3386 ret = bdrv_refresh_perms(bs, errp); 3387 out: 3388 tran_finalize(tran, ret); 3389 3390 return ret; 3391 } 3392 3393 /* 3394 * Opens the backing file for a BlockDriverState if not yet open 3395 * 3396 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 3397 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3398 * itself, all options starting with "${bdref_key}." are considered part of the 3399 * BlockdevRef. 
3400 * 3401 * TODO Can this be unified with bdrv_open_image()? 3402 */ 3403 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 3404 const char *bdref_key, Error **errp) 3405 { 3406 char *backing_filename = NULL; 3407 char *bdref_key_dot; 3408 const char *reference = NULL; 3409 int ret = 0; 3410 bool implicit_backing = false; 3411 BlockDriverState *backing_hd; 3412 QDict *options; 3413 QDict *tmp_parent_options = NULL; 3414 Error *local_err = NULL; 3415 3416 if (bs->backing != NULL) { 3417 goto free_exit; 3418 } 3419 3420 /* NULL means an empty set of options */ 3421 if (parent_options == NULL) { 3422 tmp_parent_options = qdict_new(); 3423 parent_options = tmp_parent_options; 3424 } 3425 3426 bs->open_flags &= ~BDRV_O_NO_BACKING; 3427 3428 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 3429 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 3430 g_free(bdref_key_dot); 3431 3432 /* 3433 * Caution: while qdict_get_try_str() is fine, getting non-string 3434 * types would require more care. When @parent_options come from 3435 * -blockdev or blockdev_add, its members are typed according to 3436 * the QAPI schema, but when they come from -drive, they're all 3437 * QString. 3438 */ 3439 reference = qdict_get_try_str(parent_options, bdref_key); 3440 if (reference || qdict_haskey(options, "file.filename")) { 3441 /* keep backing_filename NULL */ 3442 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 3443 qobject_unref(options); 3444 goto free_exit; 3445 } else { 3446 if (qdict_size(options) == 0) { 3447 /* If the user specifies options that do not modify the 3448 * backing file's behavior, we might still consider it the 3449 * implicit backing file. But it's easier this way, and 3450 * just specifying some of the backing BDS's options is 3451 * only possible with -drive anyway (otherwise the QAPI 3452 * schema forces the user to specify everything). 
*/ 3453 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); 3454 } 3455 3456 backing_filename = bdrv_get_full_backing_filename(bs, &local_err); 3457 if (local_err) { 3458 ret = -EINVAL; 3459 error_propagate(errp, local_err); 3460 qobject_unref(options); 3461 goto free_exit; 3462 } 3463 } 3464 3465 if (!bs->drv || !bs->drv->supports_backing) { 3466 ret = -EINVAL; 3467 error_setg(errp, "Driver doesn't support backing files"); 3468 qobject_unref(options); 3469 goto free_exit; 3470 } 3471 3472 if (!reference && 3473 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 3474 qdict_put_str(options, "driver", bs->backing_format); 3475 } 3476 3477 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, 3478 &child_of_bds, bdrv_backing_role(bs), errp); 3479 if (!backing_hd) { 3480 bs->open_flags |= BDRV_O_NO_BACKING; 3481 error_prepend(errp, "Could not open backing file: "); 3482 ret = -EINVAL; 3483 goto free_exit; 3484 } 3485 3486 if (implicit_backing) { 3487 bdrv_refresh_filename(backing_hd); 3488 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3489 backing_hd->filename); 3490 } 3491 3492 /* Hook up the backing file link; drop our reference, bs owns the 3493 * backing_hd reference now */ 3494 ret = bdrv_set_backing_hd(bs, backing_hd, errp); 3495 bdrv_unref(backing_hd); 3496 if (ret < 0) { 3497 goto free_exit; 3498 } 3499 3500 qdict_del(parent_options, bdref_key); 3501 3502 free_exit: 3503 g_free(backing_filename); 3504 qobject_unref(tmp_parent_options); 3505 return ret; 3506 } 3507 3508 static BlockDriverState * 3509 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 3510 BlockDriverState *parent, const BdrvChildClass *child_class, 3511 BdrvChildRole child_role, bool allow_none, Error **errp) 3512 { 3513 BlockDriverState *bs = NULL; 3514 QDict *image_options; 3515 char *bdref_key_dot; 3516 const char *reference; 3517 3518 assert(child_class != NULL); 3519 3520 bdref_key_dot = 
g_strdup_printf("%s.", bdref_key); 3521 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 3522 g_free(bdref_key_dot); 3523 3524 /* 3525 * Caution: while qdict_get_try_str() is fine, getting non-string 3526 * types would require more care. When @options come from 3527 * -blockdev or blockdev_add, its members are typed according to 3528 * the QAPI schema, but when they come from -drive, they're all 3529 * QString. 3530 */ 3531 reference = qdict_get_try_str(options, bdref_key); 3532 if (!filename && !reference && !qdict_size(image_options)) { 3533 if (!allow_none) { 3534 error_setg(errp, "A block device must be specified for \"%s\"", 3535 bdref_key); 3536 } 3537 qobject_unref(image_options); 3538 goto done; 3539 } 3540 3541 bs = bdrv_open_inherit(filename, reference, image_options, 0, 3542 parent, child_class, child_role, errp); 3543 if (!bs) { 3544 goto done; 3545 } 3546 3547 done: 3548 qdict_del(options, bdref_key); 3549 return bs; 3550 } 3551 3552 /* 3553 * Opens a disk image whose options are given as BlockdevRef in another block 3554 * device's options. 3555 * 3556 * If allow_none is true, no image will be opened if filename is false and no 3557 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3558 * 3559 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3560 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3561 * itself, all options starting with "${bdref_key}." are considered part of the 3562 * BlockdevRef. 3563 * 3564 * The BlockdevRef will be removed from the options QDict. 
3565 */ 3566 BdrvChild *bdrv_open_child(const char *filename, 3567 QDict *options, const char *bdref_key, 3568 BlockDriverState *parent, 3569 const BdrvChildClass *child_class, 3570 BdrvChildRole child_role, 3571 bool allow_none, Error **errp) 3572 { 3573 BlockDriverState *bs; 3574 3575 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3576 child_role, allow_none, errp); 3577 if (bs == NULL) { 3578 return NULL; 3579 } 3580 3581 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3582 errp); 3583 } 3584 3585 /* 3586 * TODO Future callers may need to specify parent/child_class in order for 3587 * option inheritance to work. Existing callers use it for the root node. 3588 */ 3589 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3590 { 3591 BlockDriverState *bs = NULL; 3592 QObject *obj = NULL; 3593 QDict *qdict = NULL; 3594 const char *reference = NULL; 3595 Visitor *v = NULL; 3596 3597 if (ref->type == QTYPE_QSTRING) { 3598 reference = ref->u.reference; 3599 } else { 3600 BlockdevOptions *options = &ref->u.definition; 3601 assert(ref->type == QTYPE_QDICT); 3602 3603 v = qobject_output_visitor_new(&obj); 3604 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3605 visit_complete(v, &obj); 3606 3607 qdict = qobject_to(QDict, obj); 3608 qdict_flatten(qdict); 3609 3610 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3611 * compatibility with other callers) rather than what we want as the 3612 * real defaults. Apply the defaults here instead. 
*/ 3613 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3614 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3615 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3616 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3617 3618 } 3619 3620 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); 3621 obj = NULL; 3622 qobject_unref(obj); 3623 visit_free(v); 3624 return bs; 3625 } 3626 3627 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3628 int flags, 3629 QDict *snapshot_options, 3630 Error **errp) 3631 { 3632 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 3633 char *tmp_filename = g_malloc0(PATH_MAX + 1); 3634 int64_t total_size; 3635 QemuOpts *opts = NULL; 3636 BlockDriverState *bs_snapshot = NULL; 3637 int ret; 3638 3639 /* if snapshot, we create a temporary backing file and open it 3640 instead of opening 'filename' directly */ 3641 3642 /* Get the required size from the image */ 3643 total_size = bdrv_getlength(bs); 3644 if (total_size < 0) { 3645 error_setg_errno(errp, -total_size, "Could not get image size"); 3646 goto out; 3647 } 3648 3649 /* Create the temporary image */ 3650 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 3651 if (ret < 0) { 3652 error_setg_errno(errp, -ret, "Could not get temporary filename"); 3653 goto out; 3654 } 3655 3656 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3657 &error_abort); 3658 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3659 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3660 qemu_opts_del(opts); 3661 if (ret < 0) { 3662 error_prepend(errp, "Could not create temporary overlay '%s': ", 3663 tmp_filename); 3664 goto out; 3665 } 3666 3667 /* Prepare options QDict for the temporary file */ 3668 qdict_put_str(snapshot_options, "file.driver", "file"); 3669 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3670 qdict_put_str(snapshot_options, "driver", 
"qcow2"); 3671 3672 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3673 snapshot_options = NULL; 3674 if (!bs_snapshot) { 3675 goto out; 3676 } 3677 3678 ret = bdrv_append(bs_snapshot, bs, errp); 3679 if (ret < 0) { 3680 bs_snapshot = NULL; 3681 goto out; 3682 } 3683 3684 out: 3685 qobject_unref(snapshot_options); 3686 g_free(tmp_filename); 3687 return bs_snapshot; 3688 } 3689 3690 /* 3691 * Opens a disk image (raw, qcow2, vmdk, ...) 3692 * 3693 * options is a QDict of options to pass to the block drivers, or NULL for an 3694 * empty set of options. The reference to the QDict belongs to the block layer 3695 * after the call (even on failure), so if the caller intends to reuse the 3696 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3697 * 3698 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3699 * If it is not NULL, the referenced BDS will be reused. 3700 * 3701 * The reference parameter may be used to specify an existing block device which 3702 * should be opened. If specified, neither options nor a filename may be given, 3703 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 3704 */ 3705 static BlockDriverState *bdrv_open_inherit(const char *filename, 3706 const char *reference, 3707 QDict *options, int flags, 3708 BlockDriverState *parent, 3709 const BdrvChildClass *child_class, 3710 BdrvChildRole child_role, 3711 Error **errp) 3712 { 3713 int ret; 3714 BlockBackend *file = NULL; 3715 BlockDriverState *bs; 3716 BlockDriver *drv = NULL; 3717 BdrvChild *child; 3718 const char *drvname; 3719 const char *backing; 3720 Error *local_err = NULL; 3721 QDict *snapshot_options = NULL; 3722 int snapshot_flags = 0; 3723 3724 assert(!child_class || !flags); 3725 assert(!child_class == !parent); 3726 3727 if (reference) { 3728 bool options_non_empty = options ? 
qdict_size(options) : false; 3729 qobject_unref(options); 3730 3731 if (filename || options_non_empty) { 3732 error_setg(errp, "Cannot reference an existing block device with " 3733 "additional options or a new filename"); 3734 return NULL; 3735 } 3736 3737 bs = bdrv_lookup_bs(reference, reference, errp); 3738 if (!bs) { 3739 return NULL; 3740 } 3741 3742 bdrv_ref(bs); 3743 return bs; 3744 } 3745 3746 bs = bdrv_new(); 3747 3748 /* NULL means an empty set of options */ 3749 if (options == NULL) { 3750 options = qdict_new(); 3751 } 3752 3753 /* json: syntax counts as explicit options, as if in the QDict */ 3754 parse_json_protocol(options, &filename, &local_err); 3755 if (local_err) { 3756 goto fail; 3757 } 3758 3759 bs->explicit_options = qdict_clone_shallow(options); 3760 3761 if (child_class) { 3762 bool parent_is_format; 3763 3764 if (parent->drv) { 3765 parent_is_format = parent->drv->is_format; 3766 } else { 3767 /* 3768 * parent->drv is not set yet because this node is opened for 3769 * (potential) format probing. That means that @parent is going 3770 * to be a format node. 3771 */ 3772 parent_is_format = true; 3773 } 3774 3775 bs->inherits_from = parent; 3776 child_class->inherit_options(child_role, parent_is_format, 3777 &flags, options, 3778 parent->open_flags, parent->options); 3779 } 3780 3781 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 3782 if (ret < 0) { 3783 goto fail; 3784 } 3785 3786 /* 3787 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 3788 * Caution: getting a boolean member of @options requires care. 3789 * When @options come from -blockdev or blockdev_add, members are 3790 * typed according to the QAPI schema, but when they come from 3791 * -drive, they're all QString. 
3792 */ 3793 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 3794 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 3795 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 3796 } else { 3797 flags &= ~BDRV_O_RDWR; 3798 } 3799 3800 if (flags & BDRV_O_SNAPSHOT) { 3801 snapshot_options = qdict_new(); 3802 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 3803 flags, options); 3804 /* Let bdrv_backing_options() override "read-only" */ 3805 qdict_del(options, BDRV_OPT_READ_ONLY); 3806 bdrv_inherited_options(BDRV_CHILD_COW, true, 3807 &flags, options, flags, options); 3808 } 3809 3810 bs->open_flags = flags; 3811 bs->options = options; 3812 options = qdict_clone_shallow(options); 3813 3814 /* Find the right image format driver */ 3815 /* See cautionary note on accessing @options above */ 3816 drvname = qdict_get_try_str(options, "driver"); 3817 if (drvname) { 3818 drv = bdrv_find_format(drvname); 3819 if (!drv) { 3820 error_setg(errp, "Unknown driver: '%s'", drvname); 3821 goto fail; 3822 } 3823 } 3824 3825 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 3826 3827 /* See cautionary note on accessing @options above */ 3828 backing = qdict_get_try_str(options, "backing"); 3829 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || 3830 (backing && *backing == '\0')) 3831 { 3832 if (backing) { 3833 warn_report("Use of \"backing\": \"\" is deprecated; " 3834 "use \"backing\": null instead"); 3835 } 3836 flags |= BDRV_O_NO_BACKING; 3837 qdict_del(bs->explicit_options, "backing"); 3838 qdict_del(bs->options, "backing"); 3839 qdict_del(options, "backing"); 3840 } 3841 3842 /* Open image file without format layer. This BlockBackend is only used for 3843 * probing, the block drivers will do their own bdrv_open_child() for the 3844 * same BDS, which is why we put the node name back into options. 
*/ 3845 if ((flags & BDRV_O_PROTOCOL) == 0) { 3846 BlockDriverState *file_bs; 3847 3848 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 3849 &child_of_bds, BDRV_CHILD_IMAGE, 3850 true, &local_err); 3851 if (local_err) { 3852 goto fail; 3853 } 3854 if (file_bs != NULL) { 3855 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 3856 * looking at the header to guess the image format. This works even 3857 * in cases where a guest would not see a consistent state. */ 3858 file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); 3859 blk_insert_bs(file, file_bs, &local_err); 3860 bdrv_unref(file_bs); 3861 if (local_err) { 3862 goto fail; 3863 } 3864 3865 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 3866 } 3867 } 3868 3869 /* Image format probing */ 3870 bs->probed = !drv; 3871 if (!drv && file) { 3872 ret = find_image_format(file, filename, &drv, &local_err); 3873 if (ret < 0) { 3874 goto fail; 3875 } 3876 /* 3877 * This option update would logically belong in bdrv_fill_options(), 3878 * but we first need to open bs->file for the probing to work, while 3879 * opening bs->file already requires the (mostly) final set of options 3880 * so that cache mode etc. can be inherited. 3881 * 3882 * Adding the driver later is somewhat ugly, but it's not an option 3883 * that would ever be inherited, so it's correct. We just need to make 3884 * sure to update both bs->options (which has the full effective 3885 * options for bs) and options (which has file.* already removed). 
3886 */ 3887 qdict_put_str(bs->options, "driver", drv->format_name); 3888 qdict_put_str(options, "driver", drv->format_name); 3889 } else if (!drv) { 3890 error_setg(errp, "Must specify either driver or file"); 3891 goto fail; 3892 } 3893 3894 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 3895 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 3896 /* file must be NULL if a protocol BDS is about to be created 3897 * (the inverse results in an error message from bdrv_open_common()) */ 3898 assert(!(flags & BDRV_O_PROTOCOL) || !file); 3899 3900 /* Open the image */ 3901 ret = bdrv_open_common(bs, file, options, &local_err); 3902 if (ret < 0) { 3903 goto fail; 3904 } 3905 3906 if (file) { 3907 blk_unref(file); 3908 file = NULL; 3909 } 3910 3911 /* If there is a backing file, use it */ 3912 if ((flags & BDRV_O_NO_BACKING) == 0) { 3913 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 3914 if (ret < 0) { 3915 goto close_and_fail; 3916 } 3917 } 3918 3919 /* Remove all children options and references 3920 * from bs->options and bs->explicit_options */ 3921 QLIST_FOREACH(child, &bs->children, next) { 3922 char *child_key_dot; 3923 child_key_dot = g_strdup_printf("%s.", child->name); 3924 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); 3925 qdict_extract_subqdict(bs->options, NULL, child_key_dot); 3926 qdict_del(bs->explicit_options, child->name); 3927 qdict_del(bs->options, child->name); 3928 g_free(child_key_dot); 3929 } 3930 3931 /* Check if any unknown options were used */ 3932 if (qdict_size(options) != 0) { 3933 const QDictEntry *entry = qdict_first(options); 3934 if (flags & BDRV_O_PROTOCOL) { 3935 error_setg(errp, "Block protocol '%s' doesn't support the option " 3936 "'%s'", drv->format_name, entry->key); 3937 } else { 3938 error_setg(errp, 3939 "Block format '%s' does not support the option '%s'", 3940 drv->format_name, entry->key); 3941 } 3942 3943 goto close_and_fail; 3944 } 3945 3946 
bdrv_parent_cb_change_media(bs, true); 3947 3948 qobject_unref(options); 3949 options = NULL; 3950 3951 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 3952 * temporary snapshot afterwards. */ 3953 if (snapshot_flags) { 3954 BlockDriverState *snapshot_bs; 3955 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 3956 snapshot_options, &local_err); 3957 snapshot_options = NULL; 3958 if (local_err) { 3959 goto close_and_fail; 3960 } 3961 /* We are not going to return bs but the overlay on top of it 3962 * (snapshot_bs); thus, we have to drop the strong reference to bs 3963 * (which we obtained by calling bdrv_new()). bs will not be deleted, 3964 * though, because the overlay still has a reference to it. */ 3965 bdrv_unref(bs); 3966 bs = snapshot_bs; 3967 } 3968 3969 return bs; 3970 3971 fail: 3972 blk_unref(file); 3973 qobject_unref(snapshot_options); 3974 qobject_unref(bs->explicit_options); 3975 qobject_unref(bs->options); 3976 qobject_unref(options); 3977 bs->options = NULL; 3978 bs->explicit_options = NULL; 3979 bdrv_unref(bs); 3980 error_propagate(errp, local_err); 3981 return NULL; 3982 3983 close_and_fail: 3984 bdrv_unref(bs); 3985 qobject_unref(snapshot_options); 3986 qobject_unref(options); 3987 error_propagate(errp, local_err); 3988 return NULL; 3989 } 3990 3991 BlockDriverState *bdrv_open(const char *filename, const char *reference, 3992 QDict *options, int flags, Error **errp) 3993 { 3994 return bdrv_open_inherit(filename, reference, options, flags, NULL, 3995 NULL, 0, errp); 3996 } 3997 3998 /* Return true if the NULL-terminated @list contains @str */ 3999 static bool is_str_in_list(const char *str, const char *const *list) 4000 { 4001 if (str && list) { 4002 int i; 4003 for (i = 0; list[i] != NULL; i++) { 4004 if (!strcmp(str, list[i])) { 4005 return true; 4006 } 4007 } 4008 } 4009 return false; 4010 } 4011 4012 /* 4013 * Check that every option set in @bs->options is also set in 4014 * @new_opts. 
4015 * 4016 * Options listed in the common_options list and in 4017 * @bs->drv->mutable_opts are skipped. 4018 * 4019 * Return 0 on success, otherwise return -EINVAL and set @errp. 4020 */ 4021 static int bdrv_reset_options_allowed(BlockDriverState *bs, 4022 const QDict *new_opts, Error **errp) 4023 { 4024 const QDictEntry *e; 4025 /* These options are common to all block drivers and are handled 4026 * in bdrv_reopen_prepare() so they can be left out of @new_opts */ 4027 const char *const common_options[] = { 4028 "node-name", "discard", "cache.direct", "cache.no-flush", 4029 "read-only", "auto-read-only", "detect-zeroes", NULL 4030 }; 4031 4032 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { 4033 if (!qdict_haskey(new_opts, e->key) && 4034 !is_str_in_list(e->key, common_options) && 4035 !is_str_in_list(e->key, bs->drv->mutable_opts)) { 4036 error_setg(errp, "Option '%s' cannot be reset " 4037 "to its default value", e->key); 4038 return -EINVAL; 4039 } 4040 } 4041 4042 return 0; 4043 } 4044 4045 /* 4046 * Returns true if @child can be reached recursively from @bs 4047 */ 4048 static bool bdrv_recurse_has_child(BlockDriverState *bs, 4049 BlockDriverState *child) 4050 { 4051 BdrvChild *c; 4052 4053 if (bs == child) { 4054 return true; 4055 } 4056 4057 QLIST_FOREACH(c, &bs->children, next) { 4058 if (bdrv_recurse_has_child(c->bs, child)) { 4059 return true; 4060 } 4061 } 4062 4063 return false; 4064 } 4065 4066 /* 4067 * Adds a BlockDriverState to a simple queue for an atomic, transactional 4068 * reopen of multiple devices. 4069 * 4070 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT 4071 * already performed, or alternatively may be NULL a new BlockReopenQueue will 4072 * be created and initialized. This newly created BlockReopenQueue should be 4073 * passed back in for subsequent calls that are intended to be of the same 4074 * atomic 'set'. 4075 * 4076 * bs is the BlockDriverState to add to the reopen queue. 
4077 * 4078 * options contains the changed options for the associated bs 4079 * (the BlockReopenQueue takes ownership) 4080 * 4081 * flags contains the open flags for the associated bs 4082 * 4083 * returns a pointer to bs_queue, which is either the newly allocated 4084 * bs_queue, or the existing bs_queue being used. 4085 * 4086 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 4087 */ 4088 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 4089 BlockDriverState *bs, 4090 QDict *options, 4091 const BdrvChildClass *klass, 4092 BdrvChildRole role, 4093 bool parent_is_format, 4094 QDict *parent_options, 4095 int parent_flags, 4096 bool keep_old_opts) 4097 { 4098 assert(bs != NULL); 4099 4100 BlockReopenQueueEntry *bs_entry; 4101 BdrvChild *child; 4102 QDict *old_options, *explicit_options, *options_copy; 4103 int flags; 4104 QemuOpts *opts; 4105 4106 /* Make sure that the caller remembered to use a drained section. This is 4107 * important to avoid graph changes between the recursive queuing here and 4108 * bdrv_reopen_multiple(). */ 4109 assert(bs->quiesce_counter > 0); 4110 4111 if (bs_queue == NULL) { 4112 bs_queue = g_new0(BlockReopenQueue, 1); 4113 QTAILQ_INIT(bs_queue); 4114 } 4115 4116 if (!options) { 4117 options = qdict_new(); 4118 } 4119 4120 /* Check if this BlockDriverState is already in the queue */ 4121 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4122 if (bs == bs_entry->state.bs) { 4123 break; 4124 } 4125 } 4126 4127 /* 4128 * Precedence of options: 4129 * 1. Explicitly passed in options (highest) 4130 * 2. Retained from explicitly set options of bs 4131 * 3. Inherited from parent node 4132 * 4. Retained from effective options of bs 4133 */ 4134 4135 /* Old explicitly set values (don't overwrite by inherited value) */ 4136 if (bs_entry || keep_old_opts) { 4137 old_options = qdict_clone_shallow(bs_entry ? 
4138 bs_entry->state.explicit_options : 4139 bs->explicit_options); 4140 bdrv_join_options(bs, options, old_options); 4141 qobject_unref(old_options); 4142 } 4143 4144 explicit_options = qdict_clone_shallow(options); 4145 4146 /* Inherit from parent node */ 4147 if (parent_options) { 4148 flags = 0; 4149 klass->inherit_options(role, parent_is_format, &flags, options, 4150 parent_flags, parent_options); 4151 } else { 4152 flags = bdrv_get_flags(bs); 4153 } 4154 4155 if (keep_old_opts) { 4156 /* Old values are used for options that aren't set yet */ 4157 old_options = qdict_clone_shallow(bs->options); 4158 bdrv_join_options(bs, options, old_options); 4159 qobject_unref(old_options); 4160 } 4161 4162 /* We have the final set of options so let's update the flags */ 4163 options_copy = qdict_clone_shallow(options); 4164 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 4165 qemu_opts_absorb_qdict(opts, options_copy, NULL); 4166 update_flags_from_options(&flags, opts); 4167 qemu_opts_del(opts); 4168 qobject_unref(options_copy); 4169 4170 /* bdrv_open_inherit() sets and clears some additional flags internally */ 4171 flags &= ~BDRV_O_PROTOCOL; 4172 if (flags & BDRV_O_RDWR) { 4173 flags |= BDRV_O_ALLOW_RDWR; 4174 } 4175 4176 if (!bs_entry) { 4177 bs_entry = g_new0(BlockReopenQueueEntry, 1); 4178 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); 4179 } else { 4180 qobject_unref(bs_entry->state.options); 4181 qobject_unref(bs_entry->state.explicit_options); 4182 } 4183 4184 bs_entry->state.bs = bs; 4185 bs_entry->state.options = options; 4186 bs_entry->state.explicit_options = explicit_options; 4187 bs_entry->state.flags = flags; 4188 4189 /* 4190 * If keep_old_opts is false then it means that unspecified 4191 * options must be reset to their original value. We don't allow 4192 * resetting 'backing' but we need to know if the option is 4193 * missing in order to decide if we have to return an error. 
4194 */ 4195 if (!keep_old_opts) { 4196 bs_entry->state.backing_missing = 4197 !qdict_haskey(options, "backing") && 4198 !qdict_haskey(options, "backing.driver"); 4199 } 4200 4201 QLIST_FOREACH(child, &bs->children, next) { 4202 QDict *new_child_options = NULL; 4203 bool child_keep_old = keep_old_opts; 4204 4205 /* reopen can only change the options of block devices that were 4206 * implicitly created and inherited options. For other (referenced) 4207 * block devices, a syntax like "backing.foo" results in an error. */ 4208 if (child->bs->inherits_from != bs) { 4209 continue; 4210 } 4211 4212 /* Check if the options contain a child reference */ 4213 if (qdict_haskey(options, child->name)) { 4214 const char *childref = qdict_get_try_str(options, child->name); 4215 /* 4216 * The current child must not be reopened if the child 4217 * reference is null or points to a different node. 4218 */ 4219 if (g_strcmp0(childref, child->bs->node_name)) { 4220 continue; 4221 } 4222 /* 4223 * If the child reference points to the current child then 4224 * reopen it with its existing set of options (note that 4225 * it can still inherit new options from the parent). 
4226 */ 4227 child_keep_old = true; 4228 } else { 4229 /* Extract child options ("child-name.*") */ 4230 char *child_key_dot = g_strdup_printf("%s.", child->name); 4231 qdict_extract_subqdict(explicit_options, NULL, child_key_dot); 4232 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 4233 g_free(child_key_dot); 4234 } 4235 4236 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 4237 child->klass, child->role, bs->drv->is_format, 4238 options, flags, child_keep_old); 4239 } 4240 4241 return bs_queue; 4242 } 4243 4244 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 4245 BlockDriverState *bs, 4246 QDict *options, bool keep_old_opts) 4247 { 4248 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, 4249 NULL, 0, keep_old_opts); 4250 } 4251 4252 /* 4253 * Reopen multiple BlockDriverStates atomically & transactionally. 4254 * 4255 * The queue passed in (bs_queue) must have been built up previous 4256 * via bdrv_reopen_queue(). 4257 * 4258 * Reopens all BDS specified in the queue, with the appropriate 4259 * flags. All devices are prepared for reopen, and failure of any 4260 * device will cause all device changes to be abandoned, and intermediate 4261 * data cleaned up. 4262 * 4263 * If all devices prepare successfully, then the changes are committed 4264 * to all devices. 4265 * 4266 * All affected nodes must be drained between bdrv_reopen_queue() and 4267 * bdrv_reopen_multiple(). 
4268 */ 4269 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 4270 { 4271 int ret = -1; 4272 BlockReopenQueueEntry *bs_entry, *next; 4273 Transaction *tran = tran_new(); 4274 g_autoptr(GHashTable) found = NULL; 4275 g_autoptr(GSList) refresh_list = NULL; 4276 4277 assert(bs_queue != NULL); 4278 4279 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4280 ret = bdrv_flush(bs_entry->state.bs); 4281 if (ret < 0) { 4282 error_setg_errno(errp, -ret, "Error flushing drive"); 4283 goto cleanup; 4284 } 4285 } 4286 4287 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4288 assert(bs_entry->state.bs->quiesce_counter > 0); 4289 ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); 4290 if (ret < 0) { 4291 goto abort; 4292 } 4293 bs_entry->prepared = true; 4294 } 4295 4296 found = g_hash_table_new(NULL, NULL); 4297 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4298 BDRVReopenState *state = &bs_entry->state; 4299 4300 refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); 4301 if (state->old_backing_bs) { 4302 refresh_list = bdrv_topological_dfs(refresh_list, found, 4303 state->old_backing_bs); 4304 } 4305 } 4306 4307 /* 4308 * Note that file-posix driver rely on permission update done during reopen 4309 * (even if no permission changed), because it wants "new" permissions for 4310 * reconfiguring the fd and that's why it does it in raw_check_perm(), not 4311 * in raw_reopen_prepare() which is called with "old" permissions. 4312 */ 4313 ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); 4314 if (ret < 0) { 4315 goto abort; 4316 } 4317 4318 /* 4319 * If we reach this point, we have success and just need to apply the 4320 * changes. 4321 * 4322 * Reverse order is used to comfort qcow2 driver: on commit it need to write 4323 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But 4324 * children are usually goes after parents in reopen-queue, so go from last 4325 * to first element. 
4326 */ 4327 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4328 bdrv_reopen_commit(&bs_entry->state); 4329 } 4330 4331 tran_commit(tran); 4332 4333 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4334 BlockDriverState *bs = bs_entry->state.bs; 4335 4336 if (bs->drv->bdrv_reopen_commit_post) { 4337 bs->drv->bdrv_reopen_commit_post(&bs_entry->state); 4338 } 4339 } 4340 4341 ret = 0; 4342 goto cleanup; 4343 4344 abort: 4345 tran_abort(tran); 4346 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4347 if (bs_entry->prepared) { 4348 bdrv_reopen_abort(&bs_entry->state); 4349 } 4350 qobject_unref(bs_entry->state.explicit_options); 4351 qobject_unref(bs_entry->state.options); 4352 } 4353 4354 cleanup: 4355 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4356 g_free(bs_entry); 4357 } 4358 g_free(bs_queue); 4359 4360 return ret; 4361 } 4362 4363 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, 4364 Error **errp) 4365 { 4366 int ret; 4367 BlockReopenQueue *queue; 4368 QDict *opts = qdict_new(); 4369 4370 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); 4371 4372 bdrv_subtree_drained_begin(bs); 4373 queue = bdrv_reopen_queue(NULL, bs, opts, true); 4374 ret = bdrv_reopen_multiple(queue, errp); 4375 bdrv_subtree_drained_end(bs); 4376 4377 return ret; 4378 } 4379 4380 static bool bdrv_reopen_can_attach(BlockDriverState *parent, 4381 BdrvChild *child, 4382 BlockDriverState *new_child, 4383 Error **errp) 4384 { 4385 AioContext *parent_ctx = bdrv_get_aio_context(parent); 4386 AioContext *child_ctx = bdrv_get_aio_context(new_child); 4387 GSList *ignore; 4388 bool ret; 4389 4390 ignore = g_slist_prepend(NULL, child); 4391 ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); 4392 g_slist_free(ignore); 4393 if (ret) { 4394 return ret; 4395 } 4396 4397 ignore = g_slist_prepend(NULL, child); 4398 ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); 4399 g_slist_free(ignore); 4400 return ret; 4401 } 4402 4403 /* 
4404 * Take a BDRVReopenState and check if the value of 'backing' in the 4405 * reopen_state->options QDict is valid or not. 4406 * 4407 * If 'backing' is missing from the QDict then return 0. 4408 * 4409 * If 'backing' contains the node name of the backing file of 4410 * reopen_state->bs then return 0. 4411 * 4412 * If 'backing' contains a different node name (or is null) then check 4413 * whether the current backing file can be replaced with the new one. 4414 * If that's the case then reopen_state->replace_backing_bs is set to 4415 * true and reopen_state->new_backing_bs contains a pointer to the new 4416 * backing BlockDriverState (or NULL). 4417 * 4418 * Return 0 on success, otherwise return < 0 and set @errp. 4419 */ 4420 static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, 4421 Transaction *set_backings_tran, 4422 Error **errp) 4423 { 4424 BlockDriverState *bs = reopen_state->bs; 4425 BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; 4426 QObject *value; 4427 const char *str; 4428 4429 value = qdict_get(reopen_state->options, "backing"); 4430 if (value == NULL) { 4431 return 0; 4432 } 4433 4434 switch (qobject_type(value)) { 4435 case QTYPE_QNULL: 4436 new_backing_bs = NULL; 4437 break; 4438 case QTYPE_QSTRING: 4439 str = qstring_get_str(qobject_to(QString, value)); 4440 new_backing_bs = bdrv_lookup_bs(NULL, str, errp); 4441 if (new_backing_bs == NULL) { 4442 return -EINVAL; 4443 } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { 4444 error_setg(errp, "Making '%s' a backing file of '%s' " 4445 "would create a cycle", str, bs->node_name); 4446 return -EINVAL; 4447 } 4448 break; 4449 default: 4450 /* 'backing' does not allow any other data type */ 4451 g_assert_not_reached(); 4452 } 4453 4454 /* 4455 * Check AioContext compatibility so that the bdrv_set_backing_hd() call in 4456 * bdrv_reopen_commit() won't fail. 
4457 */ 4458 if (new_backing_bs) { 4459 if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { 4460 return -EINVAL; 4461 } 4462 } 4463 4464 /* 4465 * Ensure that @bs can really handle backing files, because we are 4466 * about to give it one (or swap the existing one) 4467 */ 4468 if (bs->drv->is_filter) { 4469 /* Filters always have a file or a backing child */ 4470 if (!bs->backing) { 4471 error_setg(errp, "'%s' is a %s filter node that does not support a " 4472 "backing child", bs->node_name, bs->drv->format_name); 4473 return -EINVAL; 4474 } 4475 } else if (!bs->drv->supports_backing) { 4476 error_setg(errp, "Driver '%s' of node '%s' does not support backing " 4477 "files", bs->drv->format_name, bs->node_name); 4478 return -EINVAL; 4479 } 4480 4481 /* 4482 * Find the "actual" backing file by skipping all links that point 4483 * to an implicit node, if any (e.g. a commit filter node). 4484 * We cannot use any of the bdrv_skip_*() functions here because 4485 * those return the first explicit node, while we are looking for 4486 * its overlay here. 4487 */ 4488 overlay_bs = bs; 4489 for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); 4490 below_bs && below_bs->implicit; 4491 below_bs = bdrv_filter_or_cow_bs(overlay_bs)) 4492 { 4493 overlay_bs = below_bs; 4494 } 4495 4496 /* If we want to replace the backing file we need some extra checks */ 4497 if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { 4498 int ret; 4499 4500 /* Check for implicit nodes between bs and its backing file */ 4501 if (bs != overlay_bs) { 4502 error_setg(errp, "Cannot change backing link if '%s' has " 4503 "an implicit backing file", bs->node_name); 4504 return -EPERM; 4505 } 4506 /* 4507 * Check if the backing link that we want to replace is frozen. 
4508 * Note that 4509 * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, 4510 * because we know that overlay_bs == bs, and that @bs 4511 * either is a filter that uses ->backing or a COW format BDS 4512 * with bs->drv->supports_backing == true. 4513 */ 4514 if (bdrv_is_backing_chain_frozen(overlay_bs, 4515 child_bs(overlay_bs->backing), errp)) 4516 { 4517 return -EPERM; 4518 } 4519 reopen_state->replace_backing_bs = true; 4520 reopen_state->old_backing_bs = bs->backing ? bs->backing->bs : NULL; 4521 ret = bdrv_set_backing_noperm(bs, new_backing_bs, set_backings_tran, 4522 errp); 4523 if (ret < 0) { 4524 return ret; 4525 } 4526 } 4527 4528 return 0; 4529 } 4530 4531 /* 4532 * Prepares a BlockDriverState for reopen. All changes are staged in the 4533 * 'opaque' field of the BDRVReopenState, which is used and allocated by 4534 * the block driver layer .bdrv_reopen_prepare() 4535 * 4536 * bs is the BlockDriverState to reopen 4537 * flags are the new open flags 4538 * queue is the reopen queue 4539 * 4540 * Returns 0 on success, non-zero on error. On error errp will be set 4541 * as well. 4542 * 4543 * On failure, bdrv_reopen_abort() will be called to clean up any data. 
4544 * It is the responsibility of the caller to then call the abort() or 4545 * commit() for any other BDS that have been left in a prepare() state 4546 * 4547 */ 4548 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 4549 BlockReopenQueue *queue, 4550 Transaction *set_backings_tran, Error **errp) 4551 { 4552 int ret = -1; 4553 int old_flags; 4554 Error *local_err = NULL; 4555 BlockDriver *drv; 4556 QemuOpts *opts; 4557 QDict *orig_reopen_opts; 4558 char *discard = NULL; 4559 bool read_only; 4560 bool drv_prepared = false; 4561 4562 assert(reopen_state != NULL); 4563 assert(reopen_state->bs->drv != NULL); 4564 drv = reopen_state->bs->drv; 4565 4566 /* This function and each driver's bdrv_reopen_prepare() remove 4567 * entries from reopen_state->options as they are processed, so 4568 * we need to make a copy of the original QDict. */ 4569 orig_reopen_opts = qdict_clone_shallow(reopen_state->options); 4570 4571 /* Process generic block layer options */ 4572 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 4573 if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) { 4574 ret = -EINVAL; 4575 goto error; 4576 } 4577 4578 /* This was already called in bdrv_reopen_queue_child() so the flags 4579 * are up-to-date. This time we simply want to remove the options from 4580 * QemuOpts in order to indicate that they have been processed. 
*/ 4581 old_flags = reopen_state->flags; 4582 update_flags_from_options(&reopen_state->flags, opts); 4583 assert(old_flags == reopen_state->flags); 4584 4585 discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD); 4586 if (discard != NULL) { 4587 if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) { 4588 error_setg(errp, "Invalid discard option"); 4589 ret = -EINVAL; 4590 goto error; 4591 } 4592 } 4593 4594 reopen_state->detect_zeroes = 4595 bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err); 4596 if (local_err) { 4597 error_propagate(errp, local_err); 4598 ret = -EINVAL; 4599 goto error; 4600 } 4601 4602 /* All other options (including node-name and driver) must be unchanged. 4603 * Put them back into the QDict, so that they are checked at the end 4604 * of this function. */ 4605 qemu_opts_to_qdict(opts, reopen_state->options); 4606 4607 /* If we are to stay read-only, do not allow permission change 4608 * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is 4609 * not set, or if the BDS still has copy_on_read enabled */ 4610 read_only = !(reopen_state->flags & BDRV_O_RDWR); 4611 ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err); 4612 if (local_err) { 4613 error_propagate(errp, local_err); 4614 goto error; 4615 } 4616 4617 if (drv->bdrv_reopen_prepare) { 4618 /* 4619 * If a driver-specific option is missing, it means that we 4620 * should reset it to its default value. 4621 * But not all options allow that, so we need to check it first. 
4622 */ 4623 ret = bdrv_reset_options_allowed(reopen_state->bs, 4624 reopen_state->options, errp); 4625 if (ret) { 4626 goto error; 4627 } 4628 4629 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 4630 if (ret) { 4631 if (local_err != NULL) { 4632 error_propagate(errp, local_err); 4633 } else { 4634 bdrv_refresh_filename(reopen_state->bs); 4635 error_setg(errp, "failed while preparing to reopen image '%s'", 4636 reopen_state->bs->filename); 4637 } 4638 goto error; 4639 } 4640 } else { 4641 /* It is currently mandatory to have a bdrv_reopen_prepare() 4642 * handler for each supported drv. */ 4643 error_setg(errp, "Block format '%s' used by node '%s' " 4644 "does not support reopening files", drv->format_name, 4645 bdrv_get_device_or_node_name(reopen_state->bs)); 4646 ret = -1; 4647 goto error; 4648 } 4649 4650 drv_prepared = true; 4651 4652 /* 4653 * We must provide the 'backing' option if the BDS has a backing 4654 * file or if the image file has a backing file name as part of 4655 * its metadata. Otherwise the 'backing' option can be omitted. 4656 */ 4657 if (drv->supports_backing && reopen_state->backing_missing && 4658 (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) { 4659 error_setg(errp, "backing is missing for '%s'", 4660 reopen_state->bs->node_name); 4661 ret = -EINVAL; 4662 goto error; 4663 } 4664 4665 /* 4666 * Allow changing the 'backing' option. The new value can be 4667 * either a reference to an existing node (using its node name) 4668 * or NULL to simply detach the current backing file. 4669 */ 4670 ret = bdrv_reopen_parse_backing(reopen_state, set_backings_tran, errp); 4671 if (ret < 0) { 4672 goto error; 4673 } 4674 qdict_del(reopen_state->options, "backing"); 4675 4676 /* Options that are not handled are only okay if they are unchanged 4677 * compared to the old state. It is expected that some options are only 4678 * used for the initial open, but not reopen (e.g. 
filename) */ 4679 if (qdict_size(reopen_state->options)) { 4680 const QDictEntry *entry = qdict_first(reopen_state->options); 4681 4682 do { 4683 QObject *new = entry->value; 4684 QObject *old = qdict_get(reopen_state->bs->options, entry->key); 4685 4686 /* Allow child references (child_name=node_name) as long as they 4687 * point to the current child (i.e. everything stays the same). */ 4688 if (qobject_type(new) == QTYPE_QSTRING) { 4689 BdrvChild *child; 4690 QLIST_FOREACH(child, &reopen_state->bs->children, next) { 4691 if (!strcmp(child->name, entry->key)) { 4692 break; 4693 } 4694 } 4695 4696 if (child) { 4697 if (!strcmp(child->bs->node_name, 4698 qstring_get_str(qobject_to(QString, new)))) { 4699 continue; /* Found child with this name, skip option */ 4700 } 4701 } 4702 } 4703 4704 /* 4705 * TODO: When using -drive to specify blockdev options, all values 4706 * will be strings; however, when using -blockdev, blockdev-add or 4707 * filenames using the json:{} pseudo-protocol, they will be 4708 * correctly typed. 4709 * In contrast, reopening options are (currently) always strings 4710 * (because you can only specify them through qemu-io; all other 4711 * callers do not specify any options). 4712 * Therefore, when using anything other than -drive to create a BDS, 4713 * this cannot detect non-string options as unchanged, because 4714 * qobject_is_equal() always returns false for objects of different 4715 * type. In the future, this should be remedied by correctly typing 4716 * all options. For now, this is not too big of an issue because 4717 * the user can simply omit options which cannot be changed anyway, 4718 * so they will stay unchanged. 
4719 */ 4720 if (!qobject_is_equal(new, old)) { 4721 error_setg(errp, "Cannot change the option '%s'", entry->key); 4722 ret = -EINVAL; 4723 goto error; 4724 } 4725 } while ((entry = qdict_next(reopen_state->options, entry))); 4726 } 4727 4728 ret = 0; 4729 4730 /* Restore the original reopen_state->options QDict */ 4731 qobject_unref(reopen_state->options); 4732 reopen_state->options = qobject_ref(orig_reopen_opts); 4733 4734 error: 4735 if (ret < 0 && drv_prepared) { 4736 /* drv->bdrv_reopen_prepare() has succeeded, so we need to 4737 * call drv->bdrv_reopen_abort() before signaling an error 4738 * (bdrv_reopen_multiple() will not call bdrv_reopen_abort() 4739 * when the respective bdrv_reopen_prepare() has failed) */ 4740 if (drv->bdrv_reopen_abort) { 4741 drv->bdrv_reopen_abort(reopen_state); 4742 } 4743 } 4744 qemu_opts_del(opts); 4745 qobject_unref(orig_reopen_opts); 4746 g_free(discard); 4747 return ret; 4748 } 4749 4750 /* 4751 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 4752 * makes them final by swapping the staging BlockDriverState contents into 4753 * the active BlockDriverState contents. 
4754 */ 4755 static void bdrv_reopen_commit(BDRVReopenState *reopen_state) 4756 { 4757 BlockDriver *drv; 4758 BlockDriverState *bs; 4759 BdrvChild *child; 4760 4761 assert(reopen_state != NULL); 4762 bs = reopen_state->bs; 4763 drv = bs->drv; 4764 assert(drv != NULL); 4765 4766 /* If there are any driver level actions to take */ 4767 if (drv->bdrv_reopen_commit) { 4768 drv->bdrv_reopen_commit(reopen_state); 4769 } 4770 4771 /* set BDS specific flags now */ 4772 qobject_unref(bs->explicit_options); 4773 qobject_unref(bs->options); 4774 4775 bs->explicit_options = reopen_state->explicit_options; 4776 bs->options = reopen_state->options; 4777 bs->open_flags = reopen_state->flags; 4778 bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 4779 bs->detect_zeroes = reopen_state->detect_zeroes; 4780 4781 if (reopen_state->replace_backing_bs) { 4782 qdict_del(bs->explicit_options, "backing"); 4783 qdict_del(bs->options, "backing"); 4784 } 4785 4786 /* Remove child references from bs->options and bs->explicit_options. 
4787 * Child options were already removed in bdrv_reopen_queue_child() */ 4788 QLIST_FOREACH(child, &bs->children, next) { 4789 qdict_del(bs->explicit_options, child->name); 4790 qdict_del(bs->options, child->name); 4791 } 4792 bdrv_refresh_limits(bs, NULL, NULL); 4793 } 4794 4795 /* 4796 * Abort the reopen, and delete and free the staged changes in 4797 * reopen_state 4798 */ 4799 static void bdrv_reopen_abort(BDRVReopenState *reopen_state) 4800 { 4801 BlockDriver *drv; 4802 4803 assert(reopen_state != NULL); 4804 drv = reopen_state->bs->drv; 4805 assert(drv != NULL); 4806 4807 if (drv->bdrv_reopen_abort) { 4808 drv->bdrv_reopen_abort(reopen_state); 4809 } 4810 } 4811 4812 4813 static void bdrv_close(BlockDriverState *bs) 4814 { 4815 BdrvAioNotifier *ban, *ban_next; 4816 BdrvChild *child, *next; 4817 4818 assert(!bs->refcnt); 4819 4820 bdrv_drained_begin(bs); /* complete I/O */ 4821 bdrv_flush(bs); 4822 bdrv_drain(bs); /* in case flush left pending I/O */ 4823 4824 if (bs->drv) { 4825 if (bs->drv->bdrv_close) { 4826 /* Must unfreeze all children, so bdrv_unref_child() works */ 4827 bs->drv->bdrv_close(bs); 4828 } 4829 bs->drv = NULL; 4830 } 4831 4832 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 4833 bdrv_unref_child(bs, child); 4834 } 4835 4836 bs->backing = NULL; 4837 bs->file = NULL; 4838 g_free(bs->opaque); 4839 bs->opaque = NULL; 4840 qatomic_set(&bs->copy_on_read, 0); 4841 bs->backing_file[0] = '\0'; 4842 bs->backing_format[0] = '\0'; 4843 bs->total_sectors = 0; 4844 bs->encrypted = false; 4845 bs->sg = false; 4846 qobject_unref(bs->options); 4847 qobject_unref(bs->explicit_options); 4848 bs->options = NULL; 4849 bs->explicit_options = NULL; 4850 qobject_unref(bs->full_open_options); 4851 bs->full_open_options = NULL; 4852 4853 bdrv_release_named_dirty_bitmaps(bs); 4854 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 4855 4856 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 4857 g_free(ban); 4858 } 4859 QLIST_INIT(&bs->aio_notifiers); 4860 
bdrv_drained_end(bs); 4861 4862 /* 4863 * If we're still inside some bdrv_drain_all_begin()/end() sections, end 4864 * them now since this BDS won't exist anymore when bdrv_drain_all_end() 4865 * gets called. 4866 */ 4867 if (bs->quiesce_counter) { 4868 bdrv_drain_all_end_quiesce(bs); 4869 } 4870 } 4871 4872 void bdrv_close_all(void) 4873 { 4874 assert(job_next(NULL) == NULL); 4875 4876 /* Drop references from requests still in flight, such as canceled block 4877 * jobs whose AIO context has not been polled yet */ 4878 bdrv_drain_all(); 4879 4880 blk_remove_all_bs(); 4881 blockdev_close_all_bdrv_states(); 4882 4883 assert(QTAILQ_EMPTY(&all_bdrv_states)); 4884 } 4885 4886 static bool should_update_child(BdrvChild *c, BlockDriverState *to) 4887 { 4888 GQueue *queue; 4889 GHashTable *found; 4890 bool ret; 4891 4892 if (c->klass->stay_at_node) { 4893 return false; 4894 } 4895 4896 /* If the child @c belongs to the BDS @to, replacing the current 4897 * c->bs by @to would mean to create a loop. 4898 * 4899 * Such a case occurs when appending a BDS to a backing chain. 4900 * For instance, imagine the following chain: 4901 * 4902 * guest device -> node A -> further backing chain... 4903 * 4904 * Now we create a new BDS B which we want to put on top of this 4905 * chain, so we first attach A as its backing node: 4906 * 4907 * node B 4908 * | 4909 * v 4910 * guest device -> node A -> further backing chain... 4911 * 4912 * Finally we want to replace A by B. When doing that, we want to 4913 * replace all pointers to A by pointers to B -- except for the 4914 * pointer from B because (1) that would create a loop, and (2) 4915 * that pointer should simply stay intact: 4916 * 4917 * guest device -> node B 4918 * | 4919 * v 4920 * node A -> further backing chain... 4921 * 4922 * In general, when replacing a node A (c->bs) by a node B (@to), 4923 * if A is a child of B, that means we cannot replace A by B there 4924 * because that would create a loop. 
Silently detaching A from B 4925 * is also not really an option. So overall just leaving A in 4926 * place there is the most sensible choice. 4927 * 4928 * We would also create a loop in any cases where @c is only 4929 * indirectly referenced by @to. Prevent this by returning false 4930 * if @c is found (by breadth-first search) anywhere in the whole 4931 * subtree of @to. 4932 */ 4933 4934 ret = true; 4935 found = g_hash_table_new(NULL, NULL); 4936 g_hash_table_add(found, to); 4937 queue = g_queue_new(); 4938 g_queue_push_tail(queue, to); 4939 4940 while (!g_queue_is_empty(queue)) { 4941 BlockDriverState *v = g_queue_pop_head(queue); 4942 BdrvChild *c2; 4943 4944 QLIST_FOREACH(c2, &v->children, next) { 4945 if (c2 == c) { 4946 ret = false; 4947 break; 4948 } 4949 4950 if (g_hash_table_contains(found, c2->bs)) { 4951 continue; 4952 } 4953 4954 g_queue_push_tail(queue, c2->bs); 4955 g_hash_table_add(found, c2->bs); 4956 } 4957 } 4958 4959 g_queue_free(queue); 4960 g_hash_table_destroy(found); 4961 4962 return ret; 4963 } 4964 4965 typedef struct BdrvRemoveFilterOrCowChild { 4966 BdrvChild *child; 4967 bool is_backing; 4968 } BdrvRemoveFilterOrCowChild; 4969 4970 static void bdrv_remove_filter_or_cow_child_abort(void *opaque) 4971 { 4972 BdrvRemoveFilterOrCowChild *s = opaque; 4973 BlockDriverState *parent_bs = s->child->opaque; 4974 4975 QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); 4976 if (s->is_backing) { 4977 parent_bs->backing = s->child; 4978 } else { 4979 parent_bs->file = s->child; 4980 } 4981 4982 /* 4983 * We don't have to restore child->bs here to undo bdrv_replace_child() 4984 * because that function is transactionable and it registered own completion 4985 * entries in @tran, so .abort() for bdrv_replace_child_safe() will be 4986 * called automatically. 
4987 */ 4988 } 4989 4990 static void bdrv_remove_filter_or_cow_child_commit(void *opaque) 4991 { 4992 BdrvRemoveFilterOrCowChild *s = opaque; 4993 4994 bdrv_child_free(s->child); 4995 } 4996 4997 static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { 4998 .abort = bdrv_remove_filter_or_cow_child_abort, 4999 .commit = bdrv_remove_filter_or_cow_child_commit, 5000 .clean = g_free, 5001 }; 5002 5003 /* 5004 * A function to remove backing-chain child of @bs if exists: cow child for 5005 * format nodes (always .backing) and filter child for filters (may be .file or 5006 * .backing) 5007 */ 5008 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 5009 Transaction *tran) 5010 { 5011 BdrvRemoveFilterOrCowChild *s; 5012 BdrvChild *child = bdrv_filter_or_cow_child(bs); 5013 5014 if (!child) { 5015 return; 5016 } 5017 5018 if (child->bs) { 5019 bdrv_replace_child_safe(child, NULL, tran); 5020 } 5021 5022 s = g_new(BdrvRemoveFilterOrCowChild, 1); 5023 *s = (BdrvRemoveFilterOrCowChild) { 5024 .child = child, 5025 .is_backing = (child == bs->backing), 5026 }; 5027 tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); 5028 5029 QLIST_SAFE_REMOVE(child, next); 5030 if (s->is_backing) { 5031 bs->backing = NULL; 5032 } else { 5033 bs->file = NULL; 5034 } 5035 } 5036 5037 static int bdrv_replace_node_noperm(BlockDriverState *from, 5038 BlockDriverState *to, 5039 bool auto_skip, Transaction *tran, 5040 Error **errp) 5041 { 5042 BdrvChild *c, *next; 5043 5044 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 5045 assert(c->bs == from); 5046 if (!should_update_child(c, to)) { 5047 if (auto_skip) { 5048 continue; 5049 } 5050 error_setg(errp, "Should not change '%s' link to '%s'", 5051 c->name, from->node_name); 5052 return -EINVAL; 5053 } 5054 if (c->frozen) { 5055 error_setg(errp, "Cannot change '%s' link to '%s'", 5056 c->name, from->node_name); 5057 return -EPERM; 5058 } 5059 bdrv_replace_child_safe(c, to, tran); 5060 } 5061 5062 return 0; 5063 } 
5064 5065 /* 5066 * With auto_skip=true bdrv_replace_node_common skips updating from parents 5067 * if it creates a parent-child relation loop or if parent is block-job. 5068 * 5069 * With auto_skip=false the error is returned if from has a parent which should 5070 * not be updated. 5071 * 5072 * With @detach_subchain=true @to must be in a backing chain of @from. In this 5073 * case backing link of the cow-parent of @to is removed. 5074 */ 5075 static int bdrv_replace_node_common(BlockDriverState *from, 5076 BlockDriverState *to, 5077 bool auto_skip, bool detach_subchain, 5078 Error **errp) 5079 { 5080 Transaction *tran = tran_new(); 5081 g_autoptr(GHashTable) found = NULL; 5082 g_autoptr(GSList) refresh_list = NULL; 5083 BlockDriverState *to_cow_parent; 5084 int ret; 5085 5086 if (detach_subchain) { 5087 assert(bdrv_chain_contains(from, to)); 5088 assert(from != to); 5089 for (to_cow_parent = from; 5090 bdrv_filter_or_cow_bs(to_cow_parent) != to; 5091 to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent)) 5092 { 5093 ; 5094 } 5095 } 5096 5097 /* Make sure that @from doesn't go away until we have successfully attached 5098 * all of its parents to @to. */ 5099 bdrv_ref(from); 5100 5101 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 5102 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); 5103 bdrv_drained_begin(from); 5104 5105 /* 5106 * Do the replacement without permission update. 5107 * Replacement may influence the permissions, we should calculate new 5108 * permissions based on new graph. If we fail, we'll roll-back the 5109 * replacement. 
5110 */ 5111 ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp); 5112 if (ret < 0) { 5113 goto out; 5114 } 5115 5116 if (detach_subchain) { 5117 bdrv_remove_filter_or_cow_child(to_cow_parent, tran); 5118 } 5119 5120 found = g_hash_table_new(NULL, NULL); 5121 5122 refresh_list = bdrv_topological_dfs(refresh_list, found, to); 5123 refresh_list = bdrv_topological_dfs(refresh_list, found, from); 5124 5125 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); 5126 if (ret < 0) { 5127 goto out; 5128 } 5129 5130 ret = 0; 5131 5132 out: 5133 tran_finalize(tran, ret); 5134 5135 bdrv_drained_end(from); 5136 bdrv_unref(from); 5137 5138 return ret; 5139 } 5140 5141 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, 5142 Error **errp) 5143 { 5144 return bdrv_replace_node_common(from, to, true, false, errp); 5145 } 5146 5147 int bdrv_drop_filter(BlockDriverState *bs, Error **errp) 5148 { 5149 return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, 5150 errp); 5151 } 5152 5153 /* 5154 * Add new bs contents at the top of an image chain while the chain is 5155 * live, while keeping required fields on the top layer. 5156 * 5157 * This will modify the BlockDriverState fields, and swap contents 5158 * between bs_new and bs_top. Both bs_new and bs_top are modified. 5159 * 5160 * bs_new must not be attached to a BlockBackend and must not have backing 5161 * child. 5162 * 5163 * This function does not create any image files. 
5164 */ 5165 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 5166 Error **errp) 5167 { 5168 int ret; 5169 Transaction *tran = tran_new(); 5170 5171 assert(!bs_new->backing); 5172 5173 ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", 5174 &child_of_bds, bdrv_backing_role(bs_new), 5175 &bs_new->backing, tran, errp); 5176 if (ret < 0) { 5177 goto out; 5178 } 5179 5180 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); 5181 if (ret < 0) { 5182 goto out; 5183 } 5184 5185 ret = bdrv_refresh_perms(bs_new, errp); 5186 out: 5187 tran_finalize(tran, ret); 5188 5189 bdrv_refresh_limits(bs_top, NULL, NULL); 5190 5191 return ret; 5192 } 5193 5194 static void bdrv_delete(BlockDriverState *bs) 5195 { 5196 assert(bdrv_op_blocker_is_empty(bs)); 5197 assert(!bs->refcnt); 5198 5199 /* remove from list, if necessary */ 5200 if (bs->node_name[0] != '\0') { 5201 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 5202 } 5203 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 5204 5205 bdrv_close(bs); 5206 5207 g_free(bs); 5208 } 5209 5210 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, 5211 int flags, Error **errp) 5212 { 5213 BlockDriverState *new_node_bs; 5214 Error *local_err = NULL; 5215 5216 new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); 5217 if (new_node_bs == NULL) { 5218 error_prepend(errp, "Could not create node: "); 5219 return NULL; 5220 } 5221 5222 bdrv_drained_begin(bs); 5223 bdrv_replace_node(bs, new_node_bs, &local_err); 5224 bdrv_drained_end(bs); 5225 5226 if (local_err) { 5227 bdrv_unref(new_node_bs); 5228 error_propagate(errp, local_err); 5229 return NULL; 5230 } 5231 5232 return new_node_bs; 5233 } 5234 5235 /* 5236 * Run consistency checks on an image 5237 * 5238 * Returns 0 if the check could be completed (it doesn't mean that the image is 5239 * free of errors) or -errno when an internal error occurred. The results of the 5240 * check are stored in res. 
5241 */ 5242 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 5243 BdrvCheckResult *res, BdrvCheckMode fix) 5244 { 5245 if (bs->drv == NULL) { 5246 return -ENOMEDIUM; 5247 } 5248 if (bs->drv->bdrv_co_check == NULL) { 5249 return -ENOTSUP; 5250 } 5251 5252 memset(res, 0, sizeof(*res)); 5253 return bs->drv->bdrv_co_check(bs, res, fix); 5254 } 5255 5256 /* 5257 * Return values: 5258 * 0 - success 5259 * -EINVAL - backing format specified, but no file 5260 * -ENOSPC - can't update the backing file because no space is left in the 5261 * image file header 5262 * -ENOTSUP - format driver doesn't support changing the backing file 5263 */ 5264 int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, 5265 const char *backing_fmt, bool warn) 5266 { 5267 BlockDriver *drv = bs->drv; 5268 int ret; 5269 5270 if (!drv) { 5271 return -ENOMEDIUM; 5272 } 5273 5274 /* Backing file format doesn't make sense without a backing file */ 5275 if (backing_fmt && !backing_file) { 5276 return -EINVAL; 5277 } 5278 5279 if (warn && backing_file && !backing_fmt) { 5280 warn_report("Deprecated use of backing file without explicit " 5281 "backing format, use of this image requires " 5282 "potentially unsafe format probing"); 5283 } 5284 5285 if (drv->bdrv_change_backing_file != NULL) { 5286 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 5287 } else { 5288 ret = -ENOTSUP; 5289 } 5290 5291 if (ret == 0) { 5292 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 5293 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 5294 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 5295 backing_file ?: ""); 5296 } 5297 return ret; 5298 } 5299 5300 /* 5301 * Finds the first non-filter node above bs in the chain between 5302 * active and bs. The returned node is either an immediate parent of 5303 * bs, or there are only filter nodes between the two. 
5304 * 5305 * Returns NULL if bs is not found in active's image chain, 5306 * or if active == bs. 5307 * 5308 * Returns the bottommost base image if bs == NULL. 5309 */ 5310 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 5311 BlockDriverState *bs) 5312 { 5313 bs = bdrv_skip_filters(bs); 5314 active = bdrv_skip_filters(active); 5315 5316 while (active) { 5317 BlockDriverState *next = bdrv_backing_chain_next(active); 5318 if (bs == next) { 5319 return active; 5320 } 5321 active = next; 5322 } 5323 5324 return NULL; 5325 } 5326 5327 /* Given a BDS, searches for the base layer. */ 5328 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 5329 { 5330 return bdrv_find_overlay(bs, NULL); 5331 } 5332 5333 /* 5334 * Return true if at least one of the COW (backing) and filter links 5335 * between @bs and @base is frozen. @errp is set if that's the case. 5336 * @base must be reachable from @bs, or NULL. 5337 */ 5338 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 5339 Error **errp) 5340 { 5341 BlockDriverState *i; 5342 BdrvChild *child; 5343 5344 for (i = bs; i != base; i = child_bs(child)) { 5345 child = bdrv_filter_or_cow_child(i); 5346 5347 if (child && child->frozen) { 5348 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 5349 child->name, i->node_name, child->bs->node_name); 5350 return true; 5351 } 5352 } 5353 5354 return false; 5355 } 5356 5357 /* 5358 * Freeze all COW (backing) and filter links between @bs and @base. 5359 * If any of the links is already frozen the operation is aborted and 5360 * none of the links are modified. 5361 * @base must be reachable from @bs, or NULL. 5362 * Returns 0 on success. On failure returns < 0 and sets @errp. 
5363 */ 5364 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 5365 Error **errp) 5366 { 5367 BlockDriverState *i; 5368 BdrvChild *child; 5369 5370 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 5371 return -EPERM; 5372 } 5373 5374 for (i = bs; i != base; i = child_bs(child)) { 5375 child = bdrv_filter_or_cow_child(i); 5376 if (child && child->bs->never_freeze) { 5377 error_setg(errp, "Cannot freeze '%s' link to '%s'", 5378 child->name, child->bs->node_name); 5379 return -EPERM; 5380 } 5381 } 5382 5383 for (i = bs; i != base; i = child_bs(child)) { 5384 child = bdrv_filter_or_cow_child(i); 5385 if (child) { 5386 child->frozen = true; 5387 } 5388 } 5389 5390 return 0; 5391 } 5392 5393 /* 5394 * Unfreeze all COW (backing) and filter links between @bs and @base. 5395 * The caller must ensure that all links are frozen before using this 5396 * function. 5397 * @base must be reachable from @bs, or NULL. 5398 */ 5399 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 5400 { 5401 BlockDriverState *i; 5402 BdrvChild *child; 5403 5404 for (i = bs; i != base; i = child_bs(child)) { 5405 child = bdrv_filter_or_cow_child(i); 5406 if (child) { 5407 assert(child->frozen); 5408 child->frozen = false; 5409 } 5410 } 5411 } 5412 5413 /* 5414 * Drops images above 'base' up to and including 'top', and sets the image 5415 * above 'top' to have base as its backing file. 5416 * 5417 * Requires that the overlay to 'top' is opened r/w, so that the backing file 5418 * information in 'bs' can be properly updated. 
5419 * 5420 * E.g., this will convert the following chain: 5421 * bottom <- base <- intermediate <- top <- active 5422 * 5423 * to 5424 * 5425 * bottom <- base <- active 5426 * 5427 * It is allowed for bottom==base, in which case it converts: 5428 * 5429 * base <- intermediate <- top <- active 5430 * 5431 * to 5432 * 5433 * base <- active 5434 * 5435 * If backing_file_str is non-NULL, it will be used when modifying top's 5436 * overlay image metadata. 5437 * 5438 * Error conditions: 5439 * if active == top, that is considered an error 5440 * 5441 */ 5442 int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, 5443 const char *backing_file_str) 5444 { 5445 BlockDriverState *explicit_top = top; 5446 bool update_inherits_from; 5447 BdrvChild *c; 5448 Error *local_err = NULL; 5449 int ret = -EIO; 5450 g_autoptr(GSList) updated_children = NULL; 5451 GSList *p; 5452 5453 bdrv_ref(top); 5454 bdrv_subtree_drained_begin(top); 5455 5456 if (!top->drv || !base->drv) { 5457 goto exit; 5458 } 5459 5460 /* Make sure that base is in the backing chain of top */ 5461 if (!bdrv_chain_contains(top, base)) { 5462 goto exit; 5463 } 5464 5465 /* If 'base' recursively inherits from 'top' then we should set 5466 * base->inherits_from to top->inherits_from after 'top' and all 5467 * other intermediate nodes have been dropped. 5468 * If 'top' is an implicit node (e.g. "commit_top") we should skip 5469 * it because no one inherits from it. We use explicit_top for that. */ 5470 explicit_top = bdrv_skip_implicit_filters(explicit_top); 5471 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top); 5472 5473 /* success - we can delete the intermediate states, and link top->base */ 5474 /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once 5475 * we've figured out how they should work. 
*/ 5476 if (!backing_file_str) { 5477 bdrv_refresh_filename(base); 5478 backing_file_str = base->filename; 5479 } 5480 5481 QLIST_FOREACH(c, &top->parents, next_parent) { 5482 updated_children = g_slist_prepend(updated_children, c); 5483 } 5484 5485 /* 5486 * It seems correct to pass detach_subchain=true here, but it triggers 5487 * one more yet not fixed bug, when due to nested aio_poll loop we switch to 5488 * another drained section, which modify the graph (for example, removing 5489 * the child, which we keep in updated_children list). So, it's a TODO. 5490 * 5491 * Note, bug triggered if pass detach_subchain=true here and run 5492 * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash. 5493 * That's a FIXME. 5494 */ 5495 bdrv_replace_node_common(top, base, false, false, &local_err); 5496 if (local_err) { 5497 error_report_err(local_err); 5498 goto exit; 5499 } 5500 5501 for (p = updated_children; p; p = p->next) { 5502 c = p->data; 5503 5504 if (c->klass->update_filename) { 5505 ret = c->klass->update_filename(c, base, backing_file_str, 5506 &local_err); 5507 if (ret < 0) { 5508 /* 5509 * TODO: Actually, we want to rollback all previous iterations 5510 * of this loop, and (which is almost impossible) previous 5511 * bdrv_replace_node()... 5512 * 5513 * Note, that c->klass->update_filename may lead to permission 5514 * update, so it's a bad idea to call it inside permission 5515 * update transaction of bdrv_replace_node. 5516 */ 5517 error_report_err(local_err); 5518 goto exit; 5519 } 5520 } 5521 } 5522 5523 if (update_inherits_from) { 5524 base->inherits_from = explicit_top->inherits_from; 5525 } 5526 5527 ret = 0; 5528 exit: 5529 bdrv_subtree_drained_end(top); 5530 bdrv_unref(top); 5531 return ret; 5532 } 5533 5534 /** 5535 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that 5536 * sums the size of all data-bearing children. (This excludes backing 5537 * children.) 
5538 */ 5539 static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) 5540 { 5541 BdrvChild *child; 5542 int64_t child_size, sum = 0; 5543 5544 QLIST_FOREACH(child, &bs->children, next) { 5545 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 5546 BDRV_CHILD_FILTERED)) 5547 { 5548 child_size = bdrv_get_allocated_file_size(child->bs); 5549 if (child_size < 0) { 5550 return child_size; 5551 } 5552 sum += child_size; 5553 } 5554 } 5555 5556 return sum; 5557 } 5558 5559 /** 5560 * Length of a allocated file in bytes. Sparse files are counted by actual 5561 * allocated space. Return < 0 if error or unknown. 5562 */ 5563 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 5564 { 5565 BlockDriver *drv = bs->drv; 5566 if (!drv) { 5567 return -ENOMEDIUM; 5568 } 5569 if (drv->bdrv_get_allocated_file_size) { 5570 return drv->bdrv_get_allocated_file_size(bs); 5571 } 5572 5573 if (drv->bdrv_file_open) { 5574 /* 5575 * Protocol drivers default to -ENOTSUP (most of their data is 5576 * not stored in any of their children (if they even have any), 5577 * so there is no generic way to figure it out). 5578 */ 5579 return -ENOTSUP; 5580 } else if (drv->is_filter) { 5581 /* Filter drivers default to the size of their filtered child */ 5582 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); 5583 } else { 5584 /* Other drivers default to summing their children's sizes */ 5585 return bdrv_sum_allocated_file_size(bs); 5586 } 5587 } 5588 5589 /* 5590 * bdrv_measure: 5591 * @drv: Format driver 5592 * @opts: Creation options for new image 5593 * @in_bs: Existing image containing data for new image (may be NULL) 5594 * @errp: Error object 5595 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 5596 * or NULL on error 5597 * 5598 * Calculate file size required to create a new image. 5599 * 5600 * If @in_bs is given then space for allocated clusters and zero clusters 5601 * from that image are included in the calculation. 
If @opts contains a 5602 * backing file that is shared by @in_bs then backing clusters may be omitted 5603 * from the calculation. 5604 * 5605 * If @in_bs is NULL then the calculation includes no allocated clusters 5606 * unless a preallocation option is given in @opts. 5607 * 5608 * Note that @in_bs may use a different BlockDriver from @drv. 5609 * 5610 * If an error occurs the @errp pointer is set. 5611 */ 5612 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 5613 BlockDriverState *in_bs, Error **errp) 5614 { 5615 if (!drv->bdrv_measure) { 5616 error_setg(errp, "Block driver '%s' does not support size measurement", 5617 drv->format_name); 5618 return NULL; 5619 } 5620 5621 return drv->bdrv_measure(opts, in_bs, errp); 5622 } 5623 5624 /** 5625 * Return number of sectors on success, -errno on error. 5626 */ 5627 int64_t bdrv_nb_sectors(BlockDriverState *bs) 5628 { 5629 BlockDriver *drv = bs->drv; 5630 5631 if (!drv) 5632 return -ENOMEDIUM; 5633 5634 if (drv->has_variable_length) { 5635 int ret = refresh_total_sectors(bs, bs->total_sectors); 5636 if (ret < 0) { 5637 return ret; 5638 } 5639 } 5640 return bs->total_sectors; 5641 } 5642 5643 /** 5644 * Return length in bytes on success, -errno on error. 5645 * The length is always a multiple of BDRV_SECTOR_SIZE. 5646 */ 5647 int64_t bdrv_getlength(BlockDriverState *bs) 5648 { 5649 int64_t ret = bdrv_nb_sectors(bs); 5650 5651 if (ret < 0) { 5652 return ret; 5653 } 5654 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 5655 return -EFBIG; 5656 } 5657 return ret * BDRV_SECTOR_SIZE; 5658 } 5659 5660 /* return 0 as number of sectors if no device present or error */ 5661 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 5662 { 5663 int64_t nb_sectors = bdrv_nb_sectors(bs); 5664 5665 *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 5666 } 5667 5668 bool bdrv_is_sg(BlockDriverState *bs) 5669 { 5670 return bs->sg; 5671 } 5672 5673 /** 5674 * Return whether the given node supports compressed writes. 5675 */ 5676 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 5677 { 5678 BlockDriverState *filtered; 5679 5680 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 5681 return false; 5682 } 5683 5684 filtered = bdrv_filter_bs(bs); 5685 if (filtered) { 5686 /* 5687 * Filters can only forward compressed writes, so we have to 5688 * check the child. 5689 */ 5690 return bdrv_supports_compressed_writes(filtered); 5691 } 5692 5693 return true; 5694 } 5695 5696 const char *bdrv_get_format_name(BlockDriverState *bs) 5697 { 5698 return bs->drv ? bs->drv->format_name : NULL; 5699 } 5700 5701 static int qsort_strcmp(const void *a, const void *b) 5702 { 5703 return strcmp(*(char *const *)a, *(char *const *)b); 5704 } 5705 5706 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 5707 void *opaque, bool read_only) 5708 { 5709 BlockDriver *drv; 5710 int count = 0; 5711 int i; 5712 const char **formats = NULL; 5713 5714 QLIST_FOREACH(drv, &bdrv_drivers, list) { 5715 if (drv->format_name) { 5716 bool found = false; 5717 int i = count; 5718 5719 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 5720 continue; 5721 } 5722 5723 while (formats && i && !found) { 5724 found = !strcmp(formats[--i], drv->format_name); 5725 } 5726 5727 if (!found) { 5728 formats = g_renew(const char *, formats, count + 1); 5729 formats[count++] = drv->format_name; 5730 } 5731 } 5732 } 5733 5734 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 5735 const char *format_name = block_driver_modules[i].format_name; 5736 5737 if (format_name) { 5738 bool found = false; 5739 int j = count; 5740 5741 if (use_bdrv_whitelist && 5742 !bdrv_format_is_whitelisted(format_name, read_only)) { 5743 continue; 5744 } 5745 5746 while (formats && j && !found) { 5747 found = 
!strcmp(formats[--j], format_name); 5748 } 5749 5750 if (!found) { 5751 formats = g_renew(const char *, formats, count + 1); 5752 formats[count++] = format_name; 5753 } 5754 } 5755 } 5756 5757 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 5758 5759 for (i = 0; i < count; i++) { 5760 it(opaque, formats[i]); 5761 } 5762 5763 g_free(formats); 5764 } 5765 5766 /* This function is to find a node in the bs graph */ 5767 BlockDriverState *bdrv_find_node(const char *node_name) 5768 { 5769 BlockDriverState *bs; 5770 5771 assert(node_name); 5772 5773 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5774 if (!strcmp(node_name, bs->node_name)) { 5775 return bs; 5776 } 5777 } 5778 return NULL; 5779 } 5780 5781 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 5782 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 5783 Error **errp) 5784 { 5785 BlockDeviceInfoList *list; 5786 BlockDriverState *bs; 5787 5788 list = NULL; 5789 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5790 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 5791 if (!info) { 5792 qapi_free_BlockDeviceInfoList(list); 5793 return NULL; 5794 } 5795 QAPI_LIST_PREPEND(list, info); 5796 } 5797 5798 return list; 5799 } 5800 5801 typedef struct XDbgBlockGraphConstructor { 5802 XDbgBlockGraph *graph; 5803 GHashTable *graph_nodes; 5804 } XDbgBlockGraphConstructor; 5805 5806 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 5807 { 5808 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 5809 5810 gr->graph = g_new0(XDbgBlockGraph, 1); 5811 gr->graph_nodes = g_hash_table_new(NULL, NULL); 5812 5813 return gr; 5814 } 5815 5816 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 5817 { 5818 XDbgBlockGraph *graph = gr->graph; 5819 5820 g_hash_table_destroy(gr->graph_nodes); 5821 g_free(gr); 5822 5823 return graph; 5824 } 5825 5826 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 
5827 { 5828 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 5829 5830 if (ret != 0) { 5831 return ret; 5832 } 5833 5834 /* 5835 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 5836 * answer of g_hash_table_lookup. 5837 */ 5838 ret = g_hash_table_size(gr->graph_nodes) + 1; 5839 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 5840 5841 return ret; 5842 } 5843 5844 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 5845 XDbgBlockGraphNodeType type, const char *name) 5846 { 5847 XDbgBlockGraphNode *n; 5848 5849 n = g_new0(XDbgBlockGraphNode, 1); 5850 5851 n->id = xdbg_graph_node_num(gr, node); 5852 n->type = type; 5853 n->name = g_strdup(name); 5854 5855 QAPI_LIST_PREPEND(gr->graph->nodes, n); 5856 } 5857 5858 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 5859 const BdrvChild *child) 5860 { 5861 BlockPermission qapi_perm; 5862 XDbgBlockGraphEdge *edge; 5863 5864 edge = g_new0(XDbgBlockGraphEdge, 1); 5865 5866 edge->parent = xdbg_graph_node_num(gr, parent); 5867 edge->child = xdbg_graph_node_num(gr, child->bs); 5868 edge->name = g_strdup(child->name); 5869 5870 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 5871 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 5872 5873 if (flag & child->perm) { 5874 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 5875 } 5876 if (flag & child->shared_perm) { 5877 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 5878 } 5879 } 5880 5881 QAPI_LIST_PREPEND(gr->graph->edges, edge); 5882 } 5883 5884 5885 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 5886 { 5887 BlockBackend *blk; 5888 BlockJob *job; 5889 BlockDriverState *bs; 5890 BdrvChild *child; 5891 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 5892 5893 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 5894 char *allocated_name = NULL; 5895 const char *name = blk_name(blk); 5896 5897 if (!*name) { 5898 name = allocated_name = 
blk_get_attached_dev_id(blk); 5899 } 5900 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 5901 name); 5902 g_free(allocated_name); 5903 if (blk_root(blk)) { 5904 xdbg_graph_add_edge(gr, blk, blk_root(blk)); 5905 } 5906 } 5907 5908 for (job = block_job_next(NULL); job; job = block_job_next(job)) { 5909 GSList *el; 5910 5911 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 5912 job->job.id); 5913 for (el = job->nodes; el; el = el->next) { 5914 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 5915 } 5916 } 5917 5918 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5919 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 5920 bs->node_name); 5921 QLIST_FOREACH(child, &bs->children, next) { 5922 xdbg_graph_add_edge(gr, bs, child); 5923 } 5924 } 5925 5926 return xdbg_graph_finalize(gr); 5927 } 5928 5929 BlockDriverState *bdrv_lookup_bs(const char *device, 5930 const char *node_name, 5931 Error **errp) 5932 { 5933 BlockBackend *blk; 5934 BlockDriverState *bs; 5935 5936 if (device) { 5937 blk = blk_by_name(device); 5938 5939 if (blk) { 5940 bs = blk_bs(blk); 5941 if (!bs) { 5942 error_setg(errp, "Device '%s' has no medium", device); 5943 } 5944 5945 return bs; 5946 } 5947 } 5948 5949 if (node_name) { 5950 bs = bdrv_find_node(node_name); 5951 5952 if (bs) { 5953 return bs; 5954 } 5955 } 5956 5957 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 5958 device ? device : "", 5959 node_name ? node_name : ""); 5960 return NULL; 5961 } 5962 5963 /* If 'base' is in the same chain as 'top', return true. Otherwise, 5964 * return false. If either argument is NULL, return false. 
*/ 5965 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 5966 { 5967 while (top && top != base) { 5968 top = bdrv_filter_or_cow_bs(top); 5969 } 5970 5971 return top != NULL; 5972 } 5973 5974 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 5975 { 5976 if (!bs) { 5977 return QTAILQ_FIRST(&graph_bdrv_states); 5978 } 5979 return QTAILQ_NEXT(bs, node_list); 5980 } 5981 5982 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 5983 { 5984 if (!bs) { 5985 return QTAILQ_FIRST(&all_bdrv_states); 5986 } 5987 return QTAILQ_NEXT(bs, bs_list); 5988 } 5989 5990 const char *bdrv_get_node_name(const BlockDriverState *bs) 5991 { 5992 return bs->node_name; 5993 } 5994 5995 const char *bdrv_get_parent_name(const BlockDriverState *bs) 5996 { 5997 BdrvChild *c; 5998 const char *name; 5999 6000 /* If multiple parents have a name, just pick the first one. */ 6001 QLIST_FOREACH(c, &bs->parents, next_parent) { 6002 if (c->klass->get_name) { 6003 name = c->klass->get_name(c); 6004 if (name && *name) { 6005 return name; 6006 } 6007 } 6008 } 6009 6010 return NULL; 6011 } 6012 6013 /* TODO check what callers really want: bs->node_name or blk_name() */ 6014 const char *bdrv_get_device_name(const BlockDriverState *bs) 6015 { 6016 return bdrv_get_parent_name(bs) ?: ""; 6017 } 6018 6019 /* This can be used to identify nodes that might not have a device 6020 * name associated. Since node and device names live in the same 6021 * namespace, the result is unambiguous. The exception is if both are 6022 * absent, then this returns an empty (non-null) string. 
*/ 6023 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 6024 { 6025 return bdrv_get_parent_name(bs) ?: bs->node_name; 6026 } 6027 6028 int bdrv_get_flags(BlockDriverState *bs) 6029 { 6030 return bs->open_flags; 6031 } 6032 6033 int bdrv_has_zero_init_1(BlockDriverState *bs) 6034 { 6035 return 1; 6036 } 6037 6038 int bdrv_has_zero_init(BlockDriverState *bs) 6039 { 6040 BlockDriverState *filtered; 6041 6042 if (!bs->drv) { 6043 return 0; 6044 } 6045 6046 /* If BS is a copy on write image, it is initialized to 6047 the contents of the base image, which may not be zeroes. */ 6048 if (bdrv_cow_child(bs)) { 6049 return 0; 6050 } 6051 if (bs->drv->bdrv_has_zero_init) { 6052 return bs->drv->bdrv_has_zero_init(bs); 6053 } 6054 6055 filtered = bdrv_filter_bs(bs); 6056 if (filtered) { 6057 return bdrv_has_zero_init(filtered); 6058 } 6059 6060 /* safe default */ 6061 return 0; 6062 } 6063 6064 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 6065 { 6066 if (!(bs->open_flags & BDRV_O_UNMAP)) { 6067 return false; 6068 } 6069 6070 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; 6071 } 6072 6073 void bdrv_get_backing_filename(BlockDriverState *bs, 6074 char *filename, int filename_size) 6075 { 6076 pstrcpy(filename, filename_size, bs->backing_file); 6077 } 6078 6079 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 6080 { 6081 int ret; 6082 BlockDriver *drv = bs->drv; 6083 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 6084 if (!drv) { 6085 return -ENOMEDIUM; 6086 } 6087 if (!drv->bdrv_get_info) { 6088 BlockDriverState *filtered = bdrv_filter_bs(bs); 6089 if (filtered) { 6090 return bdrv_get_info(filtered, bdi); 6091 } 6092 return -ENOTSUP; 6093 } 6094 memset(bdi, 0, sizeof(*bdi)); 6095 ret = drv->bdrv_get_info(bs, bdi); 6096 if (ret < 0) { 6097 return ret; 6098 } 6099 6100 if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) { 6101 return -EINVAL; 6102 } 6103 6104 return 0; 6105 } 6106 6107 ImageInfoSpecific 
*bdrv_get_specific_info(BlockDriverState *bs, 6108 Error **errp) 6109 { 6110 BlockDriver *drv = bs->drv; 6111 if (drv && drv->bdrv_get_specific_info) { 6112 return drv->bdrv_get_specific_info(bs, errp); 6113 } 6114 return NULL; 6115 } 6116 6117 BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) 6118 { 6119 BlockDriver *drv = bs->drv; 6120 if (!drv || !drv->bdrv_get_specific_stats) { 6121 return NULL; 6122 } 6123 return drv->bdrv_get_specific_stats(bs); 6124 } 6125 6126 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) 6127 { 6128 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 6129 return; 6130 } 6131 6132 bs->drv->bdrv_debug_event(bs, event); 6133 } 6134 6135 static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) 6136 { 6137 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 6138 bs = bdrv_primary_bs(bs); 6139 } 6140 6141 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 6142 assert(bs->drv->bdrv_debug_remove_breakpoint); 6143 return bs; 6144 } 6145 6146 return NULL; 6147 } 6148 6149 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 6150 const char *tag) 6151 { 6152 bs = bdrv_find_debug_node(bs); 6153 if (bs) { 6154 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 6155 } 6156 6157 return -ENOTSUP; 6158 } 6159 6160 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 6161 { 6162 bs = bdrv_find_debug_node(bs); 6163 if (bs) { 6164 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 6165 } 6166 6167 return -ENOTSUP; 6168 } 6169 6170 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 6171 { 6172 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 6173 bs = bdrv_primary_bs(bs); 6174 } 6175 6176 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 6177 return bs->drv->bdrv_debug_resume(bs, tag); 6178 } 6179 6180 return -ENOTSUP; 6181 } 6182 6183 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 6184 { 6185 while (bs && bs->drv && 
!bs->drv->bdrv_debug_is_suspended) { 6186 bs = bdrv_primary_bs(bs); 6187 } 6188 6189 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 6190 return bs->drv->bdrv_debug_is_suspended(bs, tag); 6191 } 6192 6193 return false; 6194 } 6195 6196 /* backing_file can either be relative, or absolute, or a protocol. If it is 6197 * relative, it must be relative to the chain. So, passing in bs->filename 6198 * from a BDS as backing_file should not be done, as that may be relative to 6199 * the CWD rather than the chain. */ 6200 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 6201 const char *backing_file) 6202 { 6203 char *filename_full = NULL; 6204 char *backing_file_full = NULL; 6205 char *filename_tmp = NULL; 6206 int is_protocol = 0; 6207 bool filenames_refreshed = false; 6208 BlockDriverState *curr_bs = NULL; 6209 BlockDriverState *retval = NULL; 6210 BlockDriverState *bs_below; 6211 6212 if (!bs || !bs->drv || !backing_file) { 6213 return NULL; 6214 } 6215 6216 filename_full = g_malloc(PATH_MAX); 6217 backing_file_full = g_malloc(PATH_MAX); 6218 6219 is_protocol = path_has_protocol(backing_file); 6220 6221 /* 6222 * Being largely a legacy function, skip any filters here 6223 * (because filters do not have normal filenames, so they cannot 6224 * match anyway; and allowing json:{} filenames is a bit out of 6225 * scope). 6226 */ 6227 for (curr_bs = bdrv_skip_filters(bs); 6228 bdrv_cow_child(curr_bs) != NULL; 6229 curr_bs = bs_below) 6230 { 6231 bs_below = bdrv_backing_chain_next(curr_bs); 6232 6233 if (bdrv_backing_overridden(curr_bs)) { 6234 /* 6235 * If the backing file was overridden, we can only compare 6236 * directly against the backing node's filename. 6237 */ 6238 6239 if (!filenames_refreshed) { 6240 /* 6241 * This will automatically refresh all of the 6242 * filenames in the rest of the backing chain, so we 6243 * only need to do this once. 
6244 */ 6245 bdrv_refresh_filename(bs_below); 6246 filenames_refreshed = true; 6247 } 6248 6249 if (strcmp(backing_file, bs_below->filename) == 0) { 6250 retval = bs_below; 6251 break; 6252 } 6253 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 6254 /* 6255 * If either of the filename paths is actually a protocol, then 6256 * compare unmodified paths; otherwise make paths relative. 6257 */ 6258 char *backing_file_full_ret; 6259 6260 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 6261 retval = bs_below; 6262 break; 6263 } 6264 /* Also check against the full backing filename for the image */ 6265 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs, 6266 NULL); 6267 if (backing_file_full_ret) { 6268 bool equal = strcmp(backing_file, backing_file_full_ret) == 0; 6269 g_free(backing_file_full_ret); 6270 if (equal) { 6271 retval = bs_below; 6272 break; 6273 } 6274 } 6275 } else { 6276 /* If not an absolute filename path, make it relative to the current 6277 * image's filename path */ 6278 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file, 6279 NULL); 6280 /* We are going to compare canonicalized absolute pathnames */ 6281 if (!filename_tmp || !realpath(filename_tmp, filename_full)) { 6282 g_free(filename_tmp); 6283 continue; 6284 } 6285 g_free(filename_tmp); 6286 6287 /* We need to make sure the backing filename we are comparing against 6288 * is relative to the current image filename (or absolute) */ 6289 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL); 6290 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) { 6291 g_free(filename_tmp); 6292 continue; 6293 } 6294 g_free(filename_tmp); 6295 6296 if (strcmp(backing_file_full, filename_full) == 0) { 6297 retval = bs_below; 6298 break; 6299 } 6300 } 6301 } 6302 6303 g_free(filename_full); 6304 g_free(backing_file_full); 6305 return retval; 6306 } 6307 6308 void bdrv_init(void) 6309 { 6310 module_call_init(MODULE_INIT_BLOCK); 6311 } 6312 6313 
void bdrv_init_with_whitelist(void) 6314 { 6315 use_bdrv_whitelist = 1; 6316 bdrv_init(); 6317 } 6318 6319 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) 6320 { 6321 BdrvChild *child, *parent; 6322 Error *local_err = NULL; 6323 int ret; 6324 BdrvDirtyBitmap *bm; 6325 6326 if (!bs->drv) { 6327 return -ENOMEDIUM; 6328 } 6329 6330 QLIST_FOREACH(child, &bs->children, next) { 6331 bdrv_co_invalidate_cache(child->bs, &local_err); 6332 if (local_err) { 6333 error_propagate(errp, local_err); 6334 return -EINVAL; 6335 } 6336 } 6337 6338 /* 6339 * Update permissions, they may differ for inactive nodes. 6340 * 6341 * Note that the required permissions of inactive images are always a 6342 * subset of the permissions required after activating the image. This 6343 * allows us to just get the permissions upfront without restricting 6344 * drv->bdrv_invalidate_cache(). 6345 * 6346 * It also means that in error cases, we don't have to try and revert to 6347 * the old permissions (which is an operation that could fail, too). We can 6348 * just keep the extended permissions for the next time that an activation 6349 * of the image is tried. 
6350 */ 6351 if (bs->open_flags & BDRV_O_INACTIVE) { 6352 bs->open_flags &= ~BDRV_O_INACTIVE; 6353 ret = bdrv_refresh_perms(bs, errp); 6354 if (ret < 0) { 6355 bs->open_flags |= BDRV_O_INACTIVE; 6356 return ret; 6357 } 6358 6359 if (bs->drv->bdrv_co_invalidate_cache) { 6360 bs->drv->bdrv_co_invalidate_cache(bs, &local_err); 6361 if (local_err) { 6362 bs->open_flags |= BDRV_O_INACTIVE; 6363 error_propagate(errp, local_err); 6364 return -EINVAL; 6365 } 6366 } 6367 6368 FOR_EACH_DIRTY_BITMAP(bs, bm) { 6369 bdrv_dirty_bitmap_skip_store(bm, false); 6370 } 6371 6372 ret = refresh_total_sectors(bs, bs->total_sectors); 6373 if (ret < 0) { 6374 bs->open_flags |= BDRV_O_INACTIVE; 6375 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 6376 return ret; 6377 } 6378 } 6379 6380 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6381 if (parent->klass->activate) { 6382 parent->klass->activate(parent, &local_err); 6383 if (local_err) { 6384 bs->open_flags |= BDRV_O_INACTIVE; 6385 error_propagate(errp, local_err); 6386 return -EINVAL; 6387 } 6388 } 6389 } 6390 6391 return 0; 6392 } 6393 6394 void bdrv_invalidate_cache_all(Error **errp) 6395 { 6396 BlockDriverState *bs; 6397 BdrvNextIterator it; 6398 6399 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6400 AioContext *aio_context = bdrv_get_aio_context(bs); 6401 int ret; 6402 6403 aio_context_acquire(aio_context); 6404 ret = bdrv_invalidate_cache(bs, errp); 6405 aio_context_release(aio_context); 6406 if (ret < 0) { 6407 bdrv_next_cleanup(&it); 6408 return; 6409 } 6410 } 6411 } 6412 6413 static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) 6414 { 6415 BdrvChild *parent; 6416 6417 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6418 if (parent->klass->parent_is_bds) { 6419 BlockDriverState *parent_bs = parent->opaque; 6420 if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) { 6421 return true; 6422 } 6423 } 6424 } 6425 6426 return false; 6427 } 6428 6429 static int 
bdrv_inactivate_recurse(BlockDriverState *bs) 6430 { 6431 BdrvChild *child, *parent; 6432 int ret; 6433 6434 if (!bs->drv) { 6435 return -ENOMEDIUM; 6436 } 6437 6438 /* Make sure that we don't inactivate a child before its parent. 6439 * It will be covered by recursion from the yet active parent. */ 6440 if (bdrv_has_bds_parent(bs, true)) { 6441 return 0; 6442 } 6443 6444 assert(!(bs->open_flags & BDRV_O_INACTIVE)); 6445 6446 /* Inactivate this node */ 6447 if (bs->drv->bdrv_inactivate) { 6448 ret = bs->drv->bdrv_inactivate(bs); 6449 if (ret < 0) { 6450 return ret; 6451 } 6452 } 6453 6454 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6455 if (parent->klass->inactivate) { 6456 ret = parent->klass->inactivate(parent); 6457 if (ret < 0) { 6458 return ret; 6459 } 6460 } 6461 } 6462 6463 bs->open_flags |= BDRV_O_INACTIVE; 6464 6465 /* 6466 * Update permissions, they may differ for inactive nodes. 6467 * We only tried to loosen restrictions, so errors are not fatal, ignore 6468 * them. 6469 */ 6470 bdrv_refresh_perms(bs, NULL); 6471 6472 /* Recursively inactivate children */ 6473 QLIST_FOREACH(child, &bs->children, next) { 6474 ret = bdrv_inactivate_recurse(child->bs); 6475 if (ret < 0) { 6476 return ret; 6477 } 6478 } 6479 6480 return 0; 6481 } 6482 6483 int bdrv_inactivate_all(void) 6484 { 6485 BlockDriverState *bs = NULL; 6486 BdrvNextIterator it; 6487 int ret = 0; 6488 GSList *aio_ctxs = NULL, *ctx; 6489 6490 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6491 AioContext *aio_context = bdrv_get_aio_context(bs); 6492 6493 if (!g_slist_find(aio_ctxs, aio_context)) { 6494 aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); 6495 aio_context_acquire(aio_context); 6496 } 6497 } 6498 6499 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6500 /* Nodes with BDS parents are covered by recursion from the last 6501 * parent that gets inactivated. Don't inactivate them a second 6502 * time if that has already happened. 
*/ 6503 if (bdrv_has_bds_parent(bs, false)) { 6504 continue; 6505 } 6506 ret = bdrv_inactivate_recurse(bs); 6507 if (ret < 0) { 6508 bdrv_next_cleanup(&it); 6509 goto out; 6510 } 6511 } 6512 6513 out: 6514 for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { 6515 AioContext *aio_context = ctx->data; 6516 aio_context_release(aio_context); 6517 } 6518 g_slist_free(aio_ctxs); 6519 6520 return ret; 6521 } 6522 6523 /**************************************************************/ 6524 /* removable device support */ 6525 6526 /** 6527 * Return TRUE if the media is present 6528 */ 6529 bool bdrv_is_inserted(BlockDriverState *bs) 6530 { 6531 BlockDriver *drv = bs->drv; 6532 BdrvChild *child; 6533 6534 if (!drv) { 6535 return false; 6536 } 6537 if (drv->bdrv_is_inserted) { 6538 return drv->bdrv_is_inserted(bs); 6539 } 6540 QLIST_FOREACH(child, &bs->children, next) { 6541 if (!bdrv_is_inserted(child->bs)) { 6542 return false; 6543 } 6544 } 6545 return true; 6546 } 6547 6548 /** 6549 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 6550 */ 6551 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 6552 { 6553 BlockDriver *drv = bs->drv; 6554 6555 if (drv && drv->bdrv_eject) { 6556 drv->bdrv_eject(bs, eject_flag); 6557 } 6558 } 6559 6560 /** 6561 * Lock or unlock the media (if it is locked, the user won't be able 6562 * to eject it manually). 6563 */ 6564 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 6565 { 6566 BlockDriver *drv = bs->drv; 6567 6568 trace_bdrv_lock_medium(bs, locked); 6569 6570 if (drv && drv->bdrv_lock_medium) { 6571 drv->bdrv_lock_medium(bs, locked); 6572 } 6573 } 6574 6575 /* Get a reference to bs */ 6576 void bdrv_ref(BlockDriverState *bs) 6577 { 6578 bs->refcnt++; 6579 } 6580 6581 /* Release a previously grabbed reference to bs. 6582 * If after releasing, reference count is zero, the BlockDriverState is 6583 * deleted. 
*/ 6584 void bdrv_unref(BlockDriverState *bs) 6585 { 6586 if (!bs) { 6587 return; 6588 } 6589 assert(bs->refcnt > 0); 6590 if (--bs->refcnt == 0) { 6591 bdrv_delete(bs); 6592 } 6593 } 6594 6595 struct BdrvOpBlocker { 6596 Error *reason; 6597 QLIST_ENTRY(BdrvOpBlocker) list; 6598 }; 6599 6600 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 6601 { 6602 BdrvOpBlocker *blocker; 6603 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6604 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 6605 blocker = QLIST_FIRST(&bs->op_blockers[op]); 6606 error_propagate_prepend(errp, error_copy(blocker->reason), 6607 "Node '%s' is busy: ", 6608 bdrv_get_device_or_node_name(bs)); 6609 return true; 6610 } 6611 return false; 6612 } 6613 6614 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 6615 { 6616 BdrvOpBlocker *blocker; 6617 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6618 6619 blocker = g_new0(BdrvOpBlocker, 1); 6620 blocker->reason = reason; 6621 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 6622 } 6623 6624 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 6625 { 6626 BdrvOpBlocker *blocker, *next; 6627 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6628 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 6629 if (blocker->reason == reason) { 6630 QLIST_REMOVE(blocker, list); 6631 g_free(blocker); 6632 } 6633 } 6634 } 6635 6636 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 6637 { 6638 int i; 6639 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6640 bdrv_op_block(bs, i, reason); 6641 } 6642 } 6643 6644 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 6645 { 6646 int i; 6647 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6648 bdrv_op_unblock(bs, i, reason); 6649 } 6650 } 6651 6652 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 6653 { 6654 int i; 6655 6656 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6657 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 6658 return false; 6659 } 
6660 } 6661 return true; 6662 } 6663 6664 void bdrv_img_create(const char *filename, const char *fmt, 6665 const char *base_filename, const char *base_fmt, 6666 char *options, uint64_t img_size, int flags, bool quiet, 6667 Error **errp) 6668 { 6669 QemuOptsList *create_opts = NULL; 6670 QemuOpts *opts = NULL; 6671 const char *backing_fmt, *backing_file; 6672 int64_t size; 6673 BlockDriver *drv, *proto_drv; 6674 Error *local_err = NULL; 6675 int ret = 0; 6676 6677 /* Find driver and parse its options */ 6678 drv = bdrv_find_format(fmt); 6679 if (!drv) { 6680 error_setg(errp, "Unknown file format '%s'", fmt); 6681 return; 6682 } 6683 6684 proto_drv = bdrv_find_protocol(filename, true, errp); 6685 if (!proto_drv) { 6686 return; 6687 } 6688 6689 if (!drv->create_opts) { 6690 error_setg(errp, "Format driver '%s' does not support image creation", 6691 drv->format_name); 6692 return; 6693 } 6694 6695 if (!proto_drv->create_opts) { 6696 error_setg(errp, "Protocol driver '%s' does not support image creation", 6697 proto_drv->format_name); 6698 return; 6699 } 6700 6701 /* Create parameter list */ 6702 create_opts = qemu_opts_append(create_opts, drv->create_opts); 6703 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 6704 6705 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 6706 6707 /* Parse -o options */ 6708 if (options) { 6709 if (!qemu_opts_do_parse(opts, options, NULL, errp)) { 6710 goto out; 6711 } 6712 } 6713 6714 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { 6715 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 6716 } else if (img_size != UINT64_C(-1)) { 6717 error_setg(errp, "The image size must be specified only once"); 6718 goto out; 6719 } 6720 6721 if (base_filename) { 6722 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, 6723 NULL)) { 6724 error_setg(errp, "Backing file not supported for file format '%s'", 6725 fmt); 6726 goto out; 6727 } 6728 } 6729 6730 if (base_fmt) { 6731 if (!qemu_opt_set(opts, 
BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { 6732 error_setg(errp, "Backing file format not supported for file " 6733 "format '%s'", fmt); 6734 goto out; 6735 } 6736 } 6737 6738 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 6739 if (backing_file) { 6740 if (!strcmp(filename, backing_file)) { 6741 error_setg(errp, "Error: Trying to create an image with the " 6742 "same filename as the backing file"); 6743 goto out; 6744 } 6745 if (backing_file[0] == '\0') { 6746 error_setg(errp, "Expected backing file name, got empty string"); 6747 goto out; 6748 } 6749 } 6750 6751 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 6752 6753 /* The size for the image must always be specified, unless we have a backing 6754 * file and we have not been forbidden from opening it. */ 6755 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 6756 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 6757 BlockDriverState *bs; 6758 char *full_backing; 6759 int back_flags; 6760 QDict *backing_options = NULL; 6761 6762 full_backing = 6763 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 6764 &local_err); 6765 if (local_err) { 6766 goto out; 6767 } 6768 assert(full_backing); 6769 6770 /* backing files always opened read-only */ 6771 back_flags = flags; 6772 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 6773 6774 backing_options = qdict_new(); 6775 if (backing_fmt) { 6776 qdict_put_str(backing_options, "driver", backing_fmt); 6777 } 6778 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 6779 6780 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 6781 &local_err); 6782 g_free(full_backing); 6783 if (!bs) { 6784 error_append_hint(&local_err, "Could not open backing image.\n"); 6785 goto out; 6786 } else { 6787 if (!backing_fmt) { 6788 warn_report("Deprecated use of backing file without explicit " 6789 "backing format (detected format of %s)", 6790 bs->drv->format_name); 6791 if (bs->drv != &bdrv_raw) { 6792 /* 
6793 * A probe of raw deserves the most attention: 6794 * leaving the backing format out of the image 6795 * will ensure bs->probed is set (ensuring we 6796 * don't accidentally commit into the backing 6797 * file), and allow more spots to warn the users 6798 * to fix their toolchain when opening this image 6799 * later. For other images, we can safely record 6800 * the format that we probed. 6801 */ 6802 backing_fmt = bs->drv->format_name; 6803 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, 6804 NULL); 6805 } 6806 } 6807 if (size == -1) { 6808 /* Opened BS, have no size */ 6809 size = bdrv_getlength(bs); 6810 if (size < 0) { 6811 error_setg_errno(errp, -size, "Could not get size of '%s'", 6812 backing_file); 6813 bdrv_unref(bs); 6814 goto out; 6815 } 6816 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 6817 } 6818 bdrv_unref(bs); 6819 } 6820 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 6821 } else if (backing_file && !backing_fmt) { 6822 warn_report("Deprecated use of unopened backing file without " 6823 "explicit backing format, use of this image requires " 6824 "potentially unsafe format probing"); 6825 } 6826 6827 if (size == -1) { 6828 error_setg(errp, "Image creation needs a size parameter"); 6829 goto out; 6830 } 6831 6832 if (!quiet) { 6833 printf("Formatting '%s', fmt=%s ", filename, fmt); 6834 qemu_opts_print(opts, " "); 6835 puts(""); 6836 fflush(stdout); 6837 } 6838 6839 ret = bdrv_create(drv, filename, opts, &local_err); 6840 6841 if (ret == -EFBIG) { 6842 /* This is generally a better message than whatever the driver would 6843 * deliver (especially because of the cluster_size_hint), since that 6844 * is most probably not much different from "image too large". 
*/ 6845 const char *cluster_size_hint = ""; 6846 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 6847 cluster_size_hint = " (try using a larger cluster size)"; 6848 } 6849 error_setg(errp, "The image size is too large for file format '%s'" 6850 "%s", fmt, cluster_size_hint); 6851 error_free(local_err); 6852 local_err = NULL; 6853 } 6854 6855 out: 6856 qemu_opts_del(opts); 6857 qemu_opts_free(create_opts); 6858 error_propagate(errp, local_err); 6859 } 6860 6861 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 6862 { 6863 return bs ? bs->aio_context : qemu_get_aio_context(); 6864 } 6865 6866 AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) 6867 { 6868 Coroutine *self = qemu_coroutine_self(); 6869 AioContext *old_ctx = qemu_coroutine_get_aio_context(self); 6870 AioContext *new_ctx; 6871 6872 /* 6873 * Increase bs->in_flight to ensure that this operation is completed before 6874 * moving the node to a different AioContext. Read new_ctx only afterwards. 6875 */ 6876 bdrv_inc_in_flight(bs); 6877 6878 new_ctx = bdrv_get_aio_context(bs); 6879 aio_co_reschedule_self(new_ctx); 6880 return old_ctx; 6881 } 6882 6883 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) 6884 { 6885 aio_co_reschedule_self(old_ctx); 6886 bdrv_dec_in_flight(bs); 6887 } 6888 6889 void coroutine_fn bdrv_co_lock(BlockDriverState *bs) 6890 { 6891 AioContext *ctx = bdrv_get_aio_context(bs); 6892 6893 /* In the main thread, bs->aio_context won't change concurrently */ 6894 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6895 6896 /* 6897 * We're in coroutine context, so we already hold the lock of the main 6898 * loop AioContext. Don't lock it twice to avoid deadlocks. 
6899 */ 6900 assert(qemu_in_coroutine()); 6901 if (ctx != qemu_get_aio_context()) { 6902 aio_context_acquire(ctx); 6903 } 6904 } 6905 6906 void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) 6907 { 6908 AioContext *ctx = bdrv_get_aio_context(bs); 6909 6910 assert(qemu_in_coroutine()); 6911 if (ctx != qemu_get_aio_context()) { 6912 aio_context_release(ctx); 6913 } 6914 } 6915 6916 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) 6917 { 6918 aio_co_enter(bdrv_get_aio_context(bs), co); 6919 } 6920 6921 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) 6922 { 6923 QLIST_REMOVE(ban, list); 6924 g_free(ban); 6925 } 6926 6927 static void bdrv_detach_aio_context(BlockDriverState *bs) 6928 { 6929 BdrvAioNotifier *baf, *baf_tmp; 6930 6931 assert(!bs->walking_aio_notifiers); 6932 bs->walking_aio_notifiers = true; 6933 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { 6934 if (baf->deleted) { 6935 bdrv_do_remove_aio_context_notifier(baf); 6936 } else { 6937 baf->detach_aio_context(baf->opaque); 6938 } 6939 } 6940 /* Never mind iterating again to check for ->deleted. bdrv_close() will 6941 * remove remaining aio notifiers if we aren't called again. 
6942 */ 6943 bs->walking_aio_notifiers = false; 6944 6945 if (bs->drv && bs->drv->bdrv_detach_aio_context) { 6946 bs->drv->bdrv_detach_aio_context(bs); 6947 } 6948 6949 if (bs->quiesce_counter) { 6950 aio_enable_external(bs->aio_context); 6951 } 6952 bs->aio_context = NULL; 6953 } 6954 6955 static void bdrv_attach_aio_context(BlockDriverState *bs, 6956 AioContext *new_context) 6957 { 6958 BdrvAioNotifier *ban, *ban_tmp; 6959 6960 if (bs->quiesce_counter) { 6961 aio_disable_external(new_context); 6962 } 6963 6964 bs->aio_context = new_context; 6965 6966 if (bs->drv && bs->drv->bdrv_attach_aio_context) { 6967 bs->drv->bdrv_attach_aio_context(bs, new_context); 6968 } 6969 6970 assert(!bs->walking_aio_notifiers); 6971 bs->walking_aio_notifiers = true; 6972 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { 6973 if (ban->deleted) { 6974 bdrv_do_remove_aio_context_notifier(ban); 6975 } else { 6976 ban->attached_aio_context(new_context, ban->opaque); 6977 } 6978 } 6979 bs->walking_aio_notifiers = false; 6980 } 6981 6982 /* 6983 * Changes the AioContext used for fd handlers, timers, and BHs by this 6984 * BlockDriverState and all its children and parents. 6985 * 6986 * Must be called from the main AioContext. 6987 * 6988 * The caller must own the AioContext lock for the old AioContext of bs, but it 6989 * must not own the AioContext lock for new_context (unless new_context is the 6990 * same as the current context of bs). 6991 * 6992 * @ignore will accumulate all visited BdrvChild object. The caller is 6993 * responsible for freeing the list afterwards. 
6994 */ 6995 void bdrv_set_aio_context_ignore(BlockDriverState *bs, 6996 AioContext *new_context, GSList **ignore) 6997 { 6998 AioContext *old_context = bdrv_get_aio_context(bs); 6999 GSList *children_to_process = NULL; 7000 GSList *parents_to_process = NULL; 7001 GSList *entry; 7002 BdrvChild *child, *parent; 7003 7004 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 7005 7006 if (old_context == new_context) { 7007 return; 7008 } 7009 7010 bdrv_drained_begin(bs); 7011 7012 QLIST_FOREACH(child, &bs->children, next) { 7013 if (g_slist_find(*ignore, child)) { 7014 continue; 7015 } 7016 *ignore = g_slist_prepend(*ignore, child); 7017 children_to_process = g_slist_prepend(children_to_process, child); 7018 } 7019 7020 QLIST_FOREACH(parent, &bs->parents, next_parent) { 7021 if (g_slist_find(*ignore, parent)) { 7022 continue; 7023 } 7024 *ignore = g_slist_prepend(*ignore, parent); 7025 parents_to_process = g_slist_prepend(parents_to_process, parent); 7026 } 7027 7028 for (entry = children_to_process; 7029 entry != NULL; 7030 entry = g_slist_next(entry)) { 7031 child = entry->data; 7032 bdrv_set_aio_context_ignore(child->bs, new_context, ignore); 7033 } 7034 g_slist_free(children_to_process); 7035 7036 for (entry = parents_to_process; 7037 entry != NULL; 7038 entry = g_slist_next(entry)) { 7039 parent = entry->data; 7040 assert(parent->klass->set_aio_ctx); 7041 parent->klass->set_aio_ctx(parent, new_context, ignore); 7042 } 7043 g_slist_free(parents_to_process); 7044 7045 bdrv_detach_aio_context(bs); 7046 7047 /* Acquire the new context, if necessary */ 7048 if (qemu_get_aio_context() != new_context) { 7049 aio_context_acquire(new_context); 7050 } 7051 7052 bdrv_attach_aio_context(bs, new_context); 7053 7054 /* 7055 * If this function was recursively called from 7056 * bdrv_set_aio_context_ignore(), there may be nodes in the 7057 * subtree that have not yet been moved to the new AioContext. 7058 * Release the old one so bdrv_drained_end() can poll them. 
7059 */ 7060 if (qemu_get_aio_context() != old_context) { 7061 aio_context_release(old_context); 7062 } 7063 7064 bdrv_drained_end(bs); 7065 7066 if (qemu_get_aio_context() != old_context) { 7067 aio_context_acquire(old_context); 7068 } 7069 if (qemu_get_aio_context() != new_context) { 7070 aio_context_release(new_context); 7071 } 7072 } 7073 7074 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, 7075 GSList **ignore, Error **errp) 7076 { 7077 if (g_slist_find(*ignore, c)) { 7078 return true; 7079 } 7080 *ignore = g_slist_prepend(*ignore, c); 7081 7082 /* 7083 * A BdrvChildClass that doesn't handle AioContext changes cannot 7084 * tolerate any AioContext changes 7085 */ 7086 if (!c->klass->can_set_aio_ctx) { 7087 char *user = bdrv_child_user_desc(c); 7088 error_setg(errp, "Changing iothreads is not supported by %s", user); 7089 g_free(user); 7090 return false; 7091 } 7092 if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { 7093 assert(!errp || *errp); 7094 return false; 7095 } 7096 return true; 7097 } 7098 7099 bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, 7100 GSList **ignore, Error **errp) 7101 { 7102 if (g_slist_find(*ignore, c)) { 7103 return true; 7104 } 7105 *ignore = g_slist_prepend(*ignore, c); 7106 return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); 7107 } 7108 7109 /* @ignore will accumulate all visited BdrvChild object. The caller is 7110 * responsible for freeing the list afterwards. 
*/ 7111 bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7112 GSList **ignore, Error **errp) 7113 { 7114 BdrvChild *c; 7115 7116 if (bdrv_get_aio_context(bs) == ctx) { 7117 return true; 7118 } 7119 7120 QLIST_FOREACH(c, &bs->parents, next_parent) { 7121 if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { 7122 return false; 7123 } 7124 } 7125 QLIST_FOREACH(c, &bs->children, next) { 7126 if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { 7127 return false; 7128 } 7129 } 7130 7131 return true; 7132 } 7133 7134 int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7135 BdrvChild *ignore_child, Error **errp) 7136 { 7137 GSList *ignore; 7138 bool ret; 7139 7140 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 7141 ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); 7142 g_slist_free(ignore); 7143 7144 if (!ret) { 7145 return -EPERM; 7146 } 7147 7148 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 7149 bdrv_set_aio_context_ignore(bs, ctx, &ignore); 7150 g_slist_free(ignore); 7151 7152 return 0; 7153 } 7154 7155 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7156 Error **errp) 7157 { 7158 return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); 7159 } 7160 7161 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 7162 void (*attached_aio_context)(AioContext *new_context, void *opaque), 7163 void (*detach_aio_context)(void *opaque), void *opaque) 7164 { 7165 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 7166 *ban = (BdrvAioNotifier){ 7167 .attached_aio_context = attached_aio_context, 7168 .detach_aio_context = detach_aio_context, 7169 .opaque = opaque 7170 }; 7171 7172 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 7173 } 7174 7175 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 7176 void (*attached_aio_context)(AioContext *, 7177 void *), 7178 void (*detach_aio_context)(void *), 7179 void *opaque) 7180 { 7181 BdrvAioNotifier 
*ban, *ban_next; 7182 7183 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 7184 if (ban->attached_aio_context == attached_aio_context && 7185 ban->detach_aio_context == detach_aio_context && 7186 ban->opaque == opaque && 7187 ban->deleted == false) 7188 { 7189 if (bs->walking_aio_notifiers) { 7190 ban->deleted = true; 7191 } else { 7192 bdrv_do_remove_aio_context_notifier(ban); 7193 } 7194 return; 7195 } 7196 } 7197 7198 abort(); 7199 } 7200 7201 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 7202 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 7203 bool force, 7204 Error **errp) 7205 { 7206 if (!bs->drv) { 7207 error_setg(errp, "Node is ejected"); 7208 return -ENOMEDIUM; 7209 } 7210 if (!bs->drv->bdrv_amend_options) { 7211 error_setg(errp, "Block driver '%s' does not support option amendment", 7212 bs->drv->format_name); 7213 return -ENOTSUP; 7214 } 7215 return bs->drv->bdrv_amend_options(bs, opts, status_cb, 7216 cb_opaque, force, errp); 7217 } 7218 7219 /* 7220 * This function checks whether the given @to_replace is allowed to be 7221 * replaced by a node that always shows the same data as @bs. This is 7222 * used for example to verify whether the mirror job can replace 7223 * @to_replace by the target mirrored from @bs. 7224 * To be replaceable, @bs and @to_replace may either be guaranteed to 7225 * always show the same data (because they are only connected through 7226 * filters), or some driver may allow replacing one of its children 7227 * because it can guarantee that this child's data is not visible at 7228 * all (for example, for dissenting quorum children that have no other 7229 * parents). 
7230 */ 7231 bool bdrv_recurse_can_replace(BlockDriverState *bs, 7232 BlockDriverState *to_replace) 7233 { 7234 BlockDriverState *filtered; 7235 7236 if (!bs || !bs->drv) { 7237 return false; 7238 } 7239 7240 if (bs == to_replace) { 7241 return true; 7242 } 7243 7244 /* See what the driver can do */ 7245 if (bs->drv->bdrv_recurse_can_replace) { 7246 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 7247 } 7248 7249 /* For filters without an own implementation, we can recurse on our own */ 7250 filtered = bdrv_filter_bs(bs); 7251 if (filtered) { 7252 return bdrv_recurse_can_replace(filtered, to_replace); 7253 } 7254 7255 /* Safe default */ 7256 return false; 7257 } 7258 7259 /* 7260 * Check whether the given @node_name can be replaced by a node that 7261 * has the same data as @parent_bs. If so, return @node_name's BDS; 7262 * NULL otherwise. 7263 * 7264 * @node_name must be a (recursive) *child of @parent_bs (or this 7265 * function will return NULL). 7266 * 7267 * The result (whether the node can be replaced or not) is only valid 7268 * for as long as no graph or permission changes occur. 7269 */ 7270 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 7271 const char *node_name, Error **errp) 7272 { 7273 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 7274 AioContext *aio_context; 7275 7276 if (!to_replace_bs) { 7277 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 7278 return NULL; 7279 } 7280 7281 aio_context = bdrv_get_aio_context(to_replace_bs); 7282 aio_context_acquire(aio_context); 7283 7284 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 7285 to_replace_bs = NULL; 7286 goto out; 7287 } 7288 7289 /* We don't want arbitrary node of the BDS chain to be replaced only the top 7290 * most non filter in order to prevent data corruption. 7291 * Another benefit is that this tests exclude backing files which are 7292 * blocked by the backing blockers. 
7293 */ 7294 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 7295 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 7296 "because it cannot be guaranteed that doing so would not " 7297 "lead to an abrupt change of visible data", 7298 node_name, parent_bs->node_name); 7299 to_replace_bs = NULL; 7300 goto out; 7301 } 7302 7303 out: 7304 aio_context_release(aio_context); 7305 return to_replace_bs; 7306 } 7307 7308 /** 7309 * Iterates through the list of runtime option keys that are said to 7310 * be "strong" for a BDS. An option is called "strong" if it changes 7311 * a BDS's data. For example, the null block driver's "size" and 7312 * "read-zeroes" options are strong, but its "latency-ns" option is 7313 * not. 7314 * 7315 * If a key returned by this function ends with a dot, all options 7316 * starting with that prefix are strong. 7317 */ 7318 static const char *const *strong_options(BlockDriverState *bs, 7319 const char *const *curopt) 7320 { 7321 static const char *const global_options[] = { 7322 "driver", "filename", NULL 7323 }; 7324 7325 if (!curopt) { 7326 return &global_options[0]; 7327 } 7328 7329 curopt++; 7330 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 7331 curopt = bs->drv->strong_runtime_opts; 7332 } 7333 7334 return (curopt && *curopt) ? curopt : NULL; 7335 } 7336 7337 /** 7338 * Copies all strong runtime options from bs->options to the given 7339 * QDict. The set of strong option keys is determined by invoking 7340 * strong_options(). 7341 * 7342 * Returns true iff any strong option was present in bs->options (and 7343 * thus copied to the target QDict) with the exception of "filename" 7344 * and "driver". The caller is expected to use this value to decide 7345 * whether the existence of strong options prevents the generation of 7346 * a plain filename. 
7347 */ 7348 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 7349 { 7350 bool found_any = false; 7351 const char *const *option_name = NULL; 7352 7353 if (!bs->drv) { 7354 return false; 7355 } 7356 7357 while ((option_name = strong_options(bs, option_name))) { 7358 bool option_given = false; 7359 7360 assert(strlen(*option_name) > 0); 7361 if ((*option_name)[strlen(*option_name) - 1] != '.') { 7362 QObject *entry = qdict_get(bs->options, *option_name); 7363 if (!entry) { 7364 continue; 7365 } 7366 7367 qdict_put_obj(d, *option_name, qobject_ref(entry)); 7368 option_given = true; 7369 } else { 7370 const QDictEntry *entry; 7371 for (entry = qdict_first(bs->options); entry; 7372 entry = qdict_next(bs->options, entry)) 7373 { 7374 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 7375 qdict_put_obj(d, qdict_entry_key(entry), 7376 qobject_ref(qdict_entry_value(entry))); 7377 option_given = true; 7378 } 7379 } 7380 } 7381 7382 /* While "driver" and "filename" need to be included in a JSON filename, 7383 * their existence does not prohibit generation of a plain filename. */ 7384 if (!found_any && option_given && 7385 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 7386 { 7387 found_any = true; 7388 } 7389 } 7390 7391 if (!qdict_haskey(d, "driver")) { 7392 /* Drivers created with bdrv_new_open_driver() may not have a 7393 * @driver option. Add it here. */ 7394 qdict_put_str(d, "driver", bs->drv->format_name); 7395 } 7396 7397 return found_any; 7398 } 7399 7400 /* Note: This function may return false positives; it may return true 7401 * even if opening the backing file specified by bs's image header 7402 * would result in exactly bs->backing. 
*/
bool bdrv_backing_overridden(BlockDriverState *bs)
{
    if (!bs->backing) {
        /* No backing BDS, so if the image header reports any backing
         * file, it must have been suppressed */
        return bs->auto_backing_file[0] != '\0';
    }

    /* Overridden iff the attached backing node's filename differs from
     * bs->auto_backing_file */
    return strcmp(bs->auto_backing_file, bs->backing->bs->filename) != 0;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here.  Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
7426 */ 7427 void bdrv_refresh_filename(BlockDriverState *bs) 7428 { 7429 BlockDriver *drv = bs->drv; 7430 BdrvChild *child; 7431 BlockDriverState *primary_child_bs; 7432 QDict *opts; 7433 bool backing_overridden; 7434 bool generate_json_filename; /* Whether our default implementation should 7435 fill exact_filename (false) or not (true) */ 7436 7437 if (!drv) { 7438 return; 7439 } 7440 7441 /* This BDS's file name may depend on any of its children's file names, so 7442 * refresh those first */ 7443 QLIST_FOREACH(child, &bs->children, next) { 7444 bdrv_refresh_filename(child->bs); 7445 } 7446 7447 if (bs->implicit) { 7448 /* For implicit nodes, just copy everything from the single child */ 7449 child = QLIST_FIRST(&bs->children); 7450 assert(QLIST_NEXT(child, next) == NULL); 7451 7452 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 7453 child->bs->exact_filename); 7454 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 7455 7456 qobject_unref(bs->full_open_options); 7457 bs->full_open_options = qobject_ref(child->bs->full_open_options); 7458 7459 return; 7460 } 7461 7462 backing_overridden = bdrv_backing_overridden(bs); 7463 7464 if (bs->open_flags & BDRV_O_NO_IO) { 7465 /* Without I/O, the backing file does not change anything. 7466 * Therefore, in such a case (primarily qemu-img), we can 7467 * pretend the backing file has not been overridden even if 7468 * it technically has been. 
*/ 7469 backing_overridden = false; 7470 } 7471 7472 /* Gather the options QDict */ 7473 opts = qdict_new(); 7474 generate_json_filename = append_strong_runtime_options(opts, bs); 7475 generate_json_filename |= backing_overridden; 7476 7477 if (drv->bdrv_gather_child_options) { 7478 /* Some block drivers may not want to present all of their children's 7479 * options, or name them differently from BdrvChild.name */ 7480 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 7481 } else { 7482 QLIST_FOREACH(child, &bs->children, next) { 7483 if (child == bs->backing && !backing_overridden) { 7484 /* We can skip the backing BDS if it has not been overridden */ 7485 continue; 7486 } 7487 7488 qdict_put(opts, child->name, 7489 qobject_ref(child->bs->full_open_options)); 7490 } 7491 7492 if (backing_overridden && !bs->backing) { 7493 /* Force no backing file */ 7494 qdict_put_null(opts, "backing"); 7495 } 7496 } 7497 7498 qobject_unref(bs->full_open_options); 7499 bs->full_open_options = opts; 7500 7501 primary_child_bs = bdrv_primary_bs(bs); 7502 7503 if (drv->bdrv_refresh_filename) { 7504 /* Obsolete information is of no use here, so drop the old file name 7505 * information before refreshing it */ 7506 bs->exact_filename[0] = '\0'; 7507 7508 drv->bdrv_refresh_filename(bs); 7509 } else if (primary_child_bs) { 7510 /* 7511 * Try to reconstruct valid information from the underlying 7512 * file -- this only works for format nodes (filter nodes 7513 * cannot be probed and as such must be selected by the user 7514 * either through an options dict, or through a special 7515 * filename which the filter driver must construct in its 7516 * .bdrv_refresh_filename() implementation). 
7517 */ 7518 7519 bs->exact_filename[0] = '\0'; 7520 7521 /* 7522 * We can use the underlying file's filename if: 7523 * - it has a filename, 7524 * - the current BDS is not a filter, 7525 * - the file is a protocol BDS, and 7526 * - opening that file (as this BDS's format) will automatically create 7527 * the BDS tree we have right now, that is: 7528 * - the user did not significantly change this BDS's behavior with 7529 * some explicit (strong) options 7530 * - no non-file child of this BDS has been overridden by the user 7531 * Both of these conditions are represented by generate_json_filename. 7532 */ 7533 if (primary_child_bs->exact_filename[0] && 7534 primary_child_bs->drv->bdrv_file_open && 7535 !drv->is_filter && !generate_json_filename) 7536 { 7537 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 7538 } 7539 } 7540 7541 if (bs->exact_filename[0]) { 7542 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 7543 } else { 7544 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 7545 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 7546 json->str) >= sizeof(bs->filename)) { 7547 /* Give user a hint if we truncated things. 
*/ 7548 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 7549 } 7550 g_string_free(json, true); 7551 } 7552 } 7553 7554 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 7555 { 7556 BlockDriver *drv = bs->drv; 7557 BlockDriverState *child_bs; 7558 7559 if (!drv) { 7560 error_setg(errp, "Node '%s' is ejected", bs->node_name); 7561 return NULL; 7562 } 7563 7564 if (drv->bdrv_dirname) { 7565 return drv->bdrv_dirname(bs, errp); 7566 } 7567 7568 child_bs = bdrv_primary_bs(bs); 7569 if (child_bs) { 7570 return bdrv_dirname(child_bs, errp); 7571 } 7572 7573 bdrv_refresh_filename(bs); 7574 if (bs->exact_filename[0] != '\0') { 7575 return path_combine(bs->exact_filename, ""); 7576 } 7577 7578 error_setg(errp, "Cannot generate a base directory for %s nodes", 7579 drv->format_name); 7580 return NULL; 7581 } 7582 7583 /* 7584 * Hot add/remove a BDS's child. So the user can take a child offline when 7585 * it is broken and take a new child online 7586 */ 7587 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 7588 Error **errp) 7589 { 7590 7591 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 7592 error_setg(errp, "The node %s does not support adding a child", 7593 bdrv_get_device_or_node_name(parent_bs)); 7594 return; 7595 } 7596 7597 if (!QLIST_EMPTY(&child_bs->parents)) { 7598 error_setg(errp, "The node %s already has a parent", 7599 child_bs->node_name); 7600 return; 7601 } 7602 7603 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 7604 } 7605 7606 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 7607 { 7608 BdrvChild *tmp; 7609 7610 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 7611 error_setg(errp, "The node %s does not support removing a child", 7612 bdrv_get_device_or_node_name(parent_bs)); 7613 return; 7614 } 7615 7616 QLIST_FOREACH(tmp, &parent_bs->children, next) { 7617 if (tmp == child) { 7618 break; 7619 } 7620 } 7621 7622 if (!tmp) { 7623 error_setg(errp, "The node 
%s does not have a child named %s", 7624 bdrv_get_device_or_node_name(parent_bs), 7625 bdrv_get_device_or_node_name(child->bs)); 7626 return; 7627 } 7628 7629 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 7630 } 7631 7632 int bdrv_make_empty(BdrvChild *c, Error **errp) 7633 { 7634 BlockDriver *drv = c->bs->drv; 7635 int ret; 7636 7637 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 7638 7639 if (!drv->bdrv_make_empty) { 7640 error_setg(errp, "%s does not support emptying nodes", 7641 drv->format_name); 7642 return -ENOTSUP; 7643 } 7644 7645 ret = drv->bdrv_make_empty(c->bs); 7646 if (ret < 0) { 7647 error_setg_errno(errp, -ret, "Failed to empty %s", 7648 c->bs->filename); 7649 return ret; 7650 } 7651 7652 return 0; 7653 } 7654 7655 /* 7656 * Return the child that @bs acts as an overlay for, and from which data may be 7657 * copied in COW or COR operations. Usually this is the backing file. 7658 */ 7659 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 7660 { 7661 if (!bs || !bs->drv) { 7662 return NULL; 7663 } 7664 7665 if (bs->drv->is_filter) { 7666 return NULL; 7667 } 7668 7669 if (!bs->backing) { 7670 return NULL; 7671 } 7672 7673 assert(bs->backing->role & BDRV_CHILD_COW); 7674 return bs->backing; 7675 } 7676 7677 /* 7678 * If @bs acts as a filter for exactly one of its children, return 7679 * that child. 7680 */ 7681 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 7682 { 7683 BdrvChild *c; 7684 7685 if (!bs || !bs->drv) { 7686 return NULL; 7687 } 7688 7689 if (!bs->drv->is_filter) { 7690 return NULL; 7691 } 7692 7693 /* Only one of @backing or @file may be used */ 7694 assert(!(bs->backing && bs->file)); 7695 7696 c = bs->backing ?: bs->file; 7697 if (!c) { 7698 return NULL; 7699 } 7700 7701 assert(c->role & BDRV_CHILD_FILTERED); 7702 return c; 7703 } 7704 7705 /* 7706 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 7707 * whichever is non-NULL. 7708 * 7709 * Return NULL if both are NULL. 
7710 */ 7711 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 7712 { 7713 BdrvChild *cow_child = bdrv_cow_child(bs); 7714 BdrvChild *filter_child = bdrv_filter_child(bs); 7715 7716 /* Filter nodes cannot have COW backing files */ 7717 assert(!(cow_child && filter_child)); 7718 7719 return cow_child ?: filter_child; 7720 } 7721 7722 /* 7723 * Return the primary child of this node: For filters, that is the 7724 * filtered child. For other nodes, that is usually the child storing 7725 * metadata. 7726 * (A generally more helpful description is that this is (usually) the 7727 * child that has the same filename as @bs.) 7728 * 7729 * Drivers do not necessarily have a primary child; for example quorum 7730 * does not. 7731 */ 7732 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 7733 { 7734 BdrvChild *c, *found = NULL; 7735 7736 QLIST_FOREACH(c, &bs->children, next) { 7737 if (c->role & BDRV_CHILD_PRIMARY) { 7738 assert(!found); 7739 found = c; 7740 } 7741 } 7742 7743 return found; 7744 } 7745 7746 static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, 7747 bool stop_on_explicit_filter) 7748 { 7749 BdrvChild *c; 7750 7751 if (!bs) { 7752 return NULL; 7753 } 7754 7755 while (!(stop_on_explicit_filter && !bs->implicit)) { 7756 c = bdrv_filter_child(bs); 7757 if (!c) { 7758 /* 7759 * A filter that is embedded in a working block graph must 7760 * have a child. Assert this here so this function does 7761 * not return a filter node that is not expected by the 7762 * caller. 7763 */ 7764 assert(!bs->drv || !bs->drv->is_filter); 7765 break; 7766 } 7767 bs = c->bs; 7768 } 7769 /* 7770 * Note that this treats nodes with bs->drv == NULL as not being 7771 * filters (bs->drv == NULL should be replaced by something else 7772 * anyway). 7773 * The advantage of this behavior is that this function will thus 7774 * always return a non-NULL value (given a non-NULL @bs). 
7775 */ 7776 7777 return bs; 7778 } 7779 7780 /* 7781 * Return the first BDS that has not been added implicitly or that 7782 * does not have a filtered child down the chain starting from @bs 7783 * (including @bs itself). 7784 */ 7785 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 7786 { 7787 return bdrv_do_skip_filters(bs, true); 7788 } 7789 7790 /* 7791 * Return the first BDS that does not have a filtered child down the 7792 * chain starting from @bs (including @bs itself). 7793 */ 7794 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 7795 { 7796 return bdrv_do_skip_filters(bs, false); 7797 } 7798 7799 /* 7800 * For a backing chain, return the first non-filter backing image of 7801 * the first non-filter image. 7802 */ 7803 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 7804 { 7805 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 7806 } 7807