1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2020 Virtuozzo International GmbH. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 
24 */ 25 26 #include "qemu/osdep.h" 27 #include "block/trace.h" 28 #include "block/block_int.h" 29 #include "block/blockjob.h" 30 #include "block/fuse.h" 31 #include "block/nbd.h" 32 #include "block/qdict.h" 33 #include "qemu/error-report.h" 34 #include "block/module_block.h" 35 #include "qemu/main-loop.h" 36 #include "qemu/module.h" 37 #include "qapi/error.h" 38 #include "qapi/qmp/qdict.h" 39 #include "qapi/qmp/qjson.h" 40 #include "qapi/qmp/qnull.h" 41 #include "qapi/qmp/qstring.h" 42 #include "qapi/qobject-output-visitor.h" 43 #include "qapi/qapi-visit-block-core.h" 44 #include "sysemu/block-backend.h" 45 #include "qemu/notify.h" 46 #include "qemu/option.h" 47 #include "qemu/coroutine.h" 48 #include "block/qapi.h" 49 #include "qemu/timer.h" 50 #include "qemu/cutils.h" 51 #include "qemu/id.h" 52 #include "qemu/range.h" 53 #include "qemu/rcu.h" 54 #include "block/coroutines.h" 55 56 #ifdef CONFIG_BSD 57 #include <sys/ioctl.h> 58 #include <sys/queue.h> 59 #if defined(HAVE_SYS_DISK_H) 60 #include <sys/disk.h> 61 #endif 62 #endif 63 64 #ifdef _WIN32 65 #include <windows.h> 66 #endif 67 68 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 69 70 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 71 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 72 73 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 74 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 75 76 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 77 QLIST_HEAD_INITIALIZER(bdrv_drivers); 78 79 static BlockDriverState *bdrv_open_inherit(const char *filename, 80 const char *reference, 81 QDict *options, int flags, 82 BlockDriverState *parent, 83 const BdrvChildClass *child_class, 84 BdrvChildRole child_role, 85 Error **errp); 86 87 static bool bdrv_recurse_has_child(BlockDriverState *bs, 88 BlockDriverState *child); 89 90 static void bdrv_replace_child_noperm(BdrvChild *child, 91 BlockDriverState *new_bs); 92 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, 93 
BdrvChild *child, 94 Transaction *tran); 95 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 96 Transaction *tran); 97 98 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 99 BlockReopenQueue *queue, 100 Transaction *change_child_tran, Error **errp); 101 static void bdrv_reopen_commit(BDRVReopenState *reopen_state); 102 static void bdrv_reopen_abort(BDRVReopenState *reopen_state); 103 104 /* If non-zero, use only whitelisted block drivers */ 105 static int use_bdrv_whitelist; 106 107 #ifdef _WIN32 108 static int is_windows_drive_prefix(const char *filename) 109 { 110 return (((filename[0] >= 'a' && filename[0] <= 'z') || 111 (filename[0] >= 'A' && filename[0] <= 'Z')) && 112 filename[1] == ':'); 113 } 114 115 int is_windows_drive(const char *filename) 116 { 117 if (is_windows_drive_prefix(filename) && 118 filename[2] == '\0') 119 return 1; 120 if (strstart(filename, "\\\\.\\", NULL) || 121 strstart(filename, "//./", NULL)) 122 return 1; 123 return 0; 124 } 125 #endif 126 127 size_t bdrv_opt_mem_align(BlockDriverState *bs) 128 { 129 if (!bs || !bs->drv) { 130 /* page size or 4k (hdd sector size) should be on the safe side */ 131 return MAX(4096, qemu_real_host_page_size); 132 } 133 134 return bs->bl.opt_mem_alignment; 135 } 136 137 size_t bdrv_min_mem_align(BlockDriverState *bs) 138 { 139 if (!bs || !bs->drv) { 140 /* page size or 4k (hdd sector size) should be on the safe side */ 141 return MAX(4096, qemu_real_host_page_size); 142 } 143 144 return bs->bl.min_mem_alignment; 145 } 146 147 /* check if the path starts with "<protocol>:" */ 148 int path_has_protocol(const char *path) 149 { 150 const char *p; 151 152 #ifdef _WIN32 153 if (is_windows_drive(path) || 154 is_windows_drive_prefix(path)) { 155 return 0; 156 } 157 p = path + strcspn(path, ":/\\"); 158 #else 159 p = path + strcspn(path, ":/"); 160 #endif 161 162 return *p == ':'; 163 } 164 165 int path_is_absolute(const char *path) 166 { 167 #ifdef _WIN32 168 /* specific case for 
names like: "\\.\d:" */ 169 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 170 return 1; 171 } 172 return (*path == '/' || *path == '\\'); 173 #else 174 return (*path == '/'); 175 #endif 176 } 177 178 /* if filename is absolute, just return its duplicate. Otherwise, build a 179 path to it by considering it is relative to base_path. URL are 180 supported. */ 181 char *path_combine(const char *base_path, const char *filename) 182 { 183 const char *protocol_stripped = NULL; 184 const char *p, *p1; 185 char *result; 186 int len; 187 188 if (path_is_absolute(filename)) { 189 return g_strdup(filename); 190 } 191 192 if (path_has_protocol(base_path)) { 193 protocol_stripped = strchr(base_path, ':'); 194 if (protocol_stripped) { 195 protocol_stripped++; 196 } 197 } 198 p = protocol_stripped ?: base_path; 199 200 p1 = strrchr(base_path, '/'); 201 #ifdef _WIN32 202 { 203 const char *p2; 204 p2 = strrchr(base_path, '\\'); 205 if (!p1 || p2 > p1) { 206 p1 = p2; 207 } 208 } 209 #endif 210 if (p1) { 211 p1++; 212 } else { 213 p1 = base_path; 214 } 215 if (p1 > p) { 216 p = p1; 217 } 218 len = p - base_path; 219 220 result = g_malloc(len + strlen(filename) + 1); 221 memcpy(result, base_path, len); 222 strcpy(result + len, filename); 223 224 return result; 225 } 226 227 /* 228 * Helper function for bdrv_parse_filename() implementations to remove optional 229 * protocol prefixes (especially "file:") from a filename and for putting the 230 * stripped filename into the options QDict if there is such a prefix. 
231 */ 232 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 233 QDict *options) 234 { 235 if (strstart(filename, prefix, &filename)) { 236 /* Stripping the explicit protocol prefix may result in a protocol 237 * prefix being (wrongly) detected (if the filename contains a colon) */ 238 if (path_has_protocol(filename)) { 239 GString *fat_filename; 240 241 /* This means there is some colon before the first slash; therefore, 242 * this cannot be an absolute path */ 243 assert(!path_is_absolute(filename)); 244 245 /* And we can thus fix the protocol detection issue by prefixing it 246 * by "./" */ 247 fat_filename = g_string_new("./"); 248 g_string_append(fat_filename, filename); 249 250 assert(!path_has_protocol(fat_filename->str)); 251 252 qdict_put(options, "filename", 253 qstring_from_gstring(fat_filename)); 254 } else { 255 /* If no protocol prefix was detected, we can use the shortened 256 * filename as-is */ 257 qdict_put_str(options, "filename", filename); 258 } 259 } 260 } 261 262 263 /* Returns whether the image file is opened as read-only. Note that this can 264 * return false and writing to the image file is still not possible because the 265 * image is inactivated. 
*/ 266 bool bdrv_is_read_only(BlockDriverState *bs) 267 { 268 return !(bs->open_flags & BDRV_O_RDWR); 269 } 270 271 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 272 bool ignore_allow_rdw, Error **errp) 273 { 274 /* Do not set read_only if copy_on_read is enabled */ 275 if (bs->copy_on_read && read_only) { 276 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 277 bdrv_get_device_or_node_name(bs)); 278 return -EINVAL; 279 } 280 281 /* Do not clear read_only if it is prohibited */ 282 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 283 !ignore_allow_rdw) 284 { 285 error_setg(errp, "Node '%s' is read only", 286 bdrv_get_device_or_node_name(bs)); 287 return -EPERM; 288 } 289 290 return 0; 291 } 292 293 /* 294 * Called by a driver that can only provide a read-only image. 295 * 296 * Returns 0 if the node is already read-only or it could switch the node to 297 * read-only because BDRV_O_AUTO_RDONLY is set. 298 * 299 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 300 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 301 * is not NULL, it is used as the error message for the Error object. 302 */ 303 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 304 Error **errp) 305 { 306 int ret = 0; 307 308 if (!(bs->open_flags & BDRV_O_RDWR)) { 309 return 0; 310 } 311 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 312 goto fail; 313 } 314 315 ret = bdrv_can_set_read_only(bs, true, false, NULL); 316 if (ret < 0) { 317 goto fail; 318 } 319 320 bs->open_flags &= ~BDRV_O_RDWR; 321 322 return 0; 323 324 fail: 325 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 326 return -EACCES; 327 } 328 329 /* 330 * If @backing is empty, this function returns NULL without setting 331 * @errp. In all other cases, NULL will only be returned with @errp 332 * set. 
333 * 334 * Therefore, a return value of NULL without @errp set means that 335 * there is no backing file; if @errp is set, there is one but its 336 * absolute filename cannot be generated. 337 */ 338 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 339 const char *backing, 340 Error **errp) 341 { 342 if (backing[0] == '\0') { 343 return NULL; 344 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 345 return g_strdup(backing); 346 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 347 error_setg(errp, "Cannot use relative backing file names for '%s'", 348 backed); 349 return NULL; 350 } else { 351 return path_combine(backed, backing); 352 } 353 } 354 355 /* 356 * If @filename is empty or NULL, this function returns NULL without 357 * setting @errp. In all other cases, NULL will only be returned with 358 * @errp set. 359 */ 360 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, 361 const char *filename, Error **errp) 362 { 363 char *dir, *full_name; 364 365 if (!filename || filename[0] == '\0') { 366 return NULL; 367 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 368 return g_strdup(filename); 369 } 370 371 dir = bdrv_dirname(relative_to, errp); 372 if (!dir) { 373 return NULL; 374 } 375 376 full_name = g_strconcat(dir, filename, NULL); 377 g_free(dir); 378 return full_name; 379 } 380 381 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 382 { 383 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 384 } 385 386 void bdrv_register(BlockDriver *bdrv) 387 { 388 assert(bdrv->format_name); 389 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 390 } 391 392 BlockDriverState *bdrv_new(void) 393 { 394 BlockDriverState *bs; 395 int i; 396 397 bs = g_new0(BlockDriverState, 1); 398 QLIST_INIT(&bs->dirty_bitmaps); 399 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 400 QLIST_INIT(&bs->op_blockers[i]); 401 } 402 qemu_co_mutex_init(&bs->reqs_lock); 403 
qemu_mutex_init(&bs->dirty_bitmap_mutex); 404 bs->refcnt = 1; 405 bs->aio_context = qemu_get_aio_context(); 406 407 qemu_co_queue_init(&bs->flush_queue); 408 409 qemu_co_mutex_init(&bs->bsc_modify_lock); 410 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); 411 412 for (i = 0; i < bdrv_drain_all_count; i++) { 413 bdrv_drained_begin(bs); 414 } 415 416 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 417 418 return bs; 419 } 420 421 static BlockDriver *bdrv_do_find_format(const char *format_name) 422 { 423 BlockDriver *drv1; 424 425 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 426 if (!strcmp(drv1->format_name, format_name)) { 427 return drv1; 428 } 429 } 430 431 return NULL; 432 } 433 434 BlockDriver *bdrv_find_format(const char *format_name) 435 { 436 BlockDriver *drv1; 437 int i; 438 439 drv1 = bdrv_do_find_format(format_name); 440 if (drv1) { 441 return drv1; 442 } 443 444 /* The driver isn't registered, maybe we need to load a module */ 445 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 446 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 447 block_module_load_one(block_driver_modules[i].library_name); 448 break; 449 } 450 } 451 452 return bdrv_do_find_format(format_name); 453 } 454 455 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 456 { 457 static const char *whitelist_rw[] = { 458 CONFIG_BDRV_RW_WHITELIST 459 NULL 460 }; 461 static const char *whitelist_ro[] = { 462 CONFIG_BDRV_RO_WHITELIST 463 NULL 464 }; 465 const char **p; 466 467 if (!whitelist_rw[0] && !whitelist_ro[0]) { 468 return 1; /* no whitelist, anything goes */ 469 } 470 471 for (p = whitelist_rw; *p; p++) { 472 if (!strcmp(format_name, *p)) { 473 return 1; 474 } 475 } 476 if (read_only) { 477 for (p = whitelist_ro; *p; p++) { 478 if (!strcmp(format_name, *p)) { 479 return 1; 480 } 481 } 482 } 483 return 0; 484 } 485 486 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 487 { 488 return 
bdrv_format_is_whitelisted(drv->format_name, read_only); 489 } 490 491 bool bdrv_uses_whitelist(void) 492 { 493 return use_bdrv_whitelist; 494 } 495 496 typedef struct CreateCo { 497 BlockDriver *drv; 498 char *filename; 499 QemuOpts *opts; 500 int ret; 501 Error *err; 502 } CreateCo; 503 504 static void coroutine_fn bdrv_create_co_entry(void *opaque) 505 { 506 Error *local_err = NULL; 507 int ret; 508 509 CreateCo *cco = opaque; 510 assert(cco->drv); 511 512 ret = cco->drv->bdrv_co_create_opts(cco->drv, 513 cco->filename, cco->opts, &local_err); 514 error_propagate(&cco->err, local_err); 515 cco->ret = ret; 516 } 517 518 int bdrv_create(BlockDriver *drv, const char* filename, 519 QemuOpts *opts, Error **errp) 520 { 521 int ret; 522 523 Coroutine *co; 524 CreateCo cco = { 525 .drv = drv, 526 .filename = g_strdup(filename), 527 .opts = opts, 528 .ret = NOT_DONE, 529 .err = NULL, 530 }; 531 532 if (!drv->bdrv_co_create_opts) { 533 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 534 ret = -ENOTSUP; 535 goto out; 536 } 537 538 if (qemu_in_coroutine()) { 539 /* Fast-path if already in coroutine context */ 540 bdrv_create_co_entry(&cco); 541 } else { 542 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 543 qemu_coroutine_enter(co); 544 while (cco.ret == NOT_DONE) { 545 aio_poll(qemu_get_aio_context(), true); 546 } 547 } 548 549 ret = cco.ret; 550 if (ret < 0) { 551 if (cco.err) { 552 error_propagate(errp, cco.err); 553 } else { 554 error_setg_errno(errp, -ret, "Could not create image"); 555 } 556 } 557 558 out: 559 g_free(cco.filename); 560 return ret; 561 } 562 563 /** 564 * Helper function for bdrv_create_file_fallback(): Resize @blk to at 565 * least the given @minimum_size. 566 * 567 * On success, return @blk's actual length. 568 * Otherwise, return -errno. 
569 */ 570 static int64_t create_file_fallback_truncate(BlockBackend *blk, 571 int64_t minimum_size, Error **errp) 572 { 573 Error *local_err = NULL; 574 int64_t size; 575 int ret; 576 577 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 578 &local_err); 579 if (ret < 0 && ret != -ENOTSUP) { 580 error_propagate(errp, local_err); 581 return ret; 582 } 583 584 size = blk_getlength(blk); 585 if (size < 0) { 586 error_free(local_err); 587 error_setg_errno(errp, -size, 588 "Failed to inquire the new image file's length"); 589 return size; 590 } 591 592 if (size < minimum_size) { 593 /* Need to grow the image, but we failed to do that */ 594 error_propagate(errp, local_err); 595 return -ENOTSUP; 596 } 597 598 error_free(local_err); 599 local_err = NULL; 600 601 return size; 602 } 603 604 /** 605 * Helper function for bdrv_create_file_fallback(): Zero the first 606 * sector to remove any potentially pre-existing image header. 607 */ 608 static int create_file_fallback_zero_first_sector(BlockBackend *blk, 609 int64_t current_size, 610 Error **errp) 611 { 612 int64_t bytes_to_clear; 613 int ret; 614 615 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 616 if (bytes_to_clear) { 617 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 618 if (ret < 0) { 619 error_setg_errno(errp, -ret, 620 "Failed to clear the new image's first sector"); 621 return ret; 622 } 623 } 624 625 return 0; 626 } 627 628 /** 629 * Simple implementation of bdrv_co_create_opts for protocol drivers 630 * which only support creation via opening a file 631 * (usually existing raw storage device) 632 */ 633 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 634 const char *filename, 635 QemuOpts *opts, 636 Error **errp) 637 { 638 BlockBackend *blk; 639 QDict *options; 640 int64_t size = 0; 641 char *buf = NULL; 642 PreallocMode prealloc; 643 Error *local_err = NULL; 644 int ret; 645 646 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 647 buf = 
qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 648 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 649 PREALLOC_MODE_OFF, &local_err); 650 g_free(buf); 651 if (local_err) { 652 error_propagate(errp, local_err); 653 return -EINVAL; 654 } 655 656 if (prealloc != PREALLOC_MODE_OFF) { 657 error_setg(errp, "Unsupported preallocation mode '%s'", 658 PreallocMode_str(prealloc)); 659 return -ENOTSUP; 660 } 661 662 options = qdict_new(); 663 qdict_put_str(options, "driver", drv->format_name); 664 665 blk = blk_new_open(filename, NULL, options, 666 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 667 if (!blk) { 668 error_prepend(errp, "Protocol driver '%s' does not support image " 669 "creation, and opening the image failed: ", 670 drv->format_name); 671 return -EINVAL; 672 } 673 674 size = create_file_fallback_truncate(blk, size, errp); 675 if (size < 0) { 676 ret = size; 677 goto out; 678 } 679 680 ret = create_file_fallback_zero_first_sector(blk, size, errp); 681 if (ret < 0) { 682 goto out; 683 } 684 685 ret = 0; 686 out: 687 blk_unref(blk); 688 return ret; 689 } 690 691 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 692 { 693 QemuOpts *protocol_opts; 694 BlockDriver *drv; 695 QDict *qdict; 696 int ret; 697 698 drv = bdrv_find_protocol(filename, true, errp); 699 if (drv == NULL) { 700 return -ENOENT; 701 } 702 703 if (!drv->create_opts) { 704 error_setg(errp, "Driver '%s' does not support image creation", 705 drv->format_name); 706 return -ENOTSUP; 707 } 708 709 /* 710 * 'opts' contains a QemuOptsList with a combination of format and protocol 711 * default values. 712 * 713 * The format properly removes its options, but the default values remain 714 * in 'opts->list'. So if the protocol has options with the same name 715 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 716 * of the format, since for overlapping options, the format wins. 
717 * 718 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 719 * only the set options, and then convert it back to QemuOpts, using the 720 * create_opts of the protocol. So the new QemuOpts, will contain only the 721 * protocol defaults. 722 */ 723 qdict = qemu_opts_to_qdict(opts, NULL); 724 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 725 if (protocol_opts == NULL) { 726 ret = -EINVAL; 727 goto out; 728 } 729 730 ret = bdrv_create(drv, filename, protocol_opts, errp); 731 out: 732 qemu_opts_del(protocol_opts); 733 qobject_unref(qdict); 734 return ret; 735 } 736 737 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 738 { 739 Error *local_err = NULL; 740 int ret; 741 742 assert(bs != NULL); 743 744 if (!bs->drv) { 745 error_setg(errp, "Block node '%s' is not opened", bs->filename); 746 return -ENOMEDIUM; 747 } 748 749 if (!bs->drv->bdrv_co_delete_file) { 750 error_setg(errp, "Driver '%s' does not support image deletion", 751 bs->drv->format_name); 752 return -ENOTSUP; 753 } 754 755 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 756 if (ret < 0) { 757 error_propagate(errp, local_err); 758 } 759 760 return ret; 761 } 762 763 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 764 { 765 Error *local_err = NULL; 766 int ret; 767 768 if (!bs) { 769 return; 770 } 771 772 ret = bdrv_co_delete_file(bs, &local_err); 773 /* 774 * ENOTSUP will happen if the block driver doesn't support 775 * the 'bdrv_co_delete_file' interface. This is a predictable 776 * scenario and shouldn't be reported back to the user. 777 */ 778 if (ret == -ENOTSUP) { 779 error_free(local_err); 780 } else if (ret < 0) { 781 error_report_err(local_err); 782 } 783 } 784 785 /** 786 * Try to get @bs's logical and physical block size. 787 * On success, store them in @bsz struct and return 0. 788 * On failure return -errno. 789 * @bs must not be empty. 
790 */ 791 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 792 { 793 BlockDriver *drv = bs->drv; 794 BlockDriverState *filtered = bdrv_filter_bs(bs); 795 796 if (drv && drv->bdrv_probe_blocksizes) { 797 return drv->bdrv_probe_blocksizes(bs, bsz); 798 } else if (filtered) { 799 return bdrv_probe_blocksizes(filtered, bsz); 800 } 801 802 return -ENOTSUP; 803 } 804 805 /** 806 * Try to get @bs's geometry (cyls, heads, sectors). 807 * On success, store them in @geo struct and return 0. 808 * On failure return -errno. 809 * @bs must not be empty. 810 */ 811 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 812 { 813 BlockDriver *drv = bs->drv; 814 BlockDriverState *filtered = bdrv_filter_bs(bs); 815 816 if (drv && drv->bdrv_probe_geometry) { 817 return drv->bdrv_probe_geometry(bs, geo); 818 } else if (filtered) { 819 return bdrv_probe_geometry(filtered, geo); 820 } 821 822 return -ENOTSUP; 823 } 824 825 /* 826 * Create a uniquely-named empty temporary file. 827 * Return 0 upon success, otherwise a negative errno value. 828 */ 829 int get_tmp_filename(char *filename, int size) 830 { 831 #ifdef _WIN32 832 char temp_dir[MAX_PATH]; 833 /* GetTempFileName requires that its output buffer (4th param) 834 have length MAX_PATH or greater. */ 835 assert(size >= MAX_PATH); 836 return (GetTempPath(MAX_PATH, temp_dir) 837 && GetTempFileName(temp_dir, "qem", 0, filename) 838 ? 0 : -GetLastError()); 839 #else 840 int fd; 841 const char *tmpdir; 842 tmpdir = getenv("TMPDIR"); 843 if (!tmpdir) { 844 tmpdir = "/var/tmp"; 845 } 846 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 847 return -EOVERFLOW; 848 } 849 fd = mkstemp(filename); 850 if (fd < 0) { 851 return -errno; 852 } 853 if (close(fd) != 0) { 854 unlink(filename); 855 return -errno; 856 } 857 return 0; 858 #endif 859 } 860 861 /* 862 * Detect host devices. By convention, /dev/cdrom[N] is always 863 * recognized as a host CDROM. 
864 */ 865 static BlockDriver *find_hdev_driver(const char *filename) 866 { 867 int score_max = 0, score; 868 BlockDriver *drv = NULL, *d; 869 870 QLIST_FOREACH(d, &bdrv_drivers, list) { 871 if (d->bdrv_probe_device) { 872 score = d->bdrv_probe_device(filename); 873 if (score > score_max) { 874 score_max = score; 875 drv = d; 876 } 877 } 878 } 879 880 return drv; 881 } 882 883 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 884 { 885 BlockDriver *drv1; 886 887 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 888 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 889 return drv1; 890 } 891 } 892 893 return NULL; 894 } 895 896 BlockDriver *bdrv_find_protocol(const char *filename, 897 bool allow_protocol_prefix, 898 Error **errp) 899 { 900 BlockDriver *drv1; 901 char protocol[128]; 902 int len; 903 const char *p; 904 int i; 905 906 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 907 908 /* 909 * XXX(hch): we really should not let host device detection 910 * override an explicit protocol specification, but moving this 911 * later breaks access to device names with colons in them. 912 * Thanks to the brain-dead persistent naming schemes on udev- 913 * based Linux systems those actually are quite common. 
914 */ 915 drv1 = find_hdev_driver(filename); 916 if (drv1) { 917 return drv1; 918 } 919 920 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 921 return &bdrv_file; 922 } 923 924 p = strchr(filename, ':'); 925 assert(p != NULL); 926 len = p - filename; 927 if (len > sizeof(protocol) - 1) 928 len = sizeof(protocol) - 1; 929 memcpy(protocol, filename, len); 930 protocol[len] = '\0'; 931 932 drv1 = bdrv_do_find_protocol(protocol); 933 if (drv1) { 934 return drv1; 935 } 936 937 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 938 if (block_driver_modules[i].protocol_name && 939 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 940 block_module_load_one(block_driver_modules[i].library_name); 941 break; 942 } 943 } 944 945 drv1 = bdrv_do_find_protocol(protocol); 946 if (!drv1) { 947 error_setg(errp, "Unknown protocol '%s'", protocol); 948 } 949 return drv1; 950 } 951 952 /* 953 * Guess image format by probing its contents. 954 * This is not a good idea when your image is raw (CVE-2008-2004), but 955 * we do it anyway for backward compatibility. 956 * 957 * @buf contains the image's first @buf_size bytes. 958 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 959 * but can be smaller if the image file is smaller) 960 * @filename is its filename. 961 * 962 * For all block drivers, call the bdrv_probe() method to get its 963 * probing score. 964 * Return the first block driver with the highest probing score. 
965 */ 966 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 967 const char *filename) 968 { 969 int score_max = 0, score; 970 BlockDriver *drv = NULL, *d; 971 972 QLIST_FOREACH(d, &bdrv_drivers, list) { 973 if (d->bdrv_probe) { 974 score = d->bdrv_probe(buf, buf_size, filename); 975 if (score > score_max) { 976 score_max = score; 977 drv = d; 978 } 979 } 980 } 981 982 return drv; 983 } 984 985 static int find_image_format(BlockBackend *file, const char *filename, 986 BlockDriver **pdrv, Error **errp) 987 { 988 BlockDriver *drv; 989 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 990 int ret = 0; 991 992 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 993 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 994 *pdrv = &bdrv_raw; 995 return ret; 996 } 997 998 ret = blk_pread(file, 0, buf, sizeof(buf)); 999 if (ret < 0) { 1000 error_setg_errno(errp, -ret, "Could not read image for determining its " 1001 "format"); 1002 *pdrv = NULL; 1003 return ret; 1004 } 1005 1006 drv = bdrv_probe_all(buf, ret, filename); 1007 if (!drv) { 1008 error_setg(errp, "Could not determine image format: No compatible " 1009 "driver found"); 1010 ret = -ENOENT; 1011 } 1012 *pdrv = drv; 1013 return ret; 1014 } 1015 1016 /** 1017 * Set the current 'total_sectors' value 1018 * Return 0 on success, -errno on error. 
1019 */ 1020 int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 1021 { 1022 BlockDriver *drv = bs->drv; 1023 1024 if (!drv) { 1025 return -ENOMEDIUM; 1026 } 1027 1028 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 1029 if (bdrv_is_sg(bs)) 1030 return 0; 1031 1032 /* query actual device if possible, otherwise just trust the hint */ 1033 if (drv->bdrv_getlength) { 1034 int64_t length = drv->bdrv_getlength(bs); 1035 if (length < 0) { 1036 return length; 1037 } 1038 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1039 } 1040 1041 bs->total_sectors = hint; 1042 1043 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1044 return -EFBIG; 1045 } 1046 1047 return 0; 1048 } 1049 1050 /** 1051 * Combines a QDict of new block driver @options with any missing options taken 1052 * from @old_options, so that leaving out an option defaults to its old value. 1053 */ 1054 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1055 QDict *old_options) 1056 { 1057 if (bs->drv && bs->drv->bdrv_join_options) { 1058 bs->drv->bdrv_join_options(options, old_options); 1059 } else { 1060 qdict_join(options, old_options, false); 1061 } 1062 } 1063 1064 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1065 int open_flags, 1066 Error **errp) 1067 { 1068 Error *local_err = NULL; 1069 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1070 BlockdevDetectZeroesOptions detect_zeroes = 1071 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1072 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1073 g_free(value); 1074 if (local_err) { 1075 error_propagate(errp, local_err); 1076 return detect_zeroes; 1077 } 1078 1079 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1080 !(open_flags & BDRV_O_UNMAP)) 1081 { 1082 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1083 "without setting discard operation to unmap"); 1084 } 1085 1086 return detect_zeroes; 1087 } 1088 1089 /** 1090 * Set 
open flags for aio engine 1091 * 1092 * Return 0 on success, -1 if the engine specified is invalid 1093 */ 1094 int bdrv_parse_aio(const char *mode, int *flags) 1095 { 1096 if (!strcmp(mode, "threads")) { 1097 /* do nothing, default */ 1098 } else if (!strcmp(mode, "native")) { 1099 *flags |= BDRV_O_NATIVE_AIO; 1100 #ifdef CONFIG_LINUX_IO_URING 1101 } else if (!strcmp(mode, "io_uring")) { 1102 *flags |= BDRV_O_IO_URING; 1103 #endif 1104 } else { 1105 return -1; 1106 } 1107 1108 return 0; 1109 } 1110 1111 /** 1112 * Set open flags for a given discard mode 1113 * 1114 * Return 0 on success, -1 if the discard mode was invalid. 1115 */ 1116 int bdrv_parse_discard_flags(const char *mode, int *flags) 1117 { 1118 *flags &= ~BDRV_O_UNMAP; 1119 1120 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1121 /* do nothing */ 1122 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1123 *flags |= BDRV_O_UNMAP; 1124 } else { 1125 return -1; 1126 } 1127 1128 return 0; 1129 } 1130 1131 /** 1132 * Set open flags for a given cache mode 1133 * 1134 * Return 0 on success, -1 if the cache mode was invalid. 
1135 */ 1136 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1137 { 1138 *flags &= ~BDRV_O_CACHE_MASK; 1139 1140 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1141 *writethrough = false; 1142 *flags |= BDRV_O_NOCACHE; 1143 } else if (!strcmp(mode, "directsync")) { 1144 *writethrough = true; 1145 *flags |= BDRV_O_NOCACHE; 1146 } else if (!strcmp(mode, "writeback")) { 1147 *writethrough = false; 1148 } else if (!strcmp(mode, "unsafe")) { 1149 *writethrough = false; 1150 *flags |= BDRV_O_NO_FLUSH; 1151 } else if (!strcmp(mode, "writethrough")) { 1152 *writethrough = true; 1153 } else { 1154 return -1; 1155 } 1156 1157 return 0; 1158 } 1159 1160 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1161 { 1162 BlockDriverState *parent = c->opaque; 1163 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); 1164 } 1165 1166 static void bdrv_child_cb_drained_begin(BdrvChild *child) 1167 { 1168 BlockDriverState *bs = child->opaque; 1169 bdrv_do_drained_begin_quiesce(bs, NULL, false); 1170 } 1171 1172 static bool bdrv_child_cb_drained_poll(BdrvChild *child) 1173 { 1174 BlockDriverState *bs = child->opaque; 1175 return bdrv_drain_poll(bs, false, NULL, false); 1176 } 1177 1178 static void bdrv_child_cb_drained_end(BdrvChild *child, 1179 int *drained_end_counter) 1180 { 1181 BlockDriverState *bs = child->opaque; 1182 bdrv_drained_end_no_poll(bs, drained_end_counter); 1183 } 1184 1185 static int bdrv_child_cb_inactivate(BdrvChild *child) 1186 { 1187 BlockDriverState *bs = child->opaque; 1188 assert(bs->open_flags & BDRV_O_INACTIVE); 1189 return 0; 1190 } 1191 1192 static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1193 GSList **ignore, Error **errp) 1194 { 1195 BlockDriverState *bs = child->opaque; 1196 return bdrv_can_set_aio_context(bs, ctx, ignore, errp); 1197 } 1198 1199 static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1200 GSList **ignore) 1201 { 1202 BlockDriverState *bs = 
child->opaque; 1203 return bdrv_set_aio_context_ignore(bs, ctx, ignore); 1204 } 1205 1206 /* 1207 * Returns the options and flags that a temporary snapshot should get, based on 1208 * the originally requested flags (the originally requested image will have 1209 * flags like a backing file) 1210 */ 1211 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 1212 int parent_flags, QDict *parent_options) 1213 { 1214 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 1215 1216 /* For temporary files, unconditional cache=unsafe is fine */ 1217 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 1218 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 1219 1220 /* Copy the read-only and discard options from the parent */ 1221 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1222 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); 1223 1224 /* aio=native doesn't work for cache.direct=off, so disable it for the 1225 * temporary snapshot */ 1226 *child_flags &= ~BDRV_O_NATIVE_AIO; 1227 } 1228 1229 static void bdrv_backing_attach(BdrvChild *c) 1230 { 1231 BlockDriverState *parent = c->opaque; 1232 BlockDriverState *backing_hd = c->bs; 1233 1234 assert(!parent->backing_blocker); 1235 error_setg(&parent->backing_blocker, 1236 "node is used as backing hd of '%s'", 1237 bdrv_get_device_or_node_name(parent)); 1238 1239 bdrv_refresh_filename(backing_hd); 1240 1241 parent->open_flags &= ~BDRV_O_NO_BACKING; 1242 1243 bdrv_op_block_all(backing_hd, parent->backing_blocker); 1244 /* Otherwise we won't be able to commit or stream */ 1245 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1246 parent->backing_blocker); 1247 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 1248 parent->backing_blocker); 1249 /* 1250 * We do backup in 3 ways: 1251 * 1. drive backup 1252 * The target bs is new opened, and the source is top BDS 1253 * 2. 
blockdev backup 1254 * Both the source and the target are top BDSes. 1255 * 3. internal backup(used for block replication) 1256 * Both the source and the target are backing file 1257 * 1258 * In case 1 and 2, neither the source nor the target is the backing file. 1259 * In case 3, we will block the top BDS, so there is only one block job 1260 * for the top BDS and its backing chain. 1261 */ 1262 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 1263 parent->backing_blocker); 1264 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 1265 parent->backing_blocker); 1266 } 1267 1268 static void bdrv_backing_detach(BdrvChild *c) 1269 { 1270 BlockDriverState *parent = c->opaque; 1271 1272 assert(parent->backing_blocker); 1273 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 1274 error_free(parent->backing_blocker); 1275 parent->backing_blocker = NULL; 1276 } 1277 1278 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1279 const char *filename, Error **errp) 1280 { 1281 BlockDriverState *parent = c->opaque; 1282 bool read_only = bdrv_is_read_only(parent); 1283 int ret; 1284 1285 if (read_only) { 1286 ret = bdrv_reopen_set_read_only(parent, false, errp); 1287 if (ret < 0) { 1288 return ret; 1289 } 1290 } 1291 1292 ret = bdrv_change_backing_file(parent, filename, 1293 base->drv ? base->drv->format_name : "", 1294 false); 1295 if (ret < 0) { 1296 error_setg_errno(errp, -ret, "Could not update backing file link"); 1297 } 1298 1299 if (read_only) { 1300 bdrv_reopen_set_read_only(parent, true, NULL); 1301 } 1302 1303 return ret; 1304 } 1305 1306 /* 1307 * Returns the options and flags that a generic child of a BDS should 1308 * get, based on the given options and flags for the parent BDS. 
1309 */ 1310 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, 1311 int *child_flags, QDict *child_options, 1312 int parent_flags, QDict *parent_options) 1313 { 1314 int flags = parent_flags; 1315 1316 /* 1317 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. 1318 * Generally, the question to answer is: Should this child be 1319 * format-probed by default? 1320 */ 1321 1322 /* 1323 * Pure and non-filtered data children of non-format nodes should 1324 * be probed by default (even when the node itself has BDRV_O_PROTOCOL 1325 * set). This only affects a very limited set of drivers (namely 1326 * quorum and blkverify when this comment was written). 1327 * Force-clear BDRV_O_PROTOCOL then. 1328 */ 1329 if (!parent_is_format && 1330 (role & BDRV_CHILD_DATA) && 1331 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) 1332 { 1333 flags &= ~BDRV_O_PROTOCOL; 1334 } 1335 1336 /* 1337 * All children of format nodes (except for COW children) and all 1338 * metadata children in general should never be format-probed. 1339 * Force-set BDRV_O_PROTOCOL then. 1340 */ 1341 if ((parent_is_format && !(role & BDRV_CHILD_COW)) || 1342 (role & BDRV_CHILD_METADATA)) 1343 { 1344 flags |= BDRV_O_PROTOCOL; 1345 } 1346 1347 /* 1348 * If the cache mode isn't explicitly set, inherit direct and no-flush from 1349 * the parent. 
1350 */ 1351 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1352 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1353 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1354 1355 if (role & BDRV_CHILD_COW) { 1356 /* backing files are opened read-only by default */ 1357 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1358 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); 1359 } else { 1360 /* Inherit the read-only option from the parent if it's not set */ 1361 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1362 qdict_copy_default(child_options, parent_options, 1363 BDRV_OPT_AUTO_READ_ONLY); 1364 } 1365 1366 /* 1367 * bdrv_co_pdiscard() respects unmap policy for the parent, so we 1368 * can default to enable it on lower layers regardless of the 1369 * parent option. 1370 */ 1371 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 1372 1373 /* Clear flags that only apply to the top layer */ 1374 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 1375 1376 if (role & BDRV_CHILD_METADATA) { 1377 flags &= ~BDRV_O_NO_IO; 1378 } 1379 if (role & BDRV_CHILD_COW) { 1380 flags &= ~BDRV_O_TEMPORARY; 1381 } 1382 1383 *child_flags = flags; 1384 } 1385 1386 static void bdrv_child_cb_attach(BdrvChild *child) 1387 { 1388 BlockDriverState *bs = child->opaque; 1389 1390 if (child->role & BDRV_CHILD_COW) { 1391 bdrv_backing_attach(child); 1392 } 1393 1394 bdrv_apply_subtree_drain(child, bs); 1395 } 1396 1397 static void bdrv_child_cb_detach(BdrvChild *child) 1398 { 1399 BlockDriverState *bs = child->opaque; 1400 1401 if (child->role & BDRV_CHILD_COW) { 1402 bdrv_backing_detach(child); 1403 } 1404 1405 bdrv_unapply_subtree_drain(child, bs); 1406 } 1407 1408 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, 1409 const char *filename, Error **errp) 1410 { 1411 if (c->role & BDRV_CHILD_COW) { 
1412 return bdrv_backing_update_filename(c, base, filename, errp); 1413 } 1414 return 0; 1415 } 1416 1417 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) 1418 { 1419 BlockDriverState *bs = c->opaque; 1420 1421 return bdrv_get_aio_context(bs); 1422 } 1423 1424 const BdrvChildClass child_of_bds = { 1425 .parent_is_bds = true, 1426 .get_parent_desc = bdrv_child_get_parent_desc, 1427 .inherit_options = bdrv_inherited_options, 1428 .drained_begin = bdrv_child_cb_drained_begin, 1429 .drained_poll = bdrv_child_cb_drained_poll, 1430 .drained_end = bdrv_child_cb_drained_end, 1431 .attach = bdrv_child_cb_attach, 1432 .detach = bdrv_child_cb_detach, 1433 .inactivate = bdrv_child_cb_inactivate, 1434 .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, 1435 .set_aio_ctx = bdrv_child_cb_set_aio_ctx, 1436 .update_filename = bdrv_child_cb_update_filename, 1437 .get_parent_aio_context = child_of_bds_get_parent_aio_context, 1438 }; 1439 1440 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) 1441 { 1442 return c->klass->get_parent_aio_context(c); 1443 } 1444 1445 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1446 { 1447 int open_flags = flags; 1448 1449 /* 1450 * Clear flags that are internal to the block layer before opening the 1451 * image. 
1452 */ 1453 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1454 1455 return open_flags; 1456 } 1457 1458 static void update_flags_from_options(int *flags, QemuOpts *opts) 1459 { 1460 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); 1461 1462 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1463 *flags |= BDRV_O_NO_FLUSH; 1464 } 1465 1466 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1467 *flags |= BDRV_O_NOCACHE; 1468 } 1469 1470 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { 1471 *flags |= BDRV_O_RDWR; 1472 } 1473 1474 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { 1475 *flags |= BDRV_O_AUTO_RDONLY; 1476 } 1477 } 1478 1479 static void update_options_from_flags(QDict *options, int flags) 1480 { 1481 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1482 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1483 } 1484 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 1485 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1486 flags & BDRV_O_NO_FLUSH); 1487 } 1488 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1489 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1490 } 1491 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { 1492 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, 1493 flags & BDRV_O_AUTO_RDONLY); 1494 } 1495 } 1496 1497 static void bdrv_assign_node_name(BlockDriverState *bs, 1498 const char *node_name, 1499 Error **errp) 1500 { 1501 char *gen_node_name = NULL; 1502 1503 if (!node_name) { 1504 node_name = gen_node_name = id_generate(ID_BLOCK); 1505 } else if (!id_wellformed(node_name)) { 1506 /* 1507 * Check for empty string or invalid characters, but not if it is 1508 * generated (generated names use characters not available to the user) 1509 */ 1510 error_setg(errp, "Invalid node-name: '%s'", node_name); 1511 return; 1512 } 1513 1514 /* takes care of avoiding namespaces collisions */ 
1515 if (blk_by_name(node_name)) { 1516 error_setg(errp, "node-name=%s is conflicting with a device id", 1517 node_name); 1518 goto out; 1519 } 1520 1521 /* takes care of avoiding duplicates node names */ 1522 if (bdrv_find_node(node_name)) { 1523 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name); 1524 goto out; 1525 } 1526 1527 /* Make sure that the node name isn't truncated */ 1528 if (strlen(node_name) >= sizeof(bs->node_name)) { 1529 error_setg(errp, "Node name too long"); 1530 goto out; 1531 } 1532 1533 /* copy node name into the bs and insert it into the graph list */ 1534 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1535 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1536 out: 1537 g_free(gen_node_name); 1538 } 1539 1540 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1541 const char *node_name, QDict *options, 1542 int open_flags, Error **errp) 1543 { 1544 Error *local_err = NULL; 1545 int i, ret; 1546 1547 bdrv_assign_node_name(bs, node_name, &local_err); 1548 if (local_err) { 1549 error_propagate(errp, local_err); 1550 return -EINVAL; 1551 } 1552 1553 bs->drv = drv; 1554 bs->opaque = g_malloc0(drv->instance_size); 1555 1556 if (drv->bdrv_file_open) { 1557 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1558 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1559 } else if (drv->bdrv_open) { 1560 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1561 } else { 1562 ret = 0; 1563 } 1564 1565 if (ret < 0) { 1566 if (local_err) { 1567 error_propagate(errp, local_err); 1568 } else if (bs->filename[0]) { 1569 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1570 } else { 1571 error_setg_errno(errp, -ret, "Could not open image"); 1572 } 1573 goto open_failed; 1574 } 1575 1576 ret = refresh_total_sectors(bs, bs->total_sectors); 1577 if (ret < 0) { 1578 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 1579 return ret; 1580 } 1581 1582 
bdrv_refresh_limits(bs, NULL, &local_err); 1583 if (local_err) { 1584 error_propagate(errp, local_err); 1585 return -EINVAL; 1586 } 1587 1588 assert(bdrv_opt_mem_align(bs) != 0); 1589 assert(bdrv_min_mem_align(bs) != 0); 1590 assert(is_power_of_2(bs->bl.request_alignment)); 1591 1592 for (i = 0; i < bs->quiesce_counter; i++) { 1593 if (drv->bdrv_co_drain_begin) { 1594 drv->bdrv_co_drain_begin(bs); 1595 } 1596 } 1597 1598 return 0; 1599 open_failed: 1600 bs->drv = NULL; 1601 if (bs->file != NULL) { 1602 bdrv_unref_child(bs, bs->file); 1603 bs->file = NULL; 1604 } 1605 g_free(bs->opaque); 1606 bs->opaque = NULL; 1607 return ret; 1608 } 1609 1610 /* 1611 * Create and open a block node. 1612 * 1613 * @options is a QDict of options to pass to the block drivers, or NULL for an 1614 * empty set of options. The reference to the QDict belongs to the block layer 1615 * after the call (even on failure), so if the caller intends to reuse the 1616 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 1617 */ 1618 BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, 1619 const char *node_name, 1620 QDict *options, int flags, 1621 Error **errp) 1622 { 1623 BlockDriverState *bs; 1624 int ret; 1625 1626 bs = bdrv_new(); 1627 bs->open_flags = flags; 1628 bs->options = options ?: qdict_new(); 1629 bs->explicit_options = qdict_clone_shallow(bs->options); 1630 bs->opaque = NULL; 1631 1632 update_options_from_flags(bs->options, flags); 1633 1634 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1635 if (ret < 0) { 1636 qobject_unref(bs->explicit_options); 1637 bs->explicit_options = NULL; 1638 qobject_unref(bs->options); 1639 bs->options = NULL; 1640 bdrv_unref(bs); 1641 return NULL; 1642 } 1643 1644 return bs; 1645 } 1646 1647 /* Create and open a block node. 
*/ 1648 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1649 int flags, Error **errp) 1650 { 1651 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); 1652 } 1653 1654 QemuOptsList bdrv_runtime_opts = { 1655 .name = "bdrv_common", 1656 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1657 .desc = { 1658 { 1659 .name = "node-name", 1660 .type = QEMU_OPT_STRING, 1661 .help = "Node name of the block device node", 1662 }, 1663 { 1664 .name = "driver", 1665 .type = QEMU_OPT_STRING, 1666 .help = "Block driver to use for the node", 1667 }, 1668 { 1669 .name = BDRV_OPT_CACHE_DIRECT, 1670 .type = QEMU_OPT_BOOL, 1671 .help = "Bypass software writeback cache on the host", 1672 }, 1673 { 1674 .name = BDRV_OPT_CACHE_NO_FLUSH, 1675 .type = QEMU_OPT_BOOL, 1676 .help = "Ignore flush requests", 1677 }, 1678 { 1679 .name = BDRV_OPT_READ_ONLY, 1680 .type = QEMU_OPT_BOOL, 1681 .help = "Node is opened in read-only mode", 1682 }, 1683 { 1684 .name = BDRV_OPT_AUTO_READ_ONLY, 1685 .type = QEMU_OPT_BOOL, 1686 .help = "Node can become read-only if opening read-write fails", 1687 }, 1688 { 1689 .name = "detect-zeroes", 1690 .type = QEMU_OPT_STRING, 1691 .help = "try to optimize zero writes (off, on, unmap)", 1692 }, 1693 { 1694 .name = BDRV_OPT_DISCARD, 1695 .type = QEMU_OPT_STRING, 1696 .help = "discard operation (ignore/off, unmap/on)", 1697 }, 1698 { 1699 .name = BDRV_OPT_FORCE_SHARE, 1700 .type = QEMU_OPT_BOOL, 1701 .help = "always accept other writers (default: off)", 1702 }, 1703 { /* end of list */ } 1704 }, 1705 }; 1706 1707 QemuOptsList bdrv_create_opts_simple = { 1708 .name = "simple-create-opts", 1709 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1710 .desc = { 1711 { 1712 .name = BLOCK_OPT_SIZE, 1713 .type = QEMU_OPT_SIZE, 1714 .help = "Virtual disk size" 1715 }, 1716 { 1717 .name = BLOCK_OPT_PREALLOC, 1718 .type = QEMU_OPT_STRING, 1719 .help = "Preallocation mode (allowed values: off)" 1720 }, 1721 { /* end 
of list */ } 1722 } 1723 }; 1724 1725 /* 1726 * Common part for opening disk images and files 1727 * 1728 * Removes all processed options from *options. 1729 */ 1730 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1731 QDict *options, Error **errp) 1732 { 1733 int ret, open_flags; 1734 const char *filename; 1735 const char *driver_name = NULL; 1736 const char *node_name = NULL; 1737 const char *discard; 1738 QemuOpts *opts; 1739 BlockDriver *drv; 1740 Error *local_err = NULL; 1741 bool ro; 1742 1743 assert(bs->file == NULL); 1744 assert(options != NULL && bs->options != options); 1745 1746 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1747 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1748 ret = -EINVAL; 1749 goto fail_opts; 1750 } 1751 1752 update_flags_from_options(&bs->open_flags, opts); 1753 1754 driver_name = qemu_opt_get(opts, "driver"); 1755 drv = bdrv_find_format(driver_name); 1756 assert(drv != NULL); 1757 1758 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1759 1760 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1761 error_setg(errp, 1762 BDRV_OPT_FORCE_SHARE 1763 "=on can only be used with read-only images"); 1764 ret = -EINVAL; 1765 goto fail_opts; 1766 } 1767 1768 if (file != NULL) { 1769 bdrv_refresh_filename(blk_bs(file)); 1770 filename = blk_bs(file)->filename; 1771 } else { 1772 /* 1773 * Caution: while qdict_get_try_str() is fine, getting 1774 * non-string types would require more care. When @options 1775 * come from -blockdev or blockdev_add, its members are typed 1776 * according to the QAPI schema, but when they come from 1777 * -drive, they're all QString. 
1778 */ 1779 filename = qdict_get_try_str(options, "filename"); 1780 } 1781 1782 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1783 error_setg(errp, "The '%s' block driver requires a file name", 1784 drv->format_name); 1785 ret = -EINVAL; 1786 goto fail_opts; 1787 } 1788 1789 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1790 drv->format_name); 1791 1792 ro = bdrv_is_read_only(bs); 1793 1794 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { 1795 if (!ro && bdrv_is_whitelisted(drv, true)) { 1796 ret = bdrv_apply_auto_read_only(bs, NULL, NULL); 1797 } else { 1798 ret = -ENOTSUP; 1799 } 1800 if (ret < 0) { 1801 error_setg(errp, 1802 !ro && bdrv_is_whitelisted(drv, true) 1803 ? "Driver '%s' can only be used for read-only devices" 1804 : "Driver '%s' is not whitelisted", 1805 drv->format_name); 1806 goto fail_opts; 1807 } 1808 } 1809 1810 /* bdrv_new() and bdrv_close() make it so */ 1811 assert(qatomic_read(&bs->copy_on_read) == 0); 1812 1813 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1814 if (!ro) { 1815 bdrv_enable_copy_on_read(bs); 1816 } else { 1817 error_setg(errp, "Can't use copy-on-read on read-only device"); 1818 ret = -EINVAL; 1819 goto fail_opts; 1820 } 1821 } 1822 1823 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); 1824 if (discard != NULL) { 1825 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1826 error_setg(errp, "Invalid discard option"); 1827 ret = -EINVAL; 1828 goto fail_opts; 1829 } 1830 } 1831 1832 bs->detect_zeroes = 1833 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); 1834 if (local_err) { 1835 error_propagate(errp, local_err); 1836 ret = -EINVAL; 1837 goto fail_opts; 1838 } 1839 1840 if (filename != NULL) { 1841 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1842 } else { 1843 bs->filename[0] = '\0'; 1844 } 1845 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1846 1847 /* Open the image, either directly or using a protocol */ 1848 open_flags = 
bdrv_open_flags(bs, bs->open_flags); 1849 node_name = qemu_opt_get(opts, "node-name"); 1850 1851 assert(!drv->bdrv_file_open || file == NULL); 1852 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1853 if (ret < 0) { 1854 goto fail_opts; 1855 } 1856 1857 qemu_opts_del(opts); 1858 return 0; 1859 1860 fail_opts: 1861 qemu_opts_del(opts); 1862 return ret; 1863 } 1864 1865 static QDict *parse_json_filename(const char *filename, Error **errp) 1866 { 1867 QObject *options_obj; 1868 QDict *options; 1869 int ret; 1870 1871 ret = strstart(filename, "json:", &filename); 1872 assert(ret); 1873 1874 options_obj = qobject_from_json(filename, errp); 1875 if (!options_obj) { 1876 error_prepend(errp, "Could not parse the JSON options: "); 1877 return NULL; 1878 } 1879 1880 options = qobject_to(QDict, options_obj); 1881 if (!options) { 1882 qobject_unref(options_obj); 1883 error_setg(errp, "Invalid JSON object given"); 1884 return NULL; 1885 } 1886 1887 qdict_flatten(options); 1888 1889 return options; 1890 } 1891 1892 static void parse_json_protocol(QDict *options, const char **pfilename, 1893 Error **errp) 1894 { 1895 QDict *json_options; 1896 Error *local_err = NULL; 1897 1898 /* Parse json: pseudo-protocol */ 1899 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1900 return; 1901 } 1902 1903 json_options = parse_json_filename(*pfilename, &local_err); 1904 if (local_err) { 1905 error_propagate(errp, local_err); 1906 return; 1907 } 1908 1909 /* Options given in the filename have lower priority than options 1910 * specified directly */ 1911 qdict_join(options, json_options, false); 1912 qobject_unref(json_options); 1913 *pfilename = NULL; 1914 } 1915 1916 /* 1917 * Fills in default options for opening images and converts the legacy 1918 * filename/flags pair to option QDict entries. 1919 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1920 * block driver has been specified explicitly. 
1921 */ 1922 static int bdrv_fill_options(QDict **options, const char *filename, 1923 int *flags, Error **errp) 1924 { 1925 const char *drvname; 1926 bool protocol = *flags & BDRV_O_PROTOCOL; 1927 bool parse_filename = false; 1928 BlockDriver *drv = NULL; 1929 Error *local_err = NULL; 1930 1931 /* 1932 * Caution: while qdict_get_try_str() is fine, getting non-string 1933 * types would require more care. When @options come from 1934 * -blockdev or blockdev_add, its members are typed according to 1935 * the QAPI schema, but when they come from -drive, they're all 1936 * QString. 1937 */ 1938 drvname = qdict_get_try_str(*options, "driver"); 1939 if (drvname) { 1940 drv = bdrv_find_format(drvname); 1941 if (!drv) { 1942 error_setg(errp, "Unknown driver '%s'", drvname); 1943 return -ENOENT; 1944 } 1945 /* If the user has explicitly specified the driver, this choice should 1946 * override the BDRV_O_PROTOCOL flag */ 1947 protocol = drv->bdrv_file_open; 1948 } 1949 1950 if (protocol) { 1951 *flags |= BDRV_O_PROTOCOL; 1952 } else { 1953 *flags &= ~BDRV_O_PROTOCOL; 1954 } 1955 1956 /* Translate cache options from flags into options */ 1957 update_options_from_flags(*options, *flags); 1958 1959 /* Fetch the file name from the options QDict if necessary */ 1960 if (protocol && filename) { 1961 if (!qdict_haskey(*options, "filename")) { 1962 qdict_put_str(*options, "filename", filename); 1963 parse_filename = true; 1964 } else { 1965 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1966 "the same time"); 1967 return -EINVAL; 1968 } 1969 } 1970 1971 /* Find the right block driver */ 1972 /* See cautionary note on accessing @options above */ 1973 filename = qdict_get_try_str(*options, "filename"); 1974 1975 if (!drvname && protocol) { 1976 if (filename) { 1977 drv = bdrv_find_protocol(filename, parse_filename, errp); 1978 if (!drv) { 1979 return -EINVAL; 1980 } 1981 1982 drvname = drv->format_name; 1983 qdict_put_str(*options, "driver", drvname); 1984 } else { 
1985 error_setg(errp, "Must specify either driver or file"); 1986 return -EINVAL; 1987 } 1988 } 1989 1990 assert(drv || !protocol); 1991 1992 /* Driver-specific filename parsing */ 1993 if (drv && drv->bdrv_parse_filename && parse_filename) { 1994 drv->bdrv_parse_filename(filename, *options, &local_err); 1995 if (local_err) { 1996 error_propagate(errp, local_err); 1997 return -EINVAL; 1998 } 1999 2000 if (!drv->bdrv_needs_filename) { 2001 qdict_del(*options, "filename"); 2002 } 2003 } 2004 2005 return 0; 2006 } 2007 2008 typedef struct BlockReopenQueueEntry { 2009 bool prepared; 2010 bool perms_checked; 2011 BDRVReopenState state; 2012 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 2013 } BlockReopenQueueEntry; 2014 2015 /* 2016 * Return the flags that @bs will have after the reopens in @q have 2017 * successfully completed. If @q is NULL (or @bs is not contained in @q), 2018 * return the current flags. 2019 */ 2020 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 2021 { 2022 BlockReopenQueueEntry *entry; 2023 2024 if (q != NULL) { 2025 QTAILQ_FOREACH(entry, q, entry) { 2026 if (entry->state.bs == bs) { 2027 return entry->state.flags; 2028 } 2029 } 2030 } 2031 2032 return bs->open_flags; 2033 } 2034 2035 /* Returns whether the image file can be written to after the reopen queue @q 2036 * has been successfully applied, or right now if @q is NULL. */ 2037 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 2038 BlockReopenQueue *q) 2039 { 2040 int flags = bdrv_reopen_get_flags(q, bs); 2041 2042 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 2043 } 2044 2045 /* 2046 * Return whether the BDS can be written to. This is not necessarily 2047 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2048 * be written to but do not count as read-only images. 
2049 */ 2050 bool bdrv_is_writable(BlockDriverState *bs) 2051 { 2052 return bdrv_is_writable_after_reopen(bs, NULL); 2053 } 2054 2055 static char *bdrv_child_user_desc(BdrvChild *c) 2056 { 2057 return c->klass->get_parent_desc(c); 2058 } 2059 2060 /* 2061 * Check that @a allows everything that @b needs. @a and @b must reference same 2062 * child node. 2063 */ 2064 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) 2065 { 2066 const char *child_bs_name; 2067 g_autofree char *a_user = NULL; 2068 g_autofree char *b_user = NULL; 2069 g_autofree char *perms = NULL; 2070 2071 assert(a->bs); 2072 assert(a->bs == b->bs); 2073 2074 if ((b->perm & a->shared_perm) == b->perm) { 2075 return true; 2076 } 2077 2078 child_bs_name = bdrv_get_node_name(b->bs); 2079 a_user = bdrv_child_user_desc(a); 2080 b_user = bdrv_child_user_desc(b); 2081 perms = bdrv_perm_names(b->perm & ~a->shared_perm); 2082 2083 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " 2084 "both required by %s (uses node '%s' as '%s' child) and " 2085 "unshared by %s (uses node '%s' as '%s' child).", 2086 child_bs_name, perms, 2087 b_user, child_bs_name, b->name, 2088 a_user, child_bs_name, a->name); 2089 2090 return false; 2091 } 2092 2093 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) 2094 { 2095 BdrvChild *a, *b; 2096 2097 /* 2098 * During the loop we'll look at each pair twice. That's correct because 2099 * bdrv_a_allow_b() is asymmetric and we should check each pair in both 2100 * directions. 
2101 */ 2102 QLIST_FOREACH(a, &bs->parents, next_parent) { 2103 QLIST_FOREACH(b, &bs->parents, next_parent) { 2104 if (a == b) { 2105 continue; 2106 } 2107 2108 if (!bdrv_a_allow_b(a, b, errp)) { 2109 return true; 2110 } 2111 } 2112 } 2113 2114 return false; 2115 } 2116 2117 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2118 BdrvChild *c, BdrvChildRole role, 2119 BlockReopenQueue *reopen_queue, 2120 uint64_t parent_perm, uint64_t parent_shared, 2121 uint64_t *nperm, uint64_t *nshared) 2122 { 2123 assert(bs->drv && bs->drv->bdrv_child_perm); 2124 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2125 parent_perm, parent_shared, 2126 nperm, nshared); 2127 /* TODO Take force_share from reopen_queue */ 2128 if (child_bs && child_bs->force_share) { 2129 *nshared = BLK_PERM_ALL; 2130 } 2131 } 2132 2133 /* 2134 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for 2135 * nodes that are already in the @list, of course) so that final list is 2136 * topologically sorted. Return the result (GSList @list object is updated, so 2137 * don't use old reference after function call). 2138 * 2139 * On function start @list must be already topologically sorted and for any node 2140 * in the @list the whole subtree of the node must be in the @list as well. The 2141 * simplest way to satisfy this criteria: use only result of 2142 * bdrv_topological_dfs() or NULL as @list parameter. 
2143 */ 2144 static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, 2145 BlockDriverState *bs) 2146 { 2147 BdrvChild *child; 2148 g_autoptr(GHashTable) local_found = NULL; 2149 2150 if (!found) { 2151 assert(!list); 2152 found = local_found = g_hash_table_new(NULL, NULL); 2153 } 2154 2155 if (g_hash_table_contains(found, bs)) { 2156 return list; 2157 } 2158 g_hash_table_add(found, bs); 2159 2160 QLIST_FOREACH(child, &bs->children, next) { 2161 list = bdrv_topological_dfs(list, found, child->bs); 2162 } 2163 2164 return g_slist_prepend(list, bs); 2165 } 2166 2167 typedef struct BdrvChildSetPermState { 2168 BdrvChild *child; 2169 uint64_t old_perm; 2170 uint64_t old_shared_perm; 2171 } BdrvChildSetPermState; 2172 2173 static void bdrv_child_set_perm_abort(void *opaque) 2174 { 2175 BdrvChildSetPermState *s = opaque; 2176 2177 s->child->perm = s->old_perm; 2178 s->child->shared_perm = s->old_shared_perm; 2179 } 2180 2181 static TransactionActionDrv bdrv_child_set_pem_drv = { 2182 .abort = bdrv_child_set_perm_abort, 2183 .clean = g_free, 2184 }; 2185 2186 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, 2187 uint64_t shared, Transaction *tran) 2188 { 2189 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); 2190 2191 *s = (BdrvChildSetPermState) { 2192 .child = c, 2193 .old_perm = c->perm, 2194 .old_shared_perm = c->shared_perm, 2195 }; 2196 2197 c->perm = perm; 2198 c->shared_perm = shared; 2199 2200 tran_add(tran, &bdrv_child_set_pem_drv, s); 2201 } 2202 2203 static void bdrv_drv_set_perm_commit(void *opaque) 2204 { 2205 BlockDriverState *bs = opaque; 2206 uint64_t cumulative_perms, cumulative_shared_perms; 2207 2208 if (bs->drv->bdrv_set_perm) { 2209 bdrv_get_cumulative_perm(bs, &cumulative_perms, 2210 &cumulative_shared_perms); 2211 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 2212 } 2213 } 2214 2215 static void bdrv_drv_set_perm_abort(void *opaque) 2216 { 2217 BlockDriverState *bs = opaque; 2218 2219 if 
(bs->drv->bdrv_abort_perm_update) { 2220 bs->drv->bdrv_abort_perm_update(bs); 2221 } 2222 } 2223 2224 TransactionActionDrv bdrv_drv_set_perm_drv = { 2225 .abort = bdrv_drv_set_perm_abort, 2226 .commit = bdrv_drv_set_perm_commit, 2227 }; 2228 2229 static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, 2230 uint64_t shared_perm, Transaction *tran, 2231 Error **errp) 2232 { 2233 if (!bs->drv) { 2234 return 0; 2235 } 2236 2237 if (bs->drv->bdrv_check_perm) { 2238 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); 2239 if (ret < 0) { 2240 return ret; 2241 } 2242 } 2243 2244 if (tran) { 2245 tran_add(tran, &bdrv_drv_set_perm_drv, bs); 2246 } 2247 2248 return 0; 2249 } 2250 2251 typedef struct BdrvReplaceChildState { 2252 BdrvChild *child; 2253 BlockDriverState *old_bs; 2254 } BdrvReplaceChildState; 2255 2256 static void bdrv_replace_child_commit(void *opaque) 2257 { 2258 BdrvReplaceChildState *s = opaque; 2259 2260 bdrv_unref(s->old_bs); 2261 } 2262 2263 static void bdrv_replace_child_abort(void *opaque) 2264 { 2265 BdrvReplaceChildState *s = opaque; 2266 BlockDriverState *new_bs = s->child->bs; 2267 2268 /* old_bs reference is transparently moved from @s to @s->child */ 2269 bdrv_replace_child_noperm(s->child, s->old_bs); 2270 bdrv_unref(new_bs); 2271 } 2272 2273 static TransactionActionDrv bdrv_replace_child_drv = { 2274 .commit = bdrv_replace_child_commit, 2275 .abort = bdrv_replace_child_abort, 2276 .clean = g_free, 2277 }; 2278 2279 /* 2280 * bdrv_replace_child_tran 2281 * 2282 * Note: real unref of old_bs is done only on commit. 2283 * 2284 * The function doesn't update permissions, caller is responsible for this. 
2285 */ 2286 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, 2287 Transaction *tran) 2288 { 2289 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); 2290 *s = (BdrvReplaceChildState) { 2291 .child = child, 2292 .old_bs = child->bs, 2293 }; 2294 tran_add(tran, &bdrv_replace_child_drv, s); 2295 2296 if (new_bs) { 2297 bdrv_ref(new_bs); 2298 } 2299 bdrv_replace_child_noperm(child, new_bs); 2300 /* old_bs reference is transparently moved from @child to @s */ 2301 } 2302 2303 /* 2304 * Refresh permissions in @bs subtree. The function is intended to be called 2305 * after some graph modification that was done without permission update. 2306 */ 2307 static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, 2308 Transaction *tran, Error **errp) 2309 { 2310 BlockDriver *drv = bs->drv; 2311 BdrvChild *c; 2312 int ret; 2313 uint64_t cumulative_perms, cumulative_shared_perms; 2314 2315 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); 2316 2317 /* Write permissions never work with read-only images */ 2318 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2319 !bdrv_is_writable_after_reopen(bs, q)) 2320 { 2321 if (!bdrv_is_writable_after_reopen(bs, NULL)) { 2322 error_setg(errp, "Block node is read-only"); 2323 } else { 2324 error_setg(errp, "Read-only block node '%s' cannot support " 2325 "read-write users", bdrv_get_node_name(bs)); 2326 } 2327 2328 return -EPERM; 2329 } 2330 2331 /* 2332 * Unaligned requests will automatically be aligned to bl.request_alignment 2333 * and without RESIZE we can't extend requests to write to space beyond the 2334 * end of the image, so it's required that the image size is aligned. 
2335 */ 2336 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2337 !(cumulative_perms & BLK_PERM_RESIZE)) 2338 { 2339 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { 2340 error_setg(errp, "Cannot get 'write' permission without 'resize': " 2341 "Image size is not a multiple of request " 2342 "alignment"); 2343 return -EPERM; 2344 } 2345 } 2346 2347 /* Check this node */ 2348 if (!drv) { 2349 return 0; 2350 } 2351 2352 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, 2353 errp); 2354 if (ret < 0) { 2355 return ret; 2356 } 2357 2358 /* Drivers that never have children can omit .bdrv_child_perm() */ 2359 if (!drv->bdrv_child_perm) { 2360 assert(QLIST_EMPTY(&bs->children)); 2361 return 0; 2362 } 2363 2364 /* Check all children */ 2365 QLIST_FOREACH(c, &bs->children, next) { 2366 uint64_t cur_perm, cur_shared; 2367 2368 bdrv_child_perm(bs, c->bs, c, c->role, q, 2369 cumulative_perms, cumulative_shared_perms, 2370 &cur_perm, &cur_shared); 2371 bdrv_child_set_perm(c, cur_perm, cur_shared, tran); 2372 } 2373 2374 return 0; 2375 } 2376 2377 static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, 2378 Transaction *tran, Error **errp) 2379 { 2380 int ret; 2381 BlockDriverState *bs; 2382 2383 for ( ; list; list = list->next) { 2384 bs = list->data; 2385 2386 if (bdrv_parent_perms_conflict(bs, errp)) { 2387 return -EINVAL; 2388 } 2389 2390 ret = bdrv_node_refresh_perm(bs, q, tran, errp); 2391 if (ret < 0) { 2392 return ret; 2393 } 2394 } 2395 2396 return 0; 2397 } 2398 2399 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 2400 uint64_t *shared_perm) 2401 { 2402 BdrvChild *c; 2403 uint64_t cumulative_perms = 0; 2404 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2405 2406 QLIST_FOREACH(c, &bs->parents, next_parent) { 2407 cumulative_perms |= c->perm; 2408 cumulative_shared_perms &= c->shared_perm; 2409 } 2410 2411 *perm = cumulative_perms; 2412 *shared_perm = 
cumulative_shared_perms; 2413 } 2414 2415 char *bdrv_perm_names(uint64_t perm) 2416 { 2417 struct perm_name { 2418 uint64_t perm; 2419 const char *name; 2420 } permissions[] = { 2421 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 2422 { BLK_PERM_WRITE, "write" }, 2423 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 2424 { BLK_PERM_RESIZE, "resize" }, 2425 { BLK_PERM_GRAPH_MOD, "change children" }, 2426 { 0, NULL } 2427 }; 2428 2429 GString *result = g_string_sized_new(30); 2430 struct perm_name *p; 2431 2432 for (p = permissions; p->name; p++) { 2433 if (perm & p->perm) { 2434 if (result->len > 0) { 2435 g_string_append(result, ", "); 2436 } 2437 g_string_append(result, p->name); 2438 } 2439 } 2440 2441 return g_string_free(result, FALSE); 2442 } 2443 2444 2445 static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) 2446 { 2447 int ret; 2448 Transaction *tran = tran_new(); 2449 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); 2450 2451 ret = bdrv_list_refresh_perms(list, NULL, tran, errp); 2452 tran_finalize(tran, ret); 2453 2454 return ret; 2455 } 2456 2457 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 2458 Error **errp) 2459 { 2460 Error *local_err = NULL; 2461 Transaction *tran = tran_new(); 2462 int ret; 2463 2464 bdrv_child_set_perm(c, perm, shared, tran); 2465 2466 ret = bdrv_refresh_perms(c->bs, &local_err); 2467 2468 tran_finalize(tran, ret); 2469 2470 if (ret < 0) { 2471 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { 2472 /* tighten permissions */ 2473 error_propagate(errp, local_err); 2474 } else { 2475 /* 2476 * Our caller may intend to only loosen restrictions and 2477 * does not expect this function to fail. Errors are not 2478 * fatal in such a case, so we can just hide them from our 2479 * caller. 
2480 */ 2481 error_free(local_err); 2482 ret = 0; 2483 } 2484 } 2485 2486 return ret; 2487 } 2488 2489 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) 2490 { 2491 uint64_t parent_perms, parent_shared; 2492 uint64_t perms, shared; 2493 2494 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); 2495 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 2496 parent_perms, parent_shared, &perms, &shared); 2497 2498 return bdrv_child_try_set_perm(c, perms, shared, errp); 2499 } 2500 2501 /* 2502 * Default implementation for .bdrv_child_perm() for block filters: 2503 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the 2504 * filtered child. 2505 */ 2506 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 2507 BdrvChildRole role, 2508 BlockReopenQueue *reopen_queue, 2509 uint64_t perm, uint64_t shared, 2510 uint64_t *nperm, uint64_t *nshared) 2511 { 2512 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 2513 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 2514 } 2515 2516 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, 2517 BdrvChildRole role, 2518 BlockReopenQueue *reopen_queue, 2519 uint64_t perm, uint64_t shared, 2520 uint64_t *nperm, uint64_t *nshared) 2521 { 2522 assert(role & BDRV_CHILD_COW); 2523 2524 /* 2525 * We want consistent read from backing files if the parent needs it. 2526 * No other operations are performed on backing files. 2527 */ 2528 perm &= BLK_PERM_CONSISTENT_READ; 2529 2530 /* 2531 * If the parent can deal with changing data, we're okay with a 2532 * writable and resizable backing file. 2533 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
2534 */ 2535 if (shared & BLK_PERM_WRITE) { 2536 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 2537 } else { 2538 shared = 0; 2539 } 2540 2541 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | 2542 BLK_PERM_WRITE_UNCHANGED; 2543 2544 if (bs->open_flags & BDRV_O_INACTIVE) { 2545 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2546 } 2547 2548 *nperm = perm; 2549 *nshared = shared; 2550 } 2551 2552 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, 2553 BdrvChildRole role, 2554 BlockReopenQueue *reopen_queue, 2555 uint64_t perm, uint64_t shared, 2556 uint64_t *nperm, uint64_t *nshared) 2557 { 2558 int flags; 2559 2560 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); 2561 2562 flags = bdrv_reopen_get_flags(reopen_queue, bs); 2563 2564 /* 2565 * Apart from the modifications below, the same permissions are 2566 * forwarded and left alone as for filters 2567 */ 2568 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2569 perm, shared, &perm, &shared); 2570 2571 if (role & BDRV_CHILD_METADATA) { 2572 /* Format drivers may touch metadata even if the guest doesn't write */ 2573 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { 2574 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2575 } 2576 2577 /* 2578 * bs->file always needs to be consistent because of the 2579 * metadata. We can never allow other users to resize or write 2580 * to it. 2581 */ 2582 if (!(flags & BDRV_O_NO_IO)) { 2583 perm |= BLK_PERM_CONSISTENT_READ; 2584 } 2585 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 2586 } 2587 2588 if (role & BDRV_CHILD_DATA) { 2589 /* 2590 * Technically, everything in this block is a subset of the 2591 * BDRV_CHILD_METADATA path taken above, and so this could 2592 * be an "else if" branch. However, that is not obvious, and 2593 * this function is not performance critical, therefore we let 2594 * this be an independent "if". 
2595 */ 2596 2597 /* 2598 * We cannot allow other users to resize the file because the 2599 * format driver might have some assumptions about the size 2600 * (e.g. because it is stored in metadata, or because the file 2601 * is split into fixed-size data files). 2602 */ 2603 shared &= ~BLK_PERM_RESIZE; 2604 2605 /* 2606 * WRITE_UNCHANGED often cannot be performed as such on the 2607 * data file. For example, the qcow2 driver may still need to 2608 * write copied clusters on copy-on-read. 2609 */ 2610 if (perm & BLK_PERM_WRITE_UNCHANGED) { 2611 perm |= BLK_PERM_WRITE; 2612 } 2613 2614 /* 2615 * If the data file is written to, the format driver may 2616 * expect to be able to resize it by writing beyond the EOF. 2617 */ 2618 if (perm & BLK_PERM_WRITE) { 2619 perm |= BLK_PERM_RESIZE; 2620 } 2621 } 2622 2623 if (bs->open_flags & BDRV_O_INACTIVE) { 2624 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2625 } 2626 2627 *nperm = perm; 2628 *nshared = shared; 2629 } 2630 2631 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, 2632 BdrvChildRole role, BlockReopenQueue *reopen_queue, 2633 uint64_t perm, uint64_t shared, 2634 uint64_t *nperm, uint64_t *nshared) 2635 { 2636 if (role & BDRV_CHILD_FILTERED) { 2637 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 2638 BDRV_CHILD_COW))); 2639 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2640 perm, shared, nperm, nshared); 2641 } else if (role & BDRV_CHILD_COW) { 2642 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); 2643 bdrv_default_perms_for_cow(bs, c, role, reopen_queue, 2644 perm, shared, nperm, nshared); 2645 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { 2646 bdrv_default_perms_for_storage(bs, c, role, reopen_queue, 2647 perm, shared, nperm, nshared); 2648 } else { 2649 g_assert_not_reached(); 2650 } 2651 } 2652 2653 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) 2654 { 2655 static const uint64_t permissions[] = { 2656 [BLOCK_PERMISSION_CONSISTENT_READ] = 
BLK_PERM_CONSISTENT_READ, 2657 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, 2658 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, 2659 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, 2660 [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD, 2661 }; 2662 2663 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); 2664 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); 2665 2666 assert(qapi_perm < BLOCK_PERMISSION__MAX); 2667 2668 return permissions[qapi_perm]; 2669 } 2670 2671 static void bdrv_replace_child_noperm(BdrvChild *child, 2672 BlockDriverState *new_bs) 2673 { 2674 BlockDriverState *old_bs = child->bs; 2675 int new_bs_quiesce_counter; 2676 int drain_saldo; 2677 2678 assert(!child->frozen); 2679 assert(old_bs != new_bs); 2680 2681 if (old_bs && new_bs) { 2682 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2683 } 2684 2685 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); 2686 drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; 2687 2688 /* 2689 * If the new child node is drained but the old one was not, flush 2690 * all outstanding requests to the old child node. 2691 */ 2692 while (drain_saldo > 0 && child->klass->drained_begin) { 2693 bdrv_parent_drained_begin_single(child, true); 2694 drain_saldo--; 2695 } 2696 2697 if (old_bs) { 2698 /* Detach first so that the recursive drain sections coming from @child 2699 * are already gone and we only end the drain sections that came from 2700 * elsewhere. */ 2701 if (child->klass->detach) { 2702 child->klass->detach(child); 2703 } 2704 QLIST_REMOVE(child, next_parent); 2705 } 2706 2707 child->bs = new_bs; 2708 2709 if (new_bs) { 2710 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2711 2712 /* 2713 * Detaching the old node may have led to the new node's 2714 * quiesce_counter having been decreased. 
Not a problem, we 2715 * just need to recognize this here and then invoke 2716 * drained_end appropriately more often. 2717 */ 2718 assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); 2719 drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; 2720 2721 /* Attach only after starting new drained sections, so that recursive 2722 * drain sections coming from @child don't get an extra .drained_begin 2723 * callback. */ 2724 if (child->klass->attach) { 2725 child->klass->attach(child); 2726 } 2727 } 2728 2729 /* 2730 * If the old child node was drained but the new one is not, allow 2731 * requests to come in only after the new node has been attached. 2732 */ 2733 while (drain_saldo < 0 && child->klass->drained_end) { 2734 bdrv_parent_drained_end_single(child); 2735 drain_saldo++; 2736 } 2737 } 2738 2739 static void bdrv_child_free(void *opaque) 2740 { 2741 BdrvChild *c = opaque; 2742 2743 g_free(c->name); 2744 g_free(c); 2745 } 2746 2747 static void bdrv_remove_empty_child(BdrvChild *child) 2748 { 2749 assert(!child->bs); 2750 QLIST_SAFE_REMOVE(child, next); 2751 bdrv_child_free(child); 2752 } 2753 2754 typedef struct BdrvAttachChildCommonState { 2755 BdrvChild **child; 2756 AioContext *old_parent_ctx; 2757 AioContext *old_child_ctx; 2758 } BdrvAttachChildCommonState; 2759 2760 static void bdrv_attach_child_common_abort(void *opaque) 2761 { 2762 BdrvAttachChildCommonState *s = opaque; 2763 BdrvChild *child = *s->child; 2764 BlockDriverState *bs = child->bs; 2765 2766 bdrv_replace_child_noperm(child, NULL); 2767 2768 if (bdrv_get_aio_context(bs) != s->old_child_ctx) { 2769 bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); 2770 } 2771 2772 if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { 2773 GSList *ignore = g_slist_prepend(NULL, child); 2774 2775 child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, 2776 &error_abort); 2777 g_slist_free(ignore); 2778 ignore = g_slist_prepend(NULL, child); 2779 
child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); 2780 2781 g_slist_free(ignore); 2782 } 2783 2784 bdrv_unref(bs); 2785 bdrv_remove_empty_child(child); 2786 *s->child = NULL; 2787 } 2788 2789 static TransactionActionDrv bdrv_attach_child_common_drv = { 2790 .abort = bdrv_attach_child_common_abort, 2791 .clean = g_free, 2792 }; 2793 2794 /* 2795 * Common part of attaching bdrv child to bs or to blk or to job 2796 * 2797 * Resulting new child is returned through @child. 2798 * At start *@child must be NULL. 2799 * @child is saved to a new entry of @tran, so that *@child could be reverted to 2800 * NULL on abort(). So referenced variable must live at least until transaction 2801 * end. 2802 * 2803 * Function doesn't update permissions, caller is responsible for this. 2804 */ 2805 static int bdrv_attach_child_common(BlockDriverState *child_bs, 2806 const char *child_name, 2807 const BdrvChildClass *child_class, 2808 BdrvChildRole child_role, 2809 uint64_t perm, uint64_t shared_perm, 2810 void *opaque, BdrvChild **child, 2811 Transaction *tran, Error **errp) 2812 { 2813 BdrvChild *new_child; 2814 AioContext *parent_ctx; 2815 AioContext *child_ctx = bdrv_get_aio_context(child_bs); 2816 2817 assert(child); 2818 assert(*child == NULL); 2819 assert(child_class->get_parent_desc); 2820 2821 new_child = g_new(BdrvChild, 1); 2822 *new_child = (BdrvChild) { 2823 .bs = NULL, 2824 .name = g_strdup(child_name), 2825 .klass = child_class, 2826 .role = child_role, 2827 .perm = perm, 2828 .shared_perm = shared_perm, 2829 .opaque = opaque, 2830 }; 2831 2832 /* 2833 * If the AioContexts don't match, first try to move the subtree of 2834 * child_bs into the AioContext of the new parent. If this doesn't work, 2835 * try moving the parent into the AioContext of child_bs instead. 
2836 */ 2837 parent_ctx = bdrv_child_get_parent_aio_context(new_child); 2838 if (child_ctx != parent_ctx) { 2839 Error *local_err = NULL; 2840 int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err); 2841 2842 if (ret < 0 && child_class->can_set_aio_ctx) { 2843 GSList *ignore = g_slist_prepend(NULL, new_child); 2844 if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore, 2845 NULL)) 2846 { 2847 error_free(local_err); 2848 ret = 0; 2849 g_slist_free(ignore); 2850 ignore = g_slist_prepend(NULL, new_child); 2851 child_class->set_aio_ctx(new_child, child_ctx, &ignore); 2852 } 2853 g_slist_free(ignore); 2854 } 2855 2856 if (ret < 0) { 2857 error_propagate(errp, local_err); 2858 bdrv_remove_empty_child(new_child); 2859 return ret; 2860 } 2861 } 2862 2863 bdrv_ref(child_bs); 2864 bdrv_replace_child_noperm(new_child, child_bs); 2865 2866 *child = new_child; 2867 2868 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); 2869 *s = (BdrvAttachChildCommonState) { 2870 .child = child, 2871 .old_parent_ctx = parent_ctx, 2872 .old_child_ctx = child_ctx, 2873 }; 2874 tran_add(tran, &bdrv_attach_child_common_drv, s); 2875 2876 return 0; 2877 } 2878 2879 /* 2880 * Variable referenced by @child must live at least until transaction end. 2881 * (see bdrv_attach_child_common() doc for details) 2882 * 2883 * Function doesn't update permissions, caller is responsible for this. 
2884 */ 2885 static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, 2886 BlockDriverState *child_bs, 2887 const char *child_name, 2888 const BdrvChildClass *child_class, 2889 BdrvChildRole child_role, 2890 BdrvChild **child, 2891 Transaction *tran, 2892 Error **errp) 2893 { 2894 int ret; 2895 uint64_t perm, shared_perm; 2896 2897 assert(parent_bs->drv); 2898 2899 if (bdrv_recurse_has_child(child_bs, parent_bs)) { 2900 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", 2901 child_bs->node_name, child_name, parent_bs->node_name); 2902 return -EINVAL; 2903 } 2904 2905 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 2906 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 2907 perm, shared_perm, &perm, &shared_perm); 2908 2909 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 2910 child_role, perm, shared_perm, parent_bs, 2911 child, tran, errp); 2912 if (ret < 0) { 2913 return ret; 2914 } 2915 2916 QLIST_INSERT_HEAD(&parent_bs->children, *child, next); 2917 /* 2918 * child is removed in bdrv_attach_child_common_abort(), so don't care to 2919 * abort this change separately. 2920 */ 2921 2922 return 0; 2923 } 2924 2925 static void bdrv_detach_child(BdrvChild *child) 2926 { 2927 BlockDriverState *old_bs = child->bs; 2928 2929 bdrv_replace_child_noperm(child, NULL); 2930 bdrv_remove_empty_child(child); 2931 2932 if (old_bs) { 2933 /* 2934 * Update permissions for old node. We're just taking a parent away, so 2935 * we're loosening restrictions. Errors of permission update are not 2936 * fatal in this case, ignore them. 2937 */ 2938 bdrv_refresh_perms(old_bs, NULL); 2939 2940 /* 2941 * When the parent requiring a non-default AioContext is removed, the 2942 * node moves back to the main AioContext 2943 */ 2944 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); 2945 } 2946 } 2947 2948 /* 2949 * This function steals the reference to child_bs from the caller. 
2950 * That reference is later dropped by bdrv_root_unref_child(). 2951 * 2952 * On failure NULL is returned, errp is set and the reference to 2953 * child_bs is also dropped. 2954 * 2955 * The caller must hold the AioContext lock @child_bs, but not that of @ctx 2956 * (unless @child_bs is already in @ctx). 2957 */ 2958 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 2959 const char *child_name, 2960 const BdrvChildClass *child_class, 2961 BdrvChildRole child_role, 2962 uint64_t perm, uint64_t shared_perm, 2963 void *opaque, Error **errp) 2964 { 2965 int ret; 2966 BdrvChild *child = NULL; 2967 Transaction *tran = tran_new(); 2968 2969 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 2970 child_role, perm, shared_perm, opaque, 2971 &child, tran, errp); 2972 if (ret < 0) { 2973 goto out; 2974 } 2975 2976 ret = bdrv_refresh_perms(child_bs, errp); 2977 2978 out: 2979 tran_finalize(tran, ret); 2980 /* child is unset on failure by bdrv_attach_child_common_abort() */ 2981 assert((ret < 0) == !child); 2982 2983 bdrv_unref(child_bs); 2984 return child; 2985 } 2986 2987 /* 2988 * This function transfers the reference to child_bs from the caller 2989 * to parent_bs. That reference is later dropped by parent_bs on 2990 * bdrv_close() or if someone calls bdrv_unref_child(). 2991 * 2992 * On failure NULL is returned, errp is set and the reference to 2993 * child_bs is also dropped. 2994 * 2995 * If @parent_bs and @child_bs are in different AioContexts, the caller must 2996 * hold the AioContext lock for @child_bs, but not for @parent_bs. 
2997 */ 2998 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 2999 BlockDriverState *child_bs, 3000 const char *child_name, 3001 const BdrvChildClass *child_class, 3002 BdrvChildRole child_role, 3003 Error **errp) 3004 { 3005 int ret; 3006 BdrvChild *child = NULL; 3007 Transaction *tran = tran_new(); 3008 3009 ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, 3010 child_role, &child, tran, errp); 3011 if (ret < 0) { 3012 goto out; 3013 } 3014 3015 ret = bdrv_refresh_perms(parent_bs, errp); 3016 if (ret < 0) { 3017 goto out; 3018 } 3019 3020 out: 3021 tran_finalize(tran, ret); 3022 /* child is unset on failure by bdrv_attach_child_common_abort() */ 3023 assert((ret < 0) == !child); 3024 3025 bdrv_unref(child_bs); 3026 3027 return child; 3028 } 3029 3030 /* Callers must ensure that child->frozen is false. */ 3031 void bdrv_root_unref_child(BdrvChild *child) 3032 { 3033 BlockDriverState *child_bs; 3034 3035 child_bs = child->bs; 3036 bdrv_detach_child(child); 3037 bdrv_unref(child_bs); 3038 } 3039 3040 typedef struct BdrvSetInheritsFrom { 3041 BlockDriverState *bs; 3042 BlockDriverState *old_inherits_from; 3043 } BdrvSetInheritsFrom; 3044 3045 static void bdrv_set_inherits_from_abort(void *opaque) 3046 { 3047 BdrvSetInheritsFrom *s = opaque; 3048 3049 s->bs->inherits_from = s->old_inherits_from; 3050 } 3051 3052 static TransactionActionDrv bdrv_set_inherits_from_drv = { 3053 .abort = bdrv_set_inherits_from_abort, 3054 .clean = g_free, 3055 }; 3056 3057 /* @tran is allowed to be NULL. 
In this case no rollback is possible */ 3058 static void bdrv_set_inherits_from(BlockDriverState *bs, 3059 BlockDriverState *new_inherits_from, 3060 Transaction *tran) 3061 { 3062 if (tran) { 3063 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); 3064 3065 *s = (BdrvSetInheritsFrom) { 3066 .bs = bs, 3067 .old_inherits_from = bs->inherits_from, 3068 }; 3069 3070 tran_add(tran, &bdrv_set_inherits_from_drv, s); 3071 } 3072 3073 bs->inherits_from = new_inherits_from; 3074 } 3075 3076 /** 3077 * Clear all inherits_from pointers from children and grandchildren of 3078 * @root that point to @root, where necessary. 3079 * @tran is allowed to be NULL. In this case no rollback is possible 3080 */ 3081 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, 3082 Transaction *tran) 3083 { 3084 BdrvChild *c; 3085 3086 if (child->bs->inherits_from == root) { 3087 /* 3088 * Remove inherits_from only when the last reference between root and 3089 * child->bs goes away. 3090 */ 3091 QLIST_FOREACH(c, &root->children, next) { 3092 if (c != child && c->bs == child->bs) { 3093 break; 3094 } 3095 } 3096 if (c == NULL) { 3097 bdrv_set_inherits_from(child->bs, NULL, tran); 3098 } 3099 } 3100 3101 QLIST_FOREACH(c, &child->bs->children, next) { 3102 bdrv_unset_inherits_from(root, c, tran); 3103 } 3104 } 3105 3106 /* Callers must ensure that child->frozen is false. */ 3107 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 3108 { 3109 if (child == NULL) { 3110 return; 3111 } 3112 3113 bdrv_unset_inherits_from(parent, child, NULL); 3114 bdrv_root_unref_child(child); 3115 } 3116 3117 3118 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 3119 { 3120 BdrvChild *c; 3121 QLIST_FOREACH(c, &bs->parents, next_parent) { 3122 if (c->klass->change_media) { 3123 c->klass->change_media(c, load); 3124 } 3125 } 3126 } 3127 3128 /* Return true if you can reach parent going through child->inherits_from 3129 * recursively. 
If parent or child are NULL, return false */ 3130 static bool bdrv_inherits_from_recursive(BlockDriverState *child, 3131 BlockDriverState *parent) 3132 { 3133 while (child && child != parent) { 3134 child = child->inherits_from; 3135 } 3136 3137 return child != NULL; 3138 } 3139 3140 /* 3141 * Return the BdrvChildRole for @bs's backing child. bs->backing is 3142 * mostly used for COW backing children (role = COW), but also for 3143 * filtered children (role = FILTERED | PRIMARY). 3144 */ 3145 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) 3146 { 3147 if (bs->drv && bs->drv->is_filter) { 3148 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3149 } else { 3150 return BDRV_CHILD_COW; 3151 } 3152 } 3153 3154 /* 3155 * Sets the bs->backing or bs->file link of a BDS. A new reference is created; 3156 * callers which don't need their own reference any more must call bdrv_unref(). 3157 * 3158 * Function doesn't update permissions, caller is responsible for this. 3159 */ 3160 static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, 3161 BlockDriverState *child_bs, 3162 bool is_backing, 3163 Transaction *tran, Error **errp) 3164 { 3165 int ret = 0; 3166 bool update_inherits_from = 3167 bdrv_inherits_from_recursive(child_bs, parent_bs); 3168 BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; 3169 BdrvChildRole role; 3170 3171 if (!parent_bs->drv) { 3172 /* 3173 * Node without drv is an object without a class :/. TODO: finally fix 3174 * qcow2 driver to never clear bs->drv and implement format corruption 3175 * handling in other way. 
3176 */ 3177 error_setg(errp, "Node corrupted"); 3178 return -EINVAL; 3179 } 3180 3181 if (child && child->frozen) { 3182 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", 3183 child->name, parent_bs->node_name, child->bs->node_name); 3184 return -EPERM; 3185 } 3186 3187 if (is_backing && !parent_bs->drv->is_filter && 3188 !parent_bs->drv->supports_backing) 3189 { 3190 error_setg(errp, "Driver '%s' of node '%s' does not support backing " 3191 "files", parent_bs->drv->format_name, parent_bs->node_name); 3192 return -EINVAL; 3193 } 3194 3195 if (parent_bs->drv->is_filter) { 3196 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 3197 } else if (is_backing) { 3198 role = BDRV_CHILD_COW; 3199 } else { 3200 /* 3201 * We only can use same role as it is in existing child. We don't have 3202 * infrastructure to determine role of file child in generic way 3203 */ 3204 if (!child) { 3205 error_setg(errp, "Cannot set file child to format node without " 3206 "file child"); 3207 return -EINVAL; 3208 } 3209 role = child->role; 3210 } 3211 3212 if (child) { 3213 bdrv_unset_inherits_from(parent_bs, child, tran); 3214 bdrv_remove_file_or_backing_child(parent_bs, child, tran); 3215 } 3216 3217 if (!child_bs) { 3218 goto out; 3219 } 3220 3221 ret = bdrv_attach_child_noperm(parent_bs, child_bs, 3222 is_backing ? "backing" : "file", 3223 &child_of_bds, role, 3224 is_backing ? &parent_bs->backing : 3225 &parent_bs->file, 3226 tran, errp); 3227 if (ret < 0) { 3228 return ret; 3229 } 3230 3231 3232 /* 3233 * If inherits_from pointed recursively to bs then let's update it to 3234 * point directly to bs (else it will become NULL). 
3235 */ 3236 if (update_inherits_from) { 3237 bdrv_set_inherits_from(child_bs, parent_bs, tran); 3238 } 3239 3240 out: 3241 bdrv_refresh_limits(parent_bs, tran, NULL); 3242 3243 return 0; 3244 } 3245 3246 static int bdrv_set_backing_noperm(BlockDriverState *bs, 3247 BlockDriverState *backing_hd, 3248 Transaction *tran, Error **errp) 3249 { 3250 return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); 3251 } 3252 3253 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 3254 Error **errp) 3255 { 3256 int ret; 3257 Transaction *tran = tran_new(); 3258 3259 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); 3260 if (ret < 0) { 3261 goto out; 3262 } 3263 3264 ret = bdrv_refresh_perms(bs, errp); 3265 out: 3266 tran_finalize(tran, ret); 3267 3268 return ret; 3269 } 3270 3271 /* 3272 * Opens the backing file for a BlockDriverState if not yet open 3273 * 3274 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 3275 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3276 * itself, all options starting with "${bdref_key}." are considered part of the 3277 * BlockdevRef. 3278 * 3279 * TODO Can this be unified with bdrv_open_image()? 
3280 */ 3281 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 3282 const char *bdref_key, Error **errp) 3283 { 3284 char *backing_filename = NULL; 3285 char *bdref_key_dot; 3286 const char *reference = NULL; 3287 int ret = 0; 3288 bool implicit_backing = false; 3289 BlockDriverState *backing_hd; 3290 QDict *options; 3291 QDict *tmp_parent_options = NULL; 3292 Error *local_err = NULL; 3293 3294 if (bs->backing != NULL) { 3295 goto free_exit; 3296 } 3297 3298 /* NULL means an empty set of options */ 3299 if (parent_options == NULL) { 3300 tmp_parent_options = qdict_new(); 3301 parent_options = tmp_parent_options; 3302 } 3303 3304 bs->open_flags &= ~BDRV_O_NO_BACKING; 3305 3306 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 3307 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 3308 g_free(bdref_key_dot); 3309 3310 /* 3311 * Caution: while qdict_get_try_str() is fine, getting non-string 3312 * types would require more care. When @parent_options come from 3313 * -blockdev or blockdev_add, its members are typed according to 3314 * the QAPI schema, but when they come from -drive, they're all 3315 * QString. 3316 */ 3317 reference = qdict_get_try_str(parent_options, bdref_key); 3318 if (reference || qdict_haskey(options, "file.filename")) { 3319 /* keep backing_filename NULL */ 3320 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 3321 qobject_unref(options); 3322 goto free_exit; 3323 } else { 3324 if (qdict_size(options) == 0) { 3325 /* If the user specifies options that do not modify the 3326 * backing file's behavior, we might still consider it the 3327 * implicit backing file. But it's easier this way, and 3328 * just specifying some of the backing BDS's options is 3329 * only possible with -drive anyway (otherwise the QAPI 3330 * schema forces the user to specify everything). 
*/ 3331 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); 3332 } 3333 3334 backing_filename = bdrv_get_full_backing_filename(bs, &local_err); 3335 if (local_err) { 3336 ret = -EINVAL; 3337 error_propagate(errp, local_err); 3338 qobject_unref(options); 3339 goto free_exit; 3340 } 3341 } 3342 3343 if (!bs->drv || !bs->drv->supports_backing) { 3344 ret = -EINVAL; 3345 error_setg(errp, "Driver doesn't support backing files"); 3346 qobject_unref(options); 3347 goto free_exit; 3348 } 3349 3350 if (!reference && 3351 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 3352 qdict_put_str(options, "driver", bs->backing_format); 3353 } 3354 3355 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, 3356 &child_of_bds, bdrv_backing_role(bs), errp); 3357 if (!backing_hd) { 3358 bs->open_flags |= BDRV_O_NO_BACKING; 3359 error_prepend(errp, "Could not open backing file: "); 3360 ret = -EINVAL; 3361 goto free_exit; 3362 } 3363 3364 if (implicit_backing) { 3365 bdrv_refresh_filename(backing_hd); 3366 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3367 backing_hd->filename); 3368 } 3369 3370 /* Hook up the backing file link; drop our reference, bs owns the 3371 * backing_hd reference now */ 3372 ret = bdrv_set_backing_hd(bs, backing_hd, errp); 3373 bdrv_unref(backing_hd); 3374 if (ret < 0) { 3375 goto free_exit; 3376 } 3377 3378 qdict_del(parent_options, bdref_key); 3379 3380 free_exit: 3381 g_free(backing_filename); 3382 qobject_unref(tmp_parent_options); 3383 return ret; 3384 } 3385 3386 static BlockDriverState * 3387 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 3388 BlockDriverState *parent, const BdrvChildClass *child_class, 3389 BdrvChildRole child_role, bool allow_none, Error **errp) 3390 { 3391 BlockDriverState *bs = NULL; 3392 QDict *image_options; 3393 char *bdref_key_dot; 3394 const char *reference; 3395 3396 assert(child_class != NULL); 3397 3398 bdref_key_dot = 
g_strdup_printf("%s.", bdref_key); 3399 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 3400 g_free(bdref_key_dot); 3401 3402 /* 3403 * Caution: while qdict_get_try_str() is fine, getting non-string 3404 * types would require more care. When @options come from 3405 * -blockdev or blockdev_add, its members are typed according to 3406 * the QAPI schema, but when they come from -drive, they're all 3407 * QString. 3408 */ 3409 reference = qdict_get_try_str(options, bdref_key); 3410 if (!filename && !reference && !qdict_size(image_options)) { 3411 if (!allow_none) { 3412 error_setg(errp, "A block device must be specified for \"%s\"", 3413 bdref_key); 3414 } 3415 qobject_unref(image_options); 3416 goto done; 3417 } 3418 3419 bs = bdrv_open_inherit(filename, reference, image_options, 0, 3420 parent, child_class, child_role, errp); 3421 if (!bs) { 3422 goto done; 3423 } 3424 3425 done: 3426 qdict_del(options, bdref_key); 3427 return bs; 3428 } 3429 3430 /* 3431 * Opens a disk image whose options are given as BlockdevRef in another block 3432 * device's options. 3433 * 3434 * If allow_none is true, no image will be opened if filename is false and no 3435 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3436 * 3437 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3438 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3439 * itself, all options starting with "${bdref_key}." are considered part of the 3440 * BlockdevRef. 3441 * 3442 * The BlockdevRef will be removed from the options QDict. 
3443 */ 3444 BdrvChild *bdrv_open_child(const char *filename, 3445 QDict *options, const char *bdref_key, 3446 BlockDriverState *parent, 3447 const BdrvChildClass *child_class, 3448 BdrvChildRole child_role, 3449 bool allow_none, Error **errp) 3450 { 3451 BlockDriverState *bs; 3452 3453 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3454 child_role, allow_none, errp); 3455 if (bs == NULL) { 3456 return NULL; 3457 } 3458 3459 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3460 errp); 3461 } 3462 3463 /* 3464 * TODO Future callers may need to specify parent/child_class in order for 3465 * option inheritance to work. Existing callers use it for the root node. 3466 */ 3467 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3468 { 3469 BlockDriverState *bs = NULL; 3470 QObject *obj = NULL; 3471 QDict *qdict = NULL; 3472 const char *reference = NULL; 3473 Visitor *v = NULL; 3474 3475 if (ref->type == QTYPE_QSTRING) { 3476 reference = ref->u.reference; 3477 } else { 3478 BlockdevOptions *options = &ref->u.definition; 3479 assert(ref->type == QTYPE_QDICT); 3480 3481 v = qobject_output_visitor_new(&obj); 3482 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3483 visit_complete(v, &obj); 3484 3485 qdict = qobject_to(QDict, obj); 3486 qdict_flatten(qdict); 3487 3488 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3489 * compatibility with other callers) rather than what we want as the 3490 * real defaults. Apply the defaults here instead. 
*/ 3491 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3492 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3493 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3494 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3495 3496 } 3497 3498 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); 3499 obj = NULL; 3500 qobject_unref(obj); 3501 visit_free(v); 3502 return bs; 3503 } 3504 3505 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3506 int flags, 3507 QDict *snapshot_options, 3508 Error **errp) 3509 { 3510 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 3511 char *tmp_filename = g_malloc0(PATH_MAX + 1); 3512 int64_t total_size; 3513 QemuOpts *opts = NULL; 3514 BlockDriverState *bs_snapshot = NULL; 3515 int ret; 3516 3517 /* if snapshot, we create a temporary backing file and open it 3518 instead of opening 'filename' directly */ 3519 3520 /* Get the required size from the image */ 3521 total_size = bdrv_getlength(bs); 3522 if (total_size < 0) { 3523 error_setg_errno(errp, -total_size, "Could not get image size"); 3524 goto out; 3525 } 3526 3527 /* Create the temporary image */ 3528 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 3529 if (ret < 0) { 3530 error_setg_errno(errp, -ret, "Could not get temporary filename"); 3531 goto out; 3532 } 3533 3534 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3535 &error_abort); 3536 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3537 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3538 qemu_opts_del(opts); 3539 if (ret < 0) { 3540 error_prepend(errp, "Could not create temporary overlay '%s': ", 3541 tmp_filename); 3542 goto out; 3543 } 3544 3545 /* Prepare options QDict for the temporary file */ 3546 qdict_put_str(snapshot_options, "file.driver", "file"); 3547 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3548 qdict_put_str(snapshot_options, "driver", 
"qcow2"); 3549 3550 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3551 snapshot_options = NULL; 3552 if (!bs_snapshot) { 3553 goto out; 3554 } 3555 3556 ret = bdrv_append(bs_snapshot, bs, errp); 3557 if (ret < 0) { 3558 bs_snapshot = NULL; 3559 goto out; 3560 } 3561 3562 out: 3563 qobject_unref(snapshot_options); 3564 g_free(tmp_filename); 3565 return bs_snapshot; 3566 } 3567 3568 /* 3569 * Opens a disk image (raw, qcow2, vmdk, ...) 3570 * 3571 * options is a QDict of options to pass to the block drivers, or NULL for an 3572 * empty set of options. The reference to the QDict belongs to the block layer 3573 * after the call (even on failure), so if the caller intends to reuse the 3574 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3575 * 3576 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3577 * If it is not NULL, the referenced BDS will be reused. 3578 * 3579 * The reference parameter may be used to specify an existing block device which 3580 * should be opened. If specified, neither options nor a filename may be given, 3581 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 3582 */ 3583 static BlockDriverState *bdrv_open_inherit(const char *filename, 3584 const char *reference, 3585 QDict *options, int flags, 3586 BlockDriverState *parent, 3587 const BdrvChildClass *child_class, 3588 BdrvChildRole child_role, 3589 Error **errp) 3590 { 3591 int ret; 3592 BlockBackend *file = NULL; 3593 BlockDriverState *bs; 3594 BlockDriver *drv = NULL; 3595 BdrvChild *child; 3596 const char *drvname; 3597 const char *backing; 3598 Error *local_err = NULL; 3599 QDict *snapshot_options = NULL; 3600 int snapshot_flags = 0; 3601 3602 assert(!child_class || !flags); 3603 assert(!child_class == !parent); 3604 3605 if (reference) { 3606 bool options_non_empty = options ? 
qdict_size(options) : false; 3607 qobject_unref(options); 3608 3609 if (filename || options_non_empty) { 3610 error_setg(errp, "Cannot reference an existing block device with " 3611 "additional options or a new filename"); 3612 return NULL; 3613 } 3614 3615 bs = bdrv_lookup_bs(reference, reference, errp); 3616 if (!bs) { 3617 return NULL; 3618 } 3619 3620 bdrv_ref(bs); 3621 return bs; 3622 } 3623 3624 bs = bdrv_new(); 3625 3626 /* NULL means an empty set of options */ 3627 if (options == NULL) { 3628 options = qdict_new(); 3629 } 3630 3631 /* json: syntax counts as explicit options, as if in the QDict */ 3632 parse_json_protocol(options, &filename, &local_err); 3633 if (local_err) { 3634 goto fail; 3635 } 3636 3637 bs->explicit_options = qdict_clone_shallow(options); 3638 3639 if (child_class) { 3640 bool parent_is_format; 3641 3642 if (parent->drv) { 3643 parent_is_format = parent->drv->is_format; 3644 } else { 3645 /* 3646 * parent->drv is not set yet because this node is opened for 3647 * (potential) format probing. That means that @parent is going 3648 * to be a format node. 3649 */ 3650 parent_is_format = true; 3651 } 3652 3653 bs->inherits_from = parent; 3654 child_class->inherit_options(child_role, parent_is_format, 3655 &flags, options, 3656 parent->open_flags, parent->options); 3657 } 3658 3659 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 3660 if (ret < 0) { 3661 goto fail; 3662 } 3663 3664 /* 3665 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 3666 * Caution: getting a boolean member of @options requires care. 3667 * When @options come from -blockdev or blockdev_add, members are 3668 * typed according to the QAPI schema, but when they come from 3669 * -drive, they're all QString. 
3670 */ 3671 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 3672 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 3673 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 3674 } else { 3675 flags &= ~BDRV_O_RDWR; 3676 } 3677 3678 if (flags & BDRV_O_SNAPSHOT) { 3679 snapshot_options = qdict_new(); 3680 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 3681 flags, options); 3682 /* Let bdrv_backing_options() override "read-only" */ 3683 qdict_del(options, BDRV_OPT_READ_ONLY); 3684 bdrv_inherited_options(BDRV_CHILD_COW, true, 3685 &flags, options, flags, options); 3686 } 3687 3688 bs->open_flags = flags; 3689 bs->options = options; 3690 options = qdict_clone_shallow(options); 3691 3692 /* Find the right image format driver */ 3693 /* See cautionary note on accessing @options above */ 3694 drvname = qdict_get_try_str(options, "driver"); 3695 if (drvname) { 3696 drv = bdrv_find_format(drvname); 3697 if (!drv) { 3698 error_setg(errp, "Unknown driver: '%s'", drvname); 3699 goto fail; 3700 } 3701 } 3702 3703 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 3704 3705 /* See cautionary note on accessing @options above */ 3706 backing = qdict_get_try_str(options, "backing"); 3707 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || 3708 (backing && *backing == '\0')) 3709 { 3710 if (backing) { 3711 warn_report("Use of \"backing\": \"\" is deprecated; " 3712 "use \"backing\": null instead"); 3713 } 3714 flags |= BDRV_O_NO_BACKING; 3715 qdict_del(bs->explicit_options, "backing"); 3716 qdict_del(bs->options, "backing"); 3717 qdict_del(options, "backing"); 3718 } 3719 3720 /* Open image file without format layer. This BlockBackend is only used for 3721 * probing, the block drivers will do their own bdrv_open_child() for the 3722 * same BDS, which is why we put the node name back into options. 
*/ 3723 if ((flags & BDRV_O_PROTOCOL) == 0) { 3724 BlockDriverState *file_bs; 3725 3726 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 3727 &child_of_bds, BDRV_CHILD_IMAGE, 3728 true, &local_err); 3729 if (local_err) { 3730 goto fail; 3731 } 3732 if (file_bs != NULL) { 3733 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 3734 * looking at the header to guess the image format. This works even 3735 * in cases where a guest would not see a consistent state. */ 3736 file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); 3737 blk_insert_bs(file, file_bs, &local_err); 3738 bdrv_unref(file_bs); 3739 if (local_err) { 3740 goto fail; 3741 } 3742 3743 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 3744 } 3745 } 3746 3747 /* Image format probing */ 3748 bs->probed = !drv; 3749 if (!drv && file) { 3750 ret = find_image_format(file, filename, &drv, &local_err); 3751 if (ret < 0) { 3752 goto fail; 3753 } 3754 /* 3755 * This option update would logically belong in bdrv_fill_options(), 3756 * but we first need to open bs->file for the probing to work, while 3757 * opening bs->file already requires the (mostly) final set of options 3758 * so that cache mode etc. can be inherited. 3759 * 3760 * Adding the driver later is somewhat ugly, but it's not an option 3761 * that would ever be inherited, so it's correct. We just need to make 3762 * sure to update both bs->options (which has the full effective 3763 * options for bs) and options (which has file.* already removed). 
3764 */ 3765 qdict_put_str(bs->options, "driver", drv->format_name); 3766 qdict_put_str(options, "driver", drv->format_name); 3767 } else if (!drv) { 3768 error_setg(errp, "Must specify either driver or file"); 3769 goto fail; 3770 } 3771 3772 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 3773 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 3774 /* file must be NULL if a protocol BDS is about to be created 3775 * (the inverse results in an error message from bdrv_open_common()) */ 3776 assert(!(flags & BDRV_O_PROTOCOL) || !file); 3777 3778 /* Open the image */ 3779 ret = bdrv_open_common(bs, file, options, &local_err); 3780 if (ret < 0) { 3781 goto fail; 3782 } 3783 3784 if (file) { 3785 blk_unref(file); 3786 file = NULL; 3787 } 3788 3789 /* If there is a backing file, use it */ 3790 if ((flags & BDRV_O_NO_BACKING) == 0) { 3791 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 3792 if (ret < 0) { 3793 goto close_and_fail; 3794 } 3795 } 3796 3797 /* Remove all children options and references 3798 * from bs->options and bs->explicit_options */ 3799 QLIST_FOREACH(child, &bs->children, next) { 3800 char *child_key_dot; 3801 child_key_dot = g_strdup_printf("%s.", child->name); 3802 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); 3803 qdict_extract_subqdict(bs->options, NULL, child_key_dot); 3804 qdict_del(bs->explicit_options, child->name); 3805 qdict_del(bs->options, child->name); 3806 g_free(child_key_dot); 3807 } 3808 3809 /* Check if any unknown options were used */ 3810 if (qdict_size(options) != 0) { 3811 const QDictEntry *entry = qdict_first(options); 3812 if (flags & BDRV_O_PROTOCOL) { 3813 error_setg(errp, "Block protocol '%s' doesn't support the option " 3814 "'%s'", drv->format_name, entry->key); 3815 } else { 3816 error_setg(errp, 3817 "Block format '%s' does not support the option '%s'", 3818 drv->format_name, entry->key); 3819 } 3820 3821 goto close_and_fail; 3822 } 3823 3824 
bdrv_parent_cb_change_media(bs, true); 3825 3826 qobject_unref(options); 3827 options = NULL; 3828 3829 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 3830 * temporary snapshot afterwards. */ 3831 if (snapshot_flags) { 3832 BlockDriverState *snapshot_bs; 3833 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 3834 snapshot_options, &local_err); 3835 snapshot_options = NULL; 3836 if (local_err) { 3837 goto close_and_fail; 3838 } 3839 /* We are not going to return bs but the overlay on top of it 3840 * (snapshot_bs); thus, we have to drop the strong reference to bs 3841 * (which we obtained by calling bdrv_new()). bs will not be deleted, 3842 * though, because the overlay still has a reference to it. */ 3843 bdrv_unref(bs); 3844 bs = snapshot_bs; 3845 } 3846 3847 return bs; 3848 3849 fail: 3850 blk_unref(file); 3851 qobject_unref(snapshot_options); 3852 qobject_unref(bs->explicit_options); 3853 qobject_unref(bs->options); 3854 qobject_unref(options); 3855 bs->options = NULL; 3856 bs->explicit_options = NULL; 3857 bdrv_unref(bs); 3858 error_propagate(errp, local_err); 3859 return NULL; 3860 3861 close_and_fail: 3862 bdrv_unref(bs); 3863 qobject_unref(snapshot_options); 3864 qobject_unref(options); 3865 error_propagate(errp, local_err); 3866 return NULL; 3867 } 3868 3869 BlockDriverState *bdrv_open(const char *filename, const char *reference, 3870 QDict *options, int flags, Error **errp) 3871 { 3872 return bdrv_open_inherit(filename, reference, options, flags, NULL, 3873 NULL, 0, errp); 3874 } 3875 3876 /* Return true if the NULL-terminated @list contains @str */ 3877 static bool is_str_in_list(const char *str, const char *const *list) 3878 { 3879 if (str && list) { 3880 int i; 3881 for (i = 0; list[i] != NULL; i++) { 3882 if (!strcmp(str, list[i])) { 3883 return true; 3884 } 3885 } 3886 } 3887 return false; 3888 } 3889 3890 /* 3891 * Check that every option set in @bs->options is also set in 3892 * @new_opts. 
3893 * 3894 * Options listed in the common_options list and in 3895 * @bs->drv->mutable_opts are skipped. 3896 * 3897 * Return 0 on success, otherwise return -EINVAL and set @errp. 3898 */ 3899 static int bdrv_reset_options_allowed(BlockDriverState *bs, 3900 const QDict *new_opts, Error **errp) 3901 { 3902 const QDictEntry *e; 3903 /* These options are common to all block drivers and are handled 3904 * in bdrv_reopen_prepare() so they can be left out of @new_opts */ 3905 const char *const common_options[] = { 3906 "node-name", "discard", "cache.direct", "cache.no-flush", 3907 "read-only", "auto-read-only", "detect-zeroes", NULL 3908 }; 3909 3910 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { 3911 if (!qdict_haskey(new_opts, e->key) && 3912 !is_str_in_list(e->key, common_options) && 3913 !is_str_in_list(e->key, bs->drv->mutable_opts)) { 3914 error_setg(errp, "Option '%s' cannot be reset " 3915 "to its default value", e->key); 3916 return -EINVAL; 3917 } 3918 } 3919 3920 return 0; 3921 } 3922 3923 /* 3924 * Returns true if @child can be reached recursively from @bs 3925 */ 3926 static bool bdrv_recurse_has_child(BlockDriverState *bs, 3927 BlockDriverState *child) 3928 { 3929 BdrvChild *c; 3930 3931 if (bs == child) { 3932 return true; 3933 } 3934 3935 QLIST_FOREACH(c, &bs->children, next) { 3936 if (bdrv_recurse_has_child(c->bs, child)) { 3937 return true; 3938 } 3939 } 3940 3941 return false; 3942 } 3943 3944 /* 3945 * Adds a BlockDriverState to a simple queue for an atomic, transactional 3946 * reopen of multiple devices. 3947 * 3948 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT 3949 * already performed, or alternatively may be NULL a new BlockReopenQueue will 3950 * be created and initialized. This newly created BlockReopenQueue should be 3951 * passed back in for subsequent calls that are intended to be of the same 3952 * atomic 'set'. 3953 * 3954 * bs is the BlockDriverState to add to the reopen queue. 
3955 * 3956 * options contains the changed options for the associated bs 3957 * (the BlockReopenQueue takes ownership) 3958 * 3959 * flags contains the open flags for the associated bs 3960 * 3961 * returns a pointer to bs_queue, which is either the newly allocated 3962 * bs_queue, or the existing bs_queue being used. 3963 * 3964 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 3965 */ 3966 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 3967 BlockDriverState *bs, 3968 QDict *options, 3969 const BdrvChildClass *klass, 3970 BdrvChildRole role, 3971 bool parent_is_format, 3972 QDict *parent_options, 3973 int parent_flags, 3974 bool keep_old_opts) 3975 { 3976 assert(bs != NULL); 3977 3978 BlockReopenQueueEntry *bs_entry; 3979 BdrvChild *child; 3980 QDict *old_options, *explicit_options, *options_copy; 3981 int flags; 3982 QemuOpts *opts; 3983 3984 /* Make sure that the caller remembered to use a drained section. This is 3985 * important to avoid graph changes between the recursive queuing here and 3986 * bdrv_reopen_multiple(). */ 3987 assert(bs->quiesce_counter > 0); 3988 3989 if (bs_queue == NULL) { 3990 bs_queue = g_new0(BlockReopenQueue, 1); 3991 QTAILQ_INIT(bs_queue); 3992 } 3993 3994 if (!options) { 3995 options = qdict_new(); 3996 } 3997 3998 /* Check if this BlockDriverState is already in the queue */ 3999 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4000 if (bs == bs_entry->state.bs) { 4001 break; 4002 } 4003 } 4004 4005 /* 4006 * Precedence of options: 4007 * 1. Explicitly passed in options (highest) 4008 * 2. Retained from explicitly set options of bs 4009 * 3. Inherited from parent node 4010 * 4. Retained from effective options of bs 4011 */ 4012 4013 /* Old explicitly set values (don't overwrite by inherited value) */ 4014 if (bs_entry || keep_old_opts) { 4015 old_options = qdict_clone_shallow(bs_entry ? 
4016 bs_entry->state.explicit_options : 4017 bs->explicit_options); 4018 bdrv_join_options(bs, options, old_options); 4019 qobject_unref(old_options); 4020 } 4021 4022 explicit_options = qdict_clone_shallow(options); 4023 4024 /* Inherit from parent node */ 4025 if (parent_options) { 4026 flags = 0; 4027 klass->inherit_options(role, parent_is_format, &flags, options, 4028 parent_flags, parent_options); 4029 } else { 4030 flags = bdrv_get_flags(bs); 4031 } 4032 4033 if (keep_old_opts) { 4034 /* Old values are used for options that aren't set yet */ 4035 old_options = qdict_clone_shallow(bs->options); 4036 bdrv_join_options(bs, options, old_options); 4037 qobject_unref(old_options); 4038 } 4039 4040 /* We have the final set of options so let's update the flags */ 4041 options_copy = qdict_clone_shallow(options); 4042 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 4043 qemu_opts_absorb_qdict(opts, options_copy, NULL); 4044 update_flags_from_options(&flags, opts); 4045 qemu_opts_del(opts); 4046 qobject_unref(options_copy); 4047 4048 /* bdrv_open_inherit() sets and clears some additional flags internally */ 4049 flags &= ~BDRV_O_PROTOCOL; 4050 if (flags & BDRV_O_RDWR) { 4051 flags |= BDRV_O_ALLOW_RDWR; 4052 } 4053 4054 if (!bs_entry) { 4055 bs_entry = g_new0(BlockReopenQueueEntry, 1); 4056 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); 4057 } else { 4058 qobject_unref(bs_entry->state.options); 4059 qobject_unref(bs_entry->state.explicit_options); 4060 } 4061 4062 bs_entry->state.bs = bs; 4063 bs_entry->state.options = options; 4064 bs_entry->state.explicit_options = explicit_options; 4065 bs_entry->state.flags = flags; 4066 4067 /* 4068 * If keep_old_opts is false then it means that unspecified 4069 * options must be reset to their original value. We don't allow 4070 * resetting 'backing' but we need to know if the option is 4071 * missing in order to decide if we have to return an error. 
4072 */ 4073 if (!keep_old_opts) { 4074 bs_entry->state.backing_missing = 4075 !qdict_haskey(options, "backing") && 4076 !qdict_haskey(options, "backing.driver"); 4077 } 4078 4079 QLIST_FOREACH(child, &bs->children, next) { 4080 QDict *new_child_options = NULL; 4081 bool child_keep_old = keep_old_opts; 4082 4083 /* reopen can only change the options of block devices that were 4084 * implicitly created and inherited options. For other (referenced) 4085 * block devices, a syntax like "backing.foo" results in an error. */ 4086 if (child->bs->inherits_from != bs) { 4087 continue; 4088 } 4089 4090 /* Check if the options contain a child reference */ 4091 if (qdict_haskey(options, child->name)) { 4092 const char *childref = qdict_get_try_str(options, child->name); 4093 /* 4094 * The current child must not be reopened if the child 4095 * reference is null or points to a different node. 4096 */ 4097 if (g_strcmp0(childref, child->bs->node_name)) { 4098 continue; 4099 } 4100 /* 4101 * If the child reference points to the current child then 4102 * reopen it with its existing set of options (note that 4103 * it can still inherit new options from the parent). 
4104 */ 4105 child_keep_old = true; 4106 } else { 4107 /* Extract child options ("child-name.*") */ 4108 char *child_key_dot = g_strdup_printf("%s.", child->name); 4109 qdict_extract_subqdict(explicit_options, NULL, child_key_dot); 4110 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 4111 g_free(child_key_dot); 4112 } 4113 4114 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 4115 child->klass, child->role, bs->drv->is_format, 4116 options, flags, child_keep_old); 4117 } 4118 4119 return bs_queue; 4120 } 4121 4122 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 4123 BlockDriverState *bs, 4124 QDict *options, bool keep_old_opts) 4125 { 4126 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, 4127 NULL, 0, keep_old_opts); 4128 } 4129 4130 void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) 4131 { 4132 if (bs_queue) { 4133 BlockReopenQueueEntry *bs_entry, *next; 4134 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4135 qobject_unref(bs_entry->state.explicit_options); 4136 qobject_unref(bs_entry->state.options); 4137 g_free(bs_entry); 4138 } 4139 g_free(bs_queue); 4140 } 4141 } 4142 4143 /* 4144 * Reopen multiple BlockDriverStates atomically & transactionally. 4145 * 4146 * The queue passed in (bs_queue) must have been built up previous 4147 * via bdrv_reopen_queue(). 4148 * 4149 * Reopens all BDS specified in the queue, with the appropriate 4150 * flags. All devices are prepared for reopen, and failure of any 4151 * device will cause all device changes to be abandoned, and intermediate 4152 * data cleaned up. 4153 * 4154 * If all devices prepare successfully, then the changes are committed 4155 * to all devices. 4156 * 4157 * All affected nodes must be drained between bdrv_reopen_queue() and 4158 * bdrv_reopen_multiple(). 4159 * 4160 * To be called from the main thread, with all other AioContexts unlocked. 
4161 */ 4162 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 4163 { 4164 int ret = -1; 4165 BlockReopenQueueEntry *bs_entry, *next; 4166 AioContext *ctx; 4167 Transaction *tran = tran_new(); 4168 g_autoptr(GHashTable) found = NULL; 4169 g_autoptr(GSList) refresh_list = NULL; 4170 4171 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 4172 assert(bs_queue != NULL); 4173 4174 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4175 ctx = bdrv_get_aio_context(bs_entry->state.bs); 4176 aio_context_acquire(ctx); 4177 ret = bdrv_flush(bs_entry->state.bs); 4178 aio_context_release(ctx); 4179 if (ret < 0) { 4180 error_setg_errno(errp, -ret, "Error flushing drive"); 4181 goto abort; 4182 } 4183 } 4184 4185 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4186 assert(bs_entry->state.bs->quiesce_counter > 0); 4187 ctx = bdrv_get_aio_context(bs_entry->state.bs); 4188 aio_context_acquire(ctx); 4189 ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); 4190 aio_context_release(ctx); 4191 if (ret < 0) { 4192 goto abort; 4193 } 4194 bs_entry->prepared = true; 4195 } 4196 4197 found = g_hash_table_new(NULL, NULL); 4198 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4199 BDRVReopenState *state = &bs_entry->state; 4200 4201 refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); 4202 if (state->old_backing_bs) { 4203 refresh_list = bdrv_topological_dfs(refresh_list, found, 4204 state->old_backing_bs); 4205 } 4206 if (state->old_file_bs) { 4207 refresh_list = bdrv_topological_dfs(refresh_list, found, 4208 state->old_file_bs); 4209 } 4210 } 4211 4212 /* 4213 * Note that file-posix driver rely on permission update done during reopen 4214 * (even if no permission changed), because it wants "new" permissions for 4215 * reconfiguring the fd and that's why it does it in raw_check_perm(), not 4216 * in raw_reopen_prepare() which is called with "old" permissions. 
4217 */ 4218 ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); 4219 if (ret < 0) { 4220 goto abort; 4221 } 4222 4223 /* 4224 * If we reach this point, we have success and just need to apply the 4225 * changes. 4226 * 4227 * Reverse order is used to comfort qcow2 driver: on commit it need to write 4228 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But 4229 * children are usually goes after parents in reopen-queue, so go from last 4230 * to first element. 4231 */ 4232 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4233 ctx = bdrv_get_aio_context(bs_entry->state.bs); 4234 aio_context_acquire(ctx); 4235 bdrv_reopen_commit(&bs_entry->state); 4236 aio_context_release(ctx); 4237 } 4238 4239 tran_commit(tran); 4240 4241 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4242 BlockDriverState *bs = bs_entry->state.bs; 4243 4244 if (bs->drv->bdrv_reopen_commit_post) { 4245 ctx = bdrv_get_aio_context(bs); 4246 aio_context_acquire(ctx); 4247 bs->drv->bdrv_reopen_commit_post(&bs_entry->state); 4248 aio_context_release(ctx); 4249 } 4250 } 4251 4252 ret = 0; 4253 goto cleanup; 4254 4255 abort: 4256 tran_abort(tran); 4257 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4258 if (bs_entry->prepared) { 4259 ctx = bdrv_get_aio_context(bs_entry->state.bs); 4260 aio_context_acquire(ctx); 4261 bdrv_reopen_abort(&bs_entry->state); 4262 aio_context_release(ctx); 4263 } 4264 } 4265 4266 cleanup: 4267 bdrv_reopen_queue_free(bs_queue); 4268 4269 return ret; 4270 } 4271 4272 int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, 4273 Error **errp) 4274 { 4275 AioContext *ctx = bdrv_get_aio_context(bs); 4276 BlockReopenQueue *queue; 4277 int ret; 4278 4279 bdrv_subtree_drained_begin(bs); 4280 if (ctx != qemu_get_aio_context()) { 4281 aio_context_release(ctx); 4282 } 4283 4284 queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); 4285 ret = bdrv_reopen_multiple(queue, errp); 4286 4287 if (ctx != qemu_get_aio_context()) { 
4288 aio_context_acquire(ctx); 4289 } 4290 bdrv_subtree_drained_end(bs); 4291 4292 return ret; 4293 } 4294 4295 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, 4296 Error **errp) 4297 { 4298 QDict *opts = qdict_new(); 4299 4300 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); 4301 4302 return bdrv_reopen(bs, opts, true, errp); 4303 } 4304 4305 /* 4306 * Take a BDRVReopenState and check if the value of 'backing' in the 4307 * reopen_state->options QDict is valid or not. 4308 * 4309 * If 'backing' is missing from the QDict then return 0. 4310 * 4311 * If 'backing' contains the node name of the backing file of 4312 * reopen_state->bs then return 0. 4313 * 4314 * If 'backing' contains a different node name (or is null) then check 4315 * whether the current backing file can be replaced with the new one. 4316 * If that's the case then reopen_state->replace_backing_bs is set to 4317 * true and reopen_state->new_backing_bs contains a pointer to the new 4318 * backing BlockDriverState (or NULL). 4319 * 4320 * Return 0 on success, otherwise return < 0 and set @errp. 4321 */ 4322 static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, 4323 bool is_backing, Transaction *tran, 4324 Error **errp) 4325 { 4326 BlockDriverState *bs = reopen_state->bs; 4327 BlockDriverState *new_child_bs; 4328 BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) : 4329 child_bs(bs->file); 4330 const char *child_name = is_backing ? 
"backing" : "file"; 4331 QObject *value; 4332 const char *str; 4333 4334 value = qdict_get(reopen_state->options, child_name); 4335 if (value == NULL) { 4336 return 0; 4337 } 4338 4339 switch (qobject_type(value)) { 4340 case QTYPE_QNULL: 4341 assert(is_backing); /* The 'file' option does not allow a null value */ 4342 new_child_bs = NULL; 4343 break; 4344 case QTYPE_QSTRING: 4345 str = qstring_get_str(qobject_to(QString, value)); 4346 new_child_bs = bdrv_lookup_bs(NULL, str, errp); 4347 if (new_child_bs == NULL) { 4348 return -EINVAL; 4349 } else if (bdrv_recurse_has_child(new_child_bs, bs)) { 4350 error_setg(errp, "Making '%s' a %s child of '%s' would create a " 4351 "cycle", str, child_name, bs->node_name); 4352 return -EINVAL; 4353 } 4354 break; 4355 default: 4356 /* 4357 * The options QDict has been flattened, so 'backing' and 'file' 4358 * do not allow any other data type here. 4359 */ 4360 g_assert_not_reached(); 4361 } 4362 4363 if (old_child_bs == new_child_bs) { 4364 return 0; 4365 } 4366 4367 if (old_child_bs) { 4368 if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) { 4369 return 0; 4370 } 4371 4372 if (old_child_bs->implicit) { 4373 error_setg(errp, "Cannot replace implicit %s child of %s", 4374 child_name, bs->node_name); 4375 return -EPERM; 4376 } 4377 } 4378 4379 if (bs->drv->is_filter && !old_child_bs) { 4380 /* 4381 * Filters always have a file or a backing child, so we are trying to 4382 * change wrong child 4383 */ 4384 error_setg(errp, "'%s' is a %s filter node that does not support a " 4385 "%s child", bs->node_name, bs->drv->format_name, child_name); 4386 return -EINVAL; 4387 } 4388 4389 if (is_backing) { 4390 reopen_state->old_backing_bs = old_child_bs; 4391 } else { 4392 reopen_state->old_file_bs = old_child_bs; 4393 } 4394 4395 return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, 4396 tran, errp); 4397 } 4398 4399 /* 4400 * Prepares a BlockDriverState for reopen. 
All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state->bs is the BlockDriverState to reopen
 * reopen_state->flags are the new open flags
 * queue is the reopen queue
 * change_child_tran is the transaction passed on to
 * bdrv_reopen_parse_file_or_backing() for 'backing' and 'file' changes
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure after the driver's .bdrv_reopen_prepare() has succeeded, the
 * driver's .bdrv_reopen_abort() is called here to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue,
                               Transaction *change_child_tran, Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    bool drv_prepared = false; /* set once drv->bdrv_reopen_prepare() passed */

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* @discard is owned by this function and released with g_free() below */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    /* Failure is detected through local_err; ret carries the error code */
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver gave no message: report a generic one */
                bdrv_refresh_filename(reopen_state->bs);
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* From here on, a failure must be followed by drv->bdrv_reopen_abort() */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Allow changing the 'file' option. In this case NULL is not allowed */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "file");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                /* After the loop @child is NULL unless a name matched */
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        /*
                         * 'continue' in a do/while jumps to the loop
                         * condition, i.e. advances to the next entry.
                         */
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type. In the future, this should be remedied by correctly typing
             * all options. For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

error:
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    /* Common cleanup for both the success and the failure path */
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
*/
static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;
    BlockDriverState *bs;
    BdrvChild *child;

    assert(reopen_state != NULL);
    bs = reopen_state->bs;
    drv = bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    /* Drop the references to the old dicts, take references to the new ones */
    qobject_unref(bs->explicit_options);
    qobject_unref(bs->options);
    qobject_ref(reopen_state->explicit_options);
    qobject_ref(reopen_state->options);

    bs->explicit_options = reopen_state->explicit_options;
    bs->options = reopen_state->options;
    bs->open_flags = reopen_state->flags;
    bs->detect_zeroes = reopen_state->detect_zeroes;

    /* Remove child references from bs->options and bs->explicit_options.
     * Child options were already removed in bdrv_reopen_queue_child() */
    QLIST_FOREACH(child, &bs->children, next) {
        qdict_del(bs->explicit_options, child->name);
        qdict_del(bs->options, child->name);
    }
    /* backing is probably removed, so it's not handled by previous loop */
    qdict_del(bs->explicit_options, "backing");
    qdict_del(bs->options, "backing");

    bdrv_refresh_limits(bs, NULL, NULL);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 *
 * Only forwards to the driver's .bdrv_reopen_abort() callback, if the
 * driver provides one.
 */
static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}


/*
 * Close @bs: drain and flush it, call the driver's .bdrv_close(), drop all
 * children and reset the per-image state kept in the BlockDriverState.
 *
 * Must only be called once the reference count has dropped to zero
 * (asserted below). The BlockDriverState itself is not freed here.
 */
static void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;
    BdrvChild *child, *next;

    assert(!bs->refcnt);

    bdrv_drained_begin(bs); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */

    if (bs->drv) {
        if (bs->drv->bdrv_close) {
            /* Must unfreeze all children, so bdrv_unref_child() works */
            bs->drv->bdrv_close(bs);
        }
        bs->drv = NULL;
    }

    /* _SAFE variant: the current child is removed while iterating */
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_unref_child(bs, child);
    }

    bs->backing = NULL;
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    qatomic_set(&bs->copy_on_read, 0);
    bs->backing_file[0] = '\0';
    bs->backing_format[0] = '\0';
    bs->total_sectors = 0;
    bs->encrypted = false;
    bs->sg = false;
    qobject_unref(bs->options);
    qobject_unref(bs->explicit_options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    qobject_unref(bs->full_open_options);
    bs->full_open_options = NULL;
    g_free(bs->block_status_cache);
    bs->block_status_cache = NULL;

    bdrv_release_named_dirty_bitmaps(bs);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    /* Free every notifier, then reset the list head */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
    bdrv_drained_end(bs);

    /*
     * If we're still inside some bdrv_drain_all_begin()/end() sections, end
     * them now since this BDS won't exist anymore when bdrv_drain_all_end()
     * gets called.
     */
    if (bs->quiesce_counter) {
        bdrv_drain_all_end_quiesce(bs);
    }
}

/*
 * Close all block nodes. No job may exist any more when this is called
 * (asserted); on return the list of all BlockDriverStates is empty
 * (asserted as well).
 */
void bdrv_close_all(void)
{
    assert(job_next(NULL) == NULL);

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
    bdrv_drain_all();

    blk_remove_all_bs();
    blockdev_close_all_bdrv_states();

    assert(QTAILQ_EMPTY(&all_bdrv_states));
}

/*
 * Decide whether the parent link @c may be switched over to point to @to.
 *
 * Returns false if the child class asks to stay at its node
 * (klass->stay_at_node) or if @c is one of the links found in the subtree
 * of @to (see the long comment below); returns true otherwise.
 */
static bool should_update_child(BdrvChild *c, BlockDriverState *to)
{
    GQueue *queue;
    GHashTable *found;
    bool ret;

    if (c->klass->stay_at_node) {
        return false;
    }

    /* If the child @c belongs to the BDS @to, replacing the current
     * c->bs by @to would mean to create a loop.
     *
     * Such a case occurs when appending a BDS to a backing chain.
     * For instance, imagine the following chain:
     *
     *   guest device -> node A -> further backing chain...
     *
     * Now we create a new BDS B which we want to put on top of this
     * chain, so we first attach A as its backing node:
     *
     *                   node B
     *                     |
     *                     v
     *   guest device -> node A -> further backing chain...
     *
     * Finally we want to replace A by B.  When doing that, we want to
     * replace all pointers to A by pointers to B -- except for the
     * pointer from B because (1) that would create a loop, and (2)
     * that pointer should simply stay intact:
     *
     *   guest device -> node B
     *                     |
     *                     v
     *                   node A -> further backing chain...
     *
     * In general, when replacing a node A (c->bs) by a node B (@to),
     * if A is a child of B, that means we cannot replace A by B there
     * because that would create a loop.  Silently detaching A from B
     * is also not really an option.  So overall just leaving A in
     * place there is the most sensible choice.
     *
     * We would also create a loop in any cases where @c is only
     * indirectly referenced by @to.  Prevent this by returning false
     * if @c is found (by breadth-first search) anywhere in the whole
     * subtree of @to.
     */

    ret = true;
    /* @found records the nodes already queued, so each is visited once */
    found = g_hash_table_new(NULL, NULL);
    g_hash_table_add(found, to);
    queue = g_queue_new();
    g_queue_push_tail(queue, to);

    while (!g_queue_is_empty(queue)) {
        BlockDriverState *v = g_queue_pop_head(queue);
        BdrvChild *c2;

        QLIST_FOREACH(c2, &v->children, next) {
            if (c2 == c) {
                /*
                 * NOTE(review): this 'break' only leaves the QLIST_FOREACH;
                 * the outer while loop keeps processing the queue. The
                 * result is unaffected since ret is never set back to true.
                 */
                ret = false;
                break;
            }

            if (g_hash_table_contains(found, c2->bs)) {
                continue;
            }

            g_queue_push_tail(queue, c2->bs);
            g_hash_table_add(found, c2->bs);
        }
    }

    g_queue_free(queue);
    g_hash_table_destroy(found);

    return ret;
}

/*
 * Transaction state for bdrv_remove_file_or_backing_child(): the child
 * that was unlinked and whether it was the parent's .backing (true) or
 * .file (false) child.
 */
typedef struct BdrvRemoveFilterOrCowChild {
    BdrvChild *child;
    bool is_backing;
} BdrvRemoveFilterOrCowChild;

/* .abort handler: re-insert the child into its parent's list and pointer */
static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
{
    BdrvRemoveFilterOrCowChild *s = opaque;
    BlockDriverState *parent_bs = s->child->opaque;

    QLIST_INSERT_HEAD(&parent_bs->children, s->child, next);
    if (s->is_backing) {
        parent_bs->backing = s->child;
    } else {
        parent_bs->file = s->child;
    }

    /*
     * We don't have to restore child->bs here to undo bdrv_replace_child_tran()
     * because that function is transactionable and it registered own completion
     * entries in @tran, so .abort() for bdrv_replace_child_safe() will be
     * called automatically.
     */
}

/* .commit handler: the removal is final, release the child object */
static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
{
    BdrvRemoveFilterOrCowChild *s = opaque;

    bdrv_child_free(s->child);
}

/* .clean releases the BdrvRemoveFilterOrCowChild allocated with g_new() */
static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
    .abort = bdrv_remove_filter_or_cow_child_abort,
    .commit = bdrv_remove_filter_or_cow_child_commit,
    .clean = g_free,
};

/*
 * A function to remove backing or file child of @bs.
 * Function doesn't update permissions, caller is responsible for this.
 *
 * @child must be bs->backing or bs->file (asserted); a NULL @child is
 * accepted and makes the function a no-op.
 */
static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                              BdrvChild *child,
                                              Transaction *tran)
{
    BdrvRemoveFilterOrCowChild *s;

    assert(child == bs->backing || child == bs->file);

    if (!child) {
        return;
    }

    if (child->bs) {
        bdrv_replace_child_tran(child, NULL, tran);
    }

    s = g_new(BdrvRemoveFilterOrCowChild, 1);
    *s = (BdrvRemoveFilterOrCowChild) {
        .child = child,
        .is_backing = (child == bs->backing),
    };
    tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);

    /* Unlink now; the abort handler above undoes exactly these steps */
    QLIST_SAFE_REMOVE(child, next);
    if (s->is_backing) {
        bs->backing = NULL;
    } else {
        bs->file = NULL;
    }
}

/*
 * A function to remove backing-chain child of @bs if exists: cow child for
 * format nodes (always .backing) and filter child for filters (may be .file or
 * .backing)
 */
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
                                            Transaction *tran)
{
    bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran);
}

/*
 * Point the parent links of @from to @to, without updating permissions.
 *
 * Links for which should_update_child() returns false are skipped when
 * @auto_skip is true and cause -EINVAL otherwise. A frozen link causes
 * -EPERM. Each replacement is done through bdrv_replace_child_tran()
 * with @tran.
 *
 * Returns 0 on success, otherwise < 0 with @errp set.
 */
static int bdrv_replace_node_noperm(BlockDriverState *from,
                                    BlockDriverState *to,
                                    bool auto_skip, Transaction *tran,
                                    Error **errp)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->bs == from);
        if (!should_update_child(c, to)) {
            if (auto_skip) {
                continue;
            }
            error_setg(errp, "Should not change '%s' link to '%s'",
                       c->name, from->node_name);
            return -EINVAL;
        }
        if (c->frozen) {
            error_setg(errp, "Cannot change '%s' link to '%s'",
                       c->name, from->node_name);
            return -EPERM;
        }
        bdrv_replace_child_tran(c, to, tran);
    }

    return 0;
}

/*
 * With auto_skip=true bdrv_replace_node_common skips updating from parents
 * if it creates a parent-child relation loop or if parent is block-job.
 *
 * With auto_skip=false the error is returned if from has a parent which should
 * not be updated.
 *
 * With @detach_subchain=true @to must be in a backing chain of @from. In this
 * case backing link of the cow-parent of @to is removed.
 *
 * Returns 0 on success, otherwise < 0 with @errp set.
 */
static int bdrv_replace_node_common(BlockDriverState *from,
                                    BlockDriverState *to,
                                    bool auto_skip, bool detach_subchain,
                                    Error **errp)
{
    Transaction *tran = tran_new();
    g_autoptr(GHashTable) found = NULL;
    g_autoptr(GSList) refresh_list = NULL;
    BlockDriverState *to_cow_parent = NULL;
    int ret;

    if (detach_subchain) {
        assert(bdrv_chain_contains(from, to));
        assert(from != to);
        /* Walk down from @from to the node whose filter/COW child is @to */
        for (to_cow_parent = from;
             bdrv_filter_or_cow_bs(to_cow_parent) != to;
             to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
        {
            ;
        }
    }

    /* Make sure that @from doesn't go away until we have successfully attached
     * all of its parents to @to. */
    bdrv_ref(from);

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
    bdrv_drained_begin(from);

    /*
     * Do the replacement without permission update.
     * Replacement may influence the permissions, we should calculate new
     * permissions based on new graph. If we fail, we'll roll-back the
     * replacement.
     */
    ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
    if (ret < 0) {
        goto out;
    }

    if (detach_subchain) {
        bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
    }

    found = g_hash_table_new(NULL, NULL);

    refresh_list = bdrv_topological_dfs(refresh_list, found, to);
    refresh_list = bdrv_topological_dfs(refresh_list, found, from);

    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
    if (ret < 0) {
        goto out;
    }

    ret = 0;

out:
    /* @ret decides how the transaction is finalized */
    tran_finalize(tran, ret);

    bdrv_drained_end(from);
    bdrv_unref(from);

    return ret;
}

/*
 * Replace @from by @to in the parents of @from: calls
 * bdrv_replace_node_common() with auto_skip=true and
 * detach_subchain=false.
 */
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                      Error **errp)
{
    return bdrv_replace_node_common(from, to, true, false, errp);
}

/*
 * Replace @bs by its filter-or-COW child: calls
 * bdrv_replace_node_common() with auto_skip=true and
 * detach_subchain=true.
 */
int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
{
    return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
                                    errp);
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend and must not have backing
 * child.
 *
 * This function does not create any image files.
5055 */ 5056 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 5057 Error **errp) 5058 { 5059 int ret; 5060 Transaction *tran = tran_new(); 5061 5062 assert(!bs_new->backing); 5063 5064 ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", 5065 &child_of_bds, bdrv_backing_role(bs_new), 5066 &bs_new->backing, tran, errp); 5067 if (ret < 0) { 5068 goto out; 5069 } 5070 5071 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); 5072 if (ret < 0) { 5073 goto out; 5074 } 5075 5076 ret = bdrv_refresh_perms(bs_new, errp); 5077 out: 5078 tran_finalize(tran, ret); 5079 5080 bdrv_refresh_limits(bs_top, NULL, NULL); 5081 5082 return ret; 5083 } 5084 5085 /* Not for empty child */ 5086 int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, 5087 Error **errp) 5088 { 5089 int ret; 5090 Transaction *tran = tran_new(); 5091 g_autoptr(GHashTable) found = NULL; 5092 g_autoptr(GSList) refresh_list = NULL; 5093 BlockDriverState *old_bs = child->bs; 5094 5095 bdrv_ref(old_bs); 5096 bdrv_drained_begin(old_bs); 5097 bdrv_drained_begin(new_bs); 5098 5099 bdrv_replace_child_tran(child, new_bs, tran); 5100 5101 found = g_hash_table_new(NULL, NULL); 5102 refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); 5103 refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); 5104 5105 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); 5106 5107 tran_finalize(tran, ret); 5108 5109 bdrv_drained_end(old_bs); 5110 bdrv_drained_end(new_bs); 5111 bdrv_unref(old_bs); 5112 5113 return ret; 5114 } 5115 5116 static void bdrv_delete(BlockDriverState *bs) 5117 { 5118 assert(bdrv_op_blocker_is_empty(bs)); 5119 assert(!bs->refcnt); 5120 5121 /* remove from list, if necessary */ 5122 if (bs->node_name[0] != '\0') { 5123 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 5124 } 5125 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 5126 5127 bdrv_close(bs); 5128 5129 g_free(bs); 5130 } 5131 5132 5133 /* 5134 * Replace @bs by newly created 
block node. 5135 * 5136 * @options is a QDict of options to pass to the block drivers, or NULL for an 5137 * empty set of options. The reference to the QDict belongs to the block layer 5138 * after the call (even on failure), so if the caller intends to reuse the 5139 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 5140 */ 5141 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, 5142 int flags, Error **errp) 5143 { 5144 ERRP_GUARD(); 5145 int ret; 5146 BlockDriverState *new_node_bs = NULL; 5147 const char *drvname, *node_name; 5148 BlockDriver *drv; 5149 5150 drvname = qdict_get_try_str(options, "driver"); 5151 if (!drvname) { 5152 error_setg(errp, "driver is not specified"); 5153 goto fail; 5154 } 5155 5156 drv = bdrv_find_format(drvname); 5157 if (!drv) { 5158 error_setg(errp, "Unknown driver: '%s'", drvname); 5159 goto fail; 5160 } 5161 5162 node_name = qdict_get_try_str(options, "node-name"); 5163 5164 new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, 5165 errp); 5166 options = NULL; /* bdrv_new_open_driver() eats options */ 5167 if (!new_node_bs) { 5168 error_prepend(errp, "Could not create node: "); 5169 goto fail; 5170 } 5171 5172 bdrv_drained_begin(bs); 5173 ret = bdrv_replace_node(bs, new_node_bs, errp); 5174 bdrv_drained_end(bs); 5175 5176 if (ret < 0) { 5177 error_prepend(errp, "Could not replace node: "); 5178 goto fail; 5179 } 5180 5181 return new_node_bs; 5182 5183 fail: 5184 qobject_unref(options); 5185 bdrv_unref(new_node_bs); 5186 return NULL; 5187 } 5188 5189 /* 5190 * Run consistency checks on an image 5191 * 5192 * Returns 0 if the check could be completed (it doesn't mean that the image is 5193 * free of errors) or -errno when an internal error occurred. The results of the 5194 * check are stored in res. 
5195 */ 5196 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 5197 BdrvCheckResult *res, BdrvCheckMode fix) 5198 { 5199 if (bs->drv == NULL) { 5200 return -ENOMEDIUM; 5201 } 5202 if (bs->drv->bdrv_co_check == NULL) { 5203 return -ENOTSUP; 5204 } 5205 5206 memset(res, 0, sizeof(*res)); 5207 return bs->drv->bdrv_co_check(bs, res, fix); 5208 } 5209 5210 /* 5211 * Return values: 5212 * 0 - success 5213 * -EINVAL - backing format specified, but no file 5214 * -ENOSPC - can't update the backing file because no space is left in the 5215 * image file header 5216 * -ENOTSUP - format driver doesn't support changing the backing file 5217 */ 5218 int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, 5219 const char *backing_fmt, bool require) 5220 { 5221 BlockDriver *drv = bs->drv; 5222 int ret; 5223 5224 if (!drv) { 5225 return -ENOMEDIUM; 5226 } 5227 5228 /* Backing file format doesn't make sense without a backing file */ 5229 if (backing_fmt && !backing_file) { 5230 return -EINVAL; 5231 } 5232 5233 if (require && backing_file && !backing_fmt) { 5234 return -EINVAL; 5235 } 5236 5237 if (drv->bdrv_change_backing_file != NULL) { 5238 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 5239 } else { 5240 ret = -ENOTSUP; 5241 } 5242 5243 if (ret == 0) { 5244 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 5245 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 5246 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 5247 backing_file ?: ""); 5248 } 5249 return ret; 5250 } 5251 5252 /* 5253 * Finds the first non-filter node above bs in the chain between 5254 * active and bs. The returned node is either an immediate parent of 5255 * bs, or there are only filter nodes between the two. 5256 * 5257 * Returns NULL if bs is not found in active's image chain, 5258 * or if active == bs. 5259 * 5260 * Returns the bottommost base image if bs == NULL. 
5261 */ 5262 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 5263 BlockDriverState *bs) 5264 { 5265 bs = bdrv_skip_filters(bs); 5266 active = bdrv_skip_filters(active); 5267 5268 while (active) { 5269 BlockDriverState *next = bdrv_backing_chain_next(active); 5270 if (bs == next) { 5271 return active; 5272 } 5273 active = next; 5274 } 5275 5276 return NULL; 5277 } 5278 5279 /* Given a BDS, searches for the base layer. */ 5280 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 5281 { 5282 return bdrv_find_overlay(bs, NULL); 5283 } 5284 5285 /* 5286 * Return true if at least one of the COW (backing) and filter links 5287 * between @bs and @base is frozen. @errp is set if that's the case. 5288 * @base must be reachable from @bs, or NULL. 5289 */ 5290 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 5291 Error **errp) 5292 { 5293 BlockDriverState *i; 5294 BdrvChild *child; 5295 5296 for (i = bs; i != base; i = child_bs(child)) { 5297 child = bdrv_filter_or_cow_child(i); 5298 5299 if (child && child->frozen) { 5300 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 5301 child->name, i->node_name, child->bs->node_name); 5302 return true; 5303 } 5304 } 5305 5306 return false; 5307 } 5308 5309 /* 5310 * Freeze all COW (backing) and filter links between @bs and @base. 5311 * If any of the links is already frozen the operation is aborted and 5312 * none of the links are modified. 5313 * @base must be reachable from @bs, or NULL. 5314 * Returns 0 on success. On failure returns < 0 and sets @errp. 
5315 */ 5316 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 5317 Error **errp) 5318 { 5319 BlockDriverState *i; 5320 BdrvChild *child; 5321 5322 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 5323 return -EPERM; 5324 } 5325 5326 for (i = bs; i != base; i = child_bs(child)) { 5327 child = bdrv_filter_or_cow_child(i); 5328 if (child && child->bs->never_freeze) { 5329 error_setg(errp, "Cannot freeze '%s' link to '%s'", 5330 child->name, child->bs->node_name); 5331 return -EPERM; 5332 } 5333 } 5334 5335 for (i = bs; i != base; i = child_bs(child)) { 5336 child = bdrv_filter_or_cow_child(i); 5337 if (child) { 5338 child->frozen = true; 5339 } 5340 } 5341 5342 return 0; 5343 } 5344 5345 /* 5346 * Unfreeze all COW (backing) and filter links between @bs and @base. 5347 * The caller must ensure that all links are frozen before using this 5348 * function. 5349 * @base must be reachable from @bs, or NULL. 5350 */ 5351 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 5352 { 5353 BlockDriverState *i; 5354 BdrvChild *child; 5355 5356 for (i = bs; i != base; i = child_bs(child)) { 5357 child = bdrv_filter_or_cow_child(i); 5358 if (child) { 5359 assert(child->frozen); 5360 child->frozen = false; 5361 } 5362 } 5363 } 5364 5365 /* 5366 * Drops images above 'base' up to and including 'top', and sets the image 5367 * above 'top' to have base as its backing file. 5368 * 5369 * Requires that the overlay to 'top' is opened r/w, so that the backing file 5370 * information in 'bs' can be properly updated. 
*
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata. If it is NULL, the filename of 'base' is used.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success. Returns -EIO if 'top' or 'base' has no driver, if
 * 'base' is not in the chain of 'top', or if replacing the node fails;
 * returns the negative value of a parent's update_filename() callback if
 * that fails. Errors are reported with error_report_err().
 */
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
                           const char *backing_file_str)
{
    BlockDriverState *explicit_top = top;
    bool update_inherits_from;
    BdrvChild *c;
    Error *local_err = NULL;
    int ret = -EIO; /* stays -EIO on every early exit below */
    g_autoptr(GSList) updated_children = NULL;
    GSList *p;

    /* Keep 'top' alive and its subtree drained until 'exit' */
    bdrv_ref(top);
    bdrv_subtree_drained_begin(top);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    /* Make sure that base is in the backing chain of top */
    if (!bdrv_chain_contains(top, base)) {
        goto exit;
    }

    /* If 'base' recursively inherits from 'top' then we should set
     * base->inherits_from to top->inherits_from after 'top' and all
     * other intermediate nodes have been dropped.
     * If 'top' is an implicit node (e.g. "commit_top") we should skip
     * it because no one inherits from it. We use explicit_top for that. */
    explicit_top = bdrv_skip_implicit_filters(explicit_top);
    update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);

    /* success - we can delete the intermediate states, and link top->base */
    /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
     * we've figured out how they should work. */
    if (!backing_file_str) {
        bdrv_refresh_filename(base);
        backing_file_str = base->filename;
    }

    /* Remember the parents of 'top' before they are moved to 'base' */
    QLIST_FOREACH(c, &top->parents, next_parent) {
        updated_children = g_slist_prepend(updated_children, c);
    }

    /*
     * It seems correct to pass detach_subchain=true here, but it triggers
     * one more yet not fixed bug, when due to nested aio_poll loop we switch to
     * another drained section, which modify the graph (for example, removing
     * the child, which we keep in updated_children list). So, it's a TODO.
     *
     * Note, bug triggered if pass detach_subchain=true here and run
     * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
     * That's a FIXME.
     */
    /* The return value is not used; failure is detected through local_err */
    bdrv_replace_node_common(top, base, false, false, &local_err);
    if (local_err) {
        error_report_err(local_err);
        goto exit;
    }

    /* Let each former parent of 'top' update its backing file information */
    for (p = updated_children; p; p = p->next) {
        c = p->data;

        if (c->klass->update_filename) {
            ret = c->klass->update_filename(c, base, backing_file_str,
                                            &local_err);
            if (ret < 0) {
                /*
                 * TODO: Actually, we want to rollback all previous iterations
                 * of this loop, and (which is almost impossible) previous
                 * bdrv_replace_node()...
                 *
                 * Note, that c->klass->update_filename may lead to permission
                 * update, so it's a bad idea to call it inside permission
                 * update transaction of bdrv_replace_node.
                 */
                error_report_err(local_err);
                goto exit;
            }
        }
    }

    if (update_inherits_from) {
        base->inherits_from = explicit_top->inherits_from;
    }

    ret = 0;
exit:
    bdrv_subtree_drained_end(top);
    bdrv_unref(top);
    return ret;
}

/**
 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
 * sums the size of all data-bearing children.  (This excludes backing
 * children.)
5490 */ 5491 static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) 5492 { 5493 BdrvChild *child; 5494 int64_t child_size, sum = 0; 5495 5496 QLIST_FOREACH(child, &bs->children, next) { 5497 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 5498 BDRV_CHILD_FILTERED)) 5499 { 5500 child_size = bdrv_get_allocated_file_size(child->bs); 5501 if (child_size < 0) { 5502 return child_size; 5503 } 5504 sum += child_size; 5505 } 5506 } 5507 5508 return sum; 5509 } 5510 5511 /** 5512 * Length of a allocated file in bytes. Sparse files are counted by actual 5513 * allocated space. Return < 0 if error or unknown. 5514 */ 5515 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 5516 { 5517 BlockDriver *drv = bs->drv; 5518 if (!drv) { 5519 return -ENOMEDIUM; 5520 } 5521 if (drv->bdrv_get_allocated_file_size) { 5522 return drv->bdrv_get_allocated_file_size(bs); 5523 } 5524 5525 if (drv->bdrv_file_open) { 5526 /* 5527 * Protocol drivers default to -ENOTSUP (most of their data is 5528 * not stored in any of their children (if they even have any), 5529 * so there is no generic way to figure it out). 5530 */ 5531 return -ENOTSUP; 5532 } else if (drv->is_filter) { 5533 /* Filter drivers default to the size of their filtered child */ 5534 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); 5535 } else { 5536 /* Other drivers default to summing their children's sizes */ 5537 return bdrv_sum_allocated_file_size(bs); 5538 } 5539 } 5540 5541 /* 5542 * bdrv_measure: 5543 * @drv: Format driver 5544 * @opts: Creation options for new image 5545 * @in_bs: Existing image containing data for new image (may be NULL) 5546 * @errp: Error object 5547 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 5548 * or NULL on error 5549 * 5550 * Calculate file size required to create a new image. 5551 * 5552 * If @in_bs is given then space for allocated clusters and zero clusters 5553 * from that image are included in the calculation. 
If @opts contains a 5554 * backing file that is shared by @in_bs then backing clusters may be omitted 5555 * from the calculation. 5556 * 5557 * If @in_bs is NULL then the calculation includes no allocated clusters 5558 * unless a preallocation option is given in @opts. 5559 * 5560 * Note that @in_bs may use a different BlockDriver from @drv. 5561 * 5562 * If an error occurs the @errp pointer is set. 5563 */ 5564 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 5565 BlockDriverState *in_bs, Error **errp) 5566 { 5567 if (!drv->bdrv_measure) { 5568 error_setg(errp, "Block driver '%s' does not support size measurement", 5569 drv->format_name); 5570 return NULL; 5571 } 5572 5573 return drv->bdrv_measure(opts, in_bs, errp); 5574 } 5575 5576 /** 5577 * Return number of sectors on success, -errno on error. 5578 */ 5579 int64_t bdrv_nb_sectors(BlockDriverState *bs) 5580 { 5581 BlockDriver *drv = bs->drv; 5582 5583 if (!drv) 5584 return -ENOMEDIUM; 5585 5586 if (drv->has_variable_length) { 5587 int ret = refresh_total_sectors(bs, bs->total_sectors); 5588 if (ret < 0) { 5589 return ret; 5590 } 5591 } 5592 return bs->total_sectors; 5593 } 5594 5595 /** 5596 * Return length in bytes on success, -errno on error. 5597 * The length is always a multiple of BDRV_SECTOR_SIZE. 5598 */ 5599 int64_t bdrv_getlength(BlockDriverState *bs) 5600 { 5601 int64_t ret = bdrv_nb_sectors(bs); 5602 5603 if (ret < 0) { 5604 return ret; 5605 } 5606 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 5607 return -EFBIG; 5608 } 5609 return ret * BDRV_SECTOR_SIZE; 5610 } 5611 5612 /* return 0 as number of sectors if no device present or error */ 5613 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 5614 { 5615 int64_t nb_sectors = bdrv_nb_sectors(bs); 5616 5617 *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 5618 } 5619 5620 bool bdrv_is_sg(BlockDriverState *bs) 5621 { 5622 return bs->sg; 5623 } 5624 5625 /** 5626 * Return whether the given node supports compressed writes. 5627 */ 5628 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 5629 { 5630 BlockDriverState *filtered; 5631 5632 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 5633 return false; 5634 } 5635 5636 filtered = bdrv_filter_bs(bs); 5637 if (filtered) { 5638 /* 5639 * Filters can only forward compressed writes, so we have to 5640 * check the child. 5641 */ 5642 return bdrv_supports_compressed_writes(filtered); 5643 } 5644 5645 return true; 5646 } 5647 5648 const char *bdrv_get_format_name(BlockDriverState *bs) 5649 { 5650 return bs->drv ? bs->drv->format_name : NULL; 5651 } 5652 5653 static int qsort_strcmp(const void *a, const void *b) 5654 { 5655 return strcmp(*(char *const *)a, *(char *const *)b); 5656 } 5657 5658 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 5659 void *opaque, bool read_only) 5660 { 5661 BlockDriver *drv; 5662 int count = 0; 5663 int i; 5664 const char **formats = NULL; 5665 5666 QLIST_FOREACH(drv, &bdrv_drivers, list) { 5667 if (drv->format_name) { 5668 bool found = false; 5669 int i = count; 5670 5671 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 5672 continue; 5673 } 5674 5675 while (formats && i && !found) { 5676 found = !strcmp(formats[--i], drv->format_name); 5677 } 5678 5679 if (!found) { 5680 formats = g_renew(const char *, formats, count + 1); 5681 formats[count++] = drv->format_name; 5682 } 5683 } 5684 } 5685 5686 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 5687 const char *format_name = block_driver_modules[i].format_name; 5688 5689 if (format_name) { 5690 bool found = false; 5691 int j = count; 5692 5693 if (use_bdrv_whitelist && 5694 !bdrv_format_is_whitelisted(format_name, read_only)) { 5695 continue; 5696 } 5697 5698 while (formats && j && !found) { 5699 found = 
!strcmp(formats[--j], format_name); 5700 } 5701 5702 if (!found) { 5703 formats = g_renew(const char *, formats, count + 1); 5704 formats[count++] = format_name; 5705 } 5706 } 5707 } 5708 5709 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 5710 5711 for (i = 0; i < count; i++) { 5712 it(opaque, formats[i]); 5713 } 5714 5715 g_free(formats); 5716 } 5717 5718 /* This function is to find a node in the bs graph */ 5719 BlockDriverState *bdrv_find_node(const char *node_name) 5720 { 5721 BlockDriverState *bs; 5722 5723 assert(node_name); 5724 5725 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5726 if (!strcmp(node_name, bs->node_name)) { 5727 return bs; 5728 } 5729 } 5730 return NULL; 5731 } 5732 5733 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 5734 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 5735 Error **errp) 5736 { 5737 BlockDeviceInfoList *list; 5738 BlockDriverState *bs; 5739 5740 list = NULL; 5741 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5742 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 5743 if (!info) { 5744 qapi_free_BlockDeviceInfoList(list); 5745 return NULL; 5746 } 5747 QAPI_LIST_PREPEND(list, info); 5748 } 5749 5750 return list; 5751 } 5752 5753 typedef struct XDbgBlockGraphConstructor { 5754 XDbgBlockGraph *graph; 5755 GHashTable *graph_nodes; 5756 } XDbgBlockGraphConstructor; 5757 5758 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 5759 { 5760 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 5761 5762 gr->graph = g_new0(XDbgBlockGraph, 1); 5763 gr->graph_nodes = g_hash_table_new(NULL, NULL); 5764 5765 return gr; 5766 } 5767 5768 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 5769 { 5770 XDbgBlockGraph *graph = gr->graph; 5771 5772 g_hash_table_destroy(gr->graph_nodes); 5773 g_free(gr); 5774 5775 return graph; 5776 } 5777 5778 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 
5779 { 5780 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 5781 5782 if (ret != 0) { 5783 return ret; 5784 } 5785 5786 /* 5787 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 5788 * answer of g_hash_table_lookup. 5789 */ 5790 ret = g_hash_table_size(gr->graph_nodes) + 1; 5791 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 5792 5793 return ret; 5794 } 5795 5796 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 5797 XDbgBlockGraphNodeType type, const char *name) 5798 { 5799 XDbgBlockGraphNode *n; 5800 5801 n = g_new0(XDbgBlockGraphNode, 1); 5802 5803 n->id = xdbg_graph_node_num(gr, node); 5804 n->type = type; 5805 n->name = g_strdup(name); 5806 5807 QAPI_LIST_PREPEND(gr->graph->nodes, n); 5808 } 5809 5810 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 5811 const BdrvChild *child) 5812 { 5813 BlockPermission qapi_perm; 5814 XDbgBlockGraphEdge *edge; 5815 5816 edge = g_new0(XDbgBlockGraphEdge, 1); 5817 5818 edge->parent = xdbg_graph_node_num(gr, parent); 5819 edge->child = xdbg_graph_node_num(gr, child->bs); 5820 edge->name = g_strdup(child->name); 5821 5822 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 5823 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 5824 5825 if (flag & child->perm) { 5826 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 5827 } 5828 if (flag & child->shared_perm) { 5829 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 5830 } 5831 } 5832 5833 QAPI_LIST_PREPEND(gr->graph->edges, edge); 5834 } 5835 5836 5837 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 5838 { 5839 BlockBackend *blk; 5840 BlockJob *job; 5841 BlockDriverState *bs; 5842 BdrvChild *child; 5843 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 5844 5845 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 5846 char *allocated_name = NULL; 5847 const char *name = blk_name(blk); 5848 5849 if (!*name) { 5850 name = allocated_name = 
blk_get_attached_dev_id(blk); 5851 } 5852 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 5853 name); 5854 g_free(allocated_name); 5855 if (blk_root(blk)) { 5856 xdbg_graph_add_edge(gr, blk, blk_root(blk)); 5857 } 5858 } 5859 5860 for (job = block_job_next(NULL); job; job = block_job_next(job)) { 5861 GSList *el; 5862 5863 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 5864 job->job.id); 5865 for (el = job->nodes; el; el = el->next) { 5866 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 5867 } 5868 } 5869 5870 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5871 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 5872 bs->node_name); 5873 QLIST_FOREACH(child, &bs->children, next) { 5874 xdbg_graph_add_edge(gr, bs, child); 5875 } 5876 } 5877 5878 return xdbg_graph_finalize(gr); 5879 } 5880 5881 BlockDriverState *bdrv_lookup_bs(const char *device, 5882 const char *node_name, 5883 Error **errp) 5884 { 5885 BlockBackend *blk; 5886 BlockDriverState *bs; 5887 5888 if (device) { 5889 blk = blk_by_name(device); 5890 5891 if (blk) { 5892 bs = blk_bs(blk); 5893 if (!bs) { 5894 error_setg(errp, "Device '%s' has no medium", device); 5895 } 5896 5897 return bs; 5898 } 5899 } 5900 5901 if (node_name) { 5902 bs = bdrv_find_node(node_name); 5903 5904 if (bs) { 5905 return bs; 5906 } 5907 } 5908 5909 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 5910 device ? device : "", 5911 node_name ? node_name : ""); 5912 return NULL; 5913 } 5914 5915 /* If 'base' is in the same chain as 'top', return true. Otherwise, 5916 * return false. If either argument is NULL, return false. 
*/ 5917 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 5918 { 5919 while (top && top != base) { 5920 top = bdrv_filter_or_cow_bs(top); 5921 } 5922 5923 return top != NULL; 5924 } 5925 5926 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 5927 { 5928 if (!bs) { 5929 return QTAILQ_FIRST(&graph_bdrv_states); 5930 } 5931 return QTAILQ_NEXT(bs, node_list); 5932 } 5933 5934 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 5935 { 5936 if (!bs) { 5937 return QTAILQ_FIRST(&all_bdrv_states); 5938 } 5939 return QTAILQ_NEXT(bs, bs_list); 5940 } 5941 5942 const char *bdrv_get_node_name(const BlockDriverState *bs) 5943 { 5944 return bs->node_name; 5945 } 5946 5947 const char *bdrv_get_parent_name(const BlockDriverState *bs) 5948 { 5949 BdrvChild *c; 5950 const char *name; 5951 5952 /* If multiple parents have a name, just pick the first one. */ 5953 QLIST_FOREACH(c, &bs->parents, next_parent) { 5954 if (c->klass->get_name) { 5955 name = c->klass->get_name(c); 5956 if (name && *name) { 5957 return name; 5958 } 5959 } 5960 } 5961 5962 return NULL; 5963 } 5964 5965 /* TODO check what callers really want: bs->node_name or blk_name() */ 5966 const char *bdrv_get_device_name(const BlockDriverState *bs) 5967 { 5968 return bdrv_get_parent_name(bs) ?: ""; 5969 } 5970 5971 /* This can be used to identify nodes that might not have a device 5972 * name associated. Since node and device names live in the same 5973 * namespace, the result is unambiguous. The exception is if both are 5974 * absent, then this returns an empty (non-null) string. 
*/
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
{
    return bdrv_get_parent_name(bs) ?: bs->node_name;
}

/* Return the flags the node was opened with (bs->open_flags). */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}

/* Always reports 1; @bs is not inspected. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}

/*
 * Return the driver's answer to bdrv_has_zero_init, falling back to the
 * filtered child for drivers without that callback.  Returns 0 when there
 * is no driver, when @bs has a COW child, or when nothing else applies.
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    BlockDriverState *filtered;

    if (!bs->drv) {
        return 0;
    }

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
    if (bdrv_cow_child(bs)) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    filtered = bdrv_filter_bs(bs);
    if (filtered) {
        return bdrv_has_zero_init(filtered);
    }

    /* safe default */
    return 0;
}

/*
 * Return true only if the node was opened with BDRV_O_UNMAP and its
 * supported_zero_flags include BDRV_REQ_MAY_UNMAP.
 */
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return false;
    }

    return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
}

/* Copy bs->backing_file into @filename, a buffer of @filename_size bytes. */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}

/*
 * Fill @bdi with driver information about @bs.
 *
 * Drivers without a bdrv_get_info callback defer to their filtered child,
 * if any.  Returns 0 on success, -ENOMEDIUM without a driver, -ENOTSUP if
 * no implementation is available, -EINVAL if the reported cluster size
 * exceeds BDRV_MAX_ALIGNMENT, or the driver's negative return value.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    int ret;
    BlockDriver *drv = bs->drv;
    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        BlockDriverState *filtered = bdrv_filter_bs(bs);
        if (filtered) {
            return bdrv_get_info(filtered, bdi);
        }
        return -ENOTSUP;
    }
    /* Start from a zeroed structure before the driver fills it in */
    memset(bdi, 0, sizeof(*bdi));
    ret = drv->bdrv_get_info(bs, bdi);
    if (ret < 0) {
        return ret;
    }

    if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
        return -EINVAL;
    }

    return 0;
}

/*
 * Return the driver's format-specific image information, or NULL if there
 * is no driver or the driver has no bdrv_get_specific_info callback.
 */
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
                                          Error **errp)
{
    BlockDriver *drv = bs->drv;
    if (drv && drv->bdrv_get_specific_info) {
        return drv->bdrv_get_specific_info(bs, errp);
    }
    return NULL;
}

/*
 * Return the driver's specific statistics, or NULL if there is no driver
 * or the driver has no bdrv_get_specific_stats callback.
 */
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !drv->bdrv_get_specific_stats) {
        return NULL;
    }
    return drv->bdrv_get_specific_stats(bs);
}

/*
 * Forward @event to the driver's bdrv_debug_event callback.  Does nothing
 * if @bs is NULL, has no driver, or the driver lacks the callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
        return;
    }

    bs->drv->bdrv_debug_event(bs, event);
}

/*
 * Starting at @bs, follow primary children until a node whose driver
 * implements bdrv_debug_breakpoint is found.  Returns that node, or NULL.
 * A driver providing bdrv_debug_breakpoint is asserted to also provide
 * bdrv_debug_remove_breakpoint.
 */
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
        assert(bs->drv->bdrv_debug_remove_breakpoint);
        return bs;
    }

    return NULL;
}

/*
 * Set a breakpoint on the first node at or below @bs that supports it.
 * Returns the driver's result, or -ENOTSUP if no such node exists.
 */
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
    }

    return -ENOTSUP;
}

/*
 * Remove the breakpoint @tag on the first node at or below @bs that
 * supports breakpoints.  Returns the driver's result, or -ENOTSUP.
 */
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
    }

    return -ENOTSUP;
}

/*
 * Resume the request suspended under @tag on the first node at or below
 * @bs whose driver implements bdrv_debug_resume.  Returns the driver's
 * result, or -ENOTSUP if no such node exists.
 */
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    /* Unlike the sibling helpers, this walk also steps past driverless nodes */
    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
        return bs->drv->bdrv_debug_resume(bs, tag);
    }

    return -ENOTSUP;
}

/*
 * Ask the first node at or below @bs whose driver implements
 * bdrv_debug_is_suspended whether @tag is suspended.  Returns false if no
 * such node exists.
 */
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
        return bs->drv->bdrv_debug_is_suspended(bs, tag);
    }

    return false;
}

/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the node in the backing chain of @bs that matches @backing_file,
 * or NULL if there is none (or if @bs, its driver, or @backing_file is
 * missing). */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    bool filenames_refreshed = false;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;
    BlockDriverState *bs_below;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for realpath(); both are freed before returning */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /*
     * Being largely a legacy function, skip any filters here
     * (because filters do not have normal filenames, so they cannot
     * match anyway; and allowing json:{} filenames is a bit out of
     * scope).
     */
    for (curr_bs = bdrv_skip_filters(bs);
         bdrv_cow_child(curr_bs) != NULL;
         curr_bs = bs_below)
    {
        /* Set first so that 'continue' below still advances the loop */
        bs_below = bdrv_backing_chain_next(curr_bs);

        if (bdrv_backing_overridden(curr_bs)) {
            /*
             * If the backing file was overridden, we can only compare
             * directly against the backing node's filename.
             */

            if (!filenames_refreshed) {
                /*
                 * This will automatically refresh all of the
                 * filenames in the rest of the backing chain, so we
                 * only need to do this once.
                 */
                bdrv_refresh_filename(bs_below);
                filenames_refreshed = true;
            }

            if (strcmp(backing_file, bs_below->filename) == 0) {
                retval = bs_below;
                break;
            }
        } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            /*
             * If either of the filename paths is actually a protocol, then
             * compare unmodified paths; otherwise make paths relative.
             */
            char *backing_file_full_ret;

            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = bs_below;
                break;
            }
            /* Also check against the full backing filename for the image */
            backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
                                                                   NULL);
            if (backing_file_full_ret) {
                bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
                g_free(backing_file_full_ret);
                if (equal) {
                    retval = bs_below;
                    break;
                }
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
                                                       NULL);
            /* We are going to compare canonicalized absolute pathnames */
            if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
            if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = bs_below;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    return retval;
}

/*
 * Run the MODULE_INIT_BLOCK initializers.  When built with
 * CONFIG_BDRV_WHITELIST_TOOLS, the driver whitelist is enabled first.
 */
void bdrv_init(void)
{
#ifdef CONFIG_BDRV_WHITELIST_TOOLS
    use_bdrv_whitelist = 1;
#endif
    module_call_init(MODULE_INIT_BLOCK);
}

/* Same as bdrv_init(), but always enables the driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}

/*
 * Activate @bs.  All children are processed recursively first.  If @bs is
 * marked BDRV_O_INACTIVE, the flag is cleared, permissions are refreshed,
 * the driver's bdrv_co_invalidate_cache callback (if any) is run, the
 * skip-store setting of every dirty bitmap is reset to false, and the total
 * sector count is refreshed.  Finally each parent's activate callback is
 * called.
 *
 * Returns 0 on success, -ENOMEDIUM if @bs has no driver, or another
 * negative errno value with @errp set.  On failure after the flag was
 * cleared, BDRV_O_INACTIVE is set again.
 */
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BdrvChild *child, *parent;
    Error *local_err = NULL;
    int ret;
    BdrvDirtyBitmap *bm;

    if (!bs->drv)  {
        return -ENOMEDIUM;
    }

    QLIST_FOREACH(child, &bs->children, next) {
        /* Failure is detected through local_err, not the return value */
        bdrv_co_invalidate_cache(child->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }
    }

    /*
     * Update permissions, they may differ for inactive nodes.
     *
     * Note that the required permissions of inactive images are always a
     * subset of the permissions required after activating the image. This
     * allows us to just get the permissions upfront without restricting
     * drv->bdrv_invalidate_cache().
     *
     * It also means that in error cases, we don't have to try and revert to
     * the old permissions (which is an operation that could fail, too). We can
     * just keep the extended permissions for the next time that an activation
     * of the image is tried.
     */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        bs->open_flags &= ~BDRV_O_INACTIVE;
        ret = bdrv_refresh_perms(bs, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        if (bs->drv->bdrv_co_invalidate_cache) {
            bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }

        FOR_EACH_DIRTY_BITMAP(bs, bm) {
            bdrv_dirty_bitmap_skip_store(bm, false);
        }

        ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_setg_errno(errp, -ret, "Could not refresh total sector count");
            return ret;
        }
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->activate) {
            parent->klass->activate(parent, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }
    }

    return 0;
}

/*
 * Call bdrv_invalidate_cache() on every node yielded by the bdrv_first()/
 * bdrv_next() iterator, holding the node's AioContext around each call.
 * Stops at the first failure, leaving the error in @errp.
 */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_invalidate_cache(bs, errp);
        aio_context_release(aio_context);
        if (ret < 0) {
            /* Iteration is abandoned early, so clean up the iterator */
            bdrv_next_cleanup(&it);
            return;
        }
    }
}

/*
 * Return true if @bs has a parent that is itself a BlockDriverState.
 * With @only_active set, parents marked BDRV_O_INACTIVE are not counted.
 */
static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
    BdrvChild *parent;

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->parent_is_bds) {
            BlockDriverState *parent_bs = parent->opaque;
            if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
                return true;
            }
        }
    }

    return false;
}

/*
 * Inactivate @bs and then, recursively, its children.
 *
 * Returns 0 on success (including when @bs is skipped because it still has
 * an active BDS parent), -ENOMEDIUM if @bs has no driver, -EPERM if a
 * parent still holds write permissions, or the negative value returned by
 * a driver or parent inactivate callback.
 */
static int bdrv_inactivate_recurse(BlockDriverState *bs)
{
    BdrvChild *child, *parent;
    int ret;
    uint64_t cumulative_perms, cumulative_shared_perms;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /* Make sure that we don't inactivate a child before its parent.
     * It will be covered by recursion from the yet active parent. */
    if (bdrv_has_bds_parent(bs, true)) {
        return 0;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Inactivate this node */
    if (bs->drv->bdrv_inactivate) {
        ret = bs->drv->bdrv_inactivate(bs);
        if (ret < 0) {
            return ret;
        }
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->inactivate) {
            ret = parent->klass->inactivate(parent);
            if (ret < 0) {
                return ret;
            }
        }
    }

    bdrv_get_cumulative_perm(bs, &cumulative_perms,
                             &cumulative_shared_perms);
    if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
        /* Our inactive parents still need write access. Inactivation failed. */
        return -EPERM;
    }

    bs->open_flags |= BDRV_O_INACTIVE;

    /*
     * Update permissions, they may differ for inactive nodes.
     * We only tried to loosen restrictions, so errors are not fatal, ignore
     * them.
     */
    bdrv_refresh_perms(bs, NULL);

    /* Recursively inactivate children */
    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * Inactivate all nodes.  Every distinct AioContext used by a node is
 * acquired up front and released again before returning.
 *
 * Returns 0 on success or the first negative value reported by
 * bdrv_inactivate_recurse().
 */
int bdrv_inactivate_all(void)
{
    BlockDriverState *bs = NULL;
    BdrvNextIterator it;
    int ret = 0;
    GSList *aio_ctxs = NULL, *ctx;

    /* First pass: acquire each AioContext exactly once */
    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
            aio_context_acquire(aio_context);
        }
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        /* Nodes with BDS parents are covered by recursion from the last
         * parent that gets inactivated. Don't inactivate them a second
         * time if that has already happened. */
        if (bdrv_has_bds_parent(bs, false)) {
            continue;
        }
        ret = bdrv_inactivate_recurse(bs);
        if (ret < 0) {
            bdrv_next_cleanup(&it);
            goto out;
        }
    }

out:
    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
        AioContext *aio_context = ctx->data;
        aio_context_release(aio_context);
    }
    g_slist_free(aio_ctxs);

    return ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 *
 * Without a driver the answer is false.  Drivers lacking their own
 * bdrv_is_inserted callback report true only if all children do.
 */
bool bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BdrvChild *child;

    if (!drv) {
        return false;
    }
    if (drv->bdrv_is_inserted) {
        return drv->bdrv_is_inserted(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_is_inserted(child->bs)) {
            return false;
        }
    }
    return true;
}

/**
 * If eject_flag is TRUE, eject the media.
Otherwise, close the tray 6513 */ 6514 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 6515 { 6516 BlockDriver *drv = bs->drv; 6517 6518 if (drv && drv->bdrv_eject) { 6519 drv->bdrv_eject(bs, eject_flag); 6520 } 6521 } 6522 6523 /** 6524 * Lock or unlock the media (if it is locked, the user won't be able 6525 * to eject it manually). 6526 */ 6527 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 6528 { 6529 BlockDriver *drv = bs->drv; 6530 6531 trace_bdrv_lock_medium(bs, locked); 6532 6533 if (drv && drv->bdrv_lock_medium) { 6534 drv->bdrv_lock_medium(bs, locked); 6535 } 6536 } 6537 6538 /* Get a reference to bs */ 6539 void bdrv_ref(BlockDriverState *bs) 6540 { 6541 bs->refcnt++; 6542 } 6543 6544 /* Release a previously grabbed reference to bs. 6545 * If after releasing, reference count is zero, the BlockDriverState is 6546 * deleted. */ 6547 void bdrv_unref(BlockDriverState *bs) 6548 { 6549 if (!bs) { 6550 return; 6551 } 6552 assert(bs->refcnt > 0); 6553 if (--bs->refcnt == 0) { 6554 bdrv_delete(bs); 6555 } 6556 } 6557 6558 struct BdrvOpBlocker { 6559 Error *reason; 6560 QLIST_ENTRY(BdrvOpBlocker) list; 6561 }; 6562 6563 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 6564 { 6565 BdrvOpBlocker *blocker; 6566 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6567 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 6568 blocker = QLIST_FIRST(&bs->op_blockers[op]); 6569 error_propagate_prepend(errp, error_copy(blocker->reason), 6570 "Node '%s' is busy: ", 6571 bdrv_get_device_or_node_name(bs)); 6572 return true; 6573 } 6574 return false; 6575 } 6576 6577 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 6578 { 6579 BdrvOpBlocker *blocker; 6580 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6581 6582 blocker = g_new0(BdrvOpBlocker, 1); 6583 blocker->reason = reason; 6584 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 6585 } 6586 6587 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error 
*reason) 6588 { 6589 BdrvOpBlocker *blocker, *next; 6590 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6591 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 6592 if (blocker->reason == reason) { 6593 QLIST_REMOVE(blocker, list); 6594 g_free(blocker); 6595 } 6596 } 6597 } 6598 6599 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 6600 { 6601 int i; 6602 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6603 bdrv_op_block(bs, i, reason); 6604 } 6605 } 6606 6607 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 6608 { 6609 int i; 6610 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6611 bdrv_op_unblock(bs, i, reason); 6612 } 6613 } 6614 6615 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 6616 { 6617 int i; 6618 6619 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6620 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 6621 return false; 6622 } 6623 } 6624 return true; 6625 } 6626 6627 void bdrv_img_create(const char *filename, const char *fmt, 6628 const char *base_filename, const char *base_fmt, 6629 char *options, uint64_t img_size, int flags, bool quiet, 6630 Error **errp) 6631 { 6632 QemuOptsList *create_opts = NULL; 6633 QemuOpts *opts = NULL; 6634 const char *backing_fmt, *backing_file; 6635 int64_t size; 6636 BlockDriver *drv, *proto_drv; 6637 Error *local_err = NULL; 6638 int ret = 0; 6639 6640 /* Find driver and parse its options */ 6641 drv = bdrv_find_format(fmt); 6642 if (!drv) { 6643 error_setg(errp, "Unknown file format '%s'", fmt); 6644 return; 6645 } 6646 6647 proto_drv = bdrv_find_protocol(filename, true, errp); 6648 if (!proto_drv) { 6649 return; 6650 } 6651 6652 if (!drv->create_opts) { 6653 error_setg(errp, "Format driver '%s' does not support image creation", 6654 drv->format_name); 6655 return; 6656 } 6657 6658 if (!proto_drv->create_opts) { 6659 error_setg(errp, "Protocol driver '%s' does not support image creation", 6660 proto_drv->format_name); 6661 return; 6662 } 6663 6664 /* Create parameter list */ 6665 create_opts = 
qemu_opts_append(create_opts, drv->create_opts); 6666 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 6667 6668 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 6669 6670 /* Parse -o options */ 6671 if (options) { 6672 if (!qemu_opts_do_parse(opts, options, NULL, errp)) { 6673 goto out; 6674 } 6675 } 6676 6677 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { 6678 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 6679 } else if (img_size != UINT64_C(-1)) { 6680 error_setg(errp, "The image size must be specified only once"); 6681 goto out; 6682 } 6683 6684 if (base_filename) { 6685 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, 6686 NULL)) { 6687 error_setg(errp, "Backing file not supported for file format '%s'", 6688 fmt); 6689 goto out; 6690 } 6691 } 6692 6693 if (base_fmt) { 6694 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { 6695 error_setg(errp, "Backing file format not supported for file " 6696 "format '%s'", fmt); 6697 goto out; 6698 } 6699 } 6700 6701 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 6702 if (backing_file) { 6703 if (!strcmp(filename, backing_file)) { 6704 error_setg(errp, "Error: Trying to create an image with the " 6705 "same filename as the backing file"); 6706 goto out; 6707 } 6708 if (backing_file[0] == '\0') { 6709 error_setg(errp, "Expected backing file name, got empty string"); 6710 goto out; 6711 } 6712 } 6713 6714 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 6715 6716 /* The size for the image must always be specified, unless we have a backing 6717 * file and we have not been forbidden from opening it. 
*/ 6718 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 6719 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 6720 BlockDriverState *bs; 6721 char *full_backing; 6722 int back_flags; 6723 QDict *backing_options = NULL; 6724 6725 full_backing = 6726 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 6727 &local_err); 6728 if (local_err) { 6729 goto out; 6730 } 6731 assert(full_backing); 6732 6733 /* 6734 * No need to do I/O here, which allows us to open encrypted 6735 * backing images without needing the secret 6736 */ 6737 back_flags = flags; 6738 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 6739 back_flags |= BDRV_O_NO_IO; 6740 6741 backing_options = qdict_new(); 6742 if (backing_fmt) { 6743 qdict_put_str(backing_options, "driver", backing_fmt); 6744 } 6745 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 6746 6747 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 6748 &local_err); 6749 g_free(full_backing); 6750 if (!bs) { 6751 error_append_hint(&local_err, "Could not open backing image.\n"); 6752 goto out; 6753 } else { 6754 if (!backing_fmt) { 6755 error_setg(&local_err, 6756 "Backing file specified without backing format"); 6757 error_append_hint(&local_err, "Detected format of %s.", 6758 bs->drv->format_name); 6759 goto out; 6760 } 6761 if (size == -1) { 6762 /* Opened BS, have no size */ 6763 size = bdrv_getlength(bs); 6764 if (size < 0) { 6765 error_setg_errno(errp, -size, "Could not get size of '%s'", 6766 backing_file); 6767 bdrv_unref(bs); 6768 goto out; 6769 } 6770 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 6771 } 6772 bdrv_unref(bs); 6773 } 6774 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 6775 } else if (backing_file && !backing_fmt) { 6776 error_setg(&local_err, 6777 "Backing file specified without backing format"); 6778 goto out; 6779 } 6780 6781 if (size == -1) { 6782 error_setg(errp, "Image creation needs a size parameter"); 6783 goto out; 
6784 } 6785 6786 if (!quiet) { 6787 printf("Formatting '%s', fmt=%s ", filename, fmt); 6788 qemu_opts_print(opts, " "); 6789 puts(""); 6790 fflush(stdout); 6791 } 6792 6793 ret = bdrv_create(drv, filename, opts, &local_err); 6794 6795 if (ret == -EFBIG) { 6796 /* This is generally a better message than whatever the driver would 6797 * deliver (especially because of the cluster_size_hint), since that 6798 * is most probably not much different from "image too large". */ 6799 const char *cluster_size_hint = ""; 6800 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 6801 cluster_size_hint = " (try using a larger cluster size)"; 6802 } 6803 error_setg(errp, "The image size is too large for file format '%s'" 6804 "%s", fmt, cluster_size_hint); 6805 error_free(local_err); 6806 local_err = NULL; 6807 } 6808 6809 out: 6810 qemu_opts_del(opts); 6811 qemu_opts_free(create_opts); 6812 error_propagate(errp, local_err); 6813 } 6814 6815 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 6816 { 6817 return bs ? bs->aio_context : qemu_get_aio_context(); 6818 } 6819 6820 AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) 6821 { 6822 Coroutine *self = qemu_coroutine_self(); 6823 AioContext *old_ctx = qemu_coroutine_get_aio_context(self); 6824 AioContext *new_ctx; 6825 6826 /* 6827 * Increase bs->in_flight to ensure that this operation is completed before 6828 * moving the node to a different AioContext. Read new_ctx only afterwards. 
6829 */ 6830 bdrv_inc_in_flight(bs); 6831 6832 new_ctx = bdrv_get_aio_context(bs); 6833 aio_co_reschedule_self(new_ctx); 6834 return old_ctx; 6835 } 6836 6837 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) 6838 { 6839 aio_co_reschedule_self(old_ctx); 6840 bdrv_dec_in_flight(bs); 6841 } 6842 6843 void coroutine_fn bdrv_co_lock(BlockDriverState *bs) 6844 { 6845 AioContext *ctx = bdrv_get_aio_context(bs); 6846 6847 /* In the main thread, bs->aio_context won't change concurrently */ 6848 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6849 6850 /* 6851 * We're in coroutine context, so we already hold the lock of the main 6852 * loop AioContext. Don't lock it twice to avoid deadlocks. 6853 */ 6854 assert(qemu_in_coroutine()); 6855 if (ctx != qemu_get_aio_context()) { 6856 aio_context_acquire(ctx); 6857 } 6858 } 6859 6860 void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) 6861 { 6862 AioContext *ctx = bdrv_get_aio_context(bs); 6863 6864 assert(qemu_in_coroutine()); 6865 if (ctx != qemu_get_aio_context()) { 6866 aio_context_release(ctx); 6867 } 6868 } 6869 6870 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) 6871 { 6872 aio_co_enter(bdrv_get_aio_context(bs), co); 6873 } 6874 6875 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) 6876 { 6877 QLIST_REMOVE(ban, list); 6878 g_free(ban); 6879 } 6880 6881 static void bdrv_detach_aio_context(BlockDriverState *bs) 6882 { 6883 BdrvAioNotifier *baf, *baf_tmp; 6884 6885 assert(!bs->walking_aio_notifiers); 6886 bs->walking_aio_notifiers = true; 6887 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { 6888 if (baf->deleted) { 6889 bdrv_do_remove_aio_context_notifier(baf); 6890 } else { 6891 baf->detach_aio_context(baf->opaque); 6892 } 6893 } 6894 /* Never mind iterating again to check for ->deleted. bdrv_close() will 6895 * remove remaining aio notifiers if we aren't called again. 
6896 */ 6897 bs->walking_aio_notifiers = false; 6898 6899 if (bs->drv && bs->drv->bdrv_detach_aio_context) { 6900 bs->drv->bdrv_detach_aio_context(bs); 6901 } 6902 6903 if (bs->quiesce_counter) { 6904 aio_enable_external(bs->aio_context); 6905 } 6906 bs->aio_context = NULL; 6907 } 6908 6909 static void bdrv_attach_aio_context(BlockDriverState *bs, 6910 AioContext *new_context) 6911 { 6912 BdrvAioNotifier *ban, *ban_tmp; 6913 6914 if (bs->quiesce_counter) { 6915 aio_disable_external(new_context); 6916 } 6917 6918 bs->aio_context = new_context; 6919 6920 if (bs->drv && bs->drv->bdrv_attach_aio_context) { 6921 bs->drv->bdrv_attach_aio_context(bs, new_context); 6922 } 6923 6924 assert(!bs->walking_aio_notifiers); 6925 bs->walking_aio_notifiers = true; 6926 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { 6927 if (ban->deleted) { 6928 bdrv_do_remove_aio_context_notifier(ban); 6929 } else { 6930 ban->attached_aio_context(new_context, ban->opaque); 6931 } 6932 } 6933 bs->walking_aio_notifiers = false; 6934 } 6935 6936 /* 6937 * Changes the AioContext used for fd handlers, timers, and BHs by this 6938 * BlockDriverState and all its children and parents. 6939 * 6940 * Must be called from the main AioContext. 6941 * 6942 * The caller must own the AioContext lock for the old AioContext of bs, but it 6943 * must not own the AioContext lock for new_context (unless new_context is the 6944 * same as the current context of bs). 6945 * 6946 * @ignore will accumulate all visited BdrvChild object. The caller is 6947 * responsible for freeing the list afterwards. 
6948 */ 6949 void bdrv_set_aio_context_ignore(BlockDriverState *bs, 6950 AioContext *new_context, GSList **ignore) 6951 { 6952 AioContext *old_context = bdrv_get_aio_context(bs); 6953 GSList *children_to_process = NULL; 6954 GSList *parents_to_process = NULL; 6955 GSList *entry; 6956 BdrvChild *child, *parent; 6957 6958 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6959 6960 if (old_context == new_context) { 6961 return; 6962 } 6963 6964 bdrv_drained_begin(bs); 6965 6966 QLIST_FOREACH(child, &bs->children, next) { 6967 if (g_slist_find(*ignore, child)) { 6968 continue; 6969 } 6970 *ignore = g_slist_prepend(*ignore, child); 6971 children_to_process = g_slist_prepend(children_to_process, child); 6972 } 6973 6974 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6975 if (g_slist_find(*ignore, parent)) { 6976 continue; 6977 } 6978 *ignore = g_slist_prepend(*ignore, parent); 6979 parents_to_process = g_slist_prepend(parents_to_process, parent); 6980 } 6981 6982 for (entry = children_to_process; 6983 entry != NULL; 6984 entry = g_slist_next(entry)) { 6985 child = entry->data; 6986 bdrv_set_aio_context_ignore(child->bs, new_context, ignore); 6987 } 6988 g_slist_free(children_to_process); 6989 6990 for (entry = parents_to_process; 6991 entry != NULL; 6992 entry = g_slist_next(entry)) { 6993 parent = entry->data; 6994 assert(parent->klass->set_aio_ctx); 6995 parent->klass->set_aio_ctx(parent, new_context, ignore); 6996 } 6997 g_slist_free(parents_to_process); 6998 6999 bdrv_detach_aio_context(bs); 7000 7001 /* Acquire the new context, if necessary */ 7002 if (qemu_get_aio_context() != new_context) { 7003 aio_context_acquire(new_context); 7004 } 7005 7006 bdrv_attach_aio_context(bs, new_context); 7007 7008 /* 7009 * If this function was recursively called from 7010 * bdrv_set_aio_context_ignore(), there may be nodes in the 7011 * subtree that have not yet been moved to the new AioContext. 7012 * Release the old one so bdrv_drained_end() can poll them. 
7013 */ 7014 if (qemu_get_aio_context() != old_context) { 7015 aio_context_release(old_context); 7016 } 7017 7018 bdrv_drained_end(bs); 7019 7020 if (qemu_get_aio_context() != old_context) { 7021 aio_context_acquire(old_context); 7022 } 7023 if (qemu_get_aio_context() != new_context) { 7024 aio_context_release(new_context); 7025 } 7026 } 7027 7028 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, 7029 GSList **ignore, Error **errp) 7030 { 7031 if (g_slist_find(*ignore, c)) { 7032 return true; 7033 } 7034 *ignore = g_slist_prepend(*ignore, c); 7035 7036 /* 7037 * A BdrvChildClass that doesn't handle AioContext changes cannot 7038 * tolerate any AioContext changes 7039 */ 7040 if (!c->klass->can_set_aio_ctx) { 7041 char *user = bdrv_child_user_desc(c); 7042 error_setg(errp, "Changing iothreads is not supported by %s", user); 7043 g_free(user); 7044 return false; 7045 } 7046 if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { 7047 assert(!errp || *errp); 7048 return false; 7049 } 7050 return true; 7051 } 7052 7053 bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, 7054 GSList **ignore, Error **errp) 7055 { 7056 if (g_slist_find(*ignore, c)) { 7057 return true; 7058 } 7059 *ignore = g_slist_prepend(*ignore, c); 7060 return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); 7061 } 7062 7063 /* @ignore will accumulate all visited BdrvChild object. The caller is 7064 * responsible for freeing the list afterwards. 
*/ 7065 bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7066 GSList **ignore, Error **errp) 7067 { 7068 BdrvChild *c; 7069 7070 if (bdrv_get_aio_context(bs) == ctx) { 7071 return true; 7072 } 7073 7074 QLIST_FOREACH(c, &bs->parents, next_parent) { 7075 if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { 7076 return false; 7077 } 7078 } 7079 QLIST_FOREACH(c, &bs->children, next) { 7080 if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { 7081 return false; 7082 } 7083 } 7084 7085 return true; 7086 } 7087 7088 int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7089 BdrvChild *ignore_child, Error **errp) 7090 { 7091 GSList *ignore; 7092 bool ret; 7093 7094 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 7095 ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); 7096 g_slist_free(ignore); 7097 7098 if (!ret) { 7099 return -EPERM; 7100 } 7101 7102 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 7103 bdrv_set_aio_context_ignore(bs, ctx, &ignore); 7104 g_slist_free(ignore); 7105 7106 return 0; 7107 } 7108 7109 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 7110 Error **errp) 7111 { 7112 return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); 7113 } 7114 7115 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 7116 void (*attached_aio_context)(AioContext *new_context, void *opaque), 7117 void (*detach_aio_context)(void *opaque), void *opaque) 7118 { 7119 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 7120 *ban = (BdrvAioNotifier){ 7121 .attached_aio_context = attached_aio_context, 7122 .detach_aio_context = detach_aio_context, 7123 .opaque = opaque 7124 }; 7125 7126 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 7127 } 7128 7129 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 7130 void (*attached_aio_context)(AioContext *, 7131 void *), 7132 void (*detach_aio_context)(void *), 7133 void *opaque) 7134 { 7135 BdrvAioNotifier 
*ban, *ban_next; 7136 7137 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 7138 if (ban->attached_aio_context == attached_aio_context && 7139 ban->detach_aio_context == detach_aio_context && 7140 ban->opaque == opaque && 7141 ban->deleted == false) 7142 { 7143 if (bs->walking_aio_notifiers) { 7144 ban->deleted = true; 7145 } else { 7146 bdrv_do_remove_aio_context_notifier(ban); 7147 } 7148 return; 7149 } 7150 } 7151 7152 abort(); 7153 } 7154 7155 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 7156 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 7157 bool force, 7158 Error **errp) 7159 { 7160 if (!bs->drv) { 7161 error_setg(errp, "Node is ejected"); 7162 return -ENOMEDIUM; 7163 } 7164 if (!bs->drv->bdrv_amend_options) { 7165 error_setg(errp, "Block driver '%s' does not support option amendment", 7166 bs->drv->format_name); 7167 return -ENOTSUP; 7168 } 7169 return bs->drv->bdrv_amend_options(bs, opts, status_cb, 7170 cb_opaque, force, errp); 7171 } 7172 7173 /* 7174 * This function checks whether the given @to_replace is allowed to be 7175 * replaced by a node that always shows the same data as @bs. This is 7176 * used for example to verify whether the mirror job can replace 7177 * @to_replace by the target mirrored from @bs. 7178 * To be replaceable, @bs and @to_replace may either be guaranteed to 7179 * always show the same data (because they are only connected through 7180 * filters), or some driver may allow replacing one of its children 7181 * because it can guarantee that this child's data is not visible at 7182 * all (for example, for dissenting quorum children that have no other 7183 * parents). 
7184 */ 7185 bool bdrv_recurse_can_replace(BlockDriverState *bs, 7186 BlockDriverState *to_replace) 7187 { 7188 BlockDriverState *filtered; 7189 7190 if (!bs || !bs->drv) { 7191 return false; 7192 } 7193 7194 if (bs == to_replace) { 7195 return true; 7196 } 7197 7198 /* See what the driver can do */ 7199 if (bs->drv->bdrv_recurse_can_replace) { 7200 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 7201 } 7202 7203 /* For filters without an own implementation, we can recurse on our own */ 7204 filtered = bdrv_filter_bs(bs); 7205 if (filtered) { 7206 return bdrv_recurse_can_replace(filtered, to_replace); 7207 } 7208 7209 /* Safe default */ 7210 return false; 7211 } 7212 7213 /* 7214 * Check whether the given @node_name can be replaced by a node that 7215 * has the same data as @parent_bs. If so, return @node_name's BDS; 7216 * NULL otherwise. 7217 * 7218 * @node_name must be a (recursive) *child of @parent_bs (or this 7219 * function will return NULL). 7220 * 7221 * The result (whether the node can be replaced or not) is only valid 7222 * for as long as no graph or permission changes occur. 7223 */ 7224 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 7225 const char *node_name, Error **errp) 7226 { 7227 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 7228 AioContext *aio_context; 7229 7230 if (!to_replace_bs) { 7231 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 7232 return NULL; 7233 } 7234 7235 aio_context = bdrv_get_aio_context(to_replace_bs); 7236 aio_context_acquire(aio_context); 7237 7238 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 7239 to_replace_bs = NULL; 7240 goto out; 7241 } 7242 7243 /* We don't want arbitrary node of the BDS chain to be replaced only the top 7244 * most non filter in order to prevent data corruption. 7245 * Another benefit is that this tests exclude backing files which are 7246 * blocked by the backing blockers. 
7247 */ 7248 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 7249 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 7250 "because it cannot be guaranteed that doing so would not " 7251 "lead to an abrupt change of visible data", 7252 node_name, parent_bs->node_name); 7253 to_replace_bs = NULL; 7254 goto out; 7255 } 7256 7257 out: 7258 aio_context_release(aio_context); 7259 return to_replace_bs; 7260 } 7261 7262 /** 7263 * Iterates through the list of runtime option keys that are said to 7264 * be "strong" for a BDS. An option is called "strong" if it changes 7265 * a BDS's data. For example, the null block driver's "size" and 7266 * "read-zeroes" options are strong, but its "latency-ns" option is 7267 * not. 7268 * 7269 * If a key returned by this function ends with a dot, all options 7270 * starting with that prefix are strong. 7271 */ 7272 static const char *const *strong_options(BlockDriverState *bs, 7273 const char *const *curopt) 7274 { 7275 static const char *const global_options[] = { 7276 "driver", "filename", NULL 7277 }; 7278 7279 if (!curopt) { 7280 return &global_options[0]; 7281 } 7282 7283 curopt++; 7284 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 7285 curopt = bs->drv->strong_runtime_opts; 7286 } 7287 7288 return (curopt && *curopt) ? curopt : NULL; 7289 } 7290 7291 /** 7292 * Copies all strong runtime options from bs->options to the given 7293 * QDict. The set of strong option keys is determined by invoking 7294 * strong_options(). 7295 * 7296 * Returns true iff any strong option was present in bs->options (and 7297 * thus copied to the target QDict) with the exception of "filename" 7298 * and "driver". The caller is expected to use this value to decide 7299 * whether the existence of strong options prevents the generation of 7300 * a plain filename. 
7301 */ 7302 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 7303 { 7304 bool found_any = false; 7305 const char *const *option_name = NULL; 7306 7307 if (!bs->drv) { 7308 return false; 7309 } 7310 7311 while ((option_name = strong_options(bs, option_name))) { 7312 bool option_given = false; 7313 7314 assert(strlen(*option_name) > 0); 7315 if ((*option_name)[strlen(*option_name) - 1] != '.') { 7316 QObject *entry = qdict_get(bs->options, *option_name); 7317 if (!entry) { 7318 continue; 7319 } 7320 7321 qdict_put_obj(d, *option_name, qobject_ref(entry)); 7322 option_given = true; 7323 } else { 7324 const QDictEntry *entry; 7325 for (entry = qdict_first(bs->options); entry; 7326 entry = qdict_next(bs->options, entry)) 7327 { 7328 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 7329 qdict_put_obj(d, qdict_entry_key(entry), 7330 qobject_ref(qdict_entry_value(entry))); 7331 option_given = true; 7332 } 7333 } 7334 } 7335 7336 /* While "driver" and "filename" need to be included in a JSON filename, 7337 * their existence does not prohibit generation of a plain filename. */ 7338 if (!found_any && option_given && 7339 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 7340 { 7341 found_any = true; 7342 } 7343 } 7344 7345 if (!qdict_haskey(d, "driver")) { 7346 /* Drivers created with bdrv_new_open_driver() may not have a 7347 * @driver option. Add it here. */ 7348 qdict_put_str(d, "driver", bs->drv->format_name); 7349 } 7350 7351 return found_any; 7352 } 7353 7354 /* Note: This function may return false positives; it may return true 7355 * even if opening the backing file specified by bs's image header 7356 * would result in exactly bs->backing. 
*/
bool bdrv_backing_overridden(BlockDriverState *bs)
{
    if (!bs->backing) {
        /* No backing BDS, so if the image header reports any backing
         * file, it must have been suppressed */
        return bs->auto_backing_file[0] != '\0';
    }

    /* Overridden iff the attached backing node has a different file name */
    return strcmp(bs->auto_backing_file, bs->backing->bs->filename) != 0;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
7380 */ 7381 void bdrv_refresh_filename(BlockDriverState *bs) 7382 { 7383 BlockDriver *drv = bs->drv; 7384 BdrvChild *child; 7385 BlockDriverState *primary_child_bs; 7386 QDict *opts; 7387 bool backing_overridden; 7388 bool generate_json_filename; /* Whether our default implementation should 7389 fill exact_filename (false) or not (true) */ 7390 7391 if (!drv) { 7392 return; 7393 } 7394 7395 /* This BDS's file name may depend on any of its children's file names, so 7396 * refresh those first */ 7397 QLIST_FOREACH(child, &bs->children, next) { 7398 bdrv_refresh_filename(child->bs); 7399 } 7400 7401 if (bs->implicit) { 7402 /* For implicit nodes, just copy everything from the single child */ 7403 child = QLIST_FIRST(&bs->children); 7404 assert(QLIST_NEXT(child, next) == NULL); 7405 7406 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 7407 child->bs->exact_filename); 7408 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 7409 7410 qobject_unref(bs->full_open_options); 7411 bs->full_open_options = qobject_ref(child->bs->full_open_options); 7412 7413 return; 7414 } 7415 7416 backing_overridden = bdrv_backing_overridden(bs); 7417 7418 if (bs->open_flags & BDRV_O_NO_IO) { 7419 /* Without I/O, the backing file does not change anything. 7420 * Therefore, in such a case (primarily qemu-img), we can 7421 * pretend the backing file has not been overridden even if 7422 * it technically has been. 
*/ 7423 backing_overridden = false; 7424 } 7425 7426 /* Gather the options QDict */ 7427 opts = qdict_new(); 7428 generate_json_filename = append_strong_runtime_options(opts, bs); 7429 generate_json_filename |= backing_overridden; 7430 7431 if (drv->bdrv_gather_child_options) { 7432 /* Some block drivers may not want to present all of their children's 7433 * options, or name them differently from BdrvChild.name */ 7434 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 7435 } else { 7436 QLIST_FOREACH(child, &bs->children, next) { 7437 if (child == bs->backing && !backing_overridden) { 7438 /* We can skip the backing BDS if it has not been overridden */ 7439 continue; 7440 } 7441 7442 qdict_put(opts, child->name, 7443 qobject_ref(child->bs->full_open_options)); 7444 } 7445 7446 if (backing_overridden && !bs->backing) { 7447 /* Force no backing file */ 7448 qdict_put_null(opts, "backing"); 7449 } 7450 } 7451 7452 qobject_unref(bs->full_open_options); 7453 bs->full_open_options = opts; 7454 7455 primary_child_bs = bdrv_primary_bs(bs); 7456 7457 if (drv->bdrv_refresh_filename) { 7458 /* Obsolete information is of no use here, so drop the old file name 7459 * information before refreshing it */ 7460 bs->exact_filename[0] = '\0'; 7461 7462 drv->bdrv_refresh_filename(bs); 7463 } else if (primary_child_bs) { 7464 /* 7465 * Try to reconstruct valid information from the underlying 7466 * file -- this only works for format nodes (filter nodes 7467 * cannot be probed and as such must be selected by the user 7468 * either through an options dict, or through a special 7469 * filename which the filter driver must construct in its 7470 * .bdrv_refresh_filename() implementation). 
7471 */ 7472 7473 bs->exact_filename[0] = '\0'; 7474 7475 /* 7476 * We can use the underlying file's filename if: 7477 * - it has a filename, 7478 * - the current BDS is not a filter, 7479 * - the file is a protocol BDS, and 7480 * - opening that file (as this BDS's format) will automatically create 7481 * the BDS tree we have right now, that is: 7482 * - the user did not significantly change this BDS's behavior with 7483 * some explicit (strong) options 7484 * - no non-file child of this BDS has been overridden by the user 7485 * Both of these conditions are represented by generate_json_filename. 7486 */ 7487 if (primary_child_bs->exact_filename[0] && 7488 primary_child_bs->drv->bdrv_file_open && 7489 !drv->is_filter && !generate_json_filename) 7490 { 7491 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 7492 } 7493 } 7494 7495 if (bs->exact_filename[0]) { 7496 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 7497 } else { 7498 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 7499 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 7500 json->str) >= sizeof(bs->filename)) { 7501 /* Give user a hint if we truncated things. 
*/ 7502 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 7503 } 7504 g_string_free(json, true); 7505 } 7506 } 7507 7508 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 7509 { 7510 BlockDriver *drv = bs->drv; 7511 BlockDriverState *child_bs; 7512 7513 if (!drv) { 7514 error_setg(errp, "Node '%s' is ejected", bs->node_name); 7515 return NULL; 7516 } 7517 7518 if (drv->bdrv_dirname) { 7519 return drv->bdrv_dirname(bs, errp); 7520 } 7521 7522 child_bs = bdrv_primary_bs(bs); 7523 if (child_bs) { 7524 return bdrv_dirname(child_bs, errp); 7525 } 7526 7527 bdrv_refresh_filename(bs); 7528 if (bs->exact_filename[0] != '\0') { 7529 return path_combine(bs->exact_filename, ""); 7530 } 7531 7532 error_setg(errp, "Cannot generate a base directory for %s nodes", 7533 drv->format_name); 7534 return NULL; 7535 } 7536 7537 /* 7538 * Hot add/remove a BDS's child. So the user can take a child offline when 7539 * it is broken and take a new child online 7540 */ 7541 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 7542 Error **errp) 7543 { 7544 7545 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 7546 error_setg(errp, "The node %s does not support adding a child", 7547 bdrv_get_device_or_node_name(parent_bs)); 7548 return; 7549 } 7550 7551 if (!QLIST_EMPTY(&child_bs->parents)) { 7552 error_setg(errp, "The node %s already has a parent", 7553 child_bs->node_name); 7554 return; 7555 } 7556 7557 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 7558 } 7559 7560 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 7561 { 7562 BdrvChild *tmp; 7563 7564 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 7565 error_setg(errp, "The node %s does not support removing a child", 7566 bdrv_get_device_or_node_name(parent_bs)); 7567 return; 7568 } 7569 7570 QLIST_FOREACH(tmp, &parent_bs->children, next) { 7571 if (tmp == child) { 7572 break; 7573 } 7574 } 7575 7576 if (!tmp) { 7577 error_setg(errp, "The node 
%s does not have a child named %s", 7578 bdrv_get_device_or_node_name(parent_bs), 7579 bdrv_get_device_or_node_name(child->bs)); 7580 return; 7581 } 7582 7583 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 7584 } 7585 7586 int bdrv_make_empty(BdrvChild *c, Error **errp) 7587 { 7588 BlockDriver *drv = c->bs->drv; 7589 int ret; 7590 7591 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 7592 7593 if (!drv->bdrv_make_empty) { 7594 error_setg(errp, "%s does not support emptying nodes", 7595 drv->format_name); 7596 return -ENOTSUP; 7597 } 7598 7599 ret = drv->bdrv_make_empty(c->bs); 7600 if (ret < 0) { 7601 error_setg_errno(errp, -ret, "Failed to empty %s", 7602 c->bs->filename); 7603 return ret; 7604 } 7605 7606 return 0; 7607 } 7608 7609 /* 7610 * Return the child that @bs acts as an overlay for, and from which data may be 7611 * copied in COW or COR operations. Usually this is the backing file. 7612 */ 7613 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 7614 { 7615 if (!bs || !bs->drv) { 7616 return NULL; 7617 } 7618 7619 if (bs->drv->is_filter) { 7620 return NULL; 7621 } 7622 7623 if (!bs->backing) { 7624 return NULL; 7625 } 7626 7627 assert(bs->backing->role & BDRV_CHILD_COW); 7628 return bs->backing; 7629 } 7630 7631 /* 7632 * If @bs acts as a filter for exactly one of its children, return 7633 * that child. 7634 */ 7635 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 7636 { 7637 BdrvChild *c; 7638 7639 if (!bs || !bs->drv) { 7640 return NULL; 7641 } 7642 7643 if (!bs->drv->is_filter) { 7644 return NULL; 7645 } 7646 7647 /* Only one of @backing or @file may be used */ 7648 assert(!(bs->backing && bs->file)); 7649 7650 c = bs->backing ?: bs->file; 7651 if (!c) { 7652 return NULL; 7653 } 7654 7655 assert(c->role & BDRV_CHILD_FILTERED); 7656 return c; 7657 } 7658 7659 /* 7660 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 7661 * whichever is non-NULL. 7662 * 7663 * Return NULL if both are NULL. 
7664 */ 7665 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 7666 { 7667 BdrvChild *cow_child = bdrv_cow_child(bs); 7668 BdrvChild *filter_child = bdrv_filter_child(bs); 7669 7670 /* Filter nodes cannot have COW backing files */ 7671 assert(!(cow_child && filter_child)); 7672 7673 return cow_child ?: filter_child; 7674 } 7675 7676 /* 7677 * Return the primary child of this node: For filters, that is the 7678 * filtered child. For other nodes, that is usually the child storing 7679 * metadata. 7680 * (A generally more helpful description is that this is (usually) the 7681 * child that has the same filename as @bs.) 7682 * 7683 * Drivers do not necessarily have a primary child; for example quorum 7684 * does not. 7685 */ 7686 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 7687 { 7688 BdrvChild *c, *found = NULL; 7689 7690 QLIST_FOREACH(c, &bs->children, next) { 7691 if (c->role & BDRV_CHILD_PRIMARY) { 7692 assert(!found); 7693 found = c; 7694 } 7695 } 7696 7697 return found; 7698 } 7699 7700 static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, 7701 bool stop_on_explicit_filter) 7702 { 7703 BdrvChild *c; 7704 7705 if (!bs) { 7706 return NULL; 7707 } 7708 7709 while (!(stop_on_explicit_filter && !bs->implicit)) { 7710 c = bdrv_filter_child(bs); 7711 if (!c) { 7712 /* 7713 * A filter that is embedded in a working block graph must 7714 * have a child. Assert this here so this function does 7715 * not return a filter node that is not expected by the 7716 * caller. 7717 */ 7718 assert(!bs->drv || !bs->drv->is_filter); 7719 break; 7720 } 7721 bs = c->bs; 7722 } 7723 /* 7724 * Note that this treats nodes with bs->drv == NULL as not being 7725 * filters (bs->drv == NULL should be replaced by something else 7726 * anyway). 7727 * The advantage of this behavior is that this function will thus 7728 * always return a non-NULL value (given a non-NULL @bs). 
7729 */ 7730 7731 return bs; 7732 } 7733 7734 /* 7735 * Return the first BDS that has not been added implicitly or that 7736 * does not have a filtered child down the chain starting from @bs 7737 * (including @bs itself). 7738 */ 7739 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 7740 { 7741 return bdrv_do_skip_filters(bs, true); 7742 } 7743 7744 /* 7745 * Return the first BDS that does not have a filtered child down the 7746 * chain starting from @bs (including @bs itself). 7747 */ 7748 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 7749 { 7750 return bdrv_do_skip_filters(bs, false); 7751 } 7752 7753 /* 7754 * For a backing chain, return the first non-filter backing image of 7755 * the first non-filter image. 7756 */ 7757 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 7758 { 7759 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 7760 } 7761 7762 /** 7763 * Check whether [offset, offset + bytes) overlaps with the cached 7764 * block-status data region. 7765 * 7766 * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, 7767 * which is what bdrv_bsc_is_data()'s interface needs. 7768 * Otherwise, *pnum is not touched. 7769 */ 7770 static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, 7771 int64_t offset, int64_t bytes, 7772 int64_t *pnum) 7773 { 7774 BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); 7775 bool overlaps; 7776 7777 overlaps = 7778 qatomic_read(&bsc->valid) && 7779 ranges_overlap(offset, bytes, bsc->data_start, 7780 bsc->data_end - bsc->data_start); 7781 7782 if (overlaps && pnum) { 7783 *pnum = bsc->data_end - offset; 7784 } 7785 7786 return overlaps; 7787 } 7788 7789 /** 7790 * See block_int.h for this function's documentation. 
7791 */ 7792 bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) 7793 { 7794 RCU_READ_LOCK_GUARD(); 7795 7796 return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); 7797 } 7798 7799 /** 7800 * See block_int.h for this function's documentation. 7801 */ 7802 void bdrv_bsc_invalidate_range(BlockDriverState *bs, 7803 int64_t offset, int64_t bytes) 7804 { 7805 RCU_READ_LOCK_GUARD(); 7806 7807 if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { 7808 qatomic_set(&bs->block_status_cache->valid, false); 7809 } 7810 } 7811 7812 /** 7813 * See block_int.h for this function's documentation. 7814 */ 7815 void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) 7816 { 7817 BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); 7818 BdrvBlockStatusCache *old_bsc; 7819 7820 *new_bsc = (BdrvBlockStatusCache) { 7821 .valid = true, 7822 .data_start = offset, 7823 .data_end = offset + bytes, 7824 }; 7825 7826 QEMU_LOCK_GUARD(&bs->bsc_modify_lock); 7827 7828 old_bsc = qatomic_rcu_read(&bs->block_status_cache); 7829 qatomic_rcu_set(&bs->block_status_cache, new_bsc); 7830 if (old_bsc) { 7831 g_free_rcu(old_bsc, rcu); 7832 } 7833 } 7834