1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2020 Virtuozzo International GmbH. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 
24 */ 25 26 #include "qemu/osdep.h" 27 #include "block/trace.h" 28 #include "block/block_int.h" 29 #include "block/blockjob.h" 30 #include "block/fuse.h" 31 #include "block/nbd.h" 32 #include "block/qdict.h" 33 #include "qemu/error-report.h" 34 #include "block/module_block.h" 35 #include "qemu/main-loop.h" 36 #include "qemu/module.h" 37 #include "qapi/error.h" 38 #include "qapi/qmp/qdict.h" 39 #include "qapi/qmp/qjson.h" 40 #include "qapi/qmp/qnull.h" 41 #include "qapi/qmp/qstring.h" 42 #include "qapi/qobject-output-visitor.h" 43 #include "qapi/qapi-visit-block-core.h" 44 #include "sysemu/block-backend.h" 45 #include "qemu/notify.h" 46 #include "qemu/option.h" 47 #include "qemu/coroutine.h" 48 #include "block/qapi.h" 49 #include "qemu/timer.h" 50 #include "qemu/cutils.h" 51 #include "qemu/id.h" 52 #include "block/coroutines.h" 53 54 #ifdef CONFIG_BSD 55 #include <sys/ioctl.h> 56 #include <sys/queue.h> 57 #ifndef __DragonFly__ 58 #include <sys/disk.h> 59 #endif 60 #endif 61 62 #ifdef _WIN32 63 #include <windows.h> 64 #endif 65 66 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 67 68 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 69 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 70 71 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 72 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 73 74 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 75 QLIST_HEAD_INITIALIZER(bdrv_drivers); 76 77 static BlockDriverState *bdrv_open_inherit(const char *filename, 78 const char *reference, 79 QDict *options, int flags, 80 BlockDriverState *parent, 81 const BdrvChildClass *child_class, 82 BdrvChildRole child_role, 83 Error **errp); 84 85 static void bdrv_replace_child_noperm(BdrvChild *child, 86 BlockDriverState *new_bs); 87 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 88 Transaction *tran); 89 90 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 91 BlockReopenQueue *queue, 92 Transaction *set_backings_tran, 
Error **errp); 93 static void bdrv_reopen_commit(BDRVReopenState *reopen_state); 94 static void bdrv_reopen_abort(BDRVReopenState *reopen_state); 95 96 /* If non-zero, use only whitelisted block drivers */ 97 static int use_bdrv_whitelist; 98 99 #ifdef _WIN32 100 static int is_windows_drive_prefix(const char *filename) 101 { 102 return (((filename[0] >= 'a' && filename[0] <= 'z') || 103 (filename[0] >= 'A' && filename[0] <= 'Z')) && 104 filename[1] == ':'); 105 } 106 107 int is_windows_drive(const char *filename) 108 { 109 if (is_windows_drive_prefix(filename) && 110 filename[2] == '\0') 111 return 1; 112 if (strstart(filename, "\\\\.\\", NULL) || 113 strstart(filename, "//./", NULL)) 114 return 1; 115 return 0; 116 } 117 #endif 118 119 size_t bdrv_opt_mem_align(BlockDriverState *bs) 120 { 121 if (!bs || !bs->drv) { 122 /* page size or 4k (hdd sector size) should be on the safe side */ 123 return MAX(4096, qemu_real_host_page_size); 124 } 125 126 return bs->bl.opt_mem_alignment; 127 } 128 129 size_t bdrv_min_mem_align(BlockDriverState *bs) 130 { 131 if (!bs || !bs->drv) { 132 /* page size or 4k (hdd sector size) should be on the safe side */ 133 return MAX(4096, qemu_real_host_page_size); 134 } 135 136 return bs->bl.min_mem_alignment; 137 } 138 139 /* check if the path starts with "<protocol>:" */ 140 int path_has_protocol(const char *path) 141 { 142 const char *p; 143 144 #ifdef _WIN32 145 if (is_windows_drive(path) || 146 is_windows_drive_prefix(path)) { 147 return 0; 148 } 149 p = path + strcspn(path, ":/\\"); 150 #else 151 p = path + strcspn(path, ":/"); 152 #endif 153 154 return *p == ':'; 155 } 156 157 int path_is_absolute(const char *path) 158 { 159 #ifdef _WIN32 160 /* specific case for names like: "\\.\d:" */ 161 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 162 return 1; 163 } 164 return (*path == '/' || *path == '\\'); 165 #else 166 return (*path == '/'); 167 #endif 168 } 169 170 /* if filename is absolute, just return its duplicate. 
Otherwise, build a 171 path to it by considering it is relative to base_path. URL are 172 supported. */ 173 char *path_combine(const char *base_path, const char *filename) 174 { 175 const char *protocol_stripped = NULL; 176 const char *p, *p1; 177 char *result; 178 int len; 179 180 if (path_is_absolute(filename)) { 181 return g_strdup(filename); 182 } 183 184 if (path_has_protocol(base_path)) { 185 protocol_stripped = strchr(base_path, ':'); 186 if (protocol_stripped) { 187 protocol_stripped++; 188 } 189 } 190 p = protocol_stripped ?: base_path; 191 192 p1 = strrchr(base_path, '/'); 193 #ifdef _WIN32 194 { 195 const char *p2; 196 p2 = strrchr(base_path, '\\'); 197 if (!p1 || p2 > p1) { 198 p1 = p2; 199 } 200 } 201 #endif 202 if (p1) { 203 p1++; 204 } else { 205 p1 = base_path; 206 } 207 if (p1 > p) { 208 p = p1; 209 } 210 len = p - base_path; 211 212 result = g_malloc(len + strlen(filename) + 1); 213 memcpy(result, base_path, len); 214 strcpy(result + len, filename); 215 216 return result; 217 } 218 219 /* 220 * Helper function for bdrv_parse_filename() implementations to remove optional 221 * protocol prefixes (especially "file:") from a filename and for putting the 222 * stripped filename into the options QDict if there is such a prefix. 
223 */ 224 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 225 QDict *options) 226 { 227 if (strstart(filename, prefix, &filename)) { 228 /* Stripping the explicit protocol prefix may result in a protocol 229 * prefix being (wrongly) detected (if the filename contains a colon) */ 230 if (path_has_protocol(filename)) { 231 GString *fat_filename; 232 233 /* This means there is some colon before the first slash; therefore, 234 * this cannot be an absolute path */ 235 assert(!path_is_absolute(filename)); 236 237 /* And we can thus fix the protocol detection issue by prefixing it 238 * by "./" */ 239 fat_filename = g_string_new("./"); 240 g_string_append(fat_filename, filename); 241 242 assert(!path_has_protocol(fat_filename->str)); 243 244 qdict_put(options, "filename", 245 qstring_from_gstring(fat_filename)); 246 } else { 247 /* If no protocol prefix was detected, we can use the shortened 248 * filename as-is */ 249 qdict_put_str(options, "filename", filename); 250 } 251 } 252 } 253 254 255 /* Returns whether the image file is opened as read-only. Note that this can 256 * return false and writing to the image file is still not possible because the 257 * image is inactivated. 
*/ 258 bool bdrv_is_read_only(BlockDriverState *bs) 259 { 260 return !(bs->open_flags & BDRV_O_RDWR); 261 } 262 263 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 264 bool ignore_allow_rdw, Error **errp) 265 { 266 /* Do not set read_only if copy_on_read is enabled */ 267 if (bs->copy_on_read && read_only) { 268 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 269 bdrv_get_device_or_node_name(bs)); 270 return -EINVAL; 271 } 272 273 /* Do not clear read_only if it is prohibited */ 274 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 275 !ignore_allow_rdw) 276 { 277 error_setg(errp, "Node '%s' is read only", 278 bdrv_get_device_or_node_name(bs)); 279 return -EPERM; 280 } 281 282 return 0; 283 } 284 285 /* 286 * Called by a driver that can only provide a read-only image. 287 * 288 * Returns 0 if the node is already read-only or it could switch the node to 289 * read-only because BDRV_O_AUTO_RDONLY is set. 290 * 291 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 292 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 293 * is not NULL, it is used as the error message for the Error object. 294 */ 295 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 296 Error **errp) 297 { 298 int ret = 0; 299 300 if (!(bs->open_flags & BDRV_O_RDWR)) { 301 return 0; 302 } 303 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 304 goto fail; 305 } 306 307 ret = bdrv_can_set_read_only(bs, true, false, NULL); 308 if (ret < 0) { 309 goto fail; 310 } 311 312 bs->open_flags &= ~BDRV_O_RDWR; 313 314 return 0; 315 316 fail: 317 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 318 return -EACCES; 319 } 320 321 /* 322 * If @backing is empty, this function returns NULL without setting 323 * @errp. In all other cases, NULL will only be returned with @errp 324 * set. 
325 * 326 * Therefore, a return value of NULL without @errp set means that 327 * there is no backing file; if @errp is set, there is one but its 328 * absolute filename cannot be generated. 329 */ 330 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 331 const char *backing, 332 Error **errp) 333 { 334 if (backing[0] == '\0') { 335 return NULL; 336 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 337 return g_strdup(backing); 338 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 339 error_setg(errp, "Cannot use relative backing file names for '%s'", 340 backed); 341 return NULL; 342 } else { 343 return path_combine(backed, backing); 344 } 345 } 346 347 /* 348 * If @filename is empty or NULL, this function returns NULL without 349 * setting @errp. In all other cases, NULL will only be returned with 350 * @errp set. 351 */ 352 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, 353 const char *filename, Error **errp) 354 { 355 char *dir, *full_name; 356 357 if (!filename || filename[0] == '\0') { 358 return NULL; 359 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 360 return g_strdup(filename); 361 } 362 363 dir = bdrv_dirname(relative_to, errp); 364 if (!dir) { 365 return NULL; 366 } 367 368 full_name = g_strconcat(dir, filename, NULL); 369 g_free(dir); 370 return full_name; 371 } 372 373 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 374 { 375 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 376 } 377 378 void bdrv_register(BlockDriver *bdrv) 379 { 380 assert(bdrv->format_name); 381 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 382 } 383 384 BlockDriverState *bdrv_new(void) 385 { 386 BlockDriverState *bs; 387 int i; 388 389 bs = g_new0(BlockDriverState, 1); 390 QLIST_INIT(&bs->dirty_bitmaps); 391 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 392 QLIST_INIT(&bs->op_blockers[i]); 393 } 394 qemu_co_mutex_init(&bs->reqs_lock); 395 
qemu_mutex_init(&bs->dirty_bitmap_mutex); 396 bs->refcnt = 1; 397 bs->aio_context = qemu_get_aio_context(); 398 399 qemu_co_queue_init(&bs->flush_queue); 400 401 for (i = 0; i < bdrv_drain_all_count; i++) { 402 bdrv_drained_begin(bs); 403 } 404 405 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 406 407 return bs; 408 } 409 410 static BlockDriver *bdrv_do_find_format(const char *format_name) 411 { 412 BlockDriver *drv1; 413 414 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 415 if (!strcmp(drv1->format_name, format_name)) { 416 return drv1; 417 } 418 } 419 420 return NULL; 421 } 422 423 BlockDriver *bdrv_find_format(const char *format_name) 424 { 425 BlockDriver *drv1; 426 int i; 427 428 drv1 = bdrv_do_find_format(format_name); 429 if (drv1) { 430 return drv1; 431 } 432 433 /* The driver isn't registered, maybe we need to load a module */ 434 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 435 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 436 block_module_load_one(block_driver_modules[i].library_name); 437 break; 438 } 439 } 440 441 return bdrv_do_find_format(format_name); 442 } 443 444 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 445 { 446 static const char *whitelist_rw[] = { 447 CONFIG_BDRV_RW_WHITELIST 448 NULL 449 }; 450 static const char *whitelist_ro[] = { 451 CONFIG_BDRV_RO_WHITELIST 452 NULL 453 }; 454 const char **p; 455 456 if (!whitelist_rw[0] && !whitelist_ro[0]) { 457 return 1; /* no whitelist, anything goes */ 458 } 459 460 for (p = whitelist_rw; *p; p++) { 461 if (!strcmp(format_name, *p)) { 462 return 1; 463 } 464 } 465 if (read_only) { 466 for (p = whitelist_ro; *p; p++) { 467 if (!strcmp(format_name, *p)) { 468 return 1; 469 } 470 } 471 } 472 return 0; 473 } 474 475 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 476 { 477 return bdrv_format_is_whitelisted(drv->format_name, read_only); 478 } 479 480 bool bdrv_uses_whitelist(void) 481 { 482 return use_bdrv_whitelist; 483 } 
484 485 typedef struct CreateCo { 486 BlockDriver *drv; 487 char *filename; 488 QemuOpts *opts; 489 int ret; 490 Error *err; 491 } CreateCo; 492 493 static void coroutine_fn bdrv_create_co_entry(void *opaque) 494 { 495 Error *local_err = NULL; 496 int ret; 497 498 CreateCo *cco = opaque; 499 assert(cco->drv); 500 501 ret = cco->drv->bdrv_co_create_opts(cco->drv, 502 cco->filename, cco->opts, &local_err); 503 error_propagate(&cco->err, local_err); 504 cco->ret = ret; 505 } 506 507 int bdrv_create(BlockDriver *drv, const char* filename, 508 QemuOpts *opts, Error **errp) 509 { 510 int ret; 511 512 Coroutine *co; 513 CreateCo cco = { 514 .drv = drv, 515 .filename = g_strdup(filename), 516 .opts = opts, 517 .ret = NOT_DONE, 518 .err = NULL, 519 }; 520 521 if (!drv->bdrv_co_create_opts) { 522 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 523 ret = -ENOTSUP; 524 goto out; 525 } 526 527 if (qemu_in_coroutine()) { 528 /* Fast-path if already in coroutine context */ 529 bdrv_create_co_entry(&cco); 530 } else { 531 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 532 qemu_coroutine_enter(co); 533 while (cco.ret == NOT_DONE) { 534 aio_poll(qemu_get_aio_context(), true); 535 } 536 } 537 538 ret = cco.ret; 539 if (ret < 0) { 540 if (cco.err) { 541 error_propagate(errp, cco.err); 542 } else { 543 error_setg_errno(errp, -ret, "Could not create image"); 544 } 545 } 546 547 out: 548 g_free(cco.filename); 549 return ret; 550 } 551 552 /** 553 * Helper function for bdrv_create_file_fallback(): Resize @blk to at 554 * least the given @minimum_size. 555 * 556 * On success, return @blk's actual length. 557 * Otherwise, return -errno. 
558 */ 559 static int64_t create_file_fallback_truncate(BlockBackend *blk, 560 int64_t minimum_size, Error **errp) 561 { 562 Error *local_err = NULL; 563 int64_t size; 564 int ret; 565 566 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 567 &local_err); 568 if (ret < 0 && ret != -ENOTSUP) { 569 error_propagate(errp, local_err); 570 return ret; 571 } 572 573 size = blk_getlength(blk); 574 if (size < 0) { 575 error_free(local_err); 576 error_setg_errno(errp, -size, 577 "Failed to inquire the new image file's length"); 578 return size; 579 } 580 581 if (size < minimum_size) { 582 /* Need to grow the image, but we failed to do that */ 583 error_propagate(errp, local_err); 584 return -ENOTSUP; 585 } 586 587 error_free(local_err); 588 local_err = NULL; 589 590 return size; 591 } 592 593 /** 594 * Helper function for bdrv_create_file_fallback(): Zero the first 595 * sector to remove any potentially pre-existing image header. 596 */ 597 static int create_file_fallback_zero_first_sector(BlockBackend *blk, 598 int64_t current_size, 599 Error **errp) 600 { 601 int64_t bytes_to_clear; 602 int ret; 603 604 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 605 if (bytes_to_clear) { 606 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 607 if (ret < 0) { 608 error_setg_errno(errp, -ret, 609 "Failed to clear the new image's first sector"); 610 return ret; 611 } 612 } 613 614 return 0; 615 } 616 617 /** 618 * Simple implementation of bdrv_co_create_opts for protocol drivers 619 * which only support creation via opening a file 620 * (usually existing raw storage device) 621 */ 622 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 623 const char *filename, 624 QemuOpts *opts, 625 Error **errp) 626 { 627 BlockBackend *blk; 628 QDict *options; 629 int64_t size = 0; 630 char *buf = NULL; 631 PreallocMode prealloc; 632 Error *local_err = NULL; 633 int ret; 634 635 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 636 buf = 
qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 637 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 638 PREALLOC_MODE_OFF, &local_err); 639 g_free(buf); 640 if (local_err) { 641 error_propagate(errp, local_err); 642 return -EINVAL; 643 } 644 645 if (prealloc != PREALLOC_MODE_OFF) { 646 error_setg(errp, "Unsupported preallocation mode '%s'", 647 PreallocMode_str(prealloc)); 648 return -ENOTSUP; 649 } 650 651 options = qdict_new(); 652 qdict_put_str(options, "driver", drv->format_name); 653 654 blk = blk_new_open(filename, NULL, options, 655 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 656 if (!blk) { 657 error_prepend(errp, "Protocol driver '%s' does not support image " 658 "creation, and opening the image failed: ", 659 drv->format_name); 660 return -EINVAL; 661 } 662 663 size = create_file_fallback_truncate(blk, size, errp); 664 if (size < 0) { 665 ret = size; 666 goto out; 667 } 668 669 ret = create_file_fallback_zero_first_sector(blk, size, errp); 670 if (ret < 0) { 671 goto out; 672 } 673 674 ret = 0; 675 out: 676 blk_unref(blk); 677 return ret; 678 } 679 680 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 681 { 682 QemuOpts *protocol_opts; 683 BlockDriver *drv; 684 QDict *qdict; 685 int ret; 686 687 drv = bdrv_find_protocol(filename, true, errp); 688 if (drv == NULL) { 689 return -ENOENT; 690 } 691 692 if (!drv->create_opts) { 693 error_setg(errp, "Driver '%s' does not support image creation", 694 drv->format_name); 695 return -ENOTSUP; 696 } 697 698 /* 699 * 'opts' contains a QemuOptsList with a combination of format and protocol 700 * default values. 701 * 702 * The format properly removes its options, but the default values remain 703 * in 'opts->list'. So if the protocol has options with the same name 704 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 705 * of the format, since for overlapping options, the format wins. 
706 * 707 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 708 * only the set options, and then convert it back to QemuOpts, using the 709 * create_opts of the protocol. So the new QemuOpts, will contain only the 710 * protocol defaults. 711 */ 712 qdict = qemu_opts_to_qdict(opts, NULL); 713 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 714 if (protocol_opts == NULL) { 715 ret = -EINVAL; 716 goto out; 717 } 718 719 ret = bdrv_create(drv, filename, protocol_opts, errp); 720 out: 721 qemu_opts_del(protocol_opts); 722 qobject_unref(qdict); 723 return ret; 724 } 725 726 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 727 { 728 Error *local_err = NULL; 729 int ret; 730 731 assert(bs != NULL); 732 733 if (!bs->drv) { 734 error_setg(errp, "Block node '%s' is not opened", bs->filename); 735 return -ENOMEDIUM; 736 } 737 738 if (!bs->drv->bdrv_co_delete_file) { 739 error_setg(errp, "Driver '%s' does not support image deletion", 740 bs->drv->format_name); 741 return -ENOTSUP; 742 } 743 744 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 745 if (ret < 0) { 746 error_propagate(errp, local_err); 747 } 748 749 return ret; 750 } 751 752 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 753 { 754 Error *local_err = NULL; 755 int ret; 756 757 if (!bs) { 758 return; 759 } 760 761 ret = bdrv_co_delete_file(bs, &local_err); 762 /* 763 * ENOTSUP will happen if the block driver doesn't support 764 * the 'bdrv_co_delete_file' interface. This is a predictable 765 * scenario and shouldn't be reported back to the user. 766 */ 767 if (ret == -ENOTSUP) { 768 error_free(local_err); 769 } else if (ret < 0) { 770 error_report_err(local_err); 771 } 772 } 773 774 /** 775 * Try to get @bs's logical and physical block size. 776 * On success, store them in @bsz struct and return 0. 777 * On failure return -errno. 778 * @bs must not be empty. 
779 */ 780 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 781 { 782 BlockDriver *drv = bs->drv; 783 BlockDriverState *filtered = bdrv_filter_bs(bs); 784 785 if (drv && drv->bdrv_probe_blocksizes) { 786 return drv->bdrv_probe_blocksizes(bs, bsz); 787 } else if (filtered) { 788 return bdrv_probe_blocksizes(filtered, bsz); 789 } 790 791 return -ENOTSUP; 792 } 793 794 /** 795 * Try to get @bs's geometry (cyls, heads, sectors). 796 * On success, store them in @geo struct and return 0. 797 * On failure return -errno. 798 * @bs must not be empty. 799 */ 800 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 801 { 802 BlockDriver *drv = bs->drv; 803 BlockDriverState *filtered = bdrv_filter_bs(bs); 804 805 if (drv && drv->bdrv_probe_geometry) { 806 return drv->bdrv_probe_geometry(bs, geo); 807 } else if (filtered) { 808 return bdrv_probe_geometry(filtered, geo); 809 } 810 811 return -ENOTSUP; 812 } 813 814 /* 815 * Create a uniquely-named empty temporary file. 816 * Return 0 upon success, otherwise a negative errno value. 817 */ 818 int get_tmp_filename(char *filename, int size) 819 { 820 #ifdef _WIN32 821 char temp_dir[MAX_PATH]; 822 /* GetTempFileName requires that its output buffer (4th param) 823 have length MAX_PATH or greater. */ 824 assert(size >= MAX_PATH); 825 return (GetTempPath(MAX_PATH, temp_dir) 826 && GetTempFileName(temp_dir, "qem", 0, filename) 827 ? 0 : -GetLastError()); 828 #else 829 int fd; 830 const char *tmpdir; 831 tmpdir = getenv("TMPDIR"); 832 if (!tmpdir) { 833 tmpdir = "/var/tmp"; 834 } 835 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 836 return -EOVERFLOW; 837 } 838 fd = mkstemp(filename); 839 if (fd < 0) { 840 return -errno; 841 } 842 if (close(fd) != 0) { 843 unlink(filename); 844 return -errno; 845 } 846 return 0; 847 #endif 848 } 849 850 /* 851 * Detect host devices. By convention, /dev/cdrom[N] is always 852 * recognized as a host CDROM. 
853 */ 854 static BlockDriver *find_hdev_driver(const char *filename) 855 { 856 int score_max = 0, score; 857 BlockDriver *drv = NULL, *d; 858 859 QLIST_FOREACH(d, &bdrv_drivers, list) { 860 if (d->bdrv_probe_device) { 861 score = d->bdrv_probe_device(filename); 862 if (score > score_max) { 863 score_max = score; 864 drv = d; 865 } 866 } 867 } 868 869 return drv; 870 } 871 872 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 873 { 874 BlockDriver *drv1; 875 876 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 877 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 878 return drv1; 879 } 880 } 881 882 return NULL; 883 } 884 885 BlockDriver *bdrv_find_protocol(const char *filename, 886 bool allow_protocol_prefix, 887 Error **errp) 888 { 889 BlockDriver *drv1; 890 char protocol[128]; 891 int len; 892 const char *p; 893 int i; 894 895 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 896 897 /* 898 * XXX(hch): we really should not let host device detection 899 * override an explicit protocol specification, but moving this 900 * later breaks access to device names with colons in them. 901 * Thanks to the brain-dead persistent naming schemes on udev- 902 * based Linux systems those actually are quite common. 
903 */ 904 drv1 = find_hdev_driver(filename); 905 if (drv1) { 906 return drv1; 907 } 908 909 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 910 return &bdrv_file; 911 } 912 913 p = strchr(filename, ':'); 914 assert(p != NULL); 915 len = p - filename; 916 if (len > sizeof(protocol) - 1) 917 len = sizeof(protocol) - 1; 918 memcpy(protocol, filename, len); 919 protocol[len] = '\0'; 920 921 drv1 = bdrv_do_find_protocol(protocol); 922 if (drv1) { 923 return drv1; 924 } 925 926 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 927 if (block_driver_modules[i].protocol_name && 928 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 929 block_module_load_one(block_driver_modules[i].library_name); 930 break; 931 } 932 } 933 934 drv1 = bdrv_do_find_protocol(protocol); 935 if (!drv1) { 936 error_setg(errp, "Unknown protocol '%s'", protocol); 937 } 938 return drv1; 939 } 940 941 /* 942 * Guess image format by probing its contents. 943 * This is not a good idea when your image is raw (CVE-2008-2004), but 944 * we do it anyway for backward compatibility. 945 * 946 * @buf contains the image's first @buf_size bytes. 947 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 948 * but can be smaller if the image file is smaller) 949 * @filename is its filename. 950 * 951 * For all block drivers, call the bdrv_probe() method to get its 952 * probing score. 953 * Return the first block driver with the highest probing score. 
954 */ 955 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 956 const char *filename) 957 { 958 int score_max = 0, score; 959 BlockDriver *drv = NULL, *d; 960 961 QLIST_FOREACH(d, &bdrv_drivers, list) { 962 if (d->bdrv_probe) { 963 score = d->bdrv_probe(buf, buf_size, filename); 964 if (score > score_max) { 965 score_max = score; 966 drv = d; 967 } 968 } 969 } 970 971 return drv; 972 } 973 974 static int find_image_format(BlockBackend *file, const char *filename, 975 BlockDriver **pdrv, Error **errp) 976 { 977 BlockDriver *drv; 978 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 979 int ret = 0; 980 981 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 982 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 983 *pdrv = &bdrv_raw; 984 return ret; 985 } 986 987 ret = blk_pread(file, 0, buf, sizeof(buf)); 988 if (ret < 0) { 989 error_setg_errno(errp, -ret, "Could not read image for determining its " 990 "format"); 991 *pdrv = NULL; 992 return ret; 993 } 994 995 drv = bdrv_probe_all(buf, ret, filename); 996 if (!drv) { 997 error_setg(errp, "Could not determine image format: No compatible " 998 "driver found"); 999 ret = -ENOENT; 1000 } 1001 *pdrv = drv; 1002 return ret; 1003 } 1004 1005 /** 1006 * Set the current 'total_sectors' value 1007 * Return 0 on success, -errno on error. 
1008 */ 1009 int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 1010 { 1011 BlockDriver *drv = bs->drv; 1012 1013 if (!drv) { 1014 return -ENOMEDIUM; 1015 } 1016 1017 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 1018 if (bdrv_is_sg(bs)) 1019 return 0; 1020 1021 /* query actual device if possible, otherwise just trust the hint */ 1022 if (drv->bdrv_getlength) { 1023 int64_t length = drv->bdrv_getlength(bs); 1024 if (length < 0) { 1025 return length; 1026 } 1027 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1028 } 1029 1030 bs->total_sectors = hint; 1031 1032 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1033 return -EFBIG; 1034 } 1035 1036 return 0; 1037 } 1038 1039 /** 1040 * Combines a QDict of new block driver @options with any missing options taken 1041 * from @old_options, so that leaving out an option defaults to its old value. 1042 */ 1043 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1044 QDict *old_options) 1045 { 1046 if (bs->drv && bs->drv->bdrv_join_options) { 1047 bs->drv->bdrv_join_options(options, old_options); 1048 } else { 1049 qdict_join(options, old_options, false); 1050 } 1051 } 1052 1053 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1054 int open_flags, 1055 Error **errp) 1056 { 1057 Error *local_err = NULL; 1058 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1059 BlockdevDetectZeroesOptions detect_zeroes = 1060 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1061 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1062 g_free(value); 1063 if (local_err) { 1064 error_propagate(errp, local_err); 1065 return detect_zeroes; 1066 } 1067 1068 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1069 !(open_flags & BDRV_O_UNMAP)) 1070 { 1071 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1072 "without setting discard operation to unmap"); 1073 } 1074 1075 return detect_zeroes; 1076 } 1077 1078 /** 1079 * Set 
open flags for aio engine 1080 * 1081 * Return 0 on success, -1 if the engine specified is invalid 1082 */ 1083 int bdrv_parse_aio(const char *mode, int *flags) 1084 { 1085 if (!strcmp(mode, "threads")) { 1086 /* do nothing, default */ 1087 } else if (!strcmp(mode, "native")) { 1088 *flags |= BDRV_O_NATIVE_AIO; 1089 #ifdef CONFIG_LINUX_IO_URING 1090 } else if (!strcmp(mode, "io_uring")) { 1091 *flags |= BDRV_O_IO_URING; 1092 #endif 1093 } else { 1094 return -1; 1095 } 1096 1097 return 0; 1098 } 1099 1100 /** 1101 * Set open flags for a given discard mode 1102 * 1103 * Return 0 on success, -1 if the discard mode was invalid. 1104 */ 1105 int bdrv_parse_discard_flags(const char *mode, int *flags) 1106 { 1107 *flags &= ~BDRV_O_UNMAP; 1108 1109 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1110 /* do nothing */ 1111 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1112 *flags |= BDRV_O_UNMAP; 1113 } else { 1114 return -1; 1115 } 1116 1117 return 0; 1118 } 1119 1120 /** 1121 * Set open flags for a given cache mode 1122 * 1123 * Return 0 on success, -1 if the cache mode was invalid. 
1124 */ 1125 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1126 { 1127 *flags &= ~BDRV_O_CACHE_MASK; 1128 1129 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1130 *writethrough = false; 1131 *flags |= BDRV_O_NOCACHE; 1132 } else if (!strcmp(mode, "directsync")) { 1133 *writethrough = true; 1134 *flags |= BDRV_O_NOCACHE; 1135 } else if (!strcmp(mode, "writeback")) { 1136 *writethrough = false; 1137 } else if (!strcmp(mode, "unsafe")) { 1138 *writethrough = false; 1139 *flags |= BDRV_O_NO_FLUSH; 1140 } else if (!strcmp(mode, "writethrough")) { 1141 *writethrough = true; 1142 } else { 1143 return -1; 1144 } 1145 1146 return 0; 1147 } 1148 1149 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1150 { 1151 BlockDriverState *parent = c->opaque; 1152 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); 1153 } 1154 1155 static void bdrv_child_cb_drained_begin(BdrvChild *child) 1156 { 1157 BlockDriverState *bs = child->opaque; 1158 bdrv_do_drained_begin_quiesce(bs, NULL, false); 1159 } 1160 1161 static bool bdrv_child_cb_drained_poll(BdrvChild *child) 1162 { 1163 BlockDriverState *bs = child->opaque; 1164 return bdrv_drain_poll(bs, false, NULL, false); 1165 } 1166 1167 static void bdrv_child_cb_drained_end(BdrvChild *child, 1168 int *drained_end_counter) 1169 { 1170 BlockDriverState *bs = child->opaque; 1171 bdrv_drained_end_no_poll(bs, drained_end_counter); 1172 } 1173 1174 static int bdrv_child_cb_inactivate(BdrvChild *child) 1175 { 1176 BlockDriverState *bs = child->opaque; 1177 assert(bs->open_flags & BDRV_O_INACTIVE); 1178 return 0; 1179 } 1180 1181 static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1182 GSList **ignore, Error **errp) 1183 { 1184 BlockDriverState *bs = child->opaque; 1185 return bdrv_can_set_aio_context(bs, ctx, ignore, errp); 1186 } 1187 1188 static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1189 GSList **ignore) 1190 { 1191 BlockDriverState *bs = 
child->opaque; 1192 return bdrv_set_aio_context_ignore(bs, ctx, ignore); 1193 } 1194 1195 /* 1196 * Returns the options and flags that a temporary snapshot should get, based on 1197 * the originally requested flags (the originally requested image will have 1198 * flags like a backing file) 1199 */ 1200 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 1201 int parent_flags, QDict *parent_options) 1202 { 1203 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 1204 1205 /* For temporary files, unconditional cache=unsafe is fine */ 1206 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 1207 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 1208 1209 /* Copy the read-only and discard options from the parent */ 1210 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1211 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); 1212 1213 /* aio=native doesn't work for cache.direct=off, so disable it for the 1214 * temporary snapshot */ 1215 *child_flags &= ~BDRV_O_NATIVE_AIO; 1216 } 1217 1218 static void bdrv_backing_attach(BdrvChild *c) 1219 { 1220 BlockDriverState *parent = c->opaque; 1221 BlockDriverState *backing_hd = c->bs; 1222 1223 assert(!parent->backing_blocker); 1224 error_setg(&parent->backing_blocker, 1225 "node is used as backing hd of '%s'", 1226 bdrv_get_device_or_node_name(parent)); 1227 1228 bdrv_refresh_filename(backing_hd); 1229 1230 parent->open_flags &= ~BDRV_O_NO_BACKING; 1231 1232 bdrv_op_block_all(backing_hd, parent->backing_blocker); 1233 /* Otherwise we won't be able to commit or stream */ 1234 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1235 parent->backing_blocker); 1236 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 1237 parent->backing_blocker); 1238 /* 1239 * We do backup in 3 ways: 1240 * 1. drive backup 1241 * The target bs is new opened, and the source is top BDS 1242 * 2. 
blockdev backup 1243 * Both the source and the target are top BDSes. 1244 * 3. internal backup(used for block replication) 1245 * Both the source and the target are backing file 1246 * 1247 * In case 1 and 2, neither the source nor the target is the backing file. 1248 * In case 3, we will block the top BDS, so there is only one block job 1249 * for the top BDS and its backing chain. 1250 */ 1251 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 1252 parent->backing_blocker); 1253 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 1254 parent->backing_blocker); 1255 } 1256 1257 static void bdrv_backing_detach(BdrvChild *c) 1258 { 1259 BlockDriverState *parent = c->opaque; 1260 1261 assert(parent->backing_blocker); 1262 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 1263 error_free(parent->backing_blocker); 1264 parent->backing_blocker = NULL; 1265 } 1266 1267 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1268 const char *filename, Error **errp) 1269 { 1270 BlockDriverState *parent = c->opaque; 1271 bool read_only = bdrv_is_read_only(parent); 1272 int ret; 1273 1274 if (read_only) { 1275 ret = bdrv_reopen_set_read_only(parent, false, errp); 1276 if (ret < 0) { 1277 return ret; 1278 } 1279 } 1280 1281 ret = bdrv_change_backing_file(parent, filename, 1282 base->drv ? base->drv->format_name : "", 1283 false); 1284 if (ret < 0) { 1285 error_setg_errno(errp, -ret, "Could not update backing file link"); 1286 } 1287 1288 if (read_only) { 1289 bdrv_reopen_set_read_only(parent, true, NULL); 1290 } 1291 1292 return ret; 1293 } 1294 1295 /* 1296 * Returns the options and flags that a generic child of a BDS should 1297 * get, based on the given options and flags for the parent BDS. 
1298 */ 1299 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, 1300 int *child_flags, QDict *child_options, 1301 int parent_flags, QDict *parent_options) 1302 { 1303 int flags = parent_flags; 1304 1305 /* 1306 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. 1307 * Generally, the question to answer is: Should this child be 1308 * format-probed by default? 1309 */ 1310 1311 /* 1312 * Pure and non-filtered data children of non-format nodes should 1313 * be probed by default (even when the node itself has BDRV_O_PROTOCOL 1314 * set). This only affects a very limited set of drivers (namely 1315 * quorum and blkverify when this comment was written). 1316 * Force-clear BDRV_O_PROTOCOL then. 1317 */ 1318 if (!parent_is_format && 1319 (role & BDRV_CHILD_DATA) && 1320 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) 1321 { 1322 flags &= ~BDRV_O_PROTOCOL; 1323 } 1324 1325 /* 1326 * All children of format nodes (except for COW children) and all 1327 * metadata children in general should never be format-probed. 1328 * Force-set BDRV_O_PROTOCOL then. 1329 */ 1330 if ((parent_is_format && !(role & BDRV_CHILD_COW)) || 1331 (role & BDRV_CHILD_METADATA)) 1332 { 1333 flags |= BDRV_O_PROTOCOL; 1334 } 1335 1336 /* 1337 * If the cache mode isn't explicitly set, inherit direct and no-flush from 1338 * the parent. 
1339 */ 1340 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1341 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1342 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1343 1344 if (role & BDRV_CHILD_COW) { 1345 /* backing files are opened read-only by default */ 1346 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1347 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); 1348 } else { 1349 /* Inherit the read-only option from the parent if it's not set */ 1350 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1351 qdict_copy_default(child_options, parent_options, 1352 BDRV_OPT_AUTO_READ_ONLY); 1353 } 1354 1355 /* 1356 * bdrv_co_pdiscard() respects unmap policy for the parent, so we 1357 * can default to enable it on lower layers regardless of the 1358 * parent option. 1359 */ 1360 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 1361 1362 /* Clear flags that only apply to the top layer */ 1363 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 1364 1365 if (role & BDRV_CHILD_METADATA) { 1366 flags &= ~BDRV_O_NO_IO; 1367 } 1368 if (role & BDRV_CHILD_COW) { 1369 flags &= ~BDRV_O_TEMPORARY; 1370 } 1371 1372 *child_flags = flags; 1373 } 1374 1375 static void bdrv_child_cb_attach(BdrvChild *child) 1376 { 1377 BlockDriverState *bs = child->opaque; 1378 1379 if (child->role & BDRV_CHILD_COW) { 1380 bdrv_backing_attach(child); 1381 } 1382 1383 bdrv_apply_subtree_drain(child, bs); 1384 } 1385 1386 static void bdrv_child_cb_detach(BdrvChild *child) 1387 { 1388 BlockDriverState *bs = child->opaque; 1389 1390 if (child->role & BDRV_CHILD_COW) { 1391 bdrv_backing_detach(child); 1392 } 1393 1394 bdrv_unapply_subtree_drain(child, bs); 1395 } 1396 1397 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, 1398 const char *filename, Error **errp) 1399 { 1400 if (c->role & BDRV_CHILD_COW) { 
1401 return bdrv_backing_update_filename(c, base, filename, errp); 1402 } 1403 return 0; 1404 } 1405 1406 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) 1407 { 1408 BlockDriverState *bs = c->opaque; 1409 1410 return bdrv_get_aio_context(bs); 1411 } 1412 1413 const BdrvChildClass child_of_bds = { 1414 .parent_is_bds = true, 1415 .get_parent_desc = bdrv_child_get_parent_desc, 1416 .inherit_options = bdrv_inherited_options, 1417 .drained_begin = bdrv_child_cb_drained_begin, 1418 .drained_poll = bdrv_child_cb_drained_poll, 1419 .drained_end = bdrv_child_cb_drained_end, 1420 .attach = bdrv_child_cb_attach, 1421 .detach = bdrv_child_cb_detach, 1422 .inactivate = bdrv_child_cb_inactivate, 1423 .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, 1424 .set_aio_ctx = bdrv_child_cb_set_aio_ctx, 1425 .update_filename = bdrv_child_cb_update_filename, 1426 .get_parent_aio_context = child_of_bds_get_parent_aio_context, 1427 }; 1428 1429 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) 1430 { 1431 return c->klass->get_parent_aio_context(c); 1432 } 1433 1434 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1435 { 1436 int open_flags = flags; 1437 1438 /* 1439 * Clear flags that are internal to the block layer before opening the 1440 * image. 
1441 */ 1442 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1443 1444 return open_flags; 1445 } 1446 1447 static void update_flags_from_options(int *flags, QemuOpts *opts) 1448 { 1449 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); 1450 1451 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1452 *flags |= BDRV_O_NO_FLUSH; 1453 } 1454 1455 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1456 *flags |= BDRV_O_NOCACHE; 1457 } 1458 1459 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { 1460 *flags |= BDRV_O_RDWR; 1461 } 1462 1463 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { 1464 *flags |= BDRV_O_AUTO_RDONLY; 1465 } 1466 } 1467 1468 static void update_options_from_flags(QDict *options, int flags) 1469 { 1470 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1471 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1472 } 1473 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 1474 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1475 flags & BDRV_O_NO_FLUSH); 1476 } 1477 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1478 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1479 } 1480 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { 1481 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, 1482 flags & BDRV_O_AUTO_RDONLY); 1483 } 1484 } 1485 1486 static void bdrv_assign_node_name(BlockDriverState *bs, 1487 const char *node_name, 1488 Error **errp) 1489 { 1490 char *gen_node_name = NULL; 1491 1492 if (!node_name) { 1493 node_name = gen_node_name = id_generate(ID_BLOCK); 1494 } else if (!id_wellformed(node_name)) { 1495 /* 1496 * Check for empty string or invalid characters, but not if it is 1497 * generated (generated names use characters not available to the user) 1498 */ 1499 error_setg(errp, "Invalid node-name: '%s'", node_name); 1500 return; 1501 } 1502 1503 /* takes care of avoiding namespaces collisions */ 
1504 if (blk_by_name(node_name)) { 1505 error_setg(errp, "node-name=%s is conflicting with a device id", 1506 node_name); 1507 goto out; 1508 } 1509 1510 /* takes care of avoiding duplicates node names */ 1511 if (bdrv_find_node(node_name)) { 1512 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name); 1513 goto out; 1514 } 1515 1516 /* Make sure that the node name isn't truncated */ 1517 if (strlen(node_name) >= sizeof(bs->node_name)) { 1518 error_setg(errp, "Node name too long"); 1519 goto out; 1520 } 1521 1522 /* copy node name into the bs and insert it into the graph list */ 1523 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1524 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1525 out: 1526 g_free(gen_node_name); 1527 } 1528 1529 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1530 const char *node_name, QDict *options, 1531 int open_flags, Error **errp) 1532 { 1533 Error *local_err = NULL; 1534 int i, ret; 1535 1536 bdrv_assign_node_name(bs, node_name, &local_err); 1537 if (local_err) { 1538 error_propagate(errp, local_err); 1539 return -EINVAL; 1540 } 1541 1542 bs->drv = drv; 1543 bs->opaque = g_malloc0(drv->instance_size); 1544 1545 if (drv->bdrv_file_open) { 1546 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1547 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1548 } else if (drv->bdrv_open) { 1549 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1550 } else { 1551 ret = 0; 1552 } 1553 1554 if (ret < 0) { 1555 if (local_err) { 1556 error_propagate(errp, local_err); 1557 } else if (bs->filename[0]) { 1558 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1559 } else { 1560 error_setg_errno(errp, -ret, "Could not open image"); 1561 } 1562 goto open_failed; 1563 } 1564 1565 ret = refresh_total_sectors(bs, bs->total_sectors); 1566 if (ret < 0) { 1567 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 1568 return ret; 1569 } 1570 1571 
bdrv_refresh_limits(bs, NULL, &local_err); 1572 if (local_err) { 1573 error_propagate(errp, local_err); 1574 return -EINVAL; 1575 } 1576 1577 assert(bdrv_opt_mem_align(bs) != 0); 1578 assert(bdrv_min_mem_align(bs) != 0); 1579 assert(is_power_of_2(bs->bl.request_alignment)); 1580 1581 for (i = 0; i < bs->quiesce_counter; i++) { 1582 if (drv->bdrv_co_drain_begin) { 1583 drv->bdrv_co_drain_begin(bs); 1584 } 1585 } 1586 1587 return 0; 1588 open_failed: 1589 bs->drv = NULL; 1590 if (bs->file != NULL) { 1591 bdrv_unref_child(bs, bs->file); 1592 bs->file = NULL; 1593 } 1594 g_free(bs->opaque); 1595 bs->opaque = NULL; 1596 return ret; 1597 } 1598 1599 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1600 int flags, Error **errp) 1601 { 1602 BlockDriverState *bs; 1603 int ret; 1604 1605 bs = bdrv_new(); 1606 bs->open_flags = flags; 1607 bs->explicit_options = qdict_new(); 1608 bs->options = qdict_new(); 1609 bs->opaque = NULL; 1610 1611 update_options_from_flags(bs->options, flags); 1612 1613 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1614 if (ret < 0) { 1615 qobject_unref(bs->explicit_options); 1616 bs->explicit_options = NULL; 1617 qobject_unref(bs->options); 1618 bs->options = NULL; 1619 bdrv_unref(bs); 1620 return NULL; 1621 } 1622 1623 return bs; 1624 } 1625 1626 QemuOptsList bdrv_runtime_opts = { 1627 .name = "bdrv_common", 1628 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1629 .desc = { 1630 { 1631 .name = "node-name", 1632 .type = QEMU_OPT_STRING, 1633 .help = "Node name of the block device node", 1634 }, 1635 { 1636 .name = "driver", 1637 .type = QEMU_OPT_STRING, 1638 .help = "Block driver to use for the node", 1639 }, 1640 { 1641 .name = BDRV_OPT_CACHE_DIRECT, 1642 .type = QEMU_OPT_BOOL, 1643 .help = "Bypass software writeback cache on the host", 1644 }, 1645 { 1646 .name = BDRV_OPT_CACHE_NO_FLUSH, 1647 .type = QEMU_OPT_BOOL, 1648 .help = "Ignore flush requests", 1649 }, 1650 { 1651 .name = 
BDRV_OPT_READ_ONLY, 1652 .type = QEMU_OPT_BOOL, 1653 .help = "Node is opened in read-only mode", 1654 }, 1655 { 1656 .name = BDRV_OPT_AUTO_READ_ONLY, 1657 .type = QEMU_OPT_BOOL, 1658 .help = "Node can become read-only if opening read-write fails", 1659 }, 1660 { 1661 .name = "detect-zeroes", 1662 .type = QEMU_OPT_STRING, 1663 .help = "try to optimize zero writes (off, on, unmap)", 1664 }, 1665 { 1666 .name = BDRV_OPT_DISCARD, 1667 .type = QEMU_OPT_STRING, 1668 .help = "discard operation (ignore/off, unmap/on)", 1669 }, 1670 { 1671 .name = BDRV_OPT_FORCE_SHARE, 1672 .type = QEMU_OPT_BOOL, 1673 .help = "always accept other writers (default: off)", 1674 }, 1675 { /* end of list */ } 1676 }, 1677 }; 1678 1679 QemuOptsList bdrv_create_opts_simple = { 1680 .name = "simple-create-opts", 1681 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1682 .desc = { 1683 { 1684 .name = BLOCK_OPT_SIZE, 1685 .type = QEMU_OPT_SIZE, 1686 .help = "Virtual disk size" 1687 }, 1688 { 1689 .name = BLOCK_OPT_PREALLOC, 1690 .type = QEMU_OPT_STRING, 1691 .help = "Preallocation mode (allowed values: off)" 1692 }, 1693 { /* end of list */ } 1694 } 1695 }; 1696 1697 /* 1698 * Common part for opening disk images and files 1699 * 1700 * Removes all processed options from *options. 
1701 */ 1702 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1703 QDict *options, Error **errp) 1704 { 1705 int ret, open_flags; 1706 const char *filename; 1707 const char *driver_name = NULL; 1708 const char *node_name = NULL; 1709 const char *discard; 1710 QemuOpts *opts; 1711 BlockDriver *drv; 1712 Error *local_err = NULL; 1713 bool ro; 1714 1715 assert(bs->file == NULL); 1716 assert(options != NULL && bs->options != options); 1717 1718 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1719 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1720 ret = -EINVAL; 1721 goto fail_opts; 1722 } 1723 1724 update_flags_from_options(&bs->open_flags, opts); 1725 1726 driver_name = qemu_opt_get(opts, "driver"); 1727 drv = bdrv_find_format(driver_name); 1728 assert(drv != NULL); 1729 1730 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1731 1732 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1733 error_setg(errp, 1734 BDRV_OPT_FORCE_SHARE 1735 "=on can only be used with read-only images"); 1736 ret = -EINVAL; 1737 goto fail_opts; 1738 } 1739 1740 if (file != NULL) { 1741 bdrv_refresh_filename(blk_bs(file)); 1742 filename = blk_bs(file)->filename; 1743 } else { 1744 /* 1745 * Caution: while qdict_get_try_str() is fine, getting 1746 * non-string types would require more care. When @options 1747 * come from -blockdev or blockdev_add, its members are typed 1748 * according to the QAPI schema, but when they come from 1749 * -drive, they're all QString. 
1750 */ 1751 filename = qdict_get_try_str(options, "filename"); 1752 } 1753 1754 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1755 error_setg(errp, "The '%s' block driver requires a file name", 1756 drv->format_name); 1757 ret = -EINVAL; 1758 goto fail_opts; 1759 } 1760 1761 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1762 drv->format_name); 1763 1764 ro = bdrv_is_read_only(bs); 1765 1766 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { 1767 if (!ro && bdrv_is_whitelisted(drv, true)) { 1768 ret = bdrv_apply_auto_read_only(bs, NULL, NULL); 1769 } else { 1770 ret = -ENOTSUP; 1771 } 1772 if (ret < 0) { 1773 error_setg(errp, 1774 !ro && bdrv_is_whitelisted(drv, true) 1775 ? "Driver '%s' can only be used for read-only devices" 1776 : "Driver '%s' is not whitelisted", 1777 drv->format_name); 1778 goto fail_opts; 1779 } 1780 } 1781 1782 /* bdrv_new() and bdrv_close() make it so */ 1783 assert(qatomic_read(&bs->copy_on_read) == 0); 1784 1785 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1786 if (!ro) { 1787 bdrv_enable_copy_on_read(bs); 1788 } else { 1789 error_setg(errp, "Can't use copy-on-read on read-only device"); 1790 ret = -EINVAL; 1791 goto fail_opts; 1792 } 1793 } 1794 1795 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); 1796 if (discard != NULL) { 1797 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1798 error_setg(errp, "Invalid discard option"); 1799 ret = -EINVAL; 1800 goto fail_opts; 1801 } 1802 } 1803 1804 bs->detect_zeroes = 1805 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); 1806 if (local_err) { 1807 error_propagate(errp, local_err); 1808 ret = -EINVAL; 1809 goto fail_opts; 1810 } 1811 1812 if (filename != NULL) { 1813 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1814 } else { 1815 bs->filename[0] = '\0'; 1816 } 1817 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1818 1819 /* Open the image, either directly or using a protocol */ 1820 open_flags = 
bdrv_open_flags(bs, bs->open_flags); 1821 node_name = qemu_opt_get(opts, "node-name"); 1822 1823 assert(!drv->bdrv_file_open || file == NULL); 1824 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1825 if (ret < 0) { 1826 goto fail_opts; 1827 } 1828 1829 qemu_opts_del(opts); 1830 return 0; 1831 1832 fail_opts: 1833 qemu_opts_del(opts); 1834 return ret; 1835 } 1836 1837 static QDict *parse_json_filename(const char *filename, Error **errp) 1838 { 1839 QObject *options_obj; 1840 QDict *options; 1841 int ret; 1842 1843 ret = strstart(filename, "json:", &filename); 1844 assert(ret); 1845 1846 options_obj = qobject_from_json(filename, errp); 1847 if (!options_obj) { 1848 error_prepend(errp, "Could not parse the JSON options: "); 1849 return NULL; 1850 } 1851 1852 options = qobject_to(QDict, options_obj); 1853 if (!options) { 1854 qobject_unref(options_obj); 1855 error_setg(errp, "Invalid JSON object given"); 1856 return NULL; 1857 } 1858 1859 qdict_flatten(options); 1860 1861 return options; 1862 } 1863 1864 static void parse_json_protocol(QDict *options, const char **pfilename, 1865 Error **errp) 1866 { 1867 QDict *json_options; 1868 Error *local_err = NULL; 1869 1870 /* Parse json: pseudo-protocol */ 1871 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1872 return; 1873 } 1874 1875 json_options = parse_json_filename(*pfilename, &local_err); 1876 if (local_err) { 1877 error_propagate(errp, local_err); 1878 return; 1879 } 1880 1881 /* Options given in the filename have lower priority than options 1882 * specified directly */ 1883 qdict_join(options, json_options, false); 1884 qobject_unref(json_options); 1885 *pfilename = NULL; 1886 } 1887 1888 /* 1889 * Fills in default options for opening images and converts the legacy 1890 * filename/flags pair to option QDict entries. 1891 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1892 * block driver has been specified explicitly. 
1893 */ 1894 static int bdrv_fill_options(QDict **options, const char *filename, 1895 int *flags, Error **errp) 1896 { 1897 const char *drvname; 1898 bool protocol = *flags & BDRV_O_PROTOCOL; 1899 bool parse_filename = false; 1900 BlockDriver *drv = NULL; 1901 Error *local_err = NULL; 1902 1903 /* 1904 * Caution: while qdict_get_try_str() is fine, getting non-string 1905 * types would require more care. When @options come from 1906 * -blockdev or blockdev_add, its members are typed according to 1907 * the QAPI schema, but when they come from -drive, they're all 1908 * QString. 1909 */ 1910 drvname = qdict_get_try_str(*options, "driver"); 1911 if (drvname) { 1912 drv = bdrv_find_format(drvname); 1913 if (!drv) { 1914 error_setg(errp, "Unknown driver '%s'", drvname); 1915 return -ENOENT; 1916 } 1917 /* If the user has explicitly specified the driver, this choice should 1918 * override the BDRV_O_PROTOCOL flag */ 1919 protocol = drv->bdrv_file_open; 1920 } 1921 1922 if (protocol) { 1923 *flags |= BDRV_O_PROTOCOL; 1924 } else { 1925 *flags &= ~BDRV_O_PROTOCOL; 1926 } 1927 1928 /* Translate cache options from flags into options */ 1929 update_options_from_flags(*options, *flags); 1930 1931 /* Fetch the file name from the options QDict if necessary */ 1932 if (protocol && filename) { 1933 if (!qdict_haskey(*options, "filename")) { 1934 qdict_put_str(*options, "filename", filename); 1935 parse_filename = true; 1936 } else { 1937 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1938 "the same time"); 1939 return -EINVAL; 1940 } 1941 } 1942 1943 /* Find the right block driver */ 1944 /* See cautionary note on accessing @options above */ 1945 filename = qdict_get_try_str(*options, "filename"); 1946 1947 if (!drvname && protocol) { 1948 if (filename) { 1949 drv = bdrv_find_protocol(filename, parse_filename, errp); 1950 if (!drv) { 1951 return -EINVAL; 1952 } 1953 1954 drvname = drv->format_name; 1955 qdict_put_str(*options, "driver", drvname); 1956 } else { 
1957 error_setg(errp, "Must specify either driver or file"); 1958 return -EINVAL; 1959 } 1960 } 1961 1962 assert(drv || !protocol); 1963 1964 /* Driver-specific filename parsing */ 1965 if (drv && drv->bdrv_parse_filename && parse_filename) { 1966 drv->bdrv_parse_filename(filename, *options, &local_err); 1967 if (local_err) { 1968 error_propagate(errp, local_err); 1969 return -EINVAL; 1970 } 1971 1972 if (!drv->bdrv_needs_filename) { 1973 qdict_del(*options, "filename"); 1974 } 1975 } 1976 1977 return 0; 1978 } 1979 1980 typedef struct BlockReopenQueueEntry { 1981 bool prepared; 1982 bool perms_checked; 1983 BDRVReopenState state; 1984 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 1985 } BlockReopenQueueEntry; 1986 1987 /* 1988 * Return the flags that @bs will have after the reopens in @q have 1989 * successfully completed. If @q is NULL (or @bs is not contained in @q), 1990 * return the current flags. 1991 */ 1992 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 1993 { 1994 BlockReopenQueueEntry *entry; 1995 1996 if (q != NULL) { 1997 QTAILQ_FOREACH(entry, q, entry) { 1998 if (entry->state.bs == bs) { 1999 return entry->state.flags; 2000 } 2001 } 2002 } 2003 2004 return bs->open_flags; 2005 } 2006 2007 /* Returns whether the image file can be written to after the reopen queue @q 2008 * has been successfully applied, or right now if @q is NULL. */ 2009 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 2010 BlockReopenQueue *q) 2011 { 2012 int flags = bdrv_reopen_get_flags(q, bs); 2013 2014 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 2015 } 2016 2017 /* 2018 * Return whether the BDS can be written to. This is not necessarily 2019 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2020 * be written to but do not count as read-only images. 
2021 */ 2022 bool bdrv_is_writable(BlockDriverState *bs) 2023 { 2024 return bdrv_is_writable_after_reopen(bs, NULL); 2025 } 2026 2027 static char *bdrv_child_user_desc(BdrvChild *c) 2028 { 2029 return c->klass->get_parent_desc(c); 2030 } 2031 2032 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) 2033 { 2034 g_autofree char *user = NULL; 2035 g_autofree char *perm_names = NULL; 2036 2037 if ((b->perm & a->shared_perm) == b->perm) { 2038 return true; 2039 } 2040 2041 perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); 2042 user = bdrv_child_user_desc(a); 2043 error_setg(errp, "Conflicts with use by %s as '%s', which does not " 2044 "allow '%s' on %s", 2045 user, a->name, perm_names, bdrv_get_node_name(b->bs)); 2046 2047 return false; 2048 } 2049 2050 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) 2051 { 2052 BdrvChild *a, *b; 2053 2054 /* 2055 * During the loop we'll look at each pair twice. That's correct because 2056 * bdrv_a_allow_b() is asymmetric and we should check each pair in both 2057 * directions. 
2058 */ 2059 QLIST_FOREACH(a, &bs->parents, next_parent) { 2060 QLIST_FOREACH(b, &bs->parents, next_parent) { 2061 if (a == b) { 2062 continue; 2063 } 2064 2065 if (!bdrv_a_allow_b(a, b, errp)) { 2066 return true; 2067 } 2068 } 2069 } 2070 2071 return false; 2072 } 2073 2074 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2075 BdrvChild *c, BdrvChildRole role, 2076 BlockReopenQueue *reopen_queue, 2077 uint64_t parent_perm, uint64_t parent_shared, 2078 uint64_t *nperm, uint64_t *nshared) 2079 { 2080 assert(bs->drv && bs->drv->bdrv_child_perm); 2081 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2082 parent_perm, parent_shared, 2083 nperm, nshared); 2084 /* TODO Take force_share from reopen_queue */ 2085 if (child_bs && child_bs->force_share) { 2086 *nshared = BLK_PERM_ALL; 2087 } 2088 } 2089 2090 /* 2091 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for 2092 * nodes that are already in the @list, of course) so that final list is 2093 * topologically sorted. Return the result (GSList @list object is updated, so 2094 * don't use old reference after function call). 2095 * 2096 * On function start @list must be already topologically sorted and for any node 2097 * in the @list the whole subtree of the node must be in the @list as well. The 2098 * simplest way to satisfy this criteria: use only result of 2099 * bdrv_topological_dfs() or NULL as @list parameter. 
2100 */ 2101 static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, 2102 BlockDriverState *bs) 2103 { 2104 BdrvChild *child; 2105 g_autoptr(GHashTable) local_found = NULL; 2106 2107 if (!found) { 2108 assert(!list); 2109 found = local_found = g_hash_table_new(NULL, NULL); 2110 } 2111 2112 if (g_hash_table_contains(found, bs)) { 2113 return list; 2114 } 2115 g_hash_table_add(found, bs); 2116 2117 QLIST_FOREACH(child, &bs->children, next) { 2118 list = bdrv_topological_dfs(list, found, child->bs); 2119 } 2120 2121 return g_slist_prepend(list, bs); 2122 } 2123 2124 typedef struct BdrvChildSetPermState { 2125 BdrvChild *child; 2126 uint64_t old_perm; 2127 uint64_t old_shared_perm; 2128 } BdrvChildSetPermState; 2129 2130 static void bdrv_child_set_perm_abort(void *opaque) 2131 { 2132 BdrvChildSetPermState *s = opaque; 2133 2134 s->child->perm = s->old_perm; 2135 s->child->shared_perm = s->old_shared_perm; 2136 } 2137 2138 static TransactionActionDrv bdrv_child_set_pem_drv = { 2139 .abort = bdrv_child_set_perm_abort, 2140 .clean = g_free, 2141 }; 2142 2143 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, 2144 uint64_t shared, Transaction *tran) 2145 { 2146 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); 2147 2148 *s = (BdrvChildSetPermState) { 2149 .child = c, 2150 .old_perm = c->perm, 2151 .old_shared_perm = c->shared_perm, 2152 }; 2153 2154 c->perm = perm; 2155 c->shared_perm = shared; 2156 2157 tran_add(tran, &bdrv_child_set_pem_drv, s); 2158 } 2159 2160 static void bdrv_drv_set_perm_commit(void *opaque) 2161 { 2162 BlockDriverState *bs = opaque; 2163 uint64_t cumulative_perms, cumulative_shared_perms; 2164 2165 if (bs->drv->bdrv_set_perm) { 2166 bdrv_get_cumulative_perm(bs, &cumulative_perms, 2167 &cumulative_shared_perms); 2168 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 2169 } 2170 } 2171 2172 static void bdrv_drv_set_perm_abort(void *opaque) 2173 { 2174 BlockDriverState *bs = opaque; 2175 2176 if 
(bs->drv->bdrv_abort_perm_update) { 2177 bs->drv->bdrv_abort_perm_update(bs); 2178 } 2179 } 2180 2181 TransactionActionDrv bdrv_drv_set_perm_drv = { 2182 .abort = bdrv_drv_set_perm_abort, 2183 .commit = bdrv_drv_set_perm_commit, 2184 }; 2185 2186 static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, 2187 uint64_t shared_perm, Transaction *tran, 2188 Error **errp) 2189 { 2190 if (!bs->drv) { 2191 return 0; 2192 } 2193 2194 if (bs->drv->bdrv_check_perm) { 2195 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); 2196 if (ret < 0) { 2197 return ret; 2198 } 2199 } 2200 2201 if (tran) { 2202 tran_add(tran, &bdrv_drv_set_perm_drv, bs); 2203 } 2204 2205 return 0; 2206 } 2207 2208 typedef struct BdrvReplaceChildState { 2209 BdrvChild *child; 2210 BlockDriverState *old_bs; 2211 } BdrvReplaceChildState; 2212 2213 static void bdrv_replace_child_commit(void *opaque) 2214 { 2215 BdrvReplaceChildState *s = opaque; 2216 2217 bdrv_unref(s->old_bs); 2218 } 2219 2220 static void bdrv_replace_child_abort(void *opaque) 2221 { 2222 BdrvReplaceChildState *s = opaque; 2223 BlockDriverState *new_bs = s->child->bs; 2224 2225 /* old_bs reference is transparently moved from @s to @s->child */ 2226 bdrv_replace_child_noperm(s->child, s->old_bs); 2227 bdrv_unref(new_bs); 2228 } 2229 2230 static TransactionActionDrv bdrv_replace_child_drv = { 2231 .commit = bdrv_replace_child_commit, 2232 .abort = bdrv_replace_child_abort, 2233 .clean = g_free, 2234 }; 2235 2236 /* 2237 * bdrv_replace_child 2238 * 2239 * Note: real unref of old_bs is done only on commit. 
 */
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
                               Transaction *tran)
{
    /* Record the current node first so that abort can restore it */
    BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
    *s = (BdrvReplaceChildState) {
        .child = child,
        .old_bs = child->bs,
    };
    tran_add(tran, &bdrv_replace_child_drv, s);

    if (new_bs) {
        bdrv_ref(new_bs);
    }
    bdrv_replace_child_noperm(child, new_bs);
    /* old_bs reference is transparently moved from @child to @s */
}

/*
 * Refresh permissions in @bs subtree. The function is intended to be called
 * after some graph modification that was done without permission update.
 *
 * The cumulative permissions of all parents of @bs are validated against the
 * node (read-only state, size alignment, driver check) and the resulting
 * per-child permissions are set on every child of @bs via @tran.
 *
 * Returns 0 on success, -EPERM or a negative driver error on failure (with
 * @errp set by error_setg() or by the driver).
 */
static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
                                  Transaction *tran, Error **errp)
{
    BlockDriver *drv = bs->drv;
    BdrvChild *c;
    int ret;
    uint64_t cumulative_perms, cumulative_shared_perms;

    bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);

    /* Write permissions never work with read-only images */
    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
        !bdrv_is_writable_after_reopen(bs, q))
    {
        /* Pick the message depending on the result without the queue @q */
        if (!bdrv_is_writable_after_reopen(bs, NULL)) {
            error_setg(errp, "Block node is read-only");
        } else {
            error_setg(errp, "Read-only block node '%s' cannot support "
                       "read-write users", bdrv_get_node_name(bs));
        }

        return -EPERM;
    }

    /*
     * Unaligned requests will automatically be aligned to bl.request_alignment
     * and without RESIZE we can't extend requests to write to space beyond the
     * end of the image, so it's required that the image size is aligned.
     */
    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
        !(cumulative_perms & BLK_PERM_RESIZE))
    {
        if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
            error_setg(errp, "Cannot get 'write' permission without 'resize': "
                             "Image size is not a multiple of request "
                             "alignment");
            return -EPERM;
        }
    }

    /* Check this node */
    if (!drv) {
        return 0;
    }

    ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
                            errp);
    if (ret < 0) {
        return ret;
    }

    /* Drivers that never have children can omit .bdrv_child_perm() */
    if (!drv->bdrv_child_perm) {
        assert(QLIST_EMPTY(&bs->children));
        return 0;
    }

    /* Check all children */
    QLIST_FOREACH(c, &bs->children, next) {
        uint64_t cur_perm, cur_shared;

        bdrv_child_perm(bs, c->bs, c, c->role, q,
                        cumulative_perms, cumulative_shared_perms,
                        &cur_perm, &cur_shared);
        bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
    }

    return 0;
}

/*
 * Refresh permissions for every node in @list, in list order, stopping at
 * the first failure.
 *
 * Returns -EINVAL if bdrv_parent_perms_conflict() reports a conflict for a
 * node, the negative result of bdrv_node_refresh_perm() if that fails, and
 * 0 otherwise.
 */
static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
                                   Transaction *tran, Error **errp)
{
    int ret;
    BlockDriverState *bs;

    for ( ; list; list = list->next) {
        bs = list->data;

        if (bdrv_parent_perms_conflict(bs, errp)) {
            return -EINVAL;
        }

        ret = bdrv_node_refresh_perm(bs, q, tran, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * Combine the permissions of all parents of @bs: *@perm receives the union
 * of the permissions they use, *@shared_perm the intersection of the
 * permissions they share.  Without parents the result is 0 / BLK_PERM_ALL.
 */
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
                              uint64_t *shared_perm)
{
    BdrvChild *c;
    uint64_t cumulative_perms = 0;
    uint64_t cumulative_shared_perms = BLK_PERM_ALL;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        cumulative_perms |= c->perm;
        cumulative_shared_perms &= c->shared_perm;
    }

    *perm = cumulative_perms;
    *shared_perm = cumulative_shared_perms;
}

/*
 * Build a ", "-separated list of the names of all permission bits set in
 * @perm (empty string if none is set).  The string is handed out with
 * g_string_free(..., FALSE), so the caller owns it.
 */
char *bdrv_perm_names(uint64_t perm)
{
    struct perm_name {
        uint64_t perm;
        const char *name;
    } permissions[] = {
        { BLK_PERM_CONSISTENT_READ, "consistent read" },
        { BLK_PERM_WRITE,           "write" },
        { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
        { BLK_PERM_RESIZE,          "resize" },
        { BLK_PERM_GRAPH_MOD,       "change children" },
        { 0, NULL }                 /* terminator: loop stops at NULL name */
    };

    GString *result = g_string_sized_new(30);
    struct perm_name *p;

    for (p = permissions; p->name; p++) {
        if (perm & p->perm) {
            if (result->len > 0) {
                g_string_append(result, ", ");
            }
            g_string_append(result, p->name);
        }
    }

    return g_string_free(result, FALSE);
}


/*
 * Refresh the permissions of the nodes returned by bdrv_topological_dfs()
 * for @bs, inside a private transaction that is finalized with the result.
 *
 * Returns 0 on success, a negative value on failure.
 */
static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
{
    int ret;
    Transaction *tran = tran_new();
    g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);

    ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
    tran_finalize(tran, ret);

    return ret;
}

/*
 * Try to change the permissions of @c to @perm/@shared and refresh the
 * permissions below c->bs accordingly.
 *
 * Returns 0 on success.  On failure a negative value is returned and @errp
 * is set only if the request asked for more than @c currently has; a
 * failing request that merely loosens restrictions is reported as success.
 */
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
                            Error **errp)
{
    Error *local_err = NULL;
    Transaction *tran = tran_new();
    int ret;

    bdrv_child_set_perm(c, perm, shared, tran);

    ret = bdrv_refresh_perms(c->bs, &local_err);

    tran_finalize(tran, ret);

    if (ret < 0) {
        /*
         * NOTE(review): this compares against c->perm/c->shared_perm after
         * tran_finalize(); presumably they hold the old values again once
         * the transaction was aborted -- confirm in bdrv_child_set_perm().
         */
        if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
            /* tighten permissions */
            error_propagate(errp, local_err);
        } else {
            /*
             * Our caller may intend to only loosen restrictions and
             * does not expect this function to fail.  Errors are not
             * fatal in such a case, so we can just hide them from our
             * caller.
             */
            error_free(local_err);
            ret = 0;
        }
    }

    return ret;
}

/*
 * Recompute the permissions @bs needs on its child @c from the cumulative
 * permissions of the parents of @bs, and try to apply them with
 * bdrv_child_try_set_perm().  Returns that function's result.
 */
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
{
    uint64_t parent_perms, parent_shared;
    uint64_t perms, shared;

    bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
    bdrv_child_perm(bs, c->bs, c, c->role, NULL,
                    parent_perms, parent_shared, &perms, &shared);

    return bdrv_child_try_set_perm(c, perms, shared, errp);
}

/*
 * Default implementation for .bdrv_child_perm() for block filters:
 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
 * filtered child.
 *
 * @bs, @c, @role and @reopen_queue are not used by this implementation.
 */
static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
                                      BdrvChildRole role,
                                      BlockReopenQueue *reopen_queue,
                                      uint64_t perm, uint64_t shared,
                                      uint64_t *nperm, uint64_t *nshared)
{
    *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
    *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
}

/*
 * Default permissions for a COW (backing) child.  @role must contain
 * BDRV_CHILD_COW.  The result is stored in *@nperm / *@nshared.
 */
static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
                                       BdrvChildRole role,
                                       BlockReopenQueue *reopen_queue,
                                       uint64_t perm, uint64_t shared,
                                       uint64_t *nperm, uint64_t *nshared)
{
    assert(role & BDRV_CHILD_COW);

    /*
     * We want consistent read from backing files if the parent needs it.
     * No other operations are performed on backing files.
     */
    perm &= BLK_PERM_CONSISTENT_READ;

    /*
     * If the parent can deal with changing data, we're okay with a
     * writable and resizable backing file.
     * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
     */
    if (shared & BLK_PERM_WRITE) {
        shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
    } else {
        shared = 0;
    }

    shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
              BLK_PERM_WRITE_UNCHANGED;

    /* Nodes opened with BDRV_O_INACTIVE additionally share WRITE and RESIZE */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
    }

    *nperm = perm;
    *nshared = shared;
}

/*
 * Default permissions for a child that stores data and/or metadata of @bs.
 * @role must contain BDRV_CHILD_METADATA and/or BDRV_CHILD_DATA.  The
 * result is stored in *@nperm / *@nshared.
 */
static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
                                           BdrvChildRole role,
                                           BlockReopenQueue *reopen_queue,
                                           uint64_t perm, uint64_t shared,
                                           uint64_t *nperm, uint64_t *nshared)
{
    int flags;

    assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));

    flags = bdrv_reopen_get_flags(reopen_queue, bs);

    /*
     * Apart from the modifications below, the same permissions are
     * forwarded and left alone as for filters
     */
    bdrv_filter_default_perms(bs, c, role, reopen_queue,
                              perm, shared, &perm, &shared);

    if (role & BDRV_CHILD_METADATA) {
        /* Format drivers may touch metadata even if the guest doesn't write */
        if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
            perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
        }

        /*
         * bs->file always needs to be consistent because of the
         * metadata. We can never allow other users to resize or write
         * to it.
         */
        if (!(flags & BDRV_O_NO_IO)) {
            perm |= BLK_PERM_CONSISTENT_READ;
        }
        shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
    }

    if (role & BDRV_CHILD_DATA) {
        /*
         * Technically, everything in this block is a subset of the
         * BDRV_CHILD_METADATA path taken above, and so this could
         * be an "else if" branch.  However, that is not obvious, and
         * this function is not performance critical, therefore we let
         * this be an independent "if".
         */

        /*
         * We cannot allow other users to resize the file because the
         * format driver might have some assumptions about the size
         * (e.g. because it is stored in metadata, or because the file
         * is split into fixed-size data files).
         */
        shared &= ~BLK_PERM_RESIZE;

        /*
         * WRITE_UNCHANGED often cannot be performed as such on the
         * data file.  For example, the qcow2 driver may still need to
         * write copied clusters on copy-on-read.
         */
        if (perm & BLK_PERM_WRITE_UNCHANGED) {
            perm |= BLK_PERM_WRITE;
        }

        /*
         * If the data file is written to, the format driver may
         * expect to be able to resize it by writing beyond the EOF.
         */
        if (perm & BLK_PERM_WRITE) {
            perm |= BLK_PERM_RESIZE;
        }
    }

    /* Nodes opened with BDRV_O_INACTIVE additionally share WRITE and RESIZE */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
    }

    *nperm = perm;
    *nshared = shared;
}

/*
 * Generic .bdrv_child_perm() helper: dispatch on @role to the filter, COW or
 * storage default above.  FILTERED excludes DATA/METADATA/COW and COW
 * excludes DATA/METADATA (asserted); a role with none of these bits set is
 * a programming error.
 */
void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
                        BdrvChildRole role, BlockReopenQueue *reopen_queue,
                        uint64_t perm, uint64_t shared,
                        uint64_t *nperm, uint64_t *nshared)
{
    if (role & BDRV_CHILD_FILTERED) {
        assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                         BDRV_CHILD_COW)));
        bdrv_filter_default_perms(bs, c, role, reopen_queue,
                                  perm, shared, nperm, nshared);
    } else if (role & BDRV_CHILD_COW) {
        assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
        bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
                                   perm, shared, nperm, nshared);
    } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
        bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
                                       perm, shared, nperm, nshared);
    } else {
        g_assert_not_reached();
    }
}

/*
 * Map a QAPI BlockPermission enum value to the corresponding BLK_PERM_* bit.
 * @qapi_perm must be below BLOCK_PERMISSION__MAX (asserted).
 */
uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
{
    static const uint64_t permissions[] = {
        [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
        [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
        [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
        [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
        [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
    };

    /* The table must cover every enum value and every BLK_PERM_* bit */
    QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
    QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);

    assert(qapi_perm < BLOCK_PERMISSION__MAX);

    return permissions[qapi_perm];
}

/*
 * Make @child point to @new_bs (which may be NULL) without touching
 * permissions or reference counts.
 *
 * @child must not be frozen, and if both the old and the new node exist
 * they must be in the same AioContext (both asserted).  The child is moved
 * between the parents lists of the two nodes, the .detach/.attach callbacks
 * of its class are invoked, and the parent's drained sections are adjusted
 * to the quiesce counter of the new node.
 */
static void bdrv_replace_child_noperm(BdrvChild *child,
                                      BlockDriverState *new_bs)
{
    BlockDriverState *old_bs = child->bs;
    int new_bs_quiesce_counter;
    int drain_saldo;

    assert(!child->frozen);

    if (old_bs && new_bs) {
        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
    }

    /* Positive saldo: drained_begin calls owed; negative: drained_end owed */
    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;

    /*
     * If the new child node is drained but the old one was not, flush
     * all outstanding requests to the old child node.
     */
    while (drain_saldo > 0 && child->klass->drained_begin) {
        bdrv_parent_drained_begin_single(child, true);
        drain_saldo--;
    }

    if (old_bs) {
        /* Detach first so that the recursive drain sections coming from @child
         * are already gone and we only end the drain sections that came from
         * elsewhere. */
        if (child->klass->detach) {
            child->klass->detach(child);
        }
        QLIST_REMOVE(child, next_parent);
    }

    child->bs = new_bs;

    if (new_bs) {
        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);

        /*
         * Detaching the old node may have led to the new node's
         * quiesce_counter having been decreased.  Not a problem, we
         * just need to recognize this here and then invoke
         * drained_end appropriately more often.
         */
        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;

        /* Attach only after starting new drained sections, so that recursive
         * drain sections coming from @child don't get an extra .drained_begin
         * callback. */
        if (child->klass->attach) {
            child->klass->attach(child);
        }
    }

    /*
     * If the old child node was drained but the new one is not, allow
     * requests to come in only after the new node has been attached.
     */
    while (drain_saldo < 0 && child->klass->drained_end) {
        bdrv_parent_drained_end_single(child);
        drain_saldo++;
    }
}

/* Free a BdrvChild (passed as void *) together with its name. */
static void bdrv_child_free(void *opaque)
{
    BdrvChild *c = opaque;

    g_free(c->name);
    g_free(c);
}

/*
 * Unlink @child from the children list it may be on and free it.  The child
 * must no longer point to a node (asserted).
 */
static void bdrv_remove_empty_child(BdrvChild *child)
{
    assert(!child->bs);
    QLIST_SAFE_REMOVE(child, next);
    bdrv_child_free(child);
}

/* State needed to undo bdrv_attach_child_common(). */
typedef struct BdrvAttachChildCommonState {
    BdrvChild **child;            /* caller's variable holding the new child */
    AioContext *old_parent_ctx;   /* parent's AioContext before the attach */
    AioContext *old_child_ctx;    /* child node's AioContext before the attach */
} BdrvAttachChildCommonState;

/*
 * Abort handler for bdrv_attach_child_common(): detach the node, move node
 * and parent back to the AioContexts they had before, drop the reference
 * taken on the node, free the child and reset the caller's pointer to NULL.
 */
static void bdrv_attach_child_common_abort(void *opaque)
{
    BdrvAttachChildCommonState *s = opaque;
    BdrvChild *child = *s->child;
    BlockDriverState *bs = child->bs;

    bdrv_replace_child_noperm(child, NULL);

    if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
        bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
    }

    if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
        GSList *ignore = g_slist_prepend(NULL, child);

        child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
                                      &error_abort);
        g_slist_free(ignore);
        /* Start over with a fresh ignore list for the actual change */
        ignore = g_slist_prepend(NULL, child);
        child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);

        g_slist_free(ignore);
    }

    bdrv_unref(bs);
    bdrv_remove_empty_child(child);
    *s->child = NULL;
}

static TransactionActionDrv bdrv_attach_child_common_drv = {
    .abort = bdrv_attach_child_common_abort,
    .clean = g_free,
};

/*
 * Common part of attaching bdrv child to bs or to blk or to job
 *
 * Resulting new child is returned through @child.
 * At start *@child must be NULL.
 * @child is saved to a new entry of @tran, so that *@child could be reverted to
 * NULL on abort(). So referenced variable must live at least until transaction
 * end.
 *
 * Returns 0 on success.  If the AioContexts of parent and child cannot be
 * reconciled, a negative value is returned, @errp is set, *@child stays NULL
 * and nothing is added to @tran.
 */
static int bdrv_attach_child_common(BlockDriverState *child_bs,
                                    const char *child_name,
                                    const BdrvChildClass *child_class,
                                    BdrvChildRole child_role,
                                    uint64_t perm, uint64_t shared_perm,
                                    void *opaque, BdrvChild **child,
                                    Transaction *tran, Error **errp)
{
    BdrvChild *new_child;
    AioContext *parent_ctx;
    AioContext *child_ctx = bdrv_get_aio_context(child_bs);

    assert(child);
    assert(*child == NULL);
    assert(child_class->get_parent_desc);

    new_child = g_new(BdrvChild, 1);
    *new_child = (BdrvChild) {
        .bs             = NULL,
        .name           = g_strdup(child_name),
        .klass          = child_class,
        .role           = child_role,
        .perm           = perm,
        .shared_perm    = shared_perm,
        .opaque         = opaque,
    };

    /*
     * If the AioContexts don't match, first try to move the subtree of
     * child_bs into the AioContext of the new parent. If this doesn't work,
     * try moving the parent into the AioContext of child_bs instead.
     */
    parent_ctx = bdrv_child_get_parent_aio_context(new_child);
    if (child_ctx != parent_ctx) {
        Error *local_err = NULL;
        int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);

        if (ret < 0 && child_class->can_set_aio_ctx) {
            GSList *ignore = g_slist_prepend(NULL, new_child);
            if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
                                             NULL))
            {
                /* Moving the parent works: forget the first error */
                error_free(local_err);
                ret = 0;
                g_slist_free(ignore);
                ignore = g_slist_prepend(NULL, new_child);
                child_class->set_aio_ctx(new_child, child_ctx, &ignore);
            }
            g_slist_free(ignore);
        }

        if (ret < 0) {
            error_propagate(errp, local_err);
            bdrv_remove_empty_child(new_child);
            return ret;
        }
    }

    bdrv_ref(child_bs);
    bdrv_replace_child_noperm(new_child, child_bs);

    *child = new_child;

    /* Remember the original contexts so that abort can restore them */
    BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
    *s = (BdrvAttachChildCommonState) {
        .child = child,
        .old_parent_ctx = parent_ctx,
        .old_child_ctx = child_ctx,
    };
    tran_add(tran, &bdrv_attach_child_common_drv, s);

    return 0;
}

/*
 * Variable referenced by @child must live at least until transaction end.
2833 * (see bdrv_attach_child_common() doc for details) 2834 */ 2835 static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, 2836 BlockDriverState *child_bs, 2837 const char *child_name, 2838 const BdrvChildClass *child_class, 2839 BdrvChildRole child_role, 2840 BdrvChild **child, 2841 Transaction *tran, 2842 Error **errp) 2843 { 2844 int ret; 2845 uint64_t perm, shared_perm; 2846 2847 assert(parent_bs->drv); 2848 2849 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 2850 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 2851 perm, shared_perm, &perm, &shared_perm); 2852 2853 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 2854 child_role, perm, shared_perm, parent_bs, 2855 child, tran, errp); 2856 if (ret < 0) { 2857 return ret; 2858 } 2859 2860 QLIST_INSERT_HEAD(&parent_bs->children, *child, next); 2861 /* 2862 * child is removed in bdrv_attach_child_common_abort(), so don't care to 2863 * abort this change separately. 2864 */ 2865 2866 return 0; 2867 } 2868 2869 static void bdrv_detach_child(BdrvChild *child) 2870 { 2871 BlockDriverState *old_bs = child->bs; 2872 2873 bdrv_replace_child_noperm(child, NULL); 2874 bdrv_remove_empty_child(child); 2875 2876 if (old_bs) { 2877 /* 2878 * Update permissions for old node. We're just taking a parent away, so 2879 * we're loosening restrictions. Errors of permission update are not 2880 * fatal in this case, ignore them. 2881 */ 2882 bdrv_refresh_perms(old_bs, NULL); 2883 2884 /* 2885 * When the parent requiring a non-default AioContext is removed, the 2886 * node moves back to the main AioContext 2887 */ 2888 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); 2889 } 2890 } 2891 2892 /* 2893 * This function steals the reference to child_bs from the caller. 2894 * That reference is later dropped by bdrv_root_unref_child(). 2895 * 2896 * On failure NULL is returned, errp is set and the reference to 2897 * child_bs is also dropped. 
2898 * 2899 * The caller must hold the AioContext lock @child_bs, but not that of @ctx 2900 * (unless @child_bs is already in @ctx). 2901 */ 2902 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 2903 const char *child_name, 2904 const BdrvChildClass *child_class, 2905 BdrvChildRole child_role, 2906 uint64_t perm, uint64_t shared_perm, 2907 void *opaque, Error **errp) 2908 { 2909 int ret; 2910 BdrvChild *child = NULL; 2911 Transaction *tran = tran_new(); 2912 2913 ret = bdrv_attach_child_common(child_bs, child_name, child_class, 2914 child_role, perm, shared_perm, opaque, 2915 &child, tran, errp); 2916 if (ret < 0) { 2917 goto out; 2918 } 2919 2920 ret = bdrv_refresh_perms(child_bs, errp); 2921 2922 out: 2923 tran_finalize(tran, ret); 2924 /* child is unset on failure by bdrv_attach_child_common_abort() */ 2925 assert((ret < 0) == !child); 2926 2927 bdrv_unref(child_bs); 2928 return child; 2929 } 2930 2931 /* 2932 * This function transfers the reference to child_bs from the caller 2933 * to parent_bs. That reference is later dropped by parent_bs on 2934 * bdrv_close() or if someone calls bdrv_unref_child(). 2935 * 2936 * On failure NULL is returned, errp is set and the reference to 2937 * child_bs is also dropped. 2938 * 2939 * If @parent_bs and @child_bs are in different AioContexts, the caller must 2940 * hold the AioContext lock for @child_bs, but not for @parent_bs. 
2941 */ 2942 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 2943 BlockDriverState *child_bs, 2944 const char *child_name, 2945 const BdrvChildClass *child_class, 2946 BdrvChildRole child_role, 2947 Error **errp) 2948 { 2949 int ret; 2950 BdrvChild *child = NULL; 2951 Transaction *tran = tran_new(); 2952 2953 ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, 2954 child_role, &child, tran, errp); 2955 if (ret < 0) { 2956 goto out; 2957 } 2958 2959 ret = bdrv_refresh_perms(parent_bs, errp); 2960 if (ret < 0) { 2961 goto out; 2962 } 2963 2964 out: 2965 tran_finalize(tran, ret); 2966 /* child is unset on failure by bdrv_attach_child_common_abort() */ 2967 assert((ret < 0) == !child); 2968 2969 bdrv_unref(child_bs); 2970 2971 return child; 2972 } 2973 2974 /* Callers must ensure that child->frozen is false. */ 2975 void bdrv_root_unref_child(BdrvChild *child) 2976 { 2977 BlockDriverState *child_bs; 2978 2979 child_bs = child->bs; 2980 bdrv_detach_child(child); 2981 bdrv_unref(child_bs); 2982 } 2983 2984 typedef struct BdrvSetInheritsFrom { 2985 BlockDriverState *bs; 2986 BlockDriverState *old_inherits_from; 2987 } BdrvSetInheritsFrom; 2988 2989 static void bdrv_set_inherits_from_abort(void *opaque) 2990 { 2991 BdrvSetInheritsFrom *s = opaque; 2992 2993 s->bs->inherits_from = s->old_inherits_from; 2994 } 2995 2996 static TransactionActionDrv bdrv_set_inherits_from_drv = { 2997 .abort = bdrv_set_inherits_from_abort, 2998 .clean = g_free, 2999 }; 3000 3001 /* @tran is allowed to be NULL. 
In this case no rollback is possible */ 3002 static void bdrv_set_inherits_from(BlockDriverState *bs, 3003 BlockDriverState *new_inherits_from, 3004 Transaction *tran) 3005 { 3006 if (tran) { 3007 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); 3008 3009 *s = (BdrvSetInheritsFrom) { 3010 .bs = bs, 3011 .old_inherits_from = bs->inherits_from, 3012 }; 3013 3014 tran_add(tran, &bdrv_set_inherits_from_drv, s); 3015 } 3016 3017 bs->inherits_from = new_inherits_from; 3018 } 3019 3020 /** 3021 * Clear all inherits_from pointers from children and grandchildren of 3022 * @root that point to @root, where necessary. 3023 * @tran is allowed to be NULL. In this case no rollback is possible 3024 */ 3025 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, 3026 Transaction *tran) 3027 { 3028 BdrvChild *c; 3029 3030 if (child->bs->inherits_from == root) { 3031 /* 3032 * Remove inherits_from only when the last reference between root and 3033 * child->bs goes away. 3034 */ 3035 QLIST_FOREACH(c, &root->children, next) { 3036 if (c != child && c->bs == child->bs) { 3037 break; 3038 } 3039 } 3040 if (c == NULL) { 3041 bdrv_set_inherits_from(child->bs, NULL, tran); 3042 } 3043 } 3044 3045 QLIST_FOREACH(c, &child->bs->children, next) { 3046 bdrv_unset_inherits_from(root, c, tran); 3047 } 3048 } 3049 3050 /* Callers must ensure that child->frozen is false. */ 3051 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 3052 { 3053 if (child == NULL) { 3054 return; 3055 } 3056 3057 bdrv_unset_inherits_from(parent, child, NULL); 3058 bdrv_root_unref_child(child); 3059 } 3060 3061 3062 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 3063 { 3064 BdrvChild *c; 3065 QLIST_FOREACH(c, &bs->parents, next_parent) { 3066 if (c->klass->change_media) { 3067 c->klass->change_media(c, load); 3068 } 3069 } 3070 } 3071 3072 /* Return true if you can reach parent going through child->inherits_from 3073 * recursively. 
If parent or child are NULL, return false */
static bool bdrv_inherits_from_recursive(BlockDriverState *child,
                                         BlockDriverState *parent)
{
    /* Walk the inherits_from chain until it ends or reaches @parent */
    while (child && child != parent) {
        child = child->inherits_from;
    }

    return child != NULL;
}

/*
 * Return the BdrvChildRole for @bs's backing child.  bs->backing is
 * mostly used for COW backing children (role = COW), but also for
 * filtered children (role = FILTERED | PRIMARY).
 */
static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->is_filter) {
        return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
    } else {
        return BDRV_CHILD_COW;
    }
}

/*
 * Sets the bs->backing link of a BDS. A new reference is created; callers
 * which don't need their own reference any more must call bdrv_unref().
 *
 * All changes are recorded in @tran; permissions are not refreshed here.
 * Returns 0 on success, -EPERM if the backing chain is frozen, or the
 * negative result of bdrv_attach_child_noperm().
 */
static int bdrv_set_backing_noperm(BlockDriverState *bs,
                                   BlockDriverState *backing_hd,
                                   Transaction *tran, Error **errp)
{
    int ret = 0;
    /* Must be evaluated before the old backing child is removed below */
    bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
        bdrv_inherits_from_recursive(backing_hd, bs);

    if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
        return -EPERM;
    }

    if (bs->backing) {
        /* Cannot be frozen, we checked that above */
        bdrv_unset_inherits_from(bs, bs->backing, tran);
        bdrv_remove_filter_or_cow_child(bs, tran);
    }

    if (!backing_hd) {
        goto out;
    }

    ret = bdrv_attach_child_noperm(bs, backing_hd, "backing",
                                   &child_of_bds, bdrv_backing_role(bs),
                                   &bs->backing, tran, errp);
    if (ret < 0) {
        return ret;
    }


    /*
     * If backing_hd was already part of bs's backing chain, and
     * inherits_from pointed recursively to bs then let's update it to
     * point directly to bs (else it will become NULL).
     */
    if (update_inherits_from) {
        bdrv_set_inherits_from(backing_hd, bs, tran);
    }

out:
    /* Errors from refreshing the limits are ignored (NULL errp) */
    bdrv_refresh_limits(bs, tran, NULL);

    return 0;
}

/*
 * Set @backing_hd (may be NULL) as the backing node of @bs and refresh the
 * permissions of @bs, all within one transaction that is finalized with the
 * result.  Returns 0 on success, a negative value on failure.
 */
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
                        Error **errp)
{
    int ret;
    Transaction *tran = tran_new();

    ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
    if (ret < 0) {
        goto out;
    }

    ret = bdrv_refresh_perms(bs, errp);
out:
    tran_finalize(tran, ret);

    return ret;
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * Returns 0 on success (including when there is nothing to open) and
 * -EINVAL or the negative result of bdrv_set_backing_hd() on failure.
 *
 * TODO Can this be unified with bdrv_open_image()?
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                           const char *bdref_key, Error **errp)
{
    char *backing_filename = NULL;
    char *bdref_key_dot;
    const char *reference = NULL;
    int ret = 0;
    bool implicit_backing = false;
    BlockDriverState *backing_hd;
    QDict *options;
    QDict *tmp_parent_options = NULL;
    Error *local_err = NULL;

    /* Backing file already open: nothing to do */
    if (bs->backing != NULL) {
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (parent_options == NULL) {
        tmp_parent_options = qdict_new();
        parent_options = tmp_parent_options;
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;

    /* Split the "${bdref_key}." entries off into their own QDict */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @parent_options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        /* keep backing_filename NULL */
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file recorded and no options given: nothing to open */
        qobject_unref(options);
        goto free_exit;
    } else {
        if (qdict_size(options) == 0) {
            /* If the user specifies options that do not modify the
             * backing file's behavior, we might still consider it the
             * implicit backing file.  But it's easier this way, and
             * just specifying some of the backing BDS's options is
             * only possible with -drive anyway (otherwise the QAPI
             * schema forces the user to specify everything). */
            implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
        }

        backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            qobject_unref(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        qobject_unref(options);
        goto free_exit;
    }

    if (!reference &&
        bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put_str(options, "driver", bs->backing_format);
    }

    /*
     * NOTE(review): @options is not unref'ed on any path after this call, so
     * the reference is presumably consumed by bdrv_open_inherit() -- confirm
     * against its definition.
     */
    backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
                                   &child_of_bds, bdrv_backing_role(bs), errp);
    if (!backing_hd) {
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_prepend(errp, "Could not open backing file: ");
        ret = -EINVAL;
        goto free_exit;
    }

    if (implicit_backing) {
        bdrv_refresh_filename(backing_hd);
        pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
                backing_hd->filename);
    }

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
    ret = bdrv_set_backing_hd(bs, backing_hd, errp);
    bdrv_unref(backing_hd);
    if (ret < 0) {
        goto free_exit;
    }

    qdict_del(parent_options, bdref_key);

free_exit:
    g_free(backing_filename);
    qobject_unref(tmp_parent_options);
    return ret;
}

/*
 * Open the node described by @filename and/or the BlockdevRef stored under
 * @bdref_key in @options, without attaching it to @parent.
 *
 * Returns the opened node, or NULL if opening failed or if nothing was
 * specified (in the latter case @errp is only set when @allow_none is
 * false).  The @bdref_key entry is removed from @options in all cases.
 */
static BlockDriverState *
bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
                   BlockDriverState *parent, const BdrvChildClass *child_class,
                   BdrvChildRole child_role, bool allow_none, Error **errp)
{
    BlockDriverState *bs = NULL;
    QDict *image_options;
    char *bdref_key_dot;
    const char *reference;

    assert(child_class != NULL);

    /* Split the "${bdref_key}." entries off into their own QDict */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (!allow_none) {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
        }
        qobject_unref(image_options);
        goto done;
    }

    bs = bdrv_open_inherit(filename, reference, image_options, 0,
                           parent, child_class, child_role, errp);
    if (!bs) {
        goto done;
    }

done:
    qdict_del(options, bdref_key);
    return bs;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
3327 * 3328 * If allow_none is true, no image will be opened if filename is false and no 3329 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3330 * 3331 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3332 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3333 * itself, all options starting with "${bdref_key}." are considered part of the 3334 * BlockdevRef. 3335 * 3336 * The BlockdevRef will be removed from the options QDict. 3337 */ 3338 BdrvChild *bdrv_open_child(const char *filename, 3339 QDict *options, const char *bdref_key, 3340 BlockDriverState *parent, 3341 const BdrvChildClass *child_class, 3342 BdrvChildRole child_role, 3343 bool allow_none, Error **errp) 3344 { 3345 BlockDriverState *bs; 3346 3347 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3348 child_role, allow_none, errp); 3349 if (bs == NULL) { 3350 return NULL; 3351 } 3352 3353 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3354 errp); 3355 } 3356 3357 /* 3358 * TODO Future callers may need to specify parent/child_class in order for 3359 * option inheritance to work. Existing callers use it for the root node. 
3360 */ 3361 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3362 { 3363 BlockDriverState *bs = NULL; 3364 QObject *obj = NULL; 3365 QDict *qdict = NULL; 3366 const char *reference = NULL; 3367 Visitor *v = NULL; 3368 3369 if (ref->type == QTYPE_QSTRING) { 3370 reference = ref->u.reference; 3371 } else { 3372 BlockdevOptions *options = &ref->u.definition; 3373 assert(ref->type == QTYPE_QDICT); 3374 3375 v = qobject_output_visitor_new(&obj); 3376 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3377 visit_complete(v, &obj); 3378 3379 qdict = qobject_to(QDict, obj); 3380 qdict_flatten(qdict); 3381 3382 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3383 * compatibility with other callers) rather than what we want as the 3384 * real defaults. Apply the defaults here instead. */ 3385 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3386 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3387 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3388 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3389 3390 } 3391 3392 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); 3393 obj = NULL; 3394 qobject_unref(obj); 3395 visit_free(v); 3396 return bs; 3397 } 3398 3399 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3400 int flags, 3401 QDict *snapshot_options, 3402 Error **errp) 3403 { 3404 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. 
*/ 3405 char *tmp_filename = g_malloc0(PATH_MAX + 1); 3406 int64_t total_size; 3407 QemuOpts *opts = NULL; 3408 BlockDriverState *bs_snapshot = NULL; 3409 int ret; 3410 3411 /* if snapshot, we create a temporary backing file and open it 3412 instead of opening 'filename' directly */ 3413 3414 /* Get the required size from the image */ 3415 total_size = bdrv_getlength(bs); 3416 if (total_size < 0) { 3417 error_setg_errno(errp, -total_size, "Could not get image size"); 3418 goto out; 3419 } 3420 3421 /* Create the temporary image */ 3422 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 3423 if (ret < 0) { 3424 error_setg_errno(errp, -ret, "Could not get temporary filename"); 3425 goto out; 3426 } 3427 3428 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3429 &error_abort); 3430 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3431 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3432 qemu_opts_del(opts); 3433 if (ret < 0) { 3434 error_prepend(errp, "Could not create temporary overlay '%s': ", 3435 tmp_filename); 3436 goto out; 3437 } 3438 3439 /* Prepare options QDict for the temporary file */ 3440 qdict_put_str(snapshot_options, "file.driver", "file"); 3441 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3442 qdict_put_str(snapshot_options, "driver", "qcow2"); 3443 3444 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3445 snapshot_options = NULL; 3446 if (!bs_snapshot) { 3447 goto out; 3448 } 3449 3450 ret = bdrv_append(bs_snapshot, bs, errp); 3451 if (ret < 0) { 3452 bs_snapshot = NULL; 3453 goto out; 3454 } 3455 3456 out: 3457 qobject_unref(snapshot_options); 3458 g_free(tmp_filename); 3459 return bs_snapshot; 3460 } 3461 3462 /* 3463 * Opens a disk image (raw, qcow2, vmdk, ...) 3464 * 3465 * options is a QDict of options to pass to the block drivers, or NULL for an 3466 * empty set of options. 
The reference to the QDict belongs to the block layer 3467 * after the call (even on failure), so if the caller intends to reuse the 3468 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3469 * 3470 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3471 * If it is not NULL, the referenced BDS will be reused. 3472 * 3473 * The reference parameter may be used to specify an existing block device which 3474 * should be opened. If specified, neither options nor a filename may be given, 3475 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 3476 */ 3477 static BlockDriverState *bdrv_open_inherit(const char *filename, 3478 const char *reference, 3479 QDict *options, int flags, 3480 BlockDriverState *parent, 3481 const BdrvChildClass *child_class, 3482 BdrvChildRole child_role, 3483 Error **errp) 3484 { 3485 int ret; 3486 BlockBackend *file = NULL; 3487 BlockDriverState *bs; 3488 BlockDriver *drv = NULL; 3489 BdrvChild *child; 3490 const char *drvname; 3491 const char *backing; 3492 Error *local_err = NULL; 3493 QDict *snapshot_options = NULL; 3494 int snapshot_flags = 0; 3495 3496 assert(!child_class || !flags); 3497 assert(!child_class == !parent); 3498 3499 if (reference) { 3500 bool options_non_empty = options ? 
qdict_size(options) : false; 3501 qobject_unref(options); 3502 3503 if (filename || options_non_empty) { 3504 error_setg(errp, "Cannot reference an existing block device with " 3505 "additional options or a new filename"); 3506 return NULL; 3507 } 3508 3509 bs = bdrv_lookup_bs(reference, reference, errp); 3510 if (!bs) { 3511 return NULL; 3512 } 3513 3514 bdrv_ref(bs); 3515 return bs; 3516 } 3517 3518 bs = bdrv_new(); 3519 3520 /* NULL means an empty set of options */ 3521 if (options == NULL) { 3522 options = qdict_new(); 3523 } 3524 3525 /* json: syntax counts as explicit options, as if in the QDict */ 3526 parse_json_protocol(options, &filename, &local_err); 3527 if (local_err) { 3528 goto fail; 3529 } 3530 3531 bs->explicit_options = qdict_clone_shallow(options); 3532 3533 if (child_class) { 3534 bool parent_is_format; 3535 3536 if (parent->drv) { 3537 parent_is_format = parent->drv->is_format; 3538 } else { 3539 /* 3540 * parent->drv is not set yet because this node is opened for 3541 * (potential) format probing. That means that @parent is going 3542 * to be a format node. 3543 */ 3544 parent_is_format = true; 3545 } 3546 3547 bs->inherits_from = parent; 3548 child_class->inherit_options(child_role, parent_is_format, 3549 &flags, options, 3550 parent->open_flags, parent->options); 3551 } 3552 3553 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 3554 if (ret < 0) { 3555 goto fail; 3556 } 3557 3558 /* 3559 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 3560 * Caution: getting a boolean member of @options requires care. 3561 * When @options come from -blockdev or blockdev_add, members are 3562 * typed according to the QAPI schema, but when they come from 3563 * -drive, they're all QString. 
3564 */ 3565 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 3566 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 3567 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 3568 } else { 3569 flags &= ~BDRV_O_RDWR; 3570 } 3571 3572 if (flags & BDRV_O_SNAPSHOT) { 3573 snapshot_options = qdict_new(); 3574 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 3575 flags, options); 3576 /* Let bdrv_backing_options() override "read-only" */ 3577 qdict_del(options, BDRV_OPT_READ_ONLY); 3578 bdrv_inherited_options(BDRV_CHILD_COW, true, 3579 &flags, options, flags, options); 3580 } 3581 3582 bs->open_flags = flags; 3583 bs->options = options; 3584 options = qdict_clone_shallow(options); 3585 3586 /* Find the right image format driver */ 3587 /* See cautionary note on accessing @options above */ 3588 drvname = qdict_get_try_str(options, "driver"); 3589 if (drvname) { 3590 drv = bdrv_find_format(drvname); 3591 if (!drv) { 3592 error_setg(errp, "Unknown driver: '%s'", drvname); 3593 goto fail; 3594 } 3595 } 3596 3597 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 3598 3599 /* See cautionary note on accessing @options above */ 3600 backing = qdict_get_try_str(options, "backing"); 3601 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL || 3602 (backing && *backing == '\0')) 3603 { 3604 if (backing) { 3605 warn_report("Use of \"backing\": \"\" is deprecated; " 3606 "use \"backing\": null instead"); 3607 } 3608 flags |= BDRV_O_NO_BACKING; 3609 qdict_del(bs->explicit_options, "backing"); 3610 qdict_del(bs->options, "backing"); 3611 qdict_del(options, "backing"); 3612 } 3613 3614 /* Open image file without format layer. This BlockBackend is only used for 3615 * probing, the block drivers will do their own bdrv_open_child() for the 3616 * same BDS, which is why we put the node name back into options. 
*/ 3617 if ((flags & BDRV_O_PROTOCOL) == 0) { 3618 BlockDriverState *file_bs; 3619 3620 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 3621 &child_of_bds, BDRV_CHILD_IMAGE, 3622 true, &local_err); 3623 if (local_err) { 3624 goto fail; 3625 } 3626 if (file_bs != NULL) { 3627 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 3628 * looking at the header to guess the image format. This works even 3629 * in cases where a guest would not see a consistent state. */ 3630 file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); 3631 blk_insert_bs(file, file_bs, &local_err); 3632 bdrv_unref(file_bs); 3633 if (local_err) { 3634 goto fail; 3635 } 3636 3637 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 3638 } 3639 } 3640 3641 /* Image format probing */ 3642 bs->probed = !drv; 3643 if (!drv && file) { 3644 ret = find_image_format(file, filename, &drv, &local_err); 3645 if (ret < 0) { 3646 goto fail; 3647 } 3648 /* 3649 * This option update would logically belong in bdrv_fill_options(), 3650 * but we first need to open bs->file for the probing to work, while 3651 * opening bs->file already requires the (mostly) final set of options 3652 * so that cache mode etc. can be inherited. 3653 * 3654 * Adding the driver later is somewhat ugly, but it's not an option 3655 * that would ever be inherited, so it's correct. We just need to make 3656 * sure to update both bs->options (which has the full effective 3657 * options for bs) and options (which has file.* already removed). 
3658 */ 3659 qdict_put_str(bs->options, "driver", drv->format_name); 3660 qdict_put_str(options, "driver", drv->format_name); 3661 } else if (!drv) { 3662 error_setg(errp, "Must specify either driver or file"); 3663 goto fail; 3664 } 3665 3666 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 3667 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 3668 /* file must be NULL if a protocol BDS is about to be created 3669 * (the inverse results in an error message from bdrv_open_common()) */ 3670 assert(!(flags & BDRV_O_PROTOCOL) || !file); 3671 3672 /* Open the image */ 3673 ret = bdrv_open_common(bs, file, options, &local_err); 3674 if (ret < 0) { 3675 goto fail; 3676 } 3677 3678 if (file) { 3679 blk_unref(file); 3680 file = NULL; 3681 } 3682 3683 /* If there is a backing file, use it */ 3684 if ((flags & BDRV_O_NO_BACKING) == 0) { 3685 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 3686 if (ret < 0) { 3687 goto close_and_fail; 3688 } 3689 } 3690 3691 /* Remove all children options and references 3692 * from bs->options and bs->explicit_options */ 3693 QLIST_FOREACH(child, &bs->children, next) { 3694 char *child_key_dot; 3695 child_key_dot = g_strdup_printf("%s.", child->name); 3696 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); 3697 qdict_extract_subqdict(bs->options, NULL, child_key_dot); 3698 qdict_del(bs->explicit_options, child->name); 3699 qdict_del(bs->options, child->name); 3700 g_free(child_key_dot); 3701 } 3702 3703 /* Check if any unknown options were used */ 3704 if (qdict_size(options) != 0) { 3705 const QDictEntry *entry = qdict_first(options); 3706 if (flags & BDRV_O_PROTOCOL) { 3707 error_setg(errp, "Block protocol '%s' doesn't support the option " 3708 "'%s'", drv->format_name, entry->key); 3709 } else { 3710 error_setg(errp, 3711 "Block format '%s' does not support the option '%s'", 3712 drv->format_name, entry->key); 3713 } 3714 3715 goto close_and_fail; 3716 } 3717 3718 
bdrv_parent_cb_change_media(bs, true); 3719 3720 qobject_unref(options); 3721 options = NULL; 3722 3723 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 3724 * temporary snapshot afterwards. */ 3725 if (snapshot_flags) { 3726 BlockDriverState *snapshot_bs; 3727 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 3728 snapshot_options, &local_err); 3729 snapshot_options = NULL; 3730 if (local_err) { 3731 goto close_and_fail; 3732 } 3733 /* We are not going to return bs but the overlay on top of it 3734 * (snapshot_bs); thus, we have to drop the strong reference to bs 3735 * (which we obtained by calling bdrv_new()). bs will not be deleted, 3736 * though, because the overlay still has a reference to it. */ 3737 bdrv_unref(bs); 3738 bs = snapshot_bs; 3739 } 3740 3741 return bs; 3742 3743 fail: 3744 blk_unref(file); 3745 qobject_unref(snapshot_options); 3746 qobject_unref(bs->explicit_options); 3747 qobject_unref(bs->options); 3748 qobject_unref(options); 3749 bs->options = NULL; 3750 bs->explicit_options = NULL; 3751 bdrv_unref(bs); 3752 error_propagate(errp, local_err); 3753 return NULL; 3754 3755 close_and_fail: 3756 bdrv_unref(bs); 3757 qobject_unref(snapshot_options); 3758 qobject_unref(options); 3759 error_propagate(errp, local_err); 3760 return NULL; 3761 } 3762 3763 BlockDriverState *bdrv_open(const char *filename, const char *reference, 3764 QDict *options, int flags, Error **errp) 3765 { 3766 return bdrv_open_inherit(filename, reference, options, flags, NULL, 3767 NULL, 0, errp); 3768 } 3769 3770 /* Return true if the NULL-terminated @list contains @str */ 3771 static bool is_str_in_list(const char *str, const char *const *list) 3772 { 3773 if (str && list) { 3774 int i; 3775 for (i = 0; list[i] != NULL; i++) { 3776 if (!strcmp(str, list[i])) { 3777 return true; 3778 } 3779 } 3780 } 3781 return false; 3782 } 3783 3784 /* 3785 * Check that every option set in @bs->options is also set in 3786 * @new_opts. 
3787 * 3788 * Options listed in the common_options list and in 3789 * @bs->drv->mutable_opts are skipped. 3790 * 3791 * Return 0 on success, otherwise return -EINVAL and set @errp. 3792 */ 3793 static int bdrv_reset_options_allowed(BlockDriverState *bs, 3794 const QDict *new_opts, Error **errp) 3795 { 3796 const QDictEntry *e; 3797 /* These options are common to all block drivers and are handled 3798 * in bdrv_reopen_prepare() so they can be left out of @new_opts */ 3799 const char *const common_options[] = { 3800 "node-name", "discard", "cache.direct", "cache.no-flush", 3801 "read-only", "auto-read-only", "detect-zeroes", NULL 3802 }; 3803 3804 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { 3805 if (!qdict_haskey(new_opts, e->key) && 3806 !is_str_in_list(e->key, common_options) && 3807 !is_str_in_list(e->key, bs->drv->mutable_opts)) { 3808 error_setg(errp, "Option '%s' cannot be reset " 3809 "to its default value", e->key); 3810 return -EINVAL; 3811 } 3812 } 3813 3814 return 0; 3815 } 3816 3817 /* 3818 * Returns true if @child can be reached recursively from @bs 3819 */ 3820 static bool bdrv_recurse_has_child(BlockDriverState *bs, 3821 BlockDriverState *child) 3822 { 3823 BdrvChild *c; 3824 3825 if (bs == child) { 3826 return true; 3827 } 3828 3829 QLIST_FOREACH(c, &bs->children, next) { 3830 if (bdrv_recurse_has_child(c->bs, child)) { 3831 return true; 3832 } 3833 } 3834 3835 return false; 3836 } 3837 3838 /* 3839 * Adds a BlockDriverState to a simple queue for an atomic, transactional 3840 * reopen of multiple devices. 3841 * 3842 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT 3843 * already performed, or alternatively may be NULL a new BlockReopenQueue will 3844 * be created and initialized. This newly created BlockReopenQueue should be 3845 * passed back in for subsequent calls that are intended to be of the same 3846 * atomic 'set'. 3847 * 3848 * bs is the BlockDriverState to add to the reopen queue. 
3849 * 3850 * options contains the changed options for the associated bs 3851 * (the BlockReopenQueue takes ownership) 3852 * 3853 * flags contains the open flags for the associated bs 3854 * 3855 * returns a pointer to bs_queue, which is either the newly allocated 3856 * bs_queue, or the existing bs_queue being used. 3857 * 3858 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 3859 */ 3860 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 3861 BlockDriverState *bs, 3862 QDict *options, 3863 const BdrvChildClass *klass, 3864 BdrvChildRole role, 3865 bool parent_is_format, 3866 QDict *parent_options, 3867 int parent_flags, 3868 bool keep_old_opts) 3869 { 3870 assert(bs != NULL); 3871 3872 BlockReopenQueueEntry *bs_entry; 3873 BdrvChild *child; 3874 QDict *old_options, *explicit_options, *options_copy; 3875 int flags; 3876 QemuOpts *opts; 3877 3878 /* Make sure that the caller remembered to use a drained section. This is 3879 * important to avoid graph changes between the recursive queuing here and 3880 * bdrv_reopen_multiple(). */ 3881 assert(bs->quiesce_counter > 0); 3882 3883 if (bs_queue == NULL) { 3884 bs_queue = g_new0(BlockReopenQueue, 1); 3885 QTAILQ_INIT(bs_queue); 3886 } 3887 3888 if (!options) { 3889 options = qdict_new(); 3890 } 3891 3892 /* Check if this BlockDriverState is already in the queue */ 3893 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 3894 if (bs == bs_entry->state.bs) { 3895 break; 3896 } 3897 } 3898 3899 /* 3900 * Precedence of options: 3901 * 1. Explicitly passed in options (highest) 3902 * 2. Retained from explicitly set options of bs 3903 * 3. Inherited from parent node 3904 * 4. Retained from effective options of bs 3905 */ 3906 3907 /* Old explicitly set values (don't overwrite by inherited value) */ 3908 if (bs_entry || keep_old_opts) { 3909 old_options = qdict_clone_shallow(bs_entry ? 
3910 bs_entry->state.explicit_options : 3911 bs->explicit_options); 3912 bdrv_join_options(bs, options, old_options); 3913 qobject_unref(old_options); 3914 } 3915 3916 explicit_options = qdict_clone_shallow(options); 3917 3918 /* Inherit from parent node */ 3919 if (parent_options) { 3920 flags = 0; 3921 klass->inherit_options(role, parent_is_format, &flags, options, 3922 parent_flags, parent_options); 3923 } else { 3924 flags = bdrv_get_flags(bs); 3925 } 3926 3927 if (keep_old_opts) { 3928 /* Old values are used for options that aren't set yet */ 3929 old_options = qdict_clone_shallow(bs->options); 3930 bdrv_join_options(bs, options, old_options); 3931 qobject_unref(old_options); 3932 } 3933 3934 /* We have the final set of options so let's update the flags */ 3935 options_copy = qdict_clone_shallow(options); 3936 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 3937 qemu_opts_absorb_qdict(opts, options_copy, NULL); 3938 update_flags_from_options(&flags, opts); 3939 qemu_opts_del(opts); 3940 qobject_unref(options_copy); 3941 3942 /* bdrv_open_inherit() sets and clears some additional flags internally */ 3943 flags &= ~BDRV_O_PROTOCOL; 3944 if (flags & BDRV_O_RDWR) { 3945 flags |= BDRV_O_ALLOW_RDWR; 3946 } 3947 3948 if (!bs_entry) { 3949 bs_entry = g_new0(BlockReopenQueueEntry, 1); 3950 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); 3951 } else { 3952 qobject_unref(bs_entry->state.options); 3953 qobject_unref(bs_entry->state.explicit_options); 3954 } 3955 3956 bs_entry->state.bs = bs; 3957 bs_entry->state.options = options; 3958 bs_entry->state.explicit_options = explicit_options; 3959 bs_entry->state.flags = flags; 3960 3961 /* 3962 * If keep_old_opts is false then it means that unspecified 3963 * options must be reset to their original value. We don't allow 3964 * resetting 'backing' but we need to know if the option is 3965 * missing in order to decide if we have to return an error. 
3966 */ 3967 if (!keep_old_opts) { 3968 bs_entry->state.backing_missing = 3969 !qdict_haskey(options, "backing") && 3970 !qdict_haskey(options, "backing.driver"); 3971 } 3972 3973 QLIST_FOREACH(child, &bs->children, next) { 3974 QDict *new_child_options = NULL; 3975 bool child_keep_old = keep_old_opts; 3976 3977 /* reopen can only change the options of block devices that were 3978 * implicitly created and inherited options. For other (referenced) 3979 * block devices, a syntax like "backing.foo" results in an error. */ 3980 if (child->bs->inherits_from != bs) { 3981 continue; 3982 } 3983 3984 /* Check if the options contain a child reference */ 3985 if (qdict_haskey(options, child->name)) { 3986 const char *childref = qdict_get_try_str(options, child->name); 3987 /* 3988 * The current child must not be reopened if the child 3989 * reference is null or points to a different node. 3990 */ 3991 if (g_strcmp0(childref, child->bs->node_name)) { 3992 continue; 3993 } 3994 /* 3995 * If the child reference points to the current child then 3996 * reopen it with its existing set of options (note that 3997 * it can still inherit new options from the parent). 
3998 */ 3999 child_keep_old = true; 4000 } else { 4001 /* Extract child options ("child-name.*") */ 4002 char *child_key_dot = g_strdup_printf("%s.", child->name); 4003 qdict_extract_subqdict(explicit_options, NULL, child_key_dot); 4004 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 4005 g_free(child_key_dot); 4006 } 4007 4008 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 4009 child->klass, child->role, bs->drv->is_format, 4010 options, flags, child_keep_old); 4011 } 4012 4013 return bs_queue; 4014 } 4015 4016 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 4017 BlockDriverState *bs, 4018 QDict *options, bool keep_old_opts) 4019 { 4020 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, 4021 NULL, 0, keep_old_opts); 4022 } 4023 4024 /* 4025 * Reopen multiple BlockDriverStates atomically & transactionally. 4026 * 4027 * The queue passed in (bs_queue) must have been built up previous 4028 * via bdrv_reopen_queue(). 4029 * 4030 * Reopens all BDS specified in the queue, with the appropriate 4031 * flags. All devices are prepared for reopen, and failure of any 4032 * device will cause all device changes to be abandoned, and intermediate 4033 * data cleaned up. 4034 * 4035 * If all devices prepare successfully, then the changes are committed 4036 * to all devices. 4037 * 4038 * All affected nodes must be drained between bdrv_reopen_queue() and 4039 * bdrv_reopen_multiple(). 
4040 */ 4041 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 4042 { 4043 int ret = -1; 4044 BlockReopenQueueEntry *bs_entry, *next; 4045 Transaction *tran = tran_new(); 4046 g_autoptr(GHashTable) found = NULL; 4047 g_autoptr(GSList) refresh_list = NULL; 4048 4049 assert(bs_queue != NULL); 4050 4051 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4052 ret = bdrv_flush(bs_entry->state.bs); 4053 if (ret < 0) { 4054 error_setg_errno(errp, -ret, "Error flushing drive"); 4055 goto abort; 4056 } 4057 } 4058 4059 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4060 assert(bs_entry->state.bs->quiesce_counter > 0); 4061 ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); 4062 if (ret < 0) { 4063 goto abort; 4064 } 4065 bs_entry->prepared = true; 4066 } 4067 4068 found = g_hash_table_new(NULL, NULL); 4069 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 4070 BDRVReopenState *state = &bs_entry->state; 4071 4072 refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); 4073 if (state->old_backing_bs) { 4074 refresh_list = bdrv_topological_dfs(refresh_list, found, 4075 state->old_backing_bs); 4076 } 4077 } 4078 4079 /* 4080 * Note that file-posix driver rely on permission update done during reopen 4081 * (even if no permission changed), because it wants "new" permissions for 4082 * reconfiguring the fd and that's why it does it in raw_check_perm(), not 4083 * in raw_reopen_prepare() which is called with "old" permissions. 4084 */ 4085 ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); 4086 if (ret < 0) { 4087 goto abort; 4088 } 4089 4090 /* 4091 * If we reach this point, we have success and just need to apply the 4092 * changes. 4093 * 4094 * Reverse order is used to comfort qcow2 driver: on commit it need to write 4095 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But 4096 * children are usually goes after parents in reopen-queue, so go from last 4097 * to first element. 
4098 */ 4099 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4100 bdrv_reopen_commit(&bs_entry->state); 4101 } 4102 4103 tran_commit(tran); 4104 4105 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 4106 BlockDriverState *bs = bs_entry->state.bs; 4107 4108 if (bs->drv->bdrv_reopen_commit_post) { 4109 bs->drv->bdrv_reopen_commit_post(&bs_entry->state); 4110 } 4111 } 4112 4113 ret = 0; 4114 goto cleanup; 4115 4116 abort: 4117 tran_abort(tran); 4118 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4119 if (bs_entry->prepared) { 4120 bdrv_reopen_abort(&bs_entry->state); 4121 } 4122 qobject_unref(bs_entry->state.explicit_options); 4123 qobject_unref(bs_entry->state.options); 4124 } 4125 4126 cleanup: 4127 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 4128 g_free(bs_entry); 4129 } 4130 g_free(bs_queue); 4131 4132 return ret; 4133 } 4134 4135 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, 4136 Error **errp) 4137 { 4138 int ret; 4139 BlockReopenQueue *queue; 4140 QDict *opts = qdict_new(); 4141 4142 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); 4143 4144 bdrv_subtree_drained_begin(bs); 4145 queue = bdrv_reopen_queue(NULL, bs, opts, true); 4146 ret = bdrv_reopen_multiple(queue, errp); 4147 bdrv_subtree_drained_end(bs); 4148 4149 return ret; 4150 } 4151 4152 static bool bdrv_reopen_can_attach(BlockDriverState *parent, 4153 BdrvChild *child, 4154 BlockDriverState *new_child, 4155 Error **errp) 4156 { 4157 AioContext *parent_ctx = bdrv_get_aio_context(parent); 4158 AioContext *child_ctx = bdrv_get_aio_context(new_child); 4159 GSList *ignore; 4160 bool ret; 4161 4162 ignore = g_slist_prepend(NULL, child); 4163 ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); 4164 g_slist_free(ignore); 4165 if (ret) { 4166 return ret; 4167 } 4168 4169 ignore = g_slist_prepend(NULL, child); 4170 ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); 4171 g_slist_free(ignore); 4172 return ret; 4173 } 4174 4175 /* 
4176 * Take a BDRVReopenState and check if the value of 'backing' in the 4177 * reopen_state->options QDict is valid or not. 4178 * 4179 * If 'backing' is missing from the QDict then return 0. 4180 * 4181 * If 'backing' contains the node name of the backing file of 4182 * reopen_state->bs then return 0. 4183 * 4184 * If 'backing' contains a different node name (or is null) then check 4185 * whether the current backing file can be replaced with the new one. 4186 * If that's the case then reopen_state->replace_backing_bs is set to 4187 * true and reopen_state->new_backing_bs contains a pointer to the new 4188 * backing BlockDriverState (or NULL). 4189 * 4190 * Return 0 on success, otherwise return < 0 and set @errp. 4191 */ 4192 static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, 4193 Transaction *set_backings_tran, 4194 Error **errp) 4195 { 4196 BlockDriverState *bs = reopen_state->bs; 4197 BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; 4198 QObject *value; 4199 const char *str; 4200 4201 value = qdict_get(reopen_state->options, "backing"); 4202 if (value == NULL) { 4203 return 0; 4204 } 4205 4206 switch (qobject_type(value)) { 4207 case QTYPE_QNULL: 4208 new_backing_bs = NULL; 4209 break; 4210 case QTYPE_QSTRING: 4211 str = qstring_get_str(qobject_to(QString, value)); 4212 new_backing_bs = bdrv_lookup_bs(NULL, str, errp); 4213 if (new_backing_bs == NULL) { 4214 return -EINVAL; 4215 } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { 4216 error_setg(errp, "Making '%s' a backing file of '%s' " 4217 "would create a cycle", str, bs->node_name); 4218 return -EINVAL; 4219 } 4220 break; 4221 default: 4222 /* 'backing' does not allow any other data type */ 4223 g_assert_not_reached(); 4224 } 4225 4226 /* 4227 * Check AioContext compatibility so that the bdrv_set_backing_hd() call in 4228 * bdrv_reopen_commit() won't fail. 
4229 */ 4230 if (new_backing_bs) { 4231 if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { 4232 return -EINVAL; 4233 } 4234 } 4235 4236 /* 4237 * Ensure that @bs can really handle backing files, because we are 4238 * about to give it one (or swap the existing one) 4239 */ 4240 if (bs->drv->is_filter) { 4241 /* Filters always have a file or a backing child */ 4242 if (!bs->backing) { 4243 error_setg(errp, "'%s' is a %s filter node that does not support a " 4244 "backing child", bs->node_name, bs->drv->format_name); 4245 return -EINVAL; 4246 } 4247 } else if (!bs->drv->supports_backing) { 4248 error_setg(errp, "Driver '%s' of node '%s' does not support backing " 4249 "files", bs->drv->format_name, bs->node_name); 4250 return -EINVAL; 4251 } 4252 4253 /* 4254 * Find the "actual" backing file by skipping all links that point 4255 * to an implicit node, if any (e.g. a commit filter node). 4256 * We cannot use any of the bdrv_skip_*() functions here because 4257 * those return the first explicit node, while we are looking for 4258 * its overlay here. 4259 */ 4260 overlay_bs = bs; 4261 for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); 4262 below_bs && below_bs->implicit; 4263 below_bs = bdrv_filter_or_cow_bs(overlay_bs)) 4264 { 4265 overlay_bs = below_bs; 4266 } 4267 4268 /* If we want to replace the backing file we need some extra checks */ 4269 if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { 4270 int ret; 4271 4272 /* Check for implicit nodes between bs and its backing file */ 4273 if (bs != overlay_bs) { 4274 error_setg(errp, "Cannot change backing link if '%s' has " 4275 "an implicit backing file", bs->node_name); 4276 return -EPERM; 4277 } 4278 /* 4279 * Check if the backing link that we want to replace is frozen. 
4280 * Note that 4281 * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, 4282 * because we know that overlay_bs == bs, and that @bs 4283 * either is a filter that uses ->backing or a COW format BDS 4284 * with bs->drv->supports_backing == true. 4285 */ 4286 if (bdrv_is_backing_chain_frozen(overlay_bs, 4287 child_bs(overlay_bs->backing), errp)) 4288 { 4289 return -EPERM; 4290 } 4291 reopen_state->replace_backing_bs = true; 4292 reopen_state->old_backing_bs = bs->backing ? bs->backing->bs : NULL; 4293 ret = bdrv_set_backing_noperm(bs, new_backing_bs, set_backings_tran, 4294 errp); 4295 if (ret < 0) { 4296 return ret; 4297 } 4298 } 4299 4300 return 0; 4301 } 4302 4303 /* 4304 * Prepares a BlockDriverState for reopen. All changes are staged in the 4305 * 'opaque' field of the BDRVReopenState, which is used and allocated by 4306 * the block driver layer .bdrv_reopen_prepare() 4307 * 4308 * bs is the BlockDriverState to reopen 4309 * flags are the new open flags 4310 * queue is the reopen queue 4311 * 4312 * Returns 0 on success, non-zero on error. On error errp will be set 4313 * as well. 4314 * 4315 * On failure, bdrv_reopen_abort() will be called to clean up any data. 
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 * set_backings_tran is the transaction that is handed on to
 * bdrv_reopen_parse_backing() when the 'backing' option is processed.
 */
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue,
                               Transaction *set_backings_tran, Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    /* Set once drv->bdrv_reopen_prepare() has returned success */
    bool drv_prepared = false;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* 'discard' is taken out of opts; the returned string is freed at exit */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    /* Failure is detected through local_err; ret keeps the callee's value */
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver failed without an error: report a generic one */
                bdrv_refresh_filename(reopen_state->bs);
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* From here on a failure must undo the driver's prepare step */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_backing(reopen_state, set_backings_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                /* child is NULL after the loop if no child has this name */
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type.  In the future, this should be remedied by correctly typing
             * all options.  For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

    /*
     * The success path falls through into this label as well; ret decides
     * whether the driver has to be rolled back, the temporaries are released
     * in either case.
     */
error:
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
4526 */ 4527 static void bdrv_reopen_commit(BDRVReopenState *reopen_state) 4528 { 4529 BlockDriver *drv; 4530 BlockDriverState *bs; 4531 BdrvChild *child; 4532 4533 assert(reopen_state != NULL); 4534 bs = reopen_state->bs; 4535 drv = bs->drv; 4536 assert(drv != NULL); 4537 4538 /* If there are any driver level actions to take */ 4539 if (drv->bdrv_reopen_commit) { 4540 drv->bdrv_reopen_commit(reopen_state); 4541 } 4542 4543 /* set BDS specific flags now */ 4544 qobject_unref(bs->explicit_options); 4545 qobject_unref(bs->options); 4546 4547 bs->explicit_options = reopen_state->explicit_options; 4548 bs->options = reopen_state->options; 4549 bs->open_flags = reopen_state->flags; 4550 bs->detect_zeroes = reopen_state->detect_zeroes; 4551 4552 if (reopen_state->replace_backing_bs) { 4553 qdict_del(bs->explicit_options, "backing"); 4554 qdict_del(bs->options, "backing"); 4555 } 4556 4557 /* Remove child references from bs->options and bs->explicit_options. 4558 * Child options were already removed in bdrv_reopen_queue_child() */ 4559 QLIST_FOREACH(child, &bs->children, next) { 4560 qdict_del(bs->explicit_options, child->name); 4561 qdict_del(bs->options, child->name); 4562 } 4563 bdrv_refresh_limits(bs, NULL, NULL); 4564 } 4565 4566 /* 4567 * Abort the reopen, and delete and free the staged changes in 4568 * reopen_state 4569 */ 4570 static void bdrv_reopen_abort(BDRVReopenState *reopen_state) 4571 { 4572 BlockDriver *drv; 4573 4574 assert(reopen_state != NULL); 4575 drv = reopen_state->bs->drv; 4576 assert(drv != NULL); 4577 4578 if (drv->bdrv_reopen_abort) { 4579 drv->bdrv_reopen_abort(reopen_state); 4580 } 4581 } 4582 4583 4584 static void bdrv_close(BlockDriverState *bs) 4585 { 4586 BdrvAioNotifier *ban, *ban_next; 4587 BdrvChild *child, *next; 4588 4589 assert(!bs->refcnt); 4590 4591 bdrv_drained_begin(bs); /* complete I/O */ 4592 bdrv_flush(bs); 4593 bdrv_drain(bs); /* in case flush left pending I/O */ 4594 4595 if (bs->drv) { 4596 if (bs->drv->bdrv_close) { 4597 
/* Must unfreeze all children, so bdrv_unref_child() works */ 4598 bs->drv->bdrv_close(bs); 4599 } 4600 bs->drv = NULL; 4601 } 4602 4603 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 4604 bdrv_unref_child(bs, child); 4605 } 4606 4607 bs->backing = NULL; 4608 bs->file = NULL; 4609 g_free(bs->opaque); 4610 bs->opaque = NULL; 4611 qatomic_set(&bs->copy_on_read, 0); 4612 bs->backing_file[0] = '\0'; 4613 bs->backing_format[0] = '\0'; 4614 bs->total_sectors = 0; 4615 bs->encrypted = false; 4616 bs->sg = false; 4617 qobject_unref(bs->options); 4618 qobject_unref(bs->explicit_options); 4619 bs->options = NULL; 4620 bs->explicit_options = NULL; 4621 qobject_unref(bs->full_open_options); 4622 bs->full_open_options = NULL; 4623 4624 bdrv_release_named_dirty_bitmaps(bs); 4625 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 4626 4627 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 4628 g_free(ban); 4629 } 4630 QLIST_INIT(&bs->aio_notifiers); 4631 bdrv_drained_end(bs); 4632 4633 /* 4634 * If we're still inside some bdrv_drain_all_begin()/end() sections, end 4635 * them now since this BDS won't exist anymore when bdrv_drain_all_end() 4636 * gets called. 4637 */ 4638 if (bs->quiesce_counter) { 4639 bdrv_drain_all_end_quiesce(bs); 4640 } 4641 } 4642 4643 void bdrv_close_all(void) 4644 { 4645 assert(job_next(NULL) == NULL); 4646 4647 /* Drop references from requests still in flight, such as canceled block 4648 * jobs whose AIO context has not been polled yet */ 4649 bdrv_drain_all(); 4650 4651 blk_remove_all_bs(); 4652 blockdev_close_all_bdrv_states(); 4653 4654 assert(QTAILQ_EMPTY(&all_bdrv_states)); 4655 } 4656 4657 static bool should_update_child(BdrvChild *c, BlockDriverState *to) 4658 { 4659 GQueue *queue; 4660 GHashTable *found; 4661 bool ret; 4662 4663 if (c->klass->stay_at_node) { 4664 return false; 4665 } 4666 4667 /* If the child @c belongs to the BDS @to, replacing the current 4668 * c->bs by @to would mean to create a loop. 
4669 * 4670 * Such a case occurs when appending a BDS to a backing chain. 4671 * For instance, imagine the following chain: 4672 * 4673 * guest device -> node A -> further backing chain... 4674 * 4675 * Now we create a new BDS B which we want to put on top of this 4676 * chain, so we first attach A as its backing node: 4677 * 4678 * node B 4679 * | 4680 * v 4681 * guest device -> node A -> further backing chain... 4682 * 4683 * Finally we want to replace A by B. When doing that, we want to 4684 * replace all pointers to A by pointers to B -- except for the 4685 * pointer from B because (1) that would create a loop, and (2) 4686 * that pointer should simply stay intact: 4687 * 4688 * guest device -> node B 4689 * | 4690 * v 4691 * node A -> further backing chain... 4692 * 4693 * In general, when replacing a node A (c->bs) by a node B (@to), 4694 * if A is a child of B, that means we cannot replace A by B there 4695 * because that would create a loop. Silently detaching A from B 4696 * is also not really an option. So overall just leaving A in 4697 * place there is the most sensible choice. 4698 * 4699 * We would also create a loop in any cases where @c is only 4700 * indirectly referenced by @to. Prevent this by returning false 4701 * if @c is found (by breadth-first search) anywhere in the whole 4702 * subtree of @to. 
4703 */ 4704 4705 ret = true; 4706 found = g_hash_table_new(NULL, NULL); 4707 g_hash_table_add(found, to); 4708 queue = g_queue_new(); 4709 g_queue_push_tail(queue, to); 4710 4711 while (!g_queue_is_empty(queue)) { 4712 BlockDriverState *v = g_queue_pop_head(queue); 4713 BdrvChild *c2; 4714 4715 QLIST_FOREACH(c2, &v->children, next) { 4716 if (c2 == c) { 4717 ret = false; 4718 break; 4719 } 4720 4721 if (g_hash_table_contains(found, c2->bs)) { 4722 continue; 4723 } 4724 4725 g_queue_push_tail(queue, c2->bs); 4726 g_hash_table_add(found, c2->bs); 4727 } 4728 } 4729 4730 g_queue_free(queue); 4731 g_hash_table_destroy(found); 4732 4733 return ret; 4734 } 4735 4736 typedef struct BdrvRemoveFilterOrCowChild { 4737 BdrvChild *child; 4738 bool is_backing; 4739 } BdrvRemoveFilterOrCowChild; 4740 4741 static void bdrv_remove_filter_or_cow_child_abort(void *opaque) 4742 { 4743 BdrvRemoveFilterOrCowChild *s = opaque; 4744 BlockDriverState *parent_bs = s->child->opaque; 4745 4746 QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); 4747 if (s->is_backing) { 4748 parent_bs->backing = s->child; 4749 } else { 4750 parent_bs->file = s->child; 4751 } 4752 4753 /* 4754 * We don't have to restore child->bs here to undo bdrv_replace_child() 4755 * because that function is transactionable and it registered own completion 4756 * entries in @tran, so .abort() for bdrv_replace_child_safe() will be 4757 * called automatically. 
4758 */ 4759 } 4760 4761 static void bdrv_remove_filter_or_cow_child_commit(void *opaque) 4762 { 4763 BdrvRemoveFilterOrCowChild *s = opaque; 4764 4765 bdrv_child_free(s->child); 4766 } 4767 4768 static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { 4769 .abort = bdrv_remove_filter_or_cow_child_abort, 4770 .commit = bdrv_remove_filter_or_cow_child_commit, 4771 .clean = g_free, 4772 }; 4773 4774 /* 4775 * A function to remove backing-chain child of @bs if exists: cow child for 4776 * format nodes (always .backing) and filter child for filters (may be .file or 4777 * .backing) 4778 */ 4779 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, 4780 Transaction *tran) 4781 { 4782 BdrvRemoveFilterOrCowChild *s; 4783 BdrvChild *child = bdrv_filter_or_cow_child(bs); 4784 4785 if (!child) { 4786 return; 4787 } 4788 4789 if (child->bs) { 4790 bdrv_replace_child(child, NULL, tran); 4791 } 4792 4793 s = g_new(BdrvRemoveFilterOrCowChild, 1); 4794 *s = (BdrvRemoveFilterOrCowChild) { 4795 .child = child, 4796 .is_backing = (child == bs->backing), 4797 }; 4798 tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); 4799 4800 QLIST_SAFE_REMOVE(child, next); 4801 if (s->is_backing) { 4802 bs->backing = NULL; 4803 } else { 4804 bs->file = NULL; 4805 } 4806 } 4807 4808 static int bdrv_replace_node_noperm(BlockDriverState *from, 4809 BlockDriverState *to, 4810 bool auto_skip, Transaction *tran, 4811 Error **errp) 4812 { 4813 BdrvChild *c, *next; 4814 4815 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 4816 assert(c->bs == from); 4817 if (!should_update_child(c, to)) { 4818 if (auto_skip) { 4819 continue; 4820 } 4821 error_setg(errp, "Should not change '%s' link to '%s'", 4822 c->name, from->node_name); 4823 return -EINVAL; 4824 } 4825 if (c->frozen) { 4826 error_setg(errp, "Cannot change '%s' link to '%s'", 4827 c->name, from->node_name); 4828 return -EPERM; 4829 } 4830 bdrv_replace_child(c, to, tran); 4831 } 4832 4833 return 0; 4834 } 4835 4836 
/* 4837 * With auto_skip=true bdrv_replace_node_common skips updating from parents 4838 * if it creates a parent-child relation loop or if parent is block-job. 4839 * 4840 * With auto_skip=false the error is returned if from has a parent which should 4841 * not be updated. 4842 * 4843 * With @detach_subchain=true @to must be in a backing chain of @from. In this 4844 * case backing link of the cow-parent of @to is removed. 4845 */ 4846 static int bdrv_replace_node_common(BlockDriverState *from, 4847 BlockDriverState *to, 4848 bool auto_skip, bool detach_subchain, 4849 Error **errp) 4850 { 4851 Transaction *tran = tran_new(); 4852 g_autoptr(GHashTable) found = NULL; 4853 g_autoptr(GSList) refresh_list = NULL; 4854 BlockDriverState *to_cow_parent; 4855 int ret; 4856 4857 if (detach_subchain) { 4858 assert(bdrv_chain_contains(from, to)); 4859 assert(from != to); 4860 for (to_cow_parent = from; 4861 bdrv_filter_or_cow_bs(to_cow_parent) != to; 4862 to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent)) 4863 { 4864 ; 4865 } 4866 } 4867 4868 /* Make sure that @from doesn't go away until we have successfully attached 4869 * all of its parents to @to. */ 4870 bdrv_ref(from); 4871 4872 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 4873 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); 4874 bdrv_drained_begin(from); 4875 4876 /* 4877 * Do the replacement without permission update. 4878 * Replacement may influence the permissions, we should calculate new 4879 * permissions based on new graph. If we fail, we'll roll-back the 4880 * replacement. 
4881 */ 4882 ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp); 4883 if (ret < 0) { 4884 goto out; 4885 } 4886 4887 if (detach_subchain) { 4888 bdrv_remove_filter_or_cow_child(to_cow_parent, tran); 4889 } 4890 4891 found = g_hash_table_new(NULL, NULL); 4892 4893 refresh_list = bdrv_topological_dfs(refresh_list, found, to); 4894 refresh_list = bdrv_topological_dfs(refresh_list, found, from); 4895 4896 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); 4897 if (ret < 0) { 4898 goto out; 4899 } 4900 4901 ret = 0; 4902 4903 out: 4904 tran_finalize(tran, ret); 4905 4906 bdrv_drained_end(from); 4907 bdrv_unref(from); 4908 4909 return ret; 4910 } 4911 4912 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, 4913 Error **errp) 4914 { 4915 return bdrv_replace_node_common(from, to, true, false, errp); 4916 } 4917 4918 int bdrv_drop_filter(BlockDriverState *bs, Error **errp) 4919 { 4920 return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, 4921 errp); 4922 } 4923 4924 /* 4925 * Add new bs contents at the top of an image chain while the chain is 4926 * live, while keeping required fields on the top layer. 4927 * 4928 * This will modify the BlockDriverState fields, and swap contents 4929 * between bs_new and bs_top. Both bs_new and bs_top are modified. 4930 * 4931 * bs_new must not be attached to a BlockBackend and must not have backing 4932 * child. 4933 * 4934 * This function does not create any image files. 
4935 */ 4936 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 4937 Error **errp) 4938 { 4939 int ret; 4940 Transaction *tran = tran_new(); 4941 4942 assert(!bs_new->backing); 4943 4944 ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", 4945 &child_of_bds, bdrv_backing_role(bs_new), 4946 &bs_new->backing, tran, errp); 4947 if (ret < 0) { 4948 goto out; 4949 } 4950 4951 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); 4952 if (ret < 0) { 4953 goto out; 4954 } 4955 4956 ret = bdrv_refresh_perms(bs_new, errp); 4957 out: 4958 tran_finalize(tran, ret); 4959 4960 bdrv_refresh_limits(bs_top, NULL, NULL); 4961 4962 return ret; 4963 } 4964 4965 static void bdrv_delete(BlockDriverState *bs) 4966 { 4967 assert(bdrv_op_blocker_is_empty(bs)); 4968 assert(!bs->refcnt); 4969 4970 /* remove from list, if necessary */ 4971 if (bs->node_name[0] != '\0') { 4972 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 4973 } 4974 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 4975 4976 bdrv_close(bs); 4977 4978 g_free(bs); 4979 } 4980 4981 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, 4982 int flags, Error **errp) 4983 { 4984 BlockDriverState *new_node_bs; 4985 Error *local_err = NULL; 4986 4987 new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); 4988 if (new_node_bs == NULL) { 4989 error_prepend(errp, "Could not create node: "); 4990 return NULL; 4991 } 4992 4993 bdrv_drained_begin(bs); 4994 bdrv_replace_node(bs, new_node_bs, &local_err); 4995 bdrv_drained_end(bs); 4996 4997 if (local_err) { 4998 bdrv_unref(new_node_bs); 4999 error_propagate(errp, local_err); 5000 return NULL; 5001 } 5002 5003 return new_node_bs; 5004 } 5005 5006 /* 5007 * Run consistency checks on an image 5008 * 5009 * Returns 0 if the check could be completed (it doesn't mean that the image is 5010 * free of errors) or -errno when an internal error occurred. The results of the 5011 * check are stored in res. 
5012 */ 5013 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 5014 BdrvCheckResult *res, BdrvCheckMode fix) 5015 { 5016 if (bs->drv == NULL) { 5017 return -ENOMEDIUM; 5018 } 5019 if (bs->drv->bdrv_co_check == NULL) { 5020 return -ENOTSUP; 5021 } 5022 5023 memset(res, 0, sizeof(*res)); 5024 return bs->drv->bdrv_co_check(bs, res, fix); 5025 } 5026 5027 /* 5028 * Return values: 5029 * 0 - success 5030 * -EINVAL - backing format specified, but no file 5031 * -ENOSPC - can't update the backing file because no space is left in the 5032 * image file header 5033 * -ENOTSUP - format driver doesn't support changing the backing file 5034 */ 5035 int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, 5036 const char *backing_fmt, bool warn) 5037 { 5038 BlockDriver *drv = bs->drv; 5039 int ret; 5040 5041 if (!drv) { 5042 return -ENOMEDIUM; 5043 } 5044 5045 /* Backing file format doesn't make sense without a backing file */ 5046 if (backing_fmt && !backing_file) { 5047 return -EINVAL; 5048 } 5049 5050 if (warn && backing_file && !backing_fmt) { 5051 warn_report("Deprecated use of backing file without explicit " 5052 "backing format, use of this image requires " 5053 "potentially unsafe format probing"); 5054 } 5055 5056 if (drv->bdrv_change_backing_file != NULL) { 5057 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 5058 } else { 5059 ret = -ENOTSUP; 5060 } 5061 5062 if (ret == 0) { 5063 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 5064 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 5065 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 5066 backing_file ?: ""); 5067 } 5068 return ret; 5069 } 5070 5071 /* 5072 * Finds the first non-filter node above bs in the chain between 5073 * active and bs. The returned node is either an immediate parent of 5074 * bs, or there are only filter nodes between the two. 
5075 * 5076 * Returns NULL if bs is not found in active's image chain, 5077 * or if active == bs. 5078 * 5079 * Returns the bottommost base image if bs == NULL. 5080 */ 5081 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 5082 BlockDriverState *bs) 5083 { 5084 bs = bdrv_skip_filters(bs); 5085 active = bdrv_skip_filters(active); 5086 5087 while (active) { 5088 BlockDriverState *next = bdrv_backing_chain_next(active); 5089 if (bs == next) { 5090 return active; 5091 } 5092 active = next; 5093 } 5094 5095 return NULL; 5096 } 5097 5098 /* Given a BDS, searches for the base layer. */ 5099 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 5100 { 5101 return bdrv_find_overlay(bs, NULL); 5102 } 5103 5104 /* 5105 * Return true if at least one of the COW (backing) and filter links 5106 * between @bs and @base is frozen. @errp is set if that's the case. 5107 * @base must be reachable from @bs, or NULL. 5108 */ 5109 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 5110 Error **errp) 5111 { 5112 BlockDriverState *i; 5113 BdrvChild *child; 5114 5115 for (i = bs; i != base; i = child_bs(child)) { 5116 child = bdrv_filter_or_cow_child(i); 5117 5118 if (child && child->frozen) { 5119 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 5120 child->name, i->node_name, child->bs->node_name); 5121 return true; 5122 } 5123 } 5124 5125 return false; 5126 } 5127 5128 /* 5129 * Freeze all COW (backing) and filter links between @bs and @base. 5130 * If any of the links is already frozen the operation is aborted and 5131 * none of the links are modified. 5132 * @base must be reachable from @bs, or NULL. 5133 * Returns 0 on success. On failure returns < 0 and sets @errp. 
5134 */ 5135 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 5136 Error **errp) 5137 { 5138 BlockDriverState *i; 5139 BdrvChild *child; 5140 5141 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 5142 return -EPERM; 5143 } 5144 5145 for (i = bs; i != base; i = child_bs(child)) { 5146 child = bdrv_filter_or_cow_child(i); 5147 if (child && child->bs->never_freeze) { 5148 error_setg(errp, "Cannot freeze '%s' link to '%s'", 5149 child->name, child->bs->node_name); 5150 return -EPERM; 5151 } 5152 } 5153 5154 for (i = bs; i != base; i = child_bs(child)) { 5155 child = bdrv_filter_or_cow_child(i); 5156 if (child) { 5157 child->frozen = true; 5158 } 5159 } 5160 5161 return 0; 5162 } 5163 5164 /* 5165 * Unfreeze all COW (backing) and filter links between @bs and @base. 5166 * The caller must ensure that all links are frozen before using this 5167 * function. 5168 * @base must be reachable from @bs, or NULL. 5169 */ 5170 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 5171 { 5172 BlockDriverState *i; 5173 BdrvChild *child; 5174 5175 for (i = bs; i != base; i = child_bs(child)) { 5176 child = bdrv_filter_or_cow_child(i); 5177 if (child) { 5178 assert(child->frozen); 5179 child->frozen = false; 5180 } 5181 } 5182 } 5183 5184 /* 5185 * Drops images above 'base' up to and including 'top', and sets the image 5186 * above 'top' to have base as its backing file. 5187 * 5188 * Requires that the overlay to 'top' is opened r/w, so that the backing file 5189 * information in 'bs' can be properly updated. 
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success. Returns -EIO if 'top' or 'base' has no driver, if
 * 'base' is not in the backing chain of 'top', or if replacing the node
 * fails; if a parent's update_filename() callback fails, its negative
 * return value is returned.
 */
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
                           const char *backing_file_str)
{
    BlockDriverState *explicit_top = top;
    bool update_inherits_from;
    BdrvChild *c;
    Error *local_err = NULL;
    int ret = -EIO;
    g_autoptr(GSList) updated_children = NULL;
    GSList *p;

    /* Keep 'top' referenced and its subtree drained until the exit label */
    bdrv_ref(top);
    bdrv_subtree_drained_begin(top);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    /* Make sure that base is in the backing chain of top */
    if (!bdrv_chain_contains(top, base)) {
        goto exit;
    }

    /* If 'base' recursively inherits from 'top' then we should set
     * base->inherits_from to top->inherits_from after 'top' and all
     * other intermediate nodes have been dropped.
     * If 'top' is an implicit node (e.g. "commit_top") we should skip
     * it because no one inherits from it. We use explicit_top for that. */
    explicit_top = bdrv_skip_implicit_filters(explicit_top);
    update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);

    /* success - we can delete the intermediate states, and link top->base */
    /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
     * we've figured out how they should work. */
    if (!backing_file_str) {
        /* No name given: fall back to the refreshed filename of 'base' */
        bdrv_refresh_filename(base);
        backing_file_str = base->filename;
    }

    /* Remember the parent links of 'top' before they are moved to 'base' */
    QLIST_FOREACH(c, &top->parents, next_parent) {
        updated_children = g_slist_prepend(updated_children, c);
    }

    /*
     * It seems correct to pass detach_subchain=true here, but it triggers
     * one more yet not fixed bug, when due to nested aio_poll loop we switch to
     * another drained section, which modify the graph (for example, removing
     * the child, which we keep in updated_children list). So, it's a TODO.
     *
     * Note, bug triggered if pass detach_subchain=true here and run
     * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
     * That's a FIXME.
     */
    bdrv_replace_node_common(top, base, false, false, &local_err);
    /* Failure is detected through local_err; ret is still -EIO here */
    if (local_err) {
        error_report_err(local_err);
        goto exit;
    }

    /* Let each former parent of 'top' record the new backing file name */
    for (p = updated_children; p; p = p->next) {
        c = p->data;

        if (c->klass->update_filename) {
            ret = c->klass->update_filename(c, base, backing_file_str,
                                            &local_err);
            if (ret < 0) {
                /*
                 * TODO: Actually, we want to rollback all previous iterations
                 * of this loop, and (which is almost impossible) previous
                 * bdrv_replace_node()...
                 *
                 * Note, that c->klass->update_filename may lead to permission
                 * update, so it's a bad idea to call it inside permission
                 * update transaction of bdrv_replace_node.
                 */
                error_report_err(local_err);
                goto exit;
            }
        }
    }

    if (update_inherits_from) {
        base->inherits_from = explicit_top->inherits_from;
    }

    ret = 0;
exit:
    bdrv_subtree_drained_end(top);
    bdrv_unref(top);
    return ret;
}

/**
 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
 * sums the size of all data-bearing children.  (This excludes backing
 * children.)
5309 */ 5310 static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) 5311 { 5312 BdrvChild *child; 5313 int64_t child_size, sum = 0; 5314 5315 QLIST_FOREACH(child, &bs->children, next) { 5316 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 5317 BDRV_CHILD_FILTERED)) 5318 { 5319 child_size = bdrv_get_allocated_file_size(child->bs); 5320 if (child_size < 0) { 5321 return child_size; 5322 } 5323 sum += child_size; 5324 } 5325 } 5326 5327 return sum; 5328 } 5329 5330 /** 5331 * Length of a allocated file in bytes. Sparse files are counted by actual 5332 * allocated space. Return < 0 if error or unknown. 5333 */ 5334 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 5335 { 5336 BlockDriver *drv = bs->drv; 5337 if (!drv) { 5338 return -ENOMEDIUM; 5339 } 5340 if (drv->bdrv_get_allocated_file_size) { 5341 return drv->bdrv_get_allocated_file_size(bs); 5342 } 5343 5344 if (drv->bdrv_file_open) { 5345 /* 5346 * Protocol drivers default to -ENOTSUP (most of their data is 5347 * not stored in any of their children (if they even have any), 5348 * so there is no generic way to figure it out). 5349 */ 5350 return -ENOTSUP; 5351 } else if (drv->is_filter) { 5352 /* Filter drivers default to the size of their filtered child */ 5353 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); 5354 } else { 5355 /* Other drivers default to summing their children's sizes */ 5356 return bdrv_sum_allocated_file_size(bs); 5357 } 5358 } 5359 5360 /* 5361 * bdrv_measure: 5362 * @drv: Format driver 5363 * @opts: Creation options for new image 5364 * @in_bs: Existing image containing data for new image (may be NULL) 5365 * @errp: Error object 5366 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 5367 * or NULL on error 5368 * 5369 * Calculate file size required to create a new image. 5370 * 5371 * If @in_bs is given then space for allocated clusters and zero clusters 5372 * from that image are included in the calculation. 
If @opts contains a 5373 * backing file that is shared by @in_bs then backing clusters may be omitted 5374 * from the calculation. 5375 * 5376 * If @in_bs is NULL then the calculation includes no allocated clusters 5377 * unless a preallocation option is given in @opts. 5378 * 5379 * Note that @in_bs may use a different BlockDriver from @drv. 5380 * 5381 * If an error occurs the @errp pointer is set. 5382 */ 5383 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 5384 BlockDriverState *in_bs, Error **errp) 5385 { 5386 if (!drv->bdrv_measure) { 5387 error_setg(errp, "Block driver '%s' does not support size measurement", 5388 drv->format_name); 5389 return NULL; 5390 } 5391 5392 return drv->bdrv_measure(opts, in_bs, errp); 5393 } 5394 5395 /** 5396 * Return number of sectors on success, -errno on error. 5397 */ 5398 int64_t bdrv_nb_sectors(BlockDriverState *bs) 5399 { 5400 BlockDriver *drv = bs->drv; 5401 5402 if (!drv) 5403 return -ENOMEDIUM; 5404 5405 if (drv->has_variable_length) { 5406 int ret = refresh_total_sectors(bs, bs->total_sectors); 5407 if (ret < 0) { 5408 return ret; 5409 } 5410 } 5411 return bs->total_sectors; 5412 } 5413 5414 /** 5415 * Return length in bytes on success, -errno on error. 5416 * The length is always a multiple of BDRV_SECTOR_SIZE. 5417 */ 5418 int64_t bdrv_getlength(BlockDriverState *bs) 5419 { 5420 int64_t ret = bdrv_nb_sectors(bs); 5421 5422 if (ret < 0) { 5423 return ret; 5424 } 5425 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 5426 return -EFBIG; 5427 } 5428 return ret * BDRV_SECTOR_SIZE; 5429 } 5430 5431 /* return 0 as number of sectors if no device present or error */ 5432 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 5433 { 5434 int64_t nb_sectors = bdrv_nb_sectors(bs); 5435 5436 *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 5437 } 5438 5439 bool bdrv_is_sg(BlockDriverState *bs) 5440 { 5441 return bs->sg; 5442 } 5443 5444 /** 5445 * Return whether the given node supports compressed writes. 5446 */ 5447 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 5448 { 5449 BlockDriverState *filtered; 5450 5451 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 5452 return false; 5453 } 5454 5455 filtered = bdrv_filter_bs(bs); 5456 if (filtered) { 5457 /* 5458 * Filters can only forward compressed writes, so we have to 5459 * check the child. 5460 */ 5461 return bdrv_supports_compressed_writes(filtered); 5462 } 5463 5464 return true; 5465 } 5466 5467 const char *bdrv_get_format_name(BlockDriverState *bs) 5468 { 5469 return bs->drv ? bs->drv->format_name : NULL; 5470 } 5471 5472 static int qsort_strcmp(const void *a, const void *b) 5473 { 5474 return strcmp(*(char *const *)a, *(char *const *)b); 5475 } 5476 5477 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 5478 void *opaque, bool read_only) 5479 { 5480 BlockDriver *drv; 5481 int count = 0; 5482 int i; 5483 const char **formats = NULL; 5484 5485 QLIST_FOREACH(drv, &bdrv_drivers, list) { 5486 if (drv->format_name) { 5487 bool found = false; 5488 int i = count; 5489 5490 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 5491 continue; 5492 } 5493 5494 while (formats && i && !found) { 5495 found = !strcmp(formats[--i], drv->format_name); 5496 } 5497 5498 if (!found) { 5499 formats = g_renew(const char *, formats, count + 1); 5500 formats[count++] = drv->format_name; 5501 } 5502 } 5503 } 5504 5505 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 5506 const char *format_name = block_driver_modules[i].format_name; 5507 5508 if (format_name) { 5509 bool found = false; 5510 int j = count; 5511 5512 if (use_bdrv_whitelist && 5513 !bdrv_format_is_whitelisted(format_name, read_only)) { 5514 continue; 5515 } 5516 5517 while (formats && j && !found) { 5518 found = 
!strcmp(formats[--j], format_name); 5519 } 5520 5521 if (!found) { 5522 formats = g_renew(const char *, formats, count + 1); 5523 formats[count++] = format_name; 5524 } 5525 } 5526 } 5527 5528 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 5529 5530 for (i = 0; i < count; i++) { 5531 it(opaque, formats[i]); 5532 } 5533 5534 g_free(formats); 5535 } 5536 5537 /* This function is to find a node in the bs graph */ 5538 BlockDriverState *bdrv_find_node(const char *node_name) 5539 { 5540 BlockDriverState *bs; 5541 5542 assert(node_name); 5543 5544 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5545 if (!strcmp(node_name, bs->node_name)) { 5546 return bs; 5547 } 5548 } 5549 return NULL; 5550 } 5551 5552 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 5553 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 5554 Error **errp) 5555 { 5556 BlockDeviceInfoList *list; 5557 BlockDriverState *bs; 5558 5559 list = NULL; 5560 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5561 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 5562 if (!info) { 5563 qapi_free_BlockDeviceInfoList(list); 5564 return NULL; 5565 } 5566 QAPI_LIST_PREPEND(list, info); 5567 } 5568 5569 return list; 5570 } 5571 5572 typedef struct XDbgBlockGraphConstructor { 5573 XDbgBlockGraph *graph; 5574 GHashTable *graph_nodes; 5575 } XDbgBlockGraphConstructor; 5576 5577 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 5578 { 5579 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 5580 5581 gr->graph = g_new0(XDbgBlockGraph, 1); 5582 gr->graph_nodes = g_hash_table_new(NULL, NULL); 5583 5584 return gr; 5585 } 5586 5587 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 5588 { 5589 XDbgBlockGraph *graph = gr->graph; 5590 5591 g_hash_table_destroy(gr->graph_nodes); 5592 g_free(gr); 5593 5594 return graph; 5595 } 5596 5597 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 
5598 { 5599 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 5600 5601 if (ret != 0) { 5602 return ret; 5603 } 5604 5605 /* 5606 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 5607 * answer of g_hash_table_lookup. 5608 */ 5609 ret = g_hash_table_size(gr->graph_nodes) + 1; 5610 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 5611 5612 return ret; 5613 } 5614 5615 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 5616 XDbgBlockGraphNodeType type, const char *name) 5617 { 5618 XDbgBlockGraphNode *n; 5619 5620 n = g_new0(XDbgBlockGraphNode, 1); 5621 5622 n->id = xdbg_graph_node_num(gr, node); 5623 n->type = type; 5624 n->name = g_strdup(name); 5625 5626 QAPI_LIST_PREPEND(gr->graph->nodes, n); 5627 } 5628 5629 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 5630 const BdrvChild *child) 5631 { 5632 BlockPermission qapi_perm; 5633 XDbgBlockGraphEdge *edge; 5634 5635 edge = g_new0(XDbgBlockGraphEdge, 1); 5636 5637 edge->parent = xdbg_graph_node_num(gr, parent); 5638 edge->child = xdbg_graph_node_num(gr, child->bs); 5639 edge->name = g_strdup(child->name); 5640 5641 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 5642 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 5643 5644 if (flag & child->perm) { 5645 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 5646 } 5647 if (flag & child->shared_perm) { 5648 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 5649 } 5650 } 5651 5652 QAPI_LIST_PREPEND(gr->graph->edges, edge); 5653 } 5654 5655 5656 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 5657 { 5658 BlockBackend *blk; 5659 BlockJob *job; 5660 BlockDriverState *bs; 5661 BdrvChild *child; 5662 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 5663 5664 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 5665 char *allocated_name = NULL; 5666 const char *name = blk_name(blk); 5667 5668 if (!*name) { 5669 name = allocated_name = 
blk_get_attached_dev_id(blk); 5670 } 5671 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 5672 name); 5673 g_free(allocated_name); 5674 if (blk_root(blk)) { 5675 xdbg_graph_add_edge(gr, blk, blk_root(blk)); 5676 } 5677 } 5678 5679 for (job = block_job_next(NULL); job; job = block_job_next(job)) { 5680 GSList *el; 5681 5682 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 5683 job->job.id); 5684 for (el = job->nodes; el; el = el->next) { 5685 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 5686 } 5687 } 5688 5689 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5690 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 5691 bs->node_name); 5692 QLIST_FOREACH(child, &bs->children, next) { 5693 xdbg_graph_add_edge(gr, bs, child); 5694 } 5695 } 5696 5697 return xdbg_graph_finalize(gr); 5698 } 5699 5700 BlockDriverState *bdrv_lookup_bs(const char *device, 5701 const char *node_name, 5702 Error **errp) 5703 { 5704 BlockBackend *blk; 5705 BlockDriverState *bs; 5706 5707 if (device) { 5708 blk = blk_by_name(device); 5709 5710 if (blk) { 5711 bs = blk_bs(blk); 5712 if (!bs) { 5713 error_setg(errp, "Device '%s' has no medium", device); 5714 } 5715 5716 return bs; 5717 } 5718 } 5719 5720 if (node_name) { 5721 bs = bdrv_find_node(node_name); 5722 5723 if (bs) { 5724 return bs; 5725 } 5726 } 5727 5728 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 5729 device ? device : "", 5730 node_name ? node_name : ""); 5731 return NULL; 5732 } 5733 5734 /* If 'base' is in the same chain as 'top', return true. Otherwise, 5735 * return false. If either argument is NULL, return false. 
*/ 5736 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 5737 { 5738 while (top && top != base) { 5739 top = bdrv_filter_or_cow_bs(top); 5740 } 5741 5742 return top != NULL; 5743 } 5744 5745 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 5746 { 5747 if (!bs) { 5748 return QTAILQ_FIRST(&graph_bdrv_states); 5749 } 5750 return QTAILQ_NEXT(bs, node_list); 5751 } 5752 5753 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 5754 { 5755 if (!bs) { 5756 return QTAILQ_FIRST(&all_bdrv_states); 5757 } 5758 return QTAILQ_NEXT(bs, bs_list); 5759 } 5760 5761 const char *bdrv_get_node_name(const BlockDriverState *bs) 5762 { 5763 return bs->node_name; 5764 } 5765 5766 const char *bdrv_get_parent_name(const BlockDriverState *bs) 5767 { 5768 BdrvChild *c; 5769 const char *name; 5770 5771 /* If multiple parents have a name, just pick the first one. */ 5772 QLIST_FOREACH(c, &bs->parents, next_parent) { 5773 if (c->klass->get_name) { 5774 name = c->klass->get_name(c); 5775 if (name && *name) { 5776 return name; 5777 } 5778 } 5779 } 5780 5781 return NULL; 5782 } 5783 5784 /* TODO check what callers really want: bs->node_name or blk_name() */ 5785 const char *bdrv_get_device_name(const BlockDriverState *bs) 5786 { 5787 return bdrv_get_parent_name(bs) ?: ""; 5788 } 5789 5790 /* This can be used to identify nodes that might not have a device 5791 * name associated. Since node and device names live in the same 5792 * namespace, the result is unambiguous. The exception is if both are 5793 * absent, then this returns an empty (non-null) string. 
 */
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
{
    return bdrv_get_parent_name(bs) ?: bs->node_name;
}

/* Return the open flags of @bs */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}

/*
 * Always returns 1; @bs is not examined.
 * NOTE(review): presumably meant as a .bdrv_has_zero_init callback for
 * drivers whose new images always read as zeroes -- confirm with users.
 */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}

/*
 * Return the driver's answer to .bdrv_has_zero_init() for @bs.
 *
 * Returns 0 if @bs has no driver or has a COW child.  If the driver does
 * not implement the callback, the question is forwarded to the filtered
 * child, if any; otherwise 0 is returned.
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    BlockDriverState *filtered;

    if (!bs->drv) {
        return 0;
    }

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
    if (bdrv_cow_child(bs)) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    filtered = bdrv_filter_bs(bs);
    if (filtered) {
        return bdrv_has_zero_init(filtered);
    }

    /* safe default */
    return 0;
}

/*
 * Return true if @bs was opened with BDRV_O_UNMAP and lists
 * BDRV_REQ_MAY_UNMAP among its supported zero flags.
 */
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return false;
    }

    return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
}

/*
 * Copy the backing file name of @bs into @filename, a buffer of
 * @filename_size bytes.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}

/*
 * Fill @bdi with driver information about @bs.
 *
 * Returns 0 on success, or a negative errno:
 *   -ENOMEDIUM if @bs has no driver;
 *   -ENOTSUP if the driver has no .bdrv_get_info() and there is no filtered
 *            child to forward the query to (@bdi is left untouched);
 *   -EINVAL if the reported cluster size exceeds BDRV_MAX_ALIGNMENT;
 *   or whatever error the driver callback returned.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    int ret;
    BlockDriver *drv = bs->drv;
    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        BlockDriverState *filtered = bdrv_filter_bs(bs);
        if (filtered) {
            return bdrv_get_info(filtered, bdi);
        }
        return -ENOTSUP;
    }
    memset(bdi, 0, sizeof(*bdi));
    ret = drv->bdrv_get_info(bs, bdi);
    if (ret < 0) {
        return ret;
    }

    if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
        return -EINVAL;
    }

    return 0;
}

/*
 * Return the driver's .bdrv_get_specific_info() result for @bs, or NULL if
 * @bs has no driver or the driver does not implement the callback (@errp is
 * not set in that case).
 */
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
                                          Error **errp)
{
    BlockDriver *drv = bs->drv;
    if (drv && drv->bdrv_get_specific_info) {
        return drv->bdrv_get_specific_info(bs, errp);
    }
    return NULL;
}

/*
 * Return the driver's .bdrv_get_specific_stats() result for @bs, or NULL if
 * @bs has no driver or the driver does not implement the callback.
 */
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !drv->bdrv_get_specific_stats) {
        return NULL;
    }
    return drv->bdrv_get_specific_stats(bs);
}

/*
 * Pass @event to the driver's .bdrv_debug_event() callback.  Does nothing
 * if @bs is NULL, has no driver, or the driver has no such callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
        return;
    }

    bs->drv->bdrv_debug_event(bs, event);
}

/*
 * Starting at @bs, follow bdrv_primary_bs() until a node whose driver
 * implements .bdrv_debug_breakpoint() is found.  Returns that node, or NULL
 * if the walk ends first.  A driver implementing .bdrv_debug_breakpoint()
 * must also implement .bdrv_debug_remove_breakpoint() (asserted).
 */
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
        assert(bs->drv->bdrv_debug_remove_breakpoint);
        return bs;
    }

    return NULL;
}

/*
 * Forward to .bdrv_debug_breakpoint() of the debug node found below @bs.
 * Returns -ENOTSUP if there is no such node.
 */
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
    }

    return -ENOTSUP;
}

/*
 * Forward to .bdrv_debug_remove_breakpoint() of the debug node found below
 * @bs.  Returns -ENOTSUP if there is no such node.
 */
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    bs = bdrv_find_debug_node(bs);
    if (bs) {
        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
    }

    return -ENOTSUP;
}

/*
 * Forward to .bdrv_debug_resume() of the first node, starting at @bs and
 * following bdrv_primary_bs(), whose driver implements it.  Returns
 * -ENOTSUP if there is none.
 *
 * Note that, unlike the sibling helpers, this walk also continues past
 * nodes that have no driver.
 */
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
        return bs->drv->bdrv_debug_resume(bs, tag);
    }

    return -ENOTSUP;
}

/*
 * Forward to .bdrv_debug_is_suspended() of the first node, starting at @bs
 * and following bdrv_primary_bs(), whose driver implements it.  Returns
 * false if there is none.
 */
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
        bs = bdrv_primary_bs(bs);
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
        return bs->drv->bdrv_debug_is_suspended(bs, tag);
    }

    return false;
}

/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the node in the backing chain of @bs whose overlay refers to it
 * as @backing_file, or NULL if @bs is NULL, has no driver, @backing_file is
 * NULL, or no node matches. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    bool filenames_refreshed = false;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;
    BlockDriverState *bs_below;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Output buffers for realpath(); freed once at the end of the function */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /*
     * Being largely a legacy function, skip any filters here
     * (because filters do not have normal filenames, so they cannot
     * match anyway; and allowing json:{} filenames is a bit out of
     * scope).
     */
    for (curr_bs = bdrv_skip_filters(bs);
         bdrv_cow_child(curr_bs) != NULL;
         curr_bs = bs_below)
    {
        /* Must be set before any "continue": it is the loop's step value */
        bs_below = bdrv_backing_chain_next(curr_bs);

        if (bdrv_backing_overridden(curr_bs)) {
            /*
             * If the backing file was overridden, we can only compare
             * directly against the backing node's filename.
             */

            if (!filenames_refreshed) {
                /*
                 * This will automatically refresh all of the
                 * filenames in the rest of the backing chain, so we
                 * only need to do this once.
                 */
                bdrv_refresh_filename(bs_below);
                filenames_refreshed = true;
            }

            if (strcmp(backing_file, bs_below->filename) == 0) {
                retval = bs_below;
                break;
            }
        } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            /*
             * If either of the filename paths is actually a protocol, then
             * compare unmodified paths; otherwise make paths relative.
             */
            char *backing_file_full_ret;

            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = bs_below;
                break;
            }
            /* Also check against the full backing filename for the image */
            backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
                                                                   NULL);
            if (backing_file_full_ret) {
                /* Compare first, so the string can be freed before "break" */
                bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
                g_free(backing_file_full_ret);
                if (equal) {
                    retval = bs_below;
                    break;
                }
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
                                                       NULL);
            /* We are going to compare canonicalized absolute pathnames */
            if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
            if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = bs_below;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    return retval;
}

/* Run the initializers registered for MODULE_INIT_BLOCK */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

/* Like bdrv_init(), but first turn on use_bdrv_whitelist */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}

/*
 * Activate @bs: recurse into all children first, then, if @bs has
 * BDRV_O_INACTIVE set, clear the flag, refresh permissions, call the
 * driver's .bdrv_co_invalidate_cache() and refresh the sector count.
 * Finally call the .activate() callback of every parent that has one.
 *
 * Returns 0 on success, a negative errno with @errp set on failure.
 * Every failure path after the flag was cleared sets BDRV_O_INACTIVE again.
 */
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BdrvChild *child, *parent;
    Error *local_err = NULL;
    int ret;
    BdrvDirtyBitmap *bm;

    if (!bs->drv)  {
        return -ENOMEDIUM;
    }

    QLIST_FOREACH(child, &bs->children, next) {
        /* Failure is detected through local_err; the return value is unused */
        bdrv_co_invalidate_cache(child->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }
    }

    /*
     * Update permissions, they may differ for inactive nodes.
     *
     * Note that the required permissions of inactive images are always a
     * subset of the permissions required after activating the image. This
     * allows us to just get the permissions upfront without restricting
     * drv->bdrv_co_invalidate_cache().
     *
     * It also means that in error cases, we don't have to try and revert to
     * the old permissions (which is an operation that could fail, too). We can
     * just keep the extended permissions for the next time that an activation
     * of the image is tried.
     */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        bs->open_flags &= ~BDRV_O_INACTIVE;
        ret = bdrv_refresh_perms(bs, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        if (bs->drv->bdrv_co_invalidate_cache) {
            bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }

        FOR_EACH_DIRTY_BITMAP(bs, bm) {
            bdrv_dirty_bitmap_skip_store(bm, false);
        }

        ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_setg_errno(errp, -ret, "Could not refresh total sector count");
            return ret;
        }
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->activate) {
            parent->klass->activate(parent, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }
    }

    return 0;
}

/*
 * Call bdrv_invalidate_cache() on every node returned by the
 * bdrv_first()/bdrv_next() iterator, holding the node's AioContext lock
 * around each call.  Stops at the first failure, leaving the error in @errp.
 */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_invalidate_cache(bs, errp);
        aio_context_release(aio_context);
        if (ret < 0) {
            /* Leaving the iteration early requires explicit cleanup */
            bdrv_next_cleanup(&it);
            return;
        }
    }
}

/*
 * Return true if @bs has a parent that is itself a BlockDriverState.
 * With @only_active set, parents that have BDRV_O_INACTIVE set are ignored.
 */
static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
    BdrvChild *parent;

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->parent_is_bds) {
            BlockDriverState *parent_bs = parent->opaque;
            if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
                return true;
            }
        }
    }

    return false;
}

/*
 * Inactivate @bs and then, recursively, its children.
 *
 * Nothing is done (and 0 is returned) while @bs still has an active
 * BlockDriverState parent.  Otherwise the driver's .bdrv_inactivate() and
 * the parents' .inactivate() callbacks run before BDRV_O_INACTIVE is set.
 *
 * Returns 0 on success, a negative errno on failure.
 */
static int bdrv_inactivate_recurse(BlockDriverState *bs)
{
    BdrvChild *child, *parent;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /* Make sure that we don't inactivate a child before its parent.
     * It will be covered by recursion from the yet active parent. */
    if (bdrv_has_bds_parent(bs, true)) {
        return 0;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Inactivate this node */
    if (bs->drv->bdrv_inactivate) {
        ret = bs->drv->bdrv_inactivate(bs);
        if (ret < 0) {
            return ret;
        }
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->inactivate) {
            ret = parent->klass->inactivate(parent);
            if (ret < 0) {
                return ret;
            }
        }
    }

    bs->open_flags |= BDRV_O_INACTIVE;

    /*
     * Update permissions, they may differ for inactive nodes.
     * We only tried to loosen restrictions, so errors are not fatal, ignore
     * them.
     */
    bdrv_refresh_perms(bs, NULL);

    /* Recursively inactivate children */
    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * Inactivate every node that has no BlockDriverState parent; the rest is
 * reached through recursion.  Each distinct AioContext used by any node is
 * acquired once up front and released again before returning.
 *
 * Returns 0 on success, or the negative errno of the first failure.
 */
int bdrv_inactivate_all(void)
{
    BlockDriverState *bs = NULL;
    BdrvNextIterator it;
    int ret = 0;
    GSList *aio_ctxs = NULL, *ctx;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* Several nodes may share a context; acquire each one only once */
        if (!g_slist_find(aio_ctxs, aio_context)) {
            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
            aio_context_acquire(aio_context);
        }
    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        /* Nodes with BDS parents are covered by recursion from the last
         * parent that gets inactivated. Don't inactivate them a second
         * time if that has already happened. */
        if (bdrv_has_bds_parent(bs, false)) {
            continue;
        }
        ret = bdrv_inactivate_recurse(bs);
        if (ret < 0) {
            bdrv_next_cleanup(&it);
            goto out;
        }
    }

out:
    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
        AioContext *aio_context = ctx->data;
        aio_context_release(aio_context);
    }
    g_slist_free(aio_ctxs);

    return ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 *
 * Without a driver the answer is false.  A driver without a
 * .bdrv_is_inserted() callback is considered inserted if all of its
 * children are.
 */
bool bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BdrvChild *child;

    if (!drv) {
        return false;
    }
    if (drv->bdrv_is_inserted) {
        return drv->bdrv_is_inserted(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_is_inserted(child->bs)) {
            return false;
        }
    }
    return true;
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 *
 * Does nothing if the driver has no .bdrv_eject() callback.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 *
 * The request is traced even if the driver has no .bdrv_lock_medium()
 * callback to forward it to.
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}

/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}

/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted.
 */
void bdrv_unref(BlockDriverState *bs)
{
    /* Unreferencing NULL is allowed and does nothing */
    if (!bs) {
        return;
    }
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}

/* One entry in a per-operation blocker list of a BlockDriverState */
struct BdrvOpBlocker {
    Error *reason;                      /* not copied; compared by pointer */
    QLIST_ENTRY(BdrvOpBlocker) list;
};

/*
 * Return true if operation @op is blocked on @bs.  In that case @errp is
 * set to a copy of the first blocker's reason, prefixed with the node's
 * device or node name.
 */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        error_propagate_prepend(errp, error_copy(blocker->reason),
                                "Node '%s' is busy: ",
                                bdrv_get_device_or_node_name(bs));
        return true;
    }
    return false;
}

/*
 * Block operation @op on @bs.  @reason is stored by pointer (not copied)
 * and later identifies the blocker in bdrv_op_unblock().
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}

/*
 * Remove every blocker of operation @op on @bs whose reason is the same
 * pointer as @reason.  @reason itself is not freed.
 */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}

/* Block every operation type on @bs with @reason */
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}

/* Remove the blockers identified by @reason from every operation type */
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}

/* Return true if no operation type on @bs has any blocker */
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int i;

    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
            return false;
        }
    }
    return true;
}

/*
 * Create the image @filename in format @fmt.
 *
 * @base_filename and @base_fmt, if non-NULL, set the backing file and
 * backing format options.  @options is an optional "-o"-style option
 * string.  @img_size is the image size, or (uint64_t)-1 if the size is to
 * come from @options or from the backing file.
 *
 * Unless @flags contains BDRV_O_NO_BACKING, a backing file is opened
 * read-only to probe its format (when none was given) and its length
 * (when no size was given).
 *
 * Unless @quiet is set, a "Formatting ..." line is printed to stdout.
 * Errors are reported through @errp.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags, bool quiet,
                     Error **errp)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Create parameter list */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);

    /* Parse -o options */
    if (options) {
        if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
            goto out;
        }
    }

    /* The size may come from @options or from @img_size, but not from both */
    if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
    } else if (img_size != UINT64_C(-1)) {
        error_setg(errp, "The image size must be specified only once");
        goto out;
    }

    if (base_filename) {
        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
                          NULL)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
        if (backing_file[0] == '\0') {
            error_setg(errp, "Expected backing file name, got empty string");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, unless we have a backing
     * file and we have not been forbidden from opening it. */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
    if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
        BlockDriverState *bs;
        char *full_backing;
        int back_flags;
        QDict *backing_options = NULL;

        full_backing =
            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         &local_err);
        if (local_err) {
            goto out;
        }
        assert(full_backing);

        /* backing files always opened read-only */
        back_flags = flags;
        back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        backing_options = qdict_new();
        if (backing_fmt) {
            qdict_put_str(backing_options, "driver", backing_fmt);
        }
        qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

        bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                       &local_err);
        g_free(full_backing);
        if (!bs) {
            error_append_hint(&local_err, "Could not open backing image.\n");
            goto out;
        } else {
            if (!backing_fmt) {
                warn_report("Deprecated use of backing file without explicit "
                            "backing format (detected format of %s)",
                            bs->drv->format_name);
                if (bs->drv != &bdrv_raw) {
                    /*
                     * A probe of raw deserves the most attention:
                     * leaving the backing format out of the image
                     * will ensure bs->probed is set (ensuring we
                     * don't accidentally commit into the backing
                     * file), and allow more spots to warn the users
                     * to fix their toolchain when opening this image
                     * later.  For other images, we can safely record
                     * the format that we probed.
                     */
                    backing_fmt = bs->drv->format_name;
                    qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt,
                                 NULL);
                }
            }
            if (size == -1) {
                /* Opened BS, have no size */
                size = bdrv_getlength(bs);
                if (size < 0) {
                    error_setg_errno(errp, -size, "Could not get size of '%s'",
                                     backing_file);
                    bdrv_unref(bs);
                    goto out;
                }
                qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
            }
            bdrv_unref(bs);
        }
        /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
    } else if (backing_file && !backing_fmt) {
        warn_report("Deprecated use of unopened backing file without "
                    "explicit backing format, use of this image requires "
                    "potentially unsafe format probing");
    }

    if (size == -1) {
        error_setg(errp, "Image creation needs a size parameter");
        goto out;
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
        fflush(stdout);
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* Drop the driver's own error so it is not propagated below */
        error_free(local_err);
        local_err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    error_propagate(errp, local_err);
}

/* Return the AioContext of @bs, or the main loop's AioContext if @bs is NULL */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs ? bs->aio_context : qemu_get_aio_context();
}

/*
 * Reschedule the calling coroutine into the AioContext of @bs.
 *
 * Returns the AioContext the coroutine ran in before, to be handed to
 * bdrv_co_leave() later.
 */
AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
{
    Coroutine *self = qemu_coroutine_self();
    AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
    AioContext *new_ctx;

    /*
     * Increase bs->in_flight to ensure that this operation is completed before
     * moving the node to a different AioContext. Read new_ctx only afterwards.
     */
    bdrv_inc_in_flight(bs);

    new_ctx = bdrv_get_aio_context(bs);
    aio_co_reschedule_self(new_ctx);
    return old_ctx;
}

/*
 * Undo bdrv_co_enter(): reschedule the calling coroutine into @old_ctx and
 * drop the in-flight count taken on @bs.
 */
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
    aio_co_reschedule_self(old_ctx);
    bdrv_dec_in_flight(bs);
}

/*
 * Acquire the AioContext of @bs, unless it is the main loop's AioContext.
 * Must be called from a coroutine running in the main loop's AioContext
 * (both are asserted).
 */
void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    /* In the main thread, bs->aio_context won't change concurrently */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());

    /*
     * We're in coroutine context, so we already hold the lock of the main
     * loop AioContext. Don't lock it twice to avoid deadlocks.
     */
    assert(qemu_in_coroutine());
    if (ctx != qemu_get_aio_context()) {
        aio_context_acquire(ctx);
    }
}

/*
 * Counterpart of bdrv_co_lock(): release the AioContext of @bs, unless it
 * is the main loop's AioContext.
 */
void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    assert(qemu_in_coroutine());
    if (ctx != qemu_get_aio_context()) {
        aio_context_release(ctx);
    }
}

/* Enter coroutine @co in the AioContext of @bs */
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
    aio_co_enter(bdrv_get_aio_context(bs), co);
}

/* Unlink the notifier @ban from its list and free it */
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
    QLIST_REMOVE(ban, list);
    g_free(ban);
}

/*
 * Detach @bs from its current AioContext: call the detach callback of every
 * notifier, then the driver's .bdrv_detach_aio_context(), and finally set
 * bs->aio_context to NULL.  Notifiers marked deleted are freed instead of
 * being called.
 */
static void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf, *baf_tmp;

    /* The flag marks the walk as in progress; nested walks are asserted out */
    assert(!bs->walking_aio_notifiers);
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
        if (baf->deleted) {
            bdrv_do_remove_aio_context_notifier(baf);
        } else {
            baf->detach_aio_context(baf->opaque);
        }
    }
    /* Never mind iterating again to check for ->deleted.  bdrv_close() will
     * remove remaining aio notifiers if we aren't called again.
     */
    bs->walking_aio_notifiers = false;

    if (bs->drv && bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }

    /* Mirrors the aio_disable_external() in bdrv_attach_aio_context() */
    if (bs->quiesce_counter) {
        aio_enable_external(bs->aio_context);
    }
    bs->aio_context = NULL;
}

/*
 * Attach @bs to @new_context: set bs->aio_context, call the driver's
 * .bdrv_attach_aio_context(), then the attach callback of every notifier.
 * Notifiers marked deleted are freed instead of being called.
 */
static void bdrv_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
{
    BdrvAioNotifier *ban, *ban_tmp;

    if (bs->quiesce_counter) {
        aio_disable_external(new_context);
    }

    bs->aio_context = new_context;

    if (bs->drv && bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }

    assert(!bs->walking_aio_notifiers);
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
        if (ban->deleted) {
            bdrv_do_remove_aio_context_notifier(ban);
        } else {
            ban->attached_aio_context(new_context, ban->opaque);
        }
    }
    bs->walking_aio_notifiers = false;
}

/*
 * Changes the AioContext used for fd handlers, timers, and BHs by this
 * BlockDriverState and all its children and parents.
 *
 * Must be called from the main AioContext.
 *
 * The caller must own the AioContext lock for the old AioContext of bs, but it
 * must not own the AioContext lock for new_context (unless new_context is the
 * same as the current context of bs).
 *
 * @ignore will accumulate all visited BdrvChild object. The caller is
 * responsible for freeing the list afterwards.
6765 */ 6766 void bdrv_set_aio_context_ignore(BlockDriverState *bs, 6767 AioContext *new_context, GSList **ignore) 6768 { 6769 AioContext *old_context = bdrv_get_aio_context(bs); 6770 GSList *children_to_process = NULL; 6771 GSList *parents_to_process = NULL; 6772 GSList *entry; 6773 BdrvChild *child, *parent; 6774 6775 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6776 6777 if (old_context == new_context) { 6778 return; 6779 } 6780 6781 bdrv_drained_begin(bs); 6782 6783 QLIST_FOREACH(child, &bs->children, next) { 6784 if (g_slist_find(*ignore, child)) { 6785 continue; 6786 } 6787 *ignore = g_slist_prepend(*ignore, child); 6788 children_to_process = g_slist_prepend(children_to_process, child); 6789 } 6790 6791 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6792 if (g_slist_find(*ignore, parent)) { 6793 continue; 6794 } 6795 *ignore = g_slist_prepend(*ignore, parent); 6796 parents_to_process = g_slist_prepend(parents_to_process, parent); 6797 } 6798 6799 for (entry = children_to_process; 6800 entry != NULL; 6801 entry = g_slist_next(entry)) { 6802 child = entry->data; 6803 bdrv_set_aio_context_ignore(child->bs, new_context, ignore); 6804 } 6805 g_slist_free(children_to_process); 6806 6807 for (entry = parents_to_process; 6808 entry != NULL; 6809 entry = g_slist_next(entry)) { 6810 parent = entry->data; 6811 assert(parent->klass->set_aio_ctx); 6812 parent->klass->set_aio_ctx(parent, new_context, ignore); 6813 } 6814 g_slist_free(parents_to_process); 6815 6816 bdrv_detach_aio_context(bs); 6817 6818 /* Acquire the new context, if necessary */ 6819 if (qemu_get_aio_context() != new_context) { 6820 aio_context_acquire(new_context); 6821 } 6822 6823 bdrv_attach_aio_context(bs, new_context); 6824 6825 /* 6826 * If this function was recursively called from 6827 * bdrv_set_aio_context_ignore(), there may be nodes in the 6828 * subtree that have not yet been moved to the new AioContext. 6829 * Release the old one so bdrv_drained_end() can poll them. 
6830 */ 6831 if (qemu_get_aio_context() != old_context) { 6832 aio_context_release(old_context); 6833 } 6834 6835 bdrv_drained_end(bs); 6836 6837 if (qemu_get_aio_context() != old_context) { 6838 aio_context_acquire(old_context); 6839 } 6840 if (qemu_get_aio_context() != new_context) { 6841 aio_context_release(new_context); 6842 } 6843 } 6844 6845 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, 6846 GSList **ignore, Error **errp) 6847 { 6848 if (g_slist_find(*ignore, c)) { 6849 return true; 6850 } 6851 *ignore = g_slist_prepend(*ignore, c); 6852 6853 /* 6854 * A BdrvChildClass that doesn't handle AioContext changes cannot 6855 * tolerate any AioContext changes 6856 */ 6857 if (!c->klass->can_set_aio_ctx) { 6858 char *user = bdrv_child_user_desc(c); 6859 error_setg(errp, "Changing iothreads is not supported by %s", user); 6860 g_free(user); 6861 return false; 6862 } 6863 if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { 6864 assert(!errp || *errp); 6865 return false; 6866 } 6867 return true; 6868 } 6869 6870 bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, 6871 GSList **ignore, Error **errp) 6872 { 6873 if (g_slist_find(*ignore, c)) { 6874 return true; 6875 } 6876 *ignore = g_slist_prepend(*ignore, c); 6877 return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); 6878 } 6879 6880 /* @ignore will accumulate all visited BdrvChild object. The caller is 6881 * responsible for freeing the list afterwards. 
*/ 6882 bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6883 GSList **ignore, Error **errp) 6884 { 6885 BdrvChild *c; 6886 6887 if (bdrv_get_aio_context(bs) == ctx) { 6888 return true; 6889 } 6890 6891 QLIST_FOREACH(c, &bs->parents, next_parent) { 6892 if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { 6893 return false; 6894 } 6895 } 6896 QLIST_FOREACH(c, &bs->children, next) { 6897 if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { 6898 return false; 6899 } 6900 } 6901 6902 return true; 6903 } 6904 6905 int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6906 BdrvChild *ignore_child, Error **errp) 6907 { 6908 GSList *ignore; 6909 bool ret; 6910 6911 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 6912 ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); 6913 g_slist_free(ignore); 6914 6915 if (!ret) { 6916 return -EPERM; 6917 } 6918 6919 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 6920 bdrv_set_aio_context_ignore(bs, ctx, &ignore); 6921 g_slist_free(ignore); 6922 6923 return 0; 6924 } 6925 6926 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6927 Error **errp) 6928 { 6929 return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); 6930 } 6931 6932 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 6933 void (*attached_aio_context)(AioContext *new_context, void *opaque), 6934 void (*detach_aio_context)(void *opaque), void *opaque) 6935 { 6936 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 6937 *ban = (BdrvAioNotifier){ 6938 .attached_aio_context = attached_aio_context, 6939 .detach_aio_context = detach_aio_context, 6940 .opaque = opaque 6941 }; 6942 6943 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 6944 } 6945 6946 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 6947 void (*attached_aio_context)(AioContext *, 6948 void *), 6949 void (*detach_aio_context)(void *), 6950 void *opaque) 6951 { 6952 BdrvAioNotifier 
*ban, *ban_next; 6953 6954 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 6955 if (ban->attached_aio_context == attached_aio_context && 6956 ban->detach_aio_context == detach_aio_context && 6957 ban->opaque == opaque && 6958 ban->deleted == false) 6959 { 6960 if (bs->walking_aio_notifiers) { 6961 ban->deleted = true; 6962 } else { 6963 bdrv_do_remove_aio_context_notifier(ban); 6964 } 6965 return; 6966 } 6967 } 6968 6969 abort(); 6970 } 6971 6972 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 6973 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 6974 bool force, 6975 Error **errp) 6976 { 6977 if (!bs->drv) { 6978 error_setg(errp, "Node is ejected"); 6979 return -ENOMEDIUM; 6980 } 6981 if (!bs->drv->bdrv_amend_options) { 6982 error_setg(errp, "Block driver '%s' does not support option amendment", 6983 bs->drv->format_name); 6984 return -ENOTSUP; 6985 } 6986 return bs->drv->bdrv_amend_options(bs, opts, status_cb, 6987 cb_opaque, force, errp); 6988 } 6989 6990 /* 6991 * This function checks whether the given @to_replace is allowed to be 6992 * replaced by a node that always shows the same data as @bs. This is 6993 * used for example to verify whether the mirror job can replace 6994 * @to_replace by the target mirrored from @bs. 6995 * To be replaceable, @bs and @to_replace may either be guaranteed to 6996 * always show the same data (because they are only connected through 6997 * filters), or some driver may allow replacing one of its children 6998 * because it can guarantee that this child's data is not visible at 6999 * all (for example, for dissenting quorum children that have no other 7000 * parents). 
7001 */ 7002 bool bdrv_recurse_can_replace(BlockDriverState *bs, 7003 BlockDriverState *to_replace) 7004 { 7005 BlockDriverState *filtered; 7006 7007 if (!bs || !bs->drv) { 7008 return false; 7009 } 7010 7011 if (bs == to_replace) { 7012 return true; 7013 } 7014 7015 /* See what the driver can do */ 7016 if (bs->drv->bdrv_recurse_can_replace) { 7017 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 7018 } 7019 7020 /* For filters without an own implementation, we can recurse on our own */ 7021 filtered = bdrv_filter_bs(bs); 7022 if (filtered) { 7023 return bdrv_recurse_can_replace(filtered, to_replace); 7024 } 7025 7026 /* Safe default */ 7027 return false; 7028 } 7029 7030 /* 7031 * Check whether the given @node_name can be replaced by a node that 7032 * has the same data as @parent_bs. If so, return @node_name's BDS; 7033 * NULL otherwise. 7034 * 7035 * @node_name must be a (recursive) *child of @parent_bs (or this 7036 * function will return NULL). 7037 * 7038 * The result (whether the node can be replaced or not) is only valid 7039 * for as long as no graph or permission changes occur. 7040 */ 7041 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 7042 const char *node_name, Error **errp) 7043 { 7044 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 7045 AioContext *aio_context; 7046 7047 if (!to_replace_bs) { 7048 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 7049 return NULL; 7050 } 7051 7052 aio_context = bdrv_get_aio_context(to_replace_bs); 7053 aio_context_acquire(aio_context); 7054 7055 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 7056 to_replace_bs = NULL; 7057 goto out; 7058 } 7059 7060 /* We don't want arbitrary node of the BDS chain to be replaced only the top 7061 * most non filter in order to prevent data corruption. 7062 * Another benefit is that this tests exclude backing files which are 7063 * blocked by the backing blockers. 
7064 */ 7065 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 7066 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 7067 "because it cannot be guaranteed that doing so would not " 7068 "lead to an abrupt change of visible data", 7069 node_name, parent_bs->node_name); 7070 to_replace_bs = NULL; 7071 goto out; 7072 } 7073 7074 out: 7075 aio_context_release(aio_context); 7076 return to_replace_bs; 7077 } 7078 7079 /** 7080 * Iterates through the list of runtime option keys that are said to 7081 * be "strong" for a BDS. An option is called "strong" if it changes 7082 * a BDS's data. For example, the null block driver's "size" and 7083 * "read-zeroes" options are strong, but its "latency-ns" option is 7084 * not. 7085 * 7086 * If a key returned by this function ends with a dot, all options 7087 * starting with that prefix are strong. 7088 */ 7089 static const char *const *strong_options(BlockDriverState *bs, 7090 const char *const *curopt) 7091 { 7092 static const char *const global_options[] = { 7093 "driver", "filename", NULL 7094 }; 7095 7096 if (!curopt) { 7097 return &global_options[0]; 7098 } 7099 7100 curopt++; 7101 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 7102 curopt = bs->drv->strong_runtime_opts; 7103 } 7104 7105 return (curopt && *curopt) ? curopt : NULL; 7106 } 7107 7108 /** 7109 * Copies all strong runtime options from bs->options to the given 7110 * QDict. The set of strong option keys is determined by invoking 7111 * strong_options(). 7112 * 7113 * Returns true iff any strong option was present in bs->options (and 7114 * thus copied to the target QDict) with the exception of "filename" 7115 * and "driver". The caller is expected to use this value to decide 7116 * whether the existence of strong options prevents the generation of 7117 * a plain filename. 
7118 */ 7119 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 7120 { 7121 bool found_any = false; 7122 const char *const *option_name = NULL; 7123 7124 if (!bs->drv) { 7125 return false; 7126 } 7127 7128 while ((option_name = strong_options(bs, option_name))) { 7129 bool option_given = false; 7130 7131 assert(strlen(*option_name) > 0); 7132 if ((*option_name)[strlen(*option_name) - 1] != '.') { 7133 QObject *entry = qdict_get(bs->options, *option_name); 7134 if (!entry) { 7135 continue; 7136 } 7137 7138 qdict_put_obj(d, *option_name, qobject_ref(entry)); 7139 option_given = true; 7140 } else { 7141 const QDictEntry *entry; 7142 for (entry = qdict_first(bs->options); entry; 7143 entry = qdict_next(bs->options, entry)) 7144 { 7145 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 7146 qdict_put_obj(d, qdict_entry_key(entry), 7147 qobject_ref(qdict_entry_value(entry))); 7148 option_given = true; 7149 } 7150 } 7151 } 7152 7153 /* While "driver" and "filename" need to be included in a JSON filename, 7154 * their existence does not prohibit generation of a plain filename. */ 7155 if (!found_any && option_given && 7156 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 7157 { 7158 found_any = true; 7159 } 7160 } 7161 7162 if (!qdict_haskey(d, "driver")) { 7163 /* Drivers created with bdrv_new_open_driver() may not have a 7164 * @driver option. Add it here. */ 7165 qdict_put_str(d, "driver", bs->drv->format_name); 7166 } 7167 7168 return found_any; 7169 } 7170 7171 /* Note: This function may return false positives; it may return true 7172 * even if opening the backing file specified by bs's image header 7173 * would result in exactly bs->backing. 
*/
bool bdrv_backing_overridden(BlockDriverState *bs)
{
    if (!bs->backing) {
        /*
         * No backing BDS, so if the image header reports any backing
         * file, it must have been suppressed
         */
        return bs->auto_backing_file[0] != '\0';
    }

    /* Overridden iff the attached backing node's filename differs */
    return strcmp(bs->auto_backing_file, bs->backing->bs->filename) != 0;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
7197 */ 7198 void bdrv_refresh_filename(BlockDriverState *bs) 7199 { 7200 BlockDriver *drv = bs->drv; 7201 BdrvChild *child; 7202 BlockDriverState *primary_child_bs; 7203 QDict *opts; 7204 bool backing_overridden; 7205 bool generate_json_filename; /* Whether our default implementation should 7206 fill exact_filename (false) or not (true) */ 7207 7208 if (!drv) { 7209 return; 7210 } 7211 7212 /* This BDS's file name may depend on any of its children's file names, so 7213 * refresh those first */ 7214 QLIST_FOREACH(child, &bs->children, next) { 7215 bdrv_refresh_filename(child->bs); 7216 } 7217 7218 if (bs->implicit) { 7219 /* For implicit nodes, just copy everything from the single child */ 7220 child = QLIST_FIRST(&bs->children); 7221 assert(QLIST_NEXT(child, next) == NULL); 7222 7223 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 7224 child->bs->exact_filename); 7225 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 7226 7227 qobject_unref(bs->full_open_options); 7228 bs->full_open_options = qobject_ref(child->bs->full_open_options); 7229 7230 return; 7231 } 7232 7233 backing_overridden = bdrv_backing_overridden(bs); 7234 7235 if (bs->open_flags & BDRV_O_NO_IO) { 7236 /* Without I/O, the backing file does not change anything. 7237 * Therefore, in such a case (primarily qemu-img), we can 7238 * pretend the backing file has not been overridden even if 7239 * it technically has been. 
*/ 7240 backing_overridden = false; 7241 } 7242 7243 /* Gather the options QDict */ 7244 opts = qdict_new(); 7245 generate_json_filename = append_strong_runtime_options(opts, bs); 7246 generate_json_filename |= backing_overridden; 7247 7248 if (drv->bdrv_gather_child_options) { 7249 /* Some block drivers may not want to present all of their children's 7250 * options, or name them differently from BdrvChild.name */ 7251 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 7252 } else { 7253 QLIST_FOREACH(child, &bs->children, next) { 7254 if (child == bs->backing && !backing_overridden) { 7255 /* We can skip the backing BDS if it has not been overridden */ 7256 continue; 7257 } 7258 7259 qdict_put(opts, child->name, 7260 qobject_ref(child->bs->full_open_options)); 7261 } 7262 7263 if (backing_overridden && !bs->backing) { 7264 /* Force no backing file */ 7265 qdict_put_null(opts, "backing"); 7266 } 7267 } 7268 7269 qobject_unref(bs->full_open_options); 7270 bs->full_open_options = opts; 7271 7272 primary_child_bs = bdrv_primary_bs(bs); 7273 7274 if (drv->bdrv_refresh_filename) { 7275 /* Obsolete information is of no use here, so drop the old file name 7276 * information before refreshing it */ 7277 bs->exact_filename[0] = '\0'; 7278 7279 drv->bdrv_refresh_filename(bs); 7280 } else if (primary_child_bs) { 7281 /* 7282 * Try to reconstruct valid information from the underlying 7283 * file -- this only works for format nodes (filter nodes 7284 * cannot be probed and as such must be selected by the user 7285 * either through an options dict, or through a special 7286 * filename which the filter driver must construct in its 7287 * .bdrv_refresh_filename() implementation). 
7288 */ 7289 7290 bs->exact_filename[0] = '\0'; 7291 7292 /* 7293 * We can use the underlying file's filename if: 7294 * - it has a filename, 7295 * - the current BDS is not a filter, 7296 * - the file is a protocol BDS, and 7297 * - opening that file (as this BDS's format) will automatically create 7298 * the BDS tree we have right now, that is: 7299 * - the user did not significantly change this BDS's behavior with 7300 * some explicit (strong) options 7301 * - no non-file child of this BDS has been overridden by the user 7302 * Both of these conditions are represented by generate_json_filename. 7303 */ 7304 if (primary_child_bs->exact_filename[0] && 7305 primary_child_bs->drv->bdrv_file_open && 7306 !drv->is_filter && !generate_json_filename) 7307 { 7308 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 7309 } 7310 } 7311 7312 if (bs->exact_filename[0]) { 7313 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 7314 } else { 7315 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 7316 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 7317 json->str) >= sizeof(bs->filename)) { 7318 /* Give user a hint if we truncated things. 
*/ 7319 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 7320 } 7321 g_string_free(json, true); 7322 } 7323 } 7324 7325 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 7326 { 7327 BlockDriver *drv = bs->drv; 7328 BlockDriverState *child_bs; 7329 7330 if (!drv) { 7331 error_setg(errp, "Node '%s' is ejected", bs->node_name); 7332 return NULL; 7333 } 7334 7335 if (drv->bdrv_dirname) { 7336 return drv->bdrv_dirname(bs, errp); 7337 } 7338 7339 child_bs = bdrv_primary_bs(bs); 7340 if (child_bs) { 7341 return bdrv_dirname(child_bs, errp); 7342 } 7343 7344 bdrv_refresh_filename(bs); 7345 if (bs->exact_filename[0] != '\0') { 7346 return path_combine(bs->exact_filename, ""); 7347 } 7348 7349 error_setg(errp, "Cannot generate a base directory for %s nodes", 7350 drv->format_name); 7351 return NULL; 7352 } 7353 7354 /* 7355 * Hot add/remove a BDS's child. So the user can take a child offline when 7356 * it is broken and take a new child online 7357 */ 7358 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 7359 Error **errp) 7360 { 7361 7362 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 7363 error_setg(errp, "The node %s does not support adding a child", 7364 bdrv_get_device_or_node_name(parent_bs)); 7365 return; 7366 } 7367 7368 if (!QLIST_EMPTY(&child_bs->parents)) { 7369 error_setg(errp, "The node %s already has a parent", 7370 child_bs->node_name); 7371 return; 7372 } 7373 7374 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 7375 } 7376 7377 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 7378 { 7379 BdrvChild *tmp; 7380 7381 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 7382 error_setg(errp, "The node %s does not support removing a child", 7383 bdrv_get_device_or_node_name(parent_bs)); 7384 return; 7385 } 7386 7387 QLIST_FOREACH(tmp, &parent_bs->children, next) { 7388 if (tmp == child) { 7389 break; 7390 } 7391 } 7392 7393 if (!tmp) { 7394 error_setg(errp, "The node 
%s does not have a child named %s", 7395 bdrv_get_device_or_node_name(parent_bs), 7396 bdrv_get_device_or_node_name(child->bs)); 7397 return; 7398 } 7399 7400 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 7401 } 7402 7403 int bdrv_make_empty(BdrvChild *c, Error **errp) 7404 { 7405 BlockDriver *drv = c->bs->drv; 7406 int ret; 7407 7408 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 7409 7410 if (!drv->bdrv_make_empty) { 7411 error_setg(errp, "%s does not support emptying nodes", 7412 drv->format_name); 7413 return -ENOTSUP; 7414 } 7415 7416 ret = drv->bdrv_make_empty(c->bs); 7417 if (ret < 0) { 7418 error_setg_errno(errp, -ret, "Failed to empty %s", 7419 c->bs->filename); 7420 return ret; 7421 } 7422 7423 return 0; 7424 } 7425 7426 /* 7427 * Return the child that @bs acts as an overlay for, and from which data may be 7428 * copied in COW or COR operations. Usually this is the backing file. 7429 */ 7430 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 7431 { 7432 if (!bs || !bs->drv) { 7433 return NULL; 7434 } 7435 7436 if (bs->drv->is_filter) { 7437 return NULL; 7438 } 7439 7440 if (!bs->backing) { 7441 return NULL; 7442 } 7443 7444 assert(bs->backing->role & BDRV_CHILD_COW); 7445 return bs->backing; 7446 } 7447 7448 /* 7449 * If @bs acts as a filter for exactly one of its children, return 7450 * that child. 7451 */ 7452 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 7453 { 7454 BdrvChild *c; 7455 7456 if (!bs || !bs->drv) { 7457 return NULL; 7458 } 7459 7460 if (!bs->drv->is_filter) { 7461 return NULL; 7462 } 7463 7464 /* Only one of @backing or @file may be used */ 7465 assert(!(bs->backing && bs->file)); 7466 7467 c = bs->backing ?: bs->file; 7468 if (!c) { 7469 return NULL; 7470 } 7471 7472 assert(c->role & BDRV_CHILD_FILTERED); 7473 return c; 7474 } 7475 7476 /* 7477 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 7478 * whichever is non-NULL. 7479 * 7480 * Return NULL if both are NULL. 
7481 */ 7482 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 7483 { 7484 BdrvChild *cow_child = bdrv_cow_child(bs); 7485 BdrvChild *filter_child = bdrv_filter_child(bs); 7486 7487 /* Filter nodes cannot have COW backing files */ 7488 assert(!(cow_child && filter_child)); 7489 7490 return cow_child ?: filter_child; 7491 } 7492 7493 /* 7494 * Return the primary child of this node: For filters, that is the 7495 * filtered child. For other nodes, that is usually the child storing 7496 * metadata. 7497 * (A generally more helpful description is that this is (usually) the 7498 * child that has the same filename as @bs.) 7499 * 7500 * Drivers do not necessarily have a primary child; for example quorum 7501 * does not. 7502 */ 7503 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 7504 { 7505 BdrvChild *c, *found = NULL; 7506 7507 QLIST_FOREACH(c, &bs->children, next) { 7508 if (c->role & BDRV_CHILD_PRIMARY) { 7509 assert(!found); 7510 found = c; 7511 } 7512 } 7513 7514 return found; 7515 } 7516 7517 static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, 7518 bool stop_on_explicit_filter) 7519 { 7520 BdrvChild *c; 7521 7522 if (!bs) { 7523 return NULL; 7524 } 7525 7526 while (!(stop_on_explicit_filter && !bs->implicit)) { 7527 c = bdrv_filter_child(bs); 7528 if (!c) { 7529 /* 7530 * A filter that is embedded in a working block graph must 7531 * have a child. Assert this here so this function does 7532 * not return a filter node that is not expected by the 7533 * caller. 7534 */ 7535 assert(!bs->drv || !bs->drv->is_filter); 7536 break; 7537 } 7538 bs = c->bs; 7539 } 7540 /* 7541 * Note that this treats nodes with bs->drv == NULL as not being 7542 * filters (bs->drv == NULL should be replaced by something else 7543 * anyway). 7544 * The advantage of this behavior is that this function will thus 7545 * always return a non-NULL value (given a non-NULL @bs). 
7546 */ 7547 7548 return bs; 7549 } 7550 7551 /* 7552 * Return the first BDS that has not been added implicitly or that 7553 * does not have a filtered child down the chain starting from @bs 7554 * (including @bs itself). 7555 */ 7556 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 7557 { 7558 return bdrv_do_skip_filters(bs, true); 7559 } 7560 7561 /* 7562 * Return the first BDS that does not have a filtered child down the 7563 * chain starting from @bs (including @bs itself). 7564 */ 7565 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 7566 { 7567 return bdrv_do_skip_filters(bs, false); 7568 } 7569 7570 /* 7571 * For a backing chain, return the first non-filter backing image of 7572 * the first non-filter image. 7573 */ 7574 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 7575 { 7576 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 7577 } 7578