1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include "block/trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "block/fuse.h" 30 #include "block/nbd.h" 31 #include "block/qdict.h" 32 #include "qemu/error-report.h" 33 #include "block/module_block.h" 34 #include "qemu/main-loop.h" 35 #include "qemu/module.h" 36 #include "qapi/error.h" 37 #include "qapi/qmp/qdict.h" 38 #include "qapi/qmp/qjson.h" 39 #include "qapi/qmp/qnull.h" 40 #include "qapi/qmp/qstring.h" 41 #include "qapi/qobject-output-visitor.h" 42 #include "qapi/qapi-visit-block-core.h" 43 #include "sysemu/block-backend.h" 44 #include "sysemu/sysemu.h" 45 #include "qemu/notify.h" 46 #include "qemu/option.h" 47 #include "qemu/coroutine.h" 48 #include "block/qapi.h" 49 #include "qemu/timer.h" 50 #include "qemu/cutils.h" 51 #include "qemu/id.h" 52 #include "block/coroutines.h" 53 54 #ifdef CONFIG_BSD 55 #include <sys/ioctl.h> 56 #include <sys/queue.h> 57 #ifndef __DragonFly__ 58 #include <sys/disk.h> 59 #endif 60 #endif 61 62 #ifdef _WIN32 63 #include <windows.h> 64 #endif 65 66 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 67 68 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 69 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 70 71 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 72 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 73 74 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 75 QLIST_HEAD_INITIALIZER(bdrv_drivers); 76 77 static BlockDriverState *bdrv_open_inherit(const char *filename, 78 const char *reference, 79 QDict *options, int flags, 80 BlockDriverState *parent, 81 const BdrvChildClass *child_class, 82 BdrvChildRole child_role, 83 Error **errp); 84 85 /* If non-zero, use only whitelisted block drivers */ 86 static int use_bdrv_whitelist; 87 88 #ifdef _WIN32 89 static int is_windows_drive_prefix(const char *filename) 90 { 91 return (((filename[0] >= 'a' && filename[0] <= 'z') || 92 (filename[0] >= 'A' && filename[0] <= 'Z')) && 
93 filename[1] == ':'); 94 } 95 96 int is_windows_drive(const char *filename) 97 { 98 if (is_windows_drive_prefix(filename) && 99 filename[2] == '\0') 100 return 1; 101 if (strstart(filename, "\\\\.\\", NULL) || 102 strstart(filename, "//./", NULL)) 103 return 1; 104 return 0; 105 } 106 #endif 107 108 size_t bdrv_opt_mem_align(BlockDriverState *bs) 109 { 110 if (!bs || !bs->drv) { 111 /* page size or 4k (hdd sector size) should be on the safe side */ 112 return MAX(4096, qemu_real_host_page_size); 113 } 114 115 return bs->bl.opt_mem_alignment; 116 } 117 118 size_t bdrv_min_mem_align(BlockDriverState *bs) 119 { 120 if (!bs || !bs->drv) { 121 /* page size or 4k (hdd sector size) should be on the safe side */ 122 return MAX(4096, qemu_real_host_page_size); 123 } 124 125 return bs->bl.min_mem_alignment; 126 } 127 128 /* check if the path starts with "<protocol>:" */ 129 int path_has_protocol(const char *path) 130 { 131 const char *p; 132 133 #ifdef _WIN32 134 if (is_windows_drive(path) || 135 is_windows_drive_prefix(path)) { 136 return 0; 137 } 138 p = path + strcspn(path, ":/\\"); 139 #else 140 p = path + strcspn(path, ":/"); 141 #endif 142 143 return *p == ':'; 144 } 145 146 int path_is_absolute(const char *path) 147 { 148 #ifdef _WIN32 149 /* specific case for names like: "\\.\d:" */ 150 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 151 return 1; 152 } 153 return (*path == '/' || *path == '\\'); 154 #else 155 return (*path == '/'); 156 #endif 157 } 158 159 /* if filename is absolute, just return its duplicate. Otherwise, build a 160 path to it by considering it is relative to base_path. URL are 161 supported. 
*/
char *path_combine(const char *base_path, const char *filename)
{
    const char *dir_end = base_path;
    const char *last_sep;
    char *combined;
    int prefix_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* The "<protocol>:" part of base_path, if any, is always kept */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            dir_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif

    /* Keep everything up to and including the last directory separator */
    if (last_sep && last_sep + 1 > dir_end) {
        dir_end = last_sep + 1;
    }
    prefix_len = dir_end - base_path;

    /* Result is the kept prefix of base_path followed by filename */
    combined = g_malloc(prefix_len + strlen(filename) + 1);
    memcpy(combined, base_path, prefix_len);
    strcpy(combined + prefix_len, filename);

    return combined;
}

/*
 * Helper function for bdrv_parse_filename() implementations to remove optional
 * protocol prefixes (especially "file:") from a filename and for putting the
 * stripped filename into the options QDict if there is such a prefix.
212 */ 213 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 214 QDict *options) 215 { 216 if (strstart(filename, prefix, &filename)) { 217 /* Stripping the explicit protocol prefix may result in a protocol 218 * prefix being (wrongly) detected (if the filename contains a colon) */ 219 if (path_has_protocol(filename)) { 220 GString *fat_filename; 221 222 /* This means there is some colon before the first slash; therefore, 223 * this cannot be an absolute path */ 224 assert(!path_is_absolute(filename)); 225 226 /* And we can thus fix the protocol detection issue by prefixing it 227 * by "./" */ 228 fat_filename = g_string_new("./"); 229 g_string_append(fat_filename, filename); 230 231 assert(!path_has_protocol(fat_filename->str)); 232 233 qdict_put(options, "filename", 234 qstring_from_gstring(fat_filename)); 235 } else { 236 /* If no protocol prefix was detected, we can use the shortened 237 * filename as-is */ 238 qdict_put_str(options, "filename", filename); 239 } 240 } 241 } 242 243 244 /* Returns whether the image file is opened as read-only. Note that this can 245 * return false and writing to the image file is still not possible because the 246 * image is inactivated. 
*/ 247 bool bdrv_is_read_only(BlockDriverState *bs) 248 { 249 return bs->read_only; 250 } 251 252 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 253 bool ignore_allow_rdw, Error **errp) 254 { 255 /* Do not set read_only if copy_on_read is enabled */ 256 if (bs->copy_on_read && read_only) { 257 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 258 bdrv_get_device_or_node_name(bs)); 259 return -EINVAL; 260 } 261 262 /* Do not clear read_only if it is prohibited */ 263 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 264 !ignore_allow_rdw) 265 { 266 error_setg(errp, "Node '%s' is read only", 267 bdrv_get_device_or_node_name(bs)); 268 return -EPERM; 269 } 270 271 return 0; 272 } 273 274 /* 275 * Called by a driver that can only provide a read-only image. 276 * 277 * Returns 0 if the node is already read-only or it could switch the node to 278 * read-only because BDRV_O_AUTO_RDONLY is set. 279 * 280 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set 281 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg 282 * is not NULL, it is used as the error message for the Error object. 283 */ 284 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, 285 Error **errp) 286 { 287 int ret = 0; 288 289 if (!(bs->open_flags & BDRV_O_RDWR)) { 290 return 0; 291 } 292 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) { 293 goto fail; 294 } 295 296 ret = bdrv_can_set_read_only(bs, true, false, NULL); 297 if (ret < 0) { 298 goto fail; 299 } 300 301 bs->read_only = true; 302 bs->open_flags &= ~BDRV_O_RDWR; 303 304 return 0; 305 306 fail: 307 error_setg(errp, "%s", errmsg ?: "Image is read-only"); 308 return -EACCES; 309 } 310 311 /* 312 * If @backing is empty, this function returns NULL without setting 313 * @errp. In all other cases, NULL will only be returned with @errp 314 * set. 
315 * 316 * Therefore, a return value of NULL without @errp set means that 317 * there is no backing file; if @errp is set, there is one but its 318 * absolute filename cannot be generated. 319 */ 320 char *bdrv_get_full_backing_filename_from_filename(const char *backed, 321 const char *backing, 322 Error **errp) 323 { 324 if (backing[0] == '\0') { 325 return NULL; 326 } else if (path_has_protocol(backing) || path_is_absolute(backing)) { 327 return g_strdup(backing); 328 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 329 error_setg(errp, "Cannot use relative backing file names for '%s'", 330 backed); 331 return NULL; 332 } else { 333 return path_combine(backed, backing); 334 } 335 } 336 337 /* 338 * If @filename is empty or NULL, this function returns NULL without 339 * setting @errp. In all other cases, NULL will only be returned with 340 * @errp set. 341 */ 342 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, 343 const char *filename, Error **errp) 344 { 345 char *dir, *full_name; 346 347 if (!filename || filename[0] == '\0') { 348 return NULL; 349 } else if (path_has_protocol(filename) || path_is_absolute(filename)) { 350 return g_strdup(filename); 351 } 352 353 dir = bdrv_dirname(relative_to, errp); 354 if (!dir) { 355 return NULL; 356 } 357 358 full_name = g_strconcat(dir, filename, NULL); 359 g_free(dir); 360 return full_name; 361 } 362 363 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) 364 { 365 return bdrv_make_absolute_filename(bs, bs->backing_file, errp); 366 } 367 368 void bdrv_register(BlockDriver *bdrv) 369 { 370 assert(bdrv->format_name); 371 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 372 } 373 374 BlockDriverState *bdrv_new(void) 375 { 376 BlockDriverState *bs; 377 int i; 378 379 bs = g_new0(BlockDriverState, 1); 380 QLIST_INIT(&bs->dirty_bitmaps); 381 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 382 QLIST_INIT(&bs->op_blockers[i]); 383 } 384 
notifier_with_return_list_init(&bs->before_write_notifiers); 385 qemu_co_mutex_init(&bs->reqs_lock); 386 qemu_mutex_init(&bs->dirty_bitmap_mutex); 387 bs->refcnt = 1; 388 bs->aio_context = qemu_get_aio_context(); 389 390 qemu_co_queue_init(&bs->flush_queue); 391 392 for (i = 0; i < bdrv_drain_all_count; i++) { 393 bdrv_drained_begin(bs); 394 } 395 396 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 397 398 return bs; 399 } 400 401 static BlockDriver *bdrv_do_find_format(const char *format_name) 402 { 403 BlockDriver *drv1; 404 405 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 406 if (!strcmp(drv1->format_name, format_name)) { 407 return drv1; 408 } 409 } 410 411 return NULL; 412 } 413 414 BlockDriver *bdrv_find_format(const char *format_name) 415 { 416 BlockDriver *drv1; 417 int i; 418 419 drv1 = bdrv_do_find_format(format_name); 420 if (drv1) { 421 return drv1; 422 } 423 424 /* The driver isn't registered, maybe we need to load a module */ 425 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 426 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 427 block_module_load_one(block_driver_modules[i].library_name); 428 break; 429 } 430 } 431 432 return bdrv_do_find_format(format_name); 433 } 434 435 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) 436 { 437 static const char *whitelist_rw[] = { 438 CONFIG_BDRV_RW_WHITELIST 439 NULL 440 }; 441 static const char *whitelist_ro[] = { 442 CONFIG_BDRV_RO_WHITELIST 443 NULL 444 }; 445 const char **p; 446 447 if (!whitelist_rw[0] && !whitelist_ro[0]) { 448 return 1; /* no whitelist, anything goes */ 449 } 450 451 for (p = whitelist_rw; *p; p++) { 452 if (!strcmp(format_name, *p)) { 453 return 1; 454 } 455 } 456 if (read_only) { 457 for (p = whitelist_ro; *p; p++) { 458 if (!strcmp(format_name, *p)) { 459 return 1; 460 } 461 } 462 } 463 return 0; 464 } 465 466 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 467 { 468 return 
bdrv_format_is_whitelisted(drv->format_name, read_only); 469 } 470 471 bool bdrv_uses_whitelist(void) 472 { 473 return use_bdrv_whitelist; 474 } 475 476 typedef struct CreateCo { 477 BlockDriver *drv; 478 char *filename; 479 QemuOpts *opts; 480 int ret; 481 Error *err; 482 } CreateCo; 483 484 static void coroutine_fn bdrv_create_co_entry(void *opaque) 485 { 486 Error *local_err = NULL; 487 int ret; 488 489 CreateCo *cco = opaque; 490 assert(cco->drv); 491 492 ret = cco->drv->bdrv_co_create_opts(cco->drv, 493 cco->filename, cco->opts, &local_err); 494 error_propagate(&cco->err, local_err); 495 cco->ret = ret; 496 } 497 498 int bdrv_create(BlockDriver *drv, const char* filename, 499 QemuOpts *opts, Error **errp) 500 { 501 int ret; 502 503 Coroutine *co; 504 CreateCo cco = { 505 .drv = drv, 506 .filename = g_strdup(filename), 507 .opts = opts, 508 .ret = NOT_DONE, 509 .err = NULL, 510 }; 511 512 if (!drv->bdrv_co_create_opts) { 513 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 514 ret = -ENOTSUP; 515 goto out; 516 } 517 518 if (qemu_in_coroutine()) { 519 /* Fast-path if already in coroutine context */ 520 bdrv_create_co_entry(&cco); 521 } else { 522 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 523 qemu_coroutine_enter(co); 524 while (cco.ret == NOT_DONE) { 525 aio_poll(qemu_get_aio_context(), true); 526 } 527 } 528 529 ret = cco.ret; 530 if (ret < 0) { 531 if (cco.err) { 532 error_propagate(errp, cco.err); 533 } else { 534 error_setg_errno(errp, -ret, "Could not create image"); 535 } 536 } 537 538 out: 539 g_free(cco.filename); 540 return ret; 541 } 542 543 /** 544 * Helper function for bdrv_create_file_fallback(): Resize @blk to at 545 * least the given @minimum_size. 546 * 547 * On success, return @blk's actual length. 548 * Otherwise, return -errno. 
549 */ 550 static int64_t create_file_fallback_truncate(BlockBackend *blk, 551 int64_t minimum_size, Error **errp) 552 { 553 Error *local_err = NULL; 554 int64_t size; 555 int ret; 556 557 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, 558 &local_err); 559 if (ret < 0 && ret != -ENOTSUP) { 560 error_propagate(errp, local_err); 561 return ret; 562 } 563 564 size = blk_getlength(blk); 565 if (size < 0) { 566 error_free(local_err); 567 error_setg_errno(errp, -size, 568 "Failed to inquire the new image file's length"); 569 return size; 570 } 571 572 if (size < minimum_size) { 573 /* Need to grow the image, but we failed to do that */ 574 error_propagate(errp, local_err); 575 return -ENOTSUP; 576 } 577 578 error_free(local_err); 579 local_err = NULL; 580 581 return size; 582 } 583 584 /** 585 * Helper function for bdrv_create_file_fallback(): Zero the first 586 * sector to remove any potentially pre-existing image header. 587 */ 588 static int create_file_fallback_zero_first_sector(BlockBackend *blk, 589 int64_t current_size, 590 Error **errp) 591 { 592 int64_t bytes_to_clear; 593 int ret; 594 595 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); 596 if (bytes_to_clear) { 597 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); 598 if (ret < 0) { 599 error_setg_errno(errp, -ret, 600 "Failed to clear the new image's first sector"); 601 return ret; 602 } 603 } 604 605 return 0; 606 } 607 608 /** 609 * Simple implementation of bdrv_co_create_opts for protocol drivers 610 * which only support creation via opening a file 611 * (usually existing raw storage device) 612 */ 613 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, 614 const char *filename, 615 QemuOpts *opts, 616 Error **errp) 617 { 618 BlockBackend *blk; 619 QDict *options; 620 int64_t size = 0; 621 char *buf = NULL; 622 PreallocMode prealloc; 623 Error *local_err = NULL; 624 int ret; 625 626 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 627 buf = 
qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 628 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, 629 PREALLOC_MODE_OFF, &local_err); 630 g_free(buf); 631 if (local_err) { 632 error_propagate(errp, local_err); 633 return -EINVAL; 634 } 635 636 if (prealloc != PREALLOC_MODE_OFF) { 637 error_setg(errp, "Unsupported preallocation mode '%s'", 638 PreallocMode_str(prealloc)); 639 return -ENOTSUP; 640 } 641 642 options = qdict_new(); 643 qdict_put_str(options, "driver", drv->format_name); 644 645 blk = blk_new_open(filename, NULL, options, 646 BDRV_O_RDWR | BDRV_O_RESIZE, errp); 647 if (!blk) { 648 error_prepend(errp, "Protocol driver '%s' does not support image " 649 "creation, and opening the image failed: ", 650 drv->format_name); 651 return -EINVAL; 652 } 653 654 size = create_file_fallback_truncate(blk, size, errp); 655 if (size < 0) { 656 ret = size; 657 goto out; 658 } 659 660 ret = create_file_fallback_zero_first_sector(blk, size, errp); 661 if (ret < 0) { 662 goto out; 663 } 664 665 ret = 0; 666 out: 667 blk_unref(blk); 668 return ret; 669 } 670 671 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 672 { 673 QemuOpts *protocol_opts; 674 BlockDriver *drv; 675 QDict *qdict; 676 int ret; 677 678 drv = bdrv_find_protocol(filename, true, errp); 679 if (drv == NULL) { 680 return -ENOENT; 681 } 682 683 if (!drv->create_opts) { 684 error_setg(errp, "Driver '%s' does not support image creation", 685 drv->format_name); 686 return -ENOTSUP; 687 } 688 689 /* 690 * 'opts' contains a QemuOptsList with a combination of format and protocol 691 * default values. 692 * 693 * The format properly removes its options, but the default values remain 694 * in 'opts->list'. So if the protocol has options with the same name 695 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values 696 * of the format, since for overlapping options, the format wins. 
697 * 698 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take 699 * only the set options, and then convert it back to QemuOpts, using the 700 * create_opts of the protocol. So the new QemuOpts, will contain only the 701 * protocol defaults. 702 */ 703 qdict = qemu_opts_to_qdict(opts, NULL); 704 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp); 705 if (protocol_opts == NULL) { 706 ret = -EINVAL; 707 goto out; 708 } 709 710 ret = bdrv_create(drv, filename, protocol_opts, errp); 711 out: 712 qemu_opts_del(protocol_opts); 713 qobject_unref(qdict); 714 return ret; 715 } 716 717 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) 718 { 719 Error *local_err = NULL; 720 int ret; 721 722 assert(bs != NULL); 723 724 if (!bs->drv) { 725 error_setg(errp, "Block node '%s' is not opened", bs->filename); 726 return -ENOMEDIUM; 727 } 728 729 if (!bs->drv->bdrv_co_delete_file) { 730 error_setg(errp, "Driver '%s' does not support image deletion", 731 bs->drv->format_name); 732 return -ENOTSUP; 733 } 734 735 ret = bs->drv->bdrv_co_delete_file(bs, &local_err); 736 if (ret < 0) { 737 error_propagate(errp, local_err); 738 } 739 740 return ret; 741 } 742 743 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) 744 { 745 Error *local_err = NULL; 746 int ret; 747 748 if (!bs) { 749 return; 750 } 751 752 ret = bdrv_co_delete_file(bs, &local_err); 753 /* 754 * ENOTSUP will happen if the block driver doesn't support 755 * the 'bdrv_co_delete_file' interface. This is a predictable 756 * scenario and shouldn't be reported back to the user. 757 */ 758 if (ret == -ENOTSUP) { 759 error_free(local_err); 760 } else if (ret < 0) { 761 error_report_err(local_err); 762 } 763 } 764 765 /** 766 * Try to get @bs's logical and physical block size. 767 * On success, store them in @bsz struct and return 0. 768 * On failure return -errno. 769 * @bs must not be empty. 
770 */ 771 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 772 { 773 BlockDriver *drv = bs->drv; 774 BlockDriverState *filtered = bdrv_filter_bs(bs); 775 776 if (drv && drv->bdrv_probe_blocksizes) { 777 return drv->bdrv_probe_blocksizes(bs, bsz); 778 } else if (filtered) { 779 return bdrv_probe_blocksizes(filtered, bsz); 780 } 781 782 return -ENOTSUP; 783 } 784 785 /** 786 * Try to get @bs's geometry (cyls, heads, sectors). 787 * On success, store them in @geo struct and return 0. 788 * On failure return -errno. 789 * @bs must not be empty. 790 */ 791 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 792 { 793 BlockDriver *drv = bs->drv; 794 BlockDriverState *filtered = bdrv_filter_bs(bs); 795 796 if (drv && drv->bdrv_probe_geometry) { 797 return drv->bdrv_probe_geometry(bs, geo); 798 } else if (filtered) { 799 return bdrv_probe_geometry(filtered, geo); 800 } 801 802 return -ENOTSUP; 803 } 804 805 /* 806 * Create a uniquely-named empty temporary file. 807 * Return 0 upon success, otherwise a negative errno value. 808 */ 809 int get_tmp_filename(char *filename, int size) 810 { 811 #ifdef _WIN32 812 char temp_dir[MAX_PATH]; 813 /* GetTempFileName requires that its output buffer (4th param) 814 have length MAX_PATH or greater. */ 815 assert(size >= MAX_PATH); 816 return (GetTempPath(MAX_PATH, temp_dir) 817 && GetTempFileName(temp_dir, "qem", 0, filename) 818 ? 0 : -GetLastError()); 819 #else 820 int fd; 821 const char *tmpdir; 822 tmpdir = getenv("TMPDIR"); 823 if (!tmpdir) { 824 tmpdir = "/var/tmp"; 825 } 826 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 827 return -EOVERFLOW; 828 } 829 fd = mkstemp(filename); 830 if (fd < 0) { 831 return -errno; 832 } 833 if (close(fd) != 0) { 834 unlink(filename); 835 return -errno; 836 } 837 return 0; 838 #endif 839 } 840 841 /* 842 * Detect host devices. By convention, /dev/cdrom[N] is always 843 * recognized as a host CDROM. 
844 */ 845 static BlockDriver *find_hdev_driver(const char *filename) 846 { 847 int score_max = 0, score; 848 BlockDriver *drv = NULL, *d; 849 850 QLIST_FOREACH(d, &bdrv_drivers, list) { 851 if (d->bdrv_probe_device) { 852 score = d->bdrv_probe_device(filename); 853 if (score > score_max) { 854 score_max = score; 855 drv = d; 856 } 857 } 858 } 859 860 return drv; 861 } 862 863 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 864 { 865 BlockDriver *drv1; 866 867 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 868 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 869 return drv1; 870 } 871 } 872 873 return NULL; 874 } 875 876 BlockDriver *bdrv_find_protocol(const char *filename, 877 bool allow_protocol_prefix, 878 Error **errp) 879 { 880 BlockDriver *drv1; 881 char protocol[128]; 882 int len; 883 const char *p; 884 int i; 885 886 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 887 888 /* 889 * XXX(hch): we really should not let host device detection 890 * override an explicit protocol specification, but moving this 891 * later breaks access to device names with colons in them. 892 * Thanks to the brain-dead persistent naming schemes on udev- 893 * based Linux systems those actually are quite common. 
894 */ 895 drv1 = find_hdev_driver(filename); 896 if (drv1) { 897 return drv1; 898 } 899 900 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 901 return &bdrv_file; 902 } 903 904 p = strchr(filename, ':'); 905 assert(p != NULL); 906 len = p - filename; 907 if (len > sizeof(protocol) - 1) 908 len = sizeof(protocol) - 1; 909 memcpy(protocol, filename, len); 910 protocol[len] = '\0'; 911 912 drv1 = bdrv_do_find_protocol(protocol); 913 if (drv1) { 914 return drv1; 915 } 916 917 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 918 if (block_driver_modules[i].protocol_name && 919 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 920 block_module_load_one(block_driver_modules[i].library_name); 921 break; 922 } 923 } 924 925 drv1 = bdrv_do_find_protocol(protocol); 926 if (!drv1) { 927 error_setg(errp, "Unknown protocol '%s'", protocol); 928 } 929 return drv1; 930 } 931 932 /* 933 * Guess image format by probing its contents. 934 * This is not a good idea when your image is raw (CVE-2008-2004), but 935 * we do it anyway for backward compatibility. 936 * 937 * @buf contains the image's first @buf_size bytes. 938 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 939 * but can be smaller if the image file is smaller) 940 * @filename is its filename. 941 * 942 * For all block drivers, call the bdrv_probe() method to get its 943 * probing score. 944 * Return the first block driver with the highest probing score. 
945 */ 946 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 947 const char *filename) 948 { 949 int score_max = 0, score; 950 BlockDriver *drv = NULL, *d; 951 952 QLIST_FOREACH(d, &bdrv_drivers, list) { 953 if (d->bdrv_probe) { 954 score = d->bdrv_probe(buf, buf_size, filename); 955 if (score > score_max) { 956 score_max = score; 957 drv = d; 958 } 959 } 960 } 961 962 return drv; 963 } 964 965 static int find_image_format(BlockBackend *file, const char *filename, 966 BlockDriver **pdrv, Error **errp) 967 { 968 BlockDriver *drv; 969 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 970 int ret = 0; 971 972 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 973 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 974 *pdrv = &bdrv_raw; 975 return ret; 976 } 977 978 ret = blk_pread(file, 0, buf, sizeof(buf)); 979 if (ret < 0) { 980 error_setg_errno(errp, -ret, "Could not read image for determining its " 981 "format"); 982 *pdrv = NULL; 983 return ret; 984 } 985 986 drv = bdrv_probe_all(buf, ret, filename); 987 if (!drv) { 988 error_setg(errp, "Could not determine image format: No compatible " 989 "driver found"); 990 ret = -ENOENT; 991 } 992 *pdrv = drv; 993 return ret; 994 } 995 996 /** 997 * Set the current 'total_sectors' value 998 * Return 0 on success, -errno on error. 
999 */ 1000 int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 1001 { 1002 BlockDriver *drv = bs->drv; 1003 1004 if (!drv) { 1005 return -ENOMEDIUM; 1006 } 1007 1008 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 1009 if (bdrv_is_sg(bs)) 1010 return 0; 1011 1012 /* query actual device if possible, otherwise just trust the hint */ 1013 if (drv->bdrv_getlength) { 1014 int64_t length = drv->bdrv_getlength(bs); 1015 if (length < 0) { 1016 return length; 1017 } 1018 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 1019 } 1020 1021 bs->total_sectors = hint; 1022 1023 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) { 1024 return -EFBIG; 1025 } 1026 1027 return 0; 1028 } 1029 1030 /** 1031 * Combines a QDict of new block driver @options with any missing options taken 1032 * from @old_options, so that leaving out an option defaults to its old value. 1033 */ 1034 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 1035 QDict *old_options) 1036 { 1037 if (bs->drv && bs->drv->bdrv_join_options) { 1038 bs->drv->bdrv_join_options(options, old_options); 1039 } else { 1040 qdict_join(options, old_options, false); 1041 } 1042 } 1043 1044 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, 1045 int open_flags, 1046 Error **errp) 1047 { 1048 Error *local_err = NULL; 1049 char *value = qemu_opt_get_del(opts, "detect-zeroes"); 1050 BlockdevDetectZeroesOptions detect_zeroes = 1051 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, 1052 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); 1053 g_free(value); 1054 if (local_err) { 1055 error_propagate(errp, local_err); 1056 return detect_zeroes; 1057 } 1058 1059 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1060 !(open_flags & BDRV_O_UNMAP)) 1061 { 1062 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1063 "without setting discard operation to unmap"); 1064 } 1065 1066 return detect_zeroes; 1067 } 1068 1069 /** 1070 * Set 
open flags for aio engine 1071 * 1072 * Return 0 on success, -1 if the engine specified is invalid 1073 */ 1074 int bdrv_parse_aio(const char *mode, int *flags) 1075 { 1076 if (!strcmp(mode, "threads")) { 1077 /* do nothing, default */ 1078 } else if (!strcmp(mode, "native")) { 1079 *flags |= BDRV_O_NATIVE_AIO; 1080 #ifdef CONFIG_LINUX_IO_URING 1081 } else if (!strcmp(mode, "io_uring")) { 1082 *flags |= BDRV_O_IO_URING; 1083 #endif 1084 } else { 1085 return -1; 1086 } 1087 1088 return 0; 1089 } 1090 1091 /** 1092 * Set open flags for a given discard mode 1093 * 1094 * Return 0 on success, -1 if the discard mode was invalid. 1095 */ 1096 int bdrv_parse_discard_flags(const char *mode, int *flags) 1097 { 1098 *flags &= ~BDRV_O_UNMAP; 1099 1100 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 1101 /* do nothing */ 1102 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 1103 *flags |= BDRV_O_UNMAP; 1104 } else { 1105 return -1; 1106 } 1107 1108 return 0; 1109 } 1110 1111 /** 1112 * Set open flags for a given cache mode 1113 * 1114 * Return 0 on success, -1 if the cache mode was invalid. 
1115 */ 1116 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 1117 { 1118 *flags &= ~BDRV_O_CACHE_MASK; 1119 1120 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 1121 *writethrough = false; 1122 *flags |= BDRV_O_NOCACHE; 1123 } else if (!strcmp(mode, "directsync")) { 1124 *writethrough = true; 1125 *flags |= BDRV_O_NOCACHE; 1126 } else if (!strcmp(mode, "writeback")) { 1127 *writethrough = false; 1128 } else if (!strcmp(mode, "unsafe")) { 1129 *writethrough = false; 1130 *flags |= BDRV_O_NO_FLUSH; 1131 } else if (!strcmp(mode, "writethrough")) { 1132 *writethrough = true; 1133 } else { 1134 return -1; 1135 } 1136 1137 return 0; 1138 } 1139 1140 static char *bdrv_child_get_parent_desc(BdrvChild *c) 1141 { 1142 BlockDriverState *parent = c->opaque; 1143 return g_strdup(bdrv_get_device_or_node_name(parent)); 1144 } 1145 1146 static void bdrv_child_cb_drained_begin(BdrvChild *child) 1147 { 1148 BlockDriverState *bs = child->opaque; 1149 bdrv_do_drained_begin_quiesce(bs, NULL, false); 1150 } 1151 1152 static bool bdrv_child_cb_drained_poll(BdrvChild *child) 1153 { 1154 BlockDriverState *bs = child->opaque; 1155 return bdrv_drain_poll(bs, false, NULL, false); 1156 } 1157 1158 static void bdrv_child_cb_drained_end(BdrvChild *child, 1159 int *drained_end_counter) 1160 { 1161 BlockDriverState *bs = child->opaque; 1162 bdrv_drained_end_no_poll(bs, drained_end_counter); 1163 } 1164 1165 static int bdrv_child_cb_inactivate(BdrvChild *child) 1166 { 1167 BlockDriverState *bs = child->opaque; 1168 assert(bs->open_flags & BDRV_O_INACTIVE); 1169 return 0; 1170 } 1171 1172 static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1173 GSList **ignore, Error **errp) 1174 { 1175 BlockDriverState *bs = child->opaque; 1176 return bdrv_can_set_aio_context(bs, ctx, ignore, errp); 1177 } 1178 1179 static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, 1180 GSList **ignore) 1181 { 1182 BlockDriverState *bs = child->opaque; 
1183 return bdrv_set_aio_context_ignore(bs, ctx, ignore); 1184 } 1185 1186 /* 1187 * Returns the options and flags that a temporary snapshot should get, based on 1188 * the originally requested flags (the originally requested image will have 1189 * flags like a backing file) 1190 */ 1191 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 1192 int parent_flags, QDict *parent_options) 1193 { 1194 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 1195 1196 /* For temporary files, unconditional cache=unsafe is fine */ 1197 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 1198 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 1199 1200 /* Copy the read-only and discard options from the parent */ 1201 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1202 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); 1203 1204 /* aio=native doesn't work for cache.direct=off, so disable it for the 1205 * temporary snapshot */ 1206 *child_flags &= ~BDRV_O_NATIVE_AIO; 1207 } 1208 1209 static void bdrv_backing_attach(BdrvChild *c) 1210 { 1211 BlockDriverState *parent = c->opaque; 1212 BlockDriverState *backing_hd = c->bs; 1213 1214 assert(!parent->backing_blocker); 1215 error_setg(&parent->backing_blocker, 1216 "node is used as backing hd of '%s'", 1217 bdrv_get_device_or_node_name(parent)); 1218 1219 bdrv_refresh_filename(backing_hd); 1220 1221 parent->open_flags &= ~BDRV_O_NO_BACKING; 1222 1223 bdrv_op_block_all(backing_hd, parent->backing_blocker); 1224 /* Otherwise we won't be able to commit or stream */ 1225 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1226 parent->backing_blocker); 1227 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 1228 parent->backing_blocker); 1229 /* 1230 * We do backup in 3 ways: 1231 * 1. drive backup 1232 * The target bs is new opened, and the source is top BDS 1233 * 2. 
blockdev backup 1234 * Both the source and the target are top BDSes. 1235 * 3. internal backup(used for block replication) 1236 * Both the source and the target are backing file 1237 * 1238 * In case 1 and 2, neither the source nor the target is the backing file. 1239 * In case 3, we will block the top BDS, so there is only one block job 1240 * for the top BDS and its backing chain. 1241 */ 1242 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 1243 parent->backing_blocker); 1244 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 1245 parent->backing_blocker); 1246 } 1247 1248 static void bdrv_backing_detach(BdrvChild *c) 1249 { 1250 BlockDriverState *parent = c->opaque; 1251 1252 assert(parent->backing_blocker); 1253 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 1254 error_free(parent->backing_blocker); 1255 parent->backing_blocker = NULL; 1256 } 1257 1258 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1259 const char *filename, Error **errp) 1260 { 1261 BlockDriverState *parent = c->opaque; 1262 bool read_only = bdrv_is_read_only(parent); 1263 int ret; 1264 1265 if (read_only) { 1266 ret = bdrv_reopen_set_read_only(parent, false, errp); 1267 if (ret < 0) { 1268 return ret; 1269 } 1270 } 1271 1272 ret = bdrv_change_backing_file(parent, filename, 1273 base->drv ? base->drv->format_name : "", 1274 false); 1275 if (ret < 0) { 1276 error_setg_errno(errp, -ret, "Could not update backing file link"); 1277 } 1278 1279 if (read_only) { 1280 bdrv_reopen_set_read_only(parent, true, NULL); 1281 } 1282 1283 return ret; 1284 } 1285 1286 /* 1287 * Returns the options and flags that a generic child of a BDS should 1288 * get, based on the given options and flags for the parent BDS. 
1289 */ 1290 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, 1291 int *child_flags, QDict *child_options, 1292 int parent_flags, QDict *parent_options) 1293 { 1294 int flags = parent_flags; 1295 1296 /* 1297 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. 1298 * Generally, the question to answer is: Should this child be 1299 * format-probed by default? 1300 */ 1301 1302 /* 1303 * Pure and non-filtered data children of non-format nodes should 1304 * be probed by default (even when the node itself has BDRV_O_PROTOCOL 1305 * set). This only affects a very limited set of drivers (namely 1306 * quorum and blkverify when this comment was written). 1307 * Force-clear BDRV_O_PROTOCOL then. 1308 */ 1309 if (!parent_is_format && 1310 (role & BDRV_CHILD_DATA) && 1311 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED))) 1312 { 1313 flags &= ~BDRV_O_PROTOCOL; 1314 } 1315 1316 /* 1317 * All children of format nodes (except for COW children) and all 1318 * metadata children in general should never be format-probed. 1319 * Force-set BDRV_O_PROTOCOL then. 1320 */ 1321 if ((parent_is_format && !(role & BDRV_CHILD_COW)) || 1322 (role & BDRV_CHILD_METADATA)) 1323 { 1324 flags |= BDRV_O_PROTOCOL; 1325 } 1326 1327 /* 1328 * If the cache mode isn't explicitly set, inherit direct and no-flush from 1329 * the parent. 
1330 */ 1331 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1332 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1333 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1334 1335 if (role & BDRV_CHILD_COW) { 1336 /* backing files are opened read-only by default */ 1337 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1338 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); 1339 } else { 1340 /* Inherit the read-only option from the parent if it's not set */ 1341 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 1342 qdict_copy_default(child_options, parent_options, 1343 BDRV_OPT_AUTO_READ_ONLY); 1344 } 1345 1346 /* 1347 * bdrv_co_pdiscard() respects unmap policy for the parent, so we 1348 * can default to enable it on lower layers regardless of the 1349 * parent option. 1350 */ 1351 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 1352 1353 /* Clear flags that only apply to the top layer */ 1354 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 1355 1356 if (role & BDRV_CHILD_METADATA) { 1357 flags &= ~BDRV_O_NO_IO; 1358 } 1359 if (role & BDRV_CHILD_COW) { 1360 flags &= ~BDRV_O_TEMPORARY; 1361 } 1362 1363 *child_flags = flags; 1364 } 1365 1366 static void bdrv_child_cb_attach(BdrvChild *child) 1367 { 1368 BlockDriverState *bs = child->opaque; 1369 1370 if (child->role & BDRV_CHILD_COW) { 1371 bdrv_backing_attach(child); 1372 } 1373 1374 bdrv_apply_subtree_drain(child, bs); 1375 } 1376 1377 static void bdrv_child_cb_detach(BdrvChild *child) 1378 { 1379 BlockDriverState *bs = child->opaque; 1380 1381 if (child->role & BDRV_CHILD_COW) { 1382 bdrv_backing_detach(child); 1383 } 1384 1385 bdrv_unapply_subtree_drain(child, bs); 1386 } 1387 1388 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, 1389 const char *filename, Error **errp) 1390 { 1391 if (c->role & BDRV_CHILD_COW) { 
1392 return bdrv_backing_update_filename(c, base, filename, errp); 1393 } 1394 return 0; 1395 } 1396 1397 const BdrvChildClass child_of_bds = { 1398 .parent_is_bds = true, 1399 .get_parent_desc = bdrv_child_get_parent_desc, 1400 .inherit_options = bdrv_inherited_options, 1401 .drained_begin = bdrv_child_cb_drained_begin, 1402 .drained_poll = bdrv_child_cb_drained_poll, 1403 .drained_end = bdrv_child_cb_drained_end, 1404 .attach = bdrv_child_cb_attach, 1405 .detach = bdrv_child_cb_detach, 1406 .inactivate = bdrv_child_cb_inactivate, 1407 .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, 1408 .set_aio_ctx = bdrv_child_cb_set_aio_ctx, 1409 .update_filename = bdrv_child_cb_update_filename, 1410 }; 1411 1412 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1413 { 1414 int open_flags = flags; 1415 1416 /* 1417 * Clear flags that are internal to the block layer before opening the 1418 * image. 1419 */ 1420 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1421 1422 return open_flags; 1423 } 1424 1425 static void update_flags_from_options(int *flags, QemuOpts *opts) 1426 { 1427 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); 1428 1429 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1430 *flags |= BDRV_O_NO_FLUSH; 1431 } 1432 1433 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1434 *flags |= BDRV_O_NOCACHE; 1435 } 1436 1437 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { 1438 *flags |= BDRV_O_RDWR; 1439 } 1440 1441 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { 1442 *flags |= BDRV_O_AUTO_RDONLY; 1443 } 1444 } 1445 1446 static void update_options_from_flags(QDict *options, int flags) 1447 { 1448 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1449 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1450 } 1451 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 1452 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1453 flags 
& BDRV_O_NO_FLUSH); 1454 } 1455 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1456 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1457 } 1458 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) { 1459 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, 1460 flags & BDRV_O_AUTO_RDONLY); 1461 } 1462 } 1463 1464 static void bdrv_assign_node_name(BlockDriverState *bs, 1465 const char *node_name, 1466 Error **errp) 1467 { 1468 char *gen_node_name = NULL; 1469 1470 if (!node_name) { 1471 node_name = gen_node_name = id_generate(ID_BLOCK); 1472 } else if (!id_wellformed(node_name)) { 1473 /* 1474 * Check for empty string or invalid characters, but not if it is 1475 * generated (generated names use characters not available to the user) 1476 */ 1477 error_setg(errp, "Invalid node-name: '%s'", node_name); 1478 return; 1479 } 1480 1481 /* takes care of avoiding namespaces collisions */ 1482 if (blk_by_name(node_name)) { 1483 error_setg(errp, "node-name=%s is conflicting with a device id", 1484 node_name); 1485 goto out; 1486 } 1487 1488 /* takes care of avoiding duplicates node names */ 1489 if (bdrv_find_node(node_name)) { 1490 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name); 1491 goto out; 1492 } 1493 1494 /* Make sure that the node name isn't truncated */ 1495 if (strlen(node_name) >= sizeof(bs->node_name)) { 1496 error_setg(errp, "Node name too long"); 1497 goto out; 1498 } 1499 1500 /* copy node name into the bs and insert it into the graph list */ 1501 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1502 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1503 out: 1504 g_free(gen_node_name); 1505 } 1506 1507 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1508 const char *node_name, QDict *options, 1509 int open_flags, Error **errp) 1510 { 1511 Error *local_err = NULL; 1512 int i, ret; 1513 1514 bdrv_assign_node_name(bs, node_name, &local_err); 1515 if (local_err) { 1516 error_propagate(errp, 
local_err); 1517 return -EINVAL; 1518 } 1519 1520 bs->drv = drv; 1521 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1522 bs->opaque = g_malloc0(drv->instance_size); 1523 1524 if (drv->bdrv_file_open) { 1525 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1526 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1527 } else if (drv->bdrv_open) { 1528 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1529 } else { 1530 ret = 0; 1531 } 1532 1533 if (ret < 0) { 1534 if (local_err) { 1535 error_propagate(errp, local_err); 1536 } else if (bs->filename[0]) { 1537 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1538 } else { 1539 error_setg_errno(errp, -ret, "Could not open image"); 1540 } 1541 goto open_failed; 1542 } 1543 1544 ret = refresh_total_sectors(bs, bs->total_sectors); 1545 if (ret < 0) { 1546 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 1547 return ret; 1548 } 1549 1550 bdrv_refresh_limits(bs, &local_err); 1551 if (local_err) { 1552 error_propagate(errp, local_err); 1553 return -EINVAL; 1554 } 1555 1556 assert(bdrv_opt_mem_align(bs) != 0); 1557 assert(bdrv_min_mem_align(bs) != 0); 1558 assert(is_power_of_2(bs->bl.request_alignment)); 1559 1560 for (i = 0; i < bs->quiesce_counter; i++) { 1561 if (drv->bdrv_co_drain_begin) { 1562 drv->bdrv_co_drain_begin(bs); 1563 } 1564 } 1565 1566 return 0; 1567 open_failed: 1568 bs->drv = NULL; 1569 if (bs->file != NULL) { 1570 bdrv_unref_child(bs, bs->file); 1571 bs->file = NULL; 1572 } 1573 g_free(bs->opaque); 1574 bs->opaque = NULL; 1575 return ret; 1576 } 1577 1578 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1579 int flags, Error **errp) 1580 { 1581 BlockDriverState *bs; 1582 int ret; 1583 1584 bs = bdrv_new(); 1585 bs->open_flags = flags; 1586 bs->explicit_options = qdict_new(); 1587 bs->options = qdict_new(); 1588 bs->opaque = NULL; 1589 1590 update_options_from_flags(bs->options, flags); 1591 1592 ret = 
bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1593 if (ret < 0) { 1594 qobject_unref(bs->explicit_options); 1595 bs->explicit_options = NULL; 1596 qobject_unref(bs->options); 1597 bs->options = NULL; 1598 bdrv_unref(bs); 1599 return NULL; 1600 } 1601 1602 return bs; 1603 } 1604 1605 QemuOptsList bdrv_runtime_opts = { 1606 .name = "bdrv_common", 1607 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1608 .desc = { 1609 { 1610 .name = "node-name", 1611 .type = QEMU_OPT_STRING, 1612 .help = "Node name of the block device node", 1613 }, 1614 { 1615 .name = "driver", 1616 .type = QEMU_OPT_STRING, 1617 .help = "Block driver to use for the node", 1618 }, 1619 { 1620 .name = BDRV_OPT_CACHE_DIRECT, 1621 .type = QEMU_OPT_BOOL, 1622 .help = "Bypass software writeback cache on the host", 1623 }, 1624 { 1625 .name = BDRV_OPT_CACHE_NO_FLUSH, 1626 .type = QEMU_OPT_BOOL, 1627 .help = "Ignore flush requests", 1628 }, 1629 { 1630 .name = BDRV_OPT_READ_ONLY, 1631 .type = QEMU_OPT_BOOL, 1632 .help = "Node is opened in read-only mode", 1633 }, 1634 { 1635 .name = BDRV_OPT_AUTO_READ_ONLY, 1636 .type = QEMU_OPT_BOOL, 1637 .help = "Node can become read-only if opening read-write fails", 1638 }, 1639 { 1640 .name = "detect-zeroes", 1641 .type = QEMU_OPT_STRING, 1642 .help = "try to optimize zero writes (off, on, unmap)", 1643 }, 1644 { 1645 .name = BDRV_OPT_DISCARD, 1646 .type = QEMU_OPT_STRING, 1647 .help = "discard operation (ignore/off, unmap/on)", 1648 }, 1649 { 1650 .name = BDRV_OPT_FORCE_SHARE, 1651 .type = QEMU_OPT_BOOL, 1652 .help = "always accept other writers (default: off)", 1653 }, 1654 { /* end of list */ } 1655 }, 1656 }; 1657 1658 QemuOptsList bdrv_create_opts_simple = { 1659 .name = "simple-create-opts", 1660 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), 1661 .desc = { 1662 { 1663 .name = BLOCK_OPT_SIZE, 1664 .type = QEMU_OPT_SIZE, 1665 .help = "Virtual disk size" 1666 }, 1667 { 1668 .name = BLOCK_OPT_PREALLOC, 1669 .type = 
QEMU_OPT_STRING, 1670 .help = "Preallocation mode (allowed values: off)" 1671 }, 1672 { /* end of list */ } 1673 } 1674 }; 1675 1676 /* 1677 * Common part for opening disk images and files 1678 * 1679 * Removes all processed options from *options. 1680 */ 1681 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1682 QDict *options, Error **errp) 1683 { 1684 int ret, open_flags; 1685 const char *filename; 1686 const char *driver_name = NULL; 1687 const char *node_name = NULL; 1688 const char *discard; 1689 QemuOpts *opts; 1690 BlockDriver *drv; 1691 Error *local_err = NULL; 1692 1693 assert(bs->file == NULL); 1694 assert(options != NULL && bs->options != options); 1695 1696 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1697 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1698 ret = -EINVAL; 1699 goto fail_opts; 1700 } 1701 1702 update_flags_from_options(&bs->open_flags, opts); 1703 1704 driver_name = qemu_opt_get(opts, "driver"); 1705 drv = bdrv_find_format(driver_name); 1706 assert(drv != NULL); 1707 1708 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1709 1710 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1711 error_setg(errp, 1712 BDRV_OPT_FORCE_SHARE 1713 "=on can only be used with read-only images"); 1714 ret = -EINVAL; 1715 goto fail_opts; 1716 } 1717 1718 if (file != NULL) { 1719 bdrv_refresh_filename(blk_bs(file)); 1720 filename = blk_bs(file)->filename; 1721 } else { 1722 /* 1723 * Caution: while qdict_get_try_str() is fine, getting 1724 * non-string types would require more care. When @options 1725 * come from -blockdev or blockdev_add, its members are typed 1726 * according to the QAPI schema, but when they come from 1727 * -drive, they're all QString. 
1728 */ 1729 filename = qdict_get_try_str(options, "filename"); 1730 } 1731 1732 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1733 error_setg(errp, "The '%s' block driver requires a file name", 1734 drv->format_name); 1735 ret = -EINVAL; 1736 goto fail_opts; 1737 } 1738 1739 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1740 drv->format_name); 1741 1742 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1743 1744 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 1745 if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { 1746 ret = bdrv_apply_auto_read_only(bs, NULL, NULL); 1747 } else { 1748 ret = -ENOTSUP; 1749 } 1750 if (ret < 0) { 1751 error_setg(errp, 1752 !bs->read_only && bdrv_is_whitelisted(drv, true) 1753 ? "Driver '%s' can only be used for read-only devices" 1754 : "Driver '%s' is not whitelisted", 1755 drv->format_name); 1756 goto fail_opts; 1757 } 1758 } 1759 1760 /* bdrv_new() and bdrv_close() make it so */ 1761 assert(qatomic_read(&bs->copy_on_read) == 0); 1762 1763 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1764 if (!bs->read_only) { 1765 bdrv_enable_copy_on_read(bs); 1766 } else { 1767 error_setg(errp, "Can't use copy-on-read on read-only device"); 1768 ret = -EINVAL; 1769 goto fail_opts; 1770 } 1771 } 1772 1773 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD); 1774 if (discard != NULL) { 1775 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1776 error_setg(errp, "Invalid discard option"); 1777 ret = -EINVAL; 1778 goto fail_opts; 1779 } 1780 } 1781 1782 bs->detect_zeroes = 1783 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); 1784 if (local_err) { 1785 error_propagate(errp, local_err); 1786 ret = -EINVAL; 1787 goto fail_opts; 1788 } 1789 1790 if (filename != NULL) { 1791 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1792 } else { 1793 bs->filename[0] = '\0'; 1794 } 1795 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1796 1797 /* Open the image, 
either directly or using a protocol */ 1798 open_flags = bdrv_open_flags(bs, bs->open_flags); 1799 node_name = qemu_opt_get(opts, "node-name"); 1800 1801 assert(!drv->bdrv_file_open || file == NULL); 1802 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1803 if (ret < 0) { 1804 goto fail_opts; 1805 } 1806 1807 qemu_opts_del(opts); 1808 return 0; 1809 1810 fail_opts: 1811 qemu_opts_del(opts); 1812 return ret; 1813 } 1814 1815 static QDict *parse_json_filename(const char *filename, Error **errp) 1816 { 1817 QObject *options_obj; 1818 QDict *options; 1819 int ret; 1820 1821 ret = strstart(filename, "json:", &filename); 1822 assert(ret); 1823 1824 options_obj = qobject_from_json(filename, errp); 1825 if (!options_obj) { 1826 error_prepend(errp, "Could not parse the JSON options: "); 1827 return NULL; 1828 } 1829 1830 options = qobject_to(QDict, options_obj); 1831 if (!options) { 1832 qobject_unref(options_obj); 1833 error_setg(errp, "Invalid JSON object given"); 1834 return NULL; 1835 } 1836 1837 qdict_flatten(options); 1838 1839 return options; 1840 } 1841 1842 static void parse_json_protocol(QDict *options, const char **pfilename, 1843 Error **errp) 1844 { 1845 QDict *json_options; 1846 Error *local_err = NULL; 1847 1848 /* Parse json: pseudo-protocol */ 1849 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1850 return; 1851 } 1852 1853 json_options = parse_json_filename(*pfilename, &local_err); 1854 if (local_err) { 1855 error_propagate(errp, local_err); 1856 return; 1857 } 1858 1859 /* Options given in the filename have lower priority than options 1860 * specified directly */ 1861 qdict_join(options, json_options, false); 1862 qobject_unref(json_options); 1863 *pfilename = NULL; 1864 } 1865 1866 /* 1867 * Fills in default options for opening images and converts the legacy 1868 * filename/flags pair to option QDict entries. 
1869 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1870 * block driver has been specified explicitly. 1871 */ 1872 static int bdrv_fill_options(QDict **options, const char *filename, 1873 int *flags, Error **errp) 1874 { 1875 const char *drvname; 1876 bool protocol = *flags & BDRV_O_PROTOCOL; 1877 bool parse_filename = false; 1878 BlockDriver *drv = NULL; 1879 Error *local_err = NULL; 1880 1881 /* 1882 * Caution: while qdict_get_try_str() is fine, getting non-string 1883 * types would require more care. When @options come from 1884 * -blockdev or blockdev_add, its members are typed according to 1885 * the QAPI schema, but when they come from -drive, they're all 1886 * QString. 1887 */ 1888 drvname = qdict_get_try_str(*options, "driver"); 1889 if (drvname) { 1890 drv = bdrv_find_format(drvname); 1891 if (!drv) { 1892 error_setg(errp, "Unknown driver '%s'", drvname); 1893 return -ENOENT; 1894 } 1895 /* If the user has explicitly specified the driver, this choice should 1896 * override the BDRV_O_PROTOCOL flag */ 1897 protocol = drv->bdrv_file_open; 1898 } 1899 1900 if (protocol) { 1901 *flags |= BDRV_O_PROTOCOL; 1902 } else { 1903 *flags &= ~BDRV_O_PROTOCOL; 1904 } 1905 1906 /* Translate cache options from flags into options */ 1907 update_options_from_flags(*options, *flags); 1908 1909 /* Fetch the file name from the options QDict if necessary */ 1910 if (protocol && filename) { 1911 if (!qdict_haskey(*options, "filename")) { 1912 qdict_put_str(*options, "filename", filename); 1913 parse_filename = true; 1914 } else { 1915 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1916 "the same time"); 1917 return -EINVAL; 1918 } 1919 } 1920 1921 /* Find the right block driver */ 1922 /* See cautionary note on accessing @options above */ 1923 filename = qdict_get_try_str(*options, "filename"); 1924 1925 if (!drvname && protocol) { 1926 if (filename) { 1927 drv = bdrv_find_protocol(filename, parse_filename, errp); 1928 if 
(!drv) { 1929 return -EINVAL; 1930 } 1931 1932 drvname = drv->format_name; 1933 qdict_put_str(*options, "driver", drvname); 1934 } else { 1935 error_setg(errp, "Must specify either driver or file"); 1936 return -EINVAL; 1937 } 1938 } 1939 1940 assert(drv || !protocol); 1941 1942 /* Driver-specific filename parsing */ 1943 if (drv && drv->bdrv_parse_filename && parse_filename) { 1944 drv->bdrv_parse_filename(filename, *options, &local_err); 1945 if (local_err) { 1946 error_propagate(errp, local_err); 1947 return -EINVAL; 1948 } 1949 1950 if (!drv->bdrv_needs_filename) { 1951 qdict_del(*options, "filename"); 1952 } 1953 } 1954 1955 return 0; 1956 } 1957 1958 static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, 1959 uint64_t perm, uint64_t shared, 1960 GSList *ignore_children, Error **errp); 1961 static void bdrv_child_abort_perm_update(BdrvChild *c); 1962 static void bdrv_child_set_perm(BdrvChild *c); 1963 1964 typedef struct BlockReopenQueueEntry { 1965 bool prepared; 1966 bool perms_checked; 1967 BDRVReopenState state; 1968 QTAILQ_ENTRY(BlockReopenQueueEntry) entry; 1969 } BlockReopenQueueEntry; 1970 1971 /* 1972 * Return the flags that @bs will have after the reopens in @q have 1973 * successfully completed. If @q is NULL (or @bs is not contained in @q), 1974 * return the current flags. 1975 */ 1976 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 1977 { 1978 BlockReopenQueueEntry *entry; 1979 1980 if (q != NULL) { 1981 QTAILQ_FOREACH(entry, q, entry) { 1982 if (entry->state.bs == bs) { 1983 return entry->state.flags; 1984 } 1985 } 1986 } 1987 1988 return bs->open_flags; 1989 } 1990 1991 /* Returns whether the image file can be written to after the reopen queue @q 1992 * has been successfully applied, or right now if @q is NULL. 
*/ 1993 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, 1994 BlockReopenQueue *q) 1995 { 1996 int flags = bdrv_reopen_get_flags(q, bs); 1997 1998 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 1999 } 2000 2001 /* 2002 * Return whether the BDS can be written to. This is not necessarily 2003 * the same as !bdrv_is_read_only(bs), as inactivated images may not 2004 * be written to but do not count as read-only images. 2005 */ 2006 bool bdrv_is_writable(BlockDriverState *bs) 2007 { 2008 return bdrv_is_writable_after_reopen(bs, NULL); 2009 } 2010 2011 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 2012 BdrvChild *c, BdrvChildRole role, 2013 BlockReopenQueue *reopen_queue, 2014 uint64_t parent_perm, uint64_t parent_shared, 2015 uint64_t *nperm, uint64_t *nshared) 2016 { 2017 assert(bs->drv && bs->drv->bdrv_child_perm); 2018 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 2019 parent_perm, parent_shared, 2020 nperm, nshared); 2021 /* TODO Take force_share from reopen_queue */ 2022 if (child_bs && child_bs->force_share) { 2023 *nshared = BLK_PERM_ALL; 2024 } 2025 } 2026 2027 /* 2028 * Check whether permissions on this node can be changed in a way that 2029 * @cumulative_perms and @cumulative_shared_perms are the new cumulative 2030 * permissions of all its parents. This involves checking whether all necessary 2031 * permission changes to child nodes can be performed. 2032 * 2033 * A call to this function must always be followed by a call to bdrv_set_perm() 2034 * or bdrv_abort_perm_update(). 
2035 */ 2036 static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, 2037 uint64_t cumulative_perms, 2038 uint64_t cumulative_shared_perms, 2039 GSList *ignore_children, Error **errp) 2040 { 2041 BlockDriver *drv = bs->drv; 2042 BdrvChild *c; 2043 int ret; 2044 2045 /* Write permissions never work with read-only images */ 2046 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2047 !bdrv_is_writable_after_reopen(bs, q)) 2048 { 2049 if (!bdrv_is_writable_after_reopen(bs, NULL)) { 2050 error_setg(errp, "Block node is read-only"); 2051 } else { 2052 uint64_t current_perms, current_shared; 2053 bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); 2054 if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { 2055 error_setg(errp, "Cannot make block node read-only, there is " 2056 "a writer on it"); 2057 } else { 2058 error_setg(errp, "Cannot make block node read-only and create " 2059 "a writer on it"); 2060 } 2061 } 2062 2063 return -EPERM; 2064 } 2065 2066 /* 2067 * Unaligned requests will automatically be aligned to bl.request_alignment 2068 * and without RESIZE we can't extend requests to write to space beyond the 2069 * end of the image, so it's required that the image size is aligned. 
2070 */ 2071 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 2072 !(cumulative_perms & BLK_PERM_RESIZE)) 2073 { 2074 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { 2075 error_setg(errp, "Cannot get 'write' permission without 'resize': " 2076 "Image size is not a multiple of request " 2077 "alignment"); 2078 return -EPERM; 2079 } 2080 } 2081 2082 /* Check this node */ 2083 if (!drv) { 2084 return 0; 2085 } 2086 2087 if (drv->bdrv_check_perm) { 2088 ret = drv->bdrv_check_perm(bs, cumulative_perms, 2089 cumulative_shared_perms, errp); 2090 if (ret < 0) { 2091 return ret; 2092 } 2093 } 2094 2095 /* Drivers that never have children can omit .bdrv_child_perm() */ 2096 if (!drv->bdrv_child_perm) { 2097 assert(QLIST_EMPTY(&bs->children)); 2098 return 0; 2099 } 2100 2101 /* Check all children */ 2102 QLIST_FOREACH(c, &bs->children, next) { 2103 uint64_t cur_perm, cur_shared; 2104 2105 bdrv_child_perm(bs, c->bs, c, c->role, q, 2106 cumulative_perms, cumulative_shared_perms, 2107 &cur_perm, &cur_shared); 2108 ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, 2109 errp); 2110 if (ret < 0) { 2111 return ret; 2112 } 2113 } 2114 2115 return 0; 2116 } 2117 2118 /* 2119 * Notifies drivers that after a previous bdrv_check_perm() call, the 2120 * permission update is not performed and any preparations made for it (e.g. 2121 * taken file locks) need to be undone. 2122 * 2123 * This function recursively notifies all child nodes. 
2124 */ 2125 static void bdrv_abort_perm_update(BlockDriverState *bs) 2126 { 2127 BlockDriver *drv = bs->drv; 2128 BdrvChild *c; 2129 2130 if (!drv) { 2131 return; 2132 } 2133 2134 if (drv->bdrv_abort_perm_update) { 2135 drv->bdrv_abort_perm_update(bs); 2136 } 2137 2138 QLIST_FOREACH(c, &bs->children, next) { 2139 bdrv_child_abort_perm_update(c); 2140 } 2141 } 2142 2143 static void bdrv_set_perm(BlockDriverState *bs) 2144 { 2145 uint64_t cumulative_perms, cumulative_shared_perms; 2146 BlockDriver *drv = bs->drv; 2147 BdrvChild *c; 2148 2149 if (!drv) { 2150 return; 2151 } 2152 2153 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); 2154 2155 /* Update this node */ 2156 if (drv->bdrv_set_perm) { 2157 drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 2158 } 2159 2160 /* Drivers that never have children can omit .bdrv_child_perm() */ 2161 if (!drv->bdrv_child_perm) { 2162 assert(QLIST_EMPTY(&bs->children)); 2163 return; 2164 } 2165 2166 /* Update all children */ 2167 QLIST_FOREACH(c, &bs->children, next) { 2168 bdrv_child_set_perm(c); 2169 } 2170 } 2171 2172 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 2173 uint64_t *shared_perm) 2174 { 2175 BdrvChild *c; 2176 uint64_t cumulative_perms = 0; 2177 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2178 2179 QLIST_FOREACH(c, &bs->parents, next_parent) { 2180 cumulative_perms |= c->perm; 2181 cumulative_shared_perms &= c->shared_perm; 2182 } 2183 2184 *perm = cumulative_perms; 2185 *shared_perm = cumulative_shared_perms; 2186 } 2187 2188 static char *bdrv_child_user_desc(BdrvChild *c) 2189 { 2190 if (c->klass->get_parent_desc) { 2191 return c->klass->get_parent_desc(c); 2192 } 2193 2194 return g_strdup("another user"); 2195 } 2196 2197 char *bdrv_perm_names(uint64_t perm) 2198 { 2199 struct perm_name { 2200 uint64_t perm; 2201 const char *name; 2202 } permissions[] = { 2203 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 2204 { BLK_PERM_WRITE, "write" }, 2205 
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 2206 { BLK_PERM_RESIZE, "resize" }, 2207 { BLK_PERM_GRAPH_MOD, "change children" }, 2208 { 0, NULL } 2209 }; 2210 2211 GString *result = g_string_sized_new(30); 2212 struct perm_name *p; 2213 2214 for (p = permissions; p->name; p++) { 2215 if (perm & p->perm) { 2216 if (result->len > 0) { 2217 g_string_append(result, ", "); 2218 } 2219 g_string_append(result, p->name); 2220 } 2221 } 2222 2223 return g_string_free(result, FALSE); 2224 } 2225 2226 /* 2227 * Checks whether a new reference to @bs can be added if the new user requires 2228 * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is 2229 * set, the BdrvChild objects in this list are ignored in the calculations; 2230 * this allows checking permission updates for an existing reference. 2231 * 2232 * Needs to be followed by a call to either bdrv_set_perm() or 2233 * bdrv_abort_perm_update(). */ 2234 static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, 2235 uint64_t new_used_perm, 2236 uint64_t new_shared_perm, 2237 GSList *ignore_children, 2238 Error **errp) 2239 { 2240 BdrvChild *c; 2241 uint64_t cumulative_perms = new_used_perm; 2242 uint64_t cumulative_shared_perms = new_shared_perm; 2243 2244 2245 /* There is no reason why anyone couldn't tolerate write_unchanged */ 2246 assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); 2247 2248 QLIST_FOREACH(c, &bs->parents, next_parent) { 2249 if (g_slist_find(ignore_children, c)) { 2250 continue; 2251 } 2252 2253 if ((new_used_perm & c->shared_perm) != new_used_perm) { 2254 char *user = bdrv_child_user_desc(c); 2255 char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); 2256 2257 error_setg(errp, "Conflicts with use by %s as '%s', which does not " 2258 "allow '%s' on %s", 2259 user, c->name, perm_names, bdrv_get_node_name(c->bs)); 2260 g_free(user); 2261 g_free(perm_names); 2262 return -EPERM; 2263 } 2264 2265 if ((c->perm & new_shared_perm) != c->perm) { 2266 
char *user = bdrv_child_user_desc(c); 2267 char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); 2268 2269 error_setg(errp, "Conflicts with use by %s as '%s', which uses " 2270 "'%s' on %s", 2271 user, c->name, perm_names, bdrv_get_node_name(c->bs)); 2272 g_free(user); 2273 g_free(perm_names); 2274 return -EPERM; 2275 } 2276 2277 cumulative_perms |= c->perm; 2278 cumulative_shared_perms &= c->shared_perm; 2279 } 2280 2281 return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, 2282 ignore_children, errp); 2283 } 2284 2285 /* Needs to be followed by a call to either bdrv_child_set_perm() or 2286 * bdrv_child_abort_perm_update(). */ 2287 static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, 2288 uint64_t perm, uint64_t shared, 2289 GSList *ignore_children, Error **errp) 2290 { 2291 int ret; 2292 2293 ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); 2294 ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); 2295 g_slist_free(ignore_children); 2296 2297 if (ret < 0) { 2298 return ret; 2299 } 2300 2301 if (!c->has_backup_perm) { 2302 c->has_backup_perm = true; 2303 c->backup_perm = c->perm; 2304 c->backup_shared_perm = c->shared_perm; 2305 } 2306 /* 2307 * Note: it's OK if c->has_backup_perm was already set, as we can find the 2308 * same child twice during check_perm procedure 2309 */ 2310 2311 c->perm = perm; 2312 c->shared_perm = shared; 2313 2314 return 0; 2315 } 2316 2317 static void bdrv_child_set_perm(BdrvChild *c) 2318 { 2319 c->has_backup_perm = false; 2320 2321 bdrv_set_perm(c->bs); 2322 } 2323 2324 static void bdrv_child_abort_perm_update(BdrvChild *c) 2325 { 2326 if (c->has_backup_perm) { 2327 c->perm = c->backup_perm; 2328 c->shared_perm = c->backup_shared_perm; 2329 c->has_backup_perm = false; 2330 } 2331 2332 bdrv_abort_perm_update(c->bs); 2333 } 2334 2335 static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) 2336 { 2337 int ret; 2338 uint64_t perm, 
shared_perm; 2339 2340 bdrv_get_cumulative_perm(bs, &perm, &shared_perm); 2341 ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, errp); 2342 if (ret < 0) { 2343 bdrv_abort_perm_update(bs); 2344 return ret; 2345 } 2346 bdrv_set_perm(bs); 2347 2348 return 0; 2349 } 2350 2351 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 2352 Error **errp) 2353 { 2354 Error *local_err = NULL; 2355 int ret; 2356 2357 ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, &local_err); 2358 if (ret < 0) { 2359 bdrv_child_abort_perm_update(c); 2360 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { 2361 /* tighten permissions */ 2362 error_propagate(errp, local_err); 2363 } else { 2364 /* 2365 * Our caller may intend to only loosen restrictions and 2366 * does not expect this function to fail. Errors are not 2367 * fatal in such a case, so we can just hide them from our 2368 * caller. 2369 */ 2370 error_free(local_err); 2371 ret = 0; 2372 } 2373 return ret; 2374 } 2375 2376 bdrv_child_set_perm(c); 2377 2378 return 0; 2379 } 2380 2381 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) 2382 { 2383 uint64_t parent_perms, parent_shared; 2384 uint64_t perms, shared; 2385 2386 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); 2387 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 2388 parent_perms, parent_shared, &perms, &shared); 2389 2390 return bdrv_child_try_set_perm(c, perms, shared, errp); 2391 } 2392 2393 /* 2394 * Default implementation for .bdrv_child_perm() for block filters: 2395 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the 2396 * filtered child. 
2397 */ 2398 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 2399 BdrvChildRole role, 2400 BlockReopenQueue *reopen_queue, 2401 uint64_t perm, uint64_t shared, 2402 uint64_t *nperm, uint64_t *nshared) 2403 { 2404 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 2405 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 2406 } 2407 2408 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, 2409 BdrvChildRole role, 2410 BlockReopenQueue *reopen_queue, 2411 uint64_t perm, uint64_t shared, 2412 uint64_t *nperm, uint64_t *nshared) 2413 { 2414 assert(role & BDRV_CHILD_COW); 2415 2416 /* 2417 * We want consistent read from backing files if the parent needs it. 2418 * No other operations are performed on backing files. 2419 */ 2420 perm &= BLK_PERM_CONSISTENT_READ; 2421 2422 /* 2423 * If the parent can deal with changing data, we're okay with a 2424 * writable and resizable backing file. 2425 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
2426 */ 2427 if (shared & BLK_PERM_WRITE) { 2428 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 2429 } else { 2430 shared = 0; 2431 } 2432 2433 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | 2434 BLK_PERM_WRITE_UNCHANGED; 2435 2436 if (bs->open_flags & BDRV_O_INACTIVE) { 2437 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2438 } 2439 2440 *nperm = perm; 2441 *nshared = shared; 2442 } 2443 2444 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, 2445 BdrvChildRole role, 2446 BlockReopenQueue *reopen_queue, 2447 uint64_t perm, uint64_t shared, 2448 uint64_t *nperm, uint64_t *nshared) 2449 { 2450 int flags; 2451 2452 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); 2453 2454 flags = bdrv_reopen_get_flags(reopen_queue, bs); 2455 2456 /* 2457 * Apart from the modifications below, the same permissions are 2458 * forwarded and left alone as for filters 2459 */ 2460 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2461 perm, shared, &perm, &shared); 2462 2463 if (role & BDRV_CHILD_METADATA) { 2464 /* Format drivers may touch metadata even if the guest doesn't write */ 2465 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) { 2466 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2467 } 2468 2469 /* 2470 * bs->file always needs to be consistent because of the 2471 * metadata. We can never allow other users to resize or write 2472 * to it. 2473 */ 2474 if (!(flags & BDRV_O_NO_IO)) { 2475 perm |= BLK_PERM_CONSISTENT_READ; 2476 } 2477 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 2478 } 2479 2480 if (role & BDRV_CHILD_DATA) { 2481 /* 2482 * Technically, everything in this block is a subset of the 2483 * BDRV_CHILD_METADATA path taken above, and so this could 2484 * be an "else if" branch. However, that is not obvious, and 2485 * this function is not performance critical, therefore we let 2486 * this be an independent "if". 
2487 */ 2488 2489 /* 2490 * We cannot allow other users to resize the file because the 2491 * format driver might have some assumptions about the size 2492 * (e.g. because it is stored in metadata, or because the file 2493 * is split into fixed-size data files). 2494 */ 2495 shared &= ~BLK_PERM_RESIZE; 2496 2497 /* 2498 * WRITE_UNCHANGED often cannot be performed as such on the 2499 * data file. For example, the qcow2 driver may still need to 2500 * write copied clusters on copy-on-read. 2501 */ 2502 if (perm & BLK_PERM_WRITE_UNCHANGED) { 2503 perm |= BLK_PERM_WRITE; 2504 } 2505 2506 /* 2507 * If the data file is written to, the format driver may 2508 * expect to be able to resize it by writing beyond the EOF. 2509 */ 2510 if (perm & BLK_PERM_WRITE) { 2511 perm |= BLK_PERM_RESIZE; 2512 } 2513 } 2514 2515 if (bs->open_flags & BDRV_O_INACTIVE) { 2516 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 2517 } 2518 2519 *nperm = perm; 2520 *nshared = shared; 2521 } 2522 2523 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, 2524 BdrvChildRole role, BlockReopenQueue *reopen_queue, 2525 uint64_t perm, uint64_t shared, 2526 uint64_t *nperm, uint64_t *nshared) 2527 { 2528 if (role & BDRV_CHILD_FILTERED) { 2529 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 2530 BDRV_CHILD_COW))); 2531 bdrv_filter_default_perms(bs, c, role, reopen_queue, 2532 perm, shared, nperm, nshared); 2533 } else if (role & BDRV_CHILD_COW) { 2534 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA))); 2535 bdrv_default_perms_for_cow(bs, c, role, reopen_queue, 2536 perm, shared, nperm, nshared); 2537 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) { 2538 bdrv_default_perms_for_storage(bs, c, role, reopen_queue, 2539 perm, shared, nperm, nshared); 2540 } else { 2541 g_assert_not_reached(); 2542 } 2543 } 2544 2545 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) 2546 { 2547 static const uint64_t permissions[] = { 2548 [BLOCK_PERMISSION_CONSISTENT_READ] = 
BLK_PERM_CONSISTENT_READ, 2549 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE, 2550 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED, 2551 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE, 2552 [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD, 2553 }; 2554 2555 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX); 2556 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1); 2557 2558 assert(qapi_perm < BLOCK_PERMISSION__MAX); 2559 2560 return permissions[qapi_perm]; 2561 } 2562 2563 static void bdrv_replace_child_noperm(BdrvChild *child, 2564 BlockDriverState *new_bs) 2565 { 2566 BlockDriverState *old_bs = child->bs; 2567 int new_bs_quiesce_counter; 2568 int drain_saldo; 2569 2570 assert(!child->frozen); 2571 2572 if (old_bs && new_bs) { 2573 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2574 } 2575 2576 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); 2577 drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; 2578 2579 /* 2580 * If the new child node is drained but the old one was not, flush 2581 * all outstanding requests to the old child node. 2582 */ 2583 while (drain_saldo > 0 && child->klass->drained_begin) { 2584 bdrv_parent_drained_begin_single(child, true); 2585 drain_saldo--; 2586 } 2587 2588 if (old_bs) { 2589 /* Detach first so that the recursive drain sections coming from @child 2590 * are already gone and we only end the drain sections that came from 2591 * elsewhere. */ 2592 if (child->klass->detach) { 2593 child->klass->detach(child); 2594 } 2595 QLIST_REMOVE(child, next_parent); 2596 } 2597 2598 child->bs = new_bs; 2599 2600 if (new_bs) { 2601 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2602 2603 /* 2604 * Detaching the old node may have led to the new node's 2605 * quiesce_counter having been decreased. Not a problem, we 2606 * just need to recognize this here and then invoke 2607 * drained_end appropriately more often. 
2608 */ 2609 assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); 2610 drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; 2611 2612 /* Attach only after starting new drained sections, so that recursive 2613 * drain sections coming from @child don't get an extra .drained_begin 2614 * callback. */ 2615 if (child->klass->attach) { 2616 child->klass->attach(child); 2617 } 2618 } 2619 2620 /* 2621 * If the old child node was drained but the new one is not, allow 2622 * requests to come in only after the new node has been attached. 2623 */ 2624 while (drain_saldo < 0 && child->klass->drained_end) { 2625 bdrv_parent_drained_end_single(child); 2626 drain_saldo++; 2627 } 2628 } 2629 2630 /* 2631 * Updates @child to change its reference to point to @new_bs, including 2632 * checking and applying the necessary permission updates both to the old node 2633 * and to @new_bs. 2634 * 2635 * NULL is passed as @new_bs for removing the reference before freeing @child. 2636 * 2637 * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this 2638 * function uses bdrv_set_perm() to update the permissions according to the new 2639 * reference that @new_bs gets. 2640 * 2641 * Callers must ensure that child->frozen is false. 2642 */ 2643 static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) 2644 { 2645 BlockDriverState *old_bs = child->bs; 2646 2647 /* Asserts that child->frozen == false */ 2648 bdrv_replace_child_noperm(child, new_bs); 2649 2650 /* 2651 * Start with the new node's permissions. If @new_bs is a (direct 2652 * or indirect) child of @old_bs, we must complete the permission 2653 * update on @new_bs before we loosen the restrictions on @old_bs. 2654 * Otherwise, bdrv_check_perm() on @old_bs would re-initiate 2655 * updating the permissions of @new_bs, and thus not purely loosen 2656 * restrictions. 
2657 */ 2658 if (new_bs) { 2659 bdrv_set_perm(new_bs); 2660 } 2661 2662 if (old_bs) { 2663 /* 2664 * Update permissions for old node. We're just taking a parent away, so 2665 * we're loosening restrictions. Errors of permission update are not 2666 * fatal in this case, ignore them. 2667 */ 2668 bdrv_refresh_perms(old_bs, NULL); 2669 2670 /* When the parent requiring a non-default AioContext is removed, the 2671 * node moves back to the main AioContext */ 2672 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); 2673 } 2674 } 2675 2676 /* 2677 * This function steals the reference to child_bs from the caller. 2678 * That reference is later dropped by bdrv_root_unref_child(). 2679 * 2680 * On failure NULL is returned, errp is set and the reference to 2681 * child_bs is also dropped. 2682 * 2683 * The caller must hold the AioContext lock @child_bs, but not that of @ctx 2684 * (unless @child_bs is already in @ctx). 2685 */ 2686 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 2687 const char *child_name, 2688 const BdrvChildClass *child_class, 2689 BdrvChildRole child_role, 2690 AioContext *ctx, 2691 uint64_t perm, uint64_t shared_perm, 2692 void *opaque, Error **errp) 2693 { 2694 BdrvChild *child; 2695 Error *local_err = NULL; 2696 int ret; 2697 2698 ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); 2699 if (ret < 0) { 2700 bdrv_abort_perm_update(child_bs); 2701 bdrv_unref(child_bs); 2702 return NULL; 2703 } 2704 2705 child = g_new(BdrvChild, 1); 2706 *child = (BdrvChild) { 2707 .bs = NULL, 2708 .name = g_strdup(child_name), 2709 .klass = child_class, 2710 .role = child_role, 2711 .perm = perm, 2712 .shared_perm = shared_perm, 2713 .opaque = opaque, 2714 }; 2715 2716 /* If the AioContexts don't match, first try to move the subtree of 2717 * child_bs into the AioContext of the new parent. If this doesn't work, 2718 * try moving the parent into the AioContext of child_bs instead. 
*/ 2719 if (bdrv_get_aio_context(child_bs) != ctx) { 2720 ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); 2721 if (ret < 0 && child_class->can_set_aio_ctx) { 2722 GSList *ignore = g_slist_prepend(NULL, child); 2723 ctx = bdrv_get_aio_context(child_bs); 2724 if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) { 2725 error_free(local_err); 2726 ret = 0; 2727 g_slist_free(ignore); 2728 ignore = g_slist_prepend(NULL, child); 2729 child_class->set_aio_ctx(child, ctx, &ignore); 2730 } 2731 g_slist_free(ignore); 2732 } 2733 if (ret < 0) { 2734 error_propagate(errp, local_err); 2735 g_free(child); 2736 bdrv_abort_perm_update(child_bs); 2737 bdrv_unref(child_bs); 2738 return NULL; 2739 } 2740 } 2741 2742 /* This performs the matching bdrv_set_perm() for the above check. */ 2743 bdrv_replace_child(child, child_bs); 2744 2745 return child; 2746 } 2747 2748 /* 2749 * This function transfers the reference to child_bs from the caller 2750 * to parent_bs. That reference is later dropped by parent_bs on 2751 * bdrv_close() or if someone calls bdrv_unref_child(). 2752 * 2753 * On failure NULL is returned, errp is set and the reference to 2754 * child_bs is also dropped. 2755 * 2756 * If @parent_bs and @child_bs are in different AioContexts, the caller must 2757 * hold the AioContext lock for @child_bs, but not for @parent_bs. 
2758 */ 2759 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 2760 BlockDriverState *child_bs, 2761 const char *child_name, 2762 const BdrvChildClass *child_class, 2763 BdrvChildRole child_role, 2764 Error **errp) 2765 { 2766 BdrvChild *child; 2767 uint64_t perm, shared_perm; 2768 2769 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 2770 2771 assert(parent_bs->drv); 2772 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 2773 perm, shared_perm, &perm, &shared_perm); 2774 2775 child = bdrv_root_attach_child(child_bs, child_name, child_class, 2776 child_role, bdrv_get_aio_context(parent_bs), 2777 perm, shared_perm, parent_bs, errp); 2778 if (child == NULL) { 2779 return NULL; 2780 } 2781 2782 QLIST_INSERT_HEAD(&parent_bs->children, child, next); 2783 return child; 2784 } 2785 2786 static void bdrv_detach_child(BdrvChild *child) 2787 { 2788 QLIST_SAFE_REMOVE(child, next); 2789 2790 bdrv_replace_child(child, NULL); 2791 2792 g_free(child->name); 2793 g_free(child); 2794 } 2795 2796 /* Callers must ensure that child->frozen is false. */ 2797 void bdrv_root_unref_child(BdrvChild *child) 2798 { 2799 BlockDriverState *child_bs; 2800 2801 child_bs = child->bs; 2802 bdrv_detach_child(child); 2803 bdrv_unref(child_bs); 2804 } 2805 2806 /** 2807 * Clear all inherits_from pointers from children and grandchildren of 2808 * @root that point to @root, where necessary. 2809 */ 2810 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) 2811 { 2812 BdrvChild *c; 2813 2814 if (child->bs->inherits_from == root) { 2815 /* 2816 * Remove inherits_from only when the last reference between root and 2817 * child->bs goes away. 
2818 */ 2819 QLIST_FOREACH(c, &root->children, next) { 2820 if (c != child && c->bs == child->bs) { 2821 break; 2822 } 2823 } 2824 if (c == NULL) { 2825 child->bs->inherits_from = NULL; 2826 } 2827 } 2828 2829 QLIST_FOREACH(c, &child->bs->children, next) { 2830 bdrv_unset_inherits_from(root, c); 2831 } 2832 } 2833 2834 /* Callers must ensure that child->frozen is false. */ 2835 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 2836 { 2837 if (child == NULL) { 2838 return; 2839 } 2840 2841 bdrv_unset_inherits_from(parent, child); 2842 bdrv_root_unref_child(child); 2843 } 2844 2845 2846 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 2847 { 2848 BdrvChild *c; 2849 QLIST_FOREACH(c, &bs->parents, next_parent) { 2850 if (c->klass->change_media) { 2851 c->klass->change_media(c, load); 2852 } 2853 } 2854 } 2855 2856 /* Return true if you can reach parent going through child->inherits_from 2857 * recursively. If parent or child are NULL, return false */ 2858 static bool bdrv_inherits_from_recursive(BlockDriverState *child, 2859 BlockDriverState *parent) 2860 { 2861 while (child && child != parent) { 2862 child = child->inherits_from; 2863 } 2864 2865 return child != NULL; 2866 } 2867 2868 /* 2869 * Return the BdrvChildRole for @bs's backing child. bs->backing is 2870 * mostly used for COW backing children (role = COW), but also for 2871 * filtered children (role = FILTERED | PRIMARY). 2872 */ 2873 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) 2874 { 2875 if (bs->drv && bs->drv->is_filter) { 2876 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; 2877 } else { 2878 return BDRV_CHILD_COW; 2879 } 2880 } 2881 2882 /* 2883 * Sets the bs->backing link of a BDS. A new reference is created; callers 2884 * which don't need their own reference any more must call bdrv_unref(). 
2885 */ 2886 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 2887 Error **errp) 2888 { 2889 int ret = 0; 2890 bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && 2891 bdrv_inherits_from_recursive(backing_hd, bs); 2892 2893 if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { 2894 return -EPERM; 2895 } 2896 2897 if (backing_hd) { 2898 bdrv_ref(backing_hd); 2899 } 2900 2901 if (bs->backing) { 2902 /* Cannot be frozen, we checked that above */ 2903 bdrv_unref_child(bs, bs->backing); 2904 bs->backing = NULL; 2905 } 2906 2907 if (!backing_hd) { 2908 goto out; 2909 } 2910 2911 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds, 2912 bdrv_backing_role(bs), errp); 2913 if (!bs->backing) { 2914 ret = -EPERM; 2915 goto out; 2916 } 2917 2918 /* If backing_hd was already part of bs's backing chain, and 2919 * inherits_from pointed recursively to bs then let's update it to 2920 * point directly to bs (else it will become NULL). */ 2921 if (update_inherits_from) { 2922 backing_hd->inherits_from = bs; 2923 } 2924 2925 out: 2926 bdrv_refresh_limits(bs, NULL); 2927 2928 return ret; 2929 } 2930 2931 /* 2932 * Opens the backing file for a BlockDriverState if not yet open 2933 * 2934 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 2935 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 2936 * itself, all options starting with "${bdref_key}." are considered part of the 2937 * BlockdevRef. 2938 * 2939 * TODO Can this be unified with bdrv_open_image()? 
2940 */ 2941 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 2942 const char *bdref_key, Error **errp) 2943 { 2944 char *backing_filename = NULL; 2945 char *bdref_key_dot; 2946 const char *reference = NULL; 2947 int ret = 0; 2948 bool implicit_backing = false; 2949 BlockDriverState *backing_hd; 2950 QDict *options; 2951 QDict *tmp_parent_options = NULL; 2952 Error *local_err = NULL; 2953 2954 if (bs->backing != NULL) { 2955 goto free_exit; 2956 } 2957 2958 /* NULL means an empty set of options */ 2959 if (parent_options == NULL) { 2960 tmp_parent_options = qdict_new(); 2961 parent_options = tmp_parent_options; 2962 } 2963 2964 bs->open_flags &= ~BDRV_O_NO_BACKING; 2965 2966 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 2967 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 2968 g_free(bdref_key_dot); 2969 2970 /* 2971 * Caution: while qdict_get_try_str() is fine, getting non-string 2972 * types would require more care. When @parent_options come from 2973 * -blockdev or blockdev_add, its members are typed according to 2974 * the QAPI schema, but when they come from -drive, they're all 2975 * QString. 2976 */ 2977 reference = qdict_get_try_str(parent_options, bdref_key); 2978 if (reference || qdict_haskey(options, "file.filename")) { 2979 /* keep backing_filename NULL */ 2980 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 2981 qobject_unref(options); 2982 goto free_exit; 2983 } else { 2984 if (qdict_size(options) == 0) { 2985 /* If the user specifies options that do not modify the 2986 * backing file's behavior, we might still consider it the 2987 * implicit backing file. But it's easier this way, and 2988 * just specifying some of the backing BDS's options is 2989 * only possible with -drive anyway (otherwise the QAPI 2990 * schema forces the user to specify everything). 
*/ 2991 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file); 2992 } 2993 2994 backing_filename = bdrv_get_full_backing_filename(bs, &local_err); 2995 if (local_err) { 2996 ret = -EINVAL; 2997 error_propagate(errp, local_err); 2998 qobject_unref(options); 2999 goto free_exit; 3000 } 3001 } 3002 3003 if (!bs->drv || !bs->drv->supports_backing) { 3004 ret = -EINVAL; 3005 error_setg(errp, "Driver doesn't support backing files"); 3006 qobject_unref(options); 3007 goto free_exit; 3008 } 3009 3010 if (!reference && 3011 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 3012 qdict_put_str(options, "driver", bs->backing_format); 3013 } 3014 3015 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, 3016 &child_of_bds, bdrv_backing_role(bs), errp); 3017 if (!backing_hd) { 3018 bs->open_flags |= BDRV_O_NO_BACKING; 3019 error_prepend(errp, "Could not open backing file: "); 3020 ret = -EINVAL; 3021 goto free_exit; 3022 } 3023 3024 if (implicit_backing) { 3025 bdrv_refresh_filename(backing_hd); 3026 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3027 backing_hd->filename); 3028 } 3029 3030 /* Hook up the backing file link; drop our reference, bs owns the 3031 * backing_hd reference now */ 3032 ret = bdrv_set_backing_hd(bs, backing_hd, errp); 3033 bdrv_unref(backing_hd); 3034 if (ret < 0) { 3035 goto free_exit; 3036 } 3037 3038 qdict_del(parent_options, bdref_key); 3039 3040 free_exit: 3041 g_free(backing_filename); 3042 qobject_unref(tmp_parent_options); 3043 return ret; 3044 } 3045 3046 static BlockDriverState * 3047 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 3048 BlockDriverState *parent, const BdrvChildClass *child_class, 3049 BdrvChildRole child_role, bool allow_none, Error **errp) 3050 { 3051 BlockDriverState *bs = NULL; 3052 QDict *image_options; 3053 char *bdref_key_dot; 3054 const char *reference; 3055 3056 assert(child_class != NULL); 3057 3058 bdref_key_dot = 
g_strdup_printf("%s.", bdref_key); 3059 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 3060 g_free(bdref_key_dot); 3061 3062 /* 3063 * Caution: while qdict_get_try_str() is fine, getting non-string 3064 * types would require more care. When @options come from 3065 * -blockdev or blockdev_add, its members are typed according to 3066 * the QAPI schema, but when they come from -drive, they're all 3067 * QString. 3068 */ 3069 reference = qdict_get_try_str(options, bdref_key); 3070 if (!filename && !reference && !qdict_size(image_options)) { 3071 if (!allow_none) { 3072 error_setg(errp, "A block device must be specified for \"%s\"", 3073 bdref_key); 3074 } 3075 qobject_unref(image_options); 3076 goto done; 3077 } 3078 3079 bs = bdrv_open_inherit(filename, reference, image_options, 0, 3080 parent, child_class, child_role, errp); 3081 if (!bs) { 3082 goto done; 3083 } 3084 3085 done: 3086 qdict_del(options, bdref_key); 3087 return bs; 3088 } 3089 3090 /* 3091 * Opens a disk image whose options are given as BlockdevRef in another block 3092 * device's options. 3093 * 3094 * If allow_none is true, no image will be opened if filename is false and no 3095 * BlockdevRef is given. NULL will be returned, but errp remains unset. 3096 * 3097 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 3098 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 3099 * itself, all options starting with "${bdref_key}." are considered part of the 3100 * BlockdevRef. 3101 * 3102 * The BlockdevRef will be removed from the options QDict. 
3103 */ 3104 BdrvChild *bdrv_open_child(const char *filename, 3105 QDict *options, const char *bdref_key, 3106 BlockDriverState *parent, 3107 const BdrvChildClass *child_class, 3108 BdrvChildRole child_role, 3109 bool allow_none, Error **errp) 3110 { 3111 BlockDriverState *bs; 3112 3113 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, 3114 child_role, allow_none, errp); 3115 if (bs == NULL) { 3116 return NULL; 3117 } 3118 3119 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, 3120 errp); 3121 } 3122 3123 /* 3124 * TODO Future callers may need to specify parent/child_class in order for 3125 * option inheritance to work. Existing callers use it for the root node. 3126 */ 3127 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) 3128 { 3129 BlockDriverState *bs = NULL; 3130 QObject *obj = NULL; 3131 QDict *qdict = NULL; 3132 const char *reference = NULL; 3133 Visitor *v = NULL; 3134 3135 if (ref->type == QTYPE_QSTRING) { 3136 reference = ref->u.reference; 3137 } else { 3138 BlockdevOptions *options = &ref->u.definition; 3139 assert(ref->type == QTYPE_QDICT); 3140 3141 v = qobject_output_visitor_new(&obj); 3142 visit_type_BlockdevOptions(v, NULL, &options, &error_abort); 3143 visit_complete(v, &obj); 3144 3145 qdict = qobject_to(QDict, obj); 3146 qdict_flatten(qdict); 3147 3148 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for 3149 * compatibility with other callers) rather than what we want as the 3150 * real defaults. Apply the defaults here instead. 
*/ 3151 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); 3152 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); 3153 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off"); 3154 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off"); 3155 3156 } 3157 3158 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); 3159 obj = NULL; 3160 qobject_unref(obj); 3161 visit_free(v); 3162 return bs; 3163 } 3164 3165 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 3166 int flags, 3167 QDict *snapshot_options, 3168 Error **errp) 3169 { 3170 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 3171 char *tmp_filename = g_malloc0(PATH_MAX + 1); 3172 int64_t total_size; 3173 QemuOpts *opts = NULL; 3174 BlockDriverState *bs_snapshot = NULL; 3175 int ret; 3176 3177 /* if snapshot, we create a temporary backing file and open it 3178 instead of opening 'filename' directly */ 3179 3180 /* Get the required size from the image */ 3181 total_size = bdrv_getlength(bs); 3182 if (total_size < 0) { 3183 error_setg_errno(errp, -total_size, "Could not get image size"); 3184 goto out; 3185 } 3186 3187 /* Create the temporary image */ 3188 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 3189 if (ret < 0) { 3190 error_setg_errno(errp, -ret, "Could not get temporary filename"); 3191 goto out; 3192 } 3193 3194 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 3195 &error_abort); 3196 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 3197 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 3198 qemu_opts_del(opts); 3199 if (ret < 0) { 3200 error_prepend(errp, "Could not create temporary overlay '%s': ", 3201 tmp_filename); 3202 goto out; 3203 } 3204 3205 /* Prepare options QDict for the temporary file */ 3206 qdict_put_str(snapshot_options, "file.driver", "file"); 3207 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 3208 qdict_put_str(snapshot_options, "driver", 
"qcow2"); 3209 3210 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 3211 snapshot_options = NULL; 3212 if (!bs_snapshot) { 3213 goto out; 3214 } 3215 3216 /* bdrv_append() consumes a strong reference to bs_snapshot 3217 * (i.e. it will call bdrv_unref() on it) even on error, so in 3218 * order to be able to return one, we have to increase 3219 * bs_snapshot's refcount here */ 3220 bdrv_ref(bs_snapshot); 3221 ret = bdrv_append(bs_snapshot, bs, errp); 3222 if (ret < 0) { 3223 bs_snapshot = NULL; 3224 goto out; 3225 } 3226 3227 out: 3228 qobject_unref(snapshot_options); 3229 g_free(tmp_filename); 3230 return bs_snapshot; 3231 } 3232 3233 /* 3234 * Opens a disk image (raw, qcow2, vmdk, ...) 3235 * 3236 * options is a QDict of options to pass to the block drivers, or NULL for an 3237 * empty set of options. The reference to the QDict belongs to the block layer 3238 * after the call (even on failure), so if the caller intends to reuse the 3239 * dictionary, it needs to use qobject_ref() before calling bdrv_open. 3240 * 3241 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 3242 * If it is not NULL, the referenced BDS will be reused. 3243 * 3244 * The reference parameter may be used to specify an existing block device which 3245 * should be opened. If specified, neither options nor a filename may be given, 3246 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 
 *
 * Note: this function takes no pbs parameter. The node is handed back as the
 * return value instead: a new strong reference to either the existing node
 * named by @reference or a freshly created node (for snapshot=on, the
 * temporary overlay appended on top of it). NULL is returned on failure.
 *
 * @parent, @child_class and @child_role describe the parent link when the
 * node is opened as a child of another node. @parent and @child_class must
 * be either both set or both NULL, and @flags must be 0 when @child_class is
 * set (the flags are then filled in by child_class->inherit_options()).
 */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp)
{
    int ret;
    BlockBackend *file = NULL;
    BlockDriverState *bs;
    BlockDriver *drv = NULL;
    BdrvChild *child;
    const char *drvname;
    const char *backing;
    Error *local_err = NULL;
    QDict *snapshot_options = NULL;
    int snapshot_flags = 0;

    assert(!child_class || !flags);
    assert(!child_class == !parent);

    if (reference) {
        /* Only the emptiness of @options matters here; the dict itself is
         * dropped right away, on success as well as on failure. */
        bool options_non_empty = options ? qdict_size(options) : false;
        qobject_unref(options);

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return NULL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return NULL;
        }

        /* The caller gets its own reference to the existing node */
        bdrv_ref(bs);
        return bs;
    }

    bs = bdrv_new();

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    /* json: syntax counts as explicit options, as if in the QDict */
    parse_json_protocol(options, &filename, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Snapshot of what was explicitly requested, taken before any inherited
     * or default values are merged into @options below. */
    bs->explicit_options = qdict_clone_shallow(options);

    if (child_class) {
        bool parent_is_format;

        if (parent->drv) {
            parent_is_format = parent->drv->is_format;
        } else {
            /*
             * parent->drv is not set yet because this node is opened for
             * (potential) format probing. That means that @parent is going
             * to be a format node.
             */
            parent_is_format = true;
        }

        bs->inherits_from = parent;
        child_class->inherit_options(child_role, parent_is_format,
                                     &flags, options,
                                     parent->open_flags, parent->options);
    }

    ret = bdrv_fill_options(&options, filename, &flags, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
     * Caution: getting a boolean member of @options requires care.
     * When @options come from -blockdev or blockdev_add, members are
     * typed according to the QAPI schema, but when they come from
     * -drive, they're all QString.
     */
    if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
        !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
        flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
    } else {
        flags &= ~BDRV_O_RDWR;
    }

    if (flags & BDRV_O_SNAPSHOT) {
        snapshot_options = qdict_new();
        bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
                                   flags, options);
        /* Let bdrv_backing_options() override "read-only" */
        qdict_del(options, BDRV_OPT_READ_ONLY);
        bdrv_inherited_options(BDRV_CHILD_COW, true,
                               &flags, options, flags, options);
    }

    /*
     * bs->options keeps the full effective option set. The local @options
     * becomes a separate shallow copy; whatever is still left in it at the
     * end is reported as an unknown option (see the check further down).
     */
    bs->open_flags = flags;
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Find the right image format driver */
    /* See cautionary note on accessing @options above */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));

    /* See cautionary note on accessing @options above */
    backing = qdict_get_try_str(options, "backing");
    if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
        (backing && *backing == '\0'))
    {
        /* "backing": null and the deprecated "backing": "" both mean
         * "do not open a backing file". */
        if (backing) {
            warn_report("Use of \"backing\": \"\" is deprecated; "
                        "use \"backing\": null instead");
        }
        flags |= BDRV_O_NO_BACKING;
        qdict_del(bs->explicit_options, "backing");
        qdict_del(bs->options, "backing");
        qdict_del(options, "backing");
    }

    /* Open image file without format layer. This BlockBackend is only used for
     * probing, the block drivers will do their own bdrv_open_child() for the
     * same BDS, which is why we put the node name back into options. */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        BlockDriverState *file_bs;

        file_bs = bdrv_open_child_bs(filename, options, "file", bs,
                                     &child_of_bds, BDRV_CHILD_IMAGE,
                                     true, &local_err);
        if (local_err) {
            goto fail;
        }
        if (file_bs != NULL) {
            /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
             * looking at the header to guess the image format. This works even
             * in cases where a guest would not see a consistent state. */
            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
            blk_insert_bs(file, file_bs, &local_err);
            /* Drop the local reference whether or not the insert worked;
             * presumably @file holds its own reference on success, since
             * file_bs is still used below - TODO confirm */
            bdrv_unref(file_bs);
            if (local_err) {
                goto fail;
            }

            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
        /*
         * This option update would logically belong in bdrv_fill_options(),
         * but we first need to open bs->file for the probing to work, while
         * opening bs->file already requires the (mostly) final set of options
         * so that cache mode etc. can be inherited.
         *
         * Adding the driver later is somewhat ugly, but it's not an option
         * that would ever be inherited, so it's correct. We just need to make
         * sure to update both bs->options (which has the full effective
         * options for bs) and options (which has file.* already removed).
         */
        qdict_put_str(bs->options, "driver", drv->format_name);
        qdict_put_str(options, "driver", drv->format_name);
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        goto fail;
    }

    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
    /* file must be NULL if a protocol BDS is about to be created
     * (the inverse results in an error message from bdrv_open_common()) */
    assert(!(flags & BDRV_O_PROTOCOL) || !file);

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* The probing BlockBackend is no longer needed. Clearing the pointer is
     * not strictly required: later errors go to close_and_fail, which does
     * not touch @file. */
    if (file) {
        blk_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    /* Remove all children options and references
     * from bs->options and bs->explicit_options */
    QLIST_FOREACH(child, &bs->children, next) {
        char *child_key_dot;
        child_key_dot = g_strdup_printf("%s.", child->name);
        qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
        qdict_extract_subqdict(bs->options, NULL, child_key_dot);
        qdict_del(bs->explicit_options, child->name);
        qdict_del(bs->options, child->name);
        g_free(child_key_dot);
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        /* Only the first leftover entry is named in the error message */
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp,
                       "Block format '%s' does not support the option '%s'",
                       drv->format_name, entry->key);
        }

        goto close_and_fail;
    }

    bdrv_parent_cb_change_media(bs, true);

    qobject_unref(options);
    options = NULL;

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        BlockDriverState *snapshot_bs;
        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
                                                snapshot_options, &local_err);
        /* Cleared unconditionally so that close_and_fail does not unref it
         * again; presumably bdrv_append_temp_snapshot() consumes the dict
         * even on error - TODO confirm against its definition. */
        snapshot_options = NULL;
        if (local_err) {
            goto close_and_fail;
        }
        /* We are not going to return bs but the overlay on top of it
         * (snapshot_bs); thus, we have to drop the strong reference to bs
         * (which we obtained by calling bdrv_new()). bs will not be deleted,
         * though, because the overlay still has a reference to it. */
        bdrv_unref(bs);
        bs = snapshot_bs;
    }

    return bs;

    /*
     * fail: used up to and including a failed bdrv_open_common(). The option
     * dicts attached to bs are released by hand and the pointers cleared
     * before the final bdrv_unref().
     */
fail:
    blk_unref(file);
    qobject_unref(snapshot_options);
    qobject_unref(bs->explicit_options);
    qobject_unref(bs->options);
    qobject_unref(options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    bdrv_unref(bs);
    error_propagate(errp, local_err);
    return NULL;

    /*
     * close_and_fail: used once bdrv_open_common() has succeeded. Only the
     * node reference and the local dicts are dropped here.
     */
close_and_fail:
    bdrv_unref(bs);
    qobject_unref(snapshot_options);
    qobject_unref(options);
    error_propagate(errp, local_err);
    return NULL;
}

/*
 * Opens a top-level node: forwards to bdrv_open_inherit() without a parent,
 * child class or child role. See bdrv_open_inherit() for the meaning of the
 * remaining parameters and of the return value.
 */
BlockDriverState *bdrv_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp)
{
    return bdrv_open_inherit(filename, reference, options, flags, NULL,
                             NULL, 0, errp);
}

/*
 * Return true if the NULL-terminated @list contains @str.
 * A NULL @str or a NULL @list yields false.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    if (str && list) {
        int i;
        for (i = 0; list[i] != NULL; i++) {
            if (!strcmp(str, list[i])) {
                return true;
            }
        }
    }
    return false;
}

/*
 * Check that every option set in @bs->options is also set in
 * @new_opts.
 *
 * Options listed in the common_options list and in
 * @bs->drv->mutable_opts are skipped.
 *
 * Return 0 on success, otherwise return -EINVAL and set @errp.
 * Only the first offending option is reported.
 */
static int bdrv_reset_options_allowed(BlockDriverState *bs,
                                      const QDict *new_opts, Error **errp)
{
    const QDictEntry *e;
    /* These options are common to all block drivers and are handled
     * in bdrv_reopen_prepare() so they can be left out of @new_opts */
    const char *const common_options[] = {
        "node-name", "discard", "cache.direct", "cache.no-flush",
        "read-only", "auto-read-only", "detect-zeroes", NULL
    };

    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
        if (!qdict_haskey(new_opts, e->key) &&
            !is_str_in_list(e->key, common_options) &&
            !is_str_in_list(e->key, bs->drv->mutable_opts)) {
            error_setg(errp, "Option '%s' cannot be reset "
                       "to its default value", e->key);
            return -EINVAL;
        }
    }

    return 0;
}

/*
 * Returns true if @child can be reached recursively from @bs
 * (@bs itself counts as reachable). The search follows bs->children
 * depth-first.
 */
static bool bdrv_recurse_has_child(BlockDriverState *bs,
                                   BlockDriverState *child)
{
    BdrvChild *c;

    if (bs == child) {
        return true;
    }

    QLIST_FOREACH(c, &bs->children, next) {
        if (bdrv_recurse_has_child(c->bs, child)) {
            return true;
        }
    }

    return false;
}

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
 * already performed, or alternatively may be NULL, in which case a new
 * BlockReopenQueue will be created and initialized.
This newly created BlockReopenQueue should be 3616 * passed back in for subsequent calls that are intended to be of the same 3617 * atomic 'set'. 3618 * 3619 * bs is the BlockDriverState to add to the reopen queue. 3620 * 3621 * options contains the changed options for the associated bs 3622 * (the BlockReopenQueue takes ownership) 3623 * 3624 * flags contains the open flags for the associated bs 3625 * 3626 * returns a pointer to bs_queue, which is either the newly allocated 3627 * bs_queue, or the existing bs_queue being used. 3628 * 3629 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 3630 */ 3631 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 3632 BlockDriverState *bs, 3633 QDict *options, 3634 const BdrvChildClass *klass, 3635 BdrvChildRole role, 3636 bool parent_is_format, 3637 QDict *parent_options, 3638 int parent_flags, 3639 bool keep_old_opts) 3640 { 3641 assert(bs != NULL); 3642 3643 BlockReopenQueueEntry *bs_entry; 3644 BdrvChild *child; 3645 QDict *old_options, *explicit_options, *options_copy; 3646 int flags; 3647 QemuOpts *opts; 3648 3649 /* Make sure that the caller remembered to use a drained section. This is 3650 * important to avoid graph changes between the recursive queuing here and 3651 * bdrv_reopen_multiple(). */ 3652 assert(bs->quiesce_counter > 0); 3653 3654 if (bs_queue == NULL) { 3655 bs_queue = g_new0(BlockReopenQueue, 1); 3656 QTAILQ_INIT(bs_queue); 3657 } 3658 3659 if (!options) { 3660 options = qdict_new(); 3661 } 3662 3663 /* Check if this BlockDriverState is already in the queue */ 3664 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 3665 if (bs == bs_entry->state.bs) { 3666 break; 3667 } 3668 } 3669 3670 /* 3671 * Precedence of options: 3672 * 1. Explicitly passed in options (highest) 3673 * 2. Retained from explicitly set options of bs 3674 * 3. Inherited from parent node 3675 * 4. 
Retained from effective options of bs 3676 */ 3677 3678 /* Old explicitly set values (don't overwrite by inherited value) */ 3679 if (bs_entry || keep_old_opts) { 3680 old_options = qdict_clone_shallow(bs_entry ? 3681 bs_entry->state.explicit_options : 3682 bs->explicit_options); 3683 bdrv_join_options(bs, options, old_options); 3684 qobject_unref(old_options); 3685 } 3686 3687 explicit_options = qdict_clone_shallow(options); 3688 3689 /* Inherit from parent node */ 3690 if (parent_options) { 3691 flags = 0; 3692 klass->inherit_options(role, parent_is_format, &flags, options, 3693 parent_flags, parent_options); 3694 } else { 3695 flags = bdrv_get_flags(bs); 3696 } 3697 3698 if (keep_old_opts) { 3699 /* Old values are used for options that aren't set yet */ 3700 old_options = qdict_clone_shallow(bs->options); 3701 bdrv_join_options(bs, options, old_options); 3702 qobject_unref(old_options); 3703 } 3704 3705 /* We have the final set of options so let's update the flags */ 3706 options_copy = qdict_clone_shallow(options); 3707 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 3708 qemu_opts_absorb_qdict(opts, options_copy, NULL); 3709 update_flags_from_options(&flags, opts); 3710 qemu_opts_del(opts); 3711 qobject_unref(options_copy); 3712 3713 /* bdrv_open_inherit() sets and clears some additional flags internally */ 3714 flags &= ~BDRV_O_PROTOCOL; 3715 if (flags & BDRV_O_RDWR) { 3716 flags |= BDRV_O_ALLOW_RDWR; 3717 } 3718 3719 if (!bs_entry) { 3720 bs_entry = g_new0(BlockReopenQueueEntry, 1); 3721 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry); 3722 } else { 3723 qobject_unref(bs_entry->state.options); 3724 qobject_unref(bs_entry->state.explicit_options); 3725 } 3726 3727 bs_entry->state.bs = bs; 3728 bs_entry->state.options = options; 3729 bs_entry->state.explicit_options = explicit_options; 3730 bs_entry->state.flags = flags; 3731 3732 /* This needs to be overwritten in bdrv_reopen_prepare() */ 3733 bs_entry->state.perm = UINT64_MAX; 3734 
bs_entry->state.shared_perm = 0; 3735 3736 /* 3737 * If keep_old_opts is false then it means that unspecified 3738 * options must be reset to their original value. We don't allow 3739 * resetting 'backing' but we need to know if the option is 3740 * missing in order to decide if we have to return an error. 3741 */ 3742 if (!keep_old_opts) { 3743 bs_entry->state.backing_missing = 3744 !qdict_haskey(options, "backing") && 3745 !qdict_haskey(options, "backing.driver"); 3746 } 3747 3748 QLIST_FOREACH(child, &bs->children, next) { 3749 QDict *new_child_options = NULL; 3750 bool child_keep_old = keep_old_opts; 3751 3752 /* reopen can only change the options of block devices that were 3753 * implicitly created and inherited options. For other (referenced) 3754 * block devices, a syntax like "backing.foo" results in an error. */ 3755 if (child->bs->inherits_from != bs) { 3756 continue; 3757 } 3758 3759 /* Check if the options contain a child reference */ 3760 if (qdict_haskey(options, child->name)) { 3761 const char *childref = qdict_get_try_str(options, child->name); 3762 /* 3763 * The current child must not be reopened if the child 3764 * reference is null or points to a different node. 3765 */ 3766 if (g_strcmp0(childref, child->bs->node_name)) { 3767 continue; 3768 } 3769 /* 3770 * If the child reference points to the current child then 3771 * reopen it with its existing set of options (note that 3772 * it can still inherit new options from the parent). 
3773 */ 3774 child_keep_old = true; 3775 } else { 3776 /* Extract child options ("child-name.*") */ 3777 char *child_key_dot = g_strdup_printf("%s.", child->name); 3778 qdict_extract_subqdict(explicit_options, NULL, child_key_dot); 3779 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 3780 g_free(child_key_dot); 3781 } 3782 3783 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 3784 child->klass, child->role, bs->drv->is_format, 3785 options, flags, child_keep_old); 3786 } 3787 3788 return bs_queue; 3789 } 3790 3791 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 3792 BlockDriverState *bs, 3793 QDict *options, bool keep_old_opts) 3794 { 3795 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, 3796 NULL, 0, keep_old_opts); 3797 } 3798 3799 /* 3800 * Reopen multiple BlockDriverStates atomically & transactionally. 3801 * 3802 * The queue passed in (bs_queue) must have been built up previous 3803 * via bdrv_reopen_queue(). 3804 * 3805 * Reopens all BDS specified in the queue, with the appropriate 3806 * flags. All devices are prepared for reopen, and failure of any 3807 * device will cause all device changes to be abandoned, and intermediate 3808 * data cleaned up. 3809 * 3810 * If all devices prepare successfully, then the changes are committed 3811 * to all devices. 3812 * 3813 * All affected nodes must be drained between bdrv_reopen_queue() and 3814 * bdrv_reopen_multiple(). 
3815 */ 3816 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 3817 { 3818 int ret = -1; 3819 BlockReopenQueueEntry *bs_entry, *next; 3820 3821 assert(bs_queue != NULL); 3822 3823 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 3824 assert(bs_entry->state.bs->quiesce_counter > 0); 3825 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) { 3826 goto cleanup; 3827 } 3828 bs_entry->prepared = true; 3829 } 3830 3831 QTAILQ_FOREACH(bs_entry, bs_queue, entry) { 3832 BDRVReopenState *state = &bs_entry->state; 3833 ret = bdrv_check_perm(state->bs, bs_queue, state->perm, 3834 state->shared_perm, NULL, errp); 3835 if (ret < 0) { 3836 goto cleanup_perm; 3837 } 3838 /* Check if new_backing_bs would accept the new permissions */ 3839 if (state->replace_backing_bs && state->new_backing_bs) { 3840 uint64_t nperm, nshared; 3841 bdrv_child_perm(state->bs, state->new_backing_bs, 3842 NULL, bdrv_backing_role(state->bs), 3843 bs_queue, state->perm, state->shared_perm, 3844 &nperm, &nshared); 3845 ret = bdrv_check_update_perm(state->new_backing_bs, NULL, 3846 nperm, nshared, NULL, errp); 3847 if (ret < 0) { 3848 goto cleanup_perm; 3849 } 3850 } 3851 bs_entry->perms_checked = true; 3852 } 3853 3854 /* 3855 * If we reach this point, we have success and just need to apply the 3856 * changes. 3857 * 3858 * Reverse order is used to comfort qcow2 driver: on commit it need to write 3859 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But 3860 * children are usually goes after parents in reopen-queue, so go from last 3861 * to first element. 
3862 */ 3863 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 3864 bdrv_reopen_commit(&bs_entry->state); 3865 } 3866 3867 ret = 0; 3868 cleanup_perm: 3869 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 3870 BDRVReopenState *state = &bs_entry->state; 3871 3872 if (!bs_entry->perms_checked) { 3873 continue; 3874 } 3875 3876 if (ret == 0) { 3877 uint64_t perm, shared; 3878 3879 bdrv_get_cumulative_perm(state->bs, &perm, &shared); 3880 assert(perm == state->perm); 3881 assert(shared == state->shared_perm); 3882 3883 bdrv_set_perm(state->bs); 3884 } else { 3885 bdrv_abort_perm_update(state->bs); 3886 if (state->replace_backing_bs && state->new_backing_bs) { 3887 bdrv_abort_perm_update(state->new_backing_bs); 3888 } 3889 } 3890 } 3891 3892 if (ret == 0) { 3893 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { 3894 BlockDriverState *bs = bs_entry->state.bs; 3895 3896 if (bs->drv->bdrv_reopen_commit_post) 3897 bs->drv->bdrv_reopen_commit_post(&bs_entry->state); 3898 } 3899 } 3900 cleanup: 3901 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 3902 if (ret) { 3903 if (bs_entry->prepared) { 3904 bdrv_reopen_abort(&bs_entry->state); 3905 } 3906 qobject_unref(bs_entry->state.explicit_options); 3907 qobject_unref(bs_entry->state.options); 3908 } 3909 if (bs_entry->state.new_backing_bs) { 3910 bdrv_unref(bs_entry->state.new_backing_bs); 3911 } 3912 g_free(bs_entry); 3913 } 3914 g_free(bs_queue); 3915 3916 return ret; 3917 } 3918 3919 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, 3920 Error **errp) 3921 { 3922 int ret; 3923 BlockReopenQueue *queue; 3924 QDict *opts = qdict_new(); 3925 3926 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); 3927 3928 bdrv_subtree_drained_begin(bs); 3929 queue = bdrv_reopen_queue(NULL, bs, opts, true); 3930 ret = bdrv_reopen_multiple(queue, errp); 3931 bdrv_subtree_drained_end(bs); 3932 3933 return ret; 3934 } 3935 3936 static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, 3937 
BdrvChild *c) 3938 { 3939 BlockReopenQueueEntry *entry; 3940 3941 QTAILQ_FOREACH(entry, q, entry) { 3942 BlockDriverState *bs = entry->state.bs; 3943 BdrvChild *child; 3944 3945 QLIST_FOREACH(child, &bs->children, next) { 3946 if (child == c) { 3947 return entry; 3948 } 3949 } 3950 } 3951 3952 return NULL; 3953 } 3954 3955 static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, 3956 uint64_t *perm, uint64_t *shared) 3957 { 3958 BdrvChild *c; 3959 BlockReopenQueueEntry *parent; 3960 uint64_t cumulative_perms = 0; 3961 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 3962 3963 QLIST_FOREACH(c, &bs->parents, next_parent) { 3964 parent = find_parent_in_reopen_queue(q, c); 3965 if (!parent) { 3966 cumulative_perms |= c->perm; 3967 cumulative_shared_perms &= c->shared_perm; 3968 } else { 3969 uint64_t nperm, nshared; 3970 3971 bdrv_child_perm(parent->state.bs, bs, c, c->role, q, 3972 parent->state.perm, parent->state.shared_perm, 3973 &nperm, &nshared); 3974 3975 cumulative_perms |= nperm; 3976 cumulative_shared_perms &= nshared; 3977 } 3978 } 3979 *perm = cumulative_perms; 3980 *shared = cumulative_shared_perms; 3981 } 3982 3983 static bool bdrv_reopen_can_attach(BlockDriverState *parent, 3984 BdrvChild *child, 3985 BlockDriverState *new_child, 3986 Error **errp) 3987 { 3988 AioContext *parent_ctx = bdrv_get_aio_context(parent); 3989 AioContext *child_ctx = bdrv_get_aio_context(new_child); 3990 GSList *ignore; 3991 bool ret; 3992 3993 ignore = g_slist_prepend(NULL, child); 3994 ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); 3995 g_slist_free(ignore); 3996 if (ret) { 3997 return ret; 3998 } 3999 4000 ignore = g_slist_prepend(NULL, child); 4001 ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); 4002 g_slist_free(ignore); 4003 return ret; 4004 } 4005 4006 /* 4007 * Take a BDRVReopenState and check if the value of 'backing' in the 4008 * reopen_state->options QDict is valid or not. 
 *
 * If 'backing' is missing from the QDict then return 0.
 *
 * If 'backing' contains the node name of the backing file of
 * reopen_state->bs then return 0.
 *
 * If 'backing' contains a different node name (or is null) then check
 * whether the current backing file can be replaced with the new one.
 * If that's the case then reopen_state->replace_backing_bs is set to
 * true and reopen_state->new_backing_bs contains a pointer to the new
 * backing BlockDriverState (or NULL). A reference is taken on the new
 * backing node when it is stored there.
 *
 * Return 0 on success, otherwise return < 0 and set @errp.
 */
static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
                                     Error **errp)
{
    BlockDriverState *bs = reopen_state->bs;
    BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
    QObject *value;
    const char *str;

    value = qdict_get(reopen_state->options, "backing");
    if (value == NULL) {
        return 0;
    }

    switch (qobject_type(value)) {
    case QTYPE_QNULL:
        /* "backing": null requests that the backing file be detached */
        new_backing_bs = NULL;
        break;
    case QTYPE_QSTRING:
        str = qstring_get_str(qobject_to(QString, value));
        new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
        if (new_backing_bs == NULL) {
            return -EINVAL;
        } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
            /* bs is reachable from the requested backing node */
            error_setg(errp, "Making '%s' a backing file of '%s' "
                       "would create a cycle", str, bs->node_name);
            return -EINVAL;
        }
        break;
    default:
        /* 'backing' does not allow any other data type */
        g_assert_not_reached();
    }

    /*
     * Check AioContext compatibility so that the bdrv_set_backing_hd() call in
     * bdrv_reopen_commit() won't fail.
     */
    if (new_backing_bs) {
        if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) {
            return -EINVAL;
        }
    }

    /*
     * Ensure that @bs can really handle backing files, because we are
     * about to give it one (or swap the existing one)
     */
    if (bs->drv->is_filter) {
        /* Filters always have a file or a backing child */
        if (!bs->backing) {
            error_setg(errp, "'%s' is a %s filter node that does not support a "
                       "backing child", bs->node_name, bs->drv->format_name);
            return -EINVAL;
        }
    } else if (!bs->drv->supports_backing) {
        error_setg(errp, "Driver '%s' of node '%s' does not support backing "
                   "files", bs->drv->format_name, bs->node_name);
        return -EINVAL;
    }

    /*
     * Find the "actual" backing file by skipping all links that point
     * to an implicit node, if any (e.g. a commit filter node).
     * We cannot use any of the bdrv_skip_*() functions here because
     * those return the first explicit node, while we are looking for
     * its overlay here.
     */
    overlay_bs = bs;
    for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
         below_bs && below_bs->implicit;
         below_bs = bdrv_filter_or_cow_bs(overlay_bs))
    {
        /* The loop body advances overlay_bs; the increment expression then
         * looks one level further down from the new overlay */
        overlay_bs = below_bs;
    }

    /* If we want to replace the backing file we need some extra checks */
    if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
        /* Check for implicit nodes between bs and its backing file */
        if (bs != overlay_bs) {
            error_setg(errp, "Cannot change backing link if '%s' has "
                       "an implicit backing file", bs->node_name);
            return -EPERM;
        }
        /*
         * Check if the backing link that we want to replace is frozen.
         * Note that
         * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
         * because we know that overlay_bs == bs, and that @bs
         * either is a filter that uses ->backing or a COW format BDS
         * with bs->drv->supports_backing == true.
         */
        if (bdrv_is_backing_chain_frozen(overlay_bs,
                                         child_bs(overlay_bs->backing), errp))
        {
            return -EPERM;
        }
        reopen_state->replace_backing_bs = true;
        if (new_backing_bs) {
            /* This reference is dropped again at the end of
             * bdrv_reopen_multiple() */
            bdrv_ref(new_backing_bs);
            reopen_state->new_backing_bs = new_backing_bs;
        }
    }

    return 0;
}

/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state describes the reopen: the node (->bs), its new options
 * (->options) and its new open flags (->flags)
 * queue is the reopen queue that reopen_state belongs to
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * If an error occurs after the driver's .bdrv_reopen_prepare() has already
 * succeeded, the driver's .bdrv_reopen_abort() is called here to undo it.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 * On success reopen_state->options is restored to its original content; on
 * failure it is left in whatever partially processed state was reached.
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    bool drv_prepared = false;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* @discard is freed at the error label, on success and failure alike */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    /* Calculate required permissions after reopening */
    bdrv_reopen_perm(queue, reopen_state->bs,
                     &reopen_state->perm, &reopen_state->shared_perm);

    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_setg_errno(errp, -ret, "Error flushing drive");
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver failed without setting an error; report a
                 * generic one that names the image */
                bdrv_refresh_filename(reopen_state->bs);
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* From here on, a failure must undo the driver's prepare step */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_backing(reopen_state, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                /* child is NULL after the loop if no child has this name */
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type. In the future, this should be remedied by correctly typing
             * all options. For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        /* "continue" above also lands here, so the iterator still advances */
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

    /* The success path falls through into the shared cleanup below */
error:
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
4361 */ 4362 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 4363 { 4364 BlockDriver *drv; 4365 BlockDriverState *bs; 4366 BdrvChild *child; 4367 4368 assert(reopen_state != NULL); 4369 bs = reopen_state->bs; 4370 drv = bs->drv; 4371 assert(drv != NULL); 4372 4373 /* If there are any driver level actions to take */ 4374 if (drv->bdrv_reopen_commit) { 4375 drv->bdrv_reopen_commit(reopen_state); 4376 } 4377 4378 /* set BDS specific flags now */ 4379 qobject_unref(bs->explicit_options); 4380 qobject_unref(bs->options); 4381 4382 bs->explicit_options = reopen_state->explicit_options; 4383 bs->options = reopen_state->options; 4384 bs->open_flags = reopen_state->flags; 4385 bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 4386 bs->detect_zeroes = reopen_state->detect_zeroes; 4387 4388 if (reopen_state->replace_backing_bs) { 4389 qdict_del(bs->explicit_options, "backing"); 4390 qdict_del(bs->options, "backing"); 4391 } 4392 4393 /* Remove child references from bs->options and bs->explicit_options. 4394 * Child options were already removed in bdrv_reopen_queue_child() */ 4395 QLIST_FOREACH(child, &bs->children, next) { 4396 qdict_del(bs->explicit_options, child->name); 4397 qdict_del(bs->options, child->name); 4398 } 4399 4400 /* 4401 * Change the backing file if a new one was specified. We do this 4402 * after updating bs->options, so bdrv_refresh_filename() (called 4403 * from bdrv_set_backing_hd()) has the new values. 
4404 */ 4405 if (reopen_state->replace_backing_bs) { 4406 BlockDriverState *old_backing_bs = child_bs(bs->backing); 4407 assert(!old_backing_bs || !old_backing_bs->implicit); 4408 /* Abort the permission update on the backing bs we're detaching */ 4409 if (old_backing_bs) { 4410 bdrv_abort_perm_update(old_backing_bs); 4411 } 4412 bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort); 4413 } 4414 4415 bdrv_refresh_limits(bs, NULL); 4416 } 4417 4418 /* 4419 * Abort the reopen, and delete and free the staged changes in 4420 * reopen_state 4421 */ 4422 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 4423 { 4424 BlockDriver *drv; 4425 4426 assert(reopen_state != NULL); 4427 drv = reopen_state->bs->drv; 4428 assert(drv != NULL); 4429 4430 if (drv->bdrv_reopen_abort) { 4431 drv->bdrv_reopen_abort(reopen_state); 4432 } 4433 } 4434 4435 4436 static void bdrv_close(BlockDriverState *bs) 4437 { 4438 BdrvAioNotifier *ban, *ban_next; 4439 BdrvChild *child, *next; 4440 4441 assert(!bs->refcnt); 4442 4443 bdrv_drained_begin(bs); /* complete I/O */ 4444 bdrv_flush(bs); 4445 bdrv_drain(bs); /* in case flush left pending I/O */ 4446 4447 if (bs->drv) { 4448 if (bs->drv->bdrv_close) { 4449 /* Must unfreeze all children, so bdrv_unref_child() works */ 4450 bs->drv->bdrv_close(bs); 4451 } 4452 bs->drv = NULL; 4453 } 4454 4455 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 4456 bdrv_unref_child(bs, child); 4457 } 4458 4459 bs->backing = NULL; 4460 bs->file = NULL; 4461 g_free(bs->opaque); 4462 bs->opaque = NULL; 4463 qatomic_set(&bs->copy_on_read, 0); 4464 bs->backing_file[0] = '\0'; 4465 bs->backing_format[0] = '\0'; 4466 bs->total_sectors = 0; 4467 bs->encrypted = false; 4468 bs->sg = false; 4469 qobject_unref(bs->options); 4470 qobject_unref(bs->explicit_options); 4471 bs->options = NULL; 4472 bs->explicit_options = NULL; 4473 qobject_unref(bs->full_open_options); 4474 bs->full_open_options = NULL; 4475 4476 bdrv_release_named_dirty_bitmaps(bs); 4477 
assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 4478 4479 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 4480 g_free(ban); 4481 } 4482 QLIST_INIT(&bs->aio_notifiers); 4483 bdrv_drained_end(bs); 4484 4485 /* 4486 * If we're still inside some bdrv_drain_all_begin()/end() sections, end 4487 * them now since this BDS won't exist anymore when bdrv_drain_all_end() 4488 * gets called. 4489 */ 4490 if (bs->quiesce_counter) { 4491 bdrv_drain_all_end_quiesce(bs); 4492 } 4493 } 4494 4495 void bdrv_close_all(void) 4496 { 4497 assert(job_next(NULL) == NULL); 4498 4499 /* Drop references from requests still in flight, such as canceled block 4500 * jobs whose AIO context has not been polled yet */ 4501 bdrv_drain_all(); 4502 4503 blk_remove_all_bs(); 4504 blockdev_close_all_bdrv_states(); 4505 4506 assert(QTAILQ_EMPTY(&all_bdrv_states)); 4507 } 4508 4509 static bool should_update_child(BdrvChild *c, BlockDriverState *to) 4510 { 4511 GQueue *queue; 4512 GHashTable *found; 4513 bool ret; 4514 4515 if (c->klass->stay_at_node) { 4516 return false; 4517 } 4518 4519 /* If the child @c belongs to the BDS @to, replacing the current 4520 * c->bs by @to would mean to create a loop. 4521 * 4522 * Such a case occurs when appending a BDS to a backing chain. 4523 * For instance, imagine the following chain: 4524 * 4525 * guest device -> node A -> further backing chain... 4526 * 4527 * Now we create a new BDS B which we want to put on top of this 4528 * chain, so we first attach A as its backing node: 4529 * 4530 * node B 4531 * | 4532 * v 4533 * guest device -> node A -> further backing chain... 4534 * 4535 * Finally we want to replace A by B. When doing that, we want to 4536 * replace all pointers to A by pointers to B -- except for the 4537 * pointer from B because (1) that would create a loop, and (2) 4538 * that pointer should simply stay intact: 4539 * 4540 * guest device -> node B 4541 * | 4542 * v 4543 * node A -> further backing chain... 
4544 * 4545 * In general, when replacing a node A (c->bs) by a node B (@to), 4546 * if A is a child of B, that means we cannot replace A by B there 4547 * because that would create a loop. Silently detaching A from B 4548 * is also not really an option. So overall just leaving A in 4549 * place there is the most sensible choice. 4550 * 4551 * We would also create a loop in any cases where @c is only 4552 * indirectly referenced by @to. Prevent this by returning false 4553 * if @c is found (by breadth-first search) anywhere in the whole 4554 * subtree of @to. 4555 */ 4556 4557 ret = true; 4558 found = g_hash_table_new(NULL, NULL); 4559 g_hash_table_add(found, to); 4560 queue = g_queue_new(); 4561 g_queue_push_tail(queue, to); 4562 4563 while (!g_queue_is_empty(queue)) { 4564 BlockDriverState *v = g_queue_pop_head(queue); 4565 BdrvChild *c2; 4566 4567 QLIST_FOREACH(c2, &v->children, next) { 4568 if (c2 == c) { 4569 ret = false; 4570 break; 4571 } 4572 4573 if (g_hash_table_contains(found, c2->bs)) { 4574 continue; 4575 } 4576 4577 g_queue_push_tail(queue, c2->bs); 4578 g_hash_table_add(found, c2->bs); 4579 } 4580 } 4581 4582 g_queue_free(queue); 4583 g_hash_table_destroy(found); 4584 4585 return ret; 4586 } 4587 4588 /* 4589 * With auto_skip=true bdrv_replace_node_common skips updating from parents 4590 * if it creates a parent-child relation loop or if parent is block-job. 4591 * 4592 * With auto_skip=false the error is returned if from has a parent which should 4593 * not be updated. 4594 */ 4595 static int bdrv_replace_node_common(BlockDriverState *from, 4596 BlockDriverState *to, 4597 bool auto_skip, Error **errp) 4598 { 4599 BdrvChild *c, *next; 4600 GSList *list = NULL, *p; 4601 uint64_t perm = 0, shared = BLK_PERM_ALL; 4602 int ret; 4603 4604 /* Make sure that @from doesn't go away until we have successfully attached 4605 * all of its parents to @to. 
*/ 4606 bdrv_ref(from); 4607 4608 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 4609 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); 4610 bdrv_drained_begin(from); 4611 4612 /* Put all parents into @list and calculate their cumulative permissions */ 4613 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 4614 assert(c->bs == from); 4615 if (!should_update_child(c, to)) { 4616 if (auto_skip) { 4617 continue; 4618 } 4619 ret = -EINVAL; 4620 error_setg(errp, "Should not change '%s' link to '%s'", 4621 c->name, from->node_name); 4622 goto out; 4623 } 4624 if (c->frozen) { 4625 ret = -EPERM; 4626 error_setg(errp, "Cannot change '%s' link to '%s'", 4627 c->name, from->node_name); 4628 goto out; 4629 } 4630 list = g_slist_prepend(list, c); 4631 perm |= c->perm; 4632 shared &= c->shared_perm; 4633 } 4634 4635 /* Check whether the required permissions can be granted on @to, ignoring 4636 * all BdrvChild in @list so that they can't block themselves. */ 4637 ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); 4638 if (ret < 0) { 4639 bdrv_abort_perm_update(to); 4640 goto out; 4641 } 4642 4643 /* Now actually perform the change. We performed the permission check for 4644 * all elements of @list at once, so set the permissions all at once at the 4645 * very end. */ 4646 for (p = list; p != NULL; p = p->next) { 4647 c = p->data; 4648 4649 bdrv_ref(to); 4650 bdrv_replace_child_noperm(c, to); 4651 bdrv_unref(from); 4652 } 4653 4654 bdrv_set_perm(to); 4655 4656 ret = 0; 4657 4658 out: 4659 g_slist_free(list); 4660 bdrv_drained_end(from); 4661 bdrv_unref(from); 4662 4663 return ret; 4664 } 4665 4666 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, 4667 Error **errp) 4668 { 4669 return bdrv_replace_node_common(from, to, true, errp); 4670 } 4671 4672 /* 4673 * Add new bs contents at the top of an image chain while the chain is 4674 * live, while keeping required fields on the top layer. 
4675 * 4676 * This will modify the BlockDriverState fields, and swap contents 4677 * between bs_new and bs_top. Both bs_new and bs_top are modified. 4678 * 4679 * bs_new must not be attached to a BlockBackend. 4680 * 4681 * This function does not create any image files. 4682 * 4683 * bdrv_append() takes ownership of a bs_new reference and unrefs it because 4684 * that's what the callers commonly need. bs_new will be referenced by the old 4685 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a 4686 * reference of its own, it must call bdrv_ref(). 4687 */ 4688 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 4689 Error **errp) 4690 { 4691 int ret = bdrv_set_backing_hd(bs_new, bs_top, errp); 4692 if (ret < 0) { 4693 goto out; 4694 } 4695 4696 ret = bdrv_replace_node(bs_top, bs_new, errp); 4697 if (ret < 0) { 4698 bdrv_set_backing_hd(bs_new, NULL, &error_abort); 4699 goto out; 4700 } 4701 4702 ret = 0; 4703 4704 out: 4705 /* 4706 * bs_new is now referenced by its new parents, we don't need the 4707 * additional reference any more. 
4708 */ 4709 bdrv_unref(bs_new); 4710 4711 return ret; 4712 } 4713 4714 static void bdrv_delete(BlockDriverState *bs) 4715 { 4716 assert(bdrv_op_blocker_is_empty(bs)); 4717 assert(!bs->refcnt); 4718 4719 /* remove from list, if necessary */ 4720 if (bs->node_name[0] != '\0') { 4721 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 4722 } 4723 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 4724 4725 bdrv_close(bs); 4726 4727 g_free(bs); 4728 } 4729 4730 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, 4731 int flags, Error **errp) 4732 { 4733 BlockDriverState *new_node_bs; 4734 Error *local_err = NULL; 4735 4736 new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); 4737 if (new_node_bs == NULL) { 4738 error_prepend(errp, "Could not create node: "); 4739 return NULL; 4740 } 4741 4742 bdrv_drained_begin(bs); 4743 bdrv_replace_node(bs, new_node_bs, &local_err); 4744 bdrv_drained_end(bs); 4745 4746 if (local_err) { 4747 bdrv_unref(new_node_bs); 4748 error_propagate(errp, local_err); 4749 return NULL; 4750 } 4751 4752 return new_node_bs; 4753 } 4754 4755 /* 4756 * Run consistency checks on an image 4757 * 4758 * Returns 0 if the check could be completed (it doesn't mean that the image is 4759 * free of errors) or -errno when an internal error occurred. The results of the 4760 * check are stored in res. 
4761 */ 4762 int coroutine_fn bdrv_co_check(BlockDriverState *bs, 4763 BdrvCheckResult *res, BdrvCheckMode fix) 4764 { 4765 if (bs->drv == NULL) { 4766 return -ENOMEDIUM; 4767 } 4768 if (bs->drv->bdrv_co_check == NULL) { 4769 return -ENOTSUP; 4770 } 4771 4772 memset(res, 0, sizeof(*res)); 4773 return bs->drv->bdrv_co_check(bs, res, fix); 4774 } 4775 4776 /* 4777 * Return values: 4778 * 0 - success 4779 * -EINVAL - backing format specified, but no file 4780 * -ENOSPC - can't update the backing file because no space is left in the 4781 * image file header 4782 * -ENOTSUP - format driver doesn't support changing the backing file 4783 */ 4784 int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, 4785 const char *backing_fmt, bool warn) 4786 { 4787 BlockDriver *drv = bs->drv; 4788 int ret; 4789 4790 if (!drv) { 4791 return -ENOMEDIUM; 4792 } 4793 4794 /* Backing file format doesn't make sense without a backing file */ 4795 if (backing_fmt && !backing_file) { 4796 return -EINVAL; 4797 } 4798 4799 if (warn && backing_file && !backing_fmt) { 4800 warn_report("Deprecated use of backing file without explicit " 4801 "backing format, use of this image requires " 4802 "potentially unsafe format probing"); 4803 } 4804 4805 if (drv->bdrv_change_backing_file != NULL) { 4806 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 4807 } else { 4808 ret = -ENOTSUP; 4809 } 4810 4811 if (ret == 0) { 4812 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 4813 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 4814 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 4815 backing_file ?: ""); 4816 } 4817 return ret; 4818 } 4819 4820 /* 4821 * Finds the first non-filter node above bs in the chain between 4822 * active and bs. The returned node is either an immediate parent of 4823 * bs, or there are only filter nodes between the two. 
4824 * 4825 * Returns NULL if bs is not found in active's image chain, 4826 * or if active == bs. 4827 * 4828 * Returns the bottommost base image if bs == NULL. 4829 */ 4830 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 4831 BlockDriverState *bs) 4832 { 4833 bs = bdrv_skip_filters(bs); 4834 active = bdrv_skip_filters(active); 4835 4836 while (active) { 4837 BlockDriverState *next = bdrv_backing_chain_next(active); 4838 if (bs == next) { 4839 return active; 4840 } 4841 active = next; 4842 } 4843 4844 return NULL; 4845 } 4846 4847 /* Given a BDS, searches for the base layer. */ 4848 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 4849 { 4850 return bdrv_find_overlay(bs, NULL); 4851 } 4852 4853 /* 4854 * Return true if at least one of the COW (backing) and filter links 4855 * between @bs and @base is frozen. @errp is set if that's the case. 4856 * @base must be reachable from @bs, or NULL. 4857 */ 4858 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, 4859 Error **errp) 4860 { 4861 BlockDriverState *i; 4862 BdrvChild *child; 4863 4864 for (i = bs; i != base; i = child_bs(child)) { 4865 child = bdrv_filter_or_cow_child(i); 4866 4867 if (child && child->frozen) { 4868 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", 4869 child->name, i->node_name, child->bs->node_name); 4870 return true; 4871 } 4872 } 4873 4874 return false; 4875 } 4876 4877 /* 4878 * Freeze all COW (backing) and filter links between @bs and @base. 4879 * If any of the links is already frozen the operation is aborted and 4880 * none of the links are modified. 4881 * @base must be reachable from @bs, or NULL. 4882 * Returns 0 on success. On failure returns < 0 and sets @errp. 
4883 */ 4884 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, 4885 Error **errp) 4886 { 4887 BlockDriverState *i; 4888 BdrvChild *child; 4889 4890 if (bdrv_is_backing_chain_frozen(bs, base, errp)) { 4891 return -EPERM; 4892 } 4893 4894 for (i = bs; i != base; i = child_bs(child)) { 4895 child = bdrv_filter_or_cow_child(i); 4896 if (child && child->bs->never_freeze) { 4897 error_setg(errp, "Cannot freeze '%s' link to '%s'", 4898 child->name, child->bs->node_name); 4899 return -EPERM; 4900 } 4901 } 4902 4903 for (i = bs; i != base; i = child_bs(child)) { 4904 child = bdrv_filter_or_cow_child(i); 4905 if (child) { 4906 child->frozen = true; 4907 } 4908 } 4909 4910 return 0; 4911 } 4912 4913 /* 4914 * Unfreeze all COW (backing) and filter links between @bs and @base. 4915 * The caller must ensure that all links are frozen before using this 4916 * function. 4917 * @base must be reachable from @bs, or NULL. 4918 */ 4919 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) 4920 { 4921 BlockDriverState *i; 4922 BdrvChild *child; 4923 4924 for (i = bs; i != base; i = child_bs(child)) { 4925 child = bdrv_filter_or_cow_child(i); 4926 if (child) { 4927 assert(child->frozen); 4928 child->frozen = false; 4929 } 4930 } 4931 } 4932 4933 /* 4934 * Drops images above 'base' up to and including 'top', and sets the image 4935 * above 'top' to have base as its backing file. 4936 * 4937 * Requires that the overlay to 'top' is opened r/w, so that the backing file 4938 * information in 'bs' can be properly updated. 
4939 * 4940 * E.g., this will convert the following chain: 4941 * bottom <- base <- intermediate <- top <- active 4942 * 4943 * to 4944 * 4945 * bottom <- base <- active 4946 * 4947 * It is allowed for bottom==base, in which case it converts: 4948 * 4949 * base <- intermediate <- top <- active 4950 * 4951 * to 4952 * 4953 * base <- active 4954 * 4955 * If backing_file_str is non-NULL, it will be used when modifying top's 4956 * overlay image metadata. 4957 * 4958 * Error conditions: 4959 * if active == top, that is considered an error 4960 * 4961 */ 4962 int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, 4963 const char *backing_file_str) 4964 { 4965 BlockDriverState *explicit_top = top; 4966 bool update_inherits_from; 4967 BdrvChild *c; 4968 Error *local_err = NULL; 4969 int ret = -EIO; 4970 g_autoptr(GSList) updated_children = NULL; 4971 GSList *p; 4972 4973 bdrv_ref(top); 4974 bdrv_subtree_drained_begin(top); 4975 4976 if (!top->drv || !base->drv) { 4977 goto exit; 4978 } 4979 4980 /* Make sure that base is in the backing chain of top */ 4981 if (!bdrv_chain_contains(top, base)) { 4982 goto exit; 4983 } 4984 4985 /* If 'base' recursively inherits from 'top' then we should set 4986 * base->inherits_from to top->inherits_from after 'top' and all 4987 * other intermediate nodes have been dropped. 4988 * If 'top' is an implicit node (e.g. "commit_top") we should skip 4989 * it because no one inherits from it. We use explicit_top for that. */ 4990 explicit_top = bdrv_skip_implicit_filters(explicit_top); 4991 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top); 4992 4993 /* success - we can delete the intermediate states, and link top->base */ 4994 /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once 4995 * we've figured out how they should work. 
*/ 4996 if (!backing_file_str) { 4997 bdrv_refresh_filename(base); 4998 backing_file_str = base->filename; 4999 } 5000 5001 QLIST_FOREACH(c, &top->parents, next_parent) { 5002 updated_children = g_slist_prepend(updated_children, c); 5003 } 5004 5005 bdrv_replace_node_common(top, base, false, &local_err); 5006 if (local_err) { 5007 error_report_err(local_err); 5008 goto exit; 5009 } 5010 5011 for (p = updated_children; p; p = p->next) { 5012 c = p->data; 5013 5014 if (c->klass->update_filename) { 5015 ret = c->klass->update_filename(c, base, backing_file_str, 5016 &local_err); 5017 if (ret < 0) { 5018 /* 5019 * TODO: Actually, we want to rollback all previous iterations 5020 * of this loop, and (which is almost impossible) previous 5021 * bdrv_replace_node()... 5022 * 5023 * Note, that c->klass->update_filename may lead to permission 5024 * update, so it's a bad idea to call it inside permission 5025 * update transaction of bdrv_replace_node. 5026 */ 5027 error_report_err(local_err); 5028 goto exit; 5029 } 5030 } 5031 } 5032 5033 if (update_inherits_from) { 5034 base->inherits_from = explicit_top->inherits_from; 5035 } 5036 5037 ret = 0; 5038 exit: 5039 bdrv_subtree_drained_end(top); 5040 bdrv_unref(top); 5041 return ret; 5042 } 5043 5044 /** 5045 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that 5046 * sums the size of all data-bearing children. (This excludes backing 5047 * children.) 5048 */ 5049 static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) 5050 { 5051 BdrvChild *child; 5052 int64_t child_size, sum = 0; 5053 5054 QLIST_FOREACH(child, &bs->children, next) { 5055 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | 5056 BDRV_CHILD_FILTERED)) 5057 { 5058 child_size = bdrv_get_allocated_file_size(child->bs); 5059 if (child_size < 0) { 5060 return child_size; 5061 } 5062 sum += child_size; 5063 } 5064 } 5065 5066 return sum; 5067 } 5068 5069 /** 5070 * Length of a allocated file in bytes. 
Sparse files are counted by actual 5071 * allocated space. Return < 0 if error or unknown. 5072 */ 5073 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 5074 { 5075 BlockDriver *drv = bs->drv; 5076 if (!drv) { 5077 return -ENOMEDIUM; 5078 } 5079 if (drv->bdrv_get_allocated_file_size) { 5080 return drv->bdrv_get_allocated_file_size(bs); 5081 } 5082 5083 if (drv->bdrv_file_open) { 5084 /* 5085 * Protocol drivers default to -ENOTSUP (most of their data is 5086 * not stored in any of their children (if they even have any), 5087 * so there is no generic way to figure it out). 5088 */ 5089 return -ENOTSUP; 5090 } else if (drv->is_filter) { 5091 /* Filter drivers default to the size of their filtered child */ 5092 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); 5093 } else { 5094 /* Other drivers default to summing their children's sizes */ 5095 return bdrv_sum_allocated_file_size(bs); 5096 } 5097 } 5098 5099 /* 5100 * bdrv_measure: 5101 * @drv: Format driver 5102 * @opts: Creation options for new image 5103 * @in_bs: Existing image containing data for new image (may be NULL) 5104 * @errp: Error object 5105 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 5106 * or NULL on error 5107 * 5108 * Calculate file size required to create a new image. 5109 * 5110 * If @in_bs is given then space for allocated clusters and zero clusters 5111 * from that image are included in the calculation. If @opts contains a 5112 * backing file that is shared by @in_bs then backing clusters may be omitted 5113 * from the calculation. 5114 * 5115 * If @in_bs is NULL then the calculation includes no allocated clusters 5116 * unless a preallocation option is given in @opts. 5117 * 5118 * Note that @in_bs may use a different BlockDriver from @drv. 5119 * 5120 * If an error occurs the @errp pointer is set. 
5121 */ 5122 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 5123 BlockDriverState *in_bs, Error **errp) 5124 { 5125 if (!drv->bdrv_measure) { 5126 error_setg(errp, "Block driver '%s' does not support size measurement", 5127 drv->format_name); 5128 return NULL; 5129 } 5130 5131 return drv->bdrv_measure(opts, in_bs, errp); 5132 } 5133 5134 /** 5135 * Return number of sectors on success, -errno on error. 5136 */ 5137 int64_t bdrv_nb_sectors(BlockDriverState *bs) 5138 { 5139 BlockDriver *drv = bs->drv; 5140 5141 if (!drv) 5142 return -ENOMEDIUM; 5143 5144 if (drv->has_variable_length) { 5145 int ret = refresh_total_sectors(bs, bs->total_sectors); 5146 if (ret < 0) { 5147 return ret; 5148 } 5149 } 5150 return bs->total_sectors; 5151 } 5152 5153 /** 5154 * Return length in bytes on success, -errno on error. 5155 * The length is always a multiple of BDRV_SECTOR_SIZE. 5156 */ 5157 int64_t bdrv_getlength(BlockDriverState *bs) 5158 { 5159 int64_t ret = bdrv_nb_sectors(bs); 5160 5161 if (ret < 0) { 5162 return ret; 5163 } 5164 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) { 5165 return -EFBIG; 5166 } 5167 return ret * BDRV_SECTOR_SIZE; 5168 } 5169 5170 /* return 0 as number of sectors if no device present or error */ 5171 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 5172 { 5173 int64_t nb_sectors = bdrv_nb_sectors(bs); 5174 5175 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 5176 } 5177 5178 bool bdrv_is_sg(BlockDriverState *bs) 5179 { 5180 return bs->sg; 5181 } 5182 5183 /** 5184 * Return whether the given node supports compressed writes. 5185 */ 5186 bool bdrv_supports_compressed_writes(BlockDriverState *bs) 5187 { 5188 BlockDriverState *filtered; 5189 5190 if (!bs->drv || !block_driver_can_compress(bs->drv)) { 5191 return false; 5192 } 5193 5194 filtered = bdrv_filter_bs(bs); 5195 if (filtered) { 5196 /* 5197 * Filters can only forward compressed writes, so we have to 5198 * check the child. 
5199 */ 5200 return bdrv_supports_compressed_writes(filtered); 5201 } 5202 5203 return true; 5204 } 5205 5206 const char *bdrv_get_format_name(BlockDriverState *bs) 5207 { 5208 return bs->drv ? bs->drv->format_name : NULL; 5209 } 5210 5211 static int qsort_strcmp(const void *a, const void *b) 5212 { 5213 return strcmp(*(char *const *)a, *(char *const *)b); 5214 } 5215 5216 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 5217 void *opaque, bool read_only) 5218 { 5219 BlockDriver *drv; 5220 int count = 0; 5221 int i; 5222 const char **formats = NULL; 5223 5224 QLIST_FOREACH(drv, &bdrv_drivers, list) { 5225 if (drv->format_name) { 5226 bool found = false; 5227 int i = count; 5228 5229 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { 5230 continue; 5231 } 5232 5233 while (formats && i && !found) { 5234 found = !strcmp(formats[--i], drv->format_name); 5235 } 5236 5237 if (!found) { 5238 formats = g_renew(const char *, formats, count + 1); 5239 formats[count++] = drv->format_name; 5240 } 5241 } 5242 } 5243 5244 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 5245 const char *format_name = block_driver_modules[i].format_name; 5246 5247 if (format_name) { 5248 bool found = false; 5249 int j = count; 5250 5251 if (use_bdrv_whitelist && 5252 !bdrv_format_is_whitelisted(format_name, read_only)) { 5253 continue; 5254 } 5255 5256 while (formats && j && !found) { 5257 found = !strcmp(formats[--j], format_name); 5258 } 5259 5260 if (!found) { 5261 formats = g_renew(const char *, formats, count + 1); 5262 formats[count++] = format_name; 5263 } 5264 } 5265 } 5266 5267 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 5268 5269 for (i = 0; i < count; i++) { 5270 it(opaque, formats[i]); 5271 } 5272 5273 g_free(formats); 5274 } 5275 5276 /* This function is to find a node in the bs graph */ 5277 BlockDriverState *bdrv_find_node(const char *node_name) 5278 { 5279 BlockDriverState *bs; 5280 5281 assert(node_name); 5282 5283 
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5284 if (!strcmp(node_name, bs->node_name)) { 5285 return bs; 5286 } 5287 } 5288 return NULL; 5289 } 5290 5291 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 5292 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, 5293 Error **errp) 5294 { 5295 BlockDeviceInfoList *list; 5296 BlockDriverState *bs; 5297 5298 list = NULL; 5299 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5300 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); 5301 if (!info) { 5302 qapi_free_BlockDeviceInfoList(list); 5303 return NULL; 5304 } 5305 QAPI_LIST_PREPEND(list, info); 5306 } 5307 5308 return list; 5309 } 5310 5311 typedef struct XDbgBlockGraphConstructor { 5312 XDbgBlockGraph *graph; 5313 GHashTable *graph_nodes; 5314 } XDbgBlockGraphConstructor; 5315 5316 static XDbgBlockGraphConstructor *xdbg_graph_new(void) 5317 { 5318 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1); 5319 5320 gr->graph = g_new0(XDbgBlockGraph, 1); 5321 gr->graph_nodes = g_hash_table_new(NULL, NULL); 5322 5323 return gr; 5324 } 5325 5326 static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr) 5327 { 5328 XDbgBlockGraph *graph = gr->graph; 5329 5330 g_hash_table_destroy(gr->graph_nodes); 5331 g_free(gr); 5332 5333 return graph; 5334 } 5335 5336 static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node) 5337 { 5338 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node); 5339 5340 if (ret != 0) { 5341 return ret; 5342 } 5343 5344 /* 5345 * Start counting from 1, not 0, because 0 interferes with not-found (NULL) 5346 * answer of g_hash_table_lookup. 
5347 */ 5348 ret = g_hash_table_size(gr->graph_nodes) + 1; 5349 g_hash_table_insert(gr->graph_nodes, node, (void *)ret); 5350 5351 return ret; 5352 } 5353 5354 static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node, 5355 XDbgBlockGraphNodeType type, const char *name) 5356 { 5357 XDbgBlockGraphNode *n; 5358 5359 n = g_new0(XDbgBlockGraphNode, 1); 5360 5361 n->id = xdbg_graph_node_num(gr, node); 5362 n->type = type; 5363 n->name = g_strdup(name); 5364 5365 QAPI_LIST_PREPEND(gr->graph->nodes, n); 5366 } 5367 5368 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, 5369 const BdrvChild *child) 5370 { 5371 BlockPermission qapi_perm; 5372 XDbgBlockGraphEdge *edge; 5373 5374 edge = g_new0(XDbgBlockGraphEdge, 1); 5375 5376 edge->parent = xdbg_graph_node_num(gr, parent); 5377 edge->child = xdbg_graph_node_num(gr, child->bs); 5378 edge->name = g_strdup(child->name); 5379 5380 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) { 5381 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm); 5382 5383 if (flag & child->perm) { 5384 QAPI_LIST_PREPEND(edge->perm, qapi_perm); 5385 } 5386 if (flag & child->shared_perm) { 5387 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm); 5388 } 5389 } 5390 5391 QAPI_LIST_PREPEND(gr->graph->edges, edge); 5392 } 5393 5394 5395 XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) 5396 { 5397 BlockBackend *blk; 5398 BlockJob *job; 5399 BlockDriverState *bs; 5400 BdrvChild *child; 5401 XDbgBlockGraphConstructor *gr = xdbg_graph_new(); 5402 5403 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { 5404 char *allocated_name = NULL; 5405 const char *name = blk_name(blk); 5406 5407 if (!*name) { 5408 name = allocated_name = blk_get_attached_dev_id(blk); 5409 } 5410 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND, 5411 name); 5412 g_free(allocated_name); 5413 if (blk_root(blk)) { 5414 xdbg_graph_add_edge(gr, blk, blk_root(blk)); 5415 } 5416 } 5417 5418 for (job 
= block_job_next(NULL); job; job = block_job_next(job)) { 5419 GSList *el; 5420 5421 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, 5422 job->job.id); 5423 for (el = job->nodes; el; el = el->next) { 5424 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); 5425 } 5426 } 5427 5428 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 5429 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER, 5430 bs->node_name); 5431 QLIST_FOREACH(child, &bs->children, next) { 5432 xdbg_graph_add_edge(gr, bs, child); 5433 } 5434 } 5435 5436 return xdbg_graph_finalize(gr); 5437 } 5438 5439 BlockDriverState *bdrv_lookup_bs(const char *device, 5440 const char *node_name, 5441 Error **errp) 5442 { 5443 BlockBackend *blk; 5444 BlockDriverState *bs; 5445 5446 if (device) { 5447 blk = blk_by_name(device); 5448 5449 if (blk) { 5450 bs = blk_bs(blk); 5451 if (!bs) { 5452 error_setg(errp, "Device '%s' has no medium", device); 5453 } 5454 5455 return bs; 5456 } 5457 } 5458 5459 if (node_name) { 5460 bs = bdrv_find_node(node_name); 5461 5462 if (bs) { 5463 return bs; 5464 } 5465 } 5466 5467 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'", 5468 device ? device : "", 5469 node_name ? node_name : ""); 5470 return NULL; 5471 } 5472 5473 /* If 'base' is in the same chain as 'top', return true. Otherwise, 5474 * return false. If either argument is NULL, return false. 
*/ 5475 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 5476 { 5477 while (top && top != base) { 5478 top = bdrv_filter_or_cow_bs(top); 5479 } 5480 5481 return top != NULL; 5482 } 5483 5484 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 5485 { 5486 if (!bs) { 5487 return QTAILQ_FIRST(&graph_bdrv_states); 5488 } 5489 return QTAILQ_NEXT(bs, node_list); 5490 } 5491 5492 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) 5493 { 5494 if (!bs) { 5495 return QTAILQ_FIRST(&all_bdrv_states); 5496 } 5497 return QTAILQ_NEXT(bs, bs_list); 5498 } 5499 5500 const char *bdrv_get_node_name(const BlockDriverState *bs) 5501 { 5502 return bs->node_name; 5503 } 5504 5505 const char *bdrv_get_parent_name(const BlockDriverState *bs) 5506 { 5507 BdrvChild *c; 5508 const char *name; 5509 5510 /* If multiple parents have a name, just pick the first one. */ 5511 QLIST_FOREACH(c, &bs->parents, next_parent) { 5512 if (c->klass->get_name) { 5513 name = c->klass->get_name(c); 5514 if (name && *name) { 5515 return name; 5516 } 5517 } 5518 } 5519 5520 return NULL; 5521 } 5522 5523 /* TODO check what callers really want: bs->node_name or blk_name() */ 5524 const char *bdrv_get_device_name(const BlockDriverState *bs) 5525 { 5526 return bdrv_get_parent_name(bs) ?: ""; 5527 } 5528 5529 /* This can be used to identify nodes that might not have a device 5530 * name associated. Since node and device names live in the same 5531 * namespace, the result is unambiguous. The exception is if both are 5532 * absent, then this returns an empty (non-null) string. 
*/ 5533 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 5534 { 5535 return bdrv_get_parent_name(bs) ?: bs->node_name; 5536 } 5537 5538 int bdrv_get_flags(BlockDriverState *bs) 5539 { 5540 return bs->open_flags; 5541 } 5542 5543 int bdrv_has_zero_init_1(BlockDriverState *bs) 5544 { 5545 return 1; 5546 } 5547 5548 int bdrv_has_zero_init(BlockDriverState *bs) 5549 { 5550 BlockDriverState *filtered; 5551 5552 if (!bs->drv) { 5553 return 0; 5554 } 5555 5556 /* If BS is a copy on write image, it is initialized to 5557 the contents of the base image, which may not be zeroes. */ 5558 if (bdrv_cow_child(bs)) { 5559 return 0; 5560 } 5561 if (bs->drv->bdrv_has_zero_init) { 5562 return bs->drv->bdrv_has_zero_init(bs); 5563 } 5564 5565 filtered = bdrv_filter_bs(bs); 5566 if (filtered) { 5567 return bdrv_has_zero_init(filtered); 5568 } 5569 5570 /* safe default */ 5571 return 0; 5572 } 5573 5574 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 5575 { 5576 if (!(bs->open_flags & BDRV_O_UNMAP)) { 5577 return false; 5578 } 5579 5580 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; 5581 } 5582 5583 void bdrv_get_backing_filename(BlockDriverState *bs, 5584 char *filename, int filename_size) 5585 { 5586 pstrcpy(filename, filename_size, bs->backing_file); 5587 } 5588 5589 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 5590 { 5591 int ret; 5592 BlockDriver *drv = bs->drv; 5593 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 5594 if (!drv) { 5595 return -ENOMEDIUM; 5596 } 5597 if (!drv->bdrv_get_info) { 5598 BlockDriverState *filtered = bdrv_filter_bs(bs); 5599 if (filtered) { 5600 return bdrv_get_info(filtered, bdi); 5601 } 5602 return -ENOTSUP; 5603 } 5604 memset(bdi, 0, sizeof(*bdi)); 5605 ret = drv->bdrv_get_info(bs, bdi); 5606 if (ret < 0) { 5607 return ret; 5608 } 5609 5610 if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) { 5611 return -EINVAL; 5612 } 5613 5614 return 0; 5615 } 5616 5617 ImageInfoSpecific 
*bdrv_get_specific_info(BlockDriverState *bs, 5618 Error **errp) 5619 { 5620 BlockDriver *drv = bs->drv; 5621 if (drv && drv->bdrv_get_specific_info) { 5622 return drv->bdrv_get_specific_info(bs, errp); 5623 } 5624 return NULL; 5625 } 5626 5627 BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) 5628 { 5629 BlockDriver *drv = bs->drv; 5630 if (!drv || !drv->bdrv_get_specific_stats) { 5631 return NULL; 5632 } 5633 return drv->bdrv_get_specific_stats(bs); 5634 } 5635 5636 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) 5637 { 5638 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 5639 return; 5640 } 5641 5642 bs->drv->bdrv_debug_event(bs, event); 5643 } 5644 5645 static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) 5646 { 5647 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 5648 bs = bdrv_primary_bs(bs); 5649 } 5650 5651 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 5652 assert(bs->drv->bdrv_debug_remove_breakpoint); 5653 return bs; 5654 } 5655 5656 return NULL; 5657 } 5658 5659 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 5660 const char *tag) 5661 { 5662 bs = bdrv_find_debug_node(bs); 5663 if (bs) { 5664 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 5665 } 5666 5667 return -ENOTSUP; 5668 } 5669 5670 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 5671 { 5672 bs = bdrv_find_debug_node(bs); 5673 if (bs) { 5674 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 5675 } 5676 5677 return -ENOTSUP; 5678 } 5679 5680 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 5681 { 5682 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 5683 bs = bdrv_primary_bs(bs); 5684 } 5685 5686 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 5687 return bs->drv->bdrv_debug_resume(bs, tag); 5688 } 5689 5690 return -ENOTSUP; 5691 } 5692 5693 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 5694 { 5695 while (bs && bs->drv && 
!bs->drv->bdrv_debug_is_suspended) { 5696 bs = bdrv_primary_bs(bs); 5697 } 5698 5699 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 5700 return bs->drv->bdrv_debug_is_suspended(bs, tag); 5701 } 5702 5703 return false; 5704 } 5705 5706 /* backing_file can either be relative, or absolute, or a protocol. If it is 5707 * relative, it must be relative to the chain. So, passing in bs->filename 5708 * from a BDS as backing_file should not be done, as that may be relative to 5709 * the CWD rather than the chain. */ 5710 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 5711 const char *backing_file) 5712 { 5713 char *filename_full = NULL; 5714 char *backing_file_full = NULL; 5715 char *filename_tmp = NULL; 5716 int is_protocol = 0; 5717 bool filenames_refreshed = false; 5718 BlockDriverState *curr_bs = NULL; 5719 BlockDriverState *retval = NULL; 5720 BlockDriverState *bs_below; 5721 5722 if (!bs || !bs->drv || !backing_file) { 5723 return NULL; 5724 } 5725 5726 filename_full = g_malloc(PATH_MAX); 5727 backing_file_full = g_malloc(PATH_MAX); 5728 5729 is_protocol = path_has_protocol(backing_file); 5730 5731 /* 5732 * Being largely a legacy function, skip any filters here 5733 * (because filters do not have normal filenames, so they cannot 5734 * match anyway; and allowing json:{} filenames is a bit out of 5735 * scope). 5736 */ 5737 for (curr_bs = bdrv_skip_filters(bs); 5738 bdrv_cow_child(curr_bs) != NULL; 5739 curr_bs = bs_below) 5740 { 5741 bs_below = bdrv_backing_chain_next(curr_bs); 5742 5743 if (bdrv_backing_overridden(curr_bs)) { 5744 /* 5745 * If the backing file was overridden, we can only compare 5746 * directly against the backing node's filename. 5747 */ 5748 5749 if (!filenames_refreshed) { 5750 /* 5751 * This will automatically refresh all of the 5752 * filenames in the rest of the backing chain, so we 5753 * only need to do this once. 
5754 */ 5755 bdrv_refresh_filename(bs_below); 5756 filenames_refreshed = true; 5757 } 5758 5759 if (strcmp(backing_file, bs_below->filename) == 0) { 5760 retval = bs_below; 5761 break; 5762 } 5763 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 5764 /* 5765 * If either of the filename paths is actually a protocol, then 5766 * compare unmodified paths; otherwise make paths relative. 5767 */ 5768 char *backing_file_full_ret; 5769 5770 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 5771 retval = bs_below; 5772 break; 5773 } 5774 /* Also check against the full backing filename for the image */ 5775 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs, 5776 NULL); 5777 if (backing_file_full_ret) { 5778 bool equal = strcmp(backing_file, backing_file_full_ret) == 0; 5779 g_free(backing_file_full_ret); 5780 if (equal) { 5781 retval = bs_below; 5782 break; 5783 } 5784 } 5785 } else { 5786 /* If not an absolute filename path, make it relative to the current 5787 * image's filename path */ 5788 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file, 5789 NULL); 5790 /* We are going to compare canonicalized absolute pathnames */ 5791 if (!filename_tmp || !realpath(filename_tmp, filename_full)) { 5792 g_free(filename_tmp); 5793 continue; 5794 } 5795 g_free(filename_tmp); 5796 5797 /* We need to make sure the backing filename we are comparing against 5798 * is relative to the current image filename (or absolute) */ 5799 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL); 5800 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) { 5801 g_free(filename_tmp); 5802 continue; 5803 } 5804 g_free(filename_tmp); 5805 5806 if (strcmp(backing_file_full, filename_full) == 0) { 5807 retval = bs_below; 5808 break; 5809 } 5810 } 5811 } 5812 5813 g_free(filename_full); 5814 g_free(backing_file_full); 5815 return retval; 5816 } 5817 5818 void bdrv_init(void) 5819 { 5820 module_call_init(MODULE_INIT_BLOCK); 5821 } 5822 5823 
void bdrv_init_with_whitelist(void) 5824 { 5825 use_bdrv_whitelist = 1; 5826 bdrv_init(); 5827 } 5828 5829 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) 5830 { 5831 BdrvChild *child, *parent; 5832 Error *local_err = NULL; 5833 int ret; 5834 BdrvDirtyBitmap *bm; 5835 5836 if (!bs->drv) { 5837 return -ENOMEDIUM; 5838 } 5839 5840 QLIST_FOREACH(child, &bs->children, next) { 5841 bdrv_co_invalidate_cache(child->bs, &local_err); 5842 if (local_err) { 5843 error_propagate(errp, local_err); 5844 return -EINVAL; 5845 } 5846 } 5847 5848 /* 5849 * Update permissions, they may differ for inactive nodes. 5850 * 5851 * Note that the required permissions of inactive images are always a 5852 * subset of the permissions required after activating the image. This 5853 * allows us to just get the permissions upfront without restricting 5854 * drv->bdrv_invalidate_cache(). 5855 * 5856 * It also means that in error cases, we don't have to try and revert to 5857 * the old permissions (which is an operation that could fail, too). We can 5858 * just keep the extended permissions for the next time that an activation 5859 * of the image is tried. 
5860 */ 5861 if (bs->open_flags & BDRV_O_INACTIVE) { 5862 bs->open_flags &= ~BDRV_O_INACTIVE; 5863 ret = bdrv_refresh_perms(bs, errp); 5864 if (ret < 0) { 5865 bs->open_flags |= BDRV_O_INACTIVE; 5866 return ret; 5867 } 5868 5869 if (bs->drv->bdrv_co_invalidate_cache) { 5870 bs->drv->bdrv_co_invalidate_cache(bs, &local_err); 5871 if (local_err) { 5872 bs->open_flags |= BDRV_O_INACTIVE; 5873 error_propagate(errp, local_err); 5874 return -EINVAL; 5875 } 5876 } 5877 5878 FOR_EACH_DIRTY_BITMAP(bs, bm) { 5879 bdrv_dirty_bitmap_skip_store(bm, false); 5880 } 5881 5882 ret = refresh_total_sectors(bs, bs->total_sectors); 5883 if (ret < 0) { 5884 bs->open_flags |= BDRV_O_INACTIVE; 5885 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 5886 return ret; 5887 } 5888 } 5889 5890 QLIST_FOREACH(parent, &bs->parents, next_parent) { 5891 if (parent->klass->activate) { 5892 parent->klass->activate(parent, &local_err); 5893 if (local_err) { 5894 bs->open_flags |= BDRV_O_INACTIVE; 5895 error_propagate(errp, local_err); 5896 return -EINVAL; 5897 } 5898 } 5899 } 5900 5901 return 0; 5902 } 5903 5904 void bdrv_invalidate_cache_all(Error **errp) 5905 { 5906 BlockDriverState *bs; 5907 BdrvNextIterator it; 5908 5909 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 5910 AioContext *aio_context = bdrv_get_aio_context(bs); 5911 int ret; 5912 5913 aio_context_acquire(aio_context); 5914 ret = bdrv_invalidate_cache(bs, errp); 5915 aio_context_release(aio_context); 5916 if (ret < 0) { 5917 bdrv_next_cleanup(&it); 5918 return; 5919 } 5920 } 5921 } 5922 5923 static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) 5924 { 5925 BdrvChild *parent; 5926 5927 QLIST_FOREACH(parent, &bs->parents, next_parent) { 5928 if (parent->klass->parent_is_bds) { 5929 BlockDriverState *parent_bs = parent->opaque; 5930 if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) { 5931 return true; 5932 } 5933 } 5934 } 5935 5936 return false; 5937 } 5938 5939 static int 
bdrv_inactivate_recurse(BlockDriverState *bs) 5940 { 5941 BdrvChild *child, *parent; 5942 int ret; 5943 5944 if (!bs->drv) { 5945 return -ENOMEDIUM; 5946 } 5947 5948 /* Make sure that we don't inactivate a child before its parent. 5949 * It will be covered by recursion from the yet active parent. */ 5950 if (bdrv_has_bds_parent(bs, true)) { 5951 return 0; 5952 } 5953 5954 assert(!(bs->open_flags & BDRV_O_INACTIVE)); 5955 5956 /* Inactivate this node */ 5957 if (bs->drv->bdrv_inactivate) { 5958 ret = bs->drv->bdrv_inactivate(bs); 5959 if (ret < 0) { 5960 return ret; 5961 } 5962 } 5963 5964 QLIST_FOREACH(parent, &bs->parents, next_parent) { 5965 if (parent->klass->inactivate) { 5966 ret = parent->klass->inactivate(parent); 5967 if (ret < 0) { 5968 return ret; 5969 } 5970 } 5971 } 5972 5973 bs->open_flags |= BDRV_O_INACTIVE; 5974 5975 /* 5976 * Update permissions, they may differ for inactive nodes. 5977 * We only tried to loosen restrictions, so errors are not fatal, ignore 5978 * them. 5979 */ 5980 bdrv_refresh_perms(bs, NULL); 5981 5982 /* Recursively inactivate children */ 5983 QLIST_FOREACH(child, &bs->children, next) { 5984 ret = bdrv_inactivate_recurse(child->bs); 5985 if (ret < 0) { 5986 return ret; 5987 } 5988 } 5989 5990 return 0; 5991 } 5992 5993 int bdrv_inactivate_all(void) 5994 { 5995 BlockDriverState *bs = NULL; 5996 BdrvNextIterator it; 5997 int ret = 0; 5998 GSList *aio_ctxs = NULL, *ctx; 5999 6000 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6001 AioContext *aio_context = bdrv_get_aio_context(bs); 6002 6003 if (!g_slist_find(aio_ctxs, aio_context)) { 6004 aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); 6005 aio_context_acquire(aio_context); 6006 } 6007 } 6008 6009 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 6010 /* Nodes with BDS parents are covered by recursion from the last 6011 * parent that gets inactivated. Don't inactivate them a second 6012 * time if that has already happened. 
*/ 6013 if (bdrv_has_bds_parent(bs, false)) { 6014 continue; 6015 } 6016 ret = bdrv_inactivate_recurse(bs); 6017 if (ret < 0) { 6018 bdrv_next_cleanup(&it); 6019 goto out; 6020 } 6021 } 6022 6023 out: 6024 for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { 6025 AioContext *aio_context = ctx->data; 6026 aio_context_release(aio_context); 6027 } 6028 g_slist_free(aio_ctxs); 6029 6030 return ret; 6031 } 6032 6033 /**************************************************************/ 6034 /* removable device support */ 6035 6036 /** 6037 * Return TRUE if the media is present 6038 */ 6039 bool bdrv_is_inserted(BlockDriverState *bs) 6040 { 6041 BlockDriver *drv = bs->drv; 6042 BdrvChild *child; 6043 6044 if (!drv) { 6045 return false; 6046 } 6047 if (drv->bdrv_is_inserted) { 6048 return drv->bdrv_is_inserted(bs); 6049 } 6050 QLIST_FOREACH(child, &bs->children, next) { 6051 if (!bdrv_is_inserted(child->bs)) { 6052 return false; 6053 } 6054 } 6055 return true; 6056 } 6057 6058 /** 6059 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 6060 */ 6061 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 6062 { 6063 BlockDriver *drv = bs->drv; 6064 6065 if (drv && drv->bdrv_eject) { 6066 drv->bdrv_eject(bs, eject_flag); 6067 } 6068 } 6069 6070 /** 6071 * Lock or unlock the media (if it is locked, the user won't be able 6072 * to eject it manually). 6073 */ 6074 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 6075 { 6076 BlockDriver *drv = bs->drv; 6077 6078 trace_bdrv_lock_medium(bs, locked); 6079 6080 if (drv && drv->bdrv_lock_medium) { 6081 drv->bdrv_lock_medium(bs, locked); 6082 } 6083 } 6084 6085 /* Get a reference to bs */ 6086 void bdrv_ref(BlockDriverState *bs) 6087 { 6088 bs->refcnt++; 6089 } 6090 6091 /* Release a previously grabbed reference to bs. 6092 * If after releasing, reference count is zero, the BlockDriverState is 6093 * deleted. 
*/ 6094 void bdrv_unref(BlockDriverState *bs) 6095 { 6096 if (!bs) { 6097 return; 6098 } 6099 assert(bs->refcnt > 0); 6100 if (--bs->refcnt == 0) { 6101 bdrv_delete(bs); 6102 } 6103 } 6104 6105 struct BdrvOpBlocker { 6106 Error *reason; 6107 QLIST_ENTRY(BdrvOpBlocker) list; 6108 }; 6109 6110 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 6111 { 6112 BdrvOpBlocker *blocker; 6113 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6114 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 6115 blocker = QLIST_FIRST(&bs->op_blockers[op]); 6116 error_propagate_prepend(errp, error_copy(blocker->reason), 6117 "Node '%s' is busy: ", 6118 bdrv_get_device_or_node_name(bs)); 6119 return true; 6120 } 6121 return false; 6122 } 6123 6124 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 6125 { 6126 BdrvOpBlocker *blocker; 6127 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6128 6129 blocker = g_new0(BdrvOpBlocker, 1); 6130 blocker->reason = reason; 6131 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 6132 } 6133 6134 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 6135 { 6136 BdrvOpBlocker *blocker, *next; 6137 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 6138 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 6139 if (blocker->reason == reason) { 6140 QLIST_REMOVE(blocker, list); 6141 g_free(blocker); 6142 } 6143 } 6144 } 6145 6146 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 6147 { 6148 int i; 6149 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6150 bdrv_op_block(bs, i, reason); 6151 } 6152 } 6153 6154 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 6155 { 6156 int i; 6157 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6158 bdrv_op_unblock(bs, i, reason); 6159 } 6160 } 6161 6162 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 6163 { 6164 int i; 6165 6166 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 6167 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 6168 return false; 6169 } 
6170 } 6171 return true; 6172 } 6173 6174 void bdrv_img_create(const char *filename, const char *fmt, 6175 const char *base_filename, const char *base_fmt, 6176 char *options, uint64_t img_size, int flags, bool quiet, 6177 Error **errp) 6178 { 6179 QemuOptsList *create_opts = NULL; 6180 QemuOpts *opts = NULL; 6181 const char *backing_fmt, *backing_file; 6182 int64_t size; 6183 BlockDriver *drv, *proto_drv; 6184 Error *local_err = NULL; 6185 int ret = 0; 6186 6187 /* Find driver and parse its options */ 6188 drv = bdrv_find_format(fmt); 6189 if (!drv) { 6190 error_setg(errp, "Unknown file format '%s'", fmt); 6191 return; 6192 } 6193 6194 proto_drv = bdrv_find_protocol(filename, true, errp); 6195 if (!proto_drv) { 6196 return; 6197 } 6198 6199 if (!drv->create_opts) { 6200 error_setg(errp, "Format driver '%s' does not support image creation", 6201 drv->format_name); 6202 return; 6203 } 6204 6205 if (!proto_drv->create_opts) { 6206 error_setg(errp, "Protocol driver '%s' does not support image creation", 6207 proto_drv->format_name); 6208 return; 6209 } 6210 6211 /* Create parameter list */ 6212 create_opts = qemu_opts_append(create_opts, drv->create_opts); 6213 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 6214 6215 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 6216 6217 /* Parse -o options */ 6218 if (options) { 6219 if (!qemu_opts_do_parse(opts, options, NULL, errp)) { 6220 goto out; 6221 } 6222 } 6223 6224 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) { 6225 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 6226 } else if (img_size != UINT64_C(-1)) { 6227 error_setg(errp, "The image size must be specified only once"); 6228 goto out; 6229 } 6230 6231 if (base_filename) { 6232 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, 6233 NULL)) { 6234 error_setg(errp, "Backing file not supported for file format '%s'", 6235 fmt); 6236 goto out; 6237 } 6238 } 6239 6240 if (base_fmt) { 6241 if (!qemu_opt_set(opts, 
BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) { 6242 error_setg(errp, "Backing file format not supported for file " 6243 "format '%s'", fmt); 6244 goto out; 6245 } 6246 } 6247 6248 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 6249 if (backing_file) { 6250 if (!strcmp(filename, backing_file)) { 6251 error_setg(errp, "Error: Trying to create an image with the " 6252 "same filename as the backing file"); 6253 goto out; 6254 } 6255 if (backing_file[0] == '\0') { 6256 error_setg(errp, "Expected backing file name, got empty string"); 6257 goto out; 6258 } 6259 } 6260 6261 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 6262 6263 /* The size for the image must always be specified, unless we have a backing 6264 * file and we have not been forbidden from opening it. */ 6265 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 6266 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 6267 BlockDriverState *bs; 6268 char *full_backing; 6269 int back_flags; 6270 QDict *backing_options = NULL; 6271 6272 full_backing = 6273 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 6274 &local_err); 6275 if (local_err) { 6276 goto out; 6277 } 6278 assert(full_backing); 6279 6280 /* backing files always opened read-only */ 6281 back_flags = flags; 6282 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 6283 6284 backing_options = qdict_new(); 6285 if (backing_fmt) { 6286 qdict_put_str(backing_options, "driver", backing_fmt); 6287 } 6288 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 6289 6290 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 6291 &local_err); 6292 g_free(full_backing); 6293 if (!bs) { 6294 error_append_hint(&local_err, "Could not open backing image.\n"); 6295 goto out; 6296 } else { 6297 if (!backing_fmt) { 6298 warn_report("Deprecated use of backing file without explicit " 6299 "backing format (detected format of %s)", 6300 bs->drv->format_name); 6301 if (bs->drv != &bdrv_raw) { 6302 /* 
6303 * A probe of raw deserves the most attention: 6304 * leaving the backing format out of the image 6305 * will ensure bs->probed is set (ensuring we 6306 * don't accidentally commit into the backing 6307 * file), and allow more spots to warn the users 6308 * to fix their toolchain when opening this image 6309 * later. For other images, we can safely record 6310 * the format that we probed. 6311 */ 6312 backing_fmt = bs->drv->format_name; 6313 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, 6314 NULL); 6315 } 6316 } 6317 if (size == -1) { 6318 /* Opened BS, have no size */ 6319 size = bdrv_getlength(bs); 6320 if (size < 0) { 6321 error_setg_errno(errp, -size, "Could not get size of '%s'", 6322 backing_file); 6323 bdrv_unref(bs); 6324 goto out; 6325 } 6326 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 6327 } 6328 bdrv_unref(bs); 6329 } 6330 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 6331 } else if (backing_file && !backing_fmt) { 6332 warn_report("Deprecated use of unopened backing file without " 6333 "explicit backing format, use of this image requires " 6334 "potentially unsafe format probing"); 6335 } 6336 6337 if (size == -1) { 6338 error_setg(errp, "Image creation needs a size parameter"); 6339 goto out; 6340 } 6341 6342 if (!quiet) { 6343 printf("Formatting '%s', fmt=%s ", filename, fmt); 6344 qemu_opts_print(opts, " "); 6345 puts(""); 6346 fflush(stdout); 6347 } 6348 6349 ret = bdrv_create(drv, filename, opts, &local_err); 6350 6351 if (ret == -EFBIG) { 6352 /* This is generally a better message than whatever the driver would 6353 * deliver (especially because of the cluster_size_hint), since that 6354 * is most probably not much different from "image too large". 
*/ 6355 const char *cluster_size_hint = ""; 6356 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 6357 cluster_size_hint = " (try using a larger cluster size)"; 6358 } 6359 error_setg(errp, "The image size is too large for file format '%s'" 6360 "%s", fmt, cluster_size_hint); 6361 error_free(local_err); 6362 local_err = NULL; 6363 } 6364 6365 out: 6366 qemu_opts_del(opts); 6367 qemu_opts_free(create_opts); 6368 error_propagate(errp, local_err); 6369 } 6370 6371 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 6372 { 6373 return bs ? bs->aio_context : qemu_get_aio_context(); 6374 } 6375 6376 AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) 6377 { 6378 Coroutine *self = qemu_coroutine_self(); 6379 AioContext *old_ctx = qemu_coroutine_get_aio_context(self); 6380 AioContext *new_ctx; 6381 6382 /* 6383 * Increase bs->in_flight to ensure that this operation is completed before 6384 * moving the node to a different AioContext. Read new_ctx only afterwards. 6385 */ 6386 bdrv_inc_in_flight(bs); 6387 6388 new_ctx = bdrv_get_aio_context(bs); 6389 aio_co_reschedule_self(new_ctx); 6390 return old_ctx; 6391 } 6392 6393 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) 6394 { 6395 aio_co_reschedule_self(old_ctx); 6396 bdrv_dec_in_flight(bs); 6397 } 6398 6399 void coroutine_fn bdrv_co_lock(BlockDriverState *bs) 6400 { 6401 AioContext *ctx = bdrv_get_aio_context(bs); 6402 6403 /* In the main thread, bs->aio_context won't change concurrently */ 6404 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6405 6406 /* 6407 * We're in coroutine context, so we already hold the lock of the main 6408 * loop AioContext. Don't lock it twice to avoid deadlocks. 
6409 */ 6410 assert(qemu_in_coroutine()); 6411 if (ctx != qemu_get_aio_context()) { 6412 aio_context_acquire(ctx); 6413 } 6414 } 6415 6416 void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) 6417 { 6418 AioContext *ctx = bdrv_get_aio_context(bs); 6419 6420 assert(qemu_in_coroutine()); 6421 if (ctx != qemu_get_aio_context()) { 6422 aio_context_release(ctx); 6423 } 6424 } 6425 6426 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) 6427 { 6428 aio_co_enter(bdrv_get_aio_context(bs), co); 6429 } 6430 6431 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) 6432 { 6433 QLIST_REMOVE(ban, list); 6434 g_free(ban); 6435 } 6436 6437 static void bdrv_detach_aio_context(BlockDriverState *bs) 6438 { 6439 BdrvAioNotifier *baf, *baf_tmp; 6440 6441 assert(!bs->walking_aio_notifiers); 6442 bs->walking_aio_notifiers = true; 6443 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { 6444 if (baf->deleted) { 6445 bdrv_do_remove_aio_context_notifier(baf); 6446 } else { 6447 baf->detach_aio_context(baf->opaque); 6448 } 6449 } 6450 /* Never mind iterating again to check for ->deleted. bdrv_close() will 6451 * remove remaining aio notifiers if we aren't called again. 
6452 */ 6453 bs->walking_aio_notifiers = false; 6454 6455 if (bs->drv && bs->drv->bdrv_detach_aio_context) { 6456 bs->drv->bdrv_detach_aio_context(bs); 6457 } 6458 6459 if (bs->quiesce_counter) { 6460 aio_enable_external(bs->aio_context); 6461 } 6462 bs->aio_context = NULL; 6463 } 6464 6465 static void bdrv_attach_aio_context(BlockDriverState *bs, 6466 AioContext *new_context) 6467 { 6468 BdrvAioNotifier *ban, *ban_tmp; 6469 6470 if (bs->quiesce_counter) { 6471 aio_disable_external(new_context); 6472 } 6473 6474 bs->aio_context = new_context; 6475 6476 if (bs->drv && bs->drv->bdrv_attach_aio_context) { 6477 bs->drv->bdrv_attach_aio_context(bs, new_context); 6478 } 6479 6480 assert(!bs->walking_aio_notifiers); 6481 bs->walking_aio_notifiers = true; 6482 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { 6483 if (ban->deleted) { 6484 bdrv_do_remove_aio_context_notifier(ban); 6485 } else { 6486 ban->attached_aio_context(new_context, ban->opaque); 6487 } 6488 } 6489 bs->walking_aio_notifiers = false; 6490 } 6491 6492 /* 6493 * Changes the AioContext used for fd handlers, timers, and BHs by this 6494 * BlockDriverState and all its children and parents. 6495 * 6496 * Must be called from the main AioContext. 6497 * 6498 * The caller must own the AioContext lock for the old AioContext of bs, but it 6499 * must not own the AioContext lock for new_context (unless new_context is the 6500 * same as the current context of bs). 6501 * 6502 * @ignore will accumulate all visited BdrvChild object. The caller is 6503 * responsible for freeing the list afterwards. 
6504 */ 6505 void bdrv_set_aio_context_ignore(BlockDriverState *bs, 6506 AioContext *new_context, GSList **ignore) 6507 { 6508 AioContext *old_context = bdrv_get_aio_context(bs); 6509 GSList *children_to_process = NULL; 6510 GSList *parents_to_process = NULL; 6511 GSList *entry; 6512 BdrvChild *child, *parent; 6513 6514 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 6515 6516 if (old_context == new_context) { 6517 return; 6518 } 6519 6520 bdrv_drained_begin(bs); 6521 6522 QLIST_FOREACH(child, &bs->children, next) { 6523 if (g_slist_find(*ignore, child)) { 6524 continue; 6525 } 6526 *ignore = g_slist_prepend(*ignore, child); 6527 children_to_process = g_slist_prepend(children_to_process, child); 6528 } 6529 6530 QLIST_FOREACH(parent, &bs->parents, next_parent) { 6531 if (g_slist_find(*ignore, parent)) { 6532 continue; 6533 } 6534 *ignore = g_slist_prepend(*ignore, parent); 6535 parents_to_process = g_slist_prepend(parents_to_process, parent); 6536 } 6537 6538 for (entry = children_to_process; 6539 entry != NULL; 6540 entry = g_slist_next(entry)) { 6541 child = entry->data; 6542 bdrv_set_aio_context_ignore(child->bs, new_context, ignore); 6543 } 6544 g_slist_free(children_to_process); 6545 6546 for (entry = parents_to_process; 6547 entry != NULL; 6548 entry = g_slist_next(entry)) { 6549 parent = entry->data; 6550 assert(parent->klass->set_aio_ctx); 6551 parent->klass->set_aio_ctx(parent, new_context, ignore); 6552 } 6553 g_slist_free(parents_to_process); 6554 6555 bdrv_detach_aio_context(bs); 6556 6557 /* Acquire the new context, if necessary */ 6558 if (qemu_get_aio_context() != new_context) { 6559 aio_context_acquire(new_context); 6560 } 6561 6562 bdrv_attach_aio_context(bs, new_context); 6563 6564 /* 6565 * If this function was recursively called from 6566 * bdrv_set_aio_context_ignore(), there may be nodes in the 6567 * subtree that have not yet been moved to the new AioContext. 6568 * Release the old one so bdrv_drained_end() can poll them. 
6569 */ 6570 if (qemu_get_aio_context() != old_context) { 6571 aio_context_release(old_context); 6572 } 6573 6574 bdrv_drained_end(bs); 6575 6576 if (qemu_get_aio_context() != old_context) { 6577 aio_context_acquire(old_context); 6578 } 6579 if (qemu_get_aio_context() != new_context) { 6580 aio_context_release(new_context); 6581 } 6582 } 6583 6584 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, 6585 GSList **ignore, Error **errp) 6586 { 6587 if (g_slist_find(*ignore, c)) { 6588 return true; 6589 } 6590 *ignore = g_slist_prepend(*ignore, c); 6591 6592 /* 6593 * A BdrvChildClass that doesn't handle AioContext changes cannot 6594 * tolerate any AioContext changes 6595 */ 6596 if (!c->klass->can_set_aio_ctx) { 6597 char *user = bdrv_child_user_desc(c); 6598 error_setg(errp, "Changing iothreads is not supported by %s", user); 6599 g_free(user); 6600 return false; 6601 } 6602 if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) { 6603 assert(!errp || *errp); 6604 return false; 6605 } 6606 return true; 6607 } 6608 6609 bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, 6610 GSList **ignore, Error **errp) 6611 { 6612 if (g_slist_find(*ignore, c)) { 6613 return true; 6614 } 6615 *ignore = g_slist_prepend(*ignore, c); 6616 return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); 6617 } 6618 6619 /* @ignore will accumulate all visited BdrvChild object. The caller is 6620 * responsible for freeing the list afterwards. 
*/ 6621 bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6622 GSList **ignore, Error **errp) 6623 { 6624 BdrvChild *c; 6625 6626 if (bdrv_get_aio_context(bs) == ctx) { 6627 return true; 6628 } 6629 6630 QLIST_FOREACH(c, &bs->parents, next_parent) { 6631 if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { 6632 return false; 6633 } 6634 } 6635 QLIST_FOREACH(c, &bs->children, next) { 6636 if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { 6637 return false; 6638 } 6639 } 6640 6641 return true; 6642 } 6643 6644 int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6645 BdrvChild *ignore_child, Error **errp) 6646 { 6647 GSList *ignore; 6648 bool ret; 6649 6650 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 6651 ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); 6652 g_slist_free(ignore); 6653 6654 if (!ret) { 6655 return -EPERM; 6656 } 6657 6658 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; 6659 bdrv_set_aio_context_ignore(bs, ctx, &ignore); 6660 g_slist_free(ignore); 6661 6662 return 0; 6663 } 6664 6665 int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, 6666 Error **errp) 6667 { 6668 return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); 6669 } 6670 6671 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 6672 void (*attached_aio_context)(AioContext *new_context, void *opaque), 6673 void (*detach_aio_context)(void *opaque), void *opaque) 6674 { 6675 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 6676 *ban = (BdrvAioNotifier){ 6677 .attached_aio_context = attached_aio_context, 6678 .detach_aio_context = detach_aio_context, 6679 .opaque = opaque 6680 }; 6681 6682 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 6683 } 6684 6685 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 6686 void (*attached_aio_context)(AioContext *, 6687 void *), 6688 void (*detach_aio_context)(void *), 6689 void *opaque) 6690 { 6691 BdrvAioNotifier 
*ban, *ban_next; 6692 6693 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 6694 if (ban->attached_aio_context == attached_aio_context && 6695 ban->detach_aio_context == detach_aio_context && 6696 ban->opaque == opaque && 6697 ban->deleted == false) 6698 { 6699 if (bs->walking_aio_notifiers) { 6700 ban->deleted = true; 6701 } else { 6702 bdrv_do_remove_aio_context_notifier(ban); 6703 } 6704 return; 6705 } 6706 } 6707 6708 abort(); 6709 } 6710 6711 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 6712 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 6713 bool force, 6714 Error **errp) 6715 { 6716 if (!bs->drv) { 6717 error_setg(errp, "Node is ejected"); 6718 return -ENOMEDIUM; 6719 } 6720 if (!bs->drv->bdrv_amend_options) { 6721 error_setg(errp, "Block driver '%s' does not support option amendment", 6722 bs->drv->format_name); 6723 return -ENOTSUP; 6724 } 6725 return bs->drv->bdrv_amend_options(bs, opts, status_cb, 6726 cb_opaque, force, errp); 6727 } 6728 6729 /* 6730 * This function checks whether the given @to_replace is allowed to be 6731 * replaced by a node that always shows the same data as @bs. This is 6732 * used for example to verify whether the mirror job can replace 6733 * @to_replace by the target mirrored from @bs. 6734 * To be replaceable, @bs and @to_replace may either be guaranteed to 6735 * always show the same data (because they are only connected through 6736 * filters), or some driver may allow replacing one of its children 6737 * because it can guarantee that this child's data is not visible at 6738 * all (for example, for dissenting quorum children that have no other 6739 * parents). 
6740 */ 6741 bool bdrv_recurse_can_replace(BlockDriverState *bs, 6742 BlockDriverState *to_replace) 6743 { 6744 BlockDriverState *filtered; 6745 6746 if (!bs || !bs->drv) { 6747 return false; 6748 } 6749 6750 if (bs == to_replace) { 6751 return true; 6752 } 6753 6754 /* See what the driver can do */ 6755 if (bs->drv->bdrv_recurse_can_replace) { 6756 return bs->drv->bdrv_recurse_can_replace(bs, to_replace); 6757 } 6758 6759 /* For filters without an own implementation, we can recurse on our own */ 6760 filtered = bdrv_filter_bs(bs); 6761 if (filtered) { 6762 return bdrv_recurse_can_replace(filtered, to_replace); 6763 } 6764 6765 /* Safe default */ 6766 return false; 6767 } 6768 6769 /* 6770 * Check whether the given @node_name can be replaced by a node that 6771 * has the same data as @parent_bs. If so, return @node_name's BDS; 6772 * NULL otherwise. 6773 * 6774 * @node_name must be a (recursive) *child of @parent_bs (or this 6775 * function will return NULL). 6776 * 6777 * The result (whether the node can be replaced or not) is only valid 6778 * for as long as no graph or permission changes occur. 6779 */ 6780 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 6781 const char *node_name, Error **errp) 6782 { 6783 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 6784 AioContext *aio_context; 6785 6786 if (!to_replace_bs) { 6787 error_setg(errp, "Failed to find node with node-name='%s'", node_name); 6788 return NULL; 6789 } 6790 6791 aio_context = bdrv_get_aio_context(to_replace_bs); 6792 aio_context_acquire(aio_context); 6793 6794 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 6795 to_replace_bs = NULL; 6796 goto out; 6797 } 6798 6799 /* We don't want arbitrary node of the BDS chain to be replaced only the top 6800 * most non filter in order to prevent data corruption. 6801 * Another benefit is that this tests exclude backing files which are 6802 * blocked by the backing blockers. 
6803 */ 6804 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { 6805 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " 6806 "because it cannot be guaranteed that doing so would not " 6807 "lead to an abrupt change of visible data", 6808 node_name, parent_bs->node_name); 6809 to_replace_bs = NULL; 6810 goto out; 6811 } 6812 6813 out: 6814 aio_context_release(aio_context); 6815 return to_replace_bs; 6816 } 6817 6818 /** 6819 * Iterates through the list of runtime option keys that are said to 6820 * be "strong" for a BDS. An option is called "strong" if it changes 6821 * a BDS's data. For example, the null block driver's "size" and 6822 * "read-zeroes" options are strong, but its "latency-ns" option is 6823 * not. 6824 * 6825 * If a key returned by this function ends with a dot, all options 6826 * starting with that prefix are strong. 6827 */ 6828 static const char *const *strong_options(BlockDriverState *bs, 6829 const char *const *curopt) 6830 { 6831 static const char *const global_options[] = { 6832 "driver", "filename", NULL 6833 }; 6834 6835 if (!curopt) { 6836 return &global_options[0]; 6837 } 6838 6839 curopt++; 6840 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) { 6841 curopt = bs->drv->strong_runtime_opts; 6842 } 6843 6844 return (curopt && *curopt) ? curopt : NULL; 6845 } 6846 6847 /** 6848 * Copies all strong runtime options from bs->options to the given 6849 * QDict. The set of strong option keys is determined by invoking 6850 * strong_options(). 6851 * 6852 * Returns true iff any strong option was present in bs->options (and 6853 * thus copied to the target QDict) with the exception of "filename" 6854 * and "driver". The caller is expected to use this value to decide 6855 * whether the existence of strong options prevents the generation of 6856 * a plain filename. 
6857 */ 6858 static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) 6859 { 6860 bool found_any = false; 6861 const char *const *option_name = NULL; 6862 6863 if (!bs->drv) { 6864 return false; 6865 } 6866 6867 while ((option_name = strong_options(bs, option_name))) { 6868 bool option_given = false; 6869 6870 assert(strlen(*option_name) > 0); 6871 if ((*option_name)[strlen(*option_name) - 1] != '.') { 6872 QObject *entry = qdict_get(bs->options, *option_name); 6873 if (!entry) { 6874 continue; 6875 } 6876 6877 qdict_put_obj(d, *option_name, qobject_ref(entry)); 6878 option_given = true; 6879 } else { 6880 const QDictEntry *entry; 6881 for (entry = qdict_first(bs->options); entry; 6882 entry = qdict_next(bs->options, entry)) 6883 { 6884 if (strstart(qdict_entry_key(entry), *option_name, NULL)) { 6885 qdict_put_obj(d, qdict_entry_key(entry), 6886 qobject_ref(qdict_entry_value(entry))); 6887 option_given = true; 6888 } 6889 } 6890 } 6891 6892 /* While "driver" and "filename" need to be included in a JSON filename, 6893 * their existence does not prohibit generation of a plain filename. */ 6894 if (!found_any && option_given && 6895 strcmp(*option_name, "driver") && strcmp(*option_name, "filename")) 6896 { 6897 found_any = true; 6898 } 6899 } 6900 6901 if (!qdict_haskey(d, "driver")) { 6902 /* Drivers created with bdrv_new_open_driver() may not have a 6903 * @driver option. Add it here. */ 6904 qdict_put_str(d, "driver", bs->drv->format_name); 6905 } 6906 6907 return found_any; 6908 } 6909 6910 /* Note: This function may return false positives; it may return true 6911 * even if opening the backing file specified by bs's image header 6912 * would result in exactly bs->backing. 
*/
bool bdrv_backing_overridden(BlockDriverState *bs)
{
    if (!bs->backing) {
        /* No backing BDS, so if the image header reports any backing
         * file, it must have been suppressed */
        return bs->auto_backing_file[0] != '\0';
    }

    /* Overridden iff the attached backing node's filename differs from
     * auto_backing_file */
    return strcmp(bs->auto_backing_file, bs->backing->bs->filename) != 0;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
6936 */ 6937 void bdrv_refresh_filename(BlockDriverState *bs) 6938 { 6939 BlockDriver *drv = bs->drv; 6940 BdrvChild *child; 6941 BlockDriverState *primary_child_bs; 6942 QDict *opts; 6943 bool backing_overridden; 6944 bool generate_json_filename; /* Whether our default implementation should 6945 fill exact_filename (false) or not (true) */ 6946 6947 if (!drv) { 6948 return; 6949 } 6950 6951 /* This BDS's file name may depend on any of its children's file names, so 6952 * refresh those first */ 6953 QLIST_FOREACH(child, &bs->children, next) { 6954 bdrv_refresh_filename(child->bs); 6955 } 6956 6957 if (bs->implicit) { 6958 /* For implicit nodes, just copy everything from the single child */ 6959 child = QLIST_FIRST(&bs->children); 6960 assert(QLIST_NEXT(child, next) == NULL); 6961 6962 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 6963 child->bs->exact_filename); 6964 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); 6965 6966 qobject_unref(bs->full_open_options); 6967 bs->full_open_options = qobject_ref(child->bs->full_open_options); 6968 6969 return; 6970 } 6971 6972 backing_overridden = bdrv_backing_overridden(bs); 6973 6974 if (bs->open_flags & BDRV_O_NO_IO) { 6975 /* Without I/O, the backing file does not change anything. 6976 * Therefore, in such a case (primarily qemu-img), we can 6977 * pretend the backing file has not been overridden even if 6978 * it technically has been. 
*/ 6979 backing_overridden = false; 6980 } 6981 6982 /* Gather the options QDict */ 6983 opts = qdict_new(); 6984 generate_json_filename = append_strong_runtime_options(opts, bs); 6985 generate_json_filename |= backing_overridden; 6986 6987 if (drv->bdrv_gather_child_options) { 6988 /* Some block drivers may not want to present all of their children's 6989 * options, or name them differently from BdrvChild.name */ 6990 drv->bdrv_gather_child_options(bs, opts, backing_overridden); 6991 } else { 6992 QLIST_FOREACH(child, &bs->children, next) { 6993 if (child == bs->backing && !backing_overridden) { 6994 /* We can skip the backing BDS if it has not been overridden */ 6995 continue; 6996 } 6997 6998 qdict_put(opts, child->name, 6999 qobject_ref(child->bs->full_open_options)); 7000 } 7001 7002 if (backing_overridden && !bs->backing) { 7003 /* Force no backing file */ 7004 qdict_put_null(opts, "backing"); 7005 } 7006 } 7007 7008 qobject_unref(bs->full_open_options); 7009 bs->full_open_options = opts; 7010 7011 primary_child_bs = bdrv_primary_bs(bs); 7012 7013 if (drv->bdrv_refresh_filename) { 7014 /* Obsolete information is of no use here, so drop the old file name 7015 * information before refreshing it */ 7016 bs->exact_filename[0] = '\0'; 7017 7018 drv->bdrv_refresh_filename(bs); 7019 } else if (primary_child_bs) { 7020 /* 7021 * Try to reconstruct valid information from the underlying 7022 * file -- this only works for format nodes (filter nodes 7023 * cannot be probed and as such must be selected by the user 7024 * either through an options dict, or through a special 7025 * filename which the filter driver must construct in its 7026 * .bdrv_refresh_filename() implementation). 
7027 */ 7028 7029 bs->exact_filename[0] = '\0'; 7030 7031 /* 7032 * We can use the underlying file's filename if: 7033 * - it has a filename, 7034 * - the current BDS is not a filter, 7035 * - the file is a protocol BDS, and 7036 * - opening that file (as this BDS's format) will automatically create 7037 * the BDS tree we have right now, that is: 7038 * - the user did not significantly change this BDS's behavior with 7039 * some explicit (strong) options 7040 * - no non-file child of this BDS has been overridden by the user 7041 * Both of these conditions are represented by generate_json_filename. 7042 */ 7043 if (primary_child_bs->exact_filename[0] && 7044 primary_child_bs->drv->bdrv_file_open && 7045 !drv->is_filter && !generate_json_filename) 7046 { 7047 strcpy(bs->exact_filename, primary_child_bs->exact_filename); 7048 } 7049 } 7050 7051 if (bs->exact_filename[0]) { 7052 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 7053 } else { 7054 GString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 7055 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", 7056 json->str) >= sizeof(bs->filename)) { 7057 /* Give user a hint if we truncated things. 
*/ 7058 strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); 7059 } 7060 g_string_free(json, true); 7061 } 7062 } 7063 7064 char *bdrv_dirname(BlockDriverState *bs, Error **errp) 7065 { 7066 BlockDriver *drv = bs->drv; 7067 BlockDriverState *child_bs; 7068 7069 if (!drv) { 7070 error_setg(errp, "Node '%s' is ejected", bs->node_name); 7071 return NULL; 7072 } 7073 7074 if (drv->bdrv_dirname) { 7075 return drv->bdrv_dirname(bs, errp); 7076 } 7077 7078 child_bs = bdrv_primary_bs(bs); 7079 if (child_bs) { 7080 return bdrv_dirname(child_bs, errp); 7081 } 7082 7083 bdrv_refresh_filename(bs); 7084 if (bs->exact_filename[0] != '\0') { 7085 return path_combine(bs->exact_filename, ""); 7086 } 7087 7088 error_setg(errp, "Cannot generate a base directory for %s nodes", 7089 drv->format_name); 7090 return NULL; 7091 } 7092 7093 /* 7094 * Hot add/remove a BDS's child. So the user can take a child offline when 7095 * it is broken and take a new child online 7096 */ 7097 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 7098 Error **errp) 7099 { 7100 7101 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 7102 error_setg(errp, "The node %s does not support adding a child", 7103 bdrv_get_device_or_node_name(parent_bs)); 7104 return; 7105 } 7106 7107 if (!QLIST_EMPTY(&child_bs->parents)) { 7108 error_setg(errp, "The node %s already has a parent", 7109 child_bs->node_name); 7110 return; 7111 } 7112 7113 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 7114 } 7115 7116 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 7117 { 7118 BdrvChild *tmp; 7119 7120 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 7121 error_setg(errp, "The node %s does not support removing a child", 7122 bdrv_get_device_or_node_name(parent_bs)); 7123 return; 7124 } 7125 7126 QLIST_FOREACH(tmp, &parent_bs->children, next) { 7127 if (tmp == child) { 7128 break; 7129 } 7130 } 7131 7132 if (!tmp) { 7133 error_setg(errp, "The node 
%s does not have a child named %s", 7134 bdrv_get_device_or_node_name(parent_bs), 7135 bdrv_get_device_or_node_name(child->bs)); 7136 return; 7137 } 7138 7139 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 7140 } 7141 7142 int bdrv_make_empty(BdrvChild *c, Error **errp) 7143 { 7144 BlockDriver *drv = c->bs->drv; 7145 int ret; 7146 7147 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); 7148 7149 if (!drv->bdrv_make_empty) { 7150 error_setg(errp, "%s does not support emptying nodes", 7151 drv->format_name); 7152 return -ENOTSUP; 7153 } 7154 7155 ret = drv->bdrv_make_empty(c->bs); 7156 if (ret < 0) { 7157 error_setg_errno(errp, -ret, "Failed to empty %s", 7158 c->bs->filename); 7159 return ret; 7160 } 7161 7162 return 0; 7163 } 7164 7165 /* 7166 * Return the child that @bs acts as an overlay for, and from which data may be 7167 * copied in COW or COR operations. Usually this is the backing file. 7168 */ 7169 BdrvChild *bdrv_cow_child(BlockDriverState *bs) 7170 { 7171 if (!bs || !bs->drv) { 7172 return NULL; 7173 } 7174 7175 if (bs->drv->is_filter) { 7176 return NULL; 7177 } 7178 7179 if (!bs->backing) { 7180 return NULL; 7181 } 7182 7183 assert(bs->backing->role & BDRV_CHILD_COW); 7184 return bs->backing; 7185 } 7186 7187 /* 7188 * If @bs acts as a filter for exactly one of its children, return 7189 * that child. 7190 */ 7191 BdrvChild *bdrv_filter_child(BlockDriverState *bs) 7192 { 7193 BdrvChild *c; 7194 7195 if (!bs || !bs->drv) { 7196 return NULL; 7197 } 7198 7199 if (!bs->drv->is_filter) { 7200 return NULL; 7201 } 7202 7203 /* Only one of @backing or @file may be used */ 7204 assert(!(bs->backing && bs->file)); 7205 7206 c = bs->backing ?: bs->file; 7207 if (!c) { 7208 return NULL; 7209 } 7210 7211 assert(c->role & BDRV_CHILD_FILTERED); 7212 return c; 7213 } 7214 7215 /* 7216 * Return either the result of bdrv_cow_child() or bdrv_filter_child(), 7217 * whichever is non-NULL. 7218 * 7219 * Return NULL if both are NULL. 
7220 */ 7221 BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) 7222 { 7223 BdrvChild *cow_child = bdrv_cow_child(bs); 7224 BdrvChild *filter_child = bdrv_filter_child(bs); 7225 7226 /* Filter nodes cannot have COW backing files */ 7227 assert(!(cow_child && filter_child)); 7228 7229 return cow_child ?: filter_child; 7230 } 7231 7232 /* 7233 * Return the primary child of this node: For filters, that is the 7234 * filtered child. For other nodes, that is usually the child storing 7235 * metadata. 7236 * (A generally more helpful description is that this is (usually) the 7237 * child that has the same filename as @bs.) 7238 * 7239 * Drivers do not necessarily have a primary child; for example quorum 7240 * does not. 7241 */ 7242 BdrvChild *bdrv_primary_child(BlockDriverState *bs) 7243 { 7244 BdrvChild *c, *found = NULL; 7245 7246 QLIST_FOREACH(c, &bs->children, next) { 7247 if (c->role & BDRV_CHILD_PRIMARY) { 7248 assert(!found); 7249 found = c; 7250 } 7251 } 7252 7253 return found; 7254 } 7255 7256 static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, 7257 bool stop_on_explicit_filter) 7258 { 7259 BdrvChild *c; 7260 7261 if (!bs) { 7262 return NULL; 7263 } 7264 7265 while (!(stop_on_explicit_filter && !bs->implicit)) { 7266 c = bdrv_filter_child(bs); 7267 if (!c) { 7268 /* 7269 * A filter that is embedded in a working block graph must 7270 * have a child. Assert this here so this function does 7271 * not return a filter node that is not expected by the 7272 * caller. 7273 */ 7274 assert(!bs->drv || !bs->drv->is_filter); 7275 break; 7276 } 7277 bs = c->bs; 7278 } 7279 /* 7280 * Note that this treats nodes with bs->drv == NULL as not being 7281 * filters (bs->drv == NULL should be replaced by something else 7282 * anyway). 7283 * The advantage of this behavior is that this function will thus 7284 * always return a non-NULL value (given a non-NULL @bs). 
7285 */ 7286 7287 return bs; 7288 } 7289 7290 /* 7291 * Return the first BDS that has not been added implicitly or that 7292 * does not have a filtered child down the chain starting from @bs 7293 * (including @bs itself). 7294 */ 7295 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) 7296 { 7297 return bdrv_do_skip_filters(bs, true); 7298 } 7299 7300 /* 7301 * Return the first BDS that does not have a filtered child down the 7302 * chain starting from @bs (including @bs itself). 7303 */ 7304 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) 7305 { 7306 return bdrv_do_skip_filters(bs, false); 7307 } 7308 7309 /* 7310 * For a backing chain, return the first non-filter backing image of 7311 * the first non-filter image. 7312 */ 7313 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) 7314 { 7315 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); 7316 } 7317