1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include "block/trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "block/nbd.h" 30 #include "qemu/error-report.h" 31 #include "module_block.h" 32 #include "qemu/module.h" 33 #include "qapi/error.h" 34 #include "qapi/qmp/qdict.h" 35 #include "qapi/qmp/qjson.h" 36 #include "qapi/qmp/qstring.h" 37 #include "sysemu/block-backend.h" 38 #include "sysemu/sysemu.h" 39 #include "qemu/notify.h" 40 #include "qemu/option.h" 41 #include "qemu/coroutine.h" 42 #include "block/qapi.h" 43 #include "qemu/timer.h" 44 #include "qemu/cutils.h" 45 #include "qemu/id.h" 46 47 #ifdef CONFIG_BSD 48 #include <sys/ioctl.h> 49 #include <sys/queue.h> 50 #ifndef __DragonFly__ 51 #include <sys/disk.h> 52 #endif 53 #endif 54 55 #ifdef _WIN32 56 #include <windows.h> 57 #endif 58 59 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 60 61 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 62 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 63 64 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 65 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 66 67 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 68 QLIST_HEAD_INITIALIZER(bdrv_drivers); 69 70 static BlockDriverState *bdrv_open_inherit(const char *filename, 71 const char *reference, 72 QDict *options, int flags, 73 BlockDriverState *parent, 74 const BdrvChildRole *child_role, 75 Error **errp); 76 77 /* If non-zero, use only whitelisted block drivers */ 78 static int use_bdrv_whitelist; 79 80 #ifdef _WIN32 81 static int is_windows_drive_prefix(const char *filename) 82 { 83 return (((filename[0] >= 'a' && filename[0] <= 'z') || 84 (filename[0] >= 'A' && filename[0] <= 'Z')) && 85 filename[1] == ':'); 86 } 87 88 int is_windows_drive(const char *filename) 89 { 90 if (is_windows_drive_prefix(filename) && 91 filename[2] == '\0') 92 return 1; 93 if (strstart(filename, "\\\\.\\", NULL) || 94 strstart(filename, "//./", NULL)) 95 return 1; 96 return 0; 
97 } 98 #endif 99 100 size_t bdrv_opt_mem_align(BlockDriverState *bs) 101 { 102 if (!bs || !bs->drv) { 103 /* page size or 4k (hdd sector size) should be on the safe side */ 104 return MAX(4096, getpagesize()); 105 } 106 107 return bs->bl.opt_mem_alignment; 108 } 109 110 size_t bdrv_min_mem_align(BlockDriverState *bs) 111 { 112 if (!bs || !bs->drv) { 113 /* page size or 4k (hdd sector size) should be on the safe side */ 114 return MAX(4096, getpagesize()); 115 } 116 117 return bs->bl.min_mem_alignment; 118 } 119 120 /* check if the path starts with "<protocol>:" */ 121 int path_has_protocol(const char *path) 122 { 123 const char *p; 124 125 #ifdef _WIN32 126 if (is_windows_drive(path) || 127 is_windows_drive_prefix(path)) { 128 return 0; 129 } 130 p = path + strcspn(path, ":/\\"); 131 #else 132 p = path + strcspn(path, ":/"); 133 #endif 134 135 return *p == ':'; 136 } 137 138 int path_is_absolute(const char *path) 139 { 140 #ifdef _WIN32 141 /* specific case for names like: "\\.\d:" */ 142 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 143 return 1; 144 } 145 return (*path == '/' || *path == '\\'); 146 #else 147 return (*path == '/'); 148 #endif 149 } 150 151 /* if filename is absolute, just copy it to dest. Otherwise, build a 152 path to it by considering it is relative to base_path. URL are 153 supported. 
*/
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *keep_end;   /* one past the part of base_path that is kept */
    const char *after_sep;  /* one past the last path separator, if any */
    int keep_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Keep at least the "<protocol>:" part of the base path */
    keep_end = base_path;
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            keep_end = colon + 1;
        }
    }

    /* Keep everything up to and including the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;
    if (after_sep > keep_end) {
        keep_end = after_sep;
    }

    /* Copy the kept prefix (truncated to fit), then append the filename */
    keep_len = keep_end - base_path;
    if (keep_len > dest_size - 1) {
        keep_len = dest_size - 1;
    }
    memcpy(dest, base_path, keep_len);
    dest[keep_len] = '\0';
    pstrcat(dest, dest_size, filename);
}

/*
 * Helper function for bdrv_parse_filename() implementations to remove optional
 * protocol prefixes (especially "file:") from a filename and for putting the
 * stripped filename into the options QDict if there is such a prefix.
204 */ 205 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, 206 QDict *options) 207 { 208 if (strstart(filename, prefix, &filename)) { 209 /* Stripping the explicit protocol prefix may result in a protocol 210 * prefix being (wrongly) detected (if the filename contains a colon) */ 211 if (path_has_protocol(filename)) { 212 QString *fat_filename; 213 214 /* This means there is some colon before the first slash; therefore, 215 * this cannot be an absolute path */ 216 assert(!path_is_absolute(filename)); 217 218 /* And we can thus fix the protocol detection issue by prefixing it 219 * by "./" */ 220 fat_filename = qstring_from_str("./"); 221 qstring_append(fat_filename, filename); 222 223 assert(!path_has_protocol(qstring_get_str(fat_filename))); 224 225 qdict_put(options, "filename", fat_filename); 226 } else { 227 /* If no protocol prefix was detected, we can use the shortened 228 * filename as-is */ 229 qdict_put_str(options, "filename", filename); 230 } 231 } 232 } 233 234 235 /* Returns whether the image file is opened as read-only. Note that this can 236 * return false and writing to the image file is still not possible because the 237 * image is inactivated. 
*/ 238 bool bdrv_is_read_only(BlockDriverState *bs) 239 { 240 return bs->read_only; 241 } 242 243 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 244 bool ignore_allow_rdw, Error **errp) 245 { 246 /* Do not set read_only if copy_on_read is enabled */ 247 if (bs->copy_on_read && read_only) { 248 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", 249 bdrv_get_device_or_node_name(bs)); 250 return -EINVAL; 251 } 252 253 /* Do not clear read_only if it is prohibited */ 254 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) && 255 !ignore_allow_rdw) 256 { 257 error_setg(errp, "Node '%s' is read only", 258 bdrv_get_device_or_node_name(bs)); 259 return -EPERM; 260 } 261 262 return 0; 263 } 264 265 /* TODO Remove (deprecated since 2.11) 266 * Block drivers are not supposed to automatically change bs->read_only. 267 * Instead, they should just check whether they can provide what the user 268 * explicitly requested and error out if read-write is requested, but they can 269 * only provide read-only access. */ 270 int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) 271 { 272 int ret = 0; 273 274 ret = bdrv_can_set_read_only(bs, read_only, false, errp); 275 if (ret < 0) { 276 return ret; 277 } 278 279 bs->read_only = read_only; 280 return 0; 281 } 282 283 void bdrv_get_full_backing_filename_from_filename(const char *backed, 284 const char *backing, 285 char *dest, size_t sz, 286 Error **errp) 287 { 288 if (backing[0] == '\0' || path_has_protocol(backing) || 289 path_is_absolute(backing)) 290 { 291 pstrcpy(dest, sz, backing); 292 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 293 error_setg(errp, "Cannot use relative backing file names for '%s'", 294 backed); 295 } else { 296 path_combine(dest, sz, backed, backing); 297 } 298 } 299 300 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 301 Error **errp) 302 { 303 char *backed = bs->exact_filename[0] ? 
bs->exact_filename : bs->filename; 304 305 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 306 dest, sz, errp); 307 } 308 309 void bdrv_register(BlockDriver *bdrv) 310 { 311 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 312 } 313 314 BlockDriverState *bdrv_new(void) 315 { 316 BlockDriverState *bs; 317 int i; 318 319 bs = g_new0(BlockDriverState, 1); 320 QLIST_INIT(&bs->dirty_bitmaps); 321 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 322 QLIST_INIT(&bs->op_blockers[i]); 323 } 324 notifier_with_return_list_init(&bs->before_write_notifiers); 325 qemu_co_mutex_init(&bs->reqs_lock); 326 qemu_mutex_init(&bs->dirty_bitmap_mutex); 327 bs->refcnt = 1; 328 bs->aio_context = qemu_get_aio_context(); 329 330 qemu_co_queue_init(&bs->flush_queue); 331 332 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 333 334 return bs; 335 } 336 337 static BlockDriver *bdrv_do_find_format(const char *format_name) 338 { 339 BlockDriver *drv1; 340 341 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 342 if (!strcmp(drv1->format_name, format_name)) { 343 return drv1; 344 } 345 } 346 347 return NULL; 348 } 349 350 BlockDriver *bdrv_find_format(const char *format_name) 351 { 352 BlockDriver *drv1; 353 int i; 354 355 drv1 = bdrv_do_find_format(format_name); 356 if (drv1) { 357 return drv1; 358 } 359 360 /* The driver isn't registered, maybe we need to load a module */ 361 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 362 if (!strcmp(block_driver_modules[i].format_name, format_name)) { 363 block_module_load_one(block_driver_modules[i].library_name); 364 break; 365 } 366 } 367 368 return bdrv_do_find_format(format_name); 369 } 370 371 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 372 { 373 static const char *whitelist_rw[] = { 374 CONFIG_BDRV_RW_WHITELIST 375 }; 376 static const char *whitelist_ro[] = { 377 CONFIG_BDRV_RO_WHITELIST 378 }; 379 const char **p; 380 381 if (!whitelist_rw[0] && !whitelist_ro[0]) { 382 return 1; /* no whitelist, anything 
goes */ 383 } 384 385 for (p = whitelist_rw; *p; p++) { 386 if (!strcmp(drv->format_name, *p)) { 387 return 1; 388 } 389 } 390 if (read_only) { 391 for (p = whitelist_ro; *p; p++) { 392 if (!strcmp(drv->format_name, *p)) { 393 return 1; 394 } 395 } 396 } 397 return 0; 398 } 399 400 bool bdrv_uses_whitelist(void) 401 { 402 return use_bdrv_whitelist; 403 } 404 405 typedef struct CreateCo { 406 BlockDriver *drv; 407 char *filename; 408 QemuOpts *opts; 409 int ret; 410 Error *err; 411 } CreateCo; 412 413 static void coroutine_fn bdrv_create_co_entry(void *opaque) 414 { 415 Error *local_err = NULL; 416 int ret; 417 418 CreateCo *cco = opaque; 419 assert(cco->drv); 420 421 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 422 error_propagate(&cco->err, local_err); 423 cco->ret = ret; 424 } 425 426 int bdrv_create(BlockDriver *drv, const char* filename, 427 QemuOpts *opts, Error **errp) 428 { 429 int ret; 430 431 Coroutine *co; 432 CreateCo cco = { 433 .drv = drv, 434 .filename = g_strdup(filename), 435 .opts = opts, 436 .ret = NOT_DONE, 437 .err = NULL, 438 }; 439 440 if (!drv->bdrv_create) { 441 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 442 ret = -ENOTSUP; 443 goto out; 444 } 445 446 if (qemu_in_coroutine()) { 447 /* Fast-path if already in coroutine context */ 448 bdrv_create_co_entry(&cco); 449 } else { 450 co = qemu_coroutine_create(bdrv_create_co_entry, &cco); 451 qemu_coroutine_enter(co); 452 while (cco.ret == NOT_DONE) { 453 aio_poll(qemu_get_aio_context(), true); 454 } 455 } 456 457 ret = cco.ret; 458 if (ret < 0) { 459 if (cco.err) { 460 error_propagate(errp, cco.err); 461 } else { 462 error_setg_errno(errp, -ret, "Could not create image"); 463 } 464 } 465 466 out: 467 g_free(cco.filename); 468 return ret; 469 } 470 471 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 472 { 473 BlockDriver *drv; 474 Error *local_err = NULL; 475 int ret; 476 477 drv = bdrv_find_protocol(filename, 
true, errp); 478 if (drv == NULL) { 479 return -ENOENT; 480 } 481 482 ret = bdrv_create(drv, filename, opts, &local_err); 483 error_propagate(errp, local_err); 484 return ret; 485 } 486 487 /** 488 * Try to get @bs's logical and physical block size. 489 * On success, store them in @bsz struct and return 0. 490 * On failure return -errno. 491 * @bs must not be empty. 492 */ 493 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 494 { 495 BlockDriver *drv = bs->drv; 496 497 if (drv && drv->bdrv_probe_blocksizes) { 498 return drv->bdrv_probe_blocksizes(bs, bsz); 499 } else if (drv && drv->is_filter && bs->file) { 500 return bdrv_probe_blocksizes(bs->file->bs, bsz); 501 } 502 503 return -ENOTSUP; 504 } 505 506 /** 507 * Try to get @bs's geometry (cyls, heads, sectors). 508 * On success, store them in @geo struct and return 0. 509 * On failure return -errno. 510 * @bs must not be empty. 511 */ 512 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 513 { 514 BlockDriver *drv = bs->drv; 515 516 if (drv && drv->bdrv_probe_geometry) { 517 return drv->bdrv_probe_geometry(bs, geo); 518 } else if (drv && drv->is_filter && bs->file) { 519 return bdrv_probe_geometry(bs->file->bs, geo); 520 } 521 522 return -ENOTSUP; 523 } 524 525 /* 526 * Create a uniquely-named empty temporary file. 527 * Return 0 upon success, otherwise a negative errno value. 528 */ 529 int get_tmp_filename(char *filename, int size) 530 { 531 #ifdef _WIN32 532 char temp_dir[MAX_PATH]; 533 /* GetTempFileName requires that its output buffer (4th param) 534 have length MAX_PATH or greater. */ 535 assert(size >= MAX_PATH); 536 return (GetTempPath(MAX_PATH, temp_dir) 537 && GetTempFileName(temp_dir, "qem", 0, filename) 538 ? 
0 : -GetLastError()); 539 #else 540 int fd; 541 const char *tmpdir; 542 tmpdir = getenv("TMPDIR"); 543 if (!tmpdir) { 544 tmpdir = "/var/tmp"; 545 } 546 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 547 return -EOVERFLOW; 548 } 549 fd = mkstemp(filename); 550 if (fd < 0) { 551 return -errno; 552 } 553 if (close(fd) != 0) { 554 unlink(filename); 555 return -errno; 556 } 557 return 0; 558 #endif 559 } 560 561 /* 562 * Detect host devices. By convention, /dev/cdrom[N] is always 563 * recognized as a host CDROM. 564 */ 565 static BlockDriver *find_hdev_driver(const char *filename) 566 { 567 int score_max = 0, score; 568 BlockDriver *drv = NULL, *d; 569 570 QLIST_FOREACH(d, &bdrv_drivers, list) { 571 if (d->bdrv_probe_device) { 572 score = d->bdrv_probe_device(filename); 573 if (score > score_max) { 574 score_max = score; 575 drv = d; 576 } 577 } 578 } 579 580 return drv; 581 } 582 583 static BlockDriver *bdrv_do_find_protocol(const char *protocol) 584 { 585 BlockDriver *drv1; 586 587 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 588 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { 589 return drv1; 590 } 591 } 592 593 return NULL; 594 } 595 596 BlockDriver *bdrv_find_protocol(const char *filename, 597 bool allow_protocol_prefix, 598 Error **errp) 599 { 600 BlockDriver *drv1; 601 char protocol[128]; 602 int len; 603 const char *p; 604 int i; 605 606 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 607 608 /* 609 * XXX(hch): we really should not let host device detection 610 * override an explicit protocol specification, but moving this 611 * later breaks access to device names with colons in them. 612 * Thanks to the brain-dead persistent naming schemes on udev- 613 * based Linux systems those actually are quite common. 
614 */ 615 drv1 = find_hdev_driver(filename); 616 if (drv1) { 617 return drv1; 618 } 619 620 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 621 return &bdrv_file; 622 } 623 624 p = strchr(filename, ':'); 625 assert(p != NULL); 626 len = p - filename; 627 if (len > sizeof(protocol) - 1) 628 len = sizeof(protocol) - 1; 629 memcpy(protocol, filename, len); 630 protocol[len] = '\0'; 631 632 drv1 = bdrv_do_find_protocol(protocol); 633 if (drv1) { 634 return drv1; 635 } 636 637 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { 638 if (block_driver_modules[i].protocol_name && 639 !strcmp(block_driver_modules[i].protocol_name, protocol)) { 640 block_module_load_one(block_driver_modules[i].library_name); 641 break; 642 } 643 } 644 645 drv1 = bdrv_do_find_protocol(protocol); 646 if (!drv1) { 647 error_setg(errp, "Unknown protocol '%s'", protocol); 648 } 649 return drv1; 650 } 651 652 /* 653 * Guess image format by probing its contents. 654 * This is not a good idea when your image is raw (CVE-2008-2004), but 655 * we do it anyway for backward compatibility. 656 * 657 * @buf contains the image's first @buf_size bytes. 658 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 659 * but can be smaller if the image file is smaller) 660 * @filename is its filename. 661 * 662 * For all block drivers, call the bdrv_probe() method to get its 663 * probing score. 664 * Return the first block driver with the highest probing score. 
665 */ 666 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 667 const char *filename) 668 { 669 int score_max = 0, score; 670 BlockDriver *drv = NULL, *d; 671 672 QLIST_FOREACH(d, &bdrv_drivers, list) { 673 if (d->bdrv_probe) { 674 score = d->bdrv_probe(buf, buf_size, filename); 675 if (score > score_max) { 676 score_max = score; 677 drv = d; 678 } 679 } 680 } 681 682 return drv; 683 } 684 685 static int find_image_format(BlockBackend *file, const char *filename, 686 BlockDriver **pdrv, Error **errp) 687 { 688 BlockDriver *drv; 689 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 690 int ret = 0; 691 692 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 693 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { 694 *pdrv = &bdrv_raw; 695 return ret; 696 } 697 698 ret = blk_pread(file, 0, buf, sizeof(buf)); 699 if (ret < 0) { 700 error_setg_errno(errp, -ret, "Could not read image for determining its " 701 "format"); 702 *pdrv = NULL; 703 return ret; 704 } 705 706 drv = bdrv_probe_all(buf, ret, filename); 707 if (!drv) { 708 error_setg(errp, "Could not determine image format: No compatible " 709 "driver found"); 710 ret = -ENOENT; 711 } 712 *pdrv = drv; 713 return ret; 714 } 715 716 /** 717 * Set the current 'total_sectors' value 718 * Return 0 on success, -errno on error. 
719 */ 720 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 721 { 722 BlockDriver *drv = bs->drv; 723 724 if (!drv) { 725 return -ENOMEDIUM; 726 } 727 728 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 729 if (bdrv_is_sg(bs)) 730 return 0; 731 732 /* query actual device if possible, otherwise just trust the hint */ 733 if (drv->bdrv_getlength) { 734 int64_t length = drv->bdrv_getlength(bs); 735 if (length < 0) { 736 return length; 737 } 738 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 739 } 740 741 bs->total_sectors = hint; 742 return 0; 743 } 744 745 /** 746 * Combines a QDict of new block driver @options with any missing options taken 747 * from @old_options, so that leaving out an option defaults to its old value. 748 */ 749 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 750 QDict *old_options) 751 { 752 if (bs->drv && bs->drv->bdrv_join_options) { 753 bs->drv->bdrv_join_options(options, old_options); 754 } else { 755 qdict_join(options, old_options, false); 756 } 757 } 758 759 /** 760 * Set open flags for a given discard mode 761 * 762 * Return 0 on success, -1 if the discard mode was invalid. 763 */ 764 int bdrv_parse_discard_flags(const char *mode, int *flags) 765 { 766 *flags &= ~BDRV_O_UNMAP; 767 768 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 769 /* do nothing */ 770 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 771 *flags |= BDRV_O_UNMAP; 772 } else { 773 return -1; 774 } 775 776 return 0; 777 } 778 779 /** 780 * Set open flags for a given cache mode 781 * 782 * Return 0 on success, -1 if the cache mode was invalid. 
783 */ 784 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 785 { 786 *flags &= ~BDRV_O_CACHE_MASK; 787 788 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 789 *writethrough = false; 790 *flags |= BDRV_O_NOCACHE; 791 } else if (!strcmp(mode, "directsync")) { 792 *writethrough = true; 793 *flags |= BDRV_O_NOCACHE; 794 } else if (!strcmp(mode, "writeback")) { 795 *writethrough = false; 796 } else if (!strcmp(mode, "unsafe")) { 797 *writethrough = false; 798 *flags |= BDRV_O_NO_FLUSH; 799 } else if (!strcmp(mode, "writethrough")) { 800 *writethrough = true; 801 } else { 802 return -1; 803 } 804 805 return 0; 806 } 807 808 static char *bdrv_child_get_parent_desc(BdrvChild *c) 809 { 810 BlockDriverState *parent = c->opaque; 811 return g_strdup(bdrv_get_device_or_node_name(parent)); 812 } 813 814 static void bdrv_child_cb_drained_begin(BdrvChild *child) 815 { 816 BlockDriverState *bs = child->opaque; 817 bdrv_drained_begin(bs); 818 } 819 820 static void bdrv_child_cb_drained_end(BdrvChild *child) 821 { 822 BlockDriverState *bs = child->opaque; 823 bdrv_drained_end(bs); 824 } 825 826 static void bdrv_child_cb_attach(BdrvChild *child) 827 { 828 BlockDriverState *bs = child->opaque; 829 bdrv_apply_subtree_drain(child, bs); 830 } 831 832 static void bdrv_child_cb_detach(BdrvChild *child) 833 { 834 BlockDriverState *bs = child->opaque; 835 bdrv_unapply_subtree_drain(child, bs); 836 } 837 838 static int bdrv_child_cb_inactivate(BdrvChild *child) 839 { 840 BlockDriverState *bs = child->opaque; 841 assert(bs->open_flags & BDRV_O_INACTIVE); 842 return 0; 843 } 844 845 /* 846 * Returns the options and flags that a temporary snapshot should get, based on 847 * the originally requested flags (the originally requested image will have 848 * flags like a backing file) 849 */ 850 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 851 int parent_flags, QDict *parent_options) 852 { 853 *child_flags = (parent_flags & 
~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 854 855 /* For temporary files, unconditional cache=unsafe is fine */ 856 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 857 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 858 859 /* Copy the read-only option from the parent */ 860 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 861 862 /* aio=native doesn't work for cache.direct=off, so disable it for the 863 * temporary snapshot */ 864 *child_flags &= ~BDRV_O_NATIVE_AIO; 865 } 866 867 /* 868 * Returns the options and flags that bs->file should get if a protocol driver 869 * is expected, based on the given options and flags for the parent BDS 870 */ 871 static void bdrv_inherited_options(int *child_flags, QDict *child_options, 872 int parent_flags, QDict *parent_options) 873 { 874 int flags = parent_flags; 875 876 /* Enable protocol handling, disable format probing for bs->file */ 877 flags |= BDRV_O_PROTOCOL; 878 879 /* If the cache mode isn't explicitly set, inherit direct and no-flush from 880 * the parent. */ 881 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 882 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 883 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 884 885 /* Inherit the read-only option from the parent if it's not set */ 886 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); 887 888 /* Our block drivers take care to send flushes and respect unmap policy, 889 * so we can default to enable both on lower layers regardless of the 890 * corresponding parent options. 
*/ 891 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); 892 893 /* Clear flags that only apply to the top layer */ 894 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | 895 BDRV_O_NO_IO); 896 897 *child_flags = flags; 898 } 899 900 const BdrvChildRole child_file = { 901 .get_parent_desc = bdrv_child_get_parent_desc, 902 .inherit_options = bdrv_inherited_options, 903 .drained_begin = bdrv_child_cb_drained_begin, 904 .drained_end = bdrv_child_cb_drained_end, 905 .attach = bdrv_child_cb_attach, 906 .detach = bdrv_child_cb_detach, 907 .inactivate = bdrv_child_cb_inactivate, 908 }; 909 910 /* 911 * Returns the options and flags that bs->file should get if the use of formats 912 * (and not only protocols) is permitted for it, based on the given options and 913 * flags for the parent BDS 914 */ 915 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, 916 int parent_flags, QDict *parent_options) 917 { 918 child_file.inherit_options(child_flags, child_options, 919 parent_flags, parent_options); 920 921 *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO); 922 } 923 924 const BdrvChildRole child_format = { 925 .get_parent_desc = bdrv_child_get_parent_desc, 926 .inherit_options = bdrv_inherited_fmt_options, 927 .drained_begin = bdrv_child_cb_drained_begin, 928 .drained_end = bdrv_child_cb_drained_end, 929 .attach = bdrv_child_cb_attach, 930 .detach = bdrv_child_cb_detach, 931 .inactivate = bdrv_child_cb_inactivate, 932 }; 933 934 static void bdrv_backing_attach(BdrvChild *c) 935 { 936 BlockDriverState *parent = c->opaque; 937 BlockDriverState *backing_hd = c->bs; 938 939 assert(!parent->backing_blocker); 940 error_setg(&parent->backing_blocker, 941 "node is used as backing hd of '%s'", 942 bdrv_get_device_or_node_name(parent)); 943 944 parent->open_flags &= ~BDRV_O_NO_BACKING; 945 pstrcpy(parent->backing_file, sizeof(parent->backing_file), 946 backing_hd->filename); 947 pstrcpy(parent->backing_format, 
sizeof(parent->backing_format), 948 backing_hd->drv ? backing_hd->drv->format_name : ""); 949 950 bdrv_op_block_all(backing_hd, parent->backing_blocker); 951 /* Otherwise we won't be able to commit or stream */ 952 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 953 parent->backing_blocker); 954 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, 955 parent->backing_blocker); 956 /* 957 * We do backup in 3 ways: 958 * 1. drive backup 959 * The target bs is new opened, and the source is top BDS 960 * 2. blockdev backup 961 * Both the source and the target are top BDSes. 962 * 3. internal backup(used for block replication) 963 * Both the source and the target are backing file 964 * 965 * In case 1 and 2, neither the source nor the target is the backing file. 966 * In case 3, we will block the top BDS, so there is only one block job 967 * for the top BDS and its backing chain. 968 */ 969 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, 970 parent->backing_blocker); 971 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, 972 parent->backing_blocker); 973 974 bdrv_child_cb_attach(c); 975 } 976 977 static void bdrv_backing_detach(BdrvChild *c) 978 { 979 BlockDriverState *parent = c->opaque; 980 981 assert(parent->backing_blocker); 982 bdrv_op_unblock_all(c->bs, parent->backing_blocker); 983 error_free(parent->backing_blocker); 984 parent->backing_blocker = NULL; 985 986 bdrv_child_cb_detach(c); 987 } 988 989 /* 990 * Returns the options and flags that bs->backing should get, based on the 991 * given options and flags for the parent BDS 992 */ 993 static void bdrv_backing_options(int *child_flags, QDict *child_options, 994 int parent_flags, QDict *parent_options) 995 { 996 int flags = parent_flags; 997 998 /* The cache mode is inherited unmodified for backing files; except WCE, 999 * which is only applied on the top level (BlockBackend) */ 1000 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 1001 
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 1002 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); 1003 1004 /* backing files always opened read-only */ 1005 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); 1006 flags &= ~BDRV_O_COPY_ON_READ; 1007 1008 /* snapshot=on is handled on the top layer */ 1009 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 1010 1011 *child_flags = flags; 1012 } 1013 1014 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, 1015 const char *filename, Error **errp) 1016 { 1017 BlockDriverState *parent = c->opaque; 1018 int orig_flags = bdrv_get_flags(parent); 1019 int ret; 1020 1021 if (!(orig_flags & BDRV_O_RDWR)) { 1022 ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp); 1023 if (ret < 0) { 1024 return ret; 1025 } 1026 } 1027 1028 ret = bdrv_change_backing_file(parent, filename, 1029 base->drv ? base->drv->format_name : ""); 1030 if (ret < 0) { 1031 error_setg_errno(errp, -ret, "Could not update backing file link"); 1032 } 1033 1034 if (!(orig_flags & BDRV_O_RDWR)) { 1035 bdrv_reopen(parent, orig_flags, NULL); 1036 } 1037 1038 return ret; 1039 } 1040 1041 const BdrvChildRole child_backing = { 1042 .get_parent_desc = bdrv_child_get_parent_desc, 1043 .attach = bdrv_backing_attach, 1044 .detach = bdrv_backing_detach, 1045 .inherit_options = bdrv_backing_options, 1046 .drained_begin = bdrv_child_cb_drained_begin, 1047 .drained_end = bdrv_child_cb_drained_end, 1048 .inactivate = bdrv_child_cb_inactivate, 1049 .update_filename = bdrv_backing_update_filename, 1050 }; 1051 1052 static int bdrv_open_flags(BlockDriverState *bs, int flags) 1053 { 1054 int open_flags = flags; 1055 1056 /* 1057 * Clear flags that are internal to the block layer before opening the 1058 * image. 1059 */ 1060 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 1061 1062 /* 1063 * Snapshots should be writable. 
1064 */ 1065 if (flags & BDRV_O_TEMPORARY) { 1066 open_flags |= BDRV_O_RDWR; 1067 } 1068 1069 return open_flags; 1070 } 1071 1072 static void update_flags_from_options(int *flags, QemuOpts *opts) 1073 { 1074 *flags &= ~BDRV_O_CACHE_MASK; 1075 1076 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH)); 1077 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 1078 *flags |= BDRV_O_NO_FLUSH; 1079 } 1080 1081 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT)); 1082 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { 1083 *flags |= BDRV_O_NOCACHE; 1084 } 1085 1086 *flags &= ~BDRV_O_RDWR; 1087 1088 assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY)); 1089 if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) { 1090 *flags |= BDRV_O_RDWR; 1091 } 1092 1093 } 1094 1095 static void update_options_from_flags(QDict *options, int flags) 1096 { 1097 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 1098 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); 1099 } 1100 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 1101 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, 1102 flags & BDRV_O_NO_FLUSH); 1103 } 1104 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { 1105 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); 1106 } 1107 } 1108 1109 static void bdrv_assign_node_name(BlockDriverState *bs, 1110 const char *node_name, 1111 Error **errp) 1112 { 1113 char *gen_node_name = NULL; 1114 1115 if (!node_name) { 1116 node_name = gen_node_name = id_generate(ID_BLOCK); 1117 } else if (!id_wellformed(node_name)) { 1118 /* 1119 * Check for empty string or invalid characters, but not if it is 1120 * generated (generated names use characters not available to the user) 1121 */ 1122 error_setg(errp, "Invalid node name"); 1123 return; 1124 } 1125 1126 /* takes care of avoiding namespaces collisions */ 1127 if (blk_by_name(node_name)) { 1128 error_setg(errp, "node-name=%s is conflicting with a device id", 1129 node_name); 1130 goto 
out; 1131 } 1132 1133 /* takes care of avoiding duplicates node names */ 1134 if (bdrv_find_node(node_name)) { 1135 error_setg(errp, "Duplicate node name"); 1136 goto out; 1137 } 1138 1139 /* copy node name into the bs and insert it into the graph list */ 1140 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 1141 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 1142 out: 1143 g_free(gen_node_name); 1144 } 1145 1146 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, 1147 const char *node_name, QDict *options, 1148 int open_flags, Error **errp) 1149 { 1150 Error *local_err = NULL; 1151 int ret; 1152 1153 bdrv_assign_node_name(bs, node_name, &local_err); 1154 if (local_err) { 1155 error_propagate(errp, local_err); 1156 return -EINVAL; 1157 } 1158 1159 bs->drv = drv; 1160 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1161 bs->opaque = g_malloc0(drv->instance_size); 1162 1163 if (drv->bdrv_file_open) { 1164 assert(!drv->bdrv_needs_filename || bs->filename[0]); 1165 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1166 } else if (drv->bdrv_open) { 1167 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 1168 } else { 1169 ret = 0; 1170 } 1171 1172 if (ret < 0) { 1173 if (local_err) { 1174 error_propagate(errp, local_err); 1175 } else if (bs->filename[0]) { 1176 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 1177 } else { 1178 error_setg_errno(errp, -ret, "Could not open image"); 1179 } 1180 goto open_failed; 1181 } 1182 1183 ret = refresh_total_sectors(bs, bs->total_sectors); 1184 if (ret < 0) { 1185 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 1186 return ret; 1187 } 1188 1189 bdrv_refresh_limits(bs, &local_err); 1190 if (local_err) { 1191 error_propagate(errp, local_err); 1192 return -EINVAL; 1193 } 1194 1195 assert(bdrv_opt_mem_align(bs) != 0); 1196 assert(bdrv_min_mem_align(bs) != 0); 1197 assert(is_power_of_2(bs->bl.request_alignment)); 1198 1199 return 0; 1200 
open_failed: 1201 bs->drv = NULL; 1202 if (bs->file != NULL) { 1203 bdrv_unref_child(bs, bs->file); 1204 bs->file = NULL; 1205 } 1206 g_free(bs->opaque); 1207 bs->opaque = NULL; 1208 return ret; 1209 } 1210 1211 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, 1212 int flags, Error **errp) 1213 { 1214 BlockDriverState *bs; 1215 int ret; 1216 1217 bs = bdrv_new(); 1218 bs->open_flags = flags; 1219 bs->explicit_options = qdict_new(); 1220 bs->options = qdict_new(); 1221 bs->opaque = NULL; 1222 1223 update_options_from_flags(bs->options, flags); 1224 1225 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); 1226 if (ret < 0) { 1227 QDECREF(bs->explicit_options); 1228 bs->explicit_options = NULL; 1229 QDECREF(bs->options); 1230 bs->options = NULL; 1231 bdrv_unref(bs); 1232 return NULL; 1233 } 1234 1235 return bs; 1236 } 1237 1238 QemuOptsList bdrv_runtime_opts = { 1239 .name = "bdrv_common", 1240 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 1241 .desc = { 1242 { 1243 .name = "node-name", 1244 .type = QEMU_OPT_STRING, 1245 .help = "Node name of the block device node", 1246 }, 1247 { 1248 .name = "driver", 1249 .type = QEMU_OPT_STRING, 1250 .help = "Block driver to use for the node", 1251 }, 1252 { 1253 .name = BDRV_OPT_CACHE_DIRECT, 1254 .type = QEMU_OPT_BOOL, 1255 .help = "Bypass software writeback cache on the host", 1256 }, 1257 { 1258 .name = BDRV_OPT_CACHE_NO_FLUSH, 1259 .type = QEMU_OPT_BOOL, 1260 .help = "Ignore flush requests", 1261 }, 1262 { 1263 .name = BDRV_OPT_READ_ONLY, 1264 .type = QEMU_OPT_BOOL, 1265 .help = "Node is opened in read-only mode", 1266 }, 1267 { 1268 .name = "detect-zeroes", 1269 .type = QEMU_OPT_STRING, 1270 .help = "try to optimize zero writes (off, on, unmap)", 1271 }, 1272 { 1273 .name = "discard", 1274 .type = QEMU_OPT_STRING, 1275 .help = "discard operation (ignore/off, unmap/on)", 1276 }, 1277 { 1278 .name = BDRV_OPT_FORCE_SHARE, 1279 .type = QEMU_OPT_BOOL, 1280 .help = 
"always accept other writers (default: off)", 1281 }, 1282 { /* end of list */ } 1283 }, 1284 }; 1285 1286 /* 1287 * Common part for opening disk images and files 1288 * 1289 * Removes all processed options from *options. 1290 */ 1291 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, 1292 QDict *options, Error **errp) 1293 { 1294 int ret, open_flags; 1295 const char *filename; 1296 const char *driver_name = NULL; 1297 const char *node_name = NULL; 1298 const char *discard; 1299 const char *detect_zeroes; 1300 QemuOpts *opts; 1301 BlockDriver *drv; 1302 Error *local_err = NULL; 1303 1304 assert(bs->file == NULL); 1305 assert(options != NULL && bs->options != options); 1306 1307 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1308 qemu_opts_absorb_qdict(opts, options, &local_err); 1309 if (local_err) { 1310 error_propagate(errp, local_err); 1311 ret = -EINVAL; 1312 goto fail_opts; 1313 } 1314 1315 update_flags_from_options(&bs->open_flags, opts); 1316 1317 driver_name = qemu_opt_get(opts, "driver"); 1318 drv = bdrv_find_format(driver_name); 1319 assert(drv != NULL); 1320 1321 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); 1322 1323 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { 1324 error_setg(errp, 1325 BDRV_OPT_FORCE_SHARE 1326 "=on can only be used with read-only images"); 1327 ret = -EINVAL; 1328 goto fail_opts; 1329 } 1330 1331 if (file != NULL) { 1332 filename = blk_bs(file)->filename; 1333 } else { 1334 /* 1335 * Caution: while qdict_get_try_str() is fine, getting 1336 * non-string types would require more care. When @options 1337 * come from -blockdev or blockdev_add, its members are typed 1338 * according to the QAPI schema, but when they come from 1339 * -drive, they're all QString. 
1340 */ 1341 filename = qdict_get_try_str(options, "filename"); 1342 } 1343 1344 if (drv->bdrv_needs_filename && (!filename || !filename[0])) { 1345 error_setg(errp, "The '%s' block driver requires a file name", 1346 drv->format_name); 1347 ret = -EINVAL; 1348 goto fail_opts; 1349 } 1350 1351 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 1352 drv->format_name); 1353 1354 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 1355 1356 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 1357 error_setg(errp, 1358 !bs->read_only && bdrv_is_whitelisted(drv, true) 1359 ? "Driver '%s' can only be used for read-only devices" 1360 : "Driver '%s' is not whitelisted", 1361 drv->format_name); 1362 ret = -ENOTSUP; 1363 goto fail_opts; 1364 } 1365 1366 /* bdrv_new() and bdrv_close() make it so */ 1367 assert(atomic_read(&bs->copy_on_read) == 0); 1368 1369 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 1370 if (!bs->read_only) { 1371 bdrv_enable_copy_on_read(bs); 1372 } else { 1373 error_setg(errp, "Can't use copy-on-read on read-only device"); 1374 ret = -EINVAL; 1375 goto fail_opts; 1376 } 1377 } 1378 1379 discard = qemu_opt_get(opts, "discard"); 1380 if (discard != NULL) { 1381 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { 1382 error_setg(errp, "Invalid discard option"); 1383 ret = -EINVAL; 1384 goto fail_opts; 1385 } 1386 } 1387 1388 detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); 1389 if (detect_zeroes) { 1390 BlockdevDetectZeroesOptions value = 1391 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, 1392 detect_zeroes, 1393 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, 1394 &local_err); 1395 if (local_err) { 1396 error_propagate(errp, local_err); 1397 ret = -EINVAL; 1398 goto fail_opts; 1399 } 1400 1401 if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && 1402 !(bs->open_flags & BDRV_O_UNMAP)) 1403 { 1404 error_setg(errp, "setting detect-zeroes to unmap is not allowed " 1405 "without setting discard operation to unmap"); 1406 ret = 
-EINVAL; 1407 goto fail_opts; 1408 } 1409 1410 bs->detect_zeroes = value; 1411 } 1412 1413 if (filename != NULL) { 1414 pstrcpy(bs->filename, sizeof(bs->filename), filename); 1415 } else { 1416 bs->filename[0] = '\0'; 1417 } 1418 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 1419 1420 /* Open the image, either directly or using a protocol */ 1421 open_flags = bdrv_open_flags(bs, bs->open_flags); 1422 node_name = qemu_opt_get(opts, "node-name"); 1423 1424 assert(!drv->bdrv_file_open || file == NULL); 1425 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); 1426 if (ret < 0) { 1427 goto fail_opts; 1428 } 1429 1430 qemu_opts_del(opts); 1431 return 0; 1432 1433 fail_opts: 1434 qemu_opts_del(opts); 1435 return ret; 1436 } 1437 1438 static QDict *parse_json_filename(const char *filename, Error **errp) 1439 { 1440 QObject *options_obj; 1441 QDict *options; 1442 int ret; 1443 1444 ret = strstart(filename, "json:", &filename); 1445 assert(ret); 1446 1447 options_obj = qobject_from_json(filename, errp); 1448 if (!options_obj) { 1449 /* Work around qobject_from_json() lossage TODO fix that */ 1450 if (errp && !*errp) { 1451 error_setg(errp, "Could not parse the JSON options"); 1452 return NULL; 1453 } 1454 error_prepend(errp, "Could not parse the JSON options: "); 1455 return NULL; 1456 } 1457 1458 options = qobject_to_qdict(options_obj); 1459 if (!options) { 1460 qobject_decref(options_obj); 1461 error_setg(errp, "Invalid JSON object given"); 1462 return NULL; 1463 } 1464 1465 qdict_flatten(options); 1466 1467 return options; 1468 } 1469 1470 static void parse_json_protocol(QDict *options, const char **pfilename, 1471 Error **errp) 1472 { 1473 QDict *json_options; 1474 Error *local_err = NULL; 1475 1476 /* Parse json: pseudo-protocol */ 1477 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1478 return; 1479 } 1480 1481 json_options = parse_json_filename(*pfilename, &local_err); 1482 if (local_err) { 1483 
error_propagate(errp, local_err); 1484 return; 1485 } 1486 1487 /* Options given in the filename have lower priority than options 1488 * specified directly */ 1489 qdict_join(options, json_options, false); 1490 QDECREF(json_options); 1491 *pfilename = NULL; 1492 } 1493 1494 /* 1495 * Fills in default options for opening images and converts the legacy 1496 * filename/flags pair to option QDict entries. 1497 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1498 * block driver has been specified explicitly. 1499 */ 1500 static int bdrv_fill_options(QDict **options, const char *filename, 1501 int *flags, Error **errp) 1502 { 1503 const char *drvname; 1504 bool protocol = *flags & BDRV_O_PROTOCOL; 1505 bool parse_filename = false; 1506 BlockDriver *drv = NULL; 1507 Error *local_err = NULL; 1508 1509 /* 1510 * Caution: while qdict_get_try_str() is fine, getting non-string 1511 * types would require more care. When @options come from 1512 * -blockdev or blockdev_add, its members are typed according to 1513 * the QAPI schema, but when they come from -drive, they're all 1514 * QString. 
1515 */ 1516 drvname = qdict_get_try_str(*options, "driver"); 1517 if (drvname) { 1518 drv = bdrv_find_format(drvname); 1519 if (!drv) { 1520 error_setg(errp, "Unknown driver '%s'", drvname); 1521 return -ENOENT; 1522 } 1523 /* If the user has explicitly specified the driver, this choice should 1524 * override the BDRV_O_PROTOCOL flag */ 1525 protocol = drv->bdrv_file_open; 1526 } 1527 1528 if (protocol) { 1529 *flags |= BDRV_O_PROTOCOL; 1530 } else { 1531 *flags &= ~BDRV_O_PROTOCOL; 1532 } 1533 1534 /* Translate cache options from flags into options */ 1535 update_options_from_flags(*options, *flags); 1536 1537 /* Fetch the file name from the options QDict if necessary */ 1538 if (protocol && filename) { 1539 if (!qdict_haskey(*options, "filename")) { 1540 qdict_put_str(*options, "filename", filename); 1541 parse_filename = true; 1542 } else { 1543 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1544 "the same time"); 1545 return -EINVAL; 1546 } 1547 } 1548 1549 /* Find the right block driver */ 1550 /* See cautionary note on accessing @options above */ 1551 filename = qdict_get_try_str(*options, "filename"); 1552 1553 if (!drvname && protocol) { 1554 if (filename) { 1555 drv = bdrv_find_protocol(filename, parse_filename, errp); 1556 if (!drv) { 1557 return -EINVAL; 1558 } 1559 1560 drvname = drv->format_name; 1561 qdict_put_str(*options, "driver", drvname); 1562 } else { 1563 error_setg(errp, "Must specify either driver or file"); 1564 return -EINVAL; 1565 } 1566 } 1567 1568 assert(drv || !protocol); 1569 1570 /* Driver-specific filename parsing */ 1571 if (drv && drv->bdrv_parse_filename && parse_filename) { 1572 drv->bdrv_parse_filename(filename, *options, &local_err); 1573 if (local_err) { 1574 error_propagate(errp, local_err); 1575 return -EINVAL; 1576 } 1577 1578 if (!drv->bdrv_needs_filename) { 1579 qdict_del(*options, "filename"); 1580 } 1581 } 1582 1583 return 0; 1584 } 1585 1586 static int bdrv_child_check_perm(BdrvChild *c, 
BlockReopenQueue *q, 1587 uint64_t perm, uint64_t shared, 1588 GSList *ignore_children, Error **errp); 1589 static void bdrv_child_abort_perm_update(BdrvChild *c); 1590 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); 1591 1592 typedef struct BlockReopenQueueEntry { 1593 bool prepared; 1594 BDRVReopenState state; 1595 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1596 } BlockReopenQueueEntry; 1597 1598 /* 1599 * Return the flags that @bs will have after the reopens in @q have 1600 * successfully completed. If @q is NULL (or @bs is not contained in @q), 1601 * return the current flags. 1602 */ 1603 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 1604 { 1605 BlockReopenQueueEntry *entry; 1606 1607 if (q != NULL) { 1608 QSIMPLEQ_FOREACH(entry, q, entry) { 1609 if (entry->state.bs == bs) { 1610 return entry->state.flags; 1611 } 1612 } 1613 } 1614 1615 return bs->open_flags; 1616 } 1617 1618 /* Returns whether the image file can be written to after the reopen queue @q 1619 * has been successfully applied, or right now if @q is NULL. 
*/ 1620 static bool bdrv_is_writable(BlockDriverState *bs, BlockReopenQueue *q) 1621 { 1622 int flags = bdrv_reopen_get_flags(q, bs); 1623 1624 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 1625 } 1626 1627 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 1628 BdrvChild *c, const BdrvChildRole *role, 1629 BlockReopenQueue *reopen_queue, 1630 uint64_t parent_perm, uint64_t parent_shared, 1631 uint64_t *nperm, uint64_t *nshared) 1632 { 1633 if (bs->drv && bs->drv->bdrv_child_perm) { 1634 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 1635 parent_perm, parent_shared, 1636 nperm, nshared); 1637 } 1638 /* TODO Take force_share from reopen_queue */ 1639 if (child_bs && child_bs->force_share) { 1640 *nshared = BLK_PERM_ALL; 1641 } 1642 } 1643 1644 /* 1645 * Check whether permissions on this node can be changed in a way that 1646 * @cumulative_perms and @cumulative_shared_perms are the new cumulative 1647 * permissions of all its parents. This involves checking whether all necessary 1648 * permission changes to child nodes can be performed. 1649 * 1650 * A call to this function must always be followed by a call to bdrv_set_perm() 1651 * or bdrv_abort_perm_update(). 
1652 */ 1653 static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, 1654 uint64_t cumulative_perms, 1655 uint64_t cumulative_shared_perms, 1656 GSList *ignore_children, Error **errp) 1657 { 1658 BlockDriver *drv = bs->drv; 1659 BdrvChild *c; 1660 int ret; 1661 1662 /* Write permissions never work with read-only images */ 1663 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 1664 !bdrv_is_writable(bs, q)) 1665 { 1666 error_setg(errp, "Block node is read-only"); 1667 return -EPERM; 1668 } 1669 1670 /* Check this node */ 1671 if (!drv) { 1672 return 0; 1673 } 1674 1675 if (drv->bdrv_check_perm) { 1676 return drv->bdrv_check_perm(bs, cumulative_perms, 1677 cumulative_shared_perms, errp); 1678 } 1679 1680 /* Drivers that never have children can omit .bdrv_child_perm() */ 1681 if (!drv->bdrv_child_perm) { 1682 assert(QLIST_EMPTY(&bs->children)); 1683 return 0; 1684 } 1685 1686 /* Check all children */ 1687 QLIST_FOREACH(c, &bs->children, next) { 1688 uint64_t cur_perm, cur_shared; 1689 bdrv_child_perm(bs, c->bs, c, c->role, q, 1690 cumulative_perms, cumulative_shared_perms, 1691 &cur_perm, &cur_shared); 1692 ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, 1693 ignore_children, errp); 1694 if (ret < 0) { 1695 return ret; 1696 } 1697 } 1698 1699 return 0; 1700 } 1701 1702 /* 1703 * Notifies drivers that after a previous bdrv_check_perm() call, the 1704 * permission update is not performed and any preparations made for it (e.g. 1705 * taken file locks) need to be undone. 1706 * 1707 * This function recursively notifies all child nodes. 
1708 */ 1709 static void bdrv_abort_perm_update(BlockDriverState *bs) 1710 { 1711 BlockDriver *drv = bs->drv; 1712 BdrvChild *c; 1713 1714 if (!drv) { 1715 return; 1716 } 1717 1718 if (drv->bdrv_abort_perm_update) { 1719 drv->bdrv_abort_perm_update(bs); 1720 } 1721 1722 QLIST_FOREACH(c, &bs->children, next) { 1723 bdrv_child_abort_perm_update(c); 1724 } 1725 } 1726 1727 static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, 1728 uint64_t cumulative_shared_perms) 1729 { 1730 BlockDriver *drv = bs->drv; 1731 BdrvChild *c; 1732 1733 if (!drv) { 1734 return; 1735 } 1736 1737 /* Update this node */ 1738 if (drv->bdrv_set_perm) { 1739 drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); 1740 } 1741 1742 /* Drivers that never have children can omit .bdrv_child_perm() */ 1743 if (!drv->bdrv_child_perm) { 1744 assert(QLIST_EMPTY(&bs->children)); 1745 return; 1746 } 1747 1748 /* Update all children */ 1749 QLIST_FOREACH(c, &bs->children, next) { 1750 uint64_t cur_perm, cur_shared; 1751 bdrv_child_perm(bs, c->bs, c, c->role, NULL, 1752 cumulative_perms, cumulative_shared_perms, 1753 &cur_perm, &cur_shared); 1754 bdrv_child_set_perm(c, cur_perm, cur_shared); 1755 } 1756 } 1757 1758 static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, 1759 uint64_t *shared_perm) 1760 { 1761 BdrvChild *c; 1762 uint64_t cumulative_perms = 0; 1763 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 1764 1765 QLIST_FOREACH(c, &bs->parents, next_parent) { 1766 cumulative_perms |= c->perm; 1767 cumulative_shared_perms &= c->shared_perm; 1768 } 1769 1770 *perm = cumulative_perms; 1771 *shared_perm = cumulative_shared_perms; 1772 } 1773 1774 static char *bdrv_child_user_desc(BdrvChild *c) 1775 { 1776 if (c->role->get_parent_desc) { 1777 return c->role->get_parent_desc(c); 1778 } 1779 1780 return g_strdup("another user"); 1781 } 1782 1783 char *bdrv_perm_names(uint64_t perm) 1784 { 1785 struct perm_name { 1786 uint64_t perm; 1787 const char *name; 
1788 } permissions[] = { 1789 { BLK_PERM_CONSISTENT_READ, "consistent read" }, 1790 { BLK_PERM_WRITE, "write" }, 1791 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, 1792 { BLK_PERM_RESIZE, "resize" }, 1793 { BLK_PERM_GRAPH_MOD, "change children" }, 1794 { 0, NULL } 1795 }; 1796 1797 char *result = g_strdup(""); 1798 struct perm_name *p; 1799 1800 for (p = permissions; p->name; p++) { 1801 if (perm & p->perm) { 1802 char *old = result; 1803 result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); 1804 g_free(old); 1805 } 1806 } 1807 1808 return result; 1809 } 1810 1811 /* 1812 * Checks whether a new reference to @bs can be added if the new user requires 1813 * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is 1814 * set, the BdrvChild objects in this list are ignored in the calculations; 1815 * this allows checking permission updates for an existing reference. 1816 * 1817 * Needs to be followed by a call to either bdrv_set_perm() or 1818 * bdrv_abort_perm_update(). 
*/
static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
                                  uint64_t new_used_perm,
                                  uint64_t new_shared_perm,
                                  GSList *ignore_children, Error **errp)
{
    uint64_t total_perm = new_used_perm;
    uint64_t total_shared = new_shared_perm;
    BdrvChild *parent;

    /* There is no reason why anyone couldn't tolerate write_unchanged */
    assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        uint64_t not_allowed;   /* wanted by us, not shared by @parent */
        uint64_t not_tolerated; /* used by @parent, not shared by us */

        if (g_slist_find(ignore_children, parent)) {
            continue;
        }

        not_allowed = new_used_perm & ~parent->shared_perm;
        if (not_allowed != 0) {
            char *user = bdrv_child_user_desc(parent);
            char *perm_names = bdrv_perm_names(not_allowed);

            error_setg(errp, "Conflicts with use by %s as '%s', which does not "
                             "allow '%s' on %s",
                       user, parent->name, perm_names,
                       bdrv_get_node_name(parent->bs));
            g_free(user);
            g_free(perm_names);
            return -EPERM;
        }

        not_tolerated = parent->perm & ~new_shared_perm;
        if (not_tolerated != 0) {
            char *user = bdrv_child_user_desc(parent);
            char *perm_names = bdrv_perm_names(not_tolerated);

            error_setg(errp, "Conflicts with use by %s as '%s', which uses "
                             "'%s' on %s",
                       user, parent->name, perm_names,
                       bdrv_get_node_name(parent->bs));
            g_free(user);
            g_free(perm_names);
            return -EPERM;
        }

        total_perm |= parent->perm;
        total_shared &= parent->shared_perm;
    }

    return bdrv_check_perm(bs, q, total_perm, total_shared,
                           ignore_children, errp);
}

/* Needs to be followed by a call to either bdrv_child_set_perm() or
 * bdrv_child_abort_perm_update().
*/ 1868 static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, 1869 uint64_t perm, uint64_t shared, 1870 GSList *ignore_children, Error **errp) 1871 { 1872 int ret; 1873 1874 ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); 1875 ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); 1876 g_slist_free(ignore_children); 1877 1878 return ret; 1879 } 1880 1881 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) 1882 { 1883 uint64_t cumulative_perms, cumulative_shared_perms; 1884 1885 c->perm = perm; 1886 c->shared_perm = shared; 1887 1888 bdrv_get_cumulative_perm(c->bs, &cumulative_perms, 1889 &cumulative_shared_perms); 1890 bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); 1891 } 1892 1893 static void bdrv_child_abort_perm_update(BdrvChild *c) 1894 { 1895 bdrv_abort_perm_update(c->bs); 1896 } 1897 1898 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 1899 Error **errp) 1900 { 1901 int ret; 1902 1903 ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp); 1904 if (ret < 0) { 1905 bdrv_child_abort_perm_update(c); 1906 return ret; 1907 } 1908 1909 bdrv_child_set_perm(c, perm, shared); 1910 1911 return 0; 1912 } 1913 1914 #define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ 1915 | BLK_PERM_WRITE \ 1916 | BLK_PERM_WRITE_UNCHANGED \ 1917 | BLK_PERM_RESIZE) 1918 #define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) 1919 1920 void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 1921 const BdrvChildRole *role, 1922 BlockReopenQueue *reopen_queue, 1923 uint64_t perm, uint64_t shared, 1924 uint64_t *nperm, uint64_t *nshared) 1925 { 1926 if (c == NULL) { 1927 *nperm = perm & DEFAULT_PERM_PASSTHROUGH; 1928 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; 1929 return; 1930 } 1931 1932 *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | 1933 (c->perm & DEFAULT_PERM_UNCHANGED); 1934 *nshared = 
(shared & DEFAULT_PERM_PASSTHROUGH) | 1935 (c->shared_perm & DEFAULT_PERM_UNCHANGED); 1936 } 1937 1938 void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, 1939 const BdrvChildRole *role, 1940 BlockReopenQueue *reopen_queue, 1941 uint64_t perm, uint64_t shared, 1942 uint64_t *nperm, uint64_t *nshared) 1943 { 1944 bool backing = (role == &child_backing); 1945 assert(role == &child_backing || role == &child_file); 1946 1947 if (!backing) { 1948 int flags = bdrv_reopen_get_flags(reopen_queue, bs); 1949 1950 /* Apart from the modifications below, the same permissions are 1951 * forwarded and left alone as for filters */ 1952 bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, 1953 &perm, &shared); 1954 1955 /* Format drivers may touch metadata even if the guest doesn't write */ 1956 if (bdrv_is_writable(bs, reopen_queue)) { 1957 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 1958 } 1959 1960 /* bs->file always needs to be consistent because of the metadata. We 1961 * can never allow other users to resize or write to it. */ 1962 if (!(flags & BDRV_O_NO_IO)) { 1963 perm |= BLK_PERM_CONSISTENT_READ; 1964 } 1965 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 1966 } else { 1967 /* We want consistent read from backing files if the parent needs it. 1968 * No other operations are performed on backing files. */ 1969 perm &= BLK_PERM_CONSISTENT_READ; 1970 1971 /* If the parent can deal with changing data, we're okay with a 1972 * writable and resizable backing file. */ 1973 /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
*/ 1974 if (shared & BLK_PERM_WRITE) { 1975 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; 1976 } else { 1977 shared = 0; 1978 } 1979 1980 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | 1981 BLK_PERM_WRITE_UNCHANGED; 1982 } 1983 1984 if (bs->open_flags & BDRV_O_INACTIVE) { 1985 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 1986 } 1987 1988 *nperm = perm; 1989 *nshared = shared; 1990 } 1991 1992 static void bdrv_replace_child_noperm(BdrvChild *child, 1993 BlockDriverState *new_bs) 1994 { 1995 BlockDriverState *old_bs = child->bs; 1996 int i; 1997 1998 if (old_bs && new_bs) { 1999 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); 2000 } 2001 if (old_bs) { 2002 /* Detach first so that the recursive drain sections coming from @child 2003 * are already gone and we only end the drain sections that came from 2004 * elsewhere. */ 2005 if (child->role->detach) { 2006 child->role->detach(child); 2007 } 2008 if (old_bs->quiesce_counter && child->role->drained_end) { 2009 for (i = 0; i < old_bs->quiesce_counter; i++) { 2010 child->role->drained_end(child); 2011 } 2012 } 2013 QLIST_REMOVE(child, next_parent); 2014 } 2015 2016 child->bs = new_bs; 2017 2018 if (new_bs) { 2019 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); 2020 if (new_bs->quiesce_counter && child->role->drained_begin) { 2021 for (i = 0; i < new_bs->quiesce_counter; i++) { 2022 child->role->drained_begin(child); 2023 } 2024 } 2025 2026 /* Attach only after starting new drained sections, so that recursive 2027 * drain sections coming from @child don't get an extra .drained_begin 2028 * callback. */ 2029 if (child->role->attach) { 2030 child->role->attach(child); 2031 } 2032 } 2033 } 2034 2035 /* 2036 * Updates @child to change its reference to point to @new_bs, including 2037 * checking and applying the necessary permisson updates both to the old node 2038 * and to @new_bs. 2039 * 2040 * NULL is passed as @new_bs for removing the reference before freeing @child. 
2041 * 2042 * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this 2043 * function uses bdrv_set_perm() to update the permissions according to the new 2044 * reference that @new_bs gets. 2045 */ 2046 static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) 2047 { 2048 BlockDriverState *old_bs = child->bs; 2049 uint64_t perm, shared_perm; 2050 2051 bdrv_replace_child_noperm(child, new_bs); 2052 2053 if (old_bs) { 2054 /* Update permissions for old node. This is guaranteed to succeed 2055 * because we're just taking a parent away, so we're loosening 2056 * restrictions. */ 2057 bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); 2058 bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); 2059 bdrv_set_perm(old_bs, perm, shared_perm); 2060 } 2061 2062 if (new_bs) { 2063 bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); 2064 bdrv_set_perm(new_bs, perm, shared_perm); 2065 } 2066 } 2067 2068 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 2069 const char *child_name, 2070 const BdrvChildRole *child_role, 2071 uint64_t perm, uint64_t shared_perm, 2072 void *opaque, Error **errp) 2073 { 2074 BdrvChild *child; 2075 int ret; 2076 2077 ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); 2078 if (ret < 0) { 2079 bdrv_abort_perm_update(child_bs); 2080 return NULL; 2081 } 2082 2083 child = g_new(BdrvChild, 1); 2084 *child = (BdrvChild) { 2085 .bs = NULL, 2086 .name = g_strdup(child_name), 2087 .role = child_role, 2088 .perm = perm, 2089 .shared_perm = shared_perm, 2090 .opaque = opaque, 2091 }; 2092 2093 /* This performs the matching bdrv_set_perm() for the above check. 
*/ 2094 bdrv_replace_child(child, child_bs); 2095 2096 return child; 2097 } 2098 2099 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 2100 BlockDriverState *child_bs, 2101 const char *child_name, 2102 const BdrvChildRole *child_role, 2103 Error **errp) 2104 { 2105 BdrvChild *child; 2106 uint64_t perm, shared_perm; 2107 2108 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); 2109 2110 assert(parent_bs->drv); 2111 assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); 2112 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 2113 perm, shared_perm, &perm, &shared_perm); 2114 2115 child = bdrv_root_attach_child(child_bs, child_name, child_role, 2116 perm, shared_perm, parent_bs, errp); 2117 if (child == NULL) { 2118 return NULL; 2119 } 2120 2121 QLIST_INSERT_HEAD(&parent_bs->children, child, next); 2122 return child; 2123 } 2124 2125 static void bdrv_detach_child(BdrvChild *child) 2126 { 2127 if (child->next.le_prev) { 2128 QLIST_REMOVE(child, next); 2129 child->next.le_prev = NULL; 2130 } 2131 2132 bdrv_replace_child(child, NULL); 2133 2134 g_free(child->name); 2135 g_free(child); 2136 } 2137 2138 void bdrv_root_unref_child(BdrvChild *child) 2139 { 2140 BlockDriverState *child_bs; 2141 2142 child_bs = child->bs; 2143 bdrv_detach_child(child); 2144 bdrv_unref(child_bs); 2145 } 2146 2147 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 2148 { 2149 if (child == NULL) { 2150 return; 2151 } 2152 2153 if (child->bs->inherits_from == parent) { 2154 BdrvChild *c; 2155 2156 /* Remove inherits_from only when the last reference between parent and 2157 * child->bs goes away. 
*/ 2158 QLIST_FOREACH(c, &parent->children, next) { 2159 if (c != child && c->bs == child->bs) { 2160 break; 2161 } 2162 } 2163 if (c == NULL) { 2164 child->bs->inherits_from = NULL; 2165 } 2166 } 2167 2168 bdrv_root_unref_child(child); 2169 } 2170 2171 2172 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) 2173 { 2174 BdrvChild *c; 2175 QLIST_FOREACH(c, &bs->parents, next_parent) { 2176 if (c->role->change_media) { 2177 c->role->change_media(c, load); 2178 } 2179 } 2180 } 2181 2182 static void bdrv_parent_cb_resize(BlockDriverState *bs) 2183 { 2184 BdrvChild *c; 2185 QLIST_FOREACH(c, &bs->parents, next_parent) { 2186 if (c->role->resize) { 2187 c->role->resize(c); 2188 } 2189 } 2190 } 2191 2192 /* 2193 * Sets the backing file link of a BDS. A new reference is created; callers 2194 * which don't need their own reference any more must call bdrv_unref(). 2195 */ 2196 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, 2197 Error **errp) 2198 { 2199 if (backing_hd) { 2200 bdrv_ref(backing_hd); 2201 } 2202 2203 if (bs->backing) { 2204 bdrv_unref_child(bs, bs->backing); 2205 } 2206 2207 if (!backing_hd) { 2208 bs->backing = NULL; 2209 goto out; 2210 } 2211 2212 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, 2213 errp); 2214 if (!bs->backing) { 2215 bdrv_unref(backing_hd); 2216 } 2217 2218 bdrv_refresh_filename(bs); 2219 2220 out: 2221 bdrv_refresh_limits(bs, NULL); 2222 } 2223 2224 /* 2225 * Opens the backing file for a BlockDriverState if not yet open 2226 * 2227 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 2228 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 2229 * itself, all options starting with "${bdref_key}." are considered part of the 2230 * BlockdevRef. 2231 * 2232 * TODO Can this be unified with bdrv_open_image()? 
2233 */ 2234 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 2235 const char *bdref_key, Error **errp) 2236 { 2237 char *backing_filename = g_malloc0(PATH_MAX); 2238 char *bdref_key_dot; 2239 const char *reference = NULL; 2240 int ret = 0; 2241 BlockDriverState *backing_hd; 2242 QDict *options; 2243 QDict *tmp_parent_options = NULL; 2244 Error *local_err = NULL; 2245 2246 if (bs->backing != NULL) { 2247 goto free_exit; 2248 } 2249 2250 /* NULL means an empty set of options */ 2251 if (parent_options == NULL) { 2252 tmp_parent_options = qdict_new(); 2253 parent_options = tmp_parent_options; 2254 } 2255 2256 bs->open_flags &= ~BDRV_O_NO_BACKING; 2257 2258 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 2259 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 2260 g_free(bdref_key_dot); 2261 2262 /* 2263 * Caution: while qdict_get_try_str() is fine, getting non-string 2264 * types would require more care. When @parent_options come from 2265 * -blockdev or blockdev_add, its members are typed according to 2266 * the QAPI schema, but when they come from -drive, they're all 2267 * QString. 
2268 */ 2269 reference = qdict_get_try_str(parent_options, bdref_key); 2270 if (reference || qdict_haskey(options, "file.filename")) { 2271 backing_filename[0] = '\0'; 2272 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 2273 QDECREF(options); 2274 goto free_exit; 2275 } else { 2276 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 2277 &local_err); 2278 if (local_err) { 2279 ret = -EINVAL; 2280 error_propagate(errp, local_err); 2281 QDECREF(options); 2282 goto free_exit; 2283 } 2284 } 2285 2286 if (!bs->drv || !bs->drv->supports_backing) { 2287 ret = -EINVAL; 2288 error_setg(errp, "Driver doesn't support backing files"); 2289 QDECREF(options); 2290 goto free_exit; 2291 } 2292 2293 if (!reference && 2294 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 2295 qdict_put_str(options, "driver", bs->backing_format); 2296 } 2297 2298 backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL, 2299 reference, options, 0, bs, &child_backing, 2300 errp); 2301 if (!backing_hd) { 2302 bs->open_flags |= BDRV_O_NO_BACKING; 2303 error_prepend(errp, "Could not open backing file: "); 2304 ret = -EINVAL; 2305 goto free_exit; 2306 } 2307 bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs)); 2308 2309 /* Hook up the backing file link; drop our reference, bs owns the 2310 * backing_hd reference now */ 2311 bdrv_set_backing_hd(bs, backing_hd, &local_err); 2312 bdrv_unref(backing_hd); 2313 if (local_err) { 2314 error_propagate(errp, local_err); 2315 ret = -EINVAL; 2316 goto free_exit; 2317 } 2318 2319 qdict_del(parent_options, bdref_key); 2320 2321 free_exit: 2322 g_free(backing_filename); 2323 QDECREF(tmp_parent_options); 2324 return ret; 2325 } 2326 2327 static BlockDriverState * 2328 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, 2329 BlockDriverState *parent, const BdrvChildRole *child_role, 2330 bool allow_none, Error **errp) 2331 { 2332 BlockDriverState *bs = NULL; 2333 
QDict *image_options; 2334 char *bdref_key_dot; 2335 const char *reference; 2336 2337 assert(child_role != NULL); 2338 2339 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 2340 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 2341 g_free(bdref_key_dot); 2342 2343 /* 2344 * Caution: while qdict_get_try_str() is fine, getting non-string 2345 * types would require more care. When @options come from 2346 * -blockdev or blockdev_add, its members are typed according to 2347 * the QAPI schema, but when they come from -drive, they're all 2348 * QString. 2349 */ 2350 reference = qdict_get_try_str(options, bdref_key); 2351 if (!filename && !reference && !qdict_size(image_options)) { 2352 if (!allow_none) { 2353 error_setg(errp, "A block device must be specified for \"%s\"", 2354 bdref_key); 2355 } 2356 QDECREF(image_options); 2357 goto done; 2358 } 2359 2360 bs = bdrv_open_inherit(filename, reference, image_options, 0, 2361 parent, child_role, errp); 2362 if (!bs) { 2363 goto done; 2364 } 2365 2366 done: 2367 qdict_del(options, bdref_key); 2368 return bs; 2369 } 2370 2371 /* 2372 * Opens a disk image whose options are given as BlockdevRef in another block 2373 * device's options. 2374 * 2375 * If allow_none is true, no image will be opened if filename is false and no 2376 * BlockdevRef is given. NULL will be returned, but errp remains unset. 2377 * 2378 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 2379 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 2380 * itself, all options starting with "${bdref_key}." are considered part of the 2381 * BlockdevRef. 2382 * 2383 * The BlockdevRef will be removed from the options QDict. 
2384 */ 2385 BdrvChild *bdrv_open_child(const char *filename, 2386 QDict *options, const char *bdref_key, 2387 BlockDriverState *parent, 2388 const BdrvChildRole *child_role, 2389 bool allow_none, Error **errp) 2390 { 2391 BdrvChild *c; 2392 BlockDriverState *bs; 2393 2394 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, 2395 allow_none, errp); 2396 if (bs == NULL) { 2397 return NULL; 2398 } 2399 2400 c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); 2401 if (!c) { 2402 bdrv_unref(bs); 2403 return NULL; 2404 } 2405 2406 return c; 2407 } 2408 2409 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, 2410 int flags, 2411 QDict *snapshot_options, 2412 Error **errp) 2413 { 2414 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 2415 char *tmp_filename = g_malloc0(PATH_MAX + 1); 2416 int64_t total_size; 2417 QemuOpts *opts = NULL; 2418 BlockDriverState *bs_snapshot = NULL; 2419 Error *local_err = NULL; 2420 int ret; 2421 2422 /* if snapshot, we create a temporary backing file and open it 2423 instead of opening 'filename' directly */ 2424 2425 /* Get the required size from the image */ 2426 total_size = bdrv_getlength(bs); 2427 if (total_size < 0) { 2428 error_setg_errno(errp, -total_size, "Could not get image size"); 2429 goto out; 2430 } 2431 2432 /* Create the temporary image */ 2433 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 2434 if (ret < 0) { 2435 error_setg_errno(errp, -ret, "Could not get temporary filename"); 2436 goto out; 2437 } 2438 2439 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 2440 &error_abort); 2441 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 2442 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 2443 qemu_opts_del(opts); 2444 if (ret < 0) { 2445 error_prepend(errp, "Could not create temporary overlay '%s': ", 2446 tmp_filename); 2447 goto out; 2448 } 2449 2450 /* Prepare options QDict for the temporary file */ 2451 
qdict_put_str(snapshot_options, "file.driver", "file"); 2452 qdict_put_str(snapshot_options, "file.filename", tmp_filename); 2453 qdict_put_str(snapshot_options, "driver", "qcow2"); 2454 2455 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); 2456 snapshot_options = NULL; 2457 if (!bs_snapshot) { 2458 goto out; 2459 } 2460 2461 /* bdrv_append() consumes a strong reference to bs_snapshot 2462 * (i.e. it will call bdrv_unref() on it) even on error, so in 2463 * order to be able to return one, we have to increase 2464 * bs_snapshot's refcount here */ 2465 bdrv_ref(bs_snapshot); 2466 bdrv_append(bs_snapshot, bs, &local_err); 2467 if (local_err) { 2468 error_propagate(errp, local_err); 2469 bs_snapshot = NULL; 2470 goto out; 2471 } 2472 2473 out: 2474 QDECREF(snapshot_options); 2475 g_free(tmp_filename); 2476 return bs_snapshot; 2477 } 2478 2479 /* 2480 * Opens a disk image (raw, qcow2, vmdk, ...) 2481 * 2482 * options is a QDict of options to pass to the block drivers, or NULL for an 2483 * empty set of options. The reference to the QDict belongs to the block layer 2484 * after the call (even on failure), so if the caller intends to reuse the 2485 * dictionary, it needs to use QINCREF() before calling bdrv_open. 2486 * 2487 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 2488 * If it is not NULL, the referenced BDS will be reused. 2489 * 2490 * The reference parameter may be used to specify an existing block device which 2491 * should be opened. If specified, neither options nor a filename may be given, 2492 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 
2493 */ 2494 static BlockDriverState *bdrv_open_inherit(const char *filename, 2495 const char *reference, 2496 QDict *options, int flags, 2497 BlockDriverState *parent, 2498 const BdrvChildRole *child_role, 2499 Error **errp) 2500 { 2501 int ret; 2502 BlockBackend *file = NULL; 2503 BlockDriverState *bs; 2504 BlockDriver *drv = NULL; 2505 const char *drvname; 2506 const char *backing; 2507 Error *local_err = NULL; 2508 QDict *snapshot_options = NULL; 2509 int snapshot_flags = 0; 2510 2511 assert(!child_role || !flags); 2512 assert(!child_role == !parent); 2513 2514 if (reference) { 2515 bool options_non_empty = options ? qdict_size(options) : false; 2516 QDECREF(options); 2517 2518 if (filename || options_non_empty) { 2519 error_setg(errp, "Cannot reference an existing block device with " 2520 "additional options or a new filename"); 2521 return NULL; 2522 } 2523 2524 bs = bdrv_lookup_bs(reference, reference, errp); 2525 if (!bs) { 2526 return NULL; 2527 } 2528 2529 bdrv_ref(bs); 2530 return bs; 2531 } 2532 2533 bs = bdrv_new(); 2534 2535 /* NULL means an empty set of options */ 2536 if (options == NULL) { 2537 options = qdict_new(); 2538 } 2539 2540 /* json: syntax counts as explicit options, as if in the QDict */ 2541 parse_json_protocol(options, &filename, &local_err); 2542 if (local_err) { 2543 goto fail; 2544 } 2545 2546 bs->explicit_options = qdict_clone_shallow(options); 2547 2548 if (child_role) { 2549 bs->inherits_from = parent; 2550 child_role->inherit_options(&flags, options, 2551 parent->open_flags, parent->options); 2552 } 2553 2554 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 2555 if (local_err) { 2556 goto fail; 2557 } 2558 2559 /* 2560 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. 2561 * Caution: getting a boolean member of @options requires care. 2562 * When @options come from -blockdev or blockdev_add, members are 2563 * typed according to the QAPI schema, but when they come from 2564 * -drive, they're all QString. 
2565 */ 2566 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && 2567 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { 2568 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); 2569 } else { 2570 flags &= ~BDRV_O_RDWR; 2571 } 2572 2573 if (flags & BDRV_O_SNAPSHOT) { 2574 snapshot_options = qdict_new(); 2575 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 2576 flags, options); 2577 /* Let bdrv_backing_options() override "read-only" */ 2578 qdict_del(options, BDRV_OPT_READ_ONLY); 2579 bdrv_backing_options(&flags, options, flags, options); 2580 } 2581 2582 bs->open_flags = flags; 2583 bs->options = options; 2584 options = qdict_clone_shallow(options); 2585 2586 /* Find the right image format driver */ 2587 /* See cautionary note on accessing @options above */ 2588 drvname = qdict_get_try_str(options, "driver"); 2589 if (drvname) { 2590 drv = bdrv_find_format(drvname); 2591 if (!drv) { 2592 error_setg(errp, "Unknown driver: '%s'", drvname); 2593 goto fail; 2594 } 2595 } 2596 2597 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 2598 2599 /* See cautionary note on accessing @options above */ 2600 backing = qdict_get_try_str(options, "backing"); 2601 if (backing && *backing == '\0') { 2602 flags |= BDRV_O_NO_BACKING; 2603 qdict_del(options, "backing"); 2604 } 2605 2606 /* Open image file without format layer. This BlockBackend is only used for 2607 * probing, the block drivers will do their own bdrv_open_child() for the 2608 * same BDS, which is why we put the node name back into options. */ 2609 if ((flags & BDRV_O_PROTOCOL) == 0) { 2610 BlockDriverState *file_bs; 2611 2612 file_bs = bdrv_open_child_bs(filename, options, "file", bs, 2613 &child_file, true, &local_err); 2614 if (local_err) { 2615 goto fail; 2616 } 2617 if (file_bs != NULL) { 2618 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only 2619 * looking at the header to guess the image format. 
This works even 2620 * in cases where a guest would not see a consistent state. */ 2621 file = blk_new(0, BLK_PERM_ALL); 2622 blk_insert_bs(file, file_bs, &local_err); 2623 bdrv_unref(file_bs); 2624 if (local_err) { 2625 goto fail; 2626 } 2627 2628 qdict_put_str(options, "file", bdrv_get_node_name(file_bs)); 2629 } 2630 } 2631 2632 /* Image format probing */ 2633 bs->probed = !drv; 2634 if (!drv && file) { 2635 ret = find_image_format(file, filename, &drv, &local_err); 2636 if (ret < 0) { 2637 goto fail; 2638 } 2639 /* 2640 * This option update would logically belong in bdrv_fill_options(), 2641 * but we first need to open bs->file for the probing to work, while 2642 * opening bs->file already requires the (mostly) final set of options 2643 * so that cache mode etc. can be inherited. 2644 * 2645 * Adding the driver later is somewhat ugly, but it's not an option 2646 * that would ever be inherited, so it's correct. We just need to make 2647 * sure to update both bs->options (which has the full effective 2648 * options for bs) and options (which has file.* already removed). 
2649 */ 2650 qdict_put_str(bs->options, "driver", drv->format_name); 2651 qdict_put_str(options, "driver", drv->format_name); 2652 } else if (!drv) { 2653 error_setg(errp, "Must specify either driver or file"); 2654 goto fail; 2655 } 2656 2657 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 2658 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 2659 /* file must be NULL if a protocol BDS is about to be created 2660 * (the inverse results in an error message from bdrv_open_common()) */ 2661 assert(!(flags & BDRV_O_PROTOCOL) || !file); 2662 2663 /* Open the image */ 2664 ret = bdrv_open_common(bs, file, options, &local_err); 2665 if (ret < 0) { 2666 goto fail; 2667 } 2668 2669 if (file) { 2670 blk_unref(file); 2671 file = NULL; 2672 } 2673 2674 /* If there is a backing file, use it */ 2675 if ((flags & BDRV_O_NO_BACKING) == 0) { 2676 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 2677 if (ret < 0) { 2678 goto close_and_fail; 2679 } 2680 } 2681 2682 bdrv_refresh_filename(bs); 2683 2684 /* Check if any unknown options were used */ 2685 if (qdict_size(options) != 0) { 2686 const QDictEntry *entry = qdict_first(options); 2687 if (flags & BDRV_O_PROTOCOL) { 2688 error_setg(errp, "Block protocol '%s' doesn't support the option " 2689 "'%s'", drv->format_name, entry->key); 2690 } else { 2691 error_setg(errp, 2692 "Block format '%s' does not support the option '%s'", 2693 drv->format_name, entry->key); 2694 } 2695 2696 goto close_and_fail; 2697 } 2698 2699 bdrv_parent_cb_change_media(bs, true); 2700 2701 QDECREF(options); 2702 2703 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 2704 * temporary snapshot afterwards. 
*/ 2705 if (snapshot_flags) { 2706 BlockDriverState *snapshot_bs; 2707 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, 2708 snapshot_options, &local_err); 2709 snapshot_options = NULL; 2710 if (local_err) { 2711 goto close_and_fail; 2712 } 2713 /* We are not going to return bs but the overlay on top of it 2714 * (snapshot_bs); thus, we have to drop the strong reference to bs 2715 * (which we obtained by calling bdrv_new()). bs will not be deleted, 2716 * though, because the overlay still has a reference to it. */ 2717 bdrv_unref(bs); 2718 bs = snapshot_bs; 2719 } 2720 2721 return bs; 2722 2723 fail: 2724 blk_unref(file); 2725 QDECREF(snapshot_options); 2726 QDECREF(bs->explicit_options); 2727 QDECREF(bs->options); 2728 QDECREF(options); 2729 bs->options = NULL; 2730 bs->explicit_options = NULL; 2731 bdrv_unref(bs); 2732 error_propagate(errp, local_err); 2733 return NULL; 2734 2735 close_and_fail: 2736 bdrv_unref(bs); 2737 QDECREF(snapshot_options); 2738 QDECREF(options); 2739 error_propagate(errp, local_err); 2740 return NULL; 2741 } 2742 2743 BlockDriverState *bdrv_open(const char *filename, const char *reference, 2744 QDict *options, int flags, Error **errp) 2745 { 2746 return bdrv_open_inherit(filename, reference, options, flags, NULL, 2747 NULL, errp); 2748 } 2749 2750 /* 2751 * Adds a BlockDriverState to a simple queue for an atomic, transactional 2752 * reopen of multiple devices. 2753 * 2754 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 2755 * already performed, or alternatively may be NULL a new BlockReopenQueue will 2756 * be created and initialized. This newly created BlockReopenQueue should be 2757 * passed back in for subsequent calls that are intended to be of the same 2758 * atomic 'set'. 2759 * 2760 * bs is the BlockDriverState to add to the reopen queue. 
2761 * 2762 * options contains the changed options for the associated bs 2763 * (the BlockReopenQueue takes ownership) 2764 * 2765 * flags contains the open flags for the associated bs 2766 * 2767 * returns a pointer to bs_queue, which is either the newly allocated 2768 * bs_queue, or the existing bs_queue being used. 2769 * 2770 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). 2771 */ 2772 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 2773 BlockDriverState *bs, 2774 QDict *options, 2775 int flags, 2776 const BdrvChildRole *role, 2777 QDict *parent_options, 2778 int parent_flags) 2779 { 2780 assert(bs != NULL); 2781 2782 BlockReopenQueueEntry *bs_entry; 2783 BdrvChild *child; 2784 QDict *old_options, *explicit_options; 2785 2786 /* Make sure that the caller remembered to use a drained section. This is 2787 * important to avoid graph changes between the recursive queuing here and 2788 * bdrv_reopen_multiple(). */ 2789 assert(bs->quiesce_counter > 0); 2790 2791 if (bs_queue == NULL) { 2792 bs_queue = g_new0(BlockReopenQueue, 1); 2793 QSIMPLEQ_INIT(bs_queue); 2794 } 2795 2796 if (!options) { 2797 options = qdict_new(); 2798 } 2799 2800 /* Check if this BlockDriverState is already in the queue */ 2801 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 2802 if (bs == bs_entry->state.bs) { 2803 break; 2804 } 2805 } 2806 2807 /* 2808 * Precedence of options: 2809 * 1. Explicitly passed in options (highest) 2810 * 2. Set in flags (only for top level) 2811 * 3. Retained from explicitly set options of bs 2812 * 4. Inherited from parent node 2813 * 5. Retained from effective options of bs 2814 */ 2815 2816 if (!parent_options) { 2817 /* 2818 * Any setting represented by flags is always updated. If the 2819 * corresponding QDict option is set, it takes precedence. Otherwise 2820 * the flag is translated into a QDict option. The old setting of bs is 2821 * not considered. 
2822 */ 2823 update_options_from_flags(options, flags); 2824 } 2825 2826 /* Old explicitly set values (don't overwrite by inherited value) */ 2827 if (bs_entry) { 2828 old_options = qdict_clone_shallow(bs_entry->state.explicit_options); 2829 } else { 2830 old_options = qdict_clone_shallow(bs->explicit_options); 2831 } 2832 bdrv_join_options(bs, options, old_options); 2833 QDECREF(old_options); 2834 2835 explicit_options = qdict_clone_shallow(options); 2836 2837 /* Inherit from parent node */ 2838 if (parent_options) { 2839 assert(!flags); 2840 role->inherit_options(&flags, options, parent_flags, parent_options); 2841 } 2842 2843 /* Old values are used for options that aren't set yet */ 2844 old_options = qdict_clone_shallow(bs->options); 2845 bdrv_join_options(bs, options, old_options); 2846 QDECREF(old_options); 2847 2848 /* bdrv_open_inherit() sets and clears some additional flags internally */ 2849 flags &= ~BDRV_O_PROTOCOL; 2850 if (flags & BDRV_O_RDWR) { 2851 flags |= BDRV_O_ALLOW_RDWR; 2852 } 2853 2854 if (!bs_entry) { 2855 bs_entry = g_new0(BlockReopenQueueEntry, 1); 2856 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 2857 } else { 2858 QDECREF(bs_entry->state.options); 2859 QDECREF(bs_entry->state.explicit_options); 2860 } 2861 2862 bs_entry->state.bs = bs; 2863 bs_entry->state.options = options; 2864 bs_entry->state.explicit_options = explicit_options; 2865 bs_entry->state.flags = flags; 2866 2867 /* This needs to be overwritten in bdrv_reopen_prepare() */ 2868 bs_entry->state.perm = UINT64_MAX; 2869 bs_entry->state.shared_perm = 0; 2870 2871 QLIST_FOREACH(child, &bs->children, next) { 2872 QDict *new_child_options; 2873 char *child_key_dot; 2874 2875 /* reopen can only change the options of block devices that were 2876 * implicitly created and inherited options. For other (referenced) 2877 * block devices, a syntax like "backing.foo" results in an error. 
*/ 2878 if (child->bs->inherits_from != bs) { 2879 continue; 2880 } 2881 2882 child_key_dot = g_strdup_printf("%s.", child->name); 2883 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 2884 g_free(child_key_dot); 2885 2886 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0, 2887 child->role, options, flags); 2888 } 2889 2890 return bs_queue; 2891 } 2892 2893 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 2894 BlockDriverState *bs, 2895 QDict *options, int flags) 2896 { 2897 return bdrv_reopen_queue_child(bs_queue, bs, options, flags, 2898 NULL, NULL, 0); 2899 } 2900 2901 /* 2902 * Reopen multiple BlockDriverStates atomically & transactionally. 2903 * 2904 * The queue passed in (bs_queue) must have been built up previous 2905 * via bdrv_reopen_queue(). 2906 * 2907 * Reopens all BDS specified in the queue, with the appropriate 2908 * flags. All devices are prepared for reopen, and failure of any 2909 * device will cause all device changes to be abandonded, and intermediate 2910 * data cleaned up. 2911 * 2912 * If all devices prepare successfully, then the changes are committed 2913 * to all devices. 2914 * 2915 * All affected nodes must be drained between bdrv_reopen_queue() and 2916 * bdrv_reopen_multiple(). 
2917 */ 2918 int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) 2919 { 2920 int ret = -1; 2921 BlockReopenQueueEntry *bs_entry, *next; 2922 Error *local_err = NULL; 2923 2924 assert(bs_queue != NULL); 2925 2926 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 2927 assert(bs_entry->state.bs->quiesce_counter > 0); 2928 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 2929 error_propagate(errp, local_err); 2930 goto cleanup; 2931 } 2932 bs_entry->prepared = true; 2933 } 2934 2935 /* If we reach this point, we have success and just need to apply the 2936 * changes 2937 */ 2938 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 2939 bdrv_reopen_commit(&bs_entry->state); 2940 } 2941 2942 ret = 0; 2943 2944 cleanup: 2945 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 2946 if (ret && bs_entry->prepared) { 2947 bdrv_reopen_abort(&bs_entry->state); 2948 } else if (ret) { 2949 QDECREF(bs_entry->state.explicit_options); 2950 } 2951 QDECREF(bs_entry->state.options); 2952 g_free(bs_entry); 2953 } 2954 g_free(bs_queue); 2955 2956 return ret; 2957 } 2958 2959 2960 /* Reopen a single BlockDriverState with the specified flags. 
*/ 2961 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 2962 { 2963 int ret = -1; 2964 Error *local_err = NULL; 2965 BlockReopenQueue *queue; 2966 2967 bdrv_subtree_drained_begin(bs); 2968 2969 queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); 2970 ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); 2971 if (local_err != NULL) { 2972 error_propagate(errp, local_err); 2973 } 2974 2975 bdrv_subtree_drained_end(bs); 2976 2977 return ret; 2978 } 2979 2980 static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, 2981 BdrvChild *c) 2982 { 2983 BlockReopenQueueEntry *entry; 2984 2985 QSIMPLEQ_FOREACH(entry, q, entry) { 2986 BlockDriverState *bs = entry->state.bs; 2987 BdrvChild *child; 2988 2989 QLIST_FOREACH(child, &bs->children, next) { 2990 if (child == c) { 2991 return entry; 2992 } 2993 } 2994 } 2995 2996 return NULL; 2997 } 2998 2999 static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, 3000 uint64_t *perm, uint64_t *shared) 3001 { 3002 BdrvChild *c; 3003 BlockReopenQueueEntry *parent; 3004 uint64_t cumulative_perms = 0; 3005 uint64_t cumulative_shared_perms = BLK_PERM_ALL; 3006 3007 QLIST_FOREACH(c, &bs->parents, next_parent) { 3008 parent = find_parent_in_reopen_queue(q, c); 3009 if (!parent) { 3010 cumulative_perms |= c->perm; 3011 cumulative_shared_perms &= c->shared_perm; 3012 } else { 3013 uint64_t nperm, nshared; 3014 3015 bdrv_child_perm(parent->state.bs, bs, c, c->role, q, 3016 parent->state.perm, parent->state.shared_perm, 3017 &nperm, &nshared); 3018 3019 cumulative_perms |= nperm; 3020 cumulative_shared_perms &= nshared; 3021 } 3022 } 3023 *perm = cumulative_perms; 3024 *shared = cumulative_shared_perms; 3025 } 3026 3027 /* 3028 * Prepares a BlockDriverState for reopen. 
All changes are staged in the 3029 * 'opaque' field of the BDRVReopenState, which is used and allocated by 3030 * the block driver layer .bdrv_reopen_prepare() 3031 * 3032 * bs is the BlockDriverState to reopen 3033 * flags are the new open flags 3034 * queue is the reopen queue 3035 * 3036 * Returns 0 on success, non-zero on error. On error errp will be set 3037 * as well. 3038 * 3039 * On failure, bdrv_reopen_abort() will be called to clean up any data. 3040 * It is the responsibility of the caller to then call the abort() or 3041 * commit() for any other BDS that have been left in a prepare() state 3042 * 3043 */ 3044 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 3045 Error **errp) 3046 { 3047 int ret = -1; 3048 Error *local_err = NULL; 3049 BlockDriver *drv; 3050 QemuOpts *opts; 3051 const char *value; 3052 bool read_only; 3053 3054 assert(reopen_state != NULL); 3055 assert(reopen_state->bs->drv != NULL); 3056 drv = reopen_state->bs->drv; 3057 3058 /* Process generic block layer options */ 3059 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 3060 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err); 3061 if (local_err) { 3062 error_propagate(errp, local_err); 3063 ret = -EINVAL; 3064 goto error; 3065 } 3066 3067 update_flags_from_options(&reopen_state->flags, opts); 3068 3069 /* node-name and driver must be unchanged. Put them back into the QDict, so 3070 * that they are checked at the end of this function. */ 3071 value = qemu_opt_get(opts, "node-name"); 3072 if (value) { 3073 qdict_put_str(reopen_state->options, "node-name", value); 3074 } 3075 3076 value = qemu_opt_get(opts, "driver"); 3077 if (value) { 3078 qdict_put_str(reopen_state->options, "driver", value); 3079 } 3080 3081 /* If we are to stay read-only, do not allow permission change 3082 * to r/w. 
Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is 3083 * not set, or if the BDS still has copy_on_read enabled */ 3084 read_only = !(reopen_state->flags & BDRV_O_RDWR); 3085 ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err); 3086 if (local_err) { 3087 error_propagate(errp, local_err); 3088 goto error; 3089 } 3090 3091 /* Calculate required permissions after reopening */ 3092 bdrv_reopen_perm(queue, reopen_state->bs, 3093 &reopen_state->perm, &reopen_state->shared_perm); 3094 3095 ret = bdrv_flush(reopen_state->bs); 3096 if (ret) { 3097 error_setg_errno(errp, -ret, "Error flushing drive"); 3098 goto error; 3099 } 3100 3101 if (drv->bdrv_reopen_prepare) { 3102 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 3103 if (ret) { 3104 if (local_err != NULL) { 3105 error_propagate(errp, local_err); 3106 } else { 3107 error_setg(errp, "failed while preparing to reopen image '%s'", 3108 reopen_state->bs->filename); 3109 } 3110 goto error; 3111 } 3112 } else { 3113 /* It is currently mandatory to have a bdrv_reopen_prepare() 3114 * handler for each supported drv. */ 3115 error_setg(errp, "Block format '%s' used by node '%s' " 3116 "does not support reopening files", drv->format_name, 3117 bdrv_get_device_or_node_name(reopen_state->bs)); 3118 ret = -1; 3119 goto error; 3120 } 3121 3122 /* Options that are not handled are only okay if they are unchanged 3123 * compared to the old state. It is expected that some options are only 3124 * used for the initial open, but not reopen (e.g. 
filename) */ 3125 if (qdict_size(reopen_state->options)) { 3126 const QDictEntry *entry = qdict_first(reopen_state->options); 3127 3128 do { 3129 QObject *new = entry->value; 3130 QObject *old = qdict_get(reopen_state->bs->options, entry->key); 3131 3132 /* 3133 * TODO: When using -drive to specify blockdev options, all values 3134 * will be strings; however, when using -blockdev, blockdev-add or 3135 * filenames using the json:{} pseudo-protocol, they will be 3136 * correctly typed. 3137 * In contrast, reopening options are (currently) always strings 3138 * (because you can only specify them through qemu-io; all other 3139 * callers do not specify any options). 3140 * Therefore, when using anything other than -drive to create a BDS, 3141 * this cannot detect non-string options as unchanged, because 3142 * qobject_is_equal() always returns false for objects of different 3143 * type. In the future, this should be remedied by correctly typing 3144 * all options. For now, this is not too big of an issue because 3145 * the user can simply omit options which cannot be changed anyway, 3146 * so they will stay unchanged. 3147 */ 3148 if (!qobject_is_equal(new, old)) { 3149 error_setg(errp, "Cannot change the option '%s'", entry->key); 3150 ret = -EINVAL; 3151 goto error; 3152 } 3153 } while ((entry = qdict_next(reopen_state->options, entry))); 3154 } 3155 3156 ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm, 3157 reopen_state->shared_perm, NULL, errp); 3158 if (ret < 0) { 3159 goto error; 3160 } 3161 3162 ret = 0; 3163 3164 error: 3165 qemu_opts_del(opts); 3166 return ret; 3167 } 3168 3169 /* 3170 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 3171 * makes them final by swapping the staging BlockDriverState contents into 3172 * the active BlockDriverState contents. 
3173 */ 3174 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 3175 { 3176 BlockDriver *drv; 3177 BlockDriverState *bs; 3178 bool old_can_write, new_can_write; 3179 3180 assert(reopen_state != NULL); 3181 bs = reopen_state->bs; 3182 drv = bs->drv; 3183 assert(drv != NULL); 3184 3185 old_can_write = 3186 !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); 3187 3188 /* If there are any driver level actions to take */ 3189 if (drv->bdrv_reopen_commit) { 3190 drv->bdrv_reopen_commit(reopen_state); 3191 } 3192 3193 /* set BDS specific flags now */ 3194 QDECREF(bs->explicit_options); 3195 3196 bs->explicit_options = reopen_state->explicit_options; 3197 bs->open_flags = reopen_state->flags; 3198 bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 3199 3200 bdrv_refresh_limits(bs, NULL); 3201 3202 bdrv_set_perm(reopen_state->bs, reopen_state->perm, 3203 reopen_state->shared_perm); 3204 3205 new_can_write = 3206 !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); 3207 if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) { 3208 Error *local_err = NULL; 3209 if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) { 3210 /* This is not fatal, bitmaps just left read-only, so all following 3211 * writes will fail. User can remove read-only bitmaps to unblock 3212 * writes. 
3213 */ 3214 error_reportf_err(local_err, 3215 "%s: Failed to make dirty bitmaps writable: ", 3216 bdrv_get_node_name(bs)); 3217 } 3218 } 3219 } 3220 3221 /* 3222 * Abort the reopen, and delete and free the staged changes in 3223 * reopen_state 3224 */ 3225 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 3226 { 3227 BlockDriver *drv; 3228 3229 assert(reopen_state != NULL); 3230 drv = reopen_state->bs->drv; 3231 assert(drv != NULL); 3232 3233 if (drv->bdrv_reopen_abort) { 3234 drv->bdrv_reopen_abort(reopen_state); 3235 } 3236 3237 QDECREF(reopen_state->explicit_options); 3238 3239 bdrv_abort_perm_update(reopen_state->bs); 3240 } 3241 3242 3243 static void bdrv_close(BlockDriverState *bs) 3244 { 3245 BdrvAioNotifier *ban, *ban_next; 3246 BdrvChild *child, *next; 3247 3248 assert(!bs->job); 3249 assert(!bs->refcnt); 3250 3251 bdrv_drained_begin(bs); /* complete I/O */ 3252 bdrv_flush(bs); 3253 bdrv_drain(bs); /* in case flush left pending I/O */ 3254 3255 if (bs->drv) { 3256 bs->drv->bdrv_close(bs); 3257 bs->drv = NULL; 3258 } 3259 3260 bdrv_set_backing_hd(bs, NULL, &error_abort); 3261 3262 if (bs->file != NULL) { 3263 bdrv_unref_child(bs, bs->file); 3264 bs->file = NULL; 3265 } 3266 3267 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 3268 /* TODO Remove bdrv_unref() from drivers' close function and use 3269 * bdrv_unref_child() here */ 3270 if (child->bs->inherits_from == bs) { 3271 child->bs->inherits_from = NULL; 3272 } 3273 bdrv_detach_child(child); 3274 } 3275 3276 g_free(bs->opaque); 3277 bs->opaque = NULL; 3278 atomic_set(&bs->copy_on_read, 0); 3279 bs->backing_file[0] = '\0'; 3280 bs->backing_format[0] = '\0'; 3281 bs->total_sectors = 0; 3282 bs->encrypted = false; 3283 bs->sg = false; 3284 QDECREF(bs->options); 3285 QDECREF(bs->explicit_options); 3286 bs->options = NULL; 3287 bs->explicit_options = NULL; 3288 QDECREF(bs->full_open_options); 3289 bs->full_open_options = NULL; 3290 3291 bdrv_release_named_dirty_bitmaps(bs); 3292 
assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 3293 3294 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3295 g_free(ban); 3296 } 3297 QLIST_INIT(&bs->aio_notifiers); 3298 bdrv_drained_end(bs); 3299 } 3300 3301 void bdrv_close_all(void) 3302 { 3303 block_job_cancel_sync_all(); 3304 nbd_export_close_all(); 3305 3306 /* Drop references from requests still in flight, such as canceled block 3307 * jobs whose AIO context has not been polled yet */ 3308 bdrv_drain_all(); 3309 3310 blk_remove_all_bs(); 3311 blockdev_close_all_bdrv_states(); 3312 3313 assert(QTAILQ_EMPTY(&all_bdrv_states)); 3314 } 3315 3316 static bool should_update_child(BdrvChild *c, BlockDriverState *to) 3317 { 3318 BdrvChild *to_c; 3319 3320 if (c->role->stay_at_node) { 3321 return false; 3322 } 3323 3324 if (c->role == &child_backing) { 3325 /* If @from is a backing file of @to, ignore the child to avoid 3326 * creating a loop. We only want to change the pointer of other 3327 * parents. */ 3328 QLIST_FOREACH(to_c, &to->children, next) { 3329 if (to_c == c) { 3330 break; 3331 } 3332 } 3333 if (to_c) { 3334 return false; 3335 } 3336 } 3337 3338 return true; 3339 } 3340 3341 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, 3342 Error **errp) 3343 { 3344 BdrvChild *c, *next; 3345 GSList *list = NULL, *p; 3346 uint64_t old_perm, old_shared; 3347 uint64_t perm = 0, shared = BLK_PERM_ALL; 3348 int ret; 3349 3350 assert(!atomic_read(&from->in_flight)); 3351 assert(!atomic_read(&to->in_flight)); 3352 3353 /* Make sure that @from doesn't go away until we have successfully attached 3354 * all of its parents to @to. 
*/ 3355 bdrv_ref(from); 3356 3357 /* Put all parents into @list and calculate their cumulative permissions */ 3358 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 3359 if (!should_update_child(c, to)) { 3360 continue; 3361 } 3362 list = g_slist_prepend(list, c); 3363 perm |= c->perm; 3364 shared &= c->shared_perm; 3365 } 3366 3367 /* Check whether the required permissions can be granted on @to, ignoring 3368 * all BdrvChild in @list so that they can't block themselves. */ 3369 ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); 3370 if (ret < 0) { 3371 bdrv_abort_perm_update(to); 3372 goto out; 3373 } 3374 3375 /* Now actually perform the change. We performed the permission check for 3376 * all elements of @list at once, so set the permissions all at once at the 3377 * very end. */ 3378 for (p = list; p != NULL; p = p->next) { 3379 c = p->data; 3380 3381 bdrv_ref(to); 3382 bdrv_replace_child_noperm(c, to); 3383 bdrv_unref(from); 3384 } 3385 3386 bdrv_get_cumulative_perm(to, &old_perm, &old_shared); 3387 bdrv_set_perm(to, old_perm | perm, old_shared | shared); 3388 3389 out: 3390 g_slist_free(list); 3391 bdrv_unref(from); 3392 } 3393 3394 /* 3395 * Add new bs contents at the top of an image chain while the chain is 3396 * live, while keeping required fields on the top layer. 3397 * 3398 * This will modify the BlockDriverState fields, and swap contents 3399 * between bs_new and bs_top. Both bs_new and bs_top are modified. 3400 * 3401 * bs_new must not be attached to a BlockBackend. 3402 * 3403 * This function does not create any image files. 3404 * 3405 * bdrv_append() takes ownership of a bs_new reference and unrefs it because 3406 * that's what the callers commonly need. bs_new will be referenced by the old 3407 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a 3408 * reference of its own, it must call bdrv_ref(). 
3409 */ 3410 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, 3411 Error **errp) 3412 { 3413 Error *local_err = NULL; 3414 3415 bdrv_set_backing_hd(bs_new, bs_top, &local_err); 3416 if (local_err) { 3417 error_propagate(errp, local_err); 3418 goto out; 3419 } 3420 3421 bdrv_replace_node(bs_top, bs_new, &local_err); 3422 if (local_err) { 3423 error_propagate(errp, local_err); 3424 bdrv_set_backing_hd(bs_new, NULL, &error_abort); 3425 goto out; 3426 } 3427 3428 /* bs_new is now referenced by its new parents, we don't need the 3429 * additional reference any more. */ 3430 out: 3431 bdrv_unref(bs_new); 3432 } 3433 3434 static void bdrv_delete(BlockDriverState *bs) 3435 { 3436 assert(!bs->job); 3437 assert(bdrv_op_blocker_is_empty(bs)); 3438 assert(!bs->refcnt); 3439 3440 bdrv_close(bs); 3441 3442 /* remove from list, if necessary */ 3443 if (bs->node_name[0] != '\0') { 3444 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 3445 } 3446 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 3447 3448 g_free(bs); 3449 } 3450 3451 /* 3452 * Run consistency checks on an image 3453 * 3454 * Returns 0 if the check could be completed (it doesn't mean that the image is 3455 * free of errors) or -errno when an internal error occurred. The results of the 3456 * check are stored in res. 
3457 */ 3458 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 3459 { 3460 if (bs->drv == NULL) { 3461 return -ENOMEDIUM; 3462 } 3463 if (bs->drv->bdrv_check == NULL) { 3464 return -ENOTSUP; 3465 } 3466 3467 memset(res, 0, sizeof(*res)); 3468 return bs->drv->bdrv_check(bs, res, fix); 3469 } 3470 3471 /* 3472 * Return values: 3473 * 0 - success 3474 * -EINVAL - backing format specified, but no file 3475 * -ENOSPC - can't update the backing file because no space is left in the 3476 * image file header 3477 * -ENOTSUP - format driver doesn't support changing the backing file 3478 */ 3479 int bdrv_change_backing_file(BlockDriverState *bs, 3480 const char *backing_file, const char *backing_fmt) 3481 { 3482 BlockDriver *drv = bs->drv; 3483 int ret; 3484 3485 if (!drv) { 3486 return -ENOMEDIUM; 3487 } 3488 3489 /* Backing file format doesn't make sense without a backing file */ 3490 if (backing_fmt && !backing_file) { 3491 return -EINVAL; 3492 } 3493 3494 if (drv->bdrv_change_backing_file != NULL) { 3495 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 3496 } else { 3497 ret = -ENOTSUP; 3498 } 3499 3500 if (ret == 0) { 3501 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 3502 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 3503 } 3504 return ret; 3505 } 3506 3507 /* 3508 * Finds the image layer in the chain that has 'bs' as its backing file. 3509 * 3510 * active is the current topmost image. 3511 * 3512 * Returns NULL if bs is not found in active's image chain, 3513 * or if active == bs. 3514 * 3515 * Returns the bottommost base image if bs == NULL. 3516 */ 3517 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 3518 BlockDriverState *bs) 3519 { 3520 while (active && bs != backing_bs(active)) { 3521 active = backing_bs(active); 3522 } 3523 3524 return active; 3525 } 3526 3527 /* Given a BDS, searches for the base layer. 
*/ 3528 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 3529 { 3530 return bdrv_find_overlay(bs, NULL); 3531 } 3532 3533 /* 3534 * Drops images above 'base' up to and including 'top', and sets the image 3535 * above 'top' to have base as its backing file. 3536 * 3537 * Requires that the overlay to 'top' is opened r/w, so that the backing file 3538 * information in 'bs' can be properly updated. 3539 * 3540 * E.g., this will convert the following chain: 3541 * bottom <- base <- intermediate <- top <- active 3542 * 3543 * to 3544 * 3545 * bottom <- base <- active 3546 * 3547 * It is allowed for bottom==base, in which case it converts: 3548 * 3549 * base <- intermediate <- top <- active 3550 * 3551 * to 3552 * 3553 * base <- active 3554 * 3555 * If backing_file_str is non-NULL, it will be used when modifying top's 3556 * overlay image metadata. 3557 * 3558 * Error conditions: 3559 * if active == top, that is considered an error 3560 * 3561 */ 3562 int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, 3563 const char *backing_file_str) 3564 { 3565 BdrvChild *c, *next; 3566 Error *local_err = NULL; 3567 int ret = -EIO; 3568 3569 bdrv_ref(top); 3570 3571 if (!top->drv || !base->drv) { 3572 goto exit; 3573 } 3574 3575 /* Make sure that base is in the backing chain of top */ 3576 if (!bdrv_chain_contains(top, base)) { 3577 goto exit; 3578 } 3579 3580 /* success - we can delete the intermediate states, and link top->base */ 3581 /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once 3582 * we've figured out how they should work. */ 3583 backing_file_str = backing_file_str ? 
backing_file_str : base->filename; 3584 3585 QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) { 3586 /* Check whether we are allowed to switch c from top to base */ 3587 GSList *ignore_children = g_slist_prepend(NULL, c); 3588 bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, 3589 ignore_children, &local_err); 3590 if (local_err) { 3591 ret = -EPERM; 3592 error_report_err(local_err); 3593 goto exit; 3594 } 3595 g_slist_free(ignore_children); 3596 3597 /* If so, update the backing file path in the image file */ 3598 if (c->role->update_filename) { 3599 ret = c->role->update_filename(c, base, backing_file_str, 3600 &local_err); 3601 if (ret < 0) { 3602 bdrv_abort_perm_update(base); 3603 error_report_err(local_err); 3604 goto exit; 3605 } 3606 } 3607 3608 /* Do the actual switch in the in-memory graph. 3609 * Completes bdrv_check_update_perm() transaction internally. */ 3610 bdrv_ref(base); 3611 bdrv_replace_child(c, base); 3612 bdrv_unref(top); 3613 } 3614 3615 ret = 0; 3616 exit: 3617 bdrv_unref(top); 3618 return ret; 3619 } 3620 3621 /** 3622 * Truncate file to 'offset' bytes (needed only for file protocols) 3623 */ 3624 int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, 3625 Error **errp) 3626 { 3627 BlockDriverState *bs = child->bs; 3628 BlockDriver *drv = bs->drv; 3629 int ret; 3630 3631 assert(child->perm & BLK_PERM_RESIZE); 3632 3633 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 3634 if (!drv) { 3635 error_setg(errp, "No medium inserted"); 3636 return -ENOMEDIUM; 3637 } 3638 if (!drv->bdrv_truncate) { 3639 if (bs->file && drv->is_filter) { 3640 return bdrv_truncate(bs->file, offset, prealloc, errp); 3641 } 3642 error_setg(errp, "Image format driver does not support resize"); 3643 return -ENOTSUP; 3644 } 3645 if (bs->read_only) { 3646 error_setg(errp, "Image is read-only"); 3647 return -EACCES; 3648 } 3649 3650 assert(!(bs->open_flags & BDRV_O_INACTIVE)); 3651 3652 ret = drv->bdrv_truncate(bs, 
offset, prealloc, errp); 3653 if (ret < 0) { 3654 return ret; 3655 } 3656 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 3657 if (ret < 0) { 3658 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 3659 } else { 3660 offset = bs->total_sectors * BDRV_SECTOR_SIZE; 3661 } 3662 bdrv_dirty_bitmap_truncate(bs, offset); 3663 bdrv_parent_cb_resize(bs); 3664 atomic_inc(&bs->write_gen); 3665 return ret; 3666 } 3667 3668 /** 3669 * Length of a allocated file in bytes. Sparse files are counted by actual 3670 * allocated space. Return < 0 if error or unknown. 3671 */ 3672 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 3673 { 3674 BlockDriver *drv = bs->drv; 3675 if (!drv) { 3676 return -ENOMEDIUM; 3677 } 3678 if (drv->bdrv_get_allocated_file_size) { 3679 return drv->bdrv_get_allocated_file_size(bs); 3680 } 3681 if (bs->file) { 3682 return bdrv_get_allocated_file_size(bs->file->bs); 3683 } 3684 return -ENOTSUP; 3685 } 3686 3687 /* 3688 * bdrv_measure: 3689 * @drv: Format driver 3690 * @opts: Creation options for new image 3691 * @in_bs: Existing image containing data for new image (may be NULL) 3692 * @errp: Error object 3693 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo()) 3694 * or NULL on error 3695 * 3696 * Calculate file size required to create a new image. 3697 * 3698 * If @in_bs is given then space for allocated clusters and zero clusters 3699 * from that image are included in the calculation. If @opts contains a 3700 * backing file that is shared by @in_bs then backing clusters may be omitted 3701 * from the calculation. 3702 * 3703 * If @in_bs is NULL then the calculation includes no allocated clusters 3704 * unless a preallocation option is given in @opts. 3705 * 3706 * Note that @in_bs may use a different BlockDriver from @drv. 3707 * 3708 * If an error occurs the @errp pointer is set. 
3709 */ 3710 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, 3711 BlockDriverState *in_bs, Error **errp) 3712 { 3713 if (!drv->bdrv_measure) { 3714 error_setg(errp, "Block driver '%s' does not support size measurement", 3715 drv->format_name); 3716 return NULL; 3717 } 3718 3719 return drv->bdrv_measure(opts, in_bs, errp); 3720 } 3721 3722 /** 3723 * Return number of sectors on success, -errno on error. 3724 */ 3725 int64_t bdrv_nb_sectors(BlockDriverState *bs) 3726 { 3727 BlockDriver *drv = bs->drv; 3728 3729 if (!drv) 3730 return -ENOMEDIUM; 3731 3732 if (drv->has_variable_length) { 3733 int ret = refresh_total_sectors(bs, bs->total_sectors); 3734 if (ret < 0) { 3735 return ret; 3736 } 3737 } 3738 return bs->total_sectors; 3739 } 3740 3741 /** 3742 * Return length in bytes on success, -errno on error. 3743 * The length is always a multiple of BDRV_SECTOR_SIZE. 3744 */ 3745 int64_t bdrv_getlength(BlockDriverState *bs) 3746 { 3747 int64_t ret = bdrv_nb_sectors(bs); 3748 3749 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 3750 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 3751 } 3752 3753 /* return 0 as number of sectors if no device present or error */ 3754 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 3755 { 3756 int64_t nb_sectors = bdrv_nb_sectors(bs); 3757 3758 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 3759 } 3760 3761 bool bdrv_is_sg(BlockDriverState *bs) 3762 { 3763 return bs->sg; 3764 } 3765 3766 bool bdrv_is_encrypted(BlockDriverState *bs) 3767 { 3768 if (bs->backing && bs->backing->bs->encrypted) { 3769 return true; 3770 } 3771 return bs->encrypted; 3772 } 3773 3774 const char *bdrv_get_format_name(BlockDriverState *bs) 3775 { 3776 return bs->drv ? 
bs->drv->format_name : NULL; 3777 } 3778 3779 static int qsort_strcmp(const void *a, const void *b) 3780 { 3781 return strcmp(*(char *const *)a, *(char *const *)b); 3782 } 3783 3784 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 3785 void *opaque) 3786 { 3787 BlockDriver *drv; 3788 int count = 0; 3789 int i; 3790 const char **formats = NULL; 3791 3792 QLIST_FOREACH(drv, &bdrv_drivers, list) { 3793 if (drv->format_name) { 3794 bool found = false; 3795 int i = count; 3796 while (formats && i && !found) { 3797 found = !strcmp(formats[--i], drv->format_name); 3798 } 3799 3800 if (!found) { 3801 formats = g_renew(const char *, formats, count + 1); 3802 formats[count++] = drv->format_name; 3803 } 3804 } 3805 } 3806 3807 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { 3808 const char *format_name = block_driver_modules[i].format_name; 3809 3810 if (format_name) { 3811 bool found = false; 3812 int j = count; 3813 3814 while (formats && j && !found) { 3815 found = !strcmp(formats[--j], format_name); 3816 } 3817 3818 if (!found) { 3819 formats = g_renew(const char *, formats, count + 1); 3820 formats[count++] = format_name; 3821 } 3822 } 3823 } 3824 3825 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 3826 3827 for (i = 0; i < count; i++) { 3828 it(opaque, formats[i]); 3829 } 3830 3831 g_free(formats); 3832 } 3833 3834 /* This function is to find a node in the bs graph */ 3835 BlockDriverState *bdrv_find_node(const char *node_name) 3836 { 3837 BlockDriverState *bs; 3838 3839 assert(node_name); 3840 3841 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3842 if (!strcmp(node_name, bs->node_name)) { 3843 return bs; 3844 } 3845 } 3846 return NULL; 3847 } 3848 3849 /* Put this QMP function here so it can access the static graph_bdrv_states. 
*/ 3850 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 3851 { 3852 BlockDeviceInfoList *list, *entry; 3853 BlockDriverState *bs; 3854 3855 list = NULL; 3856 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3857 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp); 3858 if (!info) { 3859 qapi_free_BlockDeviceInfoList(list); 3860 return NULL; 3861 } 3862 entry = g_malloc0(sizeof(*entry)); 3863 entry->value = info; 3864 entry->next = list; 3865 list = entry; 3866 } 3867 3868 return list; 3869 } 3870 3871 BlockDriverState *bdrv_lookup_bs(const char *device, 3872 const char *node_name, 3873 Error **errp) 3874 { 3875 BlockBackend *blk; 3876 BlockDriverState *bs; 3877 3878 if (device) { 3879 blk = blk_by_name(device); 3880 3881 if (blk) { 3882 bs = blk_bs(blk); 3883 if (!bs) { 3884 error_setg(errp, "Device '%s' has no medium", device); 3885 } 3886 3887 return bs; 3888 } 3889 } 3890 3891 if (node_name) { 3892 bs = bdrv_find_node(node_name); 3893 3894 if (bs) { 3895 return bs; 3896 } 3897 } 3898 3899 error_setg(errp, "Cannot find device=%s nor node_name=%s", 3900 device ? device : "", 3901 node_name ? node_name : ""); 3902 return NULL; 3903 } 3904 3905 /* If 'base' is in the same chain as 'top', return true. Otherwise, 3906 * return false. If either argument is NULL, return false. */ 3907 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 3908 { 3909 while (top && top != base) { 3910 top = backing_bs(top); 3911 } 3912 3913 return top != NULL; 3914 } 3915 3916 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 3917 { 3918 if (!bs) { 3919 return QTAILQ_FIRST(&graph_bdrv_states); 3920 } 3921 return QTAILQ_NEXT(bs, node_list); 3922 } 3923 3924 const char *bdrv_get_node_name(const BlockDriverState *bs) 3925 { 3926 return bs->node_name; 3927 } 3928 3929 const char *bdrv_get_parent_name(const BlockDriverState *bs) 3930 { 3931 BdrvChild *c; 3932 const char *name; 3933 3934 /* If multiple parents have a name, just pick the first one. 
*/ 3935 QLIST_FOREACH(c, &bs->parents, next_parent) { 3936 if (c->role->get_name) { 3937 name = c->role->get_name(c); 3938 if (name && *name) { 3939 return name; 3940 } 3941 } 3942 } 3943 3944 return NULL; 3945 } 3946 3947 /* TODO check what callers really want: bs->node_name or blk_name() */ 3948 const char *bdrv_get_device_name(const BlockDriverState *bs) 3949 { 3950 return bdrv_get_parent_name(bs) ?: ""; 3951 } 3952 3953 /* This can be used to identify nodes that might not have a device 3954 * name associated. Since node and device names live in the same 3955 * namespace, the result is unambiguous. The exception is if both are 3956 * absent, then this returns an empty (non-null) string. */ 3957 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 3958 { 3959 return bdrv_get_parent_name(bs) ?: bs->node_name; 3960 } 3961 3962 int bdrv_get_flags(BlockDriverState *bs) 3963 { 3964 return bs->open_flags; 3965 } 3966 3967 int bdrv_has_zero_init_1(BlockDriverState *bs) 3968 { 3969 return 1; 3970 } 3971 3972 int bdrv_has_zero_init(BlockDriverState *bs) 3973 { 3974 if (!bs->drv) { 3975 return 0; 3976 } 3977 3978 /* If BS is a copy on write image, it is initialized to 3979 the contents of the base image, which may not be zeroes. 
*/ 3980 if (bs->backing) { 3981 return 0; 3982 } 3983 if (bs->drv->bdrv_has_zero_init) { 3984 return bs->drv->bdrv_has_zero_init(bs); 3985 } 3986 if (bs->file && bs->drv->is_filter) { 3987 return bdrv_has_zero_init(bs->file->bs); 3988 } 3989 3990 /* safe default */ 3991 return 0; 3992 } 3993 3994 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 3995 { 3996 BlockDriverInfo bdi; 3997 3998 if (bs->backing) { 3999 return false; 4000 } 4001 4002 if (bdrv_get_info(bs, &bdi) == 0) { 4003 return bdi.unallocated_blocks_are_zero; 4004 } 4005 4006 return false; 4007 } 4008 4009 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 4010 { 4011 if (!(bs->open_flags & BDRV_O_UNMAP)) { 4012 return false; 4013 } 4014 4015 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP; 4016 } 4017 4018 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 4019 { 4020 if (bs->backing && bs->backing->bs->encrypted) 4021 return bs->backing_file; 4022 else if (bs->encrypted) 4023 return bs->filename; 4024 else 4025 return NULL; 4026 } 4027 4028 void bdrv_get_backing_filename(BlockDriverState *bs, 4029 char *filename, int filename_size) 4030 { 4031 pstrcpy(filename, filename_size, bs->backing_file); 4032 } 4033 4034 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4035 { 4036 BlockDriver *drv = bs->drv; 4037 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ 4038 if (!drv) { 4039 return -ENOMEDIUM; 4040 } 4041 if (!drv->bdrv_get_info) { 4042 if (bs->file && drv->is_filter) { 4043 return bdrv_get_info(bs->file->bs, bdi); 4044 } 4045 return -ENOTSUP; 4046 } 4047 memset(bdi, 0, sizeof(*bdi)); 4048 return drv->bdrv_get_info(bs, bdi); 4049 } 4050 4051 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 4052 { 4053 BlockDriver *drv = bs->drv; 4054 if (drv && drv->bdrv_get_specific_info) { 4055 return drv->bdrv_get_specific_info(bs); 4056 } 4057 return NULL; 4058 } 4059 4060 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent 
event) 4061 { 4062 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 4063 return; 4064 } 4065 4066 bs->drv->bdrv_debug_event(bs, event); 4067 } 4068 4069 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 4070 const char *tag) 4071 { 4072 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 4073 bs = bs->file ? bs->file->bs : NULL; 4074 } 4075 4076 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 4077 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 4078 } 4079 4080 return -ENOTSUP; 4081 } 4082 4083 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 4084 { 4085 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 4086 bs = bs->file ? bs->file->bs : NULL; 4087 } 4088 4089 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 4090 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 4091 } 4092 4093 return -ENOTSUP; 4094 } 4095 4096 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 4097 { 4098 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 4099 bs = bs->file ? bs->file->bs : NULL; 4100 } 4101 4102 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 4103 return bs->drv->bdrv_debug_resume(bs, tag); 4104 } 4105 4106 return -ENOTSUP; 4107 } 4108 4109 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 4110 { 4111 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 4112 bs = bs->file ? bs->file->bs : NULL; 4113 } 4114 4115 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 4116 return bs->drv->bdrv_debug_is_suspended(bs, tag); 4117 } 4118 4119 return false; 4120 } 4121 4122 /* backing_file can either be relative, or absolute, or a protocol. If it is 4123 * relative, it must be relative to the chain. So, passing in bs->filename 4124 * from a BDS as backing_file should not be done, as that may be relative to 4125 * the CWD rather than the chain. 
*/ 4126 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 4127 const char *backing_file) 4128 { 4129 char *filename_full = NULL; 4130 char *backing_file_full = NULL; 4131 char *filename_tmp = NULL; 4132 int is_protocol = 0; 4133 BlockDriverState *curr_bs = NULL; 4134 BlockDriverState *retval = NULL; 4135 Error *local_error = NULL; 4136 4137 if (!bs || !bs->drv || !backing_file) { 4138 return NULL; 4139 } 4140 4141 filename_full = g_malloc(PATH_MAX); 4142 backing_file_full = g_malloc(PATH_MAX); 4143 filename_tmp = g_malloc(PATH_MAX); 4144 4145 is_protocol = path_has_protocol(backing_file); 4146 4147 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) { 4148 4149 /* If either of the filename paths is actually a protocol, then 4150 * compare unmodified paths; otherwise make paths relative */ 4151 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 4152 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 4153 retval = curr_bs->backing->bs; 4154 break; 4155 } 4156 /* Also check against the full backing filename for the image */ 4157 bdrv_get_full_backing_filename(curr_bs, backing_file_full, PATH_MAX, 4158 &local_error); 4159 if (local_error == NULL) { 4160 if (strcmp(backing_file, backing_file_full) == 0) { 4161 retval = curr_bs->backing->bs; 4162 break; 4163 } 4164 } else { 4165 error_free(local_error); 4166 local_error = NULL; 4167 } 4168 } else { 4169 /* If not an absolute filename path, make it relative to the current 4170 * image's filename path */ 4171 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4172 backing_file); 4173 4174 /* We are going to compare absolute pathnames */ 4175 if (!realpath(filename_tmp, filename_full)) { 4176 continue; 4177 } 4178 4179 /* We need to make sure the backing filename we are comparing against 4180 * is relative to the current image filename (or absolute) */ 4181 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4182 curr_bs->backing_file); 4183 4184 if 
(!realpath(filename_tmp, backing_file_full)) { 4185 continue; 4186 } 4187 4188 if (strcmp(backing_file_full, filename_full) == 0) { 4189 retval = curr_bs->backing->bs; 4190 break; 4191 } 4192 } 4193 } 4194 4195 g_free(filename_full); 4196 g_free(backing_file_full); 4197 g_free(filename_tmp); 4198 return retval; 4199 } 4200 4201 void bdrv_init(void) 4202 { 4203 module_call_init(MODULE_INIT_BLOCK); 4204 } 4205 4206 void bdrv_init_with_whitelist(void) 4207 { 4208 use_bdrv_whitelist = 1; 4209 bdrv_init(); 4210 } 4211 4212 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 4213 { 4214 BdrvChild *child, *parent; 4215 uint64_t perm, shared_perm; 4216 Error *local_err = NULL; 4217 int ret; 4218 4219 if (!bs->drv) { 4220 return; 4221 } 4222 4223 if (!(bs->open_flags & BDRV_O_INACTIVE)) { 4224 return; 4225 } 4226 4227 QLIST_FOREACH(child, &bs->children, next) { 4228 bdrv_invalidate_cache(child->bs, &local_err); 4229 if (local_err) { 4230 error_propagate(errp, local_err); 4231 return; 4232 } 4233 } 4234 4235 /* 4236 * Update permissions, they may differ for inactive nodes. 4237 * 4238 * Note that the required permissions of inactive images are always a 4239 * subset of the permissions required after activating the image. This 4240 * allows us to just get the permissions upfront without restricting 4241 * drv->bdrv_invalidate_cache(). 4242 * 4243 * It also means that in error cases, we don't have to try and revert to 4244 * the old permissions (which is an operation that could fail, too). We can 4245 * just keep the extended permissions for the next time that an activation 4246 * of the image is tried. 
4247 */ 4248 bs->open_flags &= ~BDRV_O_INACTIVE; 4249 bdrv_get_cumulative_perm(bs, &perm, &shared_perm); 4250 ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); 4251 if (ret < 0) { 4252 bs->open_flags |= BDRV_O_INACTIVE; 4253 error_propagate(errp, local_err); 4254 return; 4255 } 4256 bdrv_set_perm(bs, perm, shared_perm); 4257 4258 if (bs->drv->bdrv_invalidate_cache) { 4259 bs->drv->bdrv_invalidate_cache(bs, &local_err); 4260 if (local_err) { 4261 bs->open_flags |= BDRV_O_INACTIVE; 4262 error_propagate(errp, local_err); 4263 return; 4264 } 4265 } 4266 4267 ret = refresh_total_sectors(bs, bs->total_sectors); 4268 if (ret < 0) { 4269 bs->open_flags |= BDRV_O_INACTIVE; 4270 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 4271 return; 4272 } 4273 4274 QLIST_FOREACH(parent, &bs->parents, next_parent) { 4275 if (parent->role->activate) { 4276 parent->role->activate(parent, &local_err); 4277 if (local_err) { 4278 error_propagate(errp, local_err); 4279 return; 4280 } 4281 } 4282 } 4283 } 4284 4285 void bdrv_invalidate_cache_all(Error **errp) 4286 { 4287 BlockDriverState *bs; 4288 Error *local_err = NULL; 4289 BdrvNextIterator it; 4290 4291 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 4292 AioContext *aio_context = bdrv_get_aio_context(bs); 4293 4294 aio_context_acquire(aio_context); 4295 bdrv_invalidate_cache(bs, &local_err); 4296 aio_context_release(aio_context); 4297 if (local_err) { 4298 error_propagate(errp, local_err); 4299 bdrv_next_cleanup(&it); 4300 return; 4301 } 4302 } 4303 } 4304 4305 static int bdrv_inactivate_recurse(BlockDriverState *bs, 4306 bool setting_flag) 4307 { 4308 BdrvChild *child, *parent; 4309 int ret; 4310 4311 if (!bs->drv) { 4312 return -ENOMEDIUM; 4313 } 4314 4315 if (!setting_flag && bs->drv->bdrv_inactivate) { 4316 ret = bs->drv->bdrv_inactivate(bs); 4317 if (ret < 0) { 4318 return ret; 4319 } 4320 } 4321 4322 if (setting_flag && !(bs->open_flags & BDRV_O_INACTIVE)) { 4323 uint64_t perm, 
shared_perm; 4324 4325 QLIST_FOREACH(parent, &bs->parents, next_parent) { 4326 if (parent->role->inactivate) { 4327 ret = parent->role->inactivate(parent); 4328 if (ret < 0) { 4329 return ret; 4330 } 4331 } 4332 } 4333 4334 bs->open_flags |= BDRV_O_INACTIVE; 4335 4336 /* Update permissions, they may differ for inactive nodes */ 4337 bdrv_get_cumulative_perm(bs, &perm, &shared_perm); 4338 bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort); 4339 bdrv_set_perm(bs, perm, shared_perm); 4340 } 4341 4342 QLIST_FOREACH(child, &bs->children, next) { 4343 ret = bdrv_inactivate_recurse(child->bs, setting_flag); 4344 if (ret < 0) { 4345 return ret; 4346 } 4347 } 4348 4349 /* At this point persistent bitmaps should be already stored by the format 4350 * driver */ 4351 bdrv_release_persistent_dirty_bitmaps(bs); 4352 4353 return 0; 4354 } 4355 4356 int bdrv_inactivate_all(void) 4357 { 4358 BlockDriverState *bs = NULL; 4359 BdrvNextIterator it; 4360 int ret = 0; 4361 int pass; 4362 GSList *aio_ctxs = NULL, *ctx; 4363 4364 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 4365 AioContext *aio_context = bdrv_get_aio_context(bs); 4366 4367 if (!g_slist_find(aio_ctxs, aio_context)) { 4368 aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); 4369 aio_context_acquire(aio_context); 4370 } 4371 } 4372 4373 /* We do two passes of inactivation. The first pass calls to drivers' 4374 * .bdrv_inactivate callbacks recursively so all cache is flushed to disk; 4375 * the second pass sets the BDRV_O_INACTIVE flag so that no further write 4376 * is allowed. 
*/ 4377 for (pass = 0; pass < 2; pass++) { 4378 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 4379 ret = bdrv_inactivate_recurse(bs, pass); 4380 if (ret < 0) { 4381 bdrv_next_cleanup(&it); 4382 goto out; 4383 } 4384 } 4385 } 4386 4387 out: 4388 for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { 4389 AioContext *aio_context = ctx->data; 4390 aio_context_release(aio_context); 4391 } 4392 g_slist_free(aio_ctxs); 4393 4394 return ret; 4395 } 4396 4397 /**************************************************************/ 4398 /* removable device support */ 4399 4400 /** 4401 * Return TRUE if the media is present 4402 */ 4403 bool bdrv_is_inserted(BlockDriverState *bs) 4404 { 4405 BlockDriver *drv = bs->drv; 4406 BdrvChild *child; 4407 4408 if (!drv) { 4409 return false; 4410 } 4411 if (drv->bdrv_is_inserted) { 4412 return drv->bdrv_is_inserted(bs); 4413 } 4414 QLIST_FOREACH(child, &bs->children, next) { 4415 if (!bdrv_is_inserted(child->bs)) { 4416 return false; 4417 } 4418 } 4419 return true; 4420 } 4421 4422 /** 4423 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 4424 */ 4425 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 4426 { 4427 BlockDriver *drv = bs->drv; 4428 4429 if (drv && drv->bdrv_eject) { 4430 drv->bdrv_eject(bs, eject_flag); 4431 } 4432 } 4433 4434 /** 4435 * Lock or unlock the media (if it is locked, the user won't be able 4436 * to eject it manually). 4437 */ 4438 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 4439 { 4440 BlockDriver *drv = bs->drv; 4441 4442 trace_bdrv_lock_medium(bs, locked); 4443 4444 if (drv && drv->bdrv_lock_medium) { 4445 drv->bdrv_lock_medium(bs, locked); 4446 } 4447 } 4448 4449 /* Get a reference to bs */ 4450 void bdrv_ref(BlockDriverState *bs) 4451 { 4452 bs->refcnt++; 4453 } 4454 4455 /* Release a previously grabbed reference to bs. 4456 * If after releasing, reference count is zero, the BlockDriverState is 4457 * deleted. 
*/ 4458 void bdrv_unref(BlockDriverState *bs) 4459 { 4460 if (!bs) { 4461 return; 4462 } 4463 assert(bs->refcnt > 0); 4464 if (--bs->refcnt == 0) { 4465 bdrv_delete(bs); 4466 } 4467 } 4468 4469 struct BdrvOpBlocker { 4470 Error *reason; 4471 QLIST_ENTRY(BdrvOpBlocker) list; 4472 }; 4473 4474 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 4475 { 4476 BdrvOpBlocker *blocker; 4477 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 4478 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 4479 blocker = QLIST_FIRST(&bs->op_blockers[op]); 4480 error_propagate(errp, error_copy(blocker->reason)); 4481 error_prepend(errp, "Node '%s' is busy: ", 4482 bdrv_get_device_or_node_name(bs)); 4483 return true; 4484 } 4485 return false; 4486 } 4487 4488 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 4489 { 4490 BdrvOpBlocker *blocker; 4491 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 4492 4493 blocker = g_new0(BdrvOpBlocker, 1); 4494 blocker->reason = reason; 4495 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 4496 } 4497 4498 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 4499 { 4500 BdrvOpBlocker *blocker, *next; 4501 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 4502 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 4503 if (blocker->reason == reason) { 4504 QLIST_REMOVE(blocker, list); 4505 g_free(blocker); 4506 } 4507 } 4508 } 4509 4510 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 4511 { 4512 int i; 4513 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 4514 bdrv_op_block(bs, i, reason); 4515 } 4516 } 4517 4518 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 4519 { 4520 int i; 4521 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 4522 bdrv_op_unblock(bs, i, reason); 4523 } 4524 } 4525 4526 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 4527 { 4528 int i; 4529 4530 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 4531 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 4532 return 
false; 4533 } 4534 } 4535 return true; 4536 } 4537 4538 void bdrv_img_create(const char *filename, const char *fmt, 4539 const char *base_filename, const char *base_fmt, 4540 char *options, uint64_t img_size, int flags, bool quiet, 4541 Error **errp) 4542 { 4543 QemuOptsList *create_opts = NULL; 4544 QemuOpts *opts = NULL; 4545 const char *backing_fmt, *backing_file; 4546 int64_t size; 4547 BlockDriver *drv, *proto_drv; 4548 Error *local_err = NULL; 4549 int ret = 0; 4550 4551 /* Find driver and parse its options */ 4552 drv = bdrv_find_format(fmt); 4553 if (!drv) { 4554 error_setg(errp, "Unknown file format '%s'", fmt); 4555 return; 4556 } 4557 4558 proto_drv = bdrv_find_protocol(filename, true, errp); 4559 if (!proto_drv) { 4560 return; 4561 } 4562 4563 if (!drv->create_opts) { 4564 error_setg(errp, "Format driver '%s' does not support image creation", 4565 drv->format_name); 4566 return; 4567 } 4568 4569 if (!proto_drv->create_opts) { 4570 error_setg(errp, "Protocol driver '%s' does not support image creation", 4571 proto_drv->format_name); 4572 return; 4573 } 4574 4575 create_opts = qemu_opts_append(create_opts, drv->create_opts); 4576 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 4577 4578 /* Create parameter list with default values */ 4579 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 4580 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 4581 4582 /* Parse -o options */ 4583 if (options) { 4584 qemu_opts_do_parse(opts, options, NULL, &local_err); 4585 if (local_err) { 4586 error_report_err(local_err); 4587 local_err = NULL; 4588 error_setg(errp, "Invalid options for file format '%s'", fmt); 4589 goto out; 4590 } 4591 } 4592 4593 if (base_filename) { 4594 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 4595 if (local_err) { 4596 error_setg(errp, "Backing file not supported for file format '%s'", 4597 fmt); 4598 goto out; 4599 } 4600 } 4601 4602 if (base_fmt) { 4603 
qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 4604 if (local_err) { 4605 error_setg(errp, "Backing file format not supported for file " 4606 "format '%s'", fmt); 4607 goto out; 4608 } 4609 } 4610 4611 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 4612 if (backing_file) { 4613 if (!strcmp(filename, backing_file)) { 4614 error_setg(errp, "Error: Trying to create an image with the " 4615 "same filename as the backing file"); 4616 goto out; 4617 } 4618 } 4619 4620 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 4621 4622 /* The size for the image must always be specified, unless we have a backing 4623 * file and we have not been forbidden from opening it. */ 4624 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size); 4625 if (backing_file && !(flags & BDRV_O_NO_BACKING)) { 4626 BlockDriverState *bs; 4627 char *full_backing = g_new0(char, PATH_MAX); 4628 int back_flags; 4629 QDict *backing_options = NULL; 4630 4631 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 4632 full_backing, PATH_MAX, 4633 &local_err); 4634 if (local_err) { 4635 g_free(full_backing); 4636 goto out; 4637 } 4638 4639 /* backing files always opened read-only */ 4640 back_flags = flags; 4641 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 4642 4643 backing_options = qdict_new(); 4644 if (backing_fmt) { 4645 qdict_put_str(backing_options, "driver", backing_fmt); 4646 } 4647 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); 4648 4649 bs = bdrv_open(full_backing, NULL, backing_options, back_flags, 4650 &local_err); 4651 g_free(full_backing); 4652 if (!bs && size != -1) { 4653 /* Couldn't open BS, but we have a size, so it's nonfatal */ 4654 warn_reportf_err(local_err, 4655 "Could not verify backing image. 
" 4656 "This may become an error in future versions.\n"); 4657 local_err = NULL; 4658 } else if (!bs) { 4659 /* Couldn't open bs, do not have size */ 4660 error_append_hint(&local_err, 4661 "Could not open backing image to determine size.\n"); 4662 goto out; 4663 } else { 4664 if (size == -1) { 4665 /* Opened BS, have no size */ 4666 size = bdrv_getlength(bs); 4667 if (size < 0) { 4668 error_setg_errno(errp, -size, "Could not get size of '%s'", 4669 backing_file); 4670 bdrv_unref(bs); 4671 goto out; 4672 } 4673 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 4674 } 4675 bdrv_unref(bs); 4676 } 4677 } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ 4678 4679 if (size == -1) { 4680 error_setg(errp, "Image creation needs a size parameter"); 4681 goto out; 4682 } 4683 4684 if (!quiet) { 4685 printf("Formatting '%s', fmt=%s ", filename, fmt); 4686 qemu_opts_print(opts, " "); 4687 puts(""); 4688 } 4689 4690 ret = bdrv_create(drv, filename, opts, &local_err); 4691 4692 if (ret == -EFBIG) { 4693 /* This is generally a better message than whatever the driver would 4694 * deliver (especially because of the cluster_size_hint), since that 4695 * is most probably not much different from "image too large". 
*/ 4696 const char *cluster_size_hint = ""; 4697 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 4698 cluster_size_hint = " (try using a larger cluster size)"; 4699 } 4700 error_setg(errp, "The image size is too large for file format '%s'" 4701 "%s", fmt, cluster_size_hint); 4702 error_free(local_err); 4703 local_err = NULL; 4704 } 4705 4706 out: 4707 qemu_opts_del(opts); 4708 qemu_opts_free(create_opts); 4709 error_propagate(errp, local_err); 4710 } 4711 4712 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 4713 { 4714 return bs->aio_context; 4715 } 4716 4717 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) 4718 { 4719 aio_co_enter(bdrv_get_aio_context(bs), co); 4720 } 4721 4722 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) 4723 { 4724 QLIST_REMOVE(ban, list); 4725 g_free(ban); 4726 } 4727 4728 void bdrv_detach_aio_context(BlockDriverState *bs) 4729 { 4730 BdrvAioNotifier *baf, *baf_tmp; 4731 BdrvChild *child; 4732 4733 if (!bs->drv) { 4734 return; 4735 } 4736 4737 assert(!bs->walking_aio_notifiers); 4738 bs->walking_aio_notifiers = true; 4739 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { 4740 if (baf->deleted) { 4741 bdrv_do_remove_aio_context_notifier(baf); 4742 } else { 4743 baf->detach_aio_context(baf->opaque); 4744 } 4745 } 4746 /* Never mind iterating again to check for ->deleted. bdrv_close() will 4747 * remove remaining aio notifiers if we aren't called again. 
4748 */ 4749 bs->walking_aio_notifiers = false; 4750 4751 if (bs->drv->bdrv_detach_aio_context) { 4752 bs->drv->bdrv_detach_aio_context(bs); 4753 } 4754 QLIST_FOREACH(child, &bs->children, next) { 4755 bdrv_detach_aio_context(child->bs); 4756 } 4757 4758 bs->aio_context = NULL; 4759 } 4760 4761 void bdrv_attach_aio_context(BlockDriverState *bs, 4762 AioContext *new_context) 4763 { 4764 BdrvAioNotifier *ban, *ban_tmp; 4765 BdrvChild *child; 4766 4767 if (!bs->drv) { 4768 return; 4769 } 4770 4771 bs->aio_context = new_context; 4772 4773 QLIST_FOREACH(child, &bs->children, next) { 4774 bdrv_attach_aio_context(child->bs, new_context); 4775 } 4776 if (bs->drv->bdrv_attach_aio_context) { 4777 bs->drv->bdrv_attach_aio_context(bs, new_context); 4778 } 4779 4780 assert(!bs->walking_aio_notifiers); 4781 bs->walking_aio_notifiers = true; 4782 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) { 4783 if (ban->deleted) { 4784 bdrv_do_remove_aio_context_notifier(ban); 4785 } else { 4786 ban->attached_aio_context(new_context, ban->opaque); 4787 } 4788 } 4789 bs->walking_aio_notifiers = false; 4790 } 4791 4792 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 4793 { 4794 AioContext *ctx = bdrv_get_aio_context(bs); 4795 4796 aio_disable_external(ctx); 4797 bdrv_parent_drained_begin(bs, NULL); 4798 bdrv_drain(bs); /* ensure there are no in-flight requests */ 4799 4800 while (aio_poll(ctx, false)) { 4801 /* wait for all bottom halves to execute */ 4802 } 4803 4804 bdrv_detach_aio_context(bs); 4805 4806 /* This function executes in the old AioContext so acquire the new one in 4807 * case it runs in a different thread. 
4808 */ 4809 aio_context_acquire(new_context); 4810 bdrv_attach_aio_context(bs, new_context); 4811 bdrv_parent_drained_end(bs, NULL); 4812 aio_enable_external(ctx); 4813 aio_context_release(new_context); 4814 } 4815 4816 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 4817 void (*attached_aio_context)(AioContext *new_context, void *opaque), 4818 void (*detach_aio_context)(void *opaque), void *opaque) 4819 { 4820 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 4821 *ban = (BdrvAioNotifier){ 4822 .attached_aio_context = attached_aio_context, 4823 .detach_aio_context = detach_aio_context, 4824 .opaque = opaque 4825 }; 4826 4827 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 4828 } 4829 4830 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 4831 void (*attached_aio_context)(AioContext *, 4832 void *), 4833 void (*detach_aio_context)(void *), 4834 void *opaque) 4835 { 4836 BdrvAioNotifier *ban, *ban_next; 4837 4838 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 4839 if (ban->attached_aio_context == attached_aio_context && 4840 ban->detach_aio_context == detach_aio_context && 4841 ban->opaque == opaque && 4842 ban->deleted == false) 4843 { 4844 if (bs->walking_aio_notifiers) { 4845 ban->deleted = true; 4846 } else { 4847 bdrv_do_remove_aio_context_notifier(ban); 4848 } 4849 return; 4850 } 4851 } 4852 4853 abort(); 4854 } 4855 4856 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 4857 BlockDriverAmendStatusCB *status_cb, void *cb_opaque) 4858 { 4859 if (!bs->drv) { 4860 return -ENOMEDIUM; 4861 } 4862 if (!bs->drv->bdrv_amend_options) { 4863 return -ENOTSUP; 4864 } 4865 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque); 4866 } 4867 4868 /* This function will be called by the bdrv_recurse_is_first_non_filter method 4869 * of block filter and by bdrv_is_first_non_filter. 4870 * It is used to test if the given bs is the candidate or recurse more in the 4871 * node graph. 
4872 */ 4873 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 4874 BlockDriverState *candidate) 4875 { 4876 /* return false if basic checks fails */ 4877 if (!bs || !bs->drv) { 4878 return false; 4879 } 4880 4881 /* the code reached a non block filter driver -> check if the bs is 4882 * the same as the candidate. It's the recursion termination condition. 4883 */ 4884 if (!bs->drv->is_filter) { 4885 return bs == candidate; 4886 } 4887 /* Down this path the driver is a block filter driver */ 4888 4889 /* If the block filter recursion method is defined use it to recurse down 4890 * the node graph. 4891 */ 4892 if (bs->drv->bdrv_recurse_is_first_non_filter) { 4893 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 4894 } 4895 4896 /* the driver is a block filter but don't allow to recurse -> return false 4897 */ 4898 return false; 4899 } 4900 4901 /* This function checks if the candidate is the first non filter bs down it's 4902 * bs chain. Since we don't have pointers to parents it explore all bs chains 4903 * from the top. Some filters can choose not to pass down the recursion. 
4904 */ 4905 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 4906 { 4907 BlockDriverState *bs; 4908 BdrvNextIterator it; 4909 4910 /* walk down the bs forest recursively */ 4911 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 4912 bool perm; 4913 4914 /* try to recurse in this top level bs */ 4915 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 4916 4917 /* candidate is the first non filter */ 4918 if (perm) { 4919 bdrv_next_cleanup(&it); 4920 return true; 4921 } 4922 } 4923 4924 return false; 4925 } 4926 4927 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 4928 const char *node_name, Error **errp) 4929 { 4930 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 4931 AioContext *aio_context; 4932 4933 if (!to_replace_bs) { 4934 error_setg(errp, "Node name '%s' not found", node_name); 4935 return NULL; 4936 } 4937 4938 aio_context = bdrv_get_aio_context(to_replace_bs); 4939 aio_context_acquire(aio_context); 4940 4941 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 4942 to_replace_bs = NULL; 4943 goto out; 4944 } 4945 4946 /* We don't want arbitrary node of the BDS chain to be replaced only the top 4947 * most non filter in order to prevent data corruption. 4948 * Another benefit is that this tests exclude backing files which are 4949 * blocked by the backing blockers. 
4950 */ 4951 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) { 4952 error_setg(errp, "Only top most non filter can be replaced"); 4953 to_replace_bs = NULL; 4954 goto out; 4955 } 4956 4957 out: 4958 aio_context_release(aio_context); 4959 return to_replace_bs; 4960 } 4961 4962 static bool append_open_options(QDict *d, BlockDriverState *bs) 4963 { 4964 const QDictEntry *entry; 4965 QemuOptDesc *desc; 4966 BdrvChild *child; 4967 bool found_any = false; 4968 const char *p; 4969 4970 for (entry = qdict_first(bs->options); entry; 4971 entry = qdict_next(bs->options, entry)) 4972 { 4973 /* Exclude options for children */ 4974 QLIST_FOREACH(child, &bs->children, next) { 4975 if (strstart(qdict_entry_key(entry), child->name, &p) 4976 && (!*p || *p == '.')) 4977 { 4978 break; 4979 } 4980 } 4981 if (child) { 4982 continue; 4983 } 4984 4985 /* And exclude all non-driver-specific options */ 4986 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) { 4987 if (!strcmp(qdict_entry_key(entry), desc->name)) { 4988 break; 4989 } 4990 } 4991 if (desc->name) { 4992 continue; 4993 } 4994 4995 qobject_incref(qdict_entry_value(entry)); 4996 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 4997 found_any = true; 4998 } 4999 5000 return found_any; 5001 } 5002 5003 /* Updates the following BDS fields: 5004 * - exact_filename: A filename which may be used for opening a block device 5005 * which (mostly) equals the given BDS (even without any 5006 * other options; so reading and writing must return the same 5007 * results, but caching etc. may be different) 5008 * - full_open_options: Options which, when given when opening a block device 5009 * (without a filename), result in a BDS (mostly) 5010 * equalling the given one 5011 * - filename: If exact_filename is set, it is copied here. Otherwise, 5012 * full_open_options is converted to a JSON object, prefixed with 5013 * "json:" (for use through the JSON pseudo protocol) and put here. 
5014 */ 5015 void bdrv_refresh_filename(BlockDriverState *bs) 5016 { 5017 BlockDriver *drv = bs->drv; 5018 QDict *opts; 5019 5020 if (!drv) { 5021 return; 5022 } 5023 5024 /* This BDS's file name will most probably depend on its file's name, so 5025 * refresh that first */ 5026 if (bs->file) { 5027 bdrv_refresh_filename(bs->file->bs); 5028 } 5029 5030 if (drv->bdrv_refresh_filename) { 5031 /* Obsolete information is of no use here, so drop the old file name 5032 * information before refreshing it */ 5033 bs->exact_filename[0] = '\0'; 5034 if (bs->full_open_options) { 5035 QDECREF(bs->full_open_options); 5036 bs->full_open_options = NULL; 5037 } 5038 5039 opts = qdict_new(); 5040 append_open_options(opts, bs); 5041 drv->bdrv_refresh_filename(bs, opts); 5042 QDECREF(opts); 5043 } else if (bs->file) { 5044 /* Try to reconstruct valid information from the underlying file */ 5045 bool has_open_options; 5046 5047 bs->exact_filename[0] = '\0'; 5048 if (bs->full_open_options) { 5049 QDECREF(bs->full_open_options); 5050 bs->full_open_options = NULL; 5051 } 5052 5053 opts = qdict_new(); 5054 has_open_options = append_open_options(opts, bs); 5055 5056 /* If no specific options have been given for this BDS, the filename of 5057 * the underlying file should suffice for this one as well */ 5058 if (bs->file->bs->exact_filename[0] && !has_open_options) { 5059 strcpy(bs->exact_filename, bs->file->bs->exact_filename); 5060 } 5061 /* Reconstructing the full options QDict is simple for most format block 5062 * drivers, as long as the full options are known for the underlying 5063 * file BDS. The full options QDict of that file BDS should somehow 5064 * contain a representation of the filename, therefore the following 5065 * suffices without querying the (exact_)filename of this BDS. 
*/ 5066 if (bs->file->bs->full_open_options) { 5067 qdict_put_str(opts, "driver", drv->format_name); 5068 QINCREF(bs->file->bs->full_open_options); 5069 qdict_put(opts, "file", bs->file->bs->full_open_options); 5070 5071 bs->full_open_options = opts; 5072 } else { 5073 QDECREF(opts); 5074 } 5075 } else if (!bs->full_open_options && qdict_size(bs->options)) { 5076 /* There is no underlying file BDS (at least referenced by BDS.file), 5077 * so the full options QDict should be equal to the options given 5078 * specifically for this block device when it was opened (plus the 5079 * driver specification). 5080 * Because those options don't change, there is no need to update 5081 * full_open_options when it's already set. */ 5082 5083 opts = qdict_new(); 5084 append_open_options(opts, bs); 5085 qdict_put_str(opts, "driver", drv->format_name); 5086 5087 if (bs->exact_filename[0]) { 5088 /* This may not work for all block protocol drivers (some may 5089 * require this filename to be parsed), but we have to find some 5090 * default solution here, so just include it. If some block driver 5091 * does not support pure options without any filename at all or 5092 * needs some special format of the options QDict, it needs to 5093 * implement the driver-specific bdrv_refresh_filename() function. 5094 */ 5095 qdict_put_str(opts, "filename", bs->exact_filename); 5096 } 5097 5098 bs->full_open_options = opts; 5099 } 5100 5101 if (bs->exact_filename[0]) { 5102 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 5103 } else if (bs->full_open_options) { 5104 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 5105 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 5106 qstring_get_str(json)); 5107 QDECREF(json); 5108 } 5109 } 5110 5111 /* 5112 * Hot add/remove a BDS's child. 
So the user can take a child offline when 5113 * it is broken and take a new child online 5114 */ 5115 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 5116 Error **errp) 5117 { 5118 5119 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 5120 error_setg(errp, "The node %s does not support adding a child", 5121 bdrv_get_device_or_node_name(parent_bs)); 5122 return; 5123 } 5124 5125 if (!QLIST_EMPTY(&child_bs->parents)) { 5126 error_setg(errp, "The node %s already has a parent", 5127 child_bs->node_name); 5128 return; 5129 } 5130 5131 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 5132 } 5133 5134 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 5135 { 5136 BdrvChild *tmp; 5137 5138 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 5139 error_setg(errp, "The node %s does not support removing a child", 5140 bdrv_get_device_or_node_name(parent_bs)); 5141 return; 5142 } 5143 5144 QLIST_FOREACH(tmp, &parent_bs->children, next) { 5145 if (tmp == child) { 5146 break; 5147 } 5148 } 5149 5150 if (!tmp) { 5151 error_setg(errp, "The node %s does not have a child named %s", 5152 bdrv_get_device_or_node_name(parent_bs), 5153 bdrv_get_device_or_node_name(child->bs)); 5154 return; 5155 } 5156 5157 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 5158 } 5159 5160 bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, 5161 uint32_t granularity, Error **errp) 5162 { 5163 BlockDriver *drv = bs->drv; 5164 5165 if (!drv) { 5166 error_setg_errno(errp, ENOMEDIUM, 5167 "Can't store persistent bitmaps to %s", 5168 bdrv_get_device_or_node_name(bs)); 5169 return false; 5170 } 5171 5172 if (!drv->bdrv_can_store_new_dirty_bitmap) { 5173 error_setg_errno(errp, ENOTSUP, 5174 "Can't store persistent bitmaps to %s", 5175 bdrv_get_device_or_node_name(bs)); 5176 return false; 5177 } 5178 5179 return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp); 5180 } 5181