1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 #include "qemu/osdep.h" 25 #include "trace.h" 26 #include "block/block_int.h" 27 #include "block/blockjob.h" 28 #include "qemu/error-report.h" 29 #include "qemu/module.h" 30 #include "qapi/qmp/qerror.h" 31 #include "qapi/qmp/qbool.h" 32 #include "qapi/qmp/qjson.h" 33 #include "sysemu/block-backend.h" 34 #include "sysemu/sysemu.h" 35 #include "qemu/notify.h" 36 #include "qemu/coroutine.h" 37 #include "block/qapi.h" 38 #include "qmp-commands.h" 39 #include "qemu/timer.h" 40 #include "qapi-event.h" 41 #include "block/throttle-groups.h" 42 #include "qemu/cutils.h" 43 #include "qemu/id.h" 44 45 #ifdef CONFIG_BSD 46 #include <sys/ioctl.h> 47 #include <sys/queue.h> 48 #ifndef __DragonFly__ 49 #include <sys/disk.h> 50 #endif 51 #endif 52 53 #ifdef _WIN32 54 #include <windows.h> 55 #endif 56 57 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 58 59 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 60 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 61 62 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = 63 QTAILQ_HEAD_INITIALIZER(all_bdrv_states); 64 65 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 66 QLIST_HEAD_INITIALIZER(bdrv_drivers); 67 68 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 69 const char *reference, QDict *options, int flags, 70 BlockDriverState *parent, 71 const BdrvChildRole *child_role, Error **errp); 72 73 /* If non-zero, use only whitelisted block drivers */ 74 static int use_bdrv_whitelist; 75 76 static void bdrv_close(BlockDriverState *bs); 77 78 #ifdef _WIN32 79 static int is_windows_drive_prefix(const char *filename) 80 { 81 return (((filename[0] >= 'a' && filename[0] <= 'z') || 82 (filename[0] >= 'A' && filename[0] <= 'Z')) && 83 filename[1] == ':'); 84 } 85 86 int is_windows_drive(const char *filename) 87 { 88 if (is_windows_drive_prefix(filename) && 89 filename[2] == '\0') 90 return 1; 91 if (strstart(filename, "\\\\.\\", NULL) || 92 strstart(filename, "//./", 
NULL)) 93 return 1; 94 return 0; 95 } 96 #endif 97 98 size_t bdrv_opt_mem_align(BlockDriverState *bs) 99 { 100 if (!bs || !bs->drv) { 101 /* page size or 4k (hdd sector size) should be on the safe side */ 102 return MAX(4096, getpagesize()); 103 } 104 105 return bs->bl.opt_mem_alignment; 106 } 107 108 size_t bdrv_min_mem_align(BlockDriverState *bs) 109 { 110 if (!bs || !bs->drv) { 111 /* page size or 4k (hdd sector size) should be on the safe side */ 112 return MAX(4096, getpagesize()); 113 } 114 115 return bs->bl.min_mem_alignment; 116 } 117 118 /* check if the path starts with "<protocol>:" */ 119 int path_has_protocol(const char *path) 120 { 121 const char *p; 122 123 #ifdef _WIN32 124 if (is_windows_drive(path) || 125 is_windows_drive_prefix(path)) { 126 return 0; 127 } 128 p = path + strcspn(path, ":/\\"); 129 #else 130 p = path + strcspn(path, ":/"); 131 #endif 132 133 return *p == ':'; 134 } 135 136 int path_is_absolute(const char *path) 137 { 138 #ifdef _WIN32 139 /* specific case for names like: "\\.\d:" */ 140 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 141 return 1; 142 } 143 return (*path == '/' || *path == '\\'); 144 #else 145 return (*path == '/'); 146 #endif 147 } 148 149 /* if filename is absolute, just copy it to dest. Otherwise, build a 150 path to it by considering it is relative to base_path. URL are 151 supported. 
*/ 152 void path_combine(char *dest, int dest_size, 153 const char *base_path, 154 const char *filename) 155 { 156 const char *p, *p1; 157 int len; 158 159 if (dest_size <= 0) 160 return; 161 if (path_is_absolute(filename)) { 162 pstrcpy(dest, dest_size, filename); 163 } else { 164 p = strchr(base_path, ':'); 165 if (p) 166 p++; 167 else 168 p = base_path; 169 p1 = strrchr(base_path, '/'); 170 #ifdef _WIN32 171 { 172 const char *p2; 173 p2 = strrchr(base_path, '\\'); 174 if (!p1 || p2 > p1) 175 p1 = p2; 176 } 177 #endif 178 if (p1) 179 p1++; 180 else 181 p1 = base_path; 182 if (p1 > p) 183 p = p1; 184 len = p - base_path; 185 if (len > dest_size - 1) 186 len = dest_size - 1; 187 memcpy(dest, base_path, len); 188 dest[len] = '\0'; 189 pstrcat(dest, dest_size, filename); 190 } 191 } 192 193 void bdrv_get_full_backing_filename_from_filename(const char *backed, 194 const char *backing, 195 char *dest, size_t sz, 196 Error **errp) 197 { 198 if (backing[0] == '\0' || path_has_protocol(backing) || 199 path_is_absolute(backing)) 200 { 201 pstrcpy(dest, sz, backing); 202 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 203 error_setg(errp, "Cannot use relative backing file names for '%s'", 204 backed); 205 } else { 206 path_combine(dest, sz, backed, backing); 207 } 208 } 209 210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 211 Error **errp) 212 { 213 char *backed = bs->exact_filename[0] ? 
bs->exact_filename : bs->filename; 214 215 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 216 dest, sz, errp); 217 } 218 219 void bdrv_register(BlockDriver *bdrv) 220 { 221 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 222 } 223 224 BlockDriverState *bdrv_new_root(void) 225 { 226 return bdrv_new(); 227 } 228 229 BlockDriverState *bdrv_new(void) 230 { 231 BlockDriverState *bs; 232 int i; 233 234 bs = g_new0(BlockDriverState, 1); 235 QLIST_INIT(&bs->dirty_bitmaps); 236 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 237 QLIST_INIT(&bs->op_blockers[i]); 238 } 239 notifier_with_return_list_init(&bs->before_write_notifiers); 240 qemu_co_queue_init(&bs->throttled_reqs[0]); 241 qemu_co_queue_init(&bs->throttled_reqs[1]); 242 bs->refcnt = 1; 243 bs->aio_context = qemu_get_aio_context(); 244 245 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); 246 247 return bs; 248 } 249 250 BlockDriver *bdrv_find_format(const char *format_name) 251 { 252 BlockDriver *drv1; 253 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 254 if (!strcmp(drv1->format_name, format_name)) { 255 return drv1; 256 } 257 } 258 return NULL; 259 } 260 261 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 262 { 263 static const char *whitelist_rw[] = { 264 CONFIG_BDRV_RW_WHITELIST 265 }; 266 static const char *whitelist_ro[] = { 267 CONFIG_BDRV_RO_WHITELIST 268 }; 269 const char **p; 270 271 if (!whitelist_rw[0] && !whitelist_ro[0]) { 272 return 1; /* no whitelist, anything goes */ 273 } 274 275 for (p = whitelist_rw; *p; p++) { 276 if (!strcmp(drv->format_name, *p)) { 277 return 1; 278 } 279 } 280 if (read_only) { 281 for (p = whitelist_ro; *p; p++) { 282 if (!strcmp(drv->format_name, *p)) { 283 return 1; 284 } 285 } 286 } 287 return 0; 288 } 289 290 bool bdrv_uses_whitelist(void) 291 { 292 return use_bdrv_whitelist; 293 } 294 295 typedef struct CreateCo { 296 BlockDriver *drv; 297 char *filename; 298 QemuOpts *opts; 299 int ret; 300 Error *err; 301 } CreateCo; 302 303 static void 
coroutine_fn bdrv_create_co_entry(void *opaque) 304 { 305 Error *local_err = NULL; 306 int ret; 307 308 CreateCo *cco = opaque; 309 assert(cco->drv); 310 311 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 312 if (local_err) { 313 error_propagate(&cco->err, local_err); 314 } 315 cco->ret = ret; 316 } 317 318 int bdrv_create(BlockDriver *drv, const char* filename, 319 QemuOpts *opts, Error **errp) 320 { 321 int ret; 322 323 Coroutine *co; 324 CreateCo cco = { 325 .drv = drv, 326 .filename = g_strdup(filename), 327 .opts = opts, 328 .ret = NOT_DONE, 329 .err = NULL, 330 }; 331 332 if (!drv->bdrv_create) { 333 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 334 ret = -ENOTSUP; 335 goto out; 336 } 337 338 if (qemu_in_coroutine()) { 339 /* Fast-path if already in coroutine context */ 340 bdrv_create_co_entry(&cco); 341 } else { 342 co = qemu_coroutine_create(bdrv_create_co_entry); 343 qemu_coroutine_enter(co, &cco); 344 while (cco.ret == NOT_DONE) { 345 aio_poll(qemu_get_aio_context(), true); 346 } 347 } 348 349 ret = cco.ret; 350 if (ret < 0) { 351 if (cco.err) { 352 error_propagate(errp, cco.err); 353 } else { 354 error_setg_errno(errp, -ret, "Could not create image"); 355 } 356 } 357 358 out: 359 g_free(cco.filename); 360 return ret; 361 } 362 363 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 364 { 365 BlockDriver *drv; 366 Error *local_err = NULL; 367 int ret; 368 369 drv = bdrv_find_protocol(filename, true, errp); 370 if (drv == NULL) { 371 return -ENOENT; 372 } 373 374 ret = bdrv_create(drv, filename, opts, &local_err); 375 if (local_err) { 376 error_propagate(errp, local_err); 377 } 378 return ret; 379 } 380 381 /** 382 * Try to get @bs's logical and physical block size. 383 * On success, store them in @bsz struct and return 0. 384 * On failure return -errno. 385 * @bs must not be empty. 
386 */ 387 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 388 { 389 BlockDriver *drv = bs->drv; 390 391 if (drv && drv->bdrv_probe_blocksizes) { 392 return drv->bdrv_probe_blocksizes(bs, bsz); 393 } 394 395 return -ENOTSUP; 396 } 397 398 /** 399 * Try to get @bs's geometry (cyls, heads, sectors). 400 * On success, store them in @geo struct and return 0. 401 * On failure return -errno. 402 * @bs must not be empty. 403 */ 404 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 405 { 406 BlockDriver *drv = bs->drv; 407 408 if (drv && drv->bdrv_probe_geometry) { 409 return drv->bdrv_probe_geometry(bs, geo); 410 } 411 412 return -ENOTSUP; 413 } 414 415 /* 416 * Create a uniquely-named empty temporary file. 417 * Return 0 upon success, otherwise a negative errno value. 418 */ 419 int get_tmp_filename(char *filename, int size) 420 { 421 #ifdef _WIN32 422 char temp_dir[MAX_PATH]; 423 /* GetTempFileName requires that its output buffer (4th param) 424 have length MAX_PATH or greater. */ 425 assert(size >= MAX_PATH); 426 return (GetTempPath(MAX_PATH, temp_dir) 427 && GetTempFileName(temp_dir, "qem", 0, filename) 428 ? 0 : -GetLastError()); 429 #else 430 int fd; 431 const char *tmpdir; 432 tmpdir = getenv("TMPDIR"); 433 if (!tmpdir) { 434 tmpdir = "/var/tmp"; 435 } 436 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 437 return -EOVERFLOW; 438 } 439 fd = mkstemp(filename); 440 if (fd < 0) { 441 return -errno; 442 } 443 if (close(fd) != 0) { 444 unlink(filename); 445 return -errno; 446 } 447 return 0; 448 #endif 449 } 450 451 /* 452 * Detect host devices. By convention, /dev/cdrom[N] is always 453 * recognized as a host CDROM. 
454 */ 455 static BlockDriver *find_hdev_driver(const char *filename) 456 { 457 int score_max = 0, score; 458 BlockDriver *drv = NULL, *d; 459 460 QLIST_FOREACH(d, &bdrv_drivers, list) { 461 if (d->bdrv_probe_device) { 462 score = d->bdrv_probe_device(filename); 463 if (score > score_max) { 464 score_max = score; 465 drv = d; 466 } 467 } 468 } 469 470 return drv; 471 } 472 473 BlockDriver *bdrv_find_protocol(const char *filename, 474 bool allow_protocol_prefix, 475 Error **errp) 476 { 477 BlockDriver *drv1; 478 char protocol[128]; 479 int len; 480 const char *p; 481 482 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 483 484 /* 485 * XXX(hch): we really should not let host device detection 486 * override an explicit protocol specification, but moving this 487 * later breaks access to device names with colons in them. 488 * Thanks to the brain-dead persistent naming schemes on udev- 489 * based Linux systems those actually are quite common. 490 */ 491 drv1 = find_hdev_driver(filename); 492 if (drv1) { 493 return drv1; 494 } 495 496 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 497 return &bdrv_file; 498 } 499 500 p = strchr(filename, ':'); 501 assert(p != NULL); 502 len = p - filename; 503 if (len > sizeof(protocol) - 1) 504 len = sizeof(protocol) - 1; 505 memcpy(protocol, filename, len); 506 protocol[len] = '\0'; 507 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 508 if (drv1->protocol_name && 509 !strcmp(drv1->protocol_name, protocol)) { 510 return drv1; 511 } 512 } 513 514 error_setg(errp, "Unknown protocol '%s'", protocol); 515 return NULL; 516 } 517 518 /* 519 * Guess image format by probing its contents. 520 * This is not a good idea when your image is raw (CVE-2008-2004), but 521 * we do it anyway for backward compatibility. 522 * 523 * @buf contains the image's first @buf_size bytes. 
524 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 525 * but can be smaller if the image file is smaller) 526 * @filename is its filename. 527 * 528 * For all block drivers, call the bdrv_probe() method to get its 529 * probing score. 530 * Return the first block driver with the highest probing score. 531 */ 532 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 533 const char *filename) 534 { 535 int score_max = 0, score; 536 BlockDriver *drv = NULL, *d; 537 538 QLIST_FOREACH(d, &bdrv_drivers, list) { 539 if (d->bdrv_probe) { 540 score = d->bdrv_probe(buf, buf_size, filename); 541 if (score > score_max) { 542 score_max = score; 543 drv = d; 544 } 545 } 546 } 547 548 return drv; 549 } 550 551 static int find_image_format(BlockDriverState *bs, const char *filename, 552 BlockDriver **pdrv, Error **errp) 553 { 554 BlockDriver *drv; 555 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 556 int ret = 0; 557 558 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 559 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 560 *pdrv = &bdrv_raw; 561 return ret; 562 } 563 564 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 565 if (ret < 0) { 566 error_setg_errno(errp, -ret, "Could not read image for determining its " 567 "format"); 568 *pdrv = NULL; 569 return ret; 570 } 571 572 drv = bdrv_probe_all(buf, ret, filename); 573 if (!drv) { 574 error_setg(errp, "Could not determine image format: No compatible " 575 "driver found"); 576 ret = -ENOENT; 577 } 578 *pdrv = drv; 579 return ret; 580 } 581 582 /** 583 * Set the current 'total_sectors' value 584 * Return 0 on success, -errno on error. 
585 */ 586 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 587 { 588 BlockDriver *drv = bs->drv; 589 590 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 591 if (bdrv_is_sg(bs)) 592 return 0; 593 594 /* query actual device if possible, otherwise just trust the hint */ 595 if (drv->bdrv_getlength) { 596 int64_t length = drv->bdrv_getlength(bs); 597 if (length < 0) { 598 return length; 599 } 600 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 601 } 602 603 bs->total_sectors = hint; 604 return 0; 605 } 606 607 /** 608 * Combines a QDict of new block driver @options with any missing options taken 609 * from @old_options, so that leaving out an option defaults to its old value. 610 */ 611 static void bdrv_join_options(BlockDriverState *bs, QDict *options, 612 QDict *old_options) 613 { 614 if (bs->drv && bs->drv->bdrv_join_options) { 615 bs->drv->bdrv_join_options(options, old_options); 616 } else { 617 qdict_join(options, old_options, false); 618 } 619 } 620 621 /** 622 * Set open flags for a given discard mode 623 * 624 * Return 0 on success, -1 if the discard mode was invalid. 625 */ 626 int bdrv_parse_discard_flags(const char *mode, int *flags) 627 { 628 *flags &= ~BDRV_O_UNMAP; 629 630 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 631 /* do nothing */ 632 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 633 *flags |= BDRV_O_UNMAP; 634 } else { 635 return -1; 636 } 637 638 return 0; 639 } 640 641 /** 642 * Set open flags for a given cache mode 643 * 644 * Return 0 on success, -1 if the cache mode was invalid. 
645 */ 646 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) 647 { 648 *flags &= ~BDRV_O_CACHE_MASK; 649 650 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 651 *writethrough = false; 652 *flags |= BDRV_O_NOCACHE; 653 } else if (!strcmp(mode, "directsync")) { 654 *writethrough = true; 655 *flags |= BDRV_O_NOCACHE; 656 } else if (!strcmp(mode, "writeback")) { 657 *writethrough = false; 658 } else if (!strcmp(mode, "unsafe")) { 659 *writethrough = false; 660 *flags |= BDRV_O_NO_FLUSH; 661 } else if (!strcmp(mode, "writethrough")) { 662 *writethrough = true; 663 } else { 664 return -1; 665 } 666 667 return 0; 668 } 669 670 /* 671 * Returns the options and flags that a temporary snapshot should get, based on 672 * the originally requested flags (the originally requested image will have 673 * flags like a backing file) 674 */ 675 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, 676 int parent_flags, QDict *parent_options) 677 { 678 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 679 680 /* For temporary files, unconditional cache=unsafe is fine */ 681 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); 682 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); 683 } 684 685 /* 686 * Returns the options and flags that bs->file should get if a protocol driver 687 * is expected, based on the given options and flags for the parent BDS 688 */ 689 static void bdrv_inherited_options(int *child_flags, QDict *child_options, 690 int parent_flags, QDict *parent_options) 691 { 692 int flags = parent_flags; 693 694 /* Enable protocol handling, disable format probing for bs->file */ 695 flags |= BDRV_O_PROTOCOL; 696 697 /* If the cache mode isn't explicitly set, inherit direct and no-flush from 698 * the parent. 
*/ 699 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 700 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 701 702 /* Our block drivers take care to send flushes and respect unmap policy, 703 * so we can default to enable both on lower layers regardless of the 704 * corresponding parent options. */ 705 flags |= BDRV_O_UNMAP; 706 707 /* Clear flags that only apply to the top layer */ 708 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | 709 BDRV_O_NO_IO); 710 711 *child_flags = flags; 712 } 713 714 const BdrvChildRole child_file = { 715 .inherit_options = bdrv_inherited_options, 716 }; 717 718 /* 719 * Returns the options and flags that bs->file should get if the use of formats 720 * (and not only protocols) is permitted for it, based on the given options and 721 * flags for the parent BDS 722 */ 723 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, 724 int parent_flags, QDict *parent_options) 725 { 726 child_file.inherit_options(child_flags, child_options, 727 parent_flags, parent_options); 728 729 *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO); 730 } 731 732 const BdrvChildRole child_format = { 733 .inherit_options = bdrv_inherited_fmt_options, 734 }; 735 736 /* 737 * Returns the options and flags that bs->backing should get, based on the 738 * given options and flags for the parent BDS 739 */ 740 static void bdrv_backing_options(int *child_flags, QDict *child_options, 741 int parent_flags, QDict *parent_options) 742 { 743 int flags = parent_flags; 744 745 /* The cache mode is inherited unmodified for backing files; except WCE, 746 * which is only applied on the top level (BlockBackend) */ 747 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); 748 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); 749 750 /* backing files always opened read-only */ 751 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 752 753 /* 
snapshot=on is handled on the top layer */ 754 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 755 756 *child_flags = flags; 757 } 758 759 static const BdrvChildRole child_backing = { 760 .inherit_options = bdrv_backing_options, 761 }; 762 763 static int bdrv_open_flags(BlockDriverState *bs, int flags) 764 { 765 int open_flags = flags; 766 767 /* 768 * Clear flags that are internal to the block layer before opening the 769 * image. 770 */ 771 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 772 773 /* 774 * Snapshots should be writable. 775 */ 776 if (flags & BDRV_O_TEMPORARY) { 777 open_flags |= BDRV_O_RDWR; 778 } 779 780 return open_flags; 781 } 782 783 static void update_flags_from_options(int *flags, QemuOpts *opts) 784 { 785 *flags &= ~BDRV_O_CACHE_MASK; 786 787 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH)); 788 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { 789 *flags |= BDRV_O_NO_FLUSH; 790 } 791 792 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT)); 793 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { 794 *flags |= BDRV_O_NOCACHE; 795 } 796 } 797 798 static void update_options_from_flags(QDict *options, int flags) 799 { 800 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { 801 qdict_put(options, BDRV_OPT_CACHE_DIRECT, 802 qbool_from_bool(flags & BDRV_O_NOCACHE)); 803 } 804 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) { 805 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH, 806 qbool_from_bool(flags & BDRV_O_NO_FLUSH)); 807 } 808 } 809 810 static void bdrv_assign_node_name(BlockDriverState *bs, 811 const char *node_name, 812 Error **errp) 813 { 814 char *gen_node_name = NULL; 815 816 if (!node_name) { 817 node_name = gen_node_name = id_generate(ID_BLOCK); 818 } else if (!id_wellformed(node_name)) { 819 /* 820 * Check for empty string or invalid characters, but not if it is 821 * generated (generated names use characters not available to the user) 822 */ 823 error_setg(errp, "Invalid node name"); 
824 return; 825 } 826 827 /* takes care of avoiding namespaces collisions */ 828 if (blk_by_name(node_name)) { 829 error_setg(errp, "node-name=%s is conflicting with a device id", 830 node_name); 831 goto out; 832 } 833 834 /* takes care of avoiding duplicates node names */ 835 if (bdrv_find_node(node_name)) { 836 error_setg(errp, "Duplicate node name"); 837 goto out; 838 } 839 840 /* copy node name into the bs and insert it into the graph list */ 841 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 842 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 843 out: 844 g_free(gen_node_name); 845 } 846 847 static QemuOptsList bdrv_runtime_opts = { 848 .name = "bdrv_common", 849 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 850 .desc = { 851 { 852 .name = "node-name", 853 .type = QEMU_OPT_STRING, 854 .help = "Node name of the block device node", 855 }, 856 { 857 .name = "driver", 858 .type = QEMU_OPT_STRING, 859 .help = "Block driver to use for the node", 860 }, 861 { 862 .name = BDRV_OPT_CACHE_DIRECT, 863 .type = QEMU_OPT_BOOL, 864 .help = "Bypass software writeback cache on the host", 865 }, 866 { 867 .name = BDRV_OPT_CACHE_NO_FLUSH, 868 .type = QEMU_OPT_BOOL, 869 .help = "Ignore flush requests", 870 }, 871 { /* end of list */ } 872 }, 873 }; 874 875 /* 876 * Common part for opening disk images and files 877 * 878 * Removes all processed options from *options. 
879 */ 880 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, 881 QDict *options, Error **errp) 882 { 883 int ret, open_flags; 884 const char *filename; 885 const char *driver_name = NULL; 886 const char *node_name = NULL; 887 QemuOpts *opts; 888 BlockDriver *drv; 889 Error *local_err = NULL; 890 891 assert(bs->file == NULL); 892 assert(options != NULL && bs->options != options); 893 894 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 895 qemu_opts_absorb_qdict(opts, options, &local_err); 896 if (local_err) { 897 error_propagate(errp, local_err); 898 ret = -EINVAL; 899 goto fail_opts; 900 } 901 902 driver_name = qemu_opt_get(opts, "driver"); 903 drv = bdrv_find_format(driver_name); 904 assert(drv != NULL); 905 906 if (file != NULL) { 907 filename = file->bs->filename; 908 } else { 909 filename = qdict_get_try_str(options, "filename"); 910 } 911 912 if (drv->bdrv_needs_filename && !filename) { 913 error_setg(errp, "The '%s' block driver requires a file name", 914 drv->format_name); 915 ret = -EINVAL; 916 goto fail_opts; 917 } 918 919 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, 920 drv->format_name); 921 922 node_name = qemu_opt_get(opts, "node-name"); 923 bdrv_assign_node_name(bs, node_name, &local_err); 924 if (local_err) { 925 error_propagate(errp, local_err); 926 ret = -EINVAL; 927 goto fail_opts; 928 } 929 930 bs->request_alignment = 512; 931 bs->zero_beyond_eof = true; 932 bs->read_only = !(bs->open_flags & BDRV_O_RDWR); 933 934 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 935 error_setg(errp, 936 !bs->read_only && bdrv_is_whitelisted(drv, true) 937 ? 
"Driver '%s' can only be used for read-only devices" 938 : "Driver '%s' is not whitelisted", 939 drv->format_name); 940 ret = -ENOTSUP; 941 goto fail_opts; 942 } 943 944 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 945 if (bs->open_flags & BDRV_O_COPY_ON_READ) { 946 if (!bs->read_only) { 947 bdrv_enable_copy_on_read(bs); 948 } else { 949 error_setg(errp, "Can't use copy-on-read on read-only device"); 950 ret = -EINVAL; 951 goto fail_opts; 952 } 953 } 954 955 if (filename != NULL) { 956 pstrcpy(bs->filename, sizeof(bs->filename), filename); 957 } else { 958 bs->filename[0] = '\0'; 959 } 960 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 961 962 bs->drv = drv; 963 bs->opaque = g_malloc0(drv->instance_size); 964 965 /* Apply cache mode options */ 966 update_flags_from_options(&bs->open_flags, opts); 967 968 /* Open the image, either directly or using a protocol */ 969 open_flags = bdrv_open_flags(bs, bs->open_flags); 970 if (drv->bdrv_file_open) { 971 assert(file == NULL); 972 assert(!drv->bdrv_needs_filename || filename != NULL); 973 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 974 } else { 975 if (file == NULL) { 976 error_setg(errp, "Can't use '%s' as a block driver for the " 977 "protocol level", drv->format_name); 978 ret = -EINVAL; 979 goto free_and_fail; 980 } 981 bs->file = file; 982 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 983 } 984 985 if (ret < 0) { 986 if (local_err) { 987 error_propagate(errp, local_err); 988 } else if (bs->filename[0]) { 989 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 990 } else { 991 error_setg_errno(errp, -ret, "Could not open image"); 992 } 993 goto free_and_fail; 994 } 995 996 ret = refresh_total_sectors(bs, bs->total_sectors); 997 if (ret < 0) { 998 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 999 goto free_and_fail; 1000 } 1001 1002 bdrv_refresh_limits(bs, &local_err); 1003 if (local_err) { 
1004 error_propagate(errp, local_err); 1005 ret = -EINVAL; 1006 goto free_and_fail; 1007 } 1008 1009 assert(bdrv_opt_mem_align(bs) != 0); 1010 assert(bdrv_min_mem_align(bs) != 0); 1011 assert((bs->request_alignment != 0) || bdrv_is_sg(bs)); 1012 1013 qemu_opts_del(opts); 1014 return 0; 1015 1016 free_and_fail: 1017 bs->file = NULL; 1018 g_free(bs->opaque); 1019 bs->opaque = NULL; 1020 bs->drv = NULL; 1021 fail_opts: 1022 qemu_opts_del(opts); 1023 return ret; 1024 } 1025 1026 static QDict *parse_json_filename(const char *filename, Error **errp) 1027 { 1028 QObject *options_obj; 1029 QDict *options; 1030 int ret; 1031 1032 ret = strstart(filename, "json:", &filename); 1033 assert(ret); 1034 1035 options_obj = qobject_from_json(filename); 1036 if (!options_obj) { 1037 error_setg(errp, "Could not parse the JSON options"); 1038 return NULL; 1039 } 1040 1041 if (qobject_type(options_obj) != QTYPE_QDICT) { 1042 qobject_decref(options_obj); 1043 error_setg(errp, "Invalid JSON object given"); 1044 return NULL; 1045 } 1046 1047 options = qobject_to_qdict(options_obj); 1048 qdict_flatten(options); 1049 1050 return options; 1051 } 1052 1053 static void parse_json_protocol(QDict *options, const char **pfilename, 1054 Error **errp) 1055 { 1056 QDict *json_options; 1057 Error *local_err = NULL; 1058 1059 /* Parse json: pseudo-protocol */ 1060 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { 1061 return; 1062 } 1063 1064 json_options = parse_json_filename(*pfilename, &local_err); 1065 if (local_err) { 1066 error_propagate(errp, local_err); 1067 return; 1068 } 1069 1070 /* Options given in the filename have lower priority than options 1071 * specified directly */ 1072 qdict_join(options, json_options, false); 1073 QDECREF(json_options); 1074 *pfilename = NULL; 1075 } 1076 1077 /* 1078 * Fills in default options for opening images and converts the legacy 1079 * filename/flags pair to option QDict entries. 
1080 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 1081 * block driver has been specified explicitly. 1082 */ 1083 static int bdrv_fill_options(QDict **options, const char *filename, 1084 int *flags, Error **errp) 1085 { 1086 const char *drvname; 1087 bool protocol = *flags & BDRV_O_PROTOCOL; 1088 bool parse_filename = false; 1089 BlockDriver *drv = NULL; 1090 Error *local_err = NULL; 1091 1092 drvname = qdict_get_try_str(*options, "driver"); 1093 if (drvname) { 1094 drv = bdrv_find_format(drvname); 1095 if (!drv) { 1096 error_setg(errp, "Unknown driver '%s'", drvname); 1097 return -ENOENT; 1098 } 1099 /* If the user has explicitly specified the driver, this choice should 1100 * override the BDRV_O_PROTOCOL flag */ 1101 protocol = drv->bdrv_file_open; 1102 } 1103 1104 if (protocol) { 1105 *flags |= BDRV_O_PROTOCOL; 1106 } else { 1107 *flags &= ~BDRV_O_PROTOCOL; 1108 } 1109 1110 /* Translate cache options from flags into options */ 1111 update_options_from_flags(*options, *flags); 1112 1113 /* Fetch the file name from the options QDict if necessary */ 1114 if (protocol && filename) { 1115 if (!qdict_haskey(*options, "filename")) { 1116 qdict_put(*options, "filename", qstring_from_str(filename)); 1117 parse_filename = true; 1118 } else { 1119 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1120 "the same time"); 1121 return -EINVAL; 1122 } 1123 } 1124 1125 /* Find the right block driver */ 1126 filename = qdict_get_try_str(*options, "filename"); 1127 1128 if (!drvname && protocol) { 1129 if (filename) { 1130 drv = bdrv_find_protocol(filename, parse_filename, errp); 1131 if (!drv) { 1132 return -EINVAL; 1133 } 1134 1135 drvname = drv->format_name; 1136 qdict_put(*options, "driver", qstring_from_str(drvname)); 1137 } else { 1138 error_setg(errp, "Must specify either driver or file"); 1139 return -EINVAL; 1140 } 1141 } 1142 1143 assert(drv || !protocol); 1144 1145 /* Driver-specific filename parsing */ 1146 if (drv && 
drv->bdrv_parse_filename && parse_filename) { 1147 drv->bdrv_parse_filename(filename, *options, &local_err); 1148 if (local_err) { 1149 error_propagate(errp, local_err); 1150 return -EINVAL; 1151 } 1152 1153 if (!drv->bdrv_needs_filename) { 1154 qdict_del(*options, "filename"); 1155 } 1156 } 1157 1158 return 0; 1159 } 1160 1161 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, 1162 const char *child_name, 1163 const BdrvChildRole *child_role) 1164 { 1165 BdrvChild *child = g_new(BdrvChild, 1); 1166 *child = (BdrvChild) { 1167 .bs = child_bs, 1168 .name = g_strdup(child_name), 1169 .role = child_role, 1170 }; 1171 1172 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent); 1173 1174 return child; 1175 } 1176 1177 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 1178 BlockDriverState *child_bs, 1179 const char *child_name, 1180 const BdrvChildRole *child_role) 1181 { 1182 BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role); 1183 QLIST_INSERT_HEAD(&parent_bs->children, child, next); 1184 return child; 1185 } 1186 1187 static void bdrv_detach_child(BdrvChild *child) 1188 { 1189 if (child->next.le_prev) { 1190 QLIST_REMOVE(child, next); 1191 child->next.le_prev = NULL; 1192 } 1193 QLIST_REMOVE(child, next_parent); 1194 g_free(child->name); 1195 g_free(child); 1196 } 1197 1198 void bdrv_root_unref_child(BdrvChild *child) 1199 { 1200 BlockDriverState *child_bs; 1201 1202 child_bs = child->bs; 1203 bdrv_detach_child(child); 1204 bdrv_unref(child_bs); 1205 } 1206 1207 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 1208 { 1209 if (child == NULL) { 1210 return; 1211 } 1212 1213 if (child->bs->inherits_from == parent) { 1214 child->bs->inherits_from = NULL; 1215 } 1216 1217 bdrv_root_unref_child(child); 1218 } 1219 1220 /* 1221 * Sets the backing file link of a BDS. A new reference is created; callers 1222 * which don't need their own reference any more must call bdrv_unref(). 
1223 */ 1224 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 1225 { 1226 if (backing_hd) { 1227 bdrv_ref(backing_hd); 1228 } 1229 1230 if (bs->backing) { 1231 assert(bs->backing_blocker); 1232 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); 1233 bdrv_unref_child(bs, bs->backing); 1234 } else if (backing_hd) { 1235 error_setg(&bs->backing_blocker, 1236 "node is used as backing hd of '%s'", 1237 bdrv_get_device_or_node_name(bs)); 1238 } 1239 1240 if (!backing_hd) { 1241 error_free(bs->backing_blocker); 1242 bs->backing_blocker = NULL; 1243 bs->backing = NULL; 1244 goto out; 1245 } 1246 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); 1247 bs->open_flags &= ~BDRV_O_NO_BACKING; 1248 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 1249 pstrcpy(bs->backing_format, sizeof(bs->backing_format), 1250 backing_hd->drv ? backing_hd->drv->format_name : ""); 1251 1252 bdrv_op_block_all(backing_hd, bs->backing_blocker); 1253 /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1254 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1255 bs->backing_blocker); 1256 out: 1257 bdrv_refresh_limits(bs, NULL); 1258 } 1259 1260 /* 1261 * Opens the backing file for a BlockDriverState if not yet open 1262 * 1263 * bdref_key specifies the key for the image's BlockdevRef in the options QDict. 1264 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1265 * itself, all options starting with "${bdref_key}." are considered part of the 1266 * BlockdevRef. 1267 * 1268 * TODO Can this be unified with bdrv_open_image()? 
1269 */ 1270 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, 1271 const char *bdref_key, Error **errp) 1272 { 1273 char *backing_filename = g_malloc0(PATH_MAX); 1274 char *bdref_key_dot; 1275 const char *reference = NULL; 1276 int ret = 0; 1277 BlockDriverState *backing_hd; 1278 QDict *options; 1279 QDict *tmp_parent_options = NULL; 1280 Error *local_err = NULL; 1281 1282 if (bs->backing != NULL) { 1283 goto free_exit; 1284 } 1285 1286 /* NULL means an empty set of options */ 1287 if (parent_options == NULL) { 1288 tmp_parent_options = qdict_new(); 1289 parent_options = tmp_parent_options; 1290 } 1291 1292 bs->open_flags &= ~BDRV_O_NO_BACKING; 1293 1294 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1295 qdict_extract_subqdict(parent_options, &options, bdref_key_dot); 1296 g_free(bdref_key_dot); 1297 1298 reference = qdict_get_try_str(parent_options, bdref_key); 1299 if (reference || qdict_haskey(options, "file.filename")) { 1300 backing_filename[0] = '\0'; 1301 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 1302 QDECREF(options); 1303 goto free_exit; 1304 } else { 1305 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 1306 &local_err); 1307 if (local_err) { 1308 ret = -EINVAL; 1309 error_propagate(errp, local_err); 1310 QDECREF(options); 1311 goto free_exit; 1312 } 1313 } 1314 1315 if (!bs->drv || !bs->drv->supports_backing) { 1316 ret = -EINVAL; 1317 error_setg(errp, "Driver doesn't support backing files"); 1318 QDECREF(options); 1319 goto free_exit; 1320 } 1321 1322 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1323 qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 1324 } 1325 1326 backing_hd = NULL; 1327 ret = bdrv_open_inherit(&backing_hd, 1328 *backing_filename ? 
backing_filename : NULL, 1329 reference, options, 0, bs, &child_backing, 1330 errp); 1331 if (ret < 0) { 1332 bs->open_flags |= BDRV_O_NO_BACKING; 1333 error_prepend(errp, "Could not open backing file: "); 1334 goto free_exit; 1335 } 1336 1337 /* Hook up the backing file link; drop our reference, bs owns the 1338 * backing_hd reference now */ 1339 bdrv_set_backing_hd(bs, backing_hd); 1340 bdrv_unref(backing_hd); 1341 1342 qdict_del(parent_options, bdref_key); 1343 1344 free_exit: 1345 g_free(backing_filename); 1346 QDECREF(tmp_parent_options); 1347 return ret; 1348 } 1349 1350 /* 1351 * Opens a disk image whose options are given as BlockdevRef in another block 1352 * device's options. 1353 * 1354 * If allow_none is true, no image will be opened if filename is false and no 1355 * BlockdevRef is given. NULL will be returned, but errp remains unset. 1356 * 1357 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1358 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1359 * itself, all options starting with "${bdref_key}." are considered part of the 1360 * BlockdevRef. 1361 * 1362 * The BlockdevRef will be removed from the options QDict. 
1363 */ 1364 BdrvChild *bdrv_open_child(const char *filename, 1365 QDict *options, const char *bdref_key, 1366 BlockDriverState* parent, 1367 const BdrvChildRole *child_role, 1368 bool allow_none, Error **errp) 1369 { 1370 BdrvChild *c = NULL; 1371 BlockDriverState *bs; 1372 QDict *image_options; 1373 int ret; 1374 char *bdref_key_dot; 1375 const char *reference; 1376 1377 assert(child_role != NULL); 1378 1379 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1380 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1381 g_free(bdref_key_dot); 1382 1383 reference = qdict_get_try_str(options, bdref_key); 1384 if (!filename && !reference && !qdict_size(image_options)) { 1385 if (!allow_none) { 1386 error_setg(errp, "A block device must be specified for \"%s\"", 1387 bdref_key); 1388 } 1389 QDECREF(image_options); 1390 goto done; 1391 } 1392 1393 bs = NULL; 1394 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, 1395 parent, child_role, errp); 1396 if (ret < 0) { 1397 goto done; 1398 } 1399 1400 c = bdrv_attach_child(parent, bs, bdref_key, child_role); 1401 1402 done: 1403 qdict_del(options, bdref_key); 1404 return c; 1405 } 1406 1407 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, 1408 QDict *snapshot_options, Error **errp) 1409 { 1410 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. 
*/ 1411 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1412 int64_t total_size; 1413 QemuOpts *opts = NULL; 1414 BlockDriverState *bs_snapshot; 1415 Error *local_err = NULL; 1416 int ret; 1417 1418 /* if snapshot, we create a temporary backing file and open it 1419 instead of opening 'filename' directly */ 1420 1421 /* Get the required size from the image */ 1422 total_size = bdrv_getlength(bs); 1423 if (total_size < 0) { 1424 ret = total_size; 1425 error_setg_errno(errp, -total_size, "Could not get image size"); 1426 goto out; 1427 } 1428 1429 /* Create the temporary image */ 1430 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1431 if (ret < 0) { 1432 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1433 goto out; 1434 } 1435 1436 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1437 &error_abort); 1438 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1439 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp); 1440 qemu_opts_del(opts); 1441 if (ret < 0) { 1442 error_prepend(errp, "Could not create temporary overlay '%s': ", 1443 tmp_filename); 1444 goto out; 1445 } 1446 1447 /* Prepare options QDict for the temporary file */ 1448 qdict_put(snapshot_options, "file.driver", 1449 qstring_from_str("file")); 1450 qdict_put(snapshot_options, "file.filename", 1451 qstring_from_str(tmp_filename)); 1452 qdict_put(snapshot_options, "driver", 1453 qstring_from_str("qcow2")); 1454 1455 bs_snapshot = bdrv_new(); 1456 1457 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1458 flags, &local_err); 1459 snapshot_options = NULL; 1460 if (ret < 0) { 1461 error_propagate(errp, local_err); 1462 goto out; 1463 } 1464 1465 bdrv_append(bs_snapshot, bs); 1466 1467 out: 1468 QDECREF(snapshot_options); 1469 g_free(tmp_filename); 1470 return ret; 1471 } 1472 1473 /* 1474 * Opens a disk image (raw, qcow2, vmdk, ...) 
1475 * 1476 * options is a QDict of options to pass to the block drivers, or NULL for an 1477 * empty set of options. The reference to the QDict belongs to the block layer 1478 * after the call (even on failure), so if the caller intends to reuse the 1479 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1480 * 1481 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1482 * If it is not NULL, the referenced BDS will be reused. 1483 * 1484 * The reference parameter may be used to specify an existing block device which 1485 * should be opened. If specified, neither options nor a filename may be given, 1486 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1487 */ 1488 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 1489 const char *reference, QDict *options, int flags, 1490 BlockDriverState *parent, 1491 const BdrvChildRole *child_role, Error **errp) 1492 { 1493 int ret; 1494 BdrvChild *file = NULL; 1495 BlockDriverState *bs; 1496 BlockDriver *drv = NULL; 1497 const char *drvname; 1498 const char *backing; 1499 Error *local_err = NULL; 1500 QDict *snapshot_options = NULL; 1501 int snapshot_flags = 0; 1502 1503 assert(pbs); 1504 assert(!child_role || !flags); 1505 assert(!child_role == !parent); 1506 1507 if (reference) { 1508 bool options_non_empty = options ? 
qdict_size(options) : false; 1509 QDECREF(options); 1510 1511 if (*pbs) { 1512 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1513 "another block device"); 1514 return -EINVAL; 1515 } 1516 1517 if (filename || options_non_empty) { 1518 error_setg(errp, "Cannot reference an existing block device with " 1519 "additional options or a new filename"); 1520 return -EINVAL; 1521 } 1522 1523 bs = bdrv_lookup_bs(reference, reference, errp); 1524 if (!bs) { 1525 return -ENODEV; 1526 } 1527 1528 if (bs->throttle_state) { 1529 error_setg(errp, "Cannot reference an existing block device for " 1530 "which I/O throttling is enabled"); 1531 return -EINVAL; 1532 } 1533 1534 bdrv_ref(bs); 1535 *pbs = bs; 1536 return 0; 1537 } 1538 1539 if (*pbs) { 1540 bs = *pbs; 1541 } else { 1542 bs = bdrv_new(); 1543 } 1544 1545 /* NULL means an empty set of options */ 1546 if (options == NULL) { 1547 options = qdict_new(); 1548 } 1549 1550 /* json: syntax counts as explicit options, as if in the QDict */ 1551 parse_json_protocol(options, &filename, &local_err); 1552 if (local_err) { 1553 ret = -EINVAL; 1554 goto fail; 1555 } 1556 1557 bs->explicit_options = qdict_clone_shallow(options); 1558 1559 if (child_role) { 1560 bs->inherits_from = parent; 1561 child_role->inherit_options(&flags, options, 1562 parent->open_flags, parent->options); 1563 } 1564 1565 ret = bdrv_fill_options(&options, filename, &flags, &local_err); 1566 if (local_err) { 1567 goto fail; 1568 } 1569 1570 bs->open_flags = flags; 1571 bs->options = options; 1572 options = qdict_clone_shallow(options); 1573 1574 /* Find the right image format driver */ 1575 drvname = qdict_get_try_str(options, "driver"); 1576 if (drvname) { 1577 drv = bdrv_find_format(drvname); 1578 if (!drv) { 1579 error_setg(errp, "Unknown driver: '%s'", drvname); 1580 ret = -EINVAL; 1581 goto fail; 1582 } 1583 } 1584 1585 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1586 1587 backing = qdict_get_try_str(options, "backing"); 1588 if (backing 
&& *backing == '\0') { 1589 flags |= BDRV_O_NO_BACKING; 1590 qdict_del(options, "backing"); 1591 } 1592 1593 /* Open image file without format layer */ 1594 if ((flags & BDRV_O_PROTOCOL) == 0) { 1595 if (flags & BDRV_O_RDWR) { 1596 flags |= BDRV_O_ALLOW_RDWR; 1597 } 1598 if (flags & BDRV_O_SNAPSHOT) { 1599 snapshot_options = qdict_new(); 1600 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, 1601 flags, options); 1602 bdrv_backing_options(&flags, options, flags, options); 1603 } 1604 1605 bs->open_flags = flags; 1606 1607 file = bdrv_open_child(filename, options, "file", bs, 1608 &child_file, true, &local_err); 1609 if (local_err) { 1610 ret = -EINVAL; 1611 goto fail; 1612 } 1613 } 1614 1615 /* Image format probing */ 1616 bs->probed = !drv; 1617 if (!drv && file) { 1618 ret = find_image_format(file->bs, filename, &drv, &local_err); 1619 if (ret < 0) { 1620 goto fail; 1621 } 1622 /* 1623 * This option update would logically belong in bdrv_fill_options(), 1624 * but we first need to open bs->file for the probing to work, while 1625 * opening bs->file already requires the (mostly) final set of options 1626 * so that cache mode etc. can be inherited. 1627 * 1628 * Adding the driver later is somewhat ugly, but it's not an option 1629 * that would ever be inherited, so it's correct. We just need to make 1630 * sure to update both bs->options (which has the full effective 1631 * options for bs) and options (which has file.* already removed). 
1632 */ 1633 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name)); 1634 qdict_put(options, "driver", qstring_from_str(drv->format_name)); 1635 } else if (!drv) { 1636 error_setg(errp, "Must specify either driver or file"); 1637 ret = -EINVAL; 1638 goto fail; 1639 } 1640 1641 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 1642 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 1643 /* file must be NULL if a protocol BDS is about to be created 1644 * (the inverse results in an error message from bdrv_open_common()) */ 1645 assert(!(flags & BDRV_O_PROTOCOL) || !file); 1646 1647 /* Open the image */ 1648 ret = bdrv_open_common(bs, file, options, &local_err); 1649 if (ret < 0) { 1650 goto fail; 1651 } 1652 1653 if (file && (bs->file != file)) { 1654 bdrv_unref_child(bs, file); 1655 file = NULL; 1656 } 1657 1658 /* If there is a backing file, use it */ 1659 if ((flags & BDRV_O_NO_BACKING) == 0) { 1660 ret = bdrv_open_backing_file(bs, options, "backing", &local_err); 1661 if (ret < 0) { 1662 goto close_and_fail; 1663 } 1664 } 1665 1666 bdrv_refresh_filename(bs); 1667 1668 /* Check if any unknown options were used */ 1669 if (options && (qdict_size(options) != 0)) { 1670 const QDictEntry *entry = qdict_first(options); 1671 if (flags & BDRV_O_PROTOCOL) { 1672 error_setg(errp, "Block protocol '%s' doesn't support the option " 1673 "'%s'", drv->format_name, entry->key); 1674 } else { 1675 error_setg(errp, 1676 "Block format '%s' does not support the option '%s'", 1677 drv->format_name, entry->key); 1678 } 1679 1680 ret = -EINVAL; 1681 goto close_and_fail; 1682 } 1683 1684 if (!bdrv_key_required(bs)) { 1685 if (bs->blk) { 1686 blk_dev_change_media_cb(bs->blk, true); 1687 } 1688 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1689 && !runstate_check(RUN_STATE_INMIGRATE) 1690 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1691 error_setg(errp, 1692 "Guest must be stopped for opening of encrypted image"); 1693 ret = 
-EBUSY; 1694 goto close_and_fail; 1695 } 1696 1697 QDECREF(options); 1698 *pbs = bs; 1699 1700 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1701 * temporary snapshot afterwards. */ 1702 if (snapshot_flags) { 1703 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options, 1704 &local_err); 1705 snapshot_options = NULL; 1706 if (local_err) { 1707 goto close_and_fail; 1708 } 1709 } 1710 1711 return 0; 1712 1713 fail: 1714 if (file != NULL) { 1715 bdrv_unref_child(bs, file); 1716 } 1717 QDECREF(snapshot_options); 1718 QDECREF(bs->explicit_options); 1719 QDECREF(bs->options); 1720 QDECREF(options); 1721 bs->options = NULL; 1722 if (!*pbs) { 1723 /* If *pbs is NULL, a new BDS has been created in this function and 1724 needs to be freed now. Otherwise, it does not need to be closed, 1725 since it has not really been opened yet. */ 1726 bdrv_unref(bs); 1727 } 1728 if (local_err) { 1729 error_propagate(errp, local_err); 1730 } 1731 return ret; 1732 1733 close_and_fail: 1734 /* See fail path, but now the BDS has to be always closed */ 1735 if (*pbs) { 1736 bdrv_close(bs); 1737 } else { 1738 bdrv_unref(bs); 1739 } 1740 QDECREF(snapshot_options); 1741 QDECREF(options); 1742 if (local_err) { 1743 error_propagate(errp, local_err); 1744 } 1745 return ret; 1746 } 1747 1748 int bdrv_open(BlockDriverState **pbs, const char *filename, 1749 const char *reference, QDict *options, int flags, Error **errp) 1750 { 1751 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, 1752 NULL, errp); 1753 } 1754 1755 typedef struct BlockReopenQueueEntry { 1756 bool prepared; 1757 BDRVReopenState state; 1758 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1759 } BlockReopenQueueEntry; 1760 1761 /* 1762 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1763 * reopen of multiple devices. 
1764 * 1765 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1766 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1767 * be created and initialized. This newly created BlockReopenQueue should be 1768 * passed back in for subsequent calls that are intended to be of the same 1769 * atomic 'set'. 1770 * 1771 * bs is the BlockDriverState to add to the reopen queue. 1772 * 1773 * options contains the changed options for the associated bs 1774 * (the BlockReopenQueue takes ownership) 1775 * 1776 * flags contains the open flags for the associated bs 1777 * 1778 * returns a pointer to bs_queue, which is either the newly allocated 1779 * bs_queue, or the existing bs_queue being used. 1780 * 1781 */ 1782 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, 1783 BlockDriverState *bs, 1784 QDict *options, 1785 int flags, 1786 const BdrvChildRole *role, 1787 QDict *parent_options, 1788 int parent_flags) 1789 { 1790 assert(bs != NULL); 1791 1792 BlockReopenQueueEntry *bs_entry; 1793 BdrvChild *child; 1794 QDict *old_options, *explicit_options; 1795 1796 if (bs_queue == NULL) { 1797 bs_queue = g_new0(BlockReopenQueue, 1); 1798 QSIMPLEQ_INIT(bs_queue); 1799 } 1800 1801 if (!options) { 1802 options = qdict_new(); 1803 } 1804 1805 /* 1806 * Precedence of options: 1807 * 1. Explicitly passed in options (highest) 1808 * 2. Set in flags (only for top level) 1809 * 3. Retained from explicitly set options of bs 1810 * 4. Inherited from parent node 1811 * 5. Retained from effective options of bs 1812 */ 1813 1814 if (!parent_options) { 1815 /* 1816 * Any setting represented by flags is always updated. If the 1817 * corresponding QDict option is set, it takes precedence. Otherwise 1818 * the flag is translated into a QDict option. The old setting of bs is 1819 * not considered. 
1820 */ 1821 update_options_from_flags(options, flags); 1822 } 1823 1824 /* Old explicitly set values (don't overwrite by inherited value) */ 1825 old_options = qdict_clone_shallow(bs->explicit_options); 1826 bdrv_join_options(bs, options, old_options); 1827 QDECREF(old_options); 1828 1829 explicit_options = qdict_clone_shallow(options); 1830 1831 /* Inherit from parent node */ 1832 if (parent_options) { 1833 assert(!flags); 1834 role->inherit_options(&flags, options, parent_flags, parent_options); 1835 } 1836 1837 /* Old values are used for options that aren't set yet */ 1838 old_options = qdict_clone_shallow(bs->options); 1839 bdrv_join_options(bs, options, old_options); 1840 QDECREF(old_options); 1841 1842 /* bdrv_open() masks this flag out */ 1843 flags &= ~BDRV_O_PROTOCOL; 1844 1845 QLIST_FOREACH(child, &bs->children, next) { 1846 QDict *new_child_options; 1847 char *child_key_dot; 1848 1849 /* reopen can only change the options of block devices that were 1850 * implicitly created and inherited options. For other (referenced) 1851 * block devices, a syntax like "backing.foo" results in an error. 
*/ 1852 if (child->bs->inherits_from != bs) { 1853 continue; 1854 } 1855 1856 child_key_dot = g_strdup_printf("%s.", child->name); 1857 qdict_extract_subqdict(options, &new_child_options, child_key_dot); 1858 g_free(child_key_dot); 1859 1860 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0, 1861 child->role, options, flags); 1862 } 1863 1864 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1865 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1866 1867 bs_entry->state.bs = bs; 1868 bs_entry->state.options = options; 1869 bs_entry->state.explicit_options = explicit_options; 1870 bs_entry->state.flags = flags; 1871 1872 return bs_queue; 1873 } 1874 1875 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1876 BlockDriverState *bs, 1877 QDict *options, int flags) 1878 { 1879 return bdrv_reopen_queue_child(bs_queue, bs, options, flags, 1880 NULL, NULL, 0); 1881 } 1882 1883 /* 1884 * Reopen multiple BlockDriverStates atomically & transactionally. 1885 * 1886 * The queue passed in (bs_queue) must have been built up previous 1887 * via bdrv_reopen_queue(). 1888 * 1889 * Reopens all BDS specified in the queue, with the appropriate 1890 * flags. All devices are prepared for reopen, and failure of any 1891 * device will cause all device changes to be abandonded, and intermediate 1892 * data cleaned up. 1893 * 1894 * If all devices prepare successfully, then the changes are committed 1895 * to all devices. 
1896 * 1897 */ 1898 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1899 { 1900 int ret = -1; 1901 BlockReopenQueueEntry *bs_entry, *next; 1902 Error *local_err = NULL; 1903 1904 assert(bs_queue != NULL); 1905 1906 bdrv_drain_all(); 1907 1908 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1909 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1910 error_propagate(errp, local_err); 1911 goto cleanup; 1912 } 1913 bs_entry->prepared = true; 1914 } 1915 1916 /* If we reach this point, we have success and just need to apply the 1917 * changes 1918 */ 1919 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1920 bdrv_reopen_commit(&bs_entry->state); 1921 } 1922 1923 ret = 0; 1924 1925 cleanup: 1926 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1927 if (ret && bs_entry->prepared) { 1928 bdrv_reopen_abort(&bs_entry->state); 1929 } else if (ret) { 1930 QDECREF(bs_entry->state.explicit_options); 1931 } 1932 QDECREF(bs_entry->state.options); 1933 g_free(bs_entry); 1934 } 1935 g_free(bs_queue); 1936 return ret; 1937 } 1938 1939 1940 /* Reopen a single BlockDriverState with the specified flags. */ 1941 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1942 { 1943 int ret = -1; 1944 Error *local_err = NULL; 1945 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); 1946 1947 ret = bdrv_reopen_multiple(queue, &local_err); 1948 if (local_err != NULL) { 1949 error_propagate(errp, local_err); 1950 } 1951 return ret; 1952 } 1953 1954 1955 /* 1956 * Prepares a BlockDriverState for reopen. All changes are staged in the 1957 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1958 * the block driver layer .bdrv_reopen_prepare() 1959 * 1960 * bs is the BlockDriverState to reopen 1961 * flags are the new open flags 1962 * queue is the reopen queue 1963 * 1964 * Returns 0 on success, non-zero on error. On error errp will be set 1965 * as well. 
1966 * 1967 * On failure, bdrv_reopen_abort() will be called to clean up any data. 1968 * It is the responsibility of the caller to then call the abort() or 1969 * commit() for any other BDS that have been left in a prepare() state 1970 * 1971 */ 1972 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1973 Error **errp) 1974 { 1975 int ret = -1; 1976 Error *local_err = NULL; 1977 BlockDriver *drv; 1978 QemuOpts *opts; 1979 const char *value; 1980 1981 assert(reopen_state != NULL); 1982 assert(reopen_state->bs->drv != NULL); 1983 drv = reopen_state->bs->drv; 1984 1985 /* Process generic block layer options */ 1986 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 1987 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err); 1988 if (local_err) { 1989 error_propagate(errp, local_err); 1990 ret = -EINVAL; 1991 goto error; 1992 } 1993 1994 update_flags_from_options(&reopen_state->flags, opts); 1995 1996 /* node-name and driver must be unchanged. Put them back into the QDict, so 1997 * that they are checked at the end of this function. 
*/ 1998 value = qemu_opt_get(opts, "node-name"); 1999 if (value) { 2000 qdict_put(reopen_state->options, "node-name", qstring_from_str(value)); 2001 } 2002 2003 value = qemu_opt_get(opts, "driver"); 2004 if (value) { 2005 qdict_put(reopen_state->options, "driver", qstring_from_str(value)); 2006 } 2007 2008 /* if we are to stay read-only, do not allow permission change 2009 * to r/w */ 2010 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 2011 reopen_state->flags & BDRV_O_RDWR) { 2012 error_setg(errp, "Node '%s' is read only", 2013 bdrv_get_device_or_node_name(reopen_state->bs)); 2014 goto error; 2015 } 2016 2017 2018 ret = bdrv_flush(reopen_state->bs); 2019 if (ret) { 2020 error_setg_errno(errp, -ret, "Error flushing drive"); 2021 goto error; 2022 } 2023 2024 if (drv->bdrv_reopen_prepare) { 2025 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 2026 if (ret) { 2027 if (local_err != NULL) { 2028 error_propagate(errp, local_err); 2029 } else { 2030 error_setg(errp, "failed while preparing to reopen image '%s'", 2031 reopen_state->bs->filename); 2032 } 2033 goto error; 2034 } 2035 } else { 2036 /* It is currently mandatory to have a bdrv_reopen_prepare() 2037 * handler for each supported drv. */ 2038 error_setg(errp, "Block format '%s' used by node '%s' " 2039 "does not support reopening files", drv->format_name, 2040 bdrv_get_device_or_node_name(reopen_state->bs)); 2041 ret = -1; 2042 goto error; 2043 } 2044 2045 /* Options that are not handled are only okay if they are unchanged 2046 * compared to the old state. It is expected that some options are only 2047 * used for the initial open, but not reopen (e.g. 
filename) */ 2048 if (qdict_size(reopen_state->options)) { 2049 const QDictEntry *entry = qdict_first(reopen_state->options); 2050 2051 do { 2052 QString *new_obj = qobject_to_qstring(entry->value); 2053 const char *new = qstring_get_str(new_obj); 2054 const char *old = qdict_get_try_str(reopen_state->bs->options, 2055 entry->key); 2056 2057 if (!old || strcmp(new, old)) { 2058 error_setg(errp, "Cannot change the option '%s'", entry->key); 2059 ret = -EINVAL; 2060 goto error; 2061 } 2062 } while ((entry = qdict_next(reopen_state->options, entry))); 2063 } 2064 2065 ret = 0; 2066 2067 error: 2068 qemu_opts_del(opts); 2069 return ret; 2070 } 2071 2072 /* 2073 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 2074 * makes them final by swapping the staging BlockDriverState contents into 2075 * the active BlockDriverState contents. 2076 */ 2077 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 2078 { 2079 BlockDriver *drv; 2080 2081 assert(reopen_state != NULL); 2082 drv = reopen_state->bs->drv; 2083 assert(drv != NULL); 2084 2085 /* If there are any driver level actions to take */ 2086 if (drv->bdrv_reopen_commit) { 2087 drv->bdrv_reopen_commit(reopen_state); 2088 } 2089 2090 /* set BDS specific flags now */ 2091 QDECREF(reopen_state->bs->explicit_options); 2092 2093 reopen_state->bs->explicit_options = reopen_state->explicit_options; 2094 reopen_state->bs->open_flags = reopen_state->flags; 2095 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 2096 2097 bdrv_refresh_limits(reopen_state->bs, NULL); 2098 } 2099 2100 /* 2101 * Abort the reopen, and delete and free the staged changes in 2102 * reopen_state 2103 */ 2104 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 2105 { 2106 BlockDriver *drv; 2107 2108 assert(reopen_state != NULL); 2109 drv = reopen_state->bs->drv; 2110 assert(drv != NULL); 2111 2112 if (drv->bdrv_reopen_abort) { 2113 drv->bdrv_reopen_abort(reopen_state); 2114 } 2115 2116 
QDECREF(reopen_state->explicit_options); 2117 } 2118 2119 2120 static void bdrv_close(BlockDriverState *bs) 2121 { 2122 BdrvAioNotifier *ban, *ban_next; 2123 2124 assert(!bs->job); 2125 2126 /* Disable I/O limits and drain all pending throttled requests */ 2127 if (bs->throttle_state) { 2128 bdrv_io_limits_disable(bs); 2129 } 2130 2131 bdrv_drained_begin(bs); /* complete I/O */ 2132 bdrv_flush(bs); 2133 bdrv_drain(bs); /* in case flush left pending I/O */ 2134 2135 bdrv_release_named_dirty_bitmaps(bs); 2136 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 2137 2138 if (bs->blk) { 2139 blk_dev_change_media_cb(bs->blk, false); 2140 } 2141 2142 if (bs->drv) { 2143 BdrvChild *child, *next; 2144 2145 bs->drv->bdrv_close(bs); 2146 bs->drv = NULL; 2147 2148 bdrv_set_backing_hd(bs, NULL); 2149 2150 if (bs->file != NULL) { 2151 bdrv_unref_child(bs, bs->file); 2152 bs->file = NULL; 2153 } 2154 2155 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 2156 /* TODO Remove bdrv_unref() from drivers' close function and use 2157 * bdrv_unref_child() here */ 2158 if (child->bs->inherits_from == bs) { 2159 child->bs->inherits_from = NULL; 2160 } 2161 bdrv_detach_child(child); 2162 } 2163 2164 g_free(bs->opaque); 2165 bs->opaque = NULL; 2166 bs->copy_on_read = 0; 2167 bs->backing_file[0] = '\0'; 2168 bs->backing_format[0] = '\0'; 2169 bs->total_sectors = 0; 2170 bs->encrypted = 0; 2171 bs->valid_key = 0; 2172 bs->sg = 0; 2173 bs->zero_beyond_eof = false; 2174 QDECREF(bs->options); 2175 QDECREF(bs->explicit_options); 2176 bs->options = NULL; 2177 QDECREF(bs->full_open_options); 2178 bs->full_open_options = NULL; 2179 } 2180 2181 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 2182 g_free(ban); 2183 } 2184 QLIST_INIT(&bs->aio_notifiers); 2185 bdrv_drained_end(bs); 2186 } 2187 2188 void bdrv_close_all(void) 2189 { 2190 BlockDriverState *bs; 2191 AioContext *aio_context; 2192 2193 /* Drop references from requests still in flight, such as canceled block 2194 * jobs whose AIO 
context has not been polled yet */ 2195 bdrv_drain_all(); 2196 2197 blk_remove_all_bs(); 2198 blockdev_close_all_bdrv_states(); 2199 2200 /* Cancel all block jobs */ 2201 while (!QTAILQ_EMPTY(&all_bdrv_states)) { 2202 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) { 2203 aio_context = bdrv_get_aio_context(bs); 2204 2205 aio_context_acquire(aio_context); 2206 if (bs->job) { 2207 block_job_cancel_sync(bs->job); 2208 aio_context_release(aio_context); 2209 break; 2210 } 2211 aio_context_release(aio_context); 2212 } 2213 2214 /* All the remaining BlockDriverStates are referenced directly or 2215 * indirectly from block jobs, so there needs to be at least one BDS 2216 * directly used by a block job */ 2217 assert(bs); 2218 } 2219 } 2220 2221 /* Fields that need to stay with the top-level BDS */ 2222 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 2223 BlockDriverState *bs_src) 2224 { 2225 /* move some fields that need to stay attached to the device */ 2226 } 2227 2228 static void change_parent_backing_link(BlockDriverState *from, 2229 BlockDriverState *to) 2230 { 2231 BdrvChild *c, *next; 2232 2233 if (from->blk) { 2234 /* FIXME We bypass blk_set_bs(), so we need to make these updates 2235 * manually. The root problem is not in this change function, but the 2236 * existence of BlockDriverState.blk. 
*/ 2237 to->blk = from->blk; 2238 from->blk = NULL; 2239 } 2240 2241 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 2242 assert(c->role != &child_backing); 2243 c->bs = to; 2244 QLIST_REMOVE(c, next_parent); 2245 QLIST_INSERT_HEAD(&to->parents, c, next_parent); 2246 bdrv_ref(to); 2247 bdrv_unref(from); 2248 } 2249 } 2250 2251 static void swap_feature_fields(BlockDriverState *bs_top, 2252 BlockDriverState *bs_new) 2253 { 2254 BlockDriverState tmp; 2255 2256 bdrv_move_feature_fields(&tmp, bs_top); 2257 bdrv_move_feature_fields(bs_top, bs_new); 2258 bdrv_move_feature_fields(bs_new, &tmp); 2259 2260 assert(!bs_new->throttle_state); 2261 if (bs_top->throttle_state) { 2262 /* 2263 * FIXME Need to break I/O throttling with graph manipulations 2264 * temporarily because of conflicting invariants (3. will go away when 2265 * throttling is fully converted to work on BlockBackends): 2266 * 2267 * 1. Every BlockBackend has a single root BDS 2268 * 2. I/O throttling functions require an attached BlockBackend 2269 * 3. We need to first enable throttling on the new BDS and then 2270 * disable it on the old one (because of throttle group refcounts) 2271 */ 2272 #if 0 2273 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top)); 2274 bdrv_io_limits_disable(bs_top); 2275 #else 2276 abort(); 2277 #endif 2278 } 2279 } 2280 2281 /* 2282 * Add new bs contents at the top of an image chain while the chain is 2283 * live, while keeping required fields on the top layer. 2284 * 2285 * This will modify the BlockDriverState fields, and swap contents 2286 * between bs_new and bs_top. Both bs_new and bs_top are modified. 2287 * 2288 * bs_new must not be attached to a BlockBackend. 2289 * 2290 * This function does not create any image files. 2291 * 2292 * bdrv_append() takes ownership of a bs_new reference and unrefs it because 2293 * that's what the callers commonly need. bs_new will be referenced by the old 2294 * parents of bs_top after bdrv_append() returns. 
If the caller needs to keep a 2295 * reference of its own, it must call bdrv_ref(). 2296 */ 2297 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 2298 { 2299 assert(!bdrv_requests_pending(bs_top)); 2300 assert(!bdrv_requests_pending(bs_new)); 2301 2302 bdrv_ref(bs_top); 2303 change_parent_backing_link(bs_top, bs_new); 2304 2305 /* Some fields always stay on top of the backing file chain */ 2306 swap_feature_fields(bs_top, bs_new); 2307 2308 bdrv_set_backing_hd(bs_new, bs_top); 2309 bdrv_unref(bs_top); 2310 2311 /* bs_new is now referenced by its new parents, we don't need the 2312 * additional reference any more. */ 2313 bdrv_unref(bs_new); 2314 } 2315 2316 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new) 2317 { 2318 assert(!bdrv_requests_pending(old)); 2319 assert(!bdrv_requests_pending(new)); 2320 2321 bdrv_ref(old); 2322 2323 if (old->blk) { 2324 /* As long as these fields aren't in BlockBackend, but in the top-level 2325 * BlockDriverState, it's not possible for a BDS to have two BBs. 2326 * 2327 * We really want to copy the fields from old to new, but we go for a 2328 * swap instead so that pointers aren't duplicated and cause trouble. 2329 * (Also, bdrv_swap() used to do the same.) */ 2330 assert(!new->blk); 2331 swap_feature_fields(old, new); 2332 } 2333 change_parent_backing_link(old, new); 2334 2335 /* Change backing files if a previously independent node is added to the 2336 * chain. For active commit, we replace top by its own (indirect) backing 2337 * file and don't do anything here so we don't build a loop. 
*/ 2338 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) { 2339 bdrv_set_backing_hd(new, backing_bs(old)); 2340 bdrv_set_backing_hd(old, NULL); 2341 } 2342 2343 bdrv_unref(old); 2344 } 2345 2346 static void bdrv_delete(BlockDriverState *bs) 2347 { 2348 assert(!bs->job); 2349 assert(bdrv_op_blocker_is_empty(bs)); 2350 assert(!bs->refcnt); 2351 2352 bdrv_close(bs); 2353 2354 /* remove from list, if necessary */ 2355 if (bs->node_name[0] != '\0') { 2356 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 2357 } 2358 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); 2359 2360 g_free(bs); 2361 } 2362 2363 /* 2364 * Run consistency checks on an image 2365 * 2366 * Returns 0 if the check could be completed (it doesn't mean that the image is 2367 * free of errors) or -errno when an internal error occurred. The results of the 2368 * check are stored in res. 2369 */ 2370 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2371 { 2372 if (bs->drv == NULL) { 2373 return -ENOMEDIUM; 2374 } 2375 if (bs->drv->bdrv_check == NULL) { 2376 return -ENOTSUP; 2377 } 2378 2379 memset(res, 0, sizeof(*res)); 2380 return bs->drv->bdrv_check(bs, res, fix); 2381 } 2382 2383 #define COMMIT_BUF_SECTORS 2048 2384 2385 /* commit COW file into the raw image */ 2386 int bdrv_commit(BlockDriverState *bs) 2387 { 2388 BlockDriver *drv = bs->drv; 2389 int64_t sector, total_sectors, length, backing_length; 2390 int n, ro, open_flags; 2391 int ret = 0; 2392 uint8_t *buf = NULL; 2393 2394 if (!drv) 2395 return -ENOMEDIUM; 2396 2397 if (!bs->backing) { 2398 return -ENOTSUP; 2399 } 2400 2401 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2402 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 2403 return -EBUSY; 2404 } 2405 2406 ro = bs->backing->bs->read_only; 2407 open_flags = bs->backing->bs->open_flags; 2408 2409 if (ro) { 2410 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) { 2411 return -EACCES; 2412 
} 2413 } 2414 2415 length = bdrv_getlength(bs); 2416 if (length < 0) { 2417 ret = length; 2418 goto ro_cleanup; 2419 } 2420 2421 backing_length = bdrv_getlength(bs->backing->bs); 2422 if (backing_length < 0) { 2423 ret = backing_length; 2424 goto ro_cleanup; 2425 } 2426 2427 /* If our top snapshot is larger than the backing file image, 2428 * grow the backing file image if possible. If not possible, 2429 * we must return an error */ 2430 if (length > backing_length) { 2431 ret = bdrv_truncate(bs->backing->bs, length); 2432 if (ret < 0) { 2433 goto ro_cleanup; 2434 } 2435 } 2436 2437 total_sectors = length >> BDRV_SECTOR_BITS; 2438 2439 /* qemu_try_blockalign() for bs will choose an alignment that works for 2440 * bs->backing->bs as well, so no need to compare the alignment manually. */ 2441 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2442 if (buf == NULL) { 2443 ret = -ENOMEM; 2444 goto ro_cleanup; 2445 } 2446 2447 for (sector = 0; sector < total_sectors; sector += n) { 2448 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2449 if (ret < 0) { 2450 goto ro_cleanup; 2451 } 2452 if (ret) { 2453 ret = bdrv_read(bs, sector, buf, n); 2454 if (ret < 0) { 2455 goto ro_cleanup; 2456 } 2457 2458 ret = bdrv_write(bs->backing->bs, sector, buf, n); 2459 if (ret < 0) { 2460 goto ro_cleanup; 2461 } 2462 } 2463 } 2464 2465 if (drv->bdrv_make_empty) { 2466 ret = drv->bdrv_make_empty(bs); 2467 if (ret < 0) { 2468 goto ro_cleanup; 2469 } 2470 bdrv_flush(bs); 2471 } 2472 2473 /* 2474 * Make sure all data we wrote to the backing device is actually 2475 * stable on disk. 
2476 */ 2477 if (bs->backing) { 2478 bdrv_flush(bs->backing->bs); 2479 } 2480 2481 ret = 0; 2482 ro_cleanup: 2483 qemu_vfree(buf); 2484 2485 if (ro) { 2486 /* ignoring error return here */ 2487 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL); 2488 } 2489 2490 return ret; 2491 } 2492 2493 /* 2494 * Return values: 2495 * 0 - success 2496 * -EINVAL - backing format specified, but no file 2497 * -ENOSPC - can't update the backing file because no space is left in the 2498 * image file header 2499 * -ENOTSUP - format driver doesn't support changing the backing file 2500 */ 2501 int bdrv_change_backing_file(BlockDriverState *bs, 2502 const char *backing_file, const char *backing_fmt) 2503 { 2504 BlockDriver *drv = bs->drv; 2505 int ret; 2506 2507 /* Backing file format doesn't make sense without a backing file */ 2508 if (backing_fmt && !backing_file) { 2509 return -EINVAL; 2510 } 2511 2512 if (drv->bdrv_change_backing_file != NULL) { 2513 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2514 } else { 2515 ret = -ENOTSUP; 2516 } 2517 2518 if (ret == 0) { 2519 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2520 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2521 } 2522 return ret; 2523 } 2524 2525 /* 2526 * Finds the image layer in the chain that has 'bs' as its backing file. 2527 * 2528 * active is the current topmost image. 2529 * 2530 * Returns NULL if bs is not found in active's image chain, 2531 * or if active == bs. 2532 * 2533 * Returns the bottommost base image if bs == NULL. 2534 */ 2535 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2536 BlockDriverState *bs) 2537 { 2538 while (active && bs != backing_bs(active)) { 2539 active = backing_bs(active); 2540 } 2541 2542 return active; 2543 } 2544 2545 /* Given a BDS, searches for the base layer. 
*/ 2546 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2547 { 2548 return bdrv_find_overlay(bs, NULL); 2549 } 2550 2551 /* 2552 * Drops images above 'base' up to and including 'top', and sets the image 2553 * above 'top' to have base as its backing file. 2554 * 2555 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2556 * information in 'bs' can be properly updated. 2557 * 2558 * E.g., this will convert the following chain: 2559 * bottom <- base <- intermediate <- top <- active 2560 * 2561 * to 2562 * 2563 * bottom <- base <- active 2564 * 2565 * It is allowed for bottom==base, in which case it converts: 2566 * 2567 * base <- intermediate <- top <- active 2568 * 2569 * to 2570 * 2571 * base <- active 2572 * 2573 * If backing_file_str is non-NULL, it will be used when modifying top's 2574 * overlay image metadata. 2575 * 2576 * Error conditions: 2577 * if active == top, that is considered an error 2578 * 2579 */ 2580 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2581 BlockDriverState *base, const char *backing_file_str) 2582 { 2583 BlockDriverState *new_top_bs = NULL; 2584 int ret = -EIO; 2585 2586 if (!top->drv || !base->drv) { 2587 goto exit; 2588 } 2589 2590 new_top_bs = bdrv_find_overlay(active, top); 2591 2592 if (new_top_bs == NULL) { 2593 /* we could not find the image above 'top', this is an error */ 2594 goto exit; 2595 } 2596 2597 /* special case of new_top_bs->backing->bs already pointing to base - nothing 2598 * to do, no intermediate images */ 2599 if (backing_bs(new_top_bs) == base) { 2600 ret = 0; 2601 goto exit; 2602 } 2603 2604 /* Make sure that base is in the backing chain of top */ 2605 if (!bdrv_chain_contains(top, base)) { 2606 goto exit; 2607 } 2608 2609 /* success - we can delete the intermediate states, and link top->base */ 2610 backing_file_str = backing_file_str ? 
backing_file_str : base->filename; 2611 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2612 base->drv ? base->drv->format_name : ""); 2613 if (ret) { 2614 goto exit; 2615 } 2616 bdrv_set_backing_hd(new_top_bs, base); 2617 2618 ret = 0; 2619 exit: 2620 return ret; 2621 } 2622 2623 /** 2624 * Truncate file to 'offset' bytes (needed only for file protocols) 2625 */ 2626 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2627 { 2628 BlockDriver *drv = bs->drv; 2629 int ret; 2630 if (!drv) 2631 return -ENOMEDIUM; 2632 if (!drv->bdrv_truncate) 2633 return -ENOTSUP; 2634 if (bs->read_only) 2635 return -EACCES; 2636 2637 ret = drv->bdrv_truncate(bs, offset); 2638 if (ret == 0) { 2639 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2640 bdrv_dirty_bitmap_truncate(bs); 2641 if (bs->blk) { 2642 blk_dev_resize_cb(bs->blk); 2643 } 2644 } 2645 return ret; 2646 } 2647 2648 /** 2649 * Length of a allocated file in bytes. Sparse files are counted by actual 2650 * allocated space. Return < 0 if error or unknown. 2651 */ 2652 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2653 { 2654 BlockDriver *drv = bs->drv; 2655 if (!drv) { 2656 return -ENOMEDIUM; 2657 } 2658 if (drv->bdrv_get_allocated_file_size) { 2659 return drv->bdrv_get_allocated_file_size(bs); 2660 } 2661 if (bs->file) { 2662 return bdrv_get_allocated_file_size(bs->file->bs); 2663 } 2664 return -ENOTSUP; 2665 } 2666 2667 /** 2668 * Return number of sectors on success, -errno on error. 2669 */ 2670 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2671 { 2672 BlockDriver *drv = bs->drv; 2673 2674 if (!drv) 2675 return -ENOMEDIUM; 2676 2677 if (drv->has_variable_length) { 2678 int ret = refresh_total_sectors(bs, bs->total_sectors); 2679 if (ret < 0) { 2680 return ret; 2681 } 2682 } 2683 return bs->total_sectors; 2684 } 2685 2686 /** 2687 * Return length in bytes on success, -errno on error. 2688 * The length is always a multiple of BDRV_SECTOR_SIZE. 
2689 */ 2690 int64_t bdrv_getlength(BlockDriverState *bs) 2691 { 2692 int64_t ret = bdrv_nb_sectors(bs); 2693 2694 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2695 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2696 } 2697 2698 /* return 0 as number of sectors if no device present or error */ 2699 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2700 { 2701 int64_t nb_sectors = bdrv_nb_sectors(bs); 2702 2703 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2704 } 2705 2706 int bdrv_is_read_only(BlockDriverState *bs) 2707 { 2708 return bs->read_only; 2709 } 2710 2711 int bdrv_is_sg(BlockDriverState *bs) 2712 { 2713 return bs->sg; 2714 } 2715 2716 int bdrv_is_encrypted(BlockDriverState *bs) 2717 { 2718 if (bs->backing && bs->backing->bs->encrypted) { 2719 return 1; 2720 } 2721 return bs->encrypted; 2722 } 2723 2724 int bdrv_key_required(BlockDriverState *bs) 2725 { 2726 BdrvChild *backing = bs->backing; 2727 2728 if (backing && backing->bs->encrypted && !backing->bs->valid_key) { 2729 return 1; 2730 } 2731 return (bs->encrypted && !bs->valid_key); 2732 } 2733 2734 int bdrv_set_key(BlockDriverState *bs, const char *key) 2735 { 2736 int ret; 2737 if (bs->backing && bs->backing->bs->encrypted) { 2738 ret = bdrv_set_key(bs->backing->bs, key); 2739 if (ret < 0) 2740 return ret; 2741 if (!bs->encrypted) 2742 return 0; 2743 } 2744 if (!bs->encrypted) { 2745 return -EINVAL; 2746 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2747 return -ENOMEDIUM; 2748 } 2749 ret = bs->drv->bdrv_set_key(bs, key); 2750 if (ret < 0) { 2751 bs->valid_key = 0; 2752 } else if (!bs->valid_key) { 2753 bs->valid_key = 1; 2754 if (bs->blk) { 2755 /* call the change callback now, we skipped it on open */ 2756 blk_dev_change_media_cb(bs->blk, true); 2757 } 2758 } 2759 return ret; 2760 } 2761 2762 /* 2763 * Provide an encryption key for @bs. 2764 * If @key is non-null: 2765 * If @bs is not encrypted, fail. 2766 * Else if the key is invalid, fail. 
2767 * Else set @bs's key to @key, replacing the existing key, if any. 2768 * If @key is null: 2769 * If @bs is encrypted and still lacks a key, fail. 2770 * Else do nothing. 2771 * On failure, store an error object through @errp if non-null. 2772 */ 2773 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2774 { 2775 if (key) { 2776 if (!bdrv_is_encrypted(bs)) { 2777 error_setg(errp, "Node '%s' is not encrypted", 2778 bdrv_get_device_or_node_name(bs)); 2779 } else if (bdrv_set_key(bs, key) < 0) { 2780 error_setg(errp, QERR_INVALID_PASSWORD); 2781 } 2782 } else { 2783 if (bdrv_key_required(bs)) { 2784 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2785 "'%s' (%s) is encrypted", 2786 bdrv_get_device_or_node_name(bs), 2787 bdrv_get_encrypted_filename(bs)); 2788 } 2789 } 2790 } 2791 2792 const char *bdrv_get_format_name(BlockDriverState *bs) 2793 { 2794 return bs->drv ? bs->drv->format_name : NULL; 2795 } 2796 2797 static int qsort_strcmp(const void *a, const void *b) 2798 { 2799 return strcmp(a, b); 2800 } 2801 2802 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2803 void *opaque) 2804 { 2805 BlockDriver *drv; 2806 int count = 0; 2807 int i; 2808 const char **formats = NULL; 2809 2810 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2811 if (drv->format_name) { 2812 bool found = false; 2813 int i = count; 2814 while (formats && i && !found) { 2815 found = !strcmp(formats[--i], drv->format_name); 2816 } 2817 2818 if (!found) { 2819 formats = g_renew(const char *, formats, count + 1); 2820 formats[count++] = drv->format_name; 2821 } 2822 } 2823 } 2824 2825 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2826 2827 for (i = 0; i < count; i++) { 2828 it(opaque, formats[i]); 2829 } 2830 2831 g_free(formats); 2832 } 2833 2834 /* This function is to find a node in the bs graph */ 2835 BlockDriverState *bdrv_find_node(const char *node_name) 2836 { 2837 BlockDriverState *bs; 2838 2839 assert(node_name); 2840 2841 QTAILQ_FOREACH(bs, 
&graph_bdrv_states, node_list) { 2842 if (!strcmp(node_name, bs->node_name)) { 2843 return bs; 2844 } 2845 } 2846 return NULL; 2847 } 2848 2849 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 2850 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2851 { 2852 BlockDeviceInfoList *list, *entry; 2853 BlockDriverState *bs; 2854 2855 list = NULL; 2856 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2857 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp); 2858 if (!info) { 2859 qapi_free_BlockDeviceInfoList(list); 2860 return NULL; 2861 } 2862 entry = g_malloc0(sizeof(*entry)); 2863 entry->value = info; 2864 entry->next = list; 2865 list = entry; 2866 } 2867 2868 return list; 2869 } 2870 2871 BlockDriverState *bdrv_lookup_bs(const char *device, 2872 const char *node_name, 2873 Error **errp) 2874 { 2875 BlockBackend *blk; 2876 BlockDriverState *bs; 2877 2878 if (device) { 2879 blk = blk_by_name(device); 2880 2881 if (blk) { 2882 bs = blk_bs(blk); 2883 if (!bs) { 2884 error_setg(errp, "Device '%s' has no medium", device); 2885 } 2886 2887 return bs; 2888 } 2889 } 2890 2891 if (node_name) { 2892 bs = bdrv_find_node(node_name); 2893 2894 if (bs) { 2895 return bs; 2896 } 2897 } 2898 2899 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2900 device ? device : "", 2901 node_name ? node_name : ""); 2902 return NULL; 2903 } 2904 2905 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2906 * return false. If either argument is NULL, return false. */ 2907 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2908 { 2909 while (top && top != base) { 2910 top = backing_bs(top); 2911 } 2912 2913 return top != NULL; 2914 } 2915 2916 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2917 { 2918 if (!bs) { 2919 return QTAILQ_FIRST(&graph_bdrv_states); 2920 } 2921 return QTAILQ_NEXT(bs, node_list); 2922 } 2923 2924 /* Iterates over all top-level BlockDriverStates, i.e. 
BDSs that are owned by 2925 * the monitor or attached to a BlockBackend */ 2926 BlockDriverState *bdrv_next(BlockDriverState *bs) 2927 { 2928 if (!bs || bs->blk) { 2929 bs = blk_next_root_bs(bs); 2930 if (bs) { 2931 return bs; 2932 } 2933 } 2934 2935 /* Ignore all BDSs that are attached to a BlockBackend here; they have been 2936 * handled by the above block already */ 2937 do { 2938 bs = bdrv_next_monitor_owned(bs); 2939 } while (bs && bs->blk); 2940 return bs; 2941 } 2942 2943 const char *bdrv_get_node_name(const BlockDriverState *bs) 2944 { 2945 return bs->node_name; 2946 } 2947 2948 /* TODO check what callers really want: bs->node_name or blk_name() */ 2949 const char *bdrv_get_device_name(const BlockDriverState *bs) 2950 { 2951 return bs->blk ? blk_name(bs->blk) : ""; 2952 } 2953 2954 /* This can be used to identify nodes that might not have a device 2955 * name associated. Since node and device names live in the same 2956 * namespace, the result is unambiguous. The exception is if both are 2957 * absent, then this returns an empty (non-null) string. */ 2958 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2959 { 2960 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2961 } 2962 2963 int bdrv_get_flags(BlockDriverState *bs) 2964 { 2965 return bs->open_flags; 2966 } 2967 2968 int bdrv_has_zero_init_1(BlockDriverState *bs) 2969 { 2970 return 1; 2971 } 2972 2973 int bdrv_has_zero_init(BlockDriverState *bs) 2974 { 2975 assert(bs->drv); 2976 2977 /* If BS is a copy on write image, it is initialized to 2978 the contents of the base image, which may not be zeroes. 
*/ 2979 if (bs->backing) { 2980 return 0; 2981 } 2982 if (bs->drv->bdrv_has_zero_init) { 2983 return bs->drv->bdrv_has_zero_init(bs); 2984 } 2985 2986 /* safe default */ 2987 return 0; 2988 } 2989 2990 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2991 { 2992 BlockDriverInfo bdi; 2993 2994 if (bs->backing) { 2995 return false; 2996 } 2997 2998 if (bdrv_get_info(bs, &bdi) == 0) { 2999 return bdi.unallocated_blocks_are_zero; 3000 } 3001 3002 return false; 3003 } 3004 3005 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 3006 { 3007 BlockDriverInfo bdi; 3008 3009 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) { 3010 return false; 3011 } 3012 3013 if (bdrv_get_info(bs, &bdi) == 0) { 3014 return bdi.can_write_zeroes_with_unmap; 3015 } 3016 3017 return false; 3018 } 3019 3020 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 3021 { 3022 if (bs->backing && bs->backing->bs->encrypted) 3023 return bs->backing_file; 3024 else if (bs->encrypted) 3025 return bs->filename; 3026 else 3027 return NULL; 3028 } 3029 3030 void bdrv_get_backing_filename(BlockDriverState *bs, 3031 char *filename, int filename_size) 3032 { 3033 pstrcpy(filename, filename_size, bs->backing_file); 3034 } 3035 3036 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 3037 { 3038 BlockDriver *drv = bs->drv; 3039 if (!drv) 3040 return -ENOMEDIUM; 3041 if (!drv->bdrv_get_info) 3042 return -ENOTSUP; 3043 memset(bdi, 0, sizeof(*bdi)); 3044 return drv->bdrv_get_info(bs, bdi); 3045 } 3046 3047 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 3048 { 3049 BlockDriver *drv = bs->drv; 3050 if (drv && drv->bdrv_get_specific_info) { 3051 return drv->bdrv_get_specific_info(bs); 3052 } 3053 return NULL; 3054 } 3055 3056 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) 3057 { 3058 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 3059 return; 3060 } 3061 3062 bs->drv->bdrv_debug_event(bs, event); 3063 } 3064 3065 int 
bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 3066 const char *tag) 3067 { 3068 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 3069 bs = bs->file ? bs->file->bs : NULL; 3070 } 3071 3072 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 3073 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 3074 } 3075 3076 return -ENOTSUP; 3077 } 3078 3079 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 3080 { 3081 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 3082 bs = bs->file ? bs->file->bs : NULL; 3083 } 3084 3085 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 3086 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 3087 } 3088 3089 return -ENOTSUP; 3090 } 3091 3092 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 3093 { 3094 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 3095 bs = bs->file ? bs->file->bs : NULL; 3096 } 3097 3098 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 3099 return bs->drv->bdrv_debug_resume(bs, tag); 3100 } 3101 3102 return -ENOTSUP; 3103 } 3104 3105 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 3106 { 3107 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 3108 bs = bs->file ? bs->file->bs : NULL; 3109 } 3110 3111 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 3112 return bs->drv->bdrv_debug_is_suspended(bs, tag); 3113 } 3114 3115 return false; 3116 } 3117 3118 int bdrv_is_snapshot(BlockDriverState *bs) 3119 { 3120 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 3121 } 3122 3123 /* backing_file can either be relative, or absolute, or a protocol. If it is 3124 * relative, it must be relative to the chain. So, passing in bs->filename 3125 * from a BDS as backing_file should not be done, as that may be relative to 3126 * the CWD rather than the chain. 
*/
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for path_combine()/realpath(); freed before returning */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /* Walk down the backing chain; the result is the backing node of the
     * first layer whose recorded backing file matches */
    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing->bs;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* Path cannot be resolved for this layer; try the next one */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing->bs;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}

/*
 * Return the number of backing links below @bs (0 if @bs has no driver or no
 * backing file).  Recurses once per layer of the chain.
 */
int bdrv_get_backing_file_depth(BlockDriverState *bs)
{
    if (!bs->drv) {
        return 0;
    }

    if (!bs->backing) {
        return 0;
    }

    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
}

/* Run the MODULE_INIT_BLOCK module initializers */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

/* Like bdrv_init(), but first sets use_bdrv_whitelist so that only
 * whitelisted block drivers are used */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}

/*
 * Clear BDRV_O_INACTIVE on @bs and its children.
 *
 * Does nothing if @bs has no driver or is not marked BDRV_O_INACTIVE.
 * Otherwise the flag is cleared, the driver's bdrv_invalidate_cache callback
 * (if any) is called, all children are processed recursively and finally the
 * cached sector count is refreshed.  If any of these steps fails, the flag is
 * set again on @bs and the error is stored in @errp.
 */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BdrvChild *child;
    Error *local_err = NULL;
    int ret;

    if (!bs->drv)  {
        return;
    }

    if (!(bs->open_flags & BDRV_O_INACTIVE)) {
        return;
    }
    /* Cleared before the callbacks run; restored on every error path below */
    bs->open_flags &= ~BDRV_O_INACTIVE;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
        if (local_err) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_propagate(errp, local_err);
            return;
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_invalidate_cache(child->bs, &local_err);
        if (local_err) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_propagate(errp, local_err);
            return;
        }
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        bs->open_flags |= BDRV_O_INACTIVE;
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}

/*
 * Call bdrv_invalidate_cache() on every BDS returned by bdrv_next(), holding
 * the node's AioContext around each call.  Stops at the first error, which is
 * stored in @errp.
 */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;

    while ((bs = bdrv_next(bs)) != NULL) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/*
 * One pass of inactivation over @bs and, recursively, its children.
 *
 * With @setting_flag false, the driver's bdrv_inactivate callback (if any) is
 * called; with @setting_flag true, BDRV_O_INACTIVE is set on the node after
 * its children have been processed.  Returns 0 on success or the first
 * negative value returned by a callback or a recursive call.
 *
 * NOTE(review): bs->drv is dereferenced without a NULL check here, unlike in
 * bdrv_invalidate_cache(); presumably callers only pass nodes with a driver -
 * confirm.
 */
static int bdrv_inactivate_recurse(BlockDriverState *bs,
                                   bool setting_flag)
{
    BdrvChild *child;
    int ret;

    if (!setting_flag && bs->drv->bdrv_inactivate) {
        ret = bs->drv->bdrv_inactivate(bs);
        if (ret < 0) {
            return ret;
        }
    }

    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs, setting_flag);
        if (ret < 0) {
            return ret;
        }
    }

    if (setting_flag) {
        bs->open_flags |= BDRV_O_INACTIVE;
    }
    return 0;
}

/*
 * Inactivate every BDS returned by bdrv_next().  The AioContexts of all these
 * nodes are acquired up front and released again before returning, also on
 * error.  Returns 0 on success or the first negative value from
 * bdrv_inactivate_recurse().
 */
int bdrv_inactivate_all(void)
{
    BlockDriverState *bs = NULL;
    int ret = 0;
    int pass;

    while ((bs = bdrv_next(bs)) != NULL) {
        aio_context_acquire(bdrv_get_aio_context(bs));
    }

    /* We do two passes of inactivation. The first pass calls to drivers'
     * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
     * the second pass sets the BDRV_O_INACTIVE flag so that no further write
     * is allowed. */
    for (pass = 0; pass < 2; pass++) {
        bs = NULL;
        while ((bs = bdrv_next(bs)) != NULL) {
            /* pass doubles as the setting_flag argument: 0 = flush, 1 = flag */
            ret = bdrv_inactivate_recurse(bs, pass);
            if (ret < 0) {
                goto out;
            }
        }
    }

out:
    bs = NULL;
    while ((bs = bdrv_next(bs)) != NULL) {
        aio_context_release(bdrv_get_aio_context(bs));
    }

    return ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 *
 * A node without driver has no medium.  A driver with a bdrv_is_inserted
 * callback decides on its own; otherwise the medium is present only if it is
 * present in every child.
 */
bool bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BdrvChild *child;

    if (!drv) {
        return false;
    }
    if (drv->bdrv_is_inserted) {
        return drv->bdrv_is_inserted(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_is_inserted(child->bs)) {
            return false;
        }
    }
    return true;
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3359 */ 3360 int bdrv_media_changed(BlockDriverState *bs) 3361 { 3362 BlockDriver *drv = bs->drv; 3363 3364 if (drv && drv->bdrv_media_changed) { 3365 return drv->bdrv_media_changed(bs); 3366 } 3367 return -ENOTSUP; 3368 } 3369 3370 /** 3371 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3372 */ 3373 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3374 { 3375 BlockDriver *drv = bs->drv; 3376 const char *device_name; 3377 3378 if (drv && drv->bdrv_eject) { 3379 drv->bdrv_eject(bs, eject_flag); 3380 } 3381 3382 device_name = bdrv_get_device_name(bs); 3383 if (device_name[0] != '\0') { 3384 qapi_event_send_device_tray_moved(device_name, 3385 eject_flag, &error_abort); 3386 } 3387 } 3388 3389 /** 3390 * Lock or unlock the media (if it is locked, the user won't be able 3391 * to eject it manually). 3392 */ 3393 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3394 { 3395 BlockDriver *drv = bs->drv; 3396 3397 trace_bdrv_lock_medium(bs, locked); 3398 3399 if (drv && drv->bdrv_lock_medium) { 3400 drv->bdrv_lock_medium(bs, locked); 3401 } 3402 } 3403 3404 /* Get a reference to bs */ 3405 void bdrv_ref(BlockDriverState *bs) 3406 { 3407 bs->refcnt++; 3408 } 3409 3410 /* Release a previously grabbed reference to bs. 3411 * If after releasing, reference count is zero, the BlockDriverState is 3412 * deleted. 
*/
void bdrv_unref(BlockDriverState *bs)
{
    /* Unreferencing NULL is a no-op */
    if (!bs) {
        return;
    }
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}

/* One entry of a per-operation blocker list (bs->op_blockers[op]) */
struct BdrvOpBlocker {
    Error *reason;                  /* why the operation is blocked */
    QLIST_ENTRY(BdrvOpBlocker) list;
};

/*
 * Return true if operation @op is blocked on @bs.  In that case, if @errp is
 * non-NULL, it receives a copy of the first blocker's reason, prefixed with
 * "Node '<name>' is busy: ".
 */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            *errp = error_copy(blocker->reason);
            error_prepend(errp, "Node '%s' is busy: ",
                          bdrv_get_device_or_node_name(bs));
        }
        return true;
    }
    return false;
}

/*
 * Block operation @op on @bs, recording @reason.  The @reason pointer is
 * stored as is (not copied) and is what bdrv_op_unblock() matches against.
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}

/*
 * Remove every blocker of operation @op on @bs whose reason pointer equals
 * @reason.  Only the list entry is freed, not @reason itself.
 */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}

/* Block every operation type on @bs with the same @reason */
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}

/* Remove the blockers with @reason from every operation type on @bs */
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}

/* Return true if no operation type has any blocker on @bs */
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int i;

    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
            return false;
        }
    }
    return true;
}

/*
 * Create a new image @filename of format @fmt.
 *
 * The creation options are the union of the format driver's and the protocol
 * driver's create_opts.  @img_size is set as the size option first; @options
 * (a -o style option string, may be NULL), @base_filename and @base_fmt (both
 * may be NULL) are then applied on top.  If the resulting size is -1 and a
 * backing file is configured, the backing file is opened with @flags minus
 * BDRV_O_RDWR, BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING, and its length is used
 * as the size.
 *
 * Unless @quiet is set, a "Formatting ..." line with the final options is
 * printed to stdout.  Errors are reported through @errp.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The parser's message is reported directly; the caller gets a
             * generic error instead */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE: shadows the function-level 'size' inside this block */
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags = flags;
            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            /* The backing file was only opened to learn its length */
            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error is replaced by the message above */
        error_free(local_err);
        local_err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}

/* Return the aio_context field of @bs */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}

/*
 * Detach @bs from its AioContext.  Does nothing for a node without driver.
 * Otherwise, in this order: the registered AIO notifiers are told about the
 * detach, throttle timers are detached if the node has throttle state, the
 * driver callback (if any) runs, the bs->file and bs->backing children are
 * detached recursively, and finally bs->aio_context is set to NULL.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    if (!bs->drv) {
        return;
    }

    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->throttle_state) {
        throttle_timers_detach_aio_context(&bs->throttle_timers);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file->bs);
    }
    if (bs->backing) {
        bdrv_detach_aio_context(bs->backing->bs);
    }

    bs->aio_context = NULL;
}

void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing) {
bdrv_attach_aio_context(bs->backing->bs, new_context); 3708 } 3709 if (bs->file) { 3710 bdrv_attach_aio_context(bs->file->bs, new_context); 3711 } 3712 if (bs->drv->bdrv_attach_aio_context) { 3713 bs->drv->bdrv_attach_aio_context(bs, new_context); 3714 } 3715 if (bs->throttle_state) { 3716 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); 3717 } 3718 3719 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3720 ban->attached_aio_context(new_context, ban->opaque); 3721 } 3722 } 3723 3724 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3725 { 3726 bdrv_drain(bs); /* ensure there are no in-flight requests */ 3727 3728 bdrv_detach_aio_context(bs); 3729 3730 /* This function executes in the old AioContext so acquire the new one in 3731 * case it runs in a different thread. 3732 */ 3733 aio_context_acquire(new_context); 3734 bdrv_attach_aio_context(bs, new_context); 3735 aio_context_release(new_context); 3736 } 3737 3738 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3739 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3740 void (*detach_aio_context)(void *opaque), void *opaque) 3741 { 3742 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3743 *ban = (BdrvAioNotifier){ 3744 .attached_aio_context = attached_aio_context, 3745 .detach_aio_context = detach_aio_context, 3746 .opaque = opaque 3747 }; 3748 3749 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3750 } 3751 3752 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3753 void (*attached_aio_context)(AioContext *, 3754 void *), 3755 void (*detach_aio_context)(void *), 3756 void *opaque) 3757 { 3758 BdrvAioNotifier *ban, *ban_next; 3759 3760 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3761 if (ban->attached_aio_context == attached_aio_context && 3762 ban->detach_aio_context == detach_aio_context && 3763 ban->opaque == opaque) 3764 { 3765 QLIST_REMOVE(ban, list); 3766 g_free(ban); 3767 3768 return; 3769 } 3770 } 3771 3772 
abort(); 3773 } 3774 3775 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3776 BlockDriverAmendStatusCB *status_cb, void *cb_opaque) 3777 { 3778 if (!bs->drv->bdrv_amend_options) { 3779 return -ENOTSUP; 3780 } 3781 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque); 3782 } 3783 3784 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3785 * of block filter and by bdrv_is_first_non_filter. 3786 * It is used to test if the given bs is the candidate or recurse more in the 3787 * node graph. 3788 */ 3789 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3790 BlockDriverState *candidate) 3791 { 3792 /* return false if basic checks fails */ 3793 if (!bs || !bs->drv) { 3794 return false; 3795 } 3796 3797 /* the code reached a non block filter driver -> check if the bs is 3798 * the same as the candidate. It's the recursion termination condition. 3799 */ 3800 if (!bs->drv->is_filter) { 3801 return bs == candidate; 3802 } 3803 /* Down this path the driver is a block filter driver */ 3804 3805 /* If the block filter recursion method is defined use it to recurse down 3806 * the node graph. 3807 */ 3808 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3809 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3810 } 3811 3812 /* the driver is a block filter but don't allow to recurse -> return false 3813 */ 3814 return false; 3815 } 3816 3817 /* This function checks if the candidate is the first non filter bs down it's 3818 * bs chain. Since we don't have pointers to parents it explore all bs chains 3819 * from the top. Some filters can choose not to pass down the recursion. 
3820 */ 3821 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 3822 { 3823 BlockDriverState *bs = NULL; 3824 3825 /* walk down the bs forest recursively */ 3826 while ((bs = bdrv_next(bs)) != NULL) { 3827 bool perm; 3828 3829 /* try to recurse in this top level bs */ 3830 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 3831 3832 /* candidate is the first non filter */ 3833 if (perm) { 3834 return true; 3835 } 3836 } 3837 3838 return false; 3839 } 3840 3841 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 3842 const char *node_name, Error **errp) 3843 { 3844 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 3845 AioContext *aio_context; 3846 3847 if (!to_replace_bs) { 3848 error_setg(errp, "Node name '%s' not found", node_name); 3849 return NULL; 3850 } 3851 3852 aio_context = bdrv_get_aio_context(to_replace_bs); 3853 aio_context_acquire(aio_context); 3854 3855 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 3856 to_replace_bs = NULL; 3857 goto out; 3858 } 3859 3860 /* We don't want arbitrary node of the BDS chain to be replaced only the top 3861 * most non filter in order to prevent data corruption. 3862 * Another benefit is that this tests exclude backing files which are 3863 * blocked by the backing blockers. 
3864 */ 3865 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) { 3866 error_setg(errp, "Only top most non filter can be replaced"); 3867 to_replace_bs = NULL; 3868 goto out; 3869 } 3870 3871 out: 3872 aio_context_release(aio_context); 3873 return to_replace_bs; 3874 } 3875 3876 static bool append_open_options(QDict *d, BlockDriverState *bs) 3877 { 3878 const QDictEntry *entry; 3879 QemuOptDesc *desc; 3880 BdrvChild *child; 3881 bool found_any = false; 3882 const char *p; 3883 3884 for (entry = qdict_first(bs->options); entry; 3885 entry = qdict_next(bs->options, entry)) 3886 { 3887 /* Exclude options for children */ 3888 QLIST_FOREACH(child, &bs->children, next) { 3889 if (strstart(qdict_entry_key(entry), child->name, &p) 3890 && (!*p || *p == '.')) 3891 { 3892 break; 3893 } 3894 } 3895 if (child) { 3896 continue; 3897 } 3898 3899 /* And exclude all non-driver-specific options */ 3900 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) { 3901 if (!strcmp(qdict_entry_key(entry), desc->name)) { 3902 break; 3903 } 3904 } 3905 if (desc->name) { 3906 continue; 3907 } 3908 3909 qobject_incref(qdict_entry_value(entry)); 3910 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 3911 found_any = true; 3912 } 3913 3914 return found_any; 3915 } 3916 3917 /* Updates the following BDS fields: 3918 * - exact_filename: A filename which may be used for opening a block device 3919 * which (mostly) equals the given BDS (even without any 3920 * other options; so reading and writing must return the same 3921 * results, but caching etc. may be different) 3922 * - full_open_options: Options which, when given when opening a block device 3923 * (without a filename), result in a BDS (mostly) 3924 * equalling the given one 3925 * - filename: If exact_filename is set, it is copied here. Otherwise, 3926 * full_open_options is converted to a JSON object, prefixed with 3927 * "json:" (for use through the JSON pseudo protocol) and put here. 
3928 */ 3929 void bdrv_refresh_filename(BlockDriverState *bs) 3930 { 3931 BlockDriver *drv = bs->drv; 3932 QDict *opts; 3933 3934 if (!drv) { 3935 return; 3936 } 3937 3938 /* This BDS's file name will most probably depend on its file's name, so 3939 * refresh that first */ 3940 if (bs->file) { 3941 bdrv_refresh_filename(bs->file->bs); 3942 } 3943 3944 if (drv->bdrv_refresh_filename) { 3945 /* Obsolete information is of no use here, so drop the old file name 3946 * information before refreshing it */ 3947 bs->exact_filename[0] = '\0'; 3948 if (bs->full_open_options) { 3949 QDECREF(bs->full_open_options); 3950 bs->full_open_options = NULL; 3951 } 3952 3953 opts = qdict_new(); 3954 append_open_options(opts, bs); 3955 drv->bdrv_refresh_filename(bs, opts); 3956 QDECREF(opts); 3957 } else if (bs->file) { 3958 /* Try to reconstruct valid information from the underlying file */ 3959 bool has_open_options; 3960 3961 bs->exact_filename[0] = '\0'; 3962 if (bs->full_open_options) { 3963 QDECREF(bs->full_open_options); 3964 bs->full_open_options = NULL; 3965 } 3966 3967 opts = qdict_new(); 3968 has_open_options = append_open_options(opts, bs); 3969 3970 /* If no specific options have been given for this BDS, the filename of 3971 * the underlying file should suffice for this one as well */ 3972 if (bs->file->bs->exact_filename[0] && !has_open_options) { 3973 strcpy(bs->exact_filename, bs->file->bs->exact_filename); 3974 } 3975 /* Reconstructing the full options QDict is simple for most format block 3976 * drivers, as long as the full options are known for the underlying 3977 * file BDS. The full options QDict of that file BDS should somehow 3978 * contain a representation of the filename, therefore the following 3979 * suffices without querying the (exact_)filename of this BDS. 
*/ 3980 if (bs->file->bs->full_open_options) { 3981 qdict_put_obj(opts, "driver", 3982 QOBJECT(qstring_from_str(drv->format_name))); 3983 QINCREF(bs->file->bs->full_open_options); 3984 qdict_put_obj(opts, "file", 3985 QOBJECT(bs->file->bs->full_open_options)); 3986 3987 bs->full_open_options = opts; 3988 } else { 3989 QDECREF(opts); 3990 } 3991 } else if (!bs->full_open_options && qdict_size(bs->options)) { 3992 /* There is no underlying file BDS (at least referenced by BDS.file), 3993 * so the full options QDict should be equal to the options given 3994 * specifically for this block device when it was opened (plus the 3995 * driver specification). 3996 * Because those options don't change, there is no need to update 3997 * full_open_options when it's already set. */ 3998 3999 opts = qdict_new(); 4000 append_open_options(opts, bs); 4001 qdict_put_obj(opts, "driver", 4002 QOBJECT(qstring_from_str(drv->format_name))); 4003 4004 if (bs->exact_filename[0]) { 4005 /* This may not work for all block protocol drivers (some may 4006 * require this filename to be parsed), but we have to find some 4007 * default solution here, so just include it. If some block driver 4008 * does not support pure options without any filename at all or 4009 * needs some special format of the options QDict, it needs to 4010 * implement the driver-specific bdrv_refresh_filename() function. 4011 */ 4012 qdict_put_obj(opts, "filename", 4013 QOBJECT(qstring_from_str(bs->exact_filename))); 4014 } 4015 4016 bs->full_open_options = opts; 4017 } 4018 4019 if (bs->exact_filename[0]) { 4020 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4021 } else if (bs->full_open_options) { 4022 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4023 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4024 qstring_get_str(json)); 4025 QDECREF(json); 4026 } 4027 } 4028 4029 /* 4030 * Hot add/remove a BDS's child. 
So the user can take a child offline when 4031 * it is broken and take a new child online 4032 */ 4033 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, 4034 Error **errp) 4035 { 4036 4037 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { 4038 error_setg(errp, "The node %s does not support adding a child", 4039 bdrv_get_device_or_node_name(parent_bs)); 4040 return; 4041 } 4042 4043 if (!QLIST_EMPTY(&child_bs->parents)) { 4044 error_setg(errp, "The node %s already has a parent", 4045 child_bs->node_name); 4046 return; 4047 } 4048 4049 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); 4050 } 4051 4052 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) 4053 { 4054 BdrvChild *tmp; 4055 4056 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { 4057 error_setg(errp, "The node %s does not support removing a child", 4058 bdrv_get_device_or_node_name(parent_bs)); 4059 return; 4060 } 4061 4062 QLIST_FOREACH(tmp, &parent_bs->children, next) { 4063 if (tmp == child) { 4064 break; 4065 } 4066 } 4067 4068 if (!tmp) { 4069 error_setg(errp, "The node %s does not have a child named %s", 4070 bdrv_get_device_or_node_name(parent_bs), 4071 bdrv_get_device_or_node_name(child->bs)); 4072 return; 4073 } 4074 4075 parent_bs->drv->bdrv_del_child(parent_bs, child, errp); 4076 } 4077