1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "qemu/error-report.h" 30 #include "qemu/module.h" 31 #include "qapi/qmp/qerror.h" 32 #include "qapi/qmp/qjson.h" 33 #include "sysemu/block-backend.h" 34 #include "sysemu/sysemu.h" 35 #include "qemu/notify.h" 36 #include "block/coroutine.h" 37 #include "block/qapi.h" 38 #include "qmp-commands.h" 39 #include "qemu/timer.h" 40 #include "qapi-event.h" 41 #include "block/throttle-groups.h" 42 43 #ifdef CONFIG_BSD 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <sys/ioctl.h> 47 #include <sys/queue.h> 48 #ifndef __DragonFly__ 49 #include <sys/disk.h> 50 #endif 51 #endif 52 53 #ifdef _WIN32 54 #include <windows.h> 55 #endif 56 57 /** 58 * A BdrvDirtyBitmap can be in three possible states: 59 * (1) successor is NULL and disabled is false: full r/w mode 60 * (2) successor is NULL and disabled is true: read only mode ("disabled") 61 * (3) successor is set: frozen mode. 62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 63 * or enabled. A frozen bitmap can only abdicate() or reclaim(). 64 */ 65 struct BdrvDirtyBitmap { 66 HBitmap *bitmap; /* Dirty sector bitmap implementation */ 67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 68 char *name; /* Optional non-empty unique ID */ 69 int64_t size; /* Size of the bitmap (Number of sectors) */ 70 bool disabled; /* Bitmap is read-only */ 71 QLIST_ENTRY(BdrvDirtyBitmap) list; 72 }; 73 74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 75 76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(bdrv_states); 78 79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 81 82 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 83 QLIST_HEAD_INITIALIZER(bdrv_drivers); 84 85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 86 const char *reference, QDict *options, int flags, 87 BlockDriverState *parent, 88 const BdrvChildRole *child_role, 89 BlockDriver *drv, Error **errp); 90 91 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 92 /* If non-zero, use only whitelisted block drivers */ 93 static int use_bdrv_whitelist; 94 95 #ifdef _WIN32 96 static int is_windows_drive_prefix(const char *filename) 97 { 98 return (((filename[0] >= 'a' && filename[0] <= 'z') || 99 (filename[0] >= 'A' && filename[0] <= 'Z')) && 100 filename[1] == ':'); 101 } 102 103 int is_windows_drive(const char *filename) 104 { 105 if (is_windows_drive_prefix(filename) && 106 filename[2] == '\0') 107 return 1; 108 if (strstart(filename, "\\\\.\\", NULL) || 109 strstart(filename, "//./", NULL)) 110 return 1; 111 return 0; 112 } 113 #endif 114 115 size_t bdrv_opt_mem_align(BlockDriverState *bs) 116 { 117 if (!bs || !bs->drv) { 118 /* page size or 4k (hdd sector size) should be on the safe side */ 119 return MAX(4096, getpagesize()); 120 } 121 122 return bs->bl.opt_mem_alignment; 123 } 124 125 size_t bdrv_min_mem_align(BlockDriverState *bs) 126 { 127 if (!bs || !bs->drv) { 128 /* page size or 4k (hdd sector size) should be on the safe side */ 129 return MAX(4096, getpagesize()); 130 } 131 132 return bs->bl.min_mem_alignment; 133 } 134 135 /* check if the path starts with "<protocol>:" */ 136 int path_has_protocol(const char *path) 137 { 138 const char *p; 139 140 #ifdef _WIN32 141 if (is_windows_drive(path) || 142 is_windows_drive_prefix(path)) { 143 return 0; 144 } 145 p = path + strcspn(path, ":/\\"); 146 #else 147 p = path + strcspn(path, ":/"); 148 #endif 149 150 return *p == ':'; 151 } 152 153 int path_is_absolute(const char *path) 154 { 155 #ifdef _WIN32 156 /* specific case for names like: "\\.\d:" */ 157 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 158 return 1; 159 } 160 return (*path == '/' || *path == '\\'); 161 #else 162 return (*path == '/'); 163 #endif 164 } 165 166 /* if filename is absolute, just copy it to dest. Otherwise, build a 167 path to it by considering it is relative to base_path. URL are 168 supported. */ 169 void path_combine(char *dest, int dest_size, 170 const char *base_path, 171 const char *filename) 172 { 173 const char *p, *p1; 174 int len; 175 176 if (dest_size <= 0) 177 return; 178 if (path_is_absolute(filename)) { 179 pstrcpy(dest, dest_size, filename); 180 } else { 181 p = strchr(base_path, ':'); 182 if (p) 183 p++; 184 else 185 p = base_path; 186 p1 = strrchr(base_path, '/'); 187 #ifdef _WIN32 188 { 189 const char *p2; 190 p2 = strrchr(base_path, '\\'); 191 if (!p1 || p2 > p1) 192 p1 = p2; 193 } 194 #endif 195 if (p1) 196 p1++; 197 else 198 p1 = base_path; 199 if (p1 > p) 200 p = p1; 201 len = p - base_path; 202 if (len > dest_size - 1) 203 len = dest_size - 1; 204 memcpy(dest, base_path, len); 205 dest[len] = '\0'; 206 pstrcat(dest, dest_size, filename); 207 } 208 } 209 210 void bdrv_get_full_backing_filename_from_filename(const char *backed, 211 const char *backing, 212 char *dest, size_t sz, 213 Error **errp) 214 { 215 if (backing[0] == '\0' || path_has_protocol(backing) || 216 path_is_absolute(backing)) 217 { 218 pstrcpy(dest, sz, backing); 219 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 220 error_setg(errp, "Cannot use relative backing file names for '%s'", 221 backed); 222 } else { 223 path_combine(dest, sz, backed, backing); 224 } 225 } 226 227 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 228 Error **errp) 229 { 230 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 231 232 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 233 dest, sz, errp); 234 } 235 236 void bdrv_register(BlockDriver *bdrv) 237 { 238 bdrv_setup_io_funcs(bdrv); 239 240 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 241 } 242 243 BlockDriverState *bdrv_new_root(void) 244 { 245 BlockDriverState *bs = bdrv_new(); 246 247 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 248 return bs; 249 } 250 251 BlockDriverState *bdrv_new(void) 252 { 253 BlockDriverState *bs; 254 int i; 255 256 bs = g_new0(BlockDriverState, 1); 257 QLIST_INIT(&bs->dirty_bitmaps); 258 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 259 QLIST_INIT(&bs->op_blockers[i]); 260 } 261 bdrv_iostatus_disable(bs); 262 notifier_list_init(&bs->close_notifiers); 263 notifier_with_return_list_init(&bs->before_write_notifiers); 264 qemu_co_queue_init(&bs->throttled_reqs[0]); 265 qemu_co_queue_init(&bs->throttled_reqs[1]); 266 bs->refcnt = 1; 267 bs->aio_context = qemu_get_aio_context(); 268 269 return bs; 270 } 271 272 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 273 { 274 notifier_list_add(&bs->close_notifiers, notify); 275 } 276 277 BlockDriver *bdrv_find_format(const char *format_name) 278 { 279 BlockDriver *drv1; 280 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 281 if (!strcmp(drv1->format_name, format_name)) { 282 return drv1; 283 } 284 } 285 return NULL; 286 } 287 288 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 289 { 290 static const char *whitelist_rw[] = { 291 CONFIG_BDRV_RW_WHITELIST 292 }; 293 static const char *whitelist_ro[] = { 294 CONFIG_BDRV_RO_WHITELIST 295 }; 296 const char **p; 297 298 if (!whitelist_rw[0] && !whitelist_ro[0]) { 299 return 1; /* no whitelist, anything goes */ 300 } 301 302 for (p = whitelist_rw; *p; p++) { 303 if (!strcmp(drv->format_name, *p)) { 304 return 1; 305 } 306 } 307 if (read_only) { 308 for (p = whitelist_ro; *p; p++) { 309 if (!strcmp(drv->format_name, *p)) { 310 return 1; 311 } 312 } 313 } 314 return 0; 315 } 316 317 BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 318 bool read_only) 319 { 320 BlockDriver *drv = bdrv_find_format(format_name); 321 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 322 } 323 324 typedef struct CreateCo { 325 BlockDriver *drv; 326 char *filename; 327 QemuOpts *opts; 328 int ret; 329 Error *err; 330 } CreateCo; 331 332 static void coroutine_fn bdrv_create_co_entry(void *opaque) 333 { 334 Error *local_err = NULL; 335 int ret; 336 337 CreateCo *cco = opaque; 338 assert(cco->drv); 339 340 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 341 if (local_err) { 342 error_propagate(&cco->err, local_err); 343 } 344 cco->ret = ret; 345 } 346 347 int bdrv_create(BlockDriver *drv, const char* filename, 348 QemuOpts *opts, Error **errp) 349 { 350 int ret; 351 352 Coroutine *co; 353 CreateCo cco = { 354 .drv = drv, 355 .filename = g_strdup(filename), 356 .opts = opts, 357 .ret = NOT_DONE, 358 .err = NULL, 359 }; 360 361 if (!drv->bdrv_create) { 362 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 363 ret = -ENOTSUP; 364 goto out; 365 } 366 367 if (qemu_in_coroutine()) { 368 /* Fast-path if already in coroutine context */ 369 bdrv_create_co_entry(&cco); 370 } else { 371 co = qemu_coroutine_create(bdrv_create_co_entry); 372 qemu_coroutine_enter(co, &cco); 373 while (cco.ret == NOT_DONE) { 374 aio_poll(qemu_get_aio_context(), true); 375 } 376 } 377 378 ret = cco.ret; 379 if (ret < 0) { 380 if (cco.err) { 381 error_propagate(errp, cco.err); 382 } else { 383 error_setg_errno(errp, -ret, "Could not create image"); 384 } 385 } 386 387 out: 388 g_free(cco.filename); 389 return ret; 390 } 391 392 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 393 { 394 BlockDriver *drv; 395 Error *local_err = NULL; 396 int ret; 397 398 drv = bdrv_find_protocol(filename, true, errp); 399 if (drv == NULL) { 400 return -ENOENT; 401 } 402 403 ret = bdrv_create(drv, filename, opts, &local_err); 404 if (local_err) { 405 error_propagate(errp, local_err); 406 } 407 return ret; 408 } 409 410 /** 411 * Try to get @bs's logical and physical block size. 412 * On success, store them in @bsz struct and return 0. 413 * On failure return -errno. 414 * @bs must not be empty. 415 */ 416 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 417 { 418 BlockDriver *drv = bs->drv; 419 420 if (drv && drv->bdrv_probe_blocksizes) { 421 return drv->bdrv_probe_blocksizes(bs, bsz); 422 } 423 424 return -ENOTSUP; 425 } 426 427 /** 428 * Try to get @bs's geometry (cyls, heads, sectors). 429 * On success, store them in @geo struct and return 0. 430 * On failure return -errno. 431 * @bs must not be empty. 432 */ 433 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 434 { 435 BlockDriver *drv = bs->drv; 436 437 if (drv && drv->bdrv_probe_geometry) { 438 return drv->bdrv_probe_geometry(bs, geo); 439 } 440 441 return -ENOTSUP; 442 } 443 444 /* 445 * Create a uniquely-named empty temporary file. 446 * Return 0 upon success, otherwise a negative errno value. 447 */ 448 int get_tmp_filename(char *filename, int size) 449 { 450 #ifdef _WIN32 451 char temp_dir[MAX_PATH]; 452 /* GetTempFileName requires that its output buffer (4th param) 453 have length MAX_PATH or greater. */ 454 assert(size >= MAX_PATH); 455 return (GetTempPath(MAX_PATH, temp_dir) 456 && GetTempFileName(temp_dir, "qem", 0, filename) 457 ? 0 : -GetLastError()); 458 #else 459 int fd; 460 const char *tmpdir; 461 tmpdir = getenv("TMPDIR"); 462 if (!tmpdir) { 463 tmpdir = "/var/tmp"; 464 } 465 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 466 return -EOVERFLOW; 467 } 468 fd = mkstemp(filename); 469 if (fd < 0) { 470 return -errno; 471 } 472 if (close(fd) != 0) { 473 unlink(filename); 474 return -errno; 475 } 476 return 0; 477 #endif 478 } 479 480 /* 481 * Detect host devices. By convention, /dev/cdrom[N] is always 482 * recognized as a host CDROM. 483 */ 484 static BlockDriver *find_hdev_driver(const char *filename) 485 { 486 int score_max = 0, score; 487 BlockDriver *drv = NULL, *d; 488 489 QLIST_FOREACH(d, &bdrv_drivers, list) { 490 if (d->bdrv_probe_device) { 491 score = d->bdrv_probe_device(filename); 492 if (score > score_max) { 493 score_max = score; 494 drv = d; 495 } 496 } 497 } 498 499 return drv; 500 } 501 502 BlockDriver *bdrv_find_protocol(const char *filename, 503 bool allow_protocol_prefix, 504 Error **errp) 505 { 506 BlockDriver *drv1; 507 char protocol[128]; 508 int len; 509 const char *p; 510 511 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 512 513 /* 514 * XXX(hch): we really should not let host device detection 515 * override an explicit protocol specification, but moving this 516 * later breaks access to device names with colons in them. 517 * Thanks to the brain-dead persistent naming schemes on udev- 518 * based Linux systems those actually are quite common. 519 */ 520 drv1 = find_hdev_driver(filename); 521 if (drv1) { 522 return drv1; 523 } 524 525 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 526 return &bdrv_file; 527 } 528 529 p = strchr(filename, ':'); 530 assert(p != NULL); 531 len = p - filename; 532 if (len > sizeof(protocol) - 1) 533 len = sizeof(protocol) - 1; 534 memcpy(protocol, filename, len); 535 protocol[len] = '\0'; 536 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 537 if (drv1->protocol_name && 538 !strcmp(drv1->protocol_name, protocol)) { 539 return drv1; 540 } 541 } 542 543 error_setg(errp, "Unknown protocol '%s'", protocol); 544 return NULL; 545 } 546 547 /* 548 * Guess image format by probing its contents. 549 * This is not a good idea when your image is raw (CVE-2008-2004), but 550 * we do it anyway for backward compatibility. 551 * 552 * @buf contains the image's first @buf_size bytes. 553 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 554 * but can be smaller if the image file is smaller) 555 * @filename is its filename. 556 * 557 * For all block drivers, call the bdrv_probe() method to get its 558 * probing score. 559 * Return the first block driver with the highest probing score. 560 */ 561 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 562 const char *filename) 563 { 564 int score_max = 0, score; 565 BlockDriver *drv = NULL, *d; 566 567 QLIST_FOREACH(d, &bdrv_drivers, list) { 568 if (d->bdrv_probe) { 569 score = d->bdrv_probe(buf, buf_size, filename); 570 if (score > score_max) { 571 score_max = score; 572 drv = d; 573 } 574 } 575 } 576 577 return drv; 578 } 579 580 static int find_image_format(BlockDriverState *bs, const char *filename, 581 BlockDriver **pdrv, Error **errp) 582 { 583 BlockDriver *drv; 584 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 585 int ret = 0; 586 587 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 588 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 589 *pdrv = &bdrv_raw; 590 return ret; 591 } 592 593 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 594 if (ret < 0) { 595 error_setg_errno(errp, -ret, "Could not read image for determining its " 596 "format"); 597 *pdrv = NULL; 598 return ret; 599 } 600 601 drv = bdrv_probe_all(buf, ret, filename); 602 if (!drv) { 603 error_setg(errp, "Could not determine image format: No compatible " 604 "driver found"); 605 ret = -ENOENT; 606 } 607 *pdrv = drv; 608 return ret; 609 } 610 611 /** 612 * Set the current 'total_sectors' value 613 * Return 0 on success, -errno on error. 614 */ 615 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 616 { 617 BlockDriver *drv = bs->drv; 618 619 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 620 if (bdrv_is_sg(bs)) 621 return 0; 622 623 /* query actual device if possible, otherwise just trust the hint */ 624 if (drv->bdrv_getlength) { 625 int64_t length = drv->bdrv_getlength(bs); 626 if (length < 0) { 627 return length; 628 } 629 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 630 } 631 632 bs->total_sectors = hint; 633 return 0; 634 } 635 636 /** 637 * Set open flags for a given discard mode 638 * 639 * Return 0 on success, -1 if the discard mode was invalid. 640 */ 641 int bdrv_parse_discard_flags(const char *mode, int *flags) 642 { 643 *flags &= ~BDRV_O_UNMAP; 644 645 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 646 /* do nothing */ 647 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 648 *flags |= BDRV_O_UNMAP; 649 } else { 650 return -1; 651 } 652 653 return 0; 654 } 655 656 /** 657 * Set open flags for a given cache mode 658 * 659 * Return 0 on success, -1 if the cache mode was invalid. 660 */ 661 int bdrv_parse_cache_flags(const char *mode, int *flags) 662 { 663 *flags &= ~BDRV_O_CACHE_MASK; 664 665 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 666 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 667 } else if (!strcmp(mode, "directsync")) { 668 *flags |= BDRV_O_NOCACHE; 669 } else if (!strcmp(mode, "writeback")) { 670 *flags |= BDRV_O_CACHE_WB; 671 } else if (!strcmp(mode, "unsafe")) { 672 *flags |= BDRV_O_CACHE_WB; 673 *flags |= BDRV_O_NO_FLUSH; 674 } else if (!strcmp(mode, "writethrough")) { 675 /* this is the default */ 676 } else { 677 return -1; 678 } 679 680 return 0; 681 } 682 683 /* 684 * Returns the flags that a temporary snapshot should get, based on the 685 * originally requested flags (the originally requested image will have flags 686 * like a backing file) 687 */ 688 static int bdrv_temp_snapshot_flags(int flags) 689 { 690 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 691 } 692 693 /* 694 * Returns the flags that bs->file should get if a protocol driver is expected, 695 * based on the given flags for the parent BDS 696 */ 697 static int bdrv_inherited_flags(int flags) 698 { 699 /* Enable protocol handling, disable format probing for bs->file */ 700 flags |= BDRV_O_PROTOCOL; 701 702 /* Our block drivers take care to send flushes and respect unmap policy, 703 * so we can enable both unconditionally on lower layers. */ 704 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 705 706 /* Clear flags that only apply to the top layer */ 707 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 708 709 return flags; 710 } 711 712 const BdrvChildRole child_file = { 713 .inherit_flags = bdrv_inherited_flags, 714 }; 715 716 /* 717 * Returns the flags that bs->file should get if the use of formats (and not 718 * only protocols) is permitted for it, based on the given flags for the parent 719 * BDS 720 */ 721 static int bdrv_inherited_fmt_flags(int parent_flags) 722 { 723 int flags = child_file.inherit_flags(parent_flags); 724 return flags & ~BDRV_O_PROTOCOL; 725 } 726 727 const BdrvChildRole child_format = { 728 .inherit_flags = bdrv_inherited_fmt_flags, 729 }; 730 731 /* 732 * Returns the flags that bs->backing_hd should get, based on the given flags 733 * for the parent BDS 734 */ 735 static int bdrv_backing_flags(int flags) 736 { 737 /* backing files always opened read-only */ 738 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 739 740 /* snapshot=on is handled on the top layer */ 741 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 742 743 return flags; 744 } 745 746 static const BdrvChildRole child_backing = { 747 .inherit_flags = bdrv_backing_flags, 748 }; 749 750 static int bdrv_open_flags(BlockDriverState *bs, int flags) 751 { 752 int open_flags = flags | BDRV_O_CACHE_WB; 753 754 /* 755 * Clear flags that are internal to the block layer before opening the 756 * image. 757 */ 758 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 759 760 /* 761 * Snapshots should be writable. 762 */ 763 if (flags & BDRV_O_TEMPORARY) { 764 open_flags |= BDRV_O_RDWR; 765 } 766 767 return open_flags; 768 } 769 770 static void bdrv_assign_node_name(BlockDriverState *bs, 771 const char *node_name, 772 Error **errp) 773 { 774 if (!node_name) { 775 return; 776 } 777 778 /* Check for empty string or invalid characters */ 779 if (!id_wellformed(node_name)) { 780 error_setg(errp, "Invalid node name"); 781 return; 782 } 783 784 /* takes care of avoiding namespaces collisions */ 785 if (blk_by_name(node_name)) { 786 error_setg(errp, "node-name=%s is conflicting with a device id", 787 node_name); 788 return; 789 } 790 791 /* takes care of avoiding duplicates node names */ 792 if (bdrv_find_node(node_name)) { 793 error_setg(errp, "Duplicate node name"); 794 return; 795 } 796 797 /* copy node name into the bs and insert it into the graph list */ 798 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 799 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 800 } 801 802 static QemuOptsList bdrv_runtime_opts = { 803 .name = "bdrv_common", 804 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 805 .desc = { 806 { 807 .name = "node-name", 808 .type = QEMU_OPT_STRING, 809 .help = "Node name of the block device node", 810 }, 811 { /* end of list */ } 812 }, 813 }; 814 815 /* 816 * Common part for opening disk images and files 817 * 818 * Removes all processed options from *options. 819 */ 820 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, 821 QDict *options, int flags, BlockDriver *drv, Error **errp) 822 { 823 int ret, open_flags; 824 const char *filename; 825 const char *node_name = NULL; 826 QemuOpts *opts; 827 Error *local_err = NULL; 828 829 assert(drv != NULL); 830 assert(bs->file == NULL); 831 assert(options != NULL && bs->options != options); 832 833 if (file != NULL) { 834 filename = file->filename; 835 } else { 836 filename = qdict_get_try_str(options, "filename"); 837 } 838 839 if (drv->bdrv_needs_filename && !filename) { 840 error_setg(errp, "The '%s' block driver requires a file name", 841 drv->format_name); 842 return -EINVAL; 843 } 844 845 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); 846 847 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 848 qemu_opts_absorb_qdict(opts, options, &local_err); 849 if (local_err) { 850 error_propagate(errp, local_err); 851 ret = -EINVAL; 852 goto fail_opts; 853 } 854 855 node_name = qemu_opt_get(opts, "node-name"); 856 bdrv_assign_node_name(bs, node_name, &local_err); 857 if (local_err) { 858 error_propagate(errp, local_err); 859 ret = -EINVAL; 860 goto fail_opts; 861 } 862 863 bs->guest_block_size = 512; 864 bs->request_alignment = 512; 865 bs->zero_beyond_eof = true; 866 open_flags = bdrv_open_flags(bs, flags); 867 bs->read_only = !(open_flags & BDRV_O_RDWR); 868 869 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 870 error_setg(errp, 871 !bs->read_only && bdrv_is_whitelisted(drv, true) 872 ? "Driver '%s' can only be used for read-only devices" 873 : "Driver '%s' is not whitelisted", 874 drv->format_name); 875 ret = -ENOTSUP; 876 goto fail_opts; 877 } 878 879 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 880 if (flags & BDRV_O_COPY_ON_READ) { 881 if (!bs->read_only) { 882 bdrv_enable_copy_on_read(bs); 883 } else { 884 error_setg(errp, "Can't use copy-on-read on read-only device"); 885 ret = -EINVAL; 886 goto fail_opts; 887 } 888 } 889 890 if (filename != NULL) { 891 pstrcpy(bs->filename, sizeof(bs->filename), filename); 892 } else { 893 bs->filename[0] = '\0'; 894 } 895 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 896 897 bs->drv = drv; 898 bs->opaque = g_malloc0(drv->instance_size); 899 900 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); 901 902 /* Open the image, either directly or using a protocol */ 903 if (drv->bdrv_file_open) { 904 assert(file == NULL); 905 assert(!drv->bdrv_needs_filename || filename != NULL); 906 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 907 } else { 908 if (file == NULL) { 909 error_setg(errp, "Can't use '%s' as a block driver for the " 910 "protocol level", drv->format_name); 911 ret = -EINVAL; 912 goto free_and_fail; 913 } 914 bs->file = file; 915 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 916 } 917 918 if (ret < 0) { 919 if (local_err) { 920 error_propagate(errp, local_err); 921 } else if (bs->filename[0]) { 922 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 923 } else { 924 error_setg_errno(errp, -ret, "Could not open image"); 925 } 926 goto free_and_fail; 927 } 928 929 if (bs->encrypted) { 930 error_report("Encrypted images are deprecated"); 931 error_printf("Support for them will be removed in a future release.\n" 932 "You can use 'qemu-img convert' to convert your image" 933 " to an unencrypted one.\n"); 934 } 935 936 ret = refresh_total_sectors(bs, bs->total_sectors); 937 if (ret < 0) { 938 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 939 goto free_and_fail; 940 } 941 942 bdrv_refresh_limits(bs, &local_err); 943 if (local_err) { 944 error_propagate(errp, local_err); 945 ret = -EINVAL; 946 goto free_and_fail; 947 } 948 949 assert(bdrv_opt_mem_align(bs) != 0); 950 assert(bdrv_min_mem_align(bs) != 0); 951 assert((bs->request_alignment != 0) || bdrv_is_sg(bs)); 952 953 qemu_opts_del(opts); 954 return 0; 955 956 free_and_fail: 957 bs->file = NULL; 958 g_free(bs->opaque); 959 bs->opaque = NULL; 960 bs->drv = NULL; 961 fail_opts: 962 qemu_opts_del(opts); 963 return ret; 964 } 965 966 static QDict *parse_json_filename(const char *filename, Error **errp) 967 { 968 QObject *options_obj; 969 QDict *options; 970 int ret; 971 972 ret = strstart(filename, "json:", &filename); 973 assert(ret); 974 975 options_obj = qobject_from_json(filename); 976 if (!options_obj) { 977 error_setg(errp, "Could not parse the JSON options"); 978 return NULL; 979 } 980 981 if (qobject_type(options_obj) != QTYPE_QDICT) { 982 qobject_decref(options_obj); 983 error_setg(errp, "Invalid JSON object given"); 984 return NULL; 985 } 986 987 options = qobject_to_qdict(options_obj); 988 qdict_flatten(options); 989 990 return options; 991 } 992 993 /* 994 * Fills in default options for opening images and converts the legacy 995 * filename/flags pair to option QDict entries. 996 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 997 * block driver has been specified explicitly. 998 */ 999 static int bdrv_fill_options(QDict **options, const char **pfilename, 1000 int *flags, BlockDriver *drv, Error **errp) 1001 { 1002 const char *filename = *pfilename; 1003 const char *drvname; 1004 bool protocol = *flags & BDRV_O_PROTOCOL; 1005 bool parse_filename = false; 1006 BlockDriver *tmp_drv; 1007 Error *local_err = NULL; 1008 1009 /* Parse json: pseudo-protocol */ 1010 if (filename && g_str_has_prefix(filename, "json:")) { 1011 QDict *json_options = parse_json_filename(filename, &local_err); 1012 if (local_err) { 1013 error_propagate(errp, local_err); 1014 return -EINVAL; 1015 } 1016 1017 /* Options given in the filename have lower priority than options 1018 * specified directly */ 1019 qdict_join(*options, json_options, false); 1020 QDECREF(json_options); 1021 *pfilename = filename = NULL; 1022 } 1023 1024 drvname = qdict_get_try_str(*options, "driver"); 1025 1026 /* If the user has explicitly specified the driver, this choice should 1027 * override the BDRV_O_PROTOCOL flag */ 1028 tmp_drv = drv; 1029 if (!tmp_drv && drvname) { 1030 tmp_drv = bdrv_find_format(drvname); 1031 } 1032 if (tmp_drv) { 1033 protocol = tmp_drv->bdrv_file_open; 1034 } 1035 1036 if (protocol) { 1037 *flags |= BDRV_O_PROTOCOL; 1038 } else { 1039 *flags &= ~BDRV_O_PROTOCOL; 1040 } 1041 1042 /* Fetch the file name from the options QDict if necessary */ 1043 if (protocol && filename) { 1044 if (!qdict_haskey(*options, "filename")) { 1045 qdict_put(*options, "filename", qstring_from_str(filename)); 1046 parse_filename = true; 1047 } else { 1048 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1049 "the same time"); 1050 return -EINVAL; 1051 } 1052 } 1053 1054 /* Find the right block driver */ 1055 filename = qdict_get_try_str(*options, "filename"); 1056 1057 if (drv) { 1058 if (drvname) { 1059 error_setg(errp, "Driver specified twice"); 1060 return -EINVAL; 1061 } 1062 drvname = drv->format_name; 1063 qdict_put(*options, "driver", qstring_from_str(drvname)); 1064 } else { 1065 if (!drvname && protocol) { 1066 if (filename) { 1067 drv = bdrv_find_protocol(filename, parse_filename, errp); 1068 if (!drv) { 1069 return -EINVAL; 1070 } 1071 1072 drvname = drv->format_name; 1073 qdict_put(*options, "driver", qstring_from_str(drvname)); 1074 } else { 1075 error_setg(errp, "Must specify either driver or file"); 1076 return -EINVAL; 1077 } 1078 } else if (drvname) { 1079 drv = bdrv_find_format(drvname); 1080 if (!drv) { 1081 error_setg(errp, "Unknown driver '%s'", drvname); 1082 return -ENOENT; 1083 } 1084 } 1085 } 1086 1087 assert(drv || !protocol); 1088 1089 /* Driver-specific filename parsing */ 1090 if (drv && drv->bdrv_parse_filename && parse_filename) { 1091 drv->bdrv_parse_filename(filename, *options, &local_err); 1092 if (local_err) { 1093 error_propagate(errp, local_err); 1094 return -EINVAL; 1095 } 1096 1097 if (!drv->bdrv_needs_filename) { 1098 qdict_del(*options, "filename"); 1099 } 1100 } 1101 1102 return 0; 1103 } 1104 1105 static void bdrv_attach_child(BlockDriverState *parent_bs, 1106 BlockDriverState *child_bs, 1107 const BdrvChildRole *child_role) 1108 { 1109 BdrvChild *child = g_new(BdrvChild, 1); 1110 *child = (BdrvChild) { 1111 .bs = child_bs, 1112 .role = child_role, 1113 }; 1114 1115 QLIST_INSERT_HEAD(&parent_bs->children, child, next); 1116 } 1117 1118 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 1119 { 1120 1121 if (bs->backing_hd) { 1122 assert(bs->backing_blocker); 1123 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); 1124 } else if (backing_hd) { 1125 error_setg(&bs->backing_blocker, 1126 "node is used as backing hd of '%s'", 1127 bdrv_get_device_or_node_name(bs)); 1128 } 1129 1130 bs->backing_hd = backing_hd; 1131 if (!backing_hd) { 1132 error_free(bs->backing_blocker); 1133 bs->backing_blocker = NULL; 1134 goto out; 1135 } 1136 bs->open_flags &= ~BDRV_O_NO_BACKING; 1137 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 1138 pstrcpy(bs->backing_format, sizeof(bs->backing_format), 1139 backing_hd->drv ? backing_hd->drv->format_name : ""); 1140 1141 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); 1142 /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1143 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1144 bs->backing_blocker); 1145 out: 1146 bdrv_refresh_limits(bs, NULL); 1147 } 1148 1149 /* 1150 * Opens the backing file for a BlockDriverState if not yet open 1151 * 1152 * options is a QDict of options to pass to the block drivers, or NULL for an 1153 * empty set of options. The reference to the QDict is transferred to this 1154 * function (even on failure), so if the caller intends to reuse the dictionary, 1155 * it needs to use QINCREF() before calling bdrv_file_open. 1156 */ 1157 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 1158 { 1159 char *backing_filename = g_malloc0(PATH_MAX); 1160 int ret = 0; 1161 BlockDriverState *backing_hd; 1162 Error *local_err = NULL; 1163 1164 if (bs->backing_hd != NULL) { 1165 QDECREF(options); 1166 goto free_exit; 1167 } 1168 1169 /* NULL means an empty set of options */ 1170 if (options == NULL) { 1171 options = qdict_new(); 1172 } 1173 1174 bs->open_flags &= ~BDRV_O_NO_BACKING; 1175 if (qdict_haskey(options, "file.filename")) { 1176 backing_filename[0] = '\0'; 1177 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 1178 QDECREF(options); 1179 goto free_exit; 1180 } else { 1181 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 1182 &local_err); 1183 if (local_err) { 1184 ret = -EINVAL; 1185 error_propagate(errp, local_err); 1186 QDECREF(options); 1187 goto free_exit; 1188 } 1189 } 1190 1191 if (!bs->drv || !bs->drv->supports_backing) { 1192 ret = -EINVAL; 1193 error_setg(errp, "Driver doesn't support backing files"); 1194 QDECREF(options); 1195 goto free_exit; 1196 } 1197 1198 backing_hd = bdrv_new(); 1199 1200 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1201 qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 1202 } 1203 1204 assert(bs->backing_hd == NULL); 1205 ret = bdrv_open_inherit(&backing_hd, 1206 *backing_filename ? backing_filename : NULL, 1207 NULL, options, 0, bs, &child_backing, 1208 NULL, &local_err); 1209 if (ret < 0) { 1210 bdrv_unref(backing_hd); 1211 backing_hd = NULL; 1212 bs->open_flags |= BDRV_O_NO_BACKING; 1213 error_setg(errp, "Could not open backing file: %s", 1214 error_get_pretty(local_err)); 1215 error_free(local_err); 1216 goto free_exit; 1217 } 1218 1219 bdrv_attach_child(bs, backing_hd, &child_backing); 1220 bdrv_set_backing_hd(bs, backing_hd); 1221 1222 free_exit: 1223 g_free(backing_filename); 1224 return ret; 1225 } 1226 1227 /* 1228 * Opens a disk image whose options are given as BlockdevRef in another block 1229 * device's options. 1230 * 1231 * If allow_none is true, no image will be opened if filename is false and no 1232 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. 1233 * 1234 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1235 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1236 * itself, all options starting with "${bdref_key}." are considered part of the 1237 * BlockdevRef. 1238 * 1239 * The BlockdevRef will be removed from the options QDict. 1240 * 1241 * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1242 */ 1243 int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1244 QDict *options, const char *bdref_key, 1245 BlockDriverState* parent, const BdrvChildRole *child_role, 1246 bool allow_none, Error **errp) 1247 { 1248 QDict *image_options; 1249 int ret; 1250 char *bdref_key_dot; 1251 const char *reference; 1252 1253 assert(pbs); 1254 assert(*pbs == NULL); 1255 assert(child_role != NULL); 1256 1257 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1258 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1259 g_free(bdref_key_dot); 1260 1261 reference = qdict_get_try_str(options, bdref_key); 1262 if (!filename && !reference && !qdict_size(image_options)) { 1263 if (allow_none) { 1264 ret = 0; 1265 } else { 1266 error_setg(errp, "A block device must be specified for \"%s\"", 1267 bdref_key); 1268 ret = -EINVAL; 1269 } 1270 QDECREF(image_options); 1271 goto done; 1272 } 1273 1274 ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, 1275 parent, child_role, NULL, errp); 1276 if (ret < 0) { 1277 goto done; 1278 } 1279 1280 bdrv_attach_child(parent, *pbs, child_role); 1281 1282 done: 1283 qdict_del(options, bdref_key); 1284 return ret; 1285 } 1286 1287 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1288 { 1289 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 1290 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1291 int64_t total_size; 1292 QemuOpts *opts = NULL; 1293 QDict *snapshot_options; 1294 BlockDriverState *bs_snapshot; 1295 Error *local_err = NULL; 1296 int ret; 1297 1298 /* if snapshot, we create a temporary backing file and open it 1299 instead of opening 'filename' directly */ 1300 1301 /* Get the required size from the image */ 1302 total_size = bdrv_getlength(bs); 1303 if (total_size < 0) { 1304 ret = total_size; 1305 error_setg_errno(errp, -total_size, "Could not get image size"); 1306 goto out; 1307 } 1308 1309 /* Create the temporary image */ 1310 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1311 if (ret < 0) { 1312 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1313 goto out; 1314 } 1315 1316 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1317 &error_abort); 1318 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1319 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 1320 qemu_opts_del(opts); 1321 if (ret < 0) { 1322 error_setg_errno(errp, -ret, "Could not create temporary overlay " 1323 "'%s': %s", tmp_filename, 1324 error_get_pretty(local_err)); 1325 error_free(local_err); 1326 goto out; 1327 } 1328 1329 /* Prepare a new options QDict for the temporary file */ 1330 snapshot_options = qdict_new(); 1331 qdict_put(snapshot_options, "file.driver", 1332 qstring_from_str("file")); 1333 qdict_put(snapshot_options, "file.filename", 1334 qstring_from_str(tmp_filename)); 1335 1336 bs_snapshot = bdrv_new(); 1337 1338 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1339 flags, &bdrv_qcow2, &local_err); 1340 if (ret < 0) { 1341 error_propagate(errp, local_err); 1342 goto out; 1343 } 1344 1345 bdrv_append(bs_snapshot, bs); 1346 1347 out: 1348 g_free(tmp_filename); 1349 return ret; 1350 } 1351 1352 /* 1353 * Opens a disk image (raw, qcow2, vmdk, ...) 1354 * 1355 * options is a QDict of options to pass to the block drivers, or NULL for an 1356 * empty set of options. The reference to the QDict belongs to the block layer 1357 * after the call (even on failure), so if the caller intends to reuse the 1358 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1359 * 1360 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1361 * If it is not NULL, the referenced BDS will be reused. 1362 * 1363 * The reference parameter may be used to specify an existing block device which 1364 * should be opened. If specified, neither options nor a filename may be given, 1365 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1366 */ 1367 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 1368 const char *reference, QDict *options, int flags, 1369 BlockDriverState *parent, 1370 const BdrvChildRole *child_role, 1371 BlockDriver *drv, Error **errp) 1372 { 1373 int ret; 1374 BlockDriverState *file = NULL, *bs; 1375 const char *drvname; 1376 Error *local_err = NULL; 1377 int snapshot_flags = 0; 1378 1379 assert(pbs); 1380 assert(!child_role || !flags); 1381 assert(!child_role == !parent); 1382 1383 if (reference) { 1384 bool options_non_empty = options ? qdict_size(options) : false; 1385 QDECREF(options); 1386 1387 if (*pbs) { 1388 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1389 "another block device"); 1390 return -EINVAL; 1391 } 1392 1393 if (filename || options_non_empty) { 1394 error_setg(errp, "Cannot reference an existing block device with " 1395 "additional options or a new filename"); 1396 return -EINVAL; 1397 } 1398 1399 bs = bdrv_lookup_bs(reference, reference, errp); 1400 if (!bs) { 1401 return -ENODEV; 1402 } 1403 bdrv_ref(bs); 1404 *pbs = bs; 1405 return 0; 1406 } 1407 1408 if (*pbs) { 1409 bs = *pbs; 1410 } else { 1411 bs = bdrv_new(); 1412 } 1413 1414 /* NULL means an empty set of options */ 1415 if (options == NULL) { 1416 options = qdict_new(); 1417 } 1418 1419 if (child_role) { 1420 bs->inherits_from = parent; 1421 flags = child_role->inherit_flags(parent->open_flags); 1422 } 1423 1424 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); 1425 if (local_err) { 1426 goto fail; 1427 } 1428 1429 /* Find the right image format driver */ 1430 drv = NULL; 1431 drvname = qdict_get_try_str(options, "driver"); 1432 if (drvname) { 1433 drv = bdrv_find_format(drvname); 1434 qdict_del(options, "driver"); 1435 if (!drv) { 1436 error_setg(errp, "Unknown driver: '%s'", drvname); 1437 ret = -EINVAL; 1438 goto fail; 1439 } 1440 } 1441 1442 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1443 1444 bs->open_flags = flags; 1445 bs->options = options; 1446 options = qdict_clone_shallow(options); 1447 1448 /* Open image file without format layer */ 1449 if ((flags & BDRV_O_PROTOCOL) == 0) { 1450 if (flags & BDRV_O_RDWR) { 1451 flags |= BDRV_O_ALLOW_RDWR; 1452 } 1453 if (flags & BDRV_O_SNAPSHOT) { 1454 snapshot_flags = bdrv_temp_snapshot_flags(flags); 1455 flags = bdrv_backing_flags(flags); 1456 } 1457 1458 assert(file == NULL); 1459 bs->open_flags = flags; 1460 ret = bdrv_open_image(&file, filename, options, "file", 1461 bs, &child_file, true, &local_err); 1462 if (ret < 0) { 1463 goto fail; 1464 } 1465 } 1466 1467 /* Image format probing */ 1468 bs->probed = !drv; 1469 if (!drv && file) { 1470 ret = find_image_format(file, filename, &drv, &local_err); 1471 if (ret < 0) { 1472 goto fail; 1473 } 1474 } else if (!drv) { 1475 error_setg(errp, "Must specify either driver or file"); 1476 ret = -EINVAL; 1477 goto fail; 1478 } 1479 1480 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 1481 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 1482 /* file must be NULL if a protocol BDS is about to be created 1483 * (the inverse results in an error message from bdrv_open_common()) */ 1484 assert(!(flags & BDRV_O_PROTOCOL) || !file); 1485 1486 /* Open the image */ 1487 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1488 if (ret < 0) { 1489 goto fail; 1490 } 1491 1492 if (file && (bs->file != file)) { 1493 bdrv_unref(file); 1494 file = NULL; 1495 } 1496 1497 /* If there is a backing file, use it */ 1498 if ((flags & BDRV_O_NO_BACKING) == 0) { 1499 QDict *backing_options; 1500 1501 qdict_extract_subqdict(options, &backing_options, "backing."); 1502 ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1503 if (ret < 0) { 1504 goto close_and_fail; 1505 } 1506 } 1507 1508 bdrv_refresh_filename(bs); 1509 1510 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1511 * temporary snapshot afterwards. */ 1512 if (snapshot_flags) { 1513 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1514 if (local_err) { 1515 goto close_and_fail; 1516 } 1517 } 1518 1519 /* Check if any unknown options were used */ 1520 if (options && (qdict_size(options) != 0)) { 1521 const QDictEntry *entry = qdict_first(options); 1522 if (flags & BDRV_O_PROTOCOL) { 1523 error_setg(errp, "Block protocol '%s' doesn't support the option " 1524 "'%s'", drv->format_name, entry->key); 1525 } else { 1526 error_setg(errp, "Block format '%s' used by device '%s' doesn't " 1527 "support the option '%s'", drv->format_name, 1528 bdrv_get_device_name(bs), entry->key); 1529 } 1530 1531 ret = -EINVAL; 1532 goto close_and_fail; 1533 } 1534 1535 if (!bdrv_key_required(bs)) { 1536 if (bs->blk) { 1537 blk_dev_change_media_cb(bs->blk, true); 1538 } 1539 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1540 && !runstate_check(RUN_STATE_INMIGRATE) 1541 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1542 error_setg(errp, 1543 "Guest must be stopped for opening of encrypted image"); 1544 ret = -EBUSY; 1545 goto close_and_fail; 1546 } 1547 1548 QDECREF(options); 1549 *pbs = bs; 1550 return 0; 1551 1552 fail: 1553 if (file != NULL) { 1554 bdrv_unref(file); 1555 } 1556 QDECREF(bs->options); 1557 QDECREF(options); 1558 bs->options = NULL; 1559 if (!*pbs) { 1560 /* If *pbs is NULL, a new BDS has been created in this function and 1561 needs to be freed now. Otherwise, it does not need to be closed, 1562 since it has not really been opened yet. */ 1563 bdrv_unref(bs); 1564 } 1565 if (local_err) { 1566 error_propagate(errp, local_err); 1567 } 1568 return ret; 1569 1570 close_and_fail: 1571 /* See fail path, but now the BDS has to be always closed */ 1572 if (*pbs) { 1573 bdrv_close(bs); 1574 } else { 1575 bdrv_unref(bs); 1576 } 1577 QDECREF(options); 1578 if (local_err) { 1579 error_propagate(errp, local_err); 1580 } 1581 return ret; 1582 } 1583 1584 int bdrv_open(BlockDriverState **pbs, const char *filename, 1585 const char *reference, QDict *options, int flags, 1586 BlockDriver *drv, Error **errp) 1587 { 1588 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, 1589 NULL, drv, errp); 1590 } 1591 1592 typedef struct BlockReopenQueueEntry { 1593 bool prepared; 1594 BDRVReopenState state; 1595 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1596 } BlockReopenQueueEntry; 1597 1598 /* 1599 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1600 * reopen of multiple devices. 1601 * 1602 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1603 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1604 * be created and initialized. This newly created BlockReopenQueue should be 1605 * passed back in for subsequent calls that are intended to be of the same 1606 * atomic 'set'. 1607 * 1608 * bs is the BlockDriverState to add to the reopen queue. 1609 * 1610 * flags contains the open flags for the associated bs 1611 * 1612 * returns a pointer to bs_queue, which is either the newly allocated 1613 * bs_queue, or the existing bs_queue being used. 1614 * 1615 */ 1616 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1617 BlockDriverState *bs, int flags) 1618 { 1619 assert(bs != NULL); 1620 1621 BlockReopenQueueEntry *bs_entry; 1622 BdrvChild *child; 1623 1624 if (bs_queue == NULL) { 1625 bs_queue = g_new0(BlockReopenQueue, 1); 1626 QSIMPLEQ_INIT(bs_queue); 1627 } 1628 1629 /* bdrv_open() masks this flag out */ 1630 flags &= ~BDRV_O_PROTOCOL; 1631 1632 QLIST_FOREACH(child, &bs->children, next) { 1633 int child_flags; 1634 1635 if (child->bs->inherits_from != bs) { 1636 continue; 1637 } 1638 1639 child_flags = child->role->inherit_flags(flags); 1640 bdrv_reopen_queue(bs_queue, child->bs, child_flags); 1641 } 1642 1643 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1644 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1645 1646 bs_entry->state.bs = bs; 1647 bs_entry->state.flags = flags; 1648 1649 return bs_queue; 1650 } 1651 1652 /* 1653 * Reopen multiple BlockDriverStates atomically & transactionally. 1654 * 1655 * The queue passed in (bs_queue) must have been built up previous 1656 * via bdrv_reopen_queue(). 1657 * 1658 * Reopens all BDS specified in the queue, with the appropriate 1659 * flags. All devices are prepared for reopen, and failure of any 1660 * device will cause all device changes to be abandonded, and intermediate 1661 * data cleaned up. 1662 * 1663 * If all devices prepare successfully, then the changes are committed 1664 * to all devices. 1665 * 1666 */ 1667 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1668 { 1669 int ret = -1; 1670 BlockReopenQueueEntry *bs_entry, *next; 1671 Error *local_err = NULL; 1672 1673 assert(bs_queue != NULL); 1674 1675 bdrv_drain_all(); 1676 1677 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1678 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1679 error_propagate(errp, local_err); 1680 goto cleanup; 1681 } 1682 bs_entry->prepared = true; 1683 } 1684 1685 /* If we reach this point, we have success and just need to apply the 1686 * changes 1687 */ 1688 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1689 bdrv_reopen_commit(&bs_entry->state); 1690 } 1691 1692 ret = 0; 1693 1694 cleanup: 1695 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1696 if (ret && bs_entry->prepared) { 1697 bdrv_reopen_abort(&bs_entry->state); 1698 } 1699 g_free(bs_entry); 1700 } 1701 g_free(bs_queue); 1702 return ret; 1703 } 1704 1705 1706 /* Reopen a single BlockDriverState with the specified flags. */ 1707 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1708 { 1709 int ret = -1; 1710 Error *local_err = NULL; 1711 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1712 1713 ret = bdrv_reopen_multiple(queue, &local_err); 1714 if (local_err != NULL) { 1715 error_propagate(errp, local_err); 1716 } 1717 return ret; 1718 } 1719 1720 1721 /* 1722 * Prepares a BlockDriverState for reopen. All changes are staged in the 1723 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1724 * the block driver layer .bdrv_reopen_prepare() 1725 * 1726 * bs is the BlockDriverState to reopen 1727 * flags are the new open flags 1728 * queue is the reopen queue 1729 * 1730 * Returns 0 on success, non-zero on error. On error errp will be set 1731 * as well. 1732 * 1733 * On failure, bdrv_reopen_abort() will be called to clean up any data. 1734 * It is the responsibility of the caller to then call the abort() or 1735 * commit() for any other BDS that have been left in a prepare() state 1736 * 1737 */ 1738 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1739 Error **errp) 1740 { 1741 int ret = -1; 1742 Error *local_err = NULL; 1743 BlockDriver *drv; 1744 1745 assert(reopen_state != NULL); 1746 assert(reopen_state->bs->drv != NULL); 1747 drv = reopen_state->bs->drv; 1748 1749 /* if we are to stay read-only, do not allow permission change 1750 * to r/w */ 1751 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1752 reopen_state->flags & BDRV_O_RDWR) { 1753 error_setg(errp, "Node '%s' is read only", 1754 bdrv_get_device_or_node_name(reopen_state->bs)); 1755 goto error; 1756 } 1757 1758 1759 ret = bdrv_flush(reopen_state->bs); 1760 if (ret) { 1761 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1762 strerror(-ret)); 1763 goto error; 1764 } 1765 1766 if (drv->bdrv_reopen_prepare) { 1767 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1768 if (ret) { 1769 if (local_err != NULL) { 1770 error_propagate(errp, local_err); 1771 } else { 1772 error_setg(errp, "failed while preparing to reopen image '%s'", 1773 reopen_state->bs->filename); 1774 } 1775 goto error; 1776 } 1777 } else { 1778 /* It is currently mandatory to have a bdrv_reopen_prepare() 1779 * handler for each supported drv. */ 1780 error_setg(errp, "Block format '%s' used by node '%s' " 1781 "does not support reopening files", drv->format_name, 1782 bdrv_get_device_or_node_name(reopen_state->bs)); 1783 ret = -1; 1784 goto error; 1785 } 1786 1787 ret = 0; 1788 1789 error: 1790 return ret; 1791 } 1792 1793 /* 1794 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1795 * makes them final by swapping the staging BlockDriverState contents into 1796 * the active BlockDriverState contents. 1797 */ 1798 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1799 { 1800 BlockDriver *drv; 1801 1802 assert(reopen_state != NULL); 1803 drv = reopen_state->bs->drv; 1804 assert(drv != NULL); 1805 1806 /* If there are any driver level actions to take */ 1807 if (drv->bdrv_reopen_commit) { 1808 drv->bdrv_reopen_commit(reopen_state); 1809 } 1810 1811 /* set BDS specific flags now */ 1812 reopen_state->bs->open_flags = reopen_state->flags; 1813 reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1814 BDRV_O_CACHE_WB); 1815 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1816 1817 bdrv_refresh_limits(reopen_state->bs, NULL); 1818 } 1819 1820 /* 1821 * Abort the reopen, and delete and free the staged changes in 1822 * reopen_state 1823 */ 1824 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1825 { 1826 BlockDriver *drv; 1827 1828 assert(reopen_state != NULL); 1829 drv = reopen_state->bs->drv; 1830 assert(drv != NULL); 1831 1832 if (drv->bdrv_reopen_abort) { 1833 drv->bdrv_reopen_abort(reopen_state); 1834 } 1835 } 1836 1837 1838 void bdrv_close(BlockDriverState *bs) 1839 { 1840 BdrvAioNotifier *ban, *ban_next; 1841 1842 if (bs->job) { 1843 block_job_cancel_sync(bs->job); 1844 } 1845 bdrv_drain(bs); /* complete I/O */ 1846 bdrv_flush(bs); 1847 bdrv_drain(bs); /* in case flush left pending I/O */ 1848 notifier_list_notify(&bs->close_notifiers, bs); 1849 1850 if (bs->drv) { 1851 BdrvChild *child, *next; 1852 1853 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 1854 if (child->bs->inherits_from == bs) { 1855 child->bs->inherits_from = NULL; 1856 } 1857 QLIST_REMOVE(child, next); 1858 g_free(child); 1859 } 1860 1861 if (bs->backing_hd) { 1862 BlockDriverState *backing_hd = bs->backing_hd; 1863 bdrv_set_backing_hd(bs, NULL); 1864 bdrv_unref(backing_hd); 1865 } 1866 bs->drv->bdrv_close(bs); 1867 g_free(bs->opaque); 1868 bs->opaque = NULL; 1869 bs->drv = NULL; 1870 bs->copy_on_read = 0; 1871 bs->backing_file[0] = '\0'; 1872 bs->backing_format[0] = '\0'; 1873 bs->total_sectors = 0; 1874 bs->encrypted = 0; 1875 bs->valid_key = 0; 1876 bs->sg = 0; 1877 bs->zero_beyond_eof = false; 1878 QDECREF(bs->options); 1879 bs->options = NULL; 1880 QDECREF(bs->full_open_options); 1881 bs->full_open_options = NULL; 1882 1883 if (bs->file != NULL) { 1884 bdrv_unref(bs->file); 1885 bs->file = NULL; 1886 } 1887 } 1888 1889 if (bs->blk) { 1890 blk_dev_change_media_cb(bs->blk, false); 1891 } 1892 1893 /*throttling disk I/O limits*/ 1894 if (bs->io_limits_enabled) { 1895 bdrv_io_limits_disable(bs); 1896 } 1897 1898 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 1899 g_free(ban); 1900 } 1901 QLIST_INIT(&bs->aio_notifiers); 1902 } 1903 1904 void bdrv_close_all(void) 1905 { 1906 BlockDriverState *bs; 1907 1908 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1909 AioContext *aio_context = bdrv_get_aio_context(bs); 1910 1911 aio_context_acquire(aio_context); 1912 bdrv_close(bs); 1913 aio_context_release(aio_context); 1914 } 1915 } 1916 1917 /* make a BlockDriverState anonymous by removing from bdrv_state and 1918 * graph_bdrv_state list. 1919 Also, NULL terminate the device_name to prevent double remove */ 1920 void bdrv_make_anon(BlockDriverState *bs) 1921 { 1922 /* 1923 * Take care to remove bs from bdrv_states only when it's actually 1924 * in it. Note that bs->device_list.tqe_prev is initially null, 1925 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 1926 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 1927 * resetting it to null on remove. 1928 */ 1929 if (bs->device_list.tqe_prev) { 1930 QTAILQ_REMOVE(&bdrv_states, bs, device_list); 1931 bs->device_list.tqe_prev = NULL; 1932 } 1933 if (bs->node_name[0] != '\0') { 1934 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 1935 } 1936 bs->node_name[0] = '\0'; 1937 } 1938 1939 static void bdrv_rebind(BlockDriverState *bs) 1940 { 1941 if (bs->drv && bs->drv->bdrv_rebind) { 1942 bs->drv->bdrv_rebind(bs); 1943 } 1944 } 1945 1946 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 1947 BlockDriverState *bs_src) 1948 { 1949 /* move some fields that need to stay attached to the device */ 1950 1951 /* dev info */ 1952 bs_dest->guest_block_size = bs_src->guest_block_size; 1953 bs_dest->copy_on_read = bs_src->copy_on_read; 1954 1955 bs_dest->enable_write_cache = bs_src->enable_write_cache; 1956 1957 /* i/o throttled req */ 1958 bs_dest->throttle_state = bs_src->throttle_state, 1959 bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 1960 bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; 1961 bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; 1962 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 1963 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 1964 memcpy(&bs_dest->round_robin, 1965 &bs_src->round_robin, 1966 sizeof(bs_dest->round_robin)); 1967 memcpy(&bs_dest->throttle_timers, 1968 &bs_src->throttle_timers, 1969 sizeof(ThrottleTimers)); 1970 1971 /* r/w error */ 1972 bs_dest->on_read_error = bs_src->on_read_error; 1973 bs_dest->on_write_error = bs_src->on_write_error; 1974 1975 /* i/o status */ 1976 bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 1977 bs_dest->iostatus = bs_src->iostatus; 1978 1979 /* dirty bitmap */ 1980 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 1981 1982 /* reference count */ 1983 bs_dest->refcnt = bs_src->refcnt; 1984 1985 /* job */ 1986 bs_dest->job = bs_src->job; 1987 1988 /* keep the same entry in bdrv_states */ 1989 bs_dest->device_list = bs_src->device_list; 1990 bs_dest->blk = bs_src->blk; 1991 1992 memcpy(bs_dest->op_blockers, bs_src->op_blockers, 1993 sizeof(bs_dest->op_blockers)); 1994 } 1995 1996 /* 1997 * Swap bs contents for two image chains while they are live, 1998 * while keeping required fields on the BlockDriverState that is 1999 * actually attached to a device. 2000 * 2001 * This will modify the BlockDriverState fields, and swap contents 2002 * between bs_new and bs_old. Both bs_new and bs_old are modified. 2003 * 2004 * bs_new must not be attached to a BlockBackend. 2005 * 2006 * This function does not create any image files. 2007 */ 2008 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 2009 { 2010 BlockDriverState tmp; 2011 BdrvChild *child; 2012 2013 bdrv_drain(bs_new); 2014 bdrv_drain(bs_old); 2015 2016 /* The code needs to swap the node_name but simply swapping node_list won't 2017 * work so first remove the nodes from the graph list, do the swap then 2018 * insert them back if needed. 2019 */ 2020 if (bs_new->node_name[0] != '\0') { 2021 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 2022 } 2023 if (bs_old->node_name[0] != '\0') { 2024 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 2025 } 2026 2027 /* If the BlockDriverState is part of a throttling group acquire 2028 * its lock since we're going to mess with the protected fields. 2029 * Otherwise there's no need to worry since no one else can touch 2030 * them. */ 2031 if (bs_old->throttle_state) { 2032 throttle_group_lock(bs_old); 2033 } 2034 2035 /* bs_new must be unattached and shouldn't have anything fancy enabled */ 2036 assert(!bs_new->blk); 2037 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 2038 assert(bs_new->job == NULL); 2039 assert(bs_new->io_limits_enabled == false); 2040 assert(bs_new->throttle_state == NULL); 2041 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); 2042 2043 tmp = *bs_new; 2044 *bs_new = *bs_old; 2045 *bs_old = tmp; 2046 2047 /* there are some fields that should not be swapped, move them back */ 2048 bdrv_move_feature_fields(&tmp, bs_old); 2049 bdrv_move_feature_fields(bs_old, bs_new); 2050 bdrv_move_feature_fields(bs_new, &tmp); 2051 2052 /* bs_new must remain unattached */ 2053 assert(!bs_new->blk); 2054 2055 /* Check a few fields that should remain attached to the device */ 2056 assert(bs_new->job == NULL); 2057 assert(bs_new->io_limits_enabled == false); 2058 assert(bs_new->throttle_state == NULL); 2059 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); 2060 2061 /* Release the ThrottleGroup lock */ 2062 if (bs_old->throttle_state) { 2063 throttle_group_unlock(bs_old); 2064 } 2065 2066 /* insert the nodes back into the graph node list if needed */ 2067 if (bs_new->node_name[0] != '\0') { 2068 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 2069 } 2070 if (bs_old->node_name[0] != '\0') { 2071 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 2072 } 2073 2074 /* 2075 * Update lh_first.le_prev for non-empty lists. 2076 * 2077 * The head of the op blocker list doesn't change because it is moved back 2078 * in bdrv_move_feature_fields(). 2079 */ 2080 assert(QLIST_EMPTY(&bs_old->tracked_requests)); 2081 assert(QLIST_EMPTY(&bs_new->tracked_requests)); 2082 2083 QLIST_FIX_HEAD_PTR(&bs_new->children, next); 2084 QLIST_FIX_HEAD_PTR(&bs_old->children, next); 2085 2086 /* Update references in bs->opaque and children */ 2087 QLIST_FOREACH(child, &bs_old->children, next) { 2088 if (child->bs->inherits_from == bs_new) { 2089 child->bs->inherits_from = bs_old; 2090 } 2091 } 2092 QLIST_FOREACH(child, &bs_new->children, next) { 2093 if (child->bs->inherits_from == bs_old) { 2094 child->bs->inherits_from = bs_new; 2095 } 2096 } 2097 2098 bdrv_rebind(bs_new); 2099 bdrv_rebind(bs_old); 2100 } 2101 2102 /* 2103 * Add new bs contents at the top of an image chain while the chain is 2104 * live, while keeping required fields on the top layer. 2105 * 2106 * This will modify the BlockDriverState fields, and swap contents 2107 * between bs_new and bs_top. Both bs_new and bs_top are modified. 2108 * 2109 * bs_new must not be attached to a BlockBackend. 2110 * 2111 * This function does not create any image files. 2112 */ 2113 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 2114 { 2115 bdrv_swap(bs_new, bs_top); 2116 2117 /* The contents of 'tmp' will become bs_top, as we are 2118 * swapping bs_new and bs_top contents. */ 2119 bdrv_set_backing_hd(bs_top, bs_new); 2120 bdrv_attach_child(bs_top, bs_new, &child_backing); 2121 } 2122 2123 static void bdrv_delete(BlockDriverState *bs) 2124 { 2125 assert(!bs->job); 2126 assert(bdrv_op_blocker_is_empty(bs)); 2127 assert(!bs->refcnt); 2128 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 2129 2130 bdrv_close(bs); 2131 2132 /* remove from list, if necessary */ 2133 bdrv_make_anon(bs); 2134 2135 g_free(bs); 2136 } 2137 2138 /* 2139 * Run consistency checks on an image 2140 * 2141 * Returns 0 if the check could be completed (it doesn't mean that the image is 2142 * free of errors) or -errno when an internal error occurred. The results of the 2143 * check are stored in res. 2144 */ 2145 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2146 { 2147 if (bs->drv == NULL) { 2148 return -ENOMEDIUM; 2149 } 2150 if (bs->drv->bdrv_check == NULL) { 2151 return -ENOTSUP; 2152 } 2153 2154 memset(res, 0, sizeof(*res)); 2155 return bs->drv->bdrv_check(bs, res, fix); 2156 } 2157 2158 #define COMMIT_BUF_SECTORS 2048 2159 2160 /* commit COW file into the raw image */ 2161 int bdrv_commit(BlockDriverState *bs) 2162 { 2163 BlockDriver *drv = bs->drv; 2164 int64_t sector, total_sectors, length, backing_length; 2165 int n, ro, open_flags; 2166 int ret = 0; 2167 uint8_t *buf = NULL; 2168 2169 if (!drv) 2170 return -ENOMEDIUM; 2171 2172 if (!bs->backing_hd) { 2173 return -ENOTSUP; 2174 } 2175 2176 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2177 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 2178 return -EBUSY; 2179 } 2180 2181 ro = bs->backing_hd->read_only; 2182 open_flags = bs->backing_hd->open_flags; 2183 2184 if (ro) { 2185 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 2186 return -EACCES; 2187 } 2188 } 2189 2190 length = bdrv_getlength(bs); 2191 if (length < 0) { 2192 ret = length; 2193 goto ro_cleanup; 2194 } 2195 2196 backing_length = bdrv_getlength(bs->backing_hd); 2197 if (backing_length < 0) { 2198 ret = backing_length; 2199 goto ro_cleanup; 2200 } 2201 2202 /* If our top snapshot is larger than the backing file image, 2203 * grow the backing file image if possible. If not possible, 2204 * we must return an error */ 2205 if (length > backing_length) { 2206 ret = bdrv_truncate(bs->backing_hd, length); 2207 if (ret < 0) { 2208 goto ro_cleanup; 2209 } 2210 } 2211 2212 total_sectors = length >> BDRV_SECTOR_BITS; 2213 2214 /* qemu_try_blockalign() for bs will choose an alignment that works for 2215 * bs->backing_hd as well, so no need to compare the alignment manually. */ 2216 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2217 if (buf == NULL) { 2218 ret = -ENOMEM; 2219 goto ro_cleanup; 2220 } 2221 2222 for (sector = 0; sector < total_sectors; sector += n) { 2223 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2224 if (ret < 0) { 2225 goto ro_cleanup; 2226 } 2227 if (ret) { 2228 ret = bdrv_read(bs, sector, buf, n); 2229 if (ret < 0) { 2230 goto ro_cleanup; 2231 } 2232 2233 ret = bdrv_write(bs->backing_hd, sector, buf, n); 2234 if (ret < 0) { 2235 goto ro_cleanup; 2236 } 2237 } 2238 } 2239 2240 if (drv->bdrv_make_empty) { 2241 ret = drv->bdrv_make_empty(bs); 2242 if (ret < 0) { 2243 goto ro_cleanup; 2244 } 2245 bdrv_flush(bs); 2246 } 2247 2248 /* 2249 * Make sure all data we wrote to the backing device is actually 2250 * stable on disk. 2251 */ 2252 if (bs->backing_hd) { 2253 bdrv_flush(bs->backing_hd); 2254 } 2255 2256 ret = 0; 2257 ro_cleanup: 2258 qemu_vfree(buf); 2259 2260 if (ro) { 2261 /* ignoring error return here */ 2262 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 2263 } 2264 2265 return ret; 2266 } 2267 2268 int bdrv_commit_all(void) 2269 { 2270 BlockDriverState *bs; 2271 2272 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2273 AioContext *aio_context = bdrv_get_aio_context(bs); 2274 2275 aio_context_acquire(aio_context); 2276 if (bs->drv && bs->backing_hd) { 2277 int ret = bdrv_commit(bs); 2278 if (ret < 0) { 2279 aio_context_release(aio_context); 2280 return ret; 2281 } 2282 } 2283 aio_context_release(aio_context); 2284 } 2285 return 0; 2286 } 2287 2288 /* 2289 * Return values: 2290 * 0 - success 2291 * -EINVAL - backing format specified, but no file 2292 * -ENOSPC - can't update the backing file because no space is left in the 2293 * image file header 2294 * -ENOTSUP - format driver doesn't support changing the backing file 2295 */ 2296 int bdrv_change_backing_file(BlockDriverState *bs, 2297 const char *backing_file, const char *backing_fmt) 2298 { 2299 BlockDriver *drv = bs->drv; 2300 int ret; 2301 2302 /* Backing file format doesn't make sense without a backing file */ 2303 if (backing_fmt && !backing_file) { 2304 return -EINVAL; 2305 } 2306 2307 if (drv->bdrv_change_backing_file != NULL) { 2308 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2309 } else { 2310 ret = -ENOTSUP; 2311 } 2312 2313 if (ret == 0) { 2314 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2315 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2316 } 2317 return ret; 2318 } 2319 2320 /* 2321 * Finds the image layer in the chain that has 'bs' as its backing file. 2322 * 2323 * active is the current topmost image. 2324 * 2325 * Returns NULL if bs is not found in active's image chain, 2326 * or if active == bs. 2327 * 2328 * Returns the bottommost base image if bs == NULL. 2329 */ 2330 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2331 BlockDriverState *bs) 2332 { 2333 while (active && bs != active->backing_hd) { 2334 active = active->backing_hd; 2335 } 2336 2337 return active; 2338 } 2339 2340 /* Given a BDS, searches for the base layer. */ 2341 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2342 { 2343 return bdrv_find_overlay(bs, NULL); 2344 } 2345 2346 typedef struct BlkIntermediateStates { 2347 BlockDriverState *bs; 2348 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 2349 } BlkIntermediateStates; 2350 2351 2352 /* 2353 * Drops images above 'base' up to and including 'top', and sets the image 2354 * above 'top' to have base as its backing file. 2355 * 2356 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2357 * information in 'bs' can be properly updated. 2358 * 2359 * E.g., this will convert the following chain: 2360 * bottom <- base <- intermediate <- top <- active 2361 * 2362 * to 2363 * 2364 * bottom <- base <- active 2365 * 2366 * It is allowed for bottom==base, in which case it converts: 2367 * 2368 * base <- intermediate <- top <- active 2369 * 2370 * to 2371 * 2372 * base <- active 2373 * 2374 * If backing_file_str is non-NULL, it will be used when modifying top's 2375 * overlay image metadata. 2376 * 2377 * Error conditions: 2378 * if active == top, that is considered an error 2379 * 2380 */ 2381 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2382 BlockDriverState *base, const char *backing_file_str) 2383 { 2384 BlockDriverState *intermediate; 2385 BlockDriverState *base_bs = NULL; 2386 BlockDriverState *new_top_bs = NULL; 2387 BlkIntermediateStates *intermediate_state, *next; 2388 int ret = -EIO; 2389 2390 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 2391 QSIMPLEQ_INIT(&states_to_delete); 2392 2393 if (!top->drv || !base->drv) { 2394 goto exit; 2395 } 2396 2397 new_top_bs = bdrv_find_overlay(active, top); 2398 2399 if (new_top_bs == NULL) { 2400 /* we could not find the image above 'top', this is an error */ 2401 goto exit; 2402 } 2403 2404 /* special case of new_top_bs->backing_hd already pointing to base - nothing 2405 * to do, no intermediate images */ 2406 if (new_top_bs->backing_hd == base) { 2407 ret = 0; 2408 goto exit; 2409 } 2410 2411 intermediate = top; 2412 2413 /* now we will go down through the list, and add each BDS we find 2414 * into our deletion queue, until we hit the 'base' 2415 */ 2416 while (intermediate) { 2417 intermediate_state = g_new0(BlkIntermediateStates, 1); 2418 intermediate_state->bs = intermediate; 2419 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 2420 2421 if (intermediate->backing_hd == base) { 2422 base_bs = intermediate->backing_hd; 2423 break; 2424 } 2425 intermediate = intermediate->backing_hd; 2426 } 2427 if (base_bs == NULL) { 2428 /* something went wrong, we did not end at the base. safely 2429 * unravel everything, and exit with error */ 2430 goto exit; 2431 } 2432 2433 /* success - we can delete the intermediate states, and link top->base */ 2434 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 2435 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2436 base_bs->drv ? base_bs->drv->format_name : ""); 2437 if (ret) { 2438 goto exit; 2439 } 2440 bdrv_set_backing_hd(new_top_bs, base_bs); 2441 2442 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2443 /* so that bdrv_close() does not recursively close the chain */ 2444 bdrv_set_backing_hd(intermediate_state->bs, NULL); 2445 bdrv_unref(intermediate_state->bs); 2446 } 2447 ret = 0; 2448 2449 exit: 2450 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2451 g_free(intermediate_state); 2452 } 2453 return ret; 2454 } 2455 2456 /** 2457 * Truncate file to 'offset' bytes (needed only for file protocols) 2458 */ 2459 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2460 { 2461 BlockDriver *drv = bs->drv; 2462 int ret; 2463 if (!drv) 2464 return -ENOMEDIUM; 2465 if (!drv->bdrv_truncate) 2466 return -ENOTSUP; 2467 if (bs->read_only) 2468 return -EACCES; 2469 2470 ret = drv->bdrv_truncate(bs, offset); 2471 if (ret == 0) { 2472 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2473 bdrv_dirty_bitmap_truncate(bs); 2474 if (bs->blk) { 2475 blk_dev_resize_cb(bs->blk); 2476 } 2477 } 2478 return ret; 2479 } 2480 2481 /** 2482 * Length of a allocated file in bytes. Sparse files are counted by actual 2483 * allocated space. Return < 0 if error or unknown. 2484 */ 2485 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2486 { 2487 BlockDriver *drv = bs->drv; 2488 if (!drv) { 2489 return -ENOMEDIUM; 2490 } 2491 if (drv->bdrv_get_allocated_file_size) { 2492 return drv->bdrv_get_allocated_file_size(bs); 2493 } 2494 if (bs->file) { 2495 return bdrv_get_allocated_file_size(bs->file); 2496 } 2497 return -ENOTSUP; 2498 } 2499 2500 /** 2501 * Return number of sectors on success, -errno on error. 2502 */ 2503 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2504 { 2505 BlockDriver *drv = bs->drv; 2506 2507 if (!drv) 2508 return -ENOMEDIUM; 2509 2510 if (drv->has_variable_length) { 2511 int ret = refresh_total_sectors(bs, bs->total_sectors); 2512 if (ret < 0) { 2513 return ret; 2514 } 2515 } 2516 return bs->total_sectors; 2517 } 2518 2519 /** 2520 * Return length in bytes on success, -errno on error. 2521 * The length is always a multiple of BDRV_SECTOR_SIZE. 2522 */ 2523 int64_t bdrv_getlength(BlockDriverState *bs) 2524 { 2525 int64_t ret = bdrv_nb_sectors(bs); 2526 2527 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2528 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2529 } 2530 2531 /* return 0 as number of sectors if no device present or error */ 2532 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2533 { 2534 int64_t nb_sectors = bdrv_nb_sectors(bs); 2535 2536 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2537 } 2538 2539 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2540 BlockdevOnError on_write_error) 2541 { 2542 bs->on_read_error = on_read_error; 2543 bs->on_write_error = on_write_error; 2544 } 2545 2546 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2547 { 2548 return is_read ? bs->on_read_error : bs->on_write_error; 2549 } 2550 2551 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2552 { 2553 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 2554 2555 switch (on_err) { 2556 case BLOCKDEV_ON_ERROR_ENOSPC: 2557 return (error == ENOSPC) ? 2558 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2559 case BLOCKDEV_ON_ERROR_STOP: 2560 return BLOCK_ERROR_ACTION_STOP; 2561 case BLOCKDEV_ON_ERROR_REPORT: 2562 return BLOCK_ERROR_ACTION_REPORT; 2563 case BLOCKDEV_ON_ERROR_IGNORE: 2564 return BLOCK_ERROR_ACTION_IGNORE; 2565 default: 2566 abort(); 2567 } 2568 } 2569 2570 static void send_qmp_error_event(BlockDriverState *bs, 2571 BlockErrorAction action, 2572 bool is_read, int error) 2573 { 2574 IoOperationType optype; 2575 2576 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2577 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 2578 bdrv_iostatus_is_enabled(bs), 2579 error == ENOSPC, strerror(error), 2580 &error_abort); 2581 } 2582 2583 /* This is done by device models because, while the block layer knows 2584 * about the error, it does not know whether an operation comes from 2585 * the device or the block layer (from a job, for example). 2586 */ 2587 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2588 bool is_read, int error) 2589 { 2590 assert(error >= 0); 2591 2592 if (action == BLOCK_ERROR_ACTION_STOP) { 2593 /* First set the iostatus, so that "info block" returns an iostatus 2594 * that matches the events raised so far (an additional error iostatus 2595 * is fine, but not a lost one). 2596 */ 2597 bdrv_iostatus_set_err(bs, error); 2598 2599 /* Then raise the request to stop the VM and the event. 2600 * qemu_system_vmstop_request_prepare has two effects. First, 2601 * it ensures that the STOP event always comes after the 2602 * BLOCK_IO_ERROR event. Second, it ensures that even if management 2603 * can observe the STOP event and do a "cont" before the STOP 2604 * event is issued, the VM will not stop. In this case, vm_start() 2605 * also ensures that the STOP/RESUME pair of events is emitted. 2606 */ 2607 qemu_system_vmstop_request_prepare(); 2608 send_qmp_error_event(bs, action, is_read, error); 2609 qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 2610 } else { 2611 send_qmp_error_event(bs, action, is_read, error); 2612 } 2613 } 2614 2615 int bdrv_is_read_only(BlockDriverState *bs) 2616 { 2617 return bs->read_only; 2618 } 2619 2620 int bdrv_is_sg(BlockDriverState *bs) 2621 { 2622 return bs->sg; 2623 } 2624 2625 int bdrv_enable_write_cache(BlockDriverState *bs) 2626 { 2627 return bs->enable_write_cache; 2628 } 2629 2630 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2631 { 2632 bs->enable_write_cache = wce; 2633 2634 /* so a reopen() will preserve wce */ 2635 if (wce) { 2636 bs->open_flags |= BDRV_O_CACHE_WB; 2637 } else { 2638 bs->open_flags &= ~BDRV_O_CACHE_WB; 2639 } 2640 } 2641 2642 int bdrv_is_encrypted(BlockDriverState *bs) 2643 { 2644 if (bs->backing_hd && bs->backing_hd->encrypted) 2645 return 1; 2646 return bs->encrypted; 2647 } 2648 2649 int bdrv_key_required(BlockDriverState *bs) 2650 { 2651 BlockDriverState *backing_hd = bs->backing_hd; 2652 2653 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 2654 return 1; 2655 return (bs->encrypted && !bs->valid_key); 2656 } 2657 2658 int bdrv_set_key(BlockDriverState *bs, const char *key) 2659 { 2660 int ret; 2661 if (bs->backing_hd && bs->backing_hd->encrypted) { 2662 ret = bdrv_set_key(bs->backing_hd, key); 2663 if (ret < 0) 2664 return ret; 2665 if (!bs->encrypted) 2666 return 0; 2667 } 2668 if (!bs->encrypted) { 2669 return -EINVAL; 2670 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2671 return -ENOMEDIUM; 2672 } 2673 ret = bs->drv->bdrv_set_key(bs, key); 2674 if (ret < 0) { 2675 bs->valid_key = 0; 2676 } else if (!bs->valid_key) { 2677 bs->valid_key = 1; 2678 if (bs->blk) { 2679 /* call the change callback now, we skipped it on open */ 2680 blk_dev_change_media_cb(bs->blk, true); 2681 } 2682 } 2683 return ret; 2684 } 2685 2686 /* 2687 * Provide an encryption key for @bs. 2688 * If @key is non-null: 2689 * If @bs is not encrypted, fail. 2690 * Else if the key is invalid, fail. 2691 * Else set @bs's key to @key, replacing the existing key, if any. 2692 * If @key is null: 2693 * If @bs is encrypted and still lacks a key, fail. 2694 * Else do nothing. 2695 * On failure, store an error object through @errp if non-null. 2696 */ 2697 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2698 { 2699 if (key) { 2700 if (!bdrv_is_encrypted(bs)) { 2701 error_setg(errp, "Node '%s' is not encrypted", 2702 bdrv_get_device_or_node_name(bs)); 2703 } else if (bdrv_set_key(bs, key) < 0) { 2704 error_setg(errp, QERR_INVALID_PASSWORD); 2705 } 2706 } else { 2707 if (bdrv_key_required(bs)) { 2708 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2709 "'%s' (%s) is encrypted", 2710 bdrv_get_device_or_node_name(bs), 2711 bdrv_get_encrypted_filename(bs)); 2712 } 2713 } 2714 } 2715 2716 const char *bdrv_get_format_name(BlockDriverState *bs) 2717 { 2718 return bs->drv ? bs->drv->format_name : NULL; 2719 } 2720 2721 static int qsort_strcmp(const void *a, const void *b) 2722 { 2723 return strcmp(a, b); 2724 } 2725 2726 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2727 void *opaque) 2728 { 2729 BlockDriver *drv; 2730 int count = 0; 2731 int i; 2732 const char **formats = NULL; 2733 2734 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2735 if (drv->format_name) { 2736 bool found = false; 2737 int i = count; 2738 while (formats && i && !found) { 2739 found = !strcmp(formats[--i], drv->format_name); 2740 } 2741 2742 if (!found) { 2743 formats = g_renew(const char *, formats, count + 1); 2744 formats[count++] = drv->format_name; 2745 } 2746 } 2747 } 2748 2749 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2750 2751 for (i = 0; i < count; i++) { 2752 it(opaque, formats[i]); 2753 } 2754 2755 g_free(formats); 2756 } 2757 2758 /* This function is to find a node in the bs graph */ 2759 BlockDriverState *bdrv_find_node(const char *node_name) 2760 { 2761 BlockDriverState *bs; 2762 2763 assert(node_name); 2764 2765 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2766 if (!strcmp(node_name, bs->node_name)) { 2767 return bs; 2768 } 2769 } 2770 return NULL; 2771 } 2772 2773 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 2774 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2775 { 2776 BlockDeviceInfoList *list, *entry; 2777 BlockDriverState *bs; 2778 2779 list = NULL; 2780 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2781 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 2782 if (!info) { 2783 qapi_free_BlockDeviceInfoList(list); 2784 return NULL; 2785 } 2786 entry = g_malloc0(sizeof(*entry)); 2787 entry->value = info; 2788 entry->next = list; 2789 list = entry; 2790 } 2791 2792 return list; 2793 } 2794 2795 BlockDriverState *bdrv_lookup_bs(const char *device, 2796 const char *node_name, 2797 Error **errp) 2798 { 2799 BlockBackend *blk; 2800 BlockDriverState *bs; 2801 2802 if (device) { 2803 blk = blk_by_name(device); 2804 2805 if (blk) { 2806 return blk_bs(blk); 2807 } 2808 } 2809 2810 if (node_name) { 2811 bs = bdrv_find_node(node_name); 2812 2813 if (bs) { 2814 return bs; 2815 } 2816 } 2817 2818 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2819 device ? device : "", 2820 node_name ? node_name : ""); 2821 return NULL; 2822 } 2823 2824 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2825 * return false. If either argument is NULL, return false. */ 2826 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2827 { 2828 while (top && top != base) { 2829 top = top->backing_hd; 2830 } 2831 2832 return top != NULL; 2833 } 2834 2835 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2836 { 2837 if (!bs) { 2838 return QTAILQ_FIRST(&graph_bdrv_states); 2839 } 2840 return QTAILQ_NEXT(bs, node_list); 2841 } 2842 2843 BlockDriverState *bdrv_next(BlockDriverState *bs) 2844 { 2845 if (!bs) { 2846 return QTAILQ_FIRST(&bdrv_states); 2847 } 2848 return QTAILQ_NEXT(bs, device_list); 2849 } 2850 2851 const char *bdrv_get_node_name(const BlockDriverState *bs) 2852 { 2853 return bs->node_name; 2854 } 2855 2856 /* TODO check what callers really want: bs->node_name or blk_name() */ 2857 const char *bdrv_get_device_name(const BlockDriverState *bs) 2858 { 2859 return bs->blk ? blk_name(bs->blk) : ""; 2860 } 2861 2862 /* This can be used to identify nodes that might not have a device 2863 * name associated. Since node and device names live in the same 2864 * namespace, the result is unambiguous. The exception is if both are 2865 * absent, then this returns an empty (non-null) string. */ 2866 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2867 { 2868 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2869 } 2870 2871 int bdrv_get_flags(BlockDriverState *bs) 2872 { 2873 return bs->open_flags; 2874 } 2875 2876 int bdrv_has_zero_init_1(BlockDriverState *bs) 2877 { 2878 return 1; 2879 } 2880 2881 int bdrv_has_zero_init(BlockDriverState *bs) 2882 { 2883 assert(bs->drv); 2884 2885 /* If BS is a copy on write image, it is initialized to 2886 the contents of the base image, which may not be zeroes. */ 2887 if (bs->backing_hd) { 2888 return 0; 2889 } 2890 if (bs->drv->bdrv_has_zero_init) { 2891 return bs->drv->bdrv_has_zero_init(bs); 2892 } 2893 2894 /* safe default */ 2895 return 0; 2896 } 2897 2898 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2899 { 2900 BlockDriverInfo bdi; 2901 2902 if (bs->backing_hd) { 2903 return false; 2904 } 2905 2906 if (bdrv_get_info(bs, &bdi) == 0) { 2907 return bdi.unallocated_blocks_are_zero; 2908 } 2909 2910 return false; 2911 } 2912 2913 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 2914 { 2915 BlockDriverInfo bdi; 2916 2917 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 2918 return false; 2919 } 2920 2921 if (bdrv_get_info(bs, &bdi) == 0) { 2922 return bdi.can_write_zeroes_with_unmap; 2923 } 2924 2925 return false; 2926 } 2927 2928 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2929 { 2930 if (bs->backing_hd && bs->backing_hd->encrypted) 2931 return bs->backing_file; 2932 else if (bs->encrypted) 2933 return bs->filename; 2934 else 2935 return NULL; 2936 } 2937 2938 void bdrv_get_backing_filename(BlockDriverState *bs, 2939 char *filename, int filename_size) 2940 { 2941 pstrcpy(filename, filename_size, bs->backing_file); 2942 } 2943 2944 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2945 { 2946 BlockDriver *drv = bs->drv; 2947 if (!drv) 2948 return -ENOMEDIUM; 2949 if (!drv->bdrv_get_info) 2950 return -ENOTSUP; 2951 memset(bdi, 0, sizeof(*bdi)); 2952 return drv->bdrv_get_info(bs, bdi); 2953 } 2954 2955 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 2956 { 2957 BlockDriver *drv = bs->drv; 2958 if (drv && drv->bdrv_get_specific_info) { 2959 return drv->bdrv_get_specific_info(bs); 2960 } 2961 return NULL; 2962 } 2963 2964 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2965 { 2966 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 2967 return; 2968 } 2969 2970 bs->drv->bdrv_debug_event(bs, event); 2971 } 2972 2973 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 2974 const char *tag) 2975 { 2976 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 2977 bs = bs->file; 2978 } 2979 2980 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 2981 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 2982 } 2983 2984 return -ENOTSUP; 2985 } 2986 2987 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 2988 { 2989 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 2990 bs = bs->file; 2991 } 2992 2993 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 2994 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 2995 } 2996 2997 return -ENOTSUP; 2998 } 2999 3000 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 3001 { 3002 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 3003 bs = bs->file; 3004 } 3005 3006 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 3007 return bs->drv->bdrv_debug_resume(bs, tag); 3008 } 3009 3010 return -ENOTSUP; 3011 } 3012 3013 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 3014 { 3015 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 3016 bs = bs->file; 3017 } 3018 3019 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 3020 return bs->drv->bdrv_debug_is_suspended(bs, tag); 3021 } 3022 3023 return false; 3024 } 3025 3026 int bdrv_is_snapshot(BlockDriverState *bs) 3027 { 3028 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 3029 } 3030 3031 /* backing_file can either be relative, or absolute, or a protocol. If it is 3032 * relative, it must be relative to the chain. So, passing in bs->filename 3033 * from a BDS as backing_file should not be done, as that may be relative to 3034 * the CWD rather than the chain. */ 3035 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 3036 const char *backing_file) 3037 { 3038 char *filename_full = NULL; 3039 char *backing_file_full = NULL; 3040 char *filename_tmp = NULL; 3041 int is_protocol = 0; 3042 BlockDriverState *curr_bs = NULL; 3043 BlockDriverState *retval = NULL; 3044 3045 if (!bs || !bs->drv || !backing_file) { 3046 return NULL; 3047 } 3048 3049 filename_full = g_malloc(PATH_MAX); 3050 backing_file_full = g_malloc(PATH_MAX); 3051 filename_tmp = g_malloc(PATH_MAX); 3052 3053 is_protocol = path_has_protocol(backing_file); 3054 3055 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 3056 3057 /* If either of the filename paths is actually a protocol, then 3058 * compare unmodified paths; otherwise make paths relative */ 3059 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 3060 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 3061 retval = curr_bs->backing_hd; 3062 break; 3063 } 3064 } else { 3065 /* If not an absolute filename path, make it relative to the current 3066 * image's filename path */ 3067 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3068 backing_file); 3069 3070 /* We are going to compare absolute pathnames */ 3071 if (!realpath(filename_tmp, filename_full)) { 3072 continue; 3073 } 3074 3075 /* We need to make sure the backing filename we are comparing against 3076 * is relative to the current image filename (or absolute) */ 3077 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3078 curr_bs->backing_file); 3079 3080 if (!realpath(filename_tmp, backing_file_full)) { 3081 continue; 3082 } 3083 3084 if (strcmp(backing_file_full, filename_full) == 0) { 3085 retval = curr_bs->backing_hd; 3086 break; 3087 } 3088 } 3089 } 3090 3091 g_free(filename_full); 3092 g_free(backing_file_full); 3093 g_free(filename_tmp); 3094 return retval; 3095 } 3096 3097 int bdrv_get_backing_file_depth(BlockDriverState *bs) 3098 { 3099 if (!bs->drv) { 3100 return 0; 3101 } 3102 3103 if (!bs->backing_hd) { 3104 return 0; 3105 } 3106 3107 return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 3108 } 3109 3110 void bdrv_init(void) 3111 { 3112 module_call_init(MODULE_INIT_BLOCK); 3113 } 3114 3115 void bdrv_init_with_whitelist(void) 3116 { 3117 use_bdrv_whitelist = 1; 3118 bdrv_init(); 3119 } 3120 3121 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 3122 { 3123 Error *local_err = NULL; 3124 int ret; 3125 3126 if (!bs->drv) { 3127 return; 3128 } 3129 3130 if (!(bs->open_flags & BDRV_O_INCOMING)) { 3131 return; 3132 } 3133 bs->open_flags &= ~BDRV_O_INCOMING; 3134 3135 if (bs->drv->bdrv_invalidate_cache) { 3136 bs->drv->bdrv_invalidate_cache(bs, &local_err); 3137 } else if (bs->file) { 3138 bdrv_invalidate_cache(bs->file, &local_err); 3139 } 3140 if (local_err) { 3141 error_propagate(errp, local_err); 3142 return; 3143 } 3144 3145 ret = refresh_total_sectors(bs, bs->total_sectors); 3146 if (ret < 0) { 3147 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 3148 return; 3149 } 3150 } 3151 3152 void bdrv_invalidate_cache_all(Error **errp) 3153 { 3154 BlockDriverState *bs; 3155 Error *local_err = NULL; 3156 3157 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3158 AioContext *aio_context = bdrv_get_aio_context(bs); 3159 3160 aio_context_acquire(aio_context); 3161 bdrv_invalidate_cache(bs, &local_err); 3162 aio_context_release(aio_context); 3163 if (local_err) { 3164 error_propagate(errp, local_err); 3165 return; 3166 } 3167 } 3168 } 3169 3170 /**************************************************************/ 3171 /* removable device support */ 3172 3173 /** 3174 * Return TRUE if the media is present 3175 */ 3176 int bdrv_is_inserted(BlockDriverState *bs) 3177 { 3178 BlockDriver *drv = bs->drv; 3179 3180 if (!drv) 3181 return 0; 3182 if (!drv->bdrv_is_inserted) 3183 return 1; 3184 return drv->bdrv_is_inserted(bs); 3185 } 3186 3187 /** 3188 * Return whether the media changed since the last call to this 3189 * function, or -ENOTSUP if we don't know. Most drivers don't know. 3190 */ 3191 int bdrv_media_changed(BlockDriverState *bs) 3192 { 3193 BlockDriver *drv = bs->drv; 3194 3195 if (drv && drv->bdrv_media_changed) { 3196 return drv->bdrv_media_changed(bs); 3197 } 3198 return -ENOTSUP; 3199 } 3200 3201 /** 3202 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3203 */ 3204 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3205 { 3206 BlockDriver *drv = bs->drv; 3207 const char *device_name; 3208 3209 if (drv && drv->bdrv_eject) { 3210 drv->bdrv_eject(bs, eject_flag); 3211 } 3212 3213 device_name = bdrv_get_device_name(bs); 3214 if (device_name[0] != '\0') { 3215 qapi_event_send_device_tray_moved(device_name, 3216 eject_flag, &error_abort); 3217 } 3218 } 3219 3220 /** 3221 * Lock or unlock the media (if it is locked, the user won't be able 3222 * to eject it manually). 3223 */ 3224 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3225 { 3226 BlockDriver *drv = bs->drv; 3227 3228 trace_bdrv_lock_medium(bs, locked); 3229 3230 if (drv && drv->bdrv_lock_medium) { 3231 drv->bdrv_lock_medium(bs, locked); 3232 } 3233 } 3234 3235 void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 3236 { 3237 bs->guest_block_size = align; 3238 } 3239 3240 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 3241 { 3242 BdrvDirtyBitmap *bm; 3243 3244 assert(name); 3245 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3246 if (bm->name && !strcmp(name, bm->name)) { 3247 return bm; 3248 } 3249 } 3250 return NULL; 3251 } 3252 3253 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 3254 { 3255 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3256 g_free(bitmap->name); 3257 bitmap->name = NULL; 3258 } 3259 3260 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 3261 uint32_t granularity, 3262 const char *name, 3263 Error **errp) 3264 { 3265 int64_t bitmap_size; 3266 BdrvDirtyBitmap *bitmap; 3267 uint32_t sector_granularity; 3268 3269 assert((granularity & (granularity - 1)) == 0); 3270 3271 if (name && bdrv_find_dirty_bitmap(bs, name)) { 3272 error_setg(errp, "Bitmap already exists: %s", name); 3273 return NULL; 3274 } 3275 sector_granularity = granularity >> BDRV_SECTOR_BITS; 3276 assert(sector_granularity); 3277 bitmap_size = bdrv_nb_sectors(bs); 3278 if (bitmap_size < 0) { 3279 error_setg_errno(errp, -bitmap_size, "could not get length of device"); 3280 errno = -bitmap_size; 3281 return NULL; 3282 } 3283 bitmap = g_new0(BdrvDirtyBitmap, 1); 3284 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 3285 bitmap->size = bitmap_size; 3286 bitmap->name = g_strdup(name); 3287 bitmap->disabled = false; 3288 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 3289 return bitmap; 3290 } 3291 3292 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 3293 { 3294 return bitmap->successor; 3295 } 3296 3297 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 3298 { 3299 return !(bitmap->disabled || bitmap->successor); 3300 } 3301 3302 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) 3303 { 3304 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3305 return DIRTY_BITMAP_STATUS_FROZEN; 3306 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3307 return DIRTY_BITMAP_STATUS_DISABLED; 3308 } else { 3309 return DIRTY_BITMAP_STATUS_ACTIVE; 3310 } 3311 } 3312 3313 /** 3314 * Create a successor bitmap destined to replace this bitmap after an operation. 3315 * Requires that the bitmap is not frozen and has no successor. 3316 */ 3317 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 3318 BdrvDirtyBitmap *bitmap, Error **errp) 3319 { 3320 uint64_t granularity; 3321 BdrvDirtyBitmap *child; 3322 3323 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3324 error_setg(errp, "Cannot create a successor for a bitmap that is " 3325 "currently frozen"); 3326 return -1; 3327 } 3328 assert(!bitmap->successor); 3329 3330 /* Create an anonymous successor */ 3331 granularity = bdrv_dirty_bitmap_granularity(bitmap); 3332 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 3333 if (!child) { 3334 return -1; 3335 } 3336 3337 /* Successor will be on or off based on our current state. */ 3338 child->disabled = bitmap->disabled; 3339 3340 /* Install the successor and freeze the parent */ 3341 bitmap->successor = child; 3342 return 0; 3343 } 3344 3345 /** 3346 * For a bitmap with a successor, yield our name to the successor, 3347 * delete the old bitmap, and return a handle to the new bitmap. 3348 */ 3349 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 3350 BdrvDirtyBitmap *bitmap, 3351 Error **errp) 3352 { 3353 char *name; 3354 BdrvDirtyBitmap *successor = bitmap->successor; 3355 3356 if (successor == NULL) { 3357 error_setg(errp, "Cannot relinquish control if " 3358 "there's no successor present"); 3359 return NULL; 3360 } 3361 3362 name = bitmap->name; 3363 bitmap->name = NULL; 3364 successor->name = name; 3365 bitmap->successor = NULL; 3366 bdrv_release_dirty_bitmap(bs, bitmap); 3367 3368 return successor; 3369 } 3370 3371 /** 3372 * In cases of failure where we can no longer safely delete the parent, 3373 * we may wish to re-join the parent and child/successor. 3374 * The merged parent will be un-frozen, but not explicitly re-enabled. 3375 */ 3376 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 3377 BdrvDirtyBitmap *parent, 3378 Error **errp) 3379 { 3380 BdrvDirtyBitmap *successor = parent->successor; 3381 3382 if (!successor) { 3383 error_setg(errp, "Cannot reclaim a successor when none is present"); 3384 return NULL; 3385 } 3386 3387 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 3388 error_setg(errp, "Merging of parent and successor bitmap failed"); 3389 return NULL; 3390 } 3391 bdrv_release_dirty_bitmap(bs, successor); 3392 parent->successor = NULL; 3393 3394 return parent; 3395 } 3396 3397 /** 3398 * Truncates _all_ bitmaps attached to a BDS. 3399 */ 3400 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 3401 { 3402 BdrvDirtyBitmap *bitmap; 3403 uint64_t size = bdrv_nb_sectors(bs); 3404 3405 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3406 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3407 hbitmap_truncate(bitmap->bitmap, size); 3408 bitmap->size = size; 3409 } 3410 } 3411 3412 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 3413 { 3414 BdrvDirtyBitmap *bm, *next; 3415 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 3416 if (bm == bitmap) { 3417 assert(!bdrv_dirty_bitmap_frozen(bm)); 3418 QLIST_REMOVE(bitmap, list); 3419 hbitmap_free(bitmap->bitmap); 3420 g_free(bitmap->name); 3421 g_free(bitmap); 3422 return; 3423 } 3424 } 3425 } 3426 3427 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3428 { 3429 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3430 bitmap->disabled = true; 3431 } 3432 3433 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3434 { 3435 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3436 bitmap->disabled = false; 3437 } 3438 3439 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 3440 { 3441 BdrvDirtyBitmap *bm; 3442 BlockDirtyInfoList *list = NULL; 3443 BlockDirtyInfoList **plist = &list; 3444 3445 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3446 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 3447 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 3448 info->count = bdrv_get_dirty_count(bm); 3449 info->granularity = bdrv_dirty_bitmap_granularity(bm); 3450 info->has_name = !!bm->name; 3451 info->name = g_strdup(bm->name); 3452 info->status = bdrv_dirty_bitmap_status(bm); 3453 entry->value = info; 3454 *plist = entry; 3455 plist = &entry->next; 3456 } 3457 3458 return list; 3459 } 3460 3461 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 3462 { 3463 if (bitmap) { 3464 return hbitmap_get(bitmap->bitmap, sector); 3465 } else { 3466 return 0; 3467 } 3468 } 3469 3470 /** 3471 * Chooses a default granularity based on the existing cluster size, 3472 * but clamped between [4K, 64K]. Defaults to 64K in the case that there 3473 * is no cluster size information available. 3474 */ 3475 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 3476 { 3477 BlockDriverInfo bdi; 3478 uint32_t granularity; 3479 3480 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 3481 granularity = MAX(4096, bdi.cluster_size); 3482 granularity = MIN(65536, granularity); 3483 } else { 3484 granularity = 65536; 3485 } 3486 3487 return granularity; 3488 } 3489 3490 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 3491 { 3492 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 3493 } 3494 3495 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 3496 { 3497 hbitmap_iter_init(hbi, bitmap->bitmap, 0); 3498 } 3499 3500 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3501 int64_t cur_sector, int nr_sectors) 3502 { 3503 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3504 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3505 } 3506 3507 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3508 int64_t cur_sector, int nr_sectors) 3509 { 3510 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3511 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3512 } 3513 3514 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3515 { 3516 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3517 hbitmap_reset_all(bitmap->bitmap); 3518 } 3519 3520 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 3521 int nr_sectors) 3522 { 3523 BdrvDirtyBitmap *bitmap; 3524 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3525 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3526 continue; 3527 } 3528 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3529 } 3530 } 3531 3532 /** 3533 * Advance an HBitmapIter to an arbitrary offset. 3534 */ 3535 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 3536 { 3537 assert(hbi->hb); 3538 hbitmap_iter_init(hbi, hbi->hb, offset); 3539 } 3540 3541 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 3542 { 3543 return hbitmap_count(bitmap->bitmap); 3544 } 3545 3546 /* Get a reference to bs */ 3547 void bdrv_ref(BlockDriverState *bs) 3548 { 3549 bs->refcnt++; 3550 } 3551 3552 /* Release a previously grabbed reference to bs. 3553 * If after releasing, reference count is zero, the BlockDriverState is 3554 * deleted. */ 3555 void bdrv_unref(BlockDriverState *bs) 3556 { 3557 if (!bs) { 3558 return; 3559 } 3560 assert(bs->refcnt > 0); 3561 if (--bs->refcnt == 0) { 3562 bdrv_delete(bs); 3563 } 3564 } 3565 3566 struct BdrvOpBlocker { 3567 Error *reason; 3568 QLIST_ENTRY(BdrvOpBlocker) list; 3569 }; 3570 3571 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 3572 { 3573 BdrvOpBlocker *blocker; 3574 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3575 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 3576 blocker = QLIST_FIRST(&bs->op_blockers[op]); 3577 if (errp) { 3578 error_setg(errp, "Node '%s' is busy: %s", 3579 bdrv_get_device_or_node_name(bs), 3580 error_get_pretty(blocker->reason)); 3581 } 3582 return true; 3583 } 3584 return false; 3585 } 3586 3587 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 3588 { 3589 BdrvOpBlocker *blocker; 3590 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3591 3592 blocker = g_new0(BdrvOpBlocker, 1); 3593 blocker->reason = reason; 3594 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 3595 } 3596 3597 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 3598 { 3599 BdrvOpBlocker *blocker, *next; 3600 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3601 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 3602 if (blocker->reason == reason) { 3603 QLIST_REMOVE(blocker, list); 3604 g_free(blocker); 3605 } 3606 } 3607 } 3608 3609 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 3610 { 3611 int i; 3612 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3613 bdrv_op_block(bs, i, reason); 3614 } 3615 } 3616 3617 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 3618 { 3619 int i; 3620 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3621 bdrv_op_unblock(bs, i, reason); 3622 } 3623 } 3624 3625 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 3626 { 3627 int i; 3628 3629 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3630 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 3631 return false; 3632 } 3633 } 3634 return true; 3635 } 3636 3637 void bdrv_iostatus_enable(BlockDriverState *bs) 3638 { 3639 bs->iostatus_enabled = true; 3640 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3641 } 3642 3643 /* The I/O status is only enabled if the drive explicitly 3644 * enables it _and_ the VM is configured to stop on errors */ 3645 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3646 { 3647 return (bs->iostatus_enabled && 3648 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 3649 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 3650 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 3651 } 3652 3653 void bdrv_iostatus_disable(BlockDriverState *bs) 3654 { 3655 bs->iostatus_enabled = false; 3656 } 3657 3658 void bdrv_iostatus_reset(BlockDriverState *bs) 3659 { 3660 if (bdrv_iostatus_is_enabled(bs)) { 3661 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3662 if (bs->job) { 3663 block_job_iostatus_reset(bs->job); 3664 } 3665 } 3666 } 3667 3668 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3669 { 3670 assert(bdrv_iostatus_is_enabled(bs)); 3671 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3672 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 3673 BLOCK_DEVICE_IO_STATUS_FAILED; 3674 } 3675 } 3676 3677 void bdrv_img_create(const char *filename, const char *fmt, 3678 const char *base_filename, const char *base_fmt, 3679 char *options, uint64_t img_size, int flags, 3680 Error **errp, bool quiet) 3681 { 3682 QemuOptsList *create_opts = NULL; 3683 QemuOpts *opts = NULL; 3684 const char *backing_fmt, *backing_file; 3685 int64_t size; 3686 BlockDriver *drv, *proto_drv; 3687 BlockDriver *backing_drv = NULL; 3688 Error *local_err = NULL; 3689 int ret = 0; 3690 3691 /* Find driver and parse its options */ 3692 drv = bdrv_find_format(fmt); 3693 if (!drv) { 3694 error_setg(errp, "Unknown file format '%s'", fmt); 3695 return; 3696 } 3697 3698 proto_drv = bdrv_find_protocol(filename, true, errp); 3699 if (!proto_drv) { 3700 return; 3701 } 3702 3703 if (!drv->create_opts) { 3704 error_setg(errp, "Format driver '%s' does not support image creation", 3705 drv->format_name); 3706 return; 3707 } 3708 3709 if (!proto_drv->create_opts) { 3710 error_setg(errp, "Protocol driver '%s' does not support image creation", 3711 proto_drv->format_name); 3712 return; 3713 } 3714 3715 create_opts = qemu_opts_append(create_opts, drv->create_opts); 3716 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 3717 3718 /* Create parameter list with default values */ 3719 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 3720 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 3721 3722 /* Parse -o options */ 3723 if (options) { 3724 qemu_opts_do_parse(opts, options, NULL, &local_err); 3725 if (local_err) { 3726 error_report_err(local_err); 3727 local_err = NULL; 3728 error_setg(errp, "Invalid options for file format '%s'", fmt); 3729 goto out; 3730 } 3731 } 3732 3733 if (base_filename) { 3734 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 3735 if (local_err) { 3736 error_setg(errp, "Backing file not supported for file format '%s'", 3737 fmt); 3738 goto out; 3739 } 3740 } 3741 3742 if (base_fmt) { 3743 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 3744 if (local_err) { 3745 error_setg(errp, "Backing file format not supported for file " 3746 "format '%s'", fmt); 3747 goto out; 3748 } 3749 } 3750 3751 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3752 if (backing_file) { 3753 if (!strcmp(filename, backing_file)) { 3754 error_setg(errp, "Error: Trying to create an image with the " 3755 "same filename as the backing file"); 3756 goto out; 3757 } 3758 } 3759 3760 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3761 if (backing_fmt) { 3762 backing_drv = bdrv_find_format(backing_fmt); 3763 if (!backing_drv) { 3764 error_setg(errp, "Unknown backing file format '%s'", 3765 backing_fmt); 3766 goto out; 3767 } 3768 } 3769 3770 // The size for the image must always be specified, with one exception: 3771 // If we are using a backing file, we can obtain the size from there 3772 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3773 if (size == -1) { 3774 if (backing_file) { 3775 BlockDriverState *bs; 3776 char *full_backing = g_new0(char, PATH_MAX); 3777 int64_t size; 3778 int back_flags; 3779 3780 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 3781 full_backing, PATH_MAX, 3782 &local_err); 3783 if (local_err) { 3784 g_free(full_backing); 3785 goto out; 3786 } 3787 3788 /* backing files always opened read-only */ 3789 back_flags = 3790 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 3791 3792 bs = NULL; 3793 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 3794 backing_drv, &local_err); 3795 g_free(full_backing); 3796 if (ret < 0) { 3797 goto out; 3798 } 3799 size = bdrv_getlength(bs); 3800 if (size < 0) { 3801 error_setg_errno(errp, -size, "Could not get size of '%s'", 3802 backing_file); 3803 bdrv_unref(bs); 3804 goto out; 3805 } 3806 3807 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 3808 3809 bdrv_unref(bs); 3810 } else { 3811 error_setg(errp, "Image creation needs a size parameter"); 3812 goto out; 3813 } 3814 } 3815 3816 if (!quiet) { 3817 printf("Formatting '%s', fmt=%s", filename, fmt); 3818 qemu_opts_print(opts, " "); 3819 puts(""); 3820 } 3821 3822 ret = bdrv_create(drv, filename, opts, &local_err); 3823 3824 if (ret == -EFBIG) { 3825 /* This is generally a better message than whatever the driver would 3826 * deliver (especially because of the cluster_size_hint), since that 3827 * is most probably not much different from "image too large". */ 3828 const char *cluster_size_hint = ""; 3829 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 3830 cluster_size_hint = " (try using a larger cluster size)"; 3831 } 3832 error_setg(errp, "The image size is too large for file format '%s'" 3833 "%s", fmt, cluster_size_hint); 3834 error_free(local_err); 3835 local_err = NULL; 3836 } 3837 3838 out: 3839 qemu_opts_del(opts); 3840 qemu_opts_free(create_opts); 3841 if (local_err) { 3842 error_propagate(errp, local_err); 3843 } 3844 } 3845 3846 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 3847 { 3848 return bs->aio_context; 3849 } 3850 3851 void bdrv_detach_aio_context(BlockDriverState *bs) 3852 { 3853 BdrvAioNotifier *baf; 3854 3855 if (!bs->drv) { 3856 return; 3857 } 3858 3859 QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 3860 baf->detach_aio_context(baf->opaque); 3861 } 3862 3863 if (bs->io_limits_enabled) { 3864 throttle_timers_detach_aio_context(&bs->throttle_timers); 3865 } 3866 if (bs->drv->bdrv_detach_aio_context) { 3867 bs->drv->bdrv_detach_aio_context(bs); 3868 } 3869 if (bs->file) { 3870 bdrv_detach_aio_context(bs->file); 3871 } 3872 if (bs->backing_hd) { 3873 bdrv_detach_aio_context(bs->backing_hd); 3874 } 3875 3876 bs->aio_context = NULL; 3877 } 3878 3879 void bdrv_attach_aio_context(BlockDriverState *bs, 3880 AioContext *new_context) 3881 { 3882 BdrvAioNotifier *ban; 3883 3884 if (!bs->drv) { 3885 return; 3886 } 3887 3888 bs->aio_context = new_context; 3889 3890 if (bs->backing_hd) { 3891 bdrv_attach_aio_context(bs->backing_hd, new_context); 3892 } 3893 if (bs->file) { 3894 bdrv_attach_aio_context(bs->file, new_context); 3895 } 3896 if (bs->drv->bdrv_attach_aio_context) { 3897 bs->drv->bdrv_attach_aio_context(bs, new_context); 3898 } 3899 if (bs->io_limits_enabled) { 3900 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); 3901 } 3902 3903 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3904 ban->attached_aio_context(new_context, ban->opaque); 3905 } 3906 } 3907 3908 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3909 { 3910 bdrv_drain(bs); /* ensure there are no in-flight requests */ 3911 3912 bdrv_detach_aio_context(bs); 3913 3914 /* This function executes in the old AioContext so acquire the new one in 3915 * case it runs in a different thread. 3916 */ 3917 aio_context_acquire(new_context); 3918 bdrv_attach_aio_context(bs, new_context); 3919 aio_context_release(new_context); 3920 } 3921 3922 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3923 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3924 void (*detach_aio_context)(void *opaque), void *opaque) 3925 { 3926 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3927 *ban = (BdrvAioNotifier){ 3928 .attached_aio_context = attached_aio_context, 3929 .detach_aio_context = detach_aio_context, 3930 .opaque = opaque 3931 }; 3932 3933 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3934 } 3935 3936 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3937 void (*attached_aio_context)(AioContext *, 3938 void *), 3939 void (*detach_aio_context)(void *), 3940 void *opaque) 3941 { 3942 BdrvAioNotifier *ban, *ban_next; 3943 3944 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3945 if (ban->attached_aio_context == attached_aio_context && 3946 ban->detach_aio_context == detach_aio_context && 3947 ban->opaque == opaque) 3948 { 3949 QLIST_REMOVE(ban, list); 3950 g_free(ban); 3951 3952 return; 3953 } 3954 } 3955 3956 abort(); 3957 } 3958 3959 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3960 BlockDriverAmendStatusCB *status_cb) 3961 { 3962 if (!bs->drv->bdrv_amend_options) { 3963 return -ENOTSUP; 3964 } 3965 return bs->drv->bdrv_amend_options(bs, opts, status_cb); 3966 } 3967 3968 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3969 * of block filter and by bdrv_is_first_non_filter. 3970 * It is used to test if the given bs is the candidate or recurse more in the 3971 * node graph. 3972 */ 3973 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3974 BlockDriverState *candidate) 3975 { 3976 /* return false if basic checks fails */ 3977 if (!bs || !bs->drv) { 3978 return false; 3979 } 3980 3981 /* the code reached a non block filter driver -> check if the bs is 3982 * the same as the candidate. It's the recursion termination condition. 3983 */ 3984 if (!bs->drv->is_filter) { 3985 return bs == candidate; 3986 } 3987 /* Down this path the driver is a block filter driver */ 3988 3989 /* If the block filter recursion method is defined use it to recurse down 3990 * the node graph. 3991 */ 3992 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3993 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3994 } 3995 3996 /* the driver is a block filter but don't allow to recurse -> return false 3997 */ 3998 return false; 3999 } 4000 4001 /* This function checks if the candidate is the first non filter bs down it's 4002 * bs chain. Since we don't have pointers to parents it explore all bs chains 4003 * from the top. Some filters can choose not to pass down the recursion. 4004 */ 4005 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 4006 { 4007 BlockDriverState *bs; 4008 4009 /* walk down the bs forest recursively */ 4010 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 4011 bool perm; 4012 4013 /* try to recurse in this top level bs */ 4014 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 4015 4016 /* candidate is the first non filter */ 4017 if (perm) { 4018 return true; 4019 } 4020 } 4021 4022 return false; 4023 } 4024 4025 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 4026 { 4027 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 4028 AioContext *aio_context; 4029 4030 if (!to_replace_bs) { 4031 error_setg(errp, "Node name '%s' not found", node_name); 4032 return NULL; 4033 } 4034 4035 aio_context = bdrv_get_aio_context(to_replace_bs); 4036 aio_context_acquire(aio_context); 4037 4038 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 4039 to_replace_bs = NULL; 4040 goto out; 4041 } 4042 4043 /* We don't want arbitrary node of the BDS chain to be replaced only the top 4044 * most non filter in order to prevent data corruption. 4045 * Another benefit is that this tests exclude backing files which are 4046 * blocked by the backing blockers. 4047 */ 4048 if (!bdrv_is_first_non_filter(to_replace_bs)) { 4049 error_setg(errp, "Only top most non filter can be replaced"); 4050 to_replace_bs = NULL; 4051 goto out; 4052 } 4053 4054 out: 4055 aio_context_release(aio_context); 4056 return to_replace_bs; 4057 } 4058 4059 static bool append_open_options(QDict *d, BlockDriverState *bs) 4060 { 4061 const QDictEntry *entry; 4062 bool found_any = false; 4063 4064 for (entry = qdict_first(bs->options); entry; 4065 entry = qdict_next(bs->options, entry)) 4066 { 4067 /* Only take options for this level and exclude all non-driver-specific 4068 * options */ 4069 if (!strchr(qdict_entry_key(entry), '.') && 4070 strcmp(qdict_entry_key(entry), "node-name")) 4071 { 4072 qobject_incref(qdict_entry_value(entry)); 4073 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 4074 found_any = true; 4075 } 4076 } 4077 4078 return found_any; 4079 } 4080 4081 /* Updates the following BDS fields: 4082 * - exact_filename: A filename which may be used for opening a block device 4083 * which (mostly) equals the given BDS (even without any 4084 * other options; so reading and writing must return the same 4085 * results, but caching etc. may be different) 4086 * - full_open_options: Options which, when given when opening a block device 4087 * (without a filename), result in a BDS (mostly) 4088 * equalling the given one 4089 * - filename: If exact_filename is set, it is copied here. Otherwise, 4090 * full_open_options is converted to a JSON object, prefixed with 4091 * "json:" (for use through the JSON pseudo protocol) and put here. 4092 */ 4093 void bdrv_refresh_filename(BlockDriverState *bs) 4094 { 4095 BlockDriver *drv = bs->drv; 4096 QDict *opts; 4097 4098 if (!drv) { 4099 return; 4100 } 4101 4102 /* This BDS's file name will most probably depend on its file's name, so 4103 * refresh that first */ 4104 if (bs->file) { 4105 bdrv_refresh_filename(bs->file); 4106 } 4107 4108 if (drv->bdrv_refresh_filename) { 4109 /* Obsolete information is of no use here, so drop the old file name 4110 * information before refreshing it */ 4111 bs->exact_filename[0] = '\0'; 4112 if (bs->full_open_options) { 4113 QDECREF(bs->full_open_options); 4114 bs->full_open_options = NULL; 4115 } 4116 4117 drv->bdrv_refresh_filename(bs); 4118 } else if (bs->file) { 4119 /* Try to reconstruct valid information from the underlying file */ 4120 bool has_open_options; 4121 4122 bs->exact_filename[0] = '\0'; 4123 if (bs->full_open_options) { 4124 QDECREF(bs->full_open_options); 4125 bs->full_open_options = NULL; 4126 } 4127 4128 opts = qdict_new(); 4129 has_open_options = append_open_options(opts, bs); 4130 4131 /* If no specific options have been given for this BDS, the filename of 4132 * the underlying file should suffice for this one as well */ 4133 if (bs->file->exact_filename[0] && !has_open_options) { 4134 strcpy(bs->exact_filename, bs->file->exact_filename); 4135 } 4136 /* Reconstructing the full options QDict is simple for most format block 4137 * drivers, as long as the full options are known for the underlying 4138 * file BDS. The full options QDict of that file BDS should somehow 4139 * contain a representation of the filename, therefore the following 4140 * suffices without querying the (exact_)filename of this BDS. */ 4141 if (bs->file->full_open_options) { 4142 qdict_put_obj(opts, "driver", 4143 QOBJECT(qstring_from_str(drv->format_name))); 4144 QINCREF(bs->file->full_open_options); 4145 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 4146 4147 bs->full_open_options = opts; 4148 } else { 4149 QDECREF(opts); 4150 } 4151 } else if (!bs->full_open_options && qdict_size(bs->options)) { 4152 /* There is no underlying file BDS (at least referenced by BDS.file), 4153 * so the full options QDict should be equal to the options given 4154 * specifically for this block device when it was opened (plus the 4155 * driver specification). 4156 * Because those options don't change, there is no need to update 4157 * full_open_options when it's already set. */ 4158 4159 opts = qdict_new(); 4160 append_open_options(opts, bs); 4161 qdict_put_obj(opts, "driver", 4162 QOBJECT(qstring_from_str(drv->format_name))); 4163 4164 if (bs->exact_filename[0]) { 4165 /* This may not work for all block protocol drivers (some may 4166 * require this filename to be parsed), but we have to find some 4167 * default solution here, so just include it. If some block driver 4168 * does not support pure options without any filename at all or 4169 * needs some special format of the options QDict, it needs to 4170 * implement the driver-specific bdrv_refresh_filename() function. 4171 */ 4172 qdict_put_obj(opts, "filename", 4173 QOBJECT(qstring_from_str(bs->exact_filename))); 4174 } 4175 4176 bs->full_open_options = opts; 4177 } 4178 4179 if (bs->exact_filename[0]) { 4180 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4181 } else if (bs->full_open_options) { 4182 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4183 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4184 qstring_get_str(json)); 4185 QDECREF(json); 4186 } 4187 } 4188 4189 /* This accessor function purpose is to allow the device models to access the 4190 * BlockAcctStats structure embedded inside a BlockDriverState without being 4191 * aware of the BlockDriverState structure layout. 4192 * It will go away when the BlockAcctStats structure will be moved inside 4193 * the device models. 4194 */ 4195 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 4196 { 4197 return &bs->stats; 4198 } 4199