1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "qemu/module.h" 30 #include "qapi/qmp/qjson.h" 31 #include "sysemu/block-backend.h" 32 #include "sysemu/sysemu.h" 33 #include "qemu/notify.h" 34 #include "block/coroutine.h" 35 #include "block/qapi.h" 36 #include "qmp-commands.h" 37 #include "qemu/timer.h" 38 #include "qapi-event.h" 39 40 #ifdef CONFIG_BSD 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <sys/ioctl.h> 44 #include <sys/queue.h> 45 #ifndef __DragonFly__ 46 #include <sys/disk.h> 47 #endif 48 #endif 49 50 #ifdef _WIN32 51 #include <windows.h> 52 #endif 53 54 /** 55 * A BdrvDirtyBitmap can be in three possible states: 56 * (1) successor is NULL and disabled is false: full r/w mode 57 * (2) successor is NULL and disabled is true: read only mode ("disabled") 58 * (3) successor is set: frozen mode. 59 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 60 * or enabled. A frozen bitmap can only abdicate() or reclaim(). 61 */ 62 struct BdrvDirtyBitmap { 63 HBitmap *bitmap; /* Dirty sector bitmap implementation */ 64 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 65 char *name; /* Optional non-empty unique ID */ 66 int64_t size; /* Size of the bitmap (Number of sectors) */ 67 bool disabled; /* Bitmap is read-only */ 68 QLIST_ENTRY(BdrvDirtyBitmap) list; 69 }; 70 71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 72 73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 74 QTAILQ_HEAD_INITIALIZER(bdrv_states); 75 76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 78 79 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 80 QLIST_HEAD_INITIALIZER(bdrv_drivers); 81 82 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 83 /* If non-zero, use only whitelisted block drivers */ 84 static int use_bdrv_whitelist; 85 86 #ifdef _WIN32 87 static int is_windows_drive_prefix(const char *filename) 88 { 89 return (((filename[0] >= 'a' && filename[0] <= 'z') || 90 (filename[0] >= 'A' && filename[0] <= 'Z')) && 91 filename[1] == ':'); 92 } 93 94 int is_windows_drive(const char *filename) 95 { 96 if (is_windows_drive_prefix(filename) && 97 filename[2] == '\0') 98 return 1; 99 if (strstart(filename, "\\\\.\\", NULL) || 100 strstart(filename, "//./", NULL)) 101 return 1; 102 return 0; 103 } 104 #endif 105 106 size_t bdrv_opt_mem_align(BlockDriverState *bs) 107 { 108 if (!bs || !bs->drv) { 109 /* page size or 4k (hdd sector size) should be on the safe side */ 110 return MAX(4096, getpagesize()); 111 } 112 113 return bs->bl.opt_mem_alignment; 114 } 115 116 size_t bdrv_min_mem_align(BlockDriverState *bs) 117 { 118 if (!bs || !bs->drv) { 119 /* page size or 4k (hdd sector size) should be on the safe side */ 120 return MAX(4096, getpagesize()); 121 } 122 123 return bs->bl.min_mem_alignment; 124 } 125 126 /* check if the path starts with "<protocol>:" */ 127 int path_has_protocol(const char *path) 128 { 129 const char *p; 130 131 #ifdef _WIN32 132 if (is_windows_drive(path) || 133 is_windows_drive_prefix(path)) { 134 return 0; 135 } 136 p = path + strcspn(path, ":/\\"); 137 #else 138 p = path + strcspn(path, ":/"); 139 #endif 140 141 return *p == ':'; 142 } 143 144 int path_is_absolute(const char *path) 145 { 146 #ifdef _WIN32 147 /* specific case for names like: "\\.\d:" */ 148 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 149 return 1; 150 } 151 return (*path == '/' || *path == '\\'); 152 #else 153 return (*path == '/'); 154 #endif 155 } 156 157 /* if filename is absolute, just copy it to dest. Otherwise, build a 158 path to it by considering it is relative to base_path. URL are 159 supported. */ 160 void path_combine(char *dest, int dest_size, 161 const char *base_path, 162 const char *filename) 163 { 164 const char *p, *p1; 165 int len; 166 167 if (dest_size <= 0) 168 return; 169 if (path_is_absolute(filename)) { 170 pstrcpy(dest, dest_size, filename); 171 } else { 172 p = strchr(base_path, ':'); 173 if (p) 174 p++; 175 else 176 p = base_path; 177 p1 = strrchr(base_path, '/'); 178 #ifdef _WIN32 179 { 180 const char *p2; 181 p2 = strrchr(base_path, '\\'); 182 if (!p1 || p2 > p1) 183 p1 = p2; 184 } 185 #endif 186 if (p1) 187 p1++; 188 else 189 p1 = base_path; 190 if (p1 > p) 191 p = p1; 192 len = p - base_path; 193 if (len > dest_size - 1) 194 len = dest_size - 1; 195 memcpy(dest, base_path, len); 196 dest[len] = '\0'; 197 pstrcat(dest, dest_size, filename); 198 } 199 } 200 201 void bdrv_get_full_backing_filename_from_filename(const char *backed, 202 const char *backing, 203 char *dest, size_t sz, 204 Error **errp) 205 { 206 if (backing[0] == '\0' || path_has_protocol(backing) || 207 path_is_absolute(backing)) 208 { 209 pstrcpy(dest, sz, backing); 210 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 211 error_setg(errp, "Cannot use relative backing file names for '%s'", 212 backed); 213 } else { 214 path_combine(dest, sz, backed, backing); 215 } 216 } 217 218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 219 Error **errp) 220 { 221 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 222 223 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 224 dest, sz, errp); 225 } 226 227 void bdrv_register(BlockDriver *bdrv) 228 { 229 bdrv_setup_io_funcs(bdrv); 230 231 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 232 } 233 234 BlockDriverState *bdrv_new_root(void) 235 { 236 BlockDriverState *bs = bdrv_new(); 237 238 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 239 return bs; 240 } 241 242 BlockDriverState *bdrv_new(void) 243 { 244 BlockDriverState *bs; 245 int i; 246 247 bs = g_new0(BlockDriverState, 1); 248 QLIST_INIT(&bs->dirty_bitmaps); 249 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 250 QLIST_INIT(&bs->op_blockers[i]); 251 } 252 bdrv_iostatus_disable(bs); 253 notifier_list_init(&bs->close_notifiers); 254 notifier_with_return_list_init(&bs->before_write_notifiers); 255 qemu_co_queue_init(&bs->throttled_reqs[0]); 256 qemu_co_queue_init(&bs->throttled_reqs[1]); 257 bs->refcnt = 1; 258 bs->aio_context = qemu_get_aio_context(); 259 260 return bs; 261 } 262 263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 264 { 265 notifier_list_add(&bs->close_notifiers, notify); 266 } 267 268 BlockDriver *bdrv_find_format(const char *format_name) 269 { 270 BlockDriver *drv1; 271 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 272 if (!strcmp(drv1->format_name, format_name)) { 273 return drv1; 274 } 275 } 276 return NULL; 277 } 278 279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 280 { 281 static const char *whitelist_rw[] = { 282 CONFIG_BDRV_RW_WHITELIST 283 }; 284 static const char *whitelist_ro[] = { 285 CONFIG_BDRV_RO_WHITELIST 286 }; 287 const char **p; 288 289 if (!whitelist_rw[0] && !whitelist_ro[0]) { 290 return 1; /* no whitelist, anything goes */ 291 } 292 293 for (p = whitelist_rw; *p; p++) { 294 if (!strcmp(drv->format_name, *p)) { 295 return 1; 296 } 297 } 298 if (read_only) { 299 for (p = whitelist_ro; *p; p++) { 300 if (!strcmp(drv->format_name, *p)) { 301 return 1; 302 } 303 } 304 } 305 return 0; 306 } 307 308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 309 bool read_only) 310 { 311 BlockDriver *drv = bdrv_find_format(format_name); 312 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 313 } 314 315 typedef struct CreateCo { 316 BlockDriver *drv; 317 char *filename; 318 QemuOpts *opts; 319 int ret; 320 Error *err; 321 } CreateCo; 322 323 static void coroutine_fn bdrv_create_co_entry(void *opaque) 324 { 325 Error *local_err = NULL; 326 int ret; 327 328 CreateCo *cco = opaque; 329 assert(cco->drv); 330 331 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 332 if (local_err) { 333 error_propagate(&cco->err, local_err); 334 } 335 cco->ret = ret; 336 } 337 338 int bdrv_create(BlockDriver *drv, const char* filename, 339 QemuOpts *opts, Error **errp) 340 { 341 int ret; 342 343 Coroutine *co; 344 CreateCo cco = { 345 .drv = drv, 346 .filename = g_strdup(filename), 347 .opts = opts, 348 .ret = NOT_DONE, 349 .err = NULL, 350 }; 351 352 if (!drv->bdrv_create) { 353 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 354 ret = -ENOTSUP; 355 goto out; 356 } 357 358 if (qemu_in_coroutine()) { 359 /* Fast-path if already in coroutine context */ 360 bdrv_create_co_entry(&cco); 361 } else { 362 co = qemu_coroutine_create(bdrv_create_co_entry); 363 qemu_coroutine_enter(co, &cco); 364 while (cco.ret == NOT_DONE) { 365 aio_poll(qemu_get_aio_context(), true); 366 } 367 } 368 369 ret = cco.ret; 370 if (ret < 0) { 371 if (cco.err) { 372 error_propagate(errp, cco.err); 373 } else { 374 error_setg_errno(errp, -ret, "Could not create image"); 375 } 376 } 377 378 out: 379 g_free(cco.filename); 380 return ret; 381 } 382 383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 384 { 385 BlockDriver *drv; 386 Error *local_err = NULL; 387 int ret; 388 389 drv = bdrv_find_protocol(filename, true, errp); 390 if (drv == NULL) { 391 return -ENOENT; 392 } 393 394 ret = bdrv_create(drv, filename, opts, &local_err); 395 if (local_err) { 396 error_propagate(errp, local_err); 397 } 398 return ret; 399 } 400 401 /** 402 * Try to get @bs's logical and physical block size. 403 * On success, store them in @bsz struct and return 0. 404 * On failure return -errno. 405 * @bs must not be empty. 406 */ 407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 408 { 409 BlockDriver *drv = bs->drv; 410 411 if (drv && drv->bdrv_probe_blocksizes) { 412 return drv->bdrv_probe_blocksizes(bs, bsz); 413 } 414 415 return -ENOTSUP; 416 } 417 418 /** 419 * Try to get @bs's geometry (cyls, heads, sectors). 420 * On success, store them in @geo struct and return 0. 421 * On failure return -errno. 422 * @bs must not be empty. 423 */ 424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 425 { 426 BlockDriver *drv = bs->drv; 427 428 if (drv && drv->bdrv_probe_geometry) { 429 return drv->bdrv_probe_geometry(bs, geo); 430 } 431 432 return -ENOTSUP; 433 } 434 435 /* 436 * Create a uniquely-named empty temporary file. 437 * Return 0 upon success, otherwise a negative errno value. 438 */ 439 int get_tmp_filename(char *filename, int size) 440 { 441 #ifdef _WIN32 442 char temp_dir[MAX_PATH]; 443 /* GetTempFileName requires that its output buffer (4th param) 444 have length MAX_PATH or greater. */ 445 assert(size >= MAX_PATH); 446 return (GetTempPath(MAX_PATH, temp_dir) 447 && GetTempFileName(temp_dir, "qem", 0, filename) 448 ? 0 : -GetLastError()); 449 #else 450 int fd; 451 const char *tmpdir; 452 tmpdir = getenv("TMPDIR"); 453 if (!tmpdir) { 454 tmpdir = "/var/tmp"; 455 } 456 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 457 return -EOVERFLOW; 458 } 459 fd = mkstemp(filename); 460 if (fd < 0) { 461 return -errno; 462 } 463 if (close(fd) != 0) { 464 unlink(filename); 465 return -errno; 466 } 467 return 0; 468 #endif 469 } 470 471 /* 472 * Detect host devices. By convention, /dev/cdrom[N] is always 473 * recognized as a host CDROM. 474 */ 475 static BlockDriver *find_hdev_driver(const char *filename) 476 { 477 int score_max = 0, score; 478 BlockDriver *drv = NULL, *d; 479 480 QLIST_FOREACH(d, &bdrv_drivers, list) { 481 if (d->bdrv_probe_device) { 482 score = d->bdrv_probe_device(filename); 483 if (score > score_max) { 484 score_max = score; 485 drv = d; 486 } 487 } 488 } 489 490 return drv; 491 } 492 493 BlockDriver *bdrv_find_protocol(const char *filename, 494 bool allow_protocol_prefix, 495 Error **errp) 496 { 497 BlockDriver *drv1; 498 char protocol[128]; 499 int len; 500 const char *p; 501 502 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 503 504 /* 505 * XXX(hch): we really should not let host device detection 506 * override an explicit protocol specification, but moving this 507 * later breaks access to device names with colons in them. 508 * Thanks to the brain-dead persistent naming schemes on udev- 509 * based Linux systems those actually are quite common. 510 */ 511 drv1 = find_hdev_driver(filename); 512 if (drv1) { 513 return drv1; 514 } 515 516 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 517 return &bdrv_file; 518 } 519 520 p = strchr(filename, ':'); 521 assert(p != NULL); 522 len = p - filename; 523 if (len > sizeof(protocol) - 1) 524 len = sizeof(protocol) - 1; 525 memcpy(protocol, filename, len); 526 protocol[len] = '\0'; 527 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 528 if (drv1->protocol_name && 529 !strcmp(drv1->protocol_name, protocol)) { 530 return drv1; 531 } 532 } 533 534 error_setg(errp, "Unknown protocol '%s'", protocol); 535 return NULL; 536 } 537 538 /* 539 * Guess image format by probing its contents. 540 * This is not a good idea when your image is raw (CVE-2008-2004), but 541 * we do it anyway for backward compatibility. 542 * 543 * @buf contains the image's first @buf_size bytes. 544 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 545 * but can be smaller if the image file is smaller) 546 * @filename is its filename. 547 * 548 * For all block drivers, call the bdrv_probe() method to get its 549 * probing score. 550 * Return the first block driver with the highest probing score. 551 */ 552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 553 const char *filename) 554 { 555 int score_max = 0, score; 556 BlockDriver *drv = NULL, *d; 557 558 QLIST_FOREACH(d, &bdrv_drivers, list) { 559 if (d->bdrv_probe) { 560 score = d->bdrv_probe(buf, buf_size, filename); 561 if (score > score_max) { 562 score_max = score; 563 drv = d; 564 } 565 } 566 } 567 568 return drv; 569 } 570 571 static int find_image_format(BlockDriverState *bs, const char *filename, 572 BlockDriver **pdrv, Error **errp) 573 { 574 BlockDriver *drv; 575 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 576 int ret = 0; 577 578 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 579 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 580 *pdrv = &bdrv_raw; 581 return ret; 582 } 583 584 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 585 if (ret < 0) { 586 error_setg_errno(errp, -ret, "Could not read image for determining its " 587 "format"); 588 *pdrv = NULL; 589 return ret; 590 } 591 592 drv = bdrv_probe_all(buf, ret, filename); 593 if (!drv) { 594 error_setg(errp, "Could not determine image format: No compatible " 595 "driver found"); 596 ret = -ENOENT; 597 } 598 *pdrv = drv; 599 return ret; 600 } 601 602 /** 603 * Set the current 'total_sectors' value 604 * Return 0 on success, -errno on error. 605 */ 606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 607 { 608 BlockDriver *drv = bs->drv; 609 610 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 611 if (bs->sg) 612 return 0; 613 614 /* query actual device if possible, otherwise just trust the hint */ 615 if (drv->bdrv_getlength) { 616 int64_t length = drv->bdrv_getlength(bs); 617 if (length < 0) { 618 return length; 619 } 620 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 621 } 622 623 bs->total_sectors = hint; 624 return 0; 625 } 626 627 /** 628 * Set open flags for a given discard mode 629 * 630 * Return 0 on success, -1 if the discard mode was invalid. 631 */ 632 int bdrv_parse_discard_flags(const char *mode, int *flags) 633 { 634 *flags &= ~BDRV_O_UNMAP; 635 636 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 637 /* do nothing */ 638 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 639 *flags |= BDRV_O_UNMAP; 640 } else { 641 return -1; 642 } 643 644 return 0; 645 } 646 647 /** 648 * Set open flags for a given cache mode 649 * 650 * Return 0 on success, -1 if the cache mode was invalid. 651 */ 652 int bdrv_parse_cache_flags(const char *mode, int *flags) 653 { 654 *flags &= ~BDRV_O_CACHE_MASK; 655 656 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 657 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 658 } else if (!strcmp(mode, "directsync")) { 659 *flags |= BDRV_O_NOCACHE; 660 } else if (!strcmp(mode, "writeback")) { 661 *flags |= BDRV_O_CACHE_WB; 662 } else if (!strcmp(mode, "unsafe")) { 663 *flags |= BDRV_O_CACHE_WB; 664 *flags |= BDRV_O_NO_FLUSH; 665 } else if (!strcmp(mode, "writethrough")) { 666 /* this is the default */ 667 } else { 668 return -1; 669 } 670 671 return 0; 672 } 673 674 /* 675 * Returns the flags that a temporary snapshot should get, based on the 676 * originally requested flags (the originally requested image will have flags 677 * like a backing file) 678 */ 679 static int bdrv_temp_snapshot_flags(int flags) 680 { 681 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 682 } 683 684 /* 685 * Returns the flags that bs->file should get, based on the given flags for 686 * the parent BDS 687 */ 688 static int bdrv_inherited_flags(int flags) 689 { 690 /* Enable protocol handling, disable format probing for bs->file */ 691 flags |= BDRV_O_PROTOCOL; 692 693 /* Our block drivers take care to send flushes and respect unmap policy, 694 * so we can enable both unconditionally on lower layers. */ 695 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 696 697 /* Clear flags that only apply to the top layer */ 698 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 699 700 return flags; 701 } 702 703 /* 704 * Returns the flags that bs->backing_hd should get, based on the given flags 705 * for the parent BDS 706 */ 707 static int bdrv_backing_flags(int flags) 708 { 709 /* backing files always opened read-only */ 710 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 711 712 /* snapshot=on is handled on the top layer */ 713 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 714 715 return flags; 716 } 717 718 static int bdrv_open_flags(BlockDriverState *bs, int flags) 719 { 720 int open_flags = flags | BDRV_O_CACHE_WB; 721 722 /* 723 * Clear flags that are internal to the block layer before opening the 724 * image. 725 */ 726 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 727 728 /* 729 * Snapshots should be writable. 730 */ 731 if (flags & BDRV_O_TEMPORARY) { 732 open_flags |= BDRV_O_RDWR; 733 } 734 735 return open_flags; 736 } 737 738 static void bdrv_assign_node_name(BlockDriverState *bs, 739 const char *node_name, 740 Error **errp) 741 { 742 if (!node_name) { 743 return; 744 } 745 746 /* Check for empty string or invalid characters */ 747 if (!id_wellformed(node_name)) { 748 error_setg(errp, "Invalid node name"); 749 return; 750 } 751 752 /* takes care of avoiding namespaces collisions */ 753 if (blk_by_name(node_name)) { 754 error_setg(errp, "node-name=%s is conflicting with a device id", 755 node_name); 756 return; 757 } 758 759 /* takes care of avoiding duplicates node names */ 760 if (bdrv_find_node(node_name)) { 761 error_setg(errp, "Duplicate node name"); 762 return; 763 } 764 765 /* copy node name into the bs and insert it into the graph list */ 766 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 767 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 768 } 769 770 /* 771 * Common part for opening disk images and files 772 * 773 * Removes all processed options from *options. 774 */ 775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, 776 QDict *options, int flags, BlockDriver *drv, Error **errp) 777 { 778 int ret, open_flags; 779 const char *filename; 780 const char *node_name = NULL; 781 Error *local_err = NULL; 782 783 assert(drv != NULL); 784 assert(bs->file == NULL); 785 assert(options != NULL && bs->options != options); 786 787 if (file != NULL) { 788 filename = file->filename; 789 } else { 790 filename = qdict_get_try_str(options, "filename"); 791 } 792 793 if (drv->bdrv_needs_filename && !filename) { 794 error_setg(errp, "The '%s' block driver requires a file name", 795 drv->format_name); 796 return -EINVAL; 797 } 798 799 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); 800 801 node_name = qdict_get_try_str(options, "node-name"); 802 bdrv_assign_node_name(bs, node_name, &local_err); 803 if (local_err) { 804 error_propagate(errp, local_err); 805 return -EINVAL; 806 } 807 qdict_del(options, "node-name"); 808 809 /* bdrv_open() with directly using a protocol as drv. This layer is already 810 * opened, so assign it to bs (while file becomes a closed BlockDriverState) 811 * and return immediately. */ 812 if (file != NULL && drv->bdrv_file_open) { 813 bdrv_swap(file, bs); 814 return 0; 815 } 816 817 bs->open_flags = flags; 818 bs->guest_block_size = 512; 819 bs->request_alignment = 512; 820 bs->zero_beyond_eof = true; 821 open_flags = bdrv_open_flags(bs, flags); 822 bs->read_only = !(open_flags & BDRV_O_RDWR); 823 824 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 825 error_setg(errp, 826 !bs->read_only && bdrv_is_whitelisted(drv, true) 827 ? "Driver '%s' can only be used for read-only devices" 828 : "Driver '%s' is not whitelisted", 829 drv->format_name); 830 return -ENOTSUP; 831 } 832 833 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 834 if (flags & BDRV_O_COPY_ON_READ) { 835 if (!bs->read_only) { 836 bdrv_enable_copy_on_read(bs); 837 } else { 838 error_setg(errp, "Can't use copy-on-read on read-only device"); 839 return -EINVAL; 840 } 841 } 842 843 if (filename != NULL) { 844 pstrcpy(bs->filename, sizeof(bs->filename), filename); 845 } else { 846 bs->filename[0] = '\0'; 847 } 848 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 849 850 bs->drv = drv; 851 bs->opaque = g_malloc0(drv->instance_size); 852 853 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); 854 855 /* Open the image, either directly or using a protocol */ 856 if (drv->bdrv_file_open) { 857 assert(file == NULL); 858 assert(!drv->bdrv_needs_filename || filename != NULL); 859 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 860 } else { 861 if (file == NULL) { 862 error_setg(errp, "Can't use '%s' as a block driver for the " 863 "protocol level", drv->format_name); 864 ret = -EINVAL; 865 goto free_and_fail; 866 } 867 bs->file = file; 868 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 869 } 870 871 if (ret < 0) { 872 if (local_err) { 873 error_propagate(errp, local_err); 874 } else if (bs->filename[0]) { 875 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 876 } else { 877 error_setg_errno(errp, -ret, "Could not open image"); 878 } 879 goto free_and_fail; 880 } 881 882 if (bs->encrypted) { 883 error_report("Encrypted images are deprecated"); 884 error_printf("Support for them will be removed in a future release.\n" 885 "You can use 'qemu-img convert' to convert your image" 886 " to an unencrypted one.\n"); 887 } 888 889 ret = refresh_total_sectors(bs, bs->total_sectors); 890 if (ret < 0) { 891 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 892 goto free_and_fail; 893 } 894 895 bdrv_refresh_limits(bs, &local_err); 896 if (local_err) { 897 error_propagate(errp, local_err); 898 ret = -EINVAL; 899 goto free_and_fail; 900 } 901 902 assert(bdrv_opt_mem_align(bs) != 0); 903 assert(bdrv_min_mem_align(bs) != 0); 904 assert((bs->request_alignment != 0) || bs->sg); 905 return 0; 906 907 free_and_fail: 908 bs->file = NULL; 909 g_free(bs->opaque); 910 bs->opaque = NULL; 911 bs->drv = NULL; 912 return ret; 913 } 914 915 static QDict *parse_json_filename(const char *filename, Error **errp) 916 { 917 QObject *options_obj; 918 QDict *options; 919 int ret; 920 921 ret = strstart(filename, "json:", &filename); 922 assert(ret); 923 924 options_obj = qobject_from_json(filename); 925 if (!options_obj) { 926 error_setg(errp, "Could not parse the JSON options"); 927 return NULL; 928 } 929 930 if (qobject_type(options_obj) != QTYPE_QDICT) { 931 qobject_decref(options_obj); 932 error_setg(errp, "Invalid JSON object given"); 933 return NULL; 934 } 935 936 options = qobject_to_qdict(options_obj); 937 qdict_flatten(options); 938 939 return options; 940 } 941 942 /* 943 * Fills in default options for opening images and converts the legacy 944 * filename/flags pair to option QDict entries. 945 */ 946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, 947 BlockDriver *drv, Error **errp) 948 { 949 const char *filename = *pfilename; 950 const char *drvname; 951 bool protocol = flags & BDRV_O_PROTOCOL; 952 bool parse_filename = false; 953 Error *local_err = NULL; 954 955 /* Parse json: pseudo-protocol */ 956 if (filename && g_str_has_prefix(filename, "json:")) { 957 QDict *json_options = parse_json_filename(filename, &local_err); 958 if (local_err) { 959 error_propagate(errp, local_err); 960 return -EINVAL; 961 } 962 963 /* Options given in the filename have lower priority than options 964 * specified directly */ 965 qdict_join(*options, json_options, false); 966 QDECREF(json_options); 967 *pfilename = filename = NULL; 968 } 969 970 /* Fetch the file name from the options QDict if necessary */ 971 if (protocol && filename) { 972 if (!qdict_haskey(*options, "filename")) { 973 qdict_put(*options, "filename", qstring_from_str(filename)); 974 parse_filename = true; 975 } else { 976 error_setg(errp, "Can't specify 'file' and 'filename' options at " 977 "the same time"); 978 return -EINVAL; 979 } 980 } 981 982 /* Find the right block driver */ 983 filename = qdict_get_try_str(*options, "filename"); 984 drvname = qdict_get_try_str(*options, "driver"); 985 986 if (drv) { 987 if (drvname) { 988 error_setg(errp, "Driver specified twice"); 989 return -EINVAL; 990 } 991 drvname = drv->format_name; 992 qdict_put(*options, "driver", qstring_from_str(drvname)); 993 } else { 994 if (!drvname && protocol) { 995 if (filename) { 996 drv = bdrv_find_protocol(filename, parse_filename, errp); 997 if (!drv) { 998 return -EINVAL; 999 } 1000 1001 drvname = drv->format_name; 1002 qdict_put(*options, "driver", qstring_from_str(drvname)); 1003 } else { 1004 error_setg(errp, "Must specify either driver or file"); 1005 return -EINVAL; 1006 } 1007 } else if (drvname) { 1008 drv = bdrv_find_format(drvname); 1009 if (!drv) { 1010 error_setg(errp, "Unknown driver '%s'", drvname); 1011 return -ENOENT; 1012 } 1013 } 1014 } 1015 1016 assert(drv || !protocol); 1017 1018 /* Driver-specific filename parsing */ 1019 if (drv && drv->bdrv_parse_filename && parse_filename) { 1020 drv->bdrv_parse_filename(filename, *options, &local_err); 1021 if (local_err) { 1022 error_propagate(errp, local_err); 1023 return -EINVAL; 1024 } 1025 1026 if (!drv->bdrv_needs_filename) { 1027 qdict_del(*options, "filename"); 1028 } 1029 } 1030 1031 return 0; 1032 } 1033 1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 1035 { 1036 1037 if (bs->backing_hd) { 1038 assert(bs->backing_blocker); 1039 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); 1040 } else if (backing_hd) { 1041 error_setg(&bs->backing_blocker, 1042 "node is used as backing hd of '%s'", 1043 bdrv_get_device_or_node_name(bs)); 1044 } 1045 1046 bs->backing_hd = backing_hd; 1047 if (!backing_hd) { 1048 error_free(bs->backing_blocker); 1049 bs->backing_blocker = NULL; 1050 goto out; 1051 } 1052 bs->open_flags &= ~BDRV_O_NO_BACKING; 1053 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 1054 pstrcpy(bs->backing_format, sizeof(bs->backing_format), 1055 backing_hd->drv ? backing_hd->drv->format_name : ""); 1056 1057 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); 1058 /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1059 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1060 bs->backing_blocker); 1061 out: 1062 bdrv_refresh_limits(bs, NULL); 1063 } 1064 1065 /* 1066 * Opens the backing file for a BlockDriverState if not yet open 1067 * 1068 * options is a QDict of options to pass to the block drivers, or NULL for an 1069 * empty set of options. The reference to the QDict is transferred to this 1070 * function (even on failure), so if the caller intends to reuse the dictionary, 1071 * it needs to use QINCREF() before calling bdrv_file_open. 1072 */ 1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 1074 { 1075 char *backing_filename = g_malloc0(PATH_MAX); 1076 int ret = 0; 1077 BlockDriverState *backing_hd; 1078 Error *local_err = NULL; 1079 1080 if (bs->backing_hd != NULL) { 1081 QDECREF(options); 1082 goto free_exit; 1083 } 1084 1085 /* NULL means an empty set of options */ 1086 if (options == NULL) { 1087 options = qdict_new(); 1088 } 1089 1090 bs->open_flags &= ~BDRV_O_NO_BACKING; 1091 if (qdict_haskey(options, "file.filename")) { 1092 backing_filename[0] = '\0'; 1093 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 1094 QDECREF(options); 1095 goto free_exit; 1096 } else { 1097 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 1098 &local_err); 1099 if (local_err) { 1100 ret = -EINVAL; 1101 error_propagate(errp, local_err); 1102 QDECREF(options); 1103 goto free_exit; 1104 } 1105 } 1106 1107 if (!bs->drv || !bs->drv->supports_backing) { 1108 ret = -EINVAL; 1109 error_setg(errp, "Driver doesn't support backing files"); 1110 QDECREF(options); 1111 goto free_exit; 1112 } 1113 1114 backing_hd = bdrv_new(); 1115 1116 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1117 qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 1118 } 1119 1120 assert(bs->backing_hd == NULL); 1121 ret = bdrv_open(&backing_hd, 1122 *backing_filename ? backing_filename : NULL, NULL, options, 1123 bdrv_backing_flags(bs->open_flags), NULL, &local_err); 1124 if (ret < 0) { 1125 bdrv_unref(backing_hd); 1126 backing_hd = NULL; 1127 bs->open_flags |= BDRV_O_NO_BACKING; 1128 error_setg(errp, "Could not open backing file: %s", 1129 error_get_pretty(local_err)); 1130 error_free(local_err); 1131 goto free_exit; 1132 } 1133 bdrv_set_backing_hd(bs, backing_hd); 1134 1135 free_exit: 1136 g_free(backing_filename); 1137 return ret; 1138 } 1139 1140 /* 1141 * Opens a disk image whose options are given as BlockdevRef in another block 1142 * device's options. 1143 * 1144 * If allow_none is true, no image will be opened if filename is false and no 1145 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. 1146 * 1147 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1148 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1149 * itself, all options starting with "${bdref_key}." are considered part of the 1150 * BlockdevRef. 1151 * 1152 * The BlockdevRef will be removed from the options QDict. 1153 * 1154 * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1155 */ 1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1157 QDict *options, const char *bdref_key, int flags, 1158 bool allow_none, Error **errp) 1159 { 1160 QDict *image_options; 1161 int ret; 1162 char *bdref_key_dot; 1163 const char *reference; 1164 1165 assert(pbs); 1166 assert(*pbs == NULL); 1167 1168 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1169 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1170 g_free(bdref_key_dot); 1171 1172 reference = qdict_get_try_str(options, bdref_key); 1173 if (!filename && !reference && !qdict_size(image_options)) { 1174 if (allow_none) { 1175 ret = 0; 1176 } else { 1177 error_setg(errp, "A block device must be specified for \"%s\"", 1178 bdref_key); 1179 ret = -EINVAL; 1180 } 1181 QDECREF(image_options); 1182 goto done; 1183 } 1184 1185 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); 1186 1187 done: 1188 qdict_del(options, bdref_key); 1189 return ret; 1190 } 1191 1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1193 { 1194 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 1195 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1196 int64_t total_size; 1197 QemuOpts *opts = NULL; 1198 QDict *snapshot_options; 1199 BlockDriverState *bs_snapshot; 1200 Error *local_err; 1201 int ret; 1202 1203 /* if snapshot, we create a temporary backing file and open it 1204 instead of opening 'filename' directly */ 1205 1206 /* Get the required size from the image */ 1207 total_size = bdrv_getlength(bs); 1208 if (total_size < 0) { 1209 ret = total_size; 1210 error_setg_errno(errp, -total_size, "Could not get image size"); 1211 goto out; 1212 } 1213 1214 /* Create the temporary image */ 1215 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1216 if (ret < 0) { 1217 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1218 goto out; 1219 } 1220 1221 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1222 &error_abort); 1223 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1224 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 1225 qemu_opts_del(opts); 1226 if (ret < 0) { 1227 error_setg_errno(errp, -ret, "Could not create temporary overlay " 1228 "'%s': %s", tmp_filename, 1229 error_get_pretty(local_err)); 1230 error_free(local_err); 1231 goto out; 1232 } 1233 1234 /* Prepare a new options QDict for the temporary file */ 1235 snapshot_options = qdict_new(); 1236 qdict_put(snapshot_options, "file.driver", 1237 qstring_from_str("file")); 1238 qdict_put(snapshot_options, "file.filename", 1239 qstring_from_str(tmp_filename)); 1240 1241 bs_snapshot = bdrv_new(); 1242 1243 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1244 flags, &bdrv_qcow2, &local_err); 1245 if (ret < 0) { 1246 error_propagate(errp, local_err); 1247 goto out; 1248 } 1249 1250 bdrv_append(bs_snapshot, bs); 1251 1252 out: 1253 g_free(tmp_filename); 1254 return ret; 1255 } 1256 1257 /* 1258 * Opens a disk image (raw, qcow2, vmdk, ...) 1259 * 1260 * options is a QDict of options to pass to the block drivers, or NULL for an 1261 * empty set of options. The reference to the QDict belongs to the block layer 1262 * after the call (even on failure), so if the caller intends to reuse the 1263 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1264 * 1265 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1266 * If it is not NULL, the referenced BDS will be reused. 1267 * 1268 * The reference parameter may be used to specify an existing block device which 1269 * should be opened. If specified, neither options nor a filename may be given, 1270 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1271 */ 1272 int bdrv_open(BlockDriverState **pbs, const char *filename, 1273 const char *reference, QDict *options, int flags, 1274 BlockDriver *drv, Error **errp) 1275 { 1276 int ret; 1277 BlockDriverState *file = NULL, *bs; 1278 const char *drvname; 1279 Error *local_err = NULL; 1280 int snapshot_flags = 0; 1281 1282 assert(pbs); 1283 1284 if (reference) { 1285 bool options_non_empty = options ? qdict_size(options) : false; 1286 QDECREF(options); 1287 1288 if (*pbs) { 1289 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1290 "another block device"); 1291 return -EINVAL; 1292 } 1293 1294 if (filename || options_non_empty) { 1295 error_setg(errp, "Cannot reference an existing block device with " 1296 "additional options or a new filename"); 1297 return -EINVAL; 1298 } 1299 1300 bs = bdrv_lookup_bs(reference, reference, errp); 1301 if (!bs) { 1302 return -ENODEV; 1303 } 1304 bdrv_ref(bs); 1305 *pbs = bs; 1306 return 0; 1307 } 1308 1309 if (*pbs) { 1310 bs = *pbs; 1311 } else { 1312 bs = bdrv_new(); 1313 } 1314 1315 /* NULL means an empty set of options */ 1316 if (options == NULL) { 1317 options = qdict_new(); 1318 } 1319 1320 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); 1321 if (local_err) { 1322 goto fail; 1323 } 1324 1325 /* Find the right image format driver */ 1326 drv = NULL; 1327 drvname = qdict_get_try_str(options, "driver"); 1328 if (drvname) { 1329 drv = bdrv_find_format(drvname); 1330 qdict_del(options, "driver"); 1331 if (!drv) { 1332 error_setg(errp, "Unknown driver: '%s'", drvname); 1333 ret = -EINVAL; 1334 goto fail; 1335 } 1336 } 1337 1338 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1339 if (drv && !drv->bdrv_file_open) { 1340 /* If the user explicitly wants a format driver here, we'll need to add 1341 * another layer for the protocol in bs->file */ 1342 flags &= ~BDRV_O_PROTOCOL; 1343 } 1344 1345 bs->options = options; 1346 options = qdict_clone_shallow(options); 1347 1348 /* Open image file without format layer */ 1349 if ((flags & BDRV_O_PROTOCOL) == 0) { 1350 if (flags & BDRV_O_RDWR) { 1351 flags |= BDRV_O_ALLOW_RDWR; 1352 } 1353 if (flags & BDRV_O_SNAPSHOT) { 1354 snapshot_flags = bdrv_temp_snapshot_flags(flags); 1355 flags = bdrv_backing_flags(flags); 1356 } 1357 1358 assert(file == NULL); 1359 ret = bdrv_open_image(&file, filename, options, "file", 1360 bdrv_inherited_flags(flags), 1361 true, &local_err); 1362 if (ret < 0) { 1363 goto fail; 1364 } 1365 } 1366 1367 /* Image format probing */ 1368 bs->probed = !drv; 1369 if (!drv && file) { 1370 ret = find_image_format(file, filename, &drv, &local_err); 1371 if (ret < 0) { 1372 goto fail; 1373 } 1374 } else if (!drv) { 1375 error_setg(errp, "Must specify either driver or file"); 1376 ret = -EINVAL; 1377 goto fail; 1378 } 1379 1380 /* Open the image */ 1381 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1382 if (ret < 0) { 1383 goto fail; 1384 } 1385 1386 if (file && (bs->file != file)) { 1387 bdrv_unref(file); 1388 file = NULL; 1389 } 1390 1391 /* If there is a backing file, use it */ 1392 if ((flags & BDRV_O_NO_BACKING) == 0) { 1393 QDict *backing_options; 1394 1395 qdict_extract_subqdict(options, &backing_options, "backing."); 1396 ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1397 if (ret < 0) { 1398 goto close_and_fail; 1399 } 1400 } 1401 1402 bdrv_refresh_filename(bs); 1403 1404 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1405 * temporary snapshot afterwards. */ 1406 if (snapshot_flags) { 1407 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1408 if (local_err) { 1409 goto close_and_fail; 1410 } 1411 } 1412 1413 /* Check if any unknown options were used */ 1414 if (options && (qdict_size(options) != 0)) { 1415 const QDictEntry *entry = qdict_first(options); 1416 if (flags & BDRV_O_PROTOCOL) { 1417 error_setg(errp, "Block protocol '%s' doesn't support the option " 1418 "'%s'", drv->format_name, entry->key); 1419 } else { 1420 error_setg(errp, "Block format '%s' used by device '%s' doesn't " 1421 "support the option '%s'", drv->format_name, 1422 bdrv_get_device_name(bs), entry->key); 1423 } 1424 1425 ret = -EINVAL; 1426 goto close_and_fail; 1427 } 1428 1429 if (!bdrv_key_required(bs)) { 1430 if (bs->blk) { 1431 blk_dev_change_media_cb(bs->blk, true); 1432 } 1433 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1434 && !runstate_check(RUN_STATE_INMIGRATE) 1435 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1436 error_setg(errp, 1437 "Guest must be stopped for opening of encrypted image"); 1438 ret = -EBUSY; 1439 goto close_and_fail; 1440 } 1441 1442 QDECREF(options); 1443 *pbs = bs; 1444 return 0; 1445 1446 fail: 1447 if (file != NULL) { 1448 bdrv_unref(file); 1449 } 1450 QDECREF(bs->options); 1451 QDECREF(options); 1452 bs->options = NULL; 1453 if (!*pbs) { 1454 /* If *pbs is NULL, a new BDS has been created in this function and 1455 needs to be freed now. Otherwise, it does not need to be closed, 1456 since it has not really been opened yet. */ 1457 bdrv_unref(bs); 1458 } 1459 if (local_err) { 1460 error_propagate(errp, local_err); 1461 } 1462 return ret; 1463 1464 close_and_fail: 1465 /* See fail path, but now the BDS has to be always closed */ 1466 if (*pbs) { 1467 bdrv_close(bs); 1468 } else { 1469 bdrv_unref(bs); 1470 } 1471 QDECREF(options); 1472 if (local_err) { 1473 error_propagate(errp, local_err); 1474 } 1475 return ret; 1476 } 1477 1478 typedef struct BlockReopenQueueEntry { 1479 bool prepared; 1480 BDRVReopenState state; 1481 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1482 } BlockReopenQueueEntry; 1483 1484 /* 1485 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1486 * reopen of multiple devices. 1487 * 1488 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1489 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1490 * be created and initialized. This newly created BlockReopenQueue should be 1491 * passed back in for subsequent calls that are intended to be of the same 1492 * atomic 'set'. 1493 * 1494 * bs is the BlockDriverState to add to the reopen queue. 1495 * 1496 * flags contains the open flags for the associated bs 1497 * 1498 * returns a pointer to bs_queue, which is either the newly allocated 1499 * bs_queue, or the existing bs_queue being used. 1500 * 1501 */ 1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1503 BlockDriverState *bs, int flags) 1504 { 1505 assert(bs != NULL); 1506 1507 BlockReopenQueueEntry *bs_entry; 1508 if (bs_queue == NULL) { 1509 bs_queue = g_new0(BlockReopenQueue, 1); 1510 QSIMPLEQ_INIT(bs_queue); 1511 } 1512 1513 /* bdrv_open() masks this flag out */ 1514 flags &= ~BDRV_O_PROTOCOL; 1515 1516 if (bs->file) { 1517 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1518 } 1519 1520 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1521 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1522 1523 bs_entry->state.bs = bs; 1524 bs_entry->state.flags = flags; 1525 1526 return bs_queue; 1527 } 1528 1529 /* 1530 * Reopen multiple BlockDriverStates atomically & transactionally. 1531 * 1532 * The queue passed in (bs_queue) must have been built up previous 1533 * via bdrv_reopen_queue(). 1534 * 1535 * Reopens all BDS specified in the queue, with the appropriate 1536 * flags. All devices are prepared for reopen, and failure of any 1537 * device will cause all device changes to be abandonded, and intermediate 1538 * data cleaned up. 1539 * 1540 * If all devices prepare successfully, then the changes are committed 1541 * to all devices. 1542 * 1543 */ 1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1545 { 1546 int ret = -1; 1547 BlockReopenQueueEntry *bs_entry, *next; 1548 Error *local_err = NULL; 1549 1550 assert(bs_queue != NULL); 1551 1552 bdrv_drain_all(); 1553 1554 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1555 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1556 error_propagate(errp, local_err); 1557 goto cleanup; 1558 } 1559 bs_entry->prepared = true; 1560 } 1561 1562 /* If we reach this point, we have success and just need to apply the 1563 * changes 1564 */ 1565 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1566 bdrv_reopen_commit(&bs_entry->state); 1567 } 1568 1569 ret = 0; 1570 1571 cleanup: 1572 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1573 if (ret && bs_entry->prepared) { 1574 bdrv_reopen_abort(&bs_entry->state); 1575 } 1576 g_free(bs_entry); 1577 } 1578 g_free(bs_queue); 1579 return ret; 1580 } 1581 1582 1583 /* Reopen a single BlockDriverState with the specified flags. */ 1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1585 { 1586 int ret = -1; 1587 Error *local_err = NULL; 1588 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1589 1590 ret = bdrv_reopen_multiple(queue, &local_err); 1591 if (local_err != NULL) { 1592 error_propagate(errp, local_err); 1593 } 1594 return ret; 1595 } 1596 1597 1598 /* 1599 * Prepares a BlockDriverState for reopen. All changes are staged in the 1600 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1601 * the block driver layer .bdrv_reopen_prepare() 1602 * 1603 * bs is the BlockDriverState to reopen 1604 * flags are the new open flags 1605 * queue is the reopen queue 1606 * 1607 * Returns 0 on success, non-zero on error. On error errp will be set 1608 * as well. 1609 * 1610 * On failure, bdrv_reopen_abort() will be called to clean up any data. 1611 * It is the responsibility of the caller to then call the abort() or 1612 * commit() for any other BDS that have been left in a prepare() state 1613 * 1614 */ 1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1616 Error **errp) 1617 { 1618 int ret = -1; 1619 Error *local_err = NULL; 1620 BlockDriver *drv; 1621 1622 assert(reopen_state != NULL); 1623 assert(reopen_state->bs->drv != NULL); 1624 drv = reopen_state->bs->drv; 1625 1626 /* if we are to stay read-only, do not allow permission change 1627 * to r/w */ 1628 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1629 reopen_state->flags & BDRV_O_RDWR) { 1630 error_setg(errp, "Node '%s' is read only", 1631 bdrv_get_device_or_node_name(reopen_state->bs)); 1632 goto error; 1633 } 1634 1635 1636 ret = bdrv_flush(reopen_state->bs); 1637 if (ret) { 1638 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1639 strerror(-ret)); 1640 goto error; 1641 } 1642 1643 if (drv->bdrv_reopen_prepare) { 1644 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1645 if (ret) { 1646 if (local_err != NULL) { 1647 error_propagate(errp, local_err); 1648 } else { 1649 error_setg(errp, "failed while preparing to reopen image '%s'", 1650 reopen_state->bs->filename); 1651 } 1652 goto error; 1653 } 1654 } else { 1655 /* It is currently mandatory to have a bdrv_reopen_prepare() 1656 * handler for each supported drv. */ 1657 error_setg(errp, "Block format '%s' used by node '%s' " 1658 "does not support reopening files", drv->format_name, 1659 bdrv_get_device_or_node_name(reopen_state->bs)); 1660 ret = -1; 1661 goto error; 1662 } 1663 1664 ret = 0; 1665 1666 error: 1667 return ret; 1668 } 1669 1670 /* 1671 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1672 * makes them final by swapping the staging BlockDriverState contents into 1673 * the active BlockDriverState contents. 1674 */ 1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1676 { 1677 BlockDriver *drv; 1678 1679 assert(reopen_state != NULL); 1680 drv = reopen_state->bs->drv; 1681 assert(drv != NULL); 1682 1683 /* If there are any driver level actions to take */ 1684 if (drv->bdrv_reopen_commit) { 1685 drv->bdrv_reopen_commit(reopen_state); 1686 } 1687 1688 /* set BDS specific flags now */ 1689 reopen_state->bs->open_flags = reopen_state->flags; 1690 reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1691 BDRV_O_CACHE_WB); 1692 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1693 1694 bdrv_refresh_limits(reopen_state->bs, NULL); 1695 } 1696 1697 /* 1698 * Abort the reopen, and delete and free the staged changes in 1699 * reopen_state 1700 */ 1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1702 { 1703 BlockDriver *drv; 1704 1705 assert(reopen_state != NULL); 1706 drv = reopen_state->bs->drv; 1707 assert(drv != NULL); 1708 1709 if (drv->bdrv_reopen_abort) { 1710 drv->bdrv_reopen_abort(reopen_state); 1711 } 1712 } 1713 1714 1715 void bdrv_close(BlockDriverState *bs) 1716 { 1717 BdrvAioNotifier *ban, *ban_next; 1718 1719 if (bs->job) { 1720 block_job_cancel_sync(bs->job); 1721 } 1722 bdrv_drain_all(); /* complete I/O */ 1723 bdrv_flush(bs); 1724 bdrv_drain_all(); /* in case flush left pending I/O */ 1725 notifier_list_notify(&bs->close_notifiers, bs); 1726 1727 if (bs->drv) { 1728 if (bs->backing_hd) { 1729 BlockDriverState *backing_hd = bs->backing_hd; 1730 bdrv_set_backing_hd(bs, NULL); 1731 bdrv_unref(backing_hd); 1732 } 1733 bs->drv->bdrv_close(bs); 1734 g_free(bs->opaque); 1735 bs->opaque = NULL; 1736 bs->drv = NULL; 1737 bs->copy_on_read = 0; 1738 bs->backing_file[0] = '\0'; 1739 bs->backing_format[0] = '\0'; 1740 bs->total_sectors = 0; 1741 bs->encrypted = 0; 1742 bs->valid_key = 0; 1743 bs->sg = 0; 1744 bs->zero_beyond_eof = false; 1745 QDECREF(bs->options); 1746 bs->options = NULL; 1747 QDECREF(bs->full_open_options); 1748 bs->full_open_options = NULL; 1749 1750 if (bs->file != NULL) { 1751 bdrv_unref(bs->file); 1752 bs->file = NULL; 1753 } 1754 } 1755 1756 if (bs->blk) { 1757 blk_dev_change_media_cb(bs->blk, false); 1758 } 1759 1760 /*throttling disk I/O limits*/ 1761 if (bs->io_limits_enabled) { 1762 bdrv_io_limits_disable(bs); 1763 } 1764 1765 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 1766 g_free(ban); 1767 } 1768 QLIST_INIT(&bs->aio_notifiers); 1769 } 1770 1771 void bdrv_close_all(void) 1772 { 1773 BlockDriverState *bs; 1774 1775 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1776 AioContext *aio_context = bdrv_get_aio_context(bs); 1777 1778 aio_context_acquire(aio_context); 1779 bdrv_close(bs); 1780 aio_context_release(aio_context); 1781 } 1782 } 1783 1784 /* make a BlockDriverState anonymous by removing from bdrv_state and 1785 * graph_bdrv_state list. 1786 Also, NULL terminate the device_name to prevent double remove */ 1787 void bdrv_make_anon(BlockDriverState *bs) 1788 { 1789 /* 1790 * Take care to remove bs from bdrv_states only when it's actually 1791 * in it. Note that bs->device_list.tqe_prev is initially null, 1792 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 1793 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 1794 * resetting it to null on remove. 1795 */ 1796 if (bs->device_list.tqe_prev) { 1797 QTAILQ_REMOVE(&bdrv_states, bs, device_list); 1798 bs->device_list.tqe_prev = NULL; 1799 } 1800 if (bs->node_name[0] != '\0') { 1801 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 1802 } 1803 bs->node_name[0] = '\0'; 1804 } 1805 1806 static void bdrv_rebind(BlockDriverState *bs) 1807 { 1808 if (bs->drv && bs->drv->bdrv_rebind) { 1809 bs->drv->bdrv_rebind(bs); 1810 } 1811 } 1812 1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 1814 BlockDriverState *bs_src) 1815 { 1816 /* move some fields that need to stay attached to the device */ 1817 1818 /* dev info */ 1819 bs_dest->guest_block_size = bs_src->guest_block_size; 1820 bs_dest->copy_on_read = bs_src->copy_on_read; 1821 1822 bs_dest->enable_write_cache = bs_src->enable_write_cache; 1823 1824 /* i/o throttled req */ 1825 memcpy(&bs_dest->throttle_state, 1826 &bs_src->throttle_state, 1827 sizeof(ThrottleState)); 1828 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 1829 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 1830 bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 1831 1832 /* r/w error */ 1833 bs_dest->on_read_error = bs_src->on_read_error; 1834 bs_dest->on_write_error = bs_src->on_write_error; 1835 1836 /* i/o status */ 1837 bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 1838 bs_dest->iostatus = bs_src->iostatus; 1839 1840 /* dirty bitmap */ 1841 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 1842 1843 /* reference count */ 1844 bs_dest->refcnt = bs_src->refcnt; 1845 1846 /* job */ 1847 bs_dest->job = bs_src->job; 1848 1849 /* keep the same entry in bdrv_states */ 1850 bs_dest->device_list = bs_src->device_list; 1851 bs_dest->blk = bs_src->blk; 1852 1853 memcpy(bs_dest->op_blockers, bs_src->op_blockers, 1854 sizeof(bs_dest->op_blockers)); 1855 } 1856 1857 /* 1858 * Swap bs contents for two image chains while they are live, 1859 * while keeping required fields on the BlockDriverState that is 1860 * actually attached to a device. 1861 * 1862 * This will modify the BlockDriverState fields, and swap contents 1863 * between bs_new and bs_old. Both bs_new and bs_old are modified. 1864 * 1865 * bs_new must not be attached to a BlockBackend. 1866 * 1867 * This function does not create any image files. 1868 */ 1869 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 1870 { 1871 BlockDriverState tmp; 1872 1873 /* The code needs to swap the node_name but simply swapping node_list won't 1874 * work so first remove the nodes from the graph list, do the swap then 1875 * insert them back if needed. 1876 */ 1877 if (bs_new->node_name[0] != '\0') { 1878 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 1879 } 1880 if (bs_old->node_name[0] != '\0') { 1881 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 1882 } 1883 1884 /* bs_new must be unattached and shouldn't have anything fancy enabled */ 1885 assert(!bs_new->blk); 1886 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 1887 assert(bs_new->job == NULL); 1888 assert(bs_new->io_limits_enabled == false); 1889 assert(!throttle_have_timer(&bs_new->throttle_state)); 1890 1891 tmp = *bs_new; 1892 *bs_new = *bs_old; 1893 *bs_old = tmp; 1894 1895 /* there are some fields that should not be swapped, move them back */ 1896 bdrv_move_feature_fields(&tmp, bs_old); 1897 bdrv_move_feature_fields(bs_old, bs_new); 1898 bdrv_move_feature_fields(bs_new, &tmp); 1899 1900 /* bs_new must remain unattached */ 1901 assert(!bs_new->blk); 1902 1903 /* Check a few fields that should remain attached to the device */ 1904 assert(bs_new->job == NULL); 1905 assert(bs_new->io_limits_enabled == false); 1906 assert(!throttle_have_timer(&bs_new->throttle_state)); 1907 1908 /* insert the nodes back into the graph node list if needed */ 1909 if (bs_new->node_name[0] != '\0') { 1910 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 1911 } 1912 if (bs_old->node_name[0] != '\0') { 1913 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 1914 } 1915 1916 bdrv_rebind(bs_new); 1917 bdrv_rebind(bs_old); 1918 } 1919 1920 /* 1921 * Add new bs contents at the top of an image chain while the chain is 1922 * live, while keeping required fields on the top layer. 1923 * 1924 * This will modify the BlockDriverState fields, and swap contents 1925 * between bs_new and bs_top. Both bs_new and bs_top are modified. 1926 * 1927 * bs_new must not be attached to a BlockBackend. 1928 * 1929 * This function does not create any image files. 1930 */ 1931 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 1932 { 1933 bdrv_swap(bs_new, bs_top); 1934 1935 /* The contents of 'tmp' will become bs_top, as we are 1936 * swapping bs_new and bs_top contents. */ 1937 bdrv_set_backing_hd(bs_top, bs_new); 1938 } 1939 1940 static void bdrv_delete(BlockDriverState *bs) 1941 { 1942 assert(!bs->job); 1943 assert(bdrv_op_blocker_is_empty(bs)); 1944 assert(!bs->refcnt); 1945 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 1946 1947 bdrv_close(bs); 1948 1949 /* remove from list, if necessary */ 1950 bdrv_make_anon(bs); 1951 1952 g_free(bs); 1953 } 1954 1955 /* 1956 * Run consistency checks on an image 1957 * 1958 * Returns 0 if the check could be completed (it doesn't mean that the image is 1959 * free of errors) or -errno when an internal error occurred. The results of the 1960 * check are stored in res. 1961 */ 1962 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 1963 { 1964 if (bs->drv == NULL) { 1965 return -ENOMEDIUM; 1966 } 1967 if (bs->drv->bdrv_check == NULL) { 1968 return -ENOTSUP; 1969 } 1970 1971 memset(res, 0, sizeof(*res)); 1972 return bs->drv->bdrv_check(bs, res, fix); 1973 } 1974 1975 #define COMMIT_BUF_SECTORS 2048 1976 1977 /* commit COW file into the raw image */ 1978 int bdrv_commit(BlockDriverState *bs) 1979 { 1980 BlockDriver *drv = bs->drv; 1981 int64_t sector, total_sectors, length, backing_length; 1982 int n, ro, open_flags; 1983 int ret = 0; 1984 uint8_t *buf = NULL; 1985 1986 if (!drv) 1987 return -ENOMEDIUM; 1988 1989 if (!bs->backing_hd) { 1990 return -ENOTSUP; 1991 } 1992 1993 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 1994 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 1995 return -EBUSY; 1996 } 1997 1998 ro = bs->backing_hd->read_only; 1999 open_flags = bs->backing_hd->open_flags; 2000 2001 if (ro) { 2002 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 2003 return -EACCES; 2004 } 2005 } 2006 2007 length = bdrv_getlength(bs); 2008 if (length < 0) { 2009 ret = length; 2010 goto ro_cleanup; 2011 } 2012 2013 backing_length = bdrv_getlength(bs->backing_hd); 2014 if (backing_length < 0) { 2015 ret = backing_length; 2016 goto ro_cleanup; 2017 } 2018 2019 /* If our top snapshot is larger than the backing file image, 2020 * grow the backing file image if possible. If not possible, 2021 * we must return an error */ 2022 if (length > backing_length) { 2023 ret = bdrv_truncate(bs->backing_hd, length); 2024 if (ret < 0) { 2025 goto ro_cleanup; 2026 } 2027 } 2028 2029 total_sectors = length >> BDRV_SECTOR_BITS; 2030 2031 /* qemu_try_blockalign() for bs will choose an alignment that works for 2032 * bs->backing_hd as well, so no need to compare the alignment manually. */ 2033 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2034 if (buf == NULL) { 2035 ret = -ENOMEM; 2036 goto ro_cleanup; 2037 } 2038 2039 for (sector = 0; sector < total_sectors; sector += n) { 2040 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2041 if (ret < 0) { 2042 goto ro_cleanup; 2043 } 2044 if (ret) { 2045 ret = bdrv_read(bs, sector, buf, n); 2046 if (ret < 0) { 2047 goto ro_cleanup; 2048 } 2049 2050 ret = bdrv_write(bs->backing_hd, sector, buf, n); 2051 if (ret < 0) { 2052 goto ro_cleanup; 2053 } 2054 } 2055 } 2056 2057 if (drv->bdrv_make_empty) { 2058 ret = drv->bdrv_make_empty(bs); 2059 if (ret < 0) { 2060 goto ro_cleanup; 2061 } 2062 bdrv_flush(bs); 2063 } 2064 2065 /* 2066 * Make sure all data we wrote to the backing device is actually 2067 * stable on disk. 2068 */ 2069 if (bs->backing_hd) { 2070 bdrv_flush(bs->backing_hd); 2071 } 2072 2073 ret = 0; 2074 ro_cleanup: 2075 qemu_vfree(buf); 2076 2077 if (ro) { 2078 /* ignoring error return here */ 2079 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 2080 } 2081 2082 return ret; 2083 } 2084 2085 int bdrv_commit_all(void) 2086 { 2087 BlockDriverState *bs; 2088 2089 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2090 AioContext *aio_context = bdrv_get_aio_context(bs); 2091 2092 aio_context_acquire(aio_context); 2093 if (bs->drv && bs->backing_hd) { 2094 int ret = bdrv_commit(bs); 2095 if (ret < 0) { 2096 aio_context_release(aio_context); 2097 return ret; 2098 } 2099 } 2100 aio_context_release(aio_context); 2101 } 2102 return 0; 2103 } 2104 2105 /* 2106 * Return values: 2107 * 0 - success 2108 * -EINVAL - backing format specified, but no file 2109 * -ENOSPC - can't update the backing file because no space is left in the 2110 * image file header 2111 * -ENOTSUP - format driver doesn't support changing the backing file 2112 */ 2113 int bdrv_change_backing_file(BlockDriverState *bs, 2114 const char *backing_file, const char *backing_fmt) 2115 { 2116 BlockDriver *drv = bs->drv; 2117 int ret; 2118 2119 /* Backing file format doesn't make sense without a backing file */ 2120 if (backing_fmt && !backing_file) { 2121 return -EINVAL; 2122 } 2123 2124 if (drv->bdrv_change_backing_file != NULL) { 2125 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2126 } else { 2127 ret = -ENOTSUP; 2128 } 2129 2130 if (ret == 0) { 2131 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2132 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2133 } 2134 return ret; 2135 } 2136 2137 /* 2138 * Finds the image layer in the chain that has 'bs' as its backing file. 2139 * 2140 * active is the current topmost image. 2141 * 2142 * Returns NULL if bs is not found in active's image chain, 2143 * or if active == bs. 2144 * 2145 * Returns the bottommost base image if bs == NULL. 2146 */ 2147 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2148 BlockDriverState *bs) 2149 { 2150 while (active && bs != active->backing_hd) { 2151 active = active->backing_hd; 2152 } 2153 2154 return active; 2155 } 2156 2157 /* Given a BDS, searches for the base layer. */ 2158 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2159 { 2160 return bdrv_find_overlay(bs, NULL); 2161 } 2162 2163 typedef struct BlkIntermediateStates { 2164 BlockDriverState *bs; 2165 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 2166 } BlkIntermediateStates; 2167 2168 2169 /* 2170 * Drops images above 'base' up to and including 'top', and sets the image 2171 * above 'top' to have base as its backing file. 2172 * 2173 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2174 * information in 'bs' can be properly updated. 2175 * 2176 * E.g., this will convert the following chain: 2177 * bottom <- base <- intermediate <- top <- active 2178 * 2179 * to 2180 * 2181 * bottom <- base <- active 2182 * 2183 * It is allowed for bottom==base, in which case it converts: 2184 * 2185 * base <- intermediate <- top <- active 2186 * 2187 * to 2188 * 2189 * base <- active 2190 * 2191 * If backing_file_str is non-NULL, it will be used when modifying top's 2192 * overlay image metadata. 2193 * 2194 * Error conditions: 2195 * if active == top, that is considered an error 2196 * 2197 */ 2198 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2199 BlockDriverState *base, const char *backing_file_str) 2200 { 2201 BlockDriverState *intermediate; 2202 BlockDriverState *base_bs = NULL; 2203 BlockDriverState *new_top_bs = NULL; 2204 BlkIntermediateStates *intermediate_state, *next; 2205 int ret = -EIO; 2206 2207 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 2208 QSIMPLEQ_INIT(&states_to_delete); 2209 2210 if (!top->drv || !base->drv) { 2211 goto exit; 2212 } 2213 2214 new_top_bs = bdrv_find_overlay(active, top); 2215 2216 if (new_top_bs == NULL) { 2217 /* we could not find the image above 'top', this is an error */ 2218 goto exit; 2219 } 2220 2221 /* special case of new_top_bs->backing_hd already pointing to base - nothing 2222 * to do, no intermediate images */ 2223 if (new_top_bs->backing_hd == base) { 2224 ret = 0; 2225 goto exit; 2226 } 2227 2228 intermediate = top; 2229 2230 /* now we will go down through the list, and add each BDS we find 2231 * into our deletion queue, until we hit the 'base' 2232 */ 2233 while (intermediate) { 2234 intermediate_state = g_new0(BlkIntermediateStates, 1); 2235 intermediate_state->bs = intermediate; 2236 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 2237 2238 if (intermediate->backing_hd == base) { 2239 base_bs = intermediate->backing_hd; 2240 break; 2241 } 2242 intermediate = intermediate->backing_hd; 2243 } 2244 if (base_bs == NULL) { 2245 /* something went wrong, we did not end at the base. safely 2246 * unravel everything, and exit with error */ 2247 goto exit; 2248 } 2249 2250 /* success - we can delete the intermediate states, and link top->base */ 2251 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 2252 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2253 base_bs->drv ? base_bs->drv->format_name : ""); 2254 if (ret) { 2255 goto exit; 2256 } 2257 bdrv_set_backing_hd(new_top_bs, base_bs); 2258 2259 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2260 /* so that bdrv_close() does not recursively close the chain */ 2261 bdrv_set_backing_hd(intermediate_state->bs, NULL); 2262 bdrv_unref(intermediate_state->bs); 2263 } 2264 ret = 0; 2265 2266 exit: 2267 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2268 g_free(intermediate_state); 2269 } 2270 return ret; 2271 } 2272 2273 /** 2274 * Truncate file to 'offset' bytes (needed only for file protocols) 2275 */ 2276 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2277 { 2278 BlockDriver *drv = bs->drv; 2279 int ret; 2280 if (!drv) 2281 return -ENOMEDIUM; 2282 if (!drv->bdrv_truncate) 2283 return -ENOTSUP; 2284 if (bs->read_only) 2285 return -EACCES; 2286 2287 ret = drv->bdrv_truncate(bs, offset); 2288 if (ret == 0) { 2289 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2290 bdrv_dirty_bitmap_truncate(bs); 2291 if (bs->blk) { 2292 blk_dev_resize_cb(bs->blk); 2293 } 2294 } 2295 return ret; 2296 } 2297 2298 /** 2299 * Length of a allocated file in bytes. Sparse files are counted by actual 2300 * allocated space. Return < 0 if error or unknown. 2301 */ 2302 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2303 { 2304 BlockDriver *drv = bs->drv; 2305 if (!drv) { 2306 return -ENOMEDIUM; 2307 } 2308 if (drv->bdrv_get_allocated_file_size) { 2309 return drv->bdrv_get_allocated_file_size(bs); 2310 } 2311 if (bs->file) { 2312 return bdrv_get_allocated_file_size(bs->file); 2313 } 2314 return -ENOTSUP; 2315 } 2316 2317 /** 2318 * Return number of sectors on success, -errno on error. 2319 */ 2320 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2321 { 2322 BlockDriver *drv = bs->drv; 2323 2324 if (!drv) 2325 return -ENOMEDIUM; 2326 2327 if (drv->has_variable_length) { 2328 int ret = refresh_total_sectors(bs, bs->total_sectors); 2329 if (ret < 0) { 2330 return ret; 2331 } 2332 } 2333 return bs->total_sectors; 2334 } 2335 2336 /** 2337 * Return length in bytes on success, -errno on error. 2338 * The length is always a multiple of BDRV_SECTOR_SIZE. 2339 */ 2340 int64_t bdrv_getlength(BlockDriverState *bs) 2341 { 2342 int64_t ret = bdrv_nb_sectors(bs); 2343 2344 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2345 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2346 } 2347 2348 /* return 0 as number of sectors if no device present or error */ 2349 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2350 { 2351 int64_t nb_sectors = bdrv_nb_sectors(bs); 2352 2353 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2354 } 2355 2356 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2357 BlockdevOnError on_write_error) 2358 { 2359 bs->on_read_error = on_read_error; 2360 bs->on_write_error = on_write_error; 2361 } 2362 2363 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2364 { 2365 return is_read ? bs->on_read_error : bs->on_write_error; 2366 } 2367 2368 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2369 { 2370 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 2371 2372 switch (on_err) { 2373 case BLOCKDEV_ON_ERROR_ENOSPC: 2374 return (error == ENOSPC) ? 2375 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2376 case BLOCKDEV_ON_ERROR_STOP: 2377 return BLOCK_ERROR_ACTION_STOP; 2378 case BLOCKDEV_ON_ERROR_REPORT: 2379 return BLOCK_ERROR_ACTION_REPORT; 2380 case BLOCKDEV_ON_ERROR_IGNORE: 2381 return BLOCK_ERROR_ACTION_IGNORE; 2382 default: 2383 abort(); 2384 } 2385 } 2386 2387 static void send_qmp_error_event(BlockDriverState *bs, 2388 BlockErrorAction action, 2389 bool is_read, int error) 2390 { 2391 IoOperationType optype; 2392 2393 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2394 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 2395 bdrv_iostatus_is_enabled(bs), 2396 error == ENOSPC, strerror(error), 2397 &error_abort); 2398 } 2399 2400 /* This is done by device models because, while the block layer knows 2401 * about the error, it does not know whether an operation comes from 2402 * the device or the block layer (from a job, for example). 2403 */ 2404 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2405 bool is_read, int error) 2406 { 2407 assert(error >= 0); 2408 2409 if (action == BLOCK_ERROR_ACTION_STOP) { 2410 /* First set the iostatus, so that "info block" returns an iostatus 2411 * that matches the events raised so far (an additional error iostatus 2412 * is fine, but not a lost one). 2413 */ 2414 bdrv_iostatus_set_err(bs, error); 2415 2416 /* Then raise the request to stop the VM and the event. 2417 * qemu_system_vmstop_request_prepare has two effects. First, 2418 * it ensures that the STOP event always comes after the 2419 * BLOCK_IO_ERROR event. Second, it ensures that even if management 2420 * can observe the STOP event and do a "cont" before the STOP 2421 * event is issued, the VM will not stop. In this case, vm_start() 2422 * also ensures that the STOP/RESUME pair of events is emitted. 2423 */ 2424 qemu_system_vmstop_request_prepare(); 2425 send_qmp_error_event(bs, action, is_read, error); 2426 qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 2427 } else { 2428 send_qmp_error_event(bs, action, is_read, error); 2429 } 2430 } 2431 2432 int bdrv_is_read_only(BlockDriverState *bs) 2433 { 2434 return bs->read_only; 2435 } 2436 2437 int bdrv_is_sg(BlockDriverState *bs) 2438 { 2439 return bs->sg; 2440 } 2441 2442 int bdrv_enable_write_cache(BlockDriverState *bs) 2443 { 2444 return bs->enable_write_cache; 2445 } 2446 2447 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2448 { 2449 bs->enable_write_cache = wce; 2450 2451 /* so a reopen() will preserve wce */ 2452 if (wce) { 2453 bs->open_flags |= BDRV_O_CACHE_WB; 2454 } else { 2455 bs->open_flags &= ~BDRV_O_CACHE_WB; 2456 } 2457 } 2458 2459 int bdrv_is_encrypted(BlockDriverState *bs) 2460 { 2461 if (bs->backing_hd && bs->backing_hd->encrypted) 2462 return 1; 2463 return bs->encrypted; 2464 } 2465 2466 int bdrv_key_required(BlockDriverState *bs) 2467 { 2468 BlockDriverState *backing_hd = bs->backing_hd; 2469 2470 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 2471 return 1; 2472 return (bs->encrypted && !bs->valid_key); 2473 } 2474 2475 int bdrv_set_key(BlockDriverState *bs, const char *key) 2476 { 2477 int ret; 2478 if (bs->backing_hd && bs->backing_hd->encrypted) { 2479 ret = bdrv_set_key(bs->backing_hd, key); 2480 if (ret < 0) 2481 return ret; 2482 if (!bs->encrypted) 2483 return 0; 2484 } 2485 if (!bs->encrypted) { 2486 return -EINVAL; 2487 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2488 return -ENOMEDIUM; 2489 } 2490 ret = bs->drv->bdrv_set_key(bs, key); 2491 if (ret < 0) { 2492 bs->valid_key = 0; 2493 } else if (!bs->valid_key) { 2494 bs->valid_key = 1; 2495 if (bs->blk) { 2496 /* call the change callback now, we skipped it on open */ 2497 blk_dev_change_media_cb(bs->blk, true); 2498 } 2499 } 2500 return ret; 2501 } 2502 2503 /* 2504 * Provide an encryption key for @bs. 2505 * If @key is non-null: 2506 * If @bs is not encrypted, fail. 2507 * Else if the key is invalid, fail. 2508 * Else set @bs's key to @key, replacing the existing key, if any. 2509 * If @key is null: 2510 * If @bs is encrypted and still lacks a key, fail. 2511 * Else do nothing. 2512 * On failure, store an error object through @errp if non-null. 2513 */ 2514 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2515 { 2516 if (key) { 2517 if (!bdrv_is_encrypted(bs)) { 2518 error_setg(errp, "Node '%s' is not encrypted", 2519 bdrv_get_device_or_node_name(bs)); 2520 } else if (bdrv_set_key(bs, key) < 0) { 2521 error_set(errp, QERR_INVALID_PASSWORD); 2522 } 2523 } else { 2524 if (bdrv_key_required(bs)) { 2525 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2526 "'%s' (%s) is encrypted", 2527 bdrv_get_device_or_node_name(bs), 2528 bdrv_get_encrypted_filename(bs)); 2529 } 2530 } 2531 } 2532 2533 const char *bdrv_get_format_name(BlockDriverState *bs) 2534 { 2535 return bs->drv ? bs->drv->format_name : NULL; 2536 } 2537 2538 static int qsort_strcmp(const void *a, const void *b) 2539 { 2540 return strcmp(a, b); 2541 } 2542 2543 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2544 void *opaque) 2545 { 2546 BlockDriver *drv; 2547 int count = 0; 2548 int i; 2549 const char **formats = NULL; 2550 2551 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2552 if (drv->format_name) { 2553 bool found = false; 2554 int i = count; 2555 while (formats && i && !found) { 2556 found = !strcmp(formats[--i], drv->format_name); 2557 } 2558 2559 if (!found) { 2560 formats = g_renew(const char *, formats, count + 1); 2561 formats[count++] = drv->format_name; 2562 } 2563 } 2564 } 2565 2566 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2567 2568 for (i = 0; i < count; i++) { 2569 it(opaque, formats[i]); 2570 } 2571 2572 g_free(formats); 2573 } 2574 2575 /* This function is to find a node in the bs graph */ 2576 BlockDriverState *bdrv_find_node(const char *node_name) 2577 { 2578 BlockDriverState *bs; 2579 2580 assert(node_name); 2581 2582 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2583 if (!strcmp(node_name, bs->node_name)) { 2584 return bs; 2585 } 2586 } 2587 return NULL; 2588 } 2589 2590 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 2591 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2592 { 2593 BlockDeviceInfoList *list, *entry; 2594 BlockDriverState *bs; 2595 2596 list = NULL; 2597 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2598 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 2599 if (!info) { 2600 qapi_free_BlockDeviceInfoList(list); 2601 return NULL; 2602 } 2603 entry = g_malloc0(sizeof(*entry)); 2604 entry->value = info; 2605 entry->next = list; 2606 list = entry; 2607 } 2608 2609 return list; 2610 } 2611 2612 BlockDriverState *bdrv_lookup_bs(const char *device, 2613 const char *node_name, 2614 Error **errp) 2615 { 2616 BlockBackend *blk; 2617 BlockDriverState *bs; 2618 2619 if (device) { 2620 blk = blk_by_name(device); 2621 2622 if (blk) { 2623 return blk_bs(blk); 2624 } 2625 } 2626 2627 if (node_name) { 2628 bs = bdrv_find_node(node_name); 2629 2630 if (bs) { 2631 return bs; 2632 } 2633 } 2634 2635 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2636 device ? device : "", 2637 node_name ? node_name : ""); 2638 return NULL; 2639 } 2640 2641 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2642 * return false. If either argument is NULL, return false. */ 2643 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2644 { 2645 while (top && top != base) { 2646 top = top->backing_hd; 2647 } 2648 2649 return top != NULL; 2650 } 2651 2652 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2653 { 2654 if (!bs) { 2655 return QTAILQ_FIRST(&graph_bdrv_states); 2656 } 2657 return QTAILQ_NEXT(bs, node_list); 2658 } 2659 2660 BlockDriverState *bdrv_next(BlockDriverState *bs) 2661 { 2662 if (!bs) { 2663 return QTAILQ_FIRST(&bdrv_states); 2664 } 2665 return QTAILQ_NEXT(bs, device_list); 2666 } 2667 2668 const char *bdrv_get_node_name(const BlockDriverState *bs) 2669 { 2670 return bs->node_name; 2671 } 2672 2673 /* TODO check what callers really want: bs->node_name or blk_name() */ 2674 const char *bdrv_get_device_name(const BlockDriverState *bs) 2675 { 2676 return bs->blk ? blk_name(bs->blk) : ""; 2677 } 2678 2679 /* This can be used to identify nodes that might not have a device 2680 * name associated. Since node and device names live in the same 2681 * namespace, the result is unambiguous. The exception is if both are 2682 * absent, then this returns an empty (non-null) string. */ 2683 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2684 { 2685 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2686 } 2687 2688 int bdrv_get_flags(BlockDriverState *bs) 2689 { 2690 return bs->open_flags; 2691 } 2692 2693 int bdrv_has_zero_init_1(BlockDriverState *bs) 2694 { 2695 return 1; 2696 } 2697 2698 int bdrv_has_zero_init(BlockDriverState *bs) 2699 { 2700 assert(bs->drv); 2701 2702 /* If BS is a copy on write image, it is initialized to 2703 the contents of the base image, which may not be zeroes. */ 2704 if (bs->backing_hd) { 2705 return 0; 2706 } 2707 if (bs->drv->bdrv_has_zero_init) { 2708 return bs->drv->bdrv_has_zero_init(bs); 2709 } 2710 2711 /* safe default */ 2712 return 0; 2713 } 2714 2715 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2716 { 2717 BlockDriverInfo bdi; 2718 2719 if (bs->backing_hd) { 2720 return false; 2721 } 2722 2723 if (bdrv_get_info(bs, &bdi) == 0) { 2724 return bdi.unallocated_blocks_are_zero; 2725 } 2726 2727 return false; 2728 } 2729 2730 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 2731 { 2732 BlockDriverInfo bdi; 2733 2734 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 2735 return false; 2736 } 2737 2738 if (bdrv_get_info(bs, &bdi) == 0) { 2739 return bdi.can_write_zeroes_with_unmap; 2740 } 2741 2742 return false; 2743 } 2744 2745 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2746 { 2747 if (bs->backing_hd && bs->backing_hd->encrypted) 2748 return bs->backing_file; 2749 else if (bs->encrypted) 2750 return bs->filename; 2751 else 2752 return NULL; 2753 } 2754 2755 void bdrv_get_backing_filename(BlockDriverState *bs, 2756 char *filename, int filename_size) 2757 { 2758 pstrcpy(filename, filename_size, bs->backing_file); 2759 } 2760 2761 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2762 { 2763 BlockDriver *drv = bs->drv; 2764 if (!drv) 2765 return -ENOMEDIUM; 2766 if (!drv->bdrv_get_info) 2767 return -ENOTSUP; 2768 memset(bdi, 0, sizeof(*bdi)); 2769 return drv->bdrv_get_info(bs, bdi); 2770 } 2771 2772 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 2773 { 2774 BlockDriver *drv = bs->drv; 2775 if (drv && drv->bdrv_get_specific_info) { 2776 return drv->bdrv_get_specific_info(bs); 2777 } 2778 return NULL; 2779 } 2780 2781 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2782 { 2783 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 2784 return; 2785 } 2786 2787 bs->drv->bdrv_debug_event(bs, event); 2788 } 2789 2790 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 2791 const char *tag) 2792 { 2793 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 2794 bs = bs->file; 2795 } 2796 2797 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 2798 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 2799 } 2800 2801 return -ENOTSUP; 2802 } 2803 2804 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 2805 { 2806 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 2807 bs = bs->file; 2808 } 2809 2810 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 2811 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 2812 } 2813 2814 return -ENOTSUP; 2815 } 2816 2817 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 2818 { 2819 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 2820 bs = bs->file; 2821 } 2822 2823 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 2824 return bs->drv->bdrv_debug_resume(bs, tag); 2825 } 2826 2827 return -ENOTSUP; 2828 } 2829 2830 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 2831 { 2832 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 2833 bs = bs->file; 2834 } 2835 2836 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 2837 return bs->drv->bdrv_debug_is_suspended(bs, tag); 2838 } 2839 2840 return false; 2841 } 2842 2843 int bdrv_is_snapshot(BlockDriverState *bs) 2844 { 2845 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2846 } 2847 2848 /* backing_file can either be relative, or absolute, or a protocol. If it is 2849 * relative, it must be relative to the chain. So, passing in bs->filename 2850 * from a BDS as backing_file should not be done, as that may be relative to 2851 * the CWD rather than the chain. */ 2852 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 2853 const char *backing_file) 2854 { 2855 char *filename_full = NULL; 2856 char *backing_file_full = NULL; 2857 char *filename_tmp = NULL; 2858 int is_protocol = 0; 2859 BlockDriverState *curr_bs = NULL; 2860 BlockDriverState *retval = NULL; 2861 2862 if (!bs || !bs->drv || !backing_file) { 2863 return NULL; 2864 } 2865 2866 filename_full = g_malloc(PATH_MAX); 2867 backing_file_full = g_malloc(PATH_MAX); 2868 filename_tmp = g_malloc(PATH_MAX); 2869 2870 is_protocol = path_has_protocol(backing_file); 2871 2872 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 2873 2874 /* If either of the filename paths is actually a protocol, then 2875 * compare unmodified paths; otherwise make paths relative */ 2876 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 2877 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 2878 retval = curr_bs->backing_hd; 2879 break; 2880 } 2881 } else { 2882 /* If not an absolute filename path, make it relative to the current 2883 * image's filename path */ 2884 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2885 backing_file); 2886 2887 /* We are going to compare absolute pathnames */ 2888 if (!realpath(filename_tmp, filename_full)) { 2889 continue; 2890 } 2891 2892 /* We need to make sure the backing filename we are comparing against 2893 * is relative to the current image filename (or absolute) */ 2894 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2895 curr_bs->backing_file); 2896 2897 if (!realpath(filename_tmp, backing_file_full)) { 2898 continue; 2899 } 2900 2901 if (strcmp(backing_file_full, filename_full) == 0) { 2902 retval = curr_bs->backing_hd; 2903 break; 2904 } 2905 } 2906 } 2907 2908 g_free(filename_full); 2909 g_free(backing_file_full); 2910 g_free(filename_tmp); 2911 return retval; 2912 } 2913 2914 int bdrv_get_backing_file_depth(BlockDriverState *bs) 2915 { 2916 if (!bs->drv) { 2917 return 0; 2918 } 2919 2920 if (!bs->backing_hd) { 2921 return 0; 2922 } 2923 2924 return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 2925 } 2926 2927 void bdrv_init(void) 2928 { 2929 module_call_init(MODULE_INIT_BLOCK); 2930 } 2931 2932 void bdrv_init_with_whitelist(void) 2933 { 2934 use_bdrv_whitelist = 1; 2935 bdrv_init(); 2936 } 2937 2938 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 2939 { 2940 Error *local_err = NULL; 2941 int ret; 2942 2943 if (!bs->drv) { 2944 return; 2945 } 2946 2947 if (!(bs->open_flags & BDRV_O_INCOMING)) { 2948 return; 2949 } 2950 bs->open_flags &= ~BDRV_O_INCOMING; 2951 2952 if (bs->drv->bdrv_invalidate_cache) { 2953 bs->drv->bdrv_invalidate_cache(bs, &local_err); 2954 } else if (bs->file) { 2955 bdrv_invalidate_cache(bs->file, &local_err); 2956 } 2957 if (local_err) { 2958 error_propagate(errp, local_err); 2959 return; 2960 } 2961 2962 ret = refresh_total_sectors(bs, bs->total_sectors); 2963 if (ret < 0) { 2964 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 2965 return; 2966 } 2967 } 2968 2969 void bdrv_invalidate_cache_all(Error **errp) 2970 { 2971 BlockDriverState *bs; 2972 Error *local_err = NULL; 2973 2974 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2975 AioContext *aio_context = bdrv_get_aio_context(bs); 2976 2977 aio_context_acquire(aio_context); 2978 bdrv_invalidate_cache(bs, &local_err); 2979 aio_context_release(aio_context); 2980 if (local_err) { 2981 error_propagate(errp, local_err); 2982 return; 2983 } 2984 } 2985 } 2986 2987 /**************************************************************/ 2988 /* removable device support */ 2989 2990 /** 2991 * Return TRUE if the media is present 2992 */ 2993 int bdrv_is_inserted(BlockDriverState *bs) 2994 { 2995 BlockDriver *drv = bs->drv; 2996 2997 if (!drv) 2998 return 0; 2999 if (!drv->bdrv_is_inserted) 3000 return 1; 3001 return drv->bdrv_is_inserted(bs); 3002 } 3003 3004 /** 3005 * Return whether the media changed since the last call to this 3006 * function, or -ENOTSUP if we don't know. Most drivers don't know. 3007 */ 3008 int bdrv_media_changed(BlockDriverState *bs) 3009 { 3010 BlockDriver *drv = bs->drv; 3011 3012 if (drv && drv->bdrv_media_changed) { 3013 return drv->bdrv_media_changed(bs); 3014 } 3015 return -ENOTSUP; 3016 } 3017 3018 /** 3019 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3020 */ 3021 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3022 { 3023 BlockDriver *drv = bs->drv; 3024 const char *device_name; 3025 3026 if (drv && drv->bdrv_eject) { 3027 drv->bdrv_eject(bs, eject_flag); 3028 } 3029 3030 device_name = bdrv_get_device_name(bs); 3031 if (device_name[0] != '\0') { 3032 qapi_event_send_device_tray_moved(device_name, 3033 eject_flag, &error_abort); 3034 } 3035 } 3036 3037 /** 3038 * Lock or unlock the media (if it is locked, the user won't be able 3039 * to eject it manually). 3040 */ 3041 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3042 { 3043 BlockDriver *drv = bs->drv; 3044 3045 trace_bdrv_lock_medium(bs, locked); 3046 3047 if (drv && drv->bdrv_lock_medium) { 3048 drv->bdrv_lock_medium(bs, locked); 3049 } 3050 } 3051 3052 void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 3053 { 3054 bs->guest_block_size = align; 3055 } 3056 3057 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 3058 { 3059 BdrvDirtyBitmap *bm; 3060 3061 assert(name); 3062 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3063 if (bm->name && !strcmp(name, bm->name)) { 3064 return bm; 3065 } 3066 } 3067 return NULL; 3068 } 3069 3070 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 3071 { 3072 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3073 g_free(bitmap->name); 3074 bitmap->name = NULL; 3075 } 3076 3077 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 3078 uint32_t granularity, 3079 const char *name, 3080 Error **errp) 3081 { 3082 int64_t bitmap_size; 3083 BdrvDirtyBitmap *bitmap; 3084 uint32_t sector_granularity; 3085 3086 assert((granularity & (granularity - 1)) == 0); 3087 3088 if (name && bdrv_find_dirty_bitmap(bs, name)) { 3089 error_setg(errp, "Bitmap already exists: %s", name); 3090 return NULL; 3091 } 3092 sector_granularity = granularity >> BDRV_SECTOR_BITS; 3093 assert(sector_granularity); 3094 bitmap_size = bdrv_nb_sectors(bs); 3095 if (bitmap_size < 0) { 3096 error_setg_errno(errp, -bitmap_size, "could not get length of device"); 3097 errno = -bitmap_size; 3098 return NULL; 3099 } 3100 bitmap = g_new0(BdrvDirtyBitmap, 1); 3101 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 3102 bitmap->size = bitmap_size; 3103 bitmap->name = g_strdup(name); 3104 bitmap->disabled = false; 3105 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 3106 return bitmap; 3107 } 3108 3109 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 3110 { 3111 return bitmap->successor; 3112 } 3113 3114 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 3115 { 3116 return !(bitmap->disabled || bitmap->successor); 3117 } 3118 3119 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) 3120 { 3121 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3122 return DIRTY_BITMAP_STATUS_FROZEN; 3123 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3124 return DIRTY_BITMAP_STATUS_DISABLED; 3125 } else { 3126 return DIRTY_BITMAP_STATUS_ACTIVE; 3127 } 3128 } 3129 3130 /** 3131 * Create a successor bitmap destined to replace this bitmap after an operation. 3132 * Requires that the bitmap is not frozen and has no successor. 3133 */ 3134 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 3135 BdrvDirtyBitmap *bitmap, Error **errp) 3136 { 3137 uint64_t granularity; 3138 BdrvDirtyBitmap *child; 3139 3140 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3141 error_setg(errp, "Cannot create a successor for a bitmap that is " 3142 "currently frozen"); 3143 return -1; 3144 } 3145 assert(!bitmap->successor); 3146 3147 /* Create an anonymous successor */ 3148 granularity = bdrv_dirty_bitmap_granularity(bitmap); 3149 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 3150 if (!child) { 3151 return -1; 3152 } 3153 3154 /* Successor will be on or off based on our current state. */ 3155 child->disabled = bitmap->disabled; 3156 3157 /* Install the successor and freeze the parent */ 3158 bitmap->successor = child; 3159 return 0; 3160 } 3161 3162 /** 3163 * For a bitmap with a successor, yield our name to the successor, 3164 * delete the old bitmap, and return a handle to the new bitmap. 3165 */ 3166 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 3167 BdrvDirtyBitmap *bitmap, 3168 Error **errp) 3169 { 3170 char *name; 3171 BdrvDirtyBitmap *successor = bitmap->successor; 3172 3173 if (successor == NULL) { 3174 error_setg(errp, "Cannot relinquish control if " 3175 "there's no successor present"); 3176 return NULL; 3177 } 3178 3179 name = bitmap->name; 3180 bitmap->name = NULL; 3181 successor->name = name; 3182 bitmap->successor = NULL; 3183 bdrv_release_dirty_bitmap(bs, bitmap); 3184 3185 return successor; 3186 } 3187 3188 /** 3189 * In cases of failure where we can no longer safely delete the parent, 3190 * we may wish to re-join the parent and child/successor. 3191 * The merged parent will be un-frozen, but not explicitly re-enabled. 3192 */ 3193 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 3194 BdrvDirtyBitmap *parent, 3195 Error **errp) 3196 { 3197 BdrvDirtyBitmap *successor = parent->successor; 3198 3199 if (!successor) { 3200 error_setg(errp, "Cannot reclaim a successor when none is present"); 3201 return NULL; 3202 } 3203 3204 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 3205 error_setg(errp, "Merging of parent and successor bitmap failed"); 3206 return NULL; 3207 } 3208 bdrv_release_dirty_bitmap(bs, successor); 3209 parent->successor = NULL; 3210 3211 return parent; 3212 } 3213 3214 /** 3215 * Truncates _all_ bitmaps attached to a BDS. 3216 */ 3217 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 3218 { 3219 BdrvDirtyBitmap *bitmap; 3220 uint64_t size = bdrv_nb_sectors(bs); 3221 3222 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3223 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3224 continue; 3225 } 3226 hbitmap_truncate(bitmap->bitmap, size); 3227 } 3228 } 3229 3230 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 3231 { 3232 BdrvDirtyBitmap *bm, *next; 3233 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 3234 if (bm == bitmap) { 3235 assert(!bdrv_dirty_bitmap_frozen(bm)); 3236 QLIST_REMOVE(bitmap, list); 3237 hbitmap_free(bitmap->bitmap); 3238 g_free(bitmap->name); 3239 g_free(bitmap); 3240 return; 3241 } 3242 } 3243 } 3244 3245 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3246 { 3247 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3248 bitmap->disabled = true; 3249 } 3250 3251 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3252 { 3253 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3254 bitmap->disabled = false; 3255 } 3256 3257 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 3258 { 3259 BdrvDirtyBitmap *bm; 3260 BlockDirtyInfoList *list = NULL; 3261 BlockDirtyInfoList **plist = &list; 3262 3263 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3264 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 3265 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 3266 info->count = bdrv_get_dirty_count(bm); 3267 info->granularity = bdrv_dirty_bitmap_granularity(bm); 3268 info->has_name = !!bm->name; 3269 info->name = g_strdup(bm->name); 3270 info->status = bdrv_dirty_bitmap_status(bm); 3271 entry->value = info; 3272 *plist = entry; 3273 plist = &entry->next; 3274 } 3275 3276 return list; 3277 } 3278 3279 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 3280 { 3281 if (bitmap) { 3282 return hbitmap_get(bitmap->bitmap, sector); 3283 } else { 3284 return 0; 3285 } 3286 } 3287 3288 /** 3289 * Chooses a default granularity based on the existing cluster size, 3290 * but clamped between [4K, 64K]. Defaults to 64K in the case that there 3291 * is no cluster size information available. 3292 */ 3293 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 3294 { 3295 BlockDriverInfo bdi; 3296 uint32_t granularity; 3297 3298 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 3299 granularity = MAX(4096, bdi.cluster_size); 3300 granularity = MIN(65536, granularity); 3301 } else { 3302 granularity = 65536; 3303 } 3304 3305 return granularity; 3306 } 3307 3308 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 3309 { 3310 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 3311 } 3312 3313 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 3314 { 3315 hbitmap_iter_init(hbi, bitmap->bitmap, 0); 3316 } 3317 3318 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3319 int64_t cur_sector, int nr_sectors) 3320 { 3321 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3322 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3323 } 3324 3325 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3326 int64_t cur_sector, int nr_sectors) 3327 { 3328 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3329 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3330 } 3331 3332 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3333 { 3334 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3335 hbitmap_reset(bitmap->bitmap, 0, bitmap->size); 3336 } 3337 3338 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 3339 int nr_sectors) 3340 { 3341 BdrvDirtyBitmap *bitmap; 3342 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3343 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3344 continue; 3345 } 3346 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3347 } 3348 } 3349 3350 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3351 int nr_sectors) 3352 { 3353 BdrvDirtyBitmap *bitmap; 3354 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3355 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3356 continue; 3357 } 3358 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3359 } 3360 } 3361 3362 /** 3363 * Advance an HBitmapIter to an arbitrary offset. 3364 */ 3365 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 3366 { 3367 assert(hbi->hb); 3368 hbitmap_iter_init(hbi, hbi->hb, offset); 3369 } 3370 3371 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 3372 { 3373 return hbitmap_count(bitmap->bitmap); 3374 } 3375 3376 /* Get a reference to bs */ 3377 void bdrv_ref(BlockDriverState *bs) 3378 { 3379 bs->refcnt++; 3380 } 3381 3382 /* Release a previously grabbed reference to bs. 3383 * If after releasing, reference count is zero, the BlockDriverState is 3384 * deleted. */ 3385 void bdrv_unref(BlockDriverState *bs) 3386 { 3387 if (!bs) { 3388 return; 3389 } 3390 assert(bs->refcnt > 0); 3391 if (--bs->refcnt == 0) { 3392 bdrv_delete(bs); 3393 } 3394 } 3395 3396 struct BdrvOpBlocker { 3397 Error *reason; 3398 QLIST_ENTRY(BdrvOpBlocker) list; 3399 }; 3400 3401 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 3402 { 3403 BdrvOpBlocker *blocker; 3404 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3405 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 3406 blocker = QLIST_FIRST(&bs->op_blockers[op]); 3407 if (errp) { 3408 error_setg(errp, "Node '%s' is busy: %s", 3409 bdrv_get_device_or_node_name(bs), 3410 error_get_pretty(blocker->reason)); 3411 } 3412 return true; 3413 } 3414 return false; 3415 } 3416 3417 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 3418 { 3419 BdrvOpBlocker *blocker; 3420 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3421 3422 blocker = g_new0(BdrvOpBlocker, 1); 3423 blocker->reason = reason; 3424 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 3425 } 3426 3427 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 3428 { 3429 BdrvOpBlocker *blocker, *next; 3430 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3431 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 3432 if (blocker->reason == reason) { 3433 QLIST_REMOVE(blocker, list); 3434 g_free(blocker); 3435 } 3436 } 3437 } 3438 3439 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 3440 { 3441 int i; 3442 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3443 bdrv_op_block(bs, i, reason); 3444 } 3445 } 3446 3447 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 3448 { 3449 int i; 3450 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3451 bdrv_op_unblock(bs, i, reason); 3452 } 3453 } 3454 3455 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 3456 { 3457 int i; 3458 3459 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3460 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 3461 return false; 3462 } 3463 } 3464 return true; 3465 } 3466 3467 void bdrv_iostatus_enable(BlockDriverState *bs) 3468 { 3469 bs->iostatus_enabled = true; 3470 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3471 } 3472 3473 /* The I/O status is only enabled if the drive explicitly 3474 * enables it _and_ the VM is configured to stop on errors */ 3475 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3476 { 3477 return (bs->iostatus_enabled && 3478 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 3479 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 3480 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 3481 } 3482 3483 void bdrv_iostatus_disable(BlockDriverState *bs) 3484 { 3485 bs->iostatus_enabled = false; 3486 } 3487 3488 void bdrv_iostatus_reset(BlockDriverState *bs) 3489 { 3490 if (bdrv_iostatus_is_enabled(bs)) { 3491 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3492 if (bs->job) { 3493 block_job_iostatus_reset(bs->job); 3494 } 3495 } 3496 } 3497 3498 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3499 { 3500 assert(bdrv_iostatus_is_enabled(bs)); 3501 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3502 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 3503 BLOCK_DEVICE_IO_STATUS_FAILED; 3504 } 3505 } 3506 3507 void bdrv_img_create(const char *filename, const char *fmt, 3508 const char *base_filename, const char *base_fmt, 3509 char *options, uint64_t img_size, int flags, 3510 Error **errp, bool quiet) 3511 { 3512 QemuOptsList *create_opts = NULL; 3513 QemuOpts *opts = NULL; 3514 const char *backing_fmt, *backing_file; 3515 int64_t size; 3516 BlockDriver *drv, *proto_drv; 3517 BlockDriver *backing_drv = NULL; 3518 Error *local_err = NULL; 3519 int ret = 0; 3520 3521 /* Find driver and parse its options */ 3522 drv = bdrv_find_format(fmt); 3523 if (!drv) { 3524 error_setg(errp, "Unknown file format '%s'", fmt); 3525 return; 3526 } 3527 3528 proto_drv = bdrv_find_protocol(filename, true, errp); 3529 if (!proto_drv) { 3530 return; 3531 } 3532 3533 if (!drv->create_opts) { 3534 error_setg(errp, "Format driver '%s' does not support image creation", 3535 drv->format_name); 3536 return; 3537 } 3538 3539 if (!proto_drv->create_opts) { 3540 error_setg(errp, "Protocol driver '%s' does not support image creation", 3541 proto_drv->format_name); 3542 return; 3543 } 3544 3545 create_opts = qemu_opts_append(create_opts, drv->create_opts); 3546 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 3547 3548 /* Create parameter list with default values */ 3549 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 3550 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 3551 3552 /* Parse -o options */ 3553 if (options) { 3554 qemu_opts_do_parse(opts, options, NULL, &local_err); 3555 if (local_err) { 3556 error_report_err(local_err); 3557 local_err = NULL; 3558 error_setg(errp, "Invalid options for file format '%s'", fmt); 3559 goto out; 3560 } 3561 } 3562 3563 if (base_filename) { 3564 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 3565 if (local_err) { 3566 error_setg(errp, "Backing file not supported for file format '%s'", 3567 fmt); 3568 goto out; 3569 } 3570 } 3571 3572 if (base_fmt) { 3573 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 3574 if (local_err) { 3575 error_setg(errp, "Backing file format not supported for file " 3576 "format '%s'", fmt); 3577 goto out; 3578 } 3579 } 3580 3581 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3582 if (backing_file) { 3583 if (!strcmp(filename, backing_file)) { 3584 error_setg(errp, "Error: Trying to create an image with the " 3585 "same filename as the backing file"); 3586 goto out; 3587 } 3588 } 3589 3590 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3591 if (backing_fmt) { 3592 backing_drv = bdrv_find_format(backing_fmt); 3593 if (!backing_drv) { 3594 error_setg(errp, "Unknown backing file format '%s'", 3595 backing_fmt); 3596 goto out; 3597 } 3598 } 3599 3600 // The size for the image must always be specified, with one exception: 3601 // If we are using a backing file, we can obtain the size from there 3602 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3603 if (size == -1) { 3604 if (backing_file) { 3605 BlockDriverState *bs; 3606 char *full_backing = g_new0(char, PATH_MAX); 3607 int64_t size; 3608 int back_flags; 3609 3610 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 3611 full_backing, PATH_MAX, 3612 &local_err); 3613 if (local_err) { 3614 g_free(full_backing); 3615 goto out; 3616 } 3617 3618 /* backing files always opened read-only */ 3619 back_flags = 3620 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 3621 3622 bs = NULL; 3623 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 3624 backing_drv, &local_err); 3625 g_free(full_backing); 3626 if (ret < 0) { 3627 goto out; 3628 } 3629 size = bdrv_getlength(bs); 3630 if (size < 0) { 3631 error_setg_errno(errp, -size, "Could not get size of '%s'", 3632 backing_file); 3633 bdrv_unref(bs); 3634 goto out; 3635 } 3636 3637 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 3638 3639 bdrv_unref(bs); 3640 } else { 3641 error_setg(errp, "Image creation needs a size parameter"); 3642 goto out; 3643 } 3644 } 3645 3646 if (!quiet) { 3647 printf("Formatting '%s', fmt=%s", filename, fmt); 3648 qemu_opts_print(opts, " "); 3649 puts(""); 3650 } 3651 3652 ret = bdrv_create(drv, filename, opts, &local_err); 3653 3654 if (ret == -EFBIG) { 3655 /* This is generally a better message than whatever the driver would 3656 * deliver (especially because of the cluster_size_hint), since that 3657 * is most probably not much different from "image too large". */ 3658 const char *cluster_size_hint = ""; 3659 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 3660 cluster_size_hint = " (try using a larger cluster size)"; 3661 } 3662 error_setg(errp, "The image size is too large for file format '%s'" 3663 "%s", fmt, cluster_size_hint); 3664 error_free(local_err); 3665 local_err = NULL; 3666 } 3667 3668 out: 3669 qemu_opts_del(opts); 3670 qemu_opts_free(create_opts); 3671 if (local_err) { 3672 error_propagate(errp, local_err); 3673 } 3674 } 3675 3676 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 3677 { 3678 return bs->aio_context; 3679 } 3680 3681 void bdrv_detach_aio_context(BlockDriverState *bs) 3682 { 3683 BdrvAioNotifier *baf; 3684 3685 if (!bs->drv) { 3686 return; 3687 } 3688 3689 QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 3690 baf->detach_aio_context(baf->opaque); 3691 } 3692 3693 if (bs->io_limits_enabled) { 3694 throttle_detach_aio_context(&bs->throttle_state); 3695 } 3696 if (bs->drv->bdrv_detach_aio_context) { 3697 bs->drv->bdrv_detach_aio_context(bs); 3698 } 3699 if (bs->file) { 3700 bdrv_detach_aio_context(bs->file); 3701 } 3702 if (bs->backing_hd) { 3703 bdrv_detach_aio_context(bs->backing_hd); 3704 } 3705 3706 bs->aio_context = NULL; 3707 } 3708 3709 void bdrv_attach_aio_context(BlockDriverState *bs, 3710 AioContext *new_context) 3711 { 3712 BdrvAioNotifier *ban; 3713 3714 if (!bs->drv) { 3715 return; 3716 } 3717 3718 bs->aio_context = new_context; 3719 3720 if (bs->backing_hd) { 3721 bdrv_attach_aio_context(bs->backing_hd, new_context); 3722 } 3723 if (bs->file) { 3724 bdrv_attach_aio_context(bs->file, new_context); 3725 } 3726 if (bs->drv->bdrv_attach_aio_context) { 3727 bs->drv->bdrv_attach_aio_context(bs, new_context); 3728 } 3729 if (bs->io_limits_enabled) { 3730 throttle_attach_aio_context(&bs->throttle_state, new_context); 3731 } 3732 3733 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3734 ban->attached_aio_context(new_context, ban->opaque); 3735 } 3736 } 3737 3738 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3739 { 3740 bdrv_drain_all(); /* ensure there are no in-flight requests */ 3741 3742 bdrv_detach_aio_context(bs); 3743 3744 /* This function executes in the old AioContext so acquire the new one in 3745 * case it runs in a different thread. 3746 */ 3747 aio_context_acquire(new_context); 3748 bdrv_attach_aio_context(bs, new_context); 3749 aio_context_release(new_context); 3750 } 3751 3752 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3753 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3754 void (*detach_aio_context)(void *opaque), void *opaque) 3755 { 3756 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3757 *ban = (BdrvAioNotifier){ 3758 .attached_aio_context = attached_aio_context, 3759 .detach_aio_context = detach_aio_context, 3760 .opaque = opaque 3761 }; 3762 3763 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3764 } 3765 3766 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3767 void (*attached_aio_context)(AioContext *, 3768 void *), 3769 void (*detach_aio_context)(void *), 3770 void *opaque) 3771 { 3772 BdrvAioNotifier *ban, *ban_next; 3773 3774 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3775 if (ban->attached_aio_context == attached_aio_context && 3776 ban->detach_aio_context == detach_aio_context && 3777 ban->opaque == opaque) 3778 { 3779 QLIST_REMOVE(ban, list); 3780 g_free(ban); 3781 3782 return; 3783 } 3784 } 3785 3786 abort(); 3787 } 3788 3789 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3790 BlockDriverAmendStatusCB *status_cb) 3791 { 3792 if (!bs->drv->bdrv_amend_options) { 3793 return -ENOTSUP; 3794 } 3795 return bs->drv->bdrv_amend_options(bs, opts, status_cb); 3796 } 3797 3798 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3799 * of block filter and by bdrv_is_first_non_filter. 3800 * It is used to test if the given bs is the candidate or recurse more in the 3801 * node graph. 3802 */ 3803 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3804 BlockDriverState *candidate) 3805 { 3806 /* return false if basic checks fails */ 3807 if (!bs || !bs->drv) { 3808 return false; 3809 } 3810 3811 /* the code reached a non block filter driver -> check if the bs is 3812 * the same as the candidate. It's the recursion termination condition. 3813 */ 3814 if (!bs->drv->is_filter) { 3815 return bs == candidate; 3816 } 3817 /* Down this path the driver is a block filter driver */ 3818 3819 /* If the block filter recursion method is defined use it to recurse down 3820 * the node graph. 3821 */ 3822 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3823 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3824 } 3825 3826 /* the driver is a block filter but don't allow to recurse -> return false 3827 */ 3828 return false; 3829 } 3830 3831 /* This function checks if the candidate is the first non filter bs down it's 3832 * bs chain. Since we don't have pointers to parents it explore all bs chains 3833 * from the top. Some filters can choose not to pass down the recursion. 3834 */ 3835 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 3836 { 3837 BlockDriverState *bs; 3838 3839 /* walk down the bs forest recursively */ 3840 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3841 bool perm; 3842 3843 /* try to recurse in this top level bs */ 3844 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 3845 3846 /* candidate is the first non filter */ 3847 if (perm) { 3848 return true; 3849 } 3850 } 3851 3852 return false; 3853 } 3854 3855 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 3856 { 3857 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 3858 AioContext *aio_context; 3859 3860 if (!to_replace_bs) { 3861 error_setg(errp, "Node name '%s' not found", node_name); 3862 return NULL; 3863 } 3864 3865 aio_context = bdrv_get_aio_context(to_replace_bs); 3866 aio_context_acquire(aio_context); 3867 3868 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 3869 to_replace_bs = NULL; 3870 goto out; 3871 } 3872 3873 /* We don't want arbitrary node of the BDS chain to be replaced only the top 3874 * most non filter in order to prevent data corruption. 3875 * Another benefit is that this tests exclude backing files which are 3876 * blocked by the backing blockers. 3877 */ 3878 if (!bdrv_is_first_non_filter(to_replace_bs)) { 3879 error_setg(errp, "Only top most non filter can be replaced"); 3880 to_replace_bs = NULL; 3881 goto out; 3882 } 3883 3884 out: 3885 aio_context_release(aio_context); 3886 return to_replace_bs; 3887 } 3888 3889 static bool append_open_options(QDict *d, BlockDriverState *bs) 3890 { 3891 const QDictEntry *entry; 3892 bool found_any = false; 3893 3894 for (entry = qdict_first(bs->options); entry; 3895 entry = qdict_next(bs->options, entry)) 3896 { 3897 /* Only take options for this level and exclude all non-driver-specific 3898 * options */ 3899 if (!strchr(qdict_entry_key(entry), '.') && 3900 strcmp(qdict_entry_key(entry), "node-name")) 3901 { 3902 qobject_incref(qdict_entry_value(entry)); 3903 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 3904 found_any = true; 3905 } 3906 } 3907 3908 return found_any; 3909 } 3910 3911 /* Updates the following BDS fields: 3912 * - exact_filename: A filename which may be used for opening a block device 3913 * which (mostly) equals the given BDS (even without any 3914 * other options; so reading and writing must return the same 3915 * results, but caching etc. may be different) 3916 * - full_open_options: Options which, when given when opening a block device 3917 * (without a filename), result in a BDS (mostly) 3918 * equalling the given one 3919 * - filename: If exact_filename is set, it is copied here. Otherwise, 3920 * full_open_options is converted to a JSON object, prefixed with 3921 * "json:" (for use through the JSON pseudo protocol) and put here. 3922 */ 3923 void bdrv_refresh_filename(BlockDriverState *bs) 3924 { 3925 BlockDriver *drv = bs->drv; 3926 QDict *opts; 3927 3928 if (!drv) { 3929 return; 3930 } 3931 3932 /* This BDS's file name will most probably depend on its file's name, so 3933 * refresh that first */ 3934 if (bs->file) { 3935 bdrv_refresh_filename(bs->file); 3936 } 3937 3938 if (drv->bdrv_refresh_filename) { 3939 /* Obsolete information is of no use here, so drop the old file name 3940 * information before refreshing it */ 3941 bs->exact_filename[0] = '\0'; 3942 if (bs->full_open_options) { 3943 QDECREF(bs->full_open_options); 3944 bs->full_open_options = NULL; 3945 } 3946 3947 drv->bdrv_refresh_filename(bs); 3948 } else if (bs->file) { 3949 /* Try to reconstruct valid information from the underlying file */ 3950 bool has_open_options; 3951 3952 bs->exact_filename[0] = '\0'; 3953 if (bs->full_open_options) { 3954 QDECREF(bs->full_open_options); 3955 bs->full_open_options = NULL; 3956 } 3957 3958 opts = qdict_new(); 3959 has_open_options = append_open_options(opts, bs); 3960 3961 /* If no specific options have been given for this BDS, the filename of 3962 * the underlying file should suffice for this one as well */ 3963 if (bs->file->exact_filename[0] && !has_open_options) { 3964 strcpy(bs->exact_filename, bs->file->exact_filename); 3965 } 3966 /* Reconstructing the full options QDict is simple for most format block 3967 * drivers, as long as the full options are known for the underlying 3968 * file BDS. The full options QDict of that file BDS should somehow 3969 * contain a representation of the filename, therefore the following 3970 * suffices without querying the (exact_)filename of this BDS. */ 3971 if (bs->file->full_open_options) { 3972 qdict_put_obj(opts, "driver", 3973 QOBJECT(qstring_from_str(drv->format_name))); 3974 QINCREF(bs->file->full_open_options); 3975 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 3976 3977 bs->full_open_options = opts; 3978 } else { 3979 QDECREF(opts); 3980 } 3981 } else if (!bs->full_open_options && qdict_size(bs->options)) { 3982 /* There is no underlying file BDS (at least referenced by BDS.file), 3983 * so the full options QDict should be equal to the options given 3984 * specifically for this block device when it was opened (plus the 3985 * driver specification). 3986 * Because those options don't change, there is no need to update 3987 * full_open_options when it's already set. */ 3988 3989 opts = qdict_new(); 3990 append_open_options(opts, bs); 3991 qdict_put_obj(opts, "driver", 3992 QOBJECT(qstring_from_str(drv->format_name))); 3993 3994 if (bs->exact_filename[0]) { 3995 /* This may not work for all block protocol drivers (some may 3996 * require this filename to be parsed), but we have to find some 3997 * default solution here, so just include it. If some block driver 3998 * does not support pure options without any filename at all or 3999 * needs some special format of the options QDict, it needs to 4000 * implement the driver-specific bdrv_refresh_filename() function. 4001 */ 4002 qdict_put_obj(opts, "filename", 4003 QOBJECT(qstring_from_str(bs->exact_filename))); 4004 } 4005 4006 bs->full_open_options = opts; 4007 } 4008 4009 if (bs->exact_filename[0]) { 4010 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4011 } else if (bs->full_open_options) { 4012 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4013 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4014 qstring_get_str(json)); 4015 QDECREF(json); 4016 } 4017 } 4018 4019 /* This accessor function purpose is to allow the device models to access the 4020 * BlockAcctStats structure embedded inside a BlockDriverState without being 4021 * aware of the BlockDriverState structure layout. 4022 * It will go away when the BlockAcctStats structure will be moved inside 4023 * the device models. 4024 */ 4025 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 4026 { 4027 return &bs->stats; 4028 } 4029