1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "qemu/module.h" 30 #include "qapi/qmp/qjson.h" 31 #include "sysemu/block-backend.h" 32 #include "sysemu/sysemu.h" 33 #include "qemu/notify.h" 34 #include "block/coroutine.h" 35 #include "block/qapi.h" 36 #include "qmp-commands.h" 37 #include "qemu/timer.h" 38 #include "qapi-event.h" 39 40 #ifdef CONFIG_BSD 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <sys/ioctl.h> 44 #include <sys/queue.h> 45 #ifndef __DragonFly__ 46 #include <sys/disk.h> 47 #endif 48 #endif 49 50 #ifdef _WIN32 51 #include <windows.h> 52 #endif 53 54 /** 55 * A BdrvDirtyBitmap can be in three possible states: 56 * (1) successor is NULL and disabled is false: full r/w mode 57 * (2) successor is NULL and disabled is true: read only mode ("disabled") 58 * (3) successor is set: frozen mode. 59 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 60 * or enabled. A frozen bitmap can only abdicate() or reclaim(). 
61 */ 62 struct BdrvDirtyBitmap { 63 HBitmap *bitmap; /* Dirty sector bitmap implementation */ 64 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 65 char *name; /* Optional non-empty unique ID */ 66 int64_t size; /* Size of the bitmap (Number of sectors) */ 67 bool disabled; /* Bitmap is read-only */ 68 QLIST_ENTRY(BdrvDirtyBitmap) list; 69 }; 70 71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 72 73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 74 QTAILQ_HEAD_INITIALIZER(bdrv_states); 75 76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 78 79 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 80 QLIST_HEAD_INITIALIZER(bdrv_drivers); 81 82 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 83 const char *reference, QDict *options, int flags, 84 BlockDriverState *parent, 85 const BdrvChildRole *child_role, 86 BlockDriver *drv, Error **errp); 87 88 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 89 /* If non-zero, use only whitelisted block drivers */ 90 static int use_bdrv_whitelist; 91 92 #ifdef _WIN32 93 static int is_windows_drive_prefix(const char *filename) 94 { 95 return (((filename[0] >= 'a' && filename[0] <= 'z') || 96 (filename[0] >= 'A' && filename[0] <= 'Z')) && 97 filename[1] == ':'); 98 } 99 100 int is_windows_drive(const char *filename) 101 { 102 if (is_windows_drive_prefix(filename) && 103 filename[2] == '\0') 104 return 1; 105 if (strstart(filename, "\\\\.\\", NULL) || 106 strstart(filename, "//./", NULL)) 107 return 1; 108 return 0; 109 } 110 #endif 111 112 size_t bdrv_opt_mem_align(BlockDriverState *bs) 113 { 114 if (!bs || !bs->drv) { 115 /* page size or 4k (hdd sector size) should be on the safe side */ 116 return MAX(4096, getpagesize()); 117 } 118 119 return bs->bl.opt_mem_alignment; 120 } 121 122 size_t bdrv_min_mem_align(BlockDriverState *bs) 123 { 124 if (!bs || !bs->drv) { 125 /* 
page size or 4k (hdd sector size) should be on the safe side */ 126 return MAX(4096, getpagesize()); 127 } 128 129 return bs->bl.min_mem_alignment; 130 } 131 132 /* check if the path starts with "<protocol>:" */ 133 int path_has_protocol(const char *path) 134 { 135 const char *p; 136 137 #ifdef _WIN32 138 if (is_windows_drive(path) || 139 is_windows_drive_prefix(path)) { 140 return 0; 141 } 142 p = path + strcspn(path, ":/\\"); 143 #else 144 p = path + strcspn(path, ":/"); 145 #endif 146 147 return *p == ':'; 148 } 149 150 int path_is_absolute(const char *path) 151 { 152 #ifdef _WIN32 153 /* specific case for names like: "\\.\d:" */ 154 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 155 return 1; 156 } 157 return (*path == '/' || *path == '\\'); 158 #else 159 return (*path == '/'); 160 #endif 161 } 162 163 /* if filename is absolute, just copy it to dest. Otherwise, build a 164 path to it by considering it is relative to base_path. URL are 165 supported. */ 166 void path_combine(char *dest, int dest_size, 167 const char *base_path, 168 const char *filename) 169 { 170 const char *p, *p1; 171 int len; 172 173 if (dest_size <= 0) 174 return; 175 if (path_is_absolute(filename)) { 176 pstrcpy(dest, dest_size, filename); 177 } else { 178 p = strchr(base_path, ':'); 179 if (p) 180 p++; 181 else 182 p = base_path; 183 p1 = strrchr(base_path, '/'); 184 #ifdef _WIN32 185 { 186 const char *p2; 187 p2 = strrchr(base_path, '\\'); 188 if (!p1 || p2 > p1) 189 p1 = p2; 190 } 191 #endif 192 if (p1) 193 p1++; 194 else 195 p1 = base_path; 196 if (p1 > p) 197 p = p1; 198 len = p - base_path; 199 if (len > dest_size - 1) 200 len = dest_size - 1; 201 memcpy(dest, base_path, len); 202 dest[len] = '\0'; 203 pstrcat(dest, dest_size, filename); 204 } 205 } 206 207 void bdrv_get_full_backing_filename_from_filename(const char *backed, 208 const char *backing, 209 char *dest, size_t sz, 210 Error **errp) 211 { 212 if (backing[0] == '\0' || path_has_protocol(backing) || 213 
path_is_absolute(backing)) 214 { 215 pstrcpy(dest, sz, backing); 216 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 217 error_setg(errp, "Cannot use relative backing file names for '%s'", 218 backed); 219 } else { 220 path_combine(dest, sz, backed, backing); 221 } 222 } 223 224 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 225 Error **errp) 226 { 227 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 228 229 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 230 dest, sz, errp); 231 } 232 233 void bdrv_register(BlockDriver *bdrv) 234 { 235 bdrv_setup_io_funcs(bdrv); 236 237 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 238 } 239 240 BlockDriverState *bdrv_new_root(void) 241 { 242 BlockDriverState *bs = bdrv_new(); 243 244 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 245 return bs; 246 } 247 248 BlockDriverState *bdrv_new(void) 249 { 250 BlockDriverState *bs; 251 int i; 252 253 bs = g_new0(BlockDriverState, 1); 254 QLIST_INIT(&bs->dirty_bitmaps); 255 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 256 QLIST_INIT(&bs->op_blockers[i]); 257 } 258 bdrv_iostatus_disable(bs); 259 notifier_list_init(&bs->close_notifiers); 260 notifier_with_return_list_init(&bs->before_write_notifiers); 261 qemu_co_queue_init(&bs->throttled_reqs[0]); 262 qemu_co_queue_init(&bs->throttled_reqs[1]); 263 bs->refcnt = 1; 264 bs->aio_context = qemu_get_aio_context(); 265 266 return bs; 267 } 268 269 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 270 { 271 notifier_list_add(&bs->close_notifiers, notify); 272 } 273 274 BlockDriver *bdrv_find_format(const char *format_name) 275 { 276 BlockDriver *drv1; 277 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 278 if (!strcmp(drv1->format_name, format_name)) { 279 return drv1; 280 } 281 } 282 return NULL; 283 } 284 285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 286 { 287 static const char *whitelist_rw[] = { 288 
CONFIG_BDRV_RW_WHITELIST 289 }; 290 static const char *whitelist_ro[] = { 291 CONFIG_BDRV_RO_WHITELIST 292 }; 293 const char **p; 294 295 if (!whitelist_rw[0] && !whitelist_ro[0]) { 296 return 1; /* no whitelist, anything goes */ 297 } 298 299 for (p = whitelist_rw; *p; p++) { 300 if (!strcmp(drv->format_name, *p)) { 301 return 1; 302 } 303 } 304 if (read_only) { 305 for (p = whitelist_ro; *p; p++) { 306 if (!strcmp(drv->format_name, *p)) { 307 return 1; 308 } 309 } 310 } 311 return 0; 312 } 313 314 BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 315 bool read_only) 316 { 317 BlockDriver *drv = bdrv_find_format(format_name); 318 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 319 } 320 321 typedef struct CreateCo { 322 BlockDriver *drv; 323 char *filename; 324 QemuOpts *opts; 325 int ret; 326 Error *err; 327 } CreateCo; 328 329 static void coroutine_fn bdrv_create_co_entry(void *opaque) 330 { 331 Error *local_err = NULL; 332 int ret; 333 334 CreateCo *cco = opaque; 335 assert(cco->drv); 336 337 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 338 if (local_err) { 339 error_propagate(&cco->err, local_err); 340 } 341 cco->ret = ret; 342 } 343 344 int bdrv_create(BlockDriver *drv, const char* filename, 345 QemuOpts *opts, Error **errp) 346 { 347 int ret; 348 349 Coroutine *co; 350 CreateCo cco = { 351 .drv = drv, 352 .filename = g_strdup(filename), 353 .opts = opts, 354 .ret = NOT_DONE, 355 .err = NULL, 356 }; 357 358 if (!drv->bdrv_create) { 359 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 360 ret = -ENOTSUP; 361 goto out; 362 } 363 364 if (qemu_in_coroutine()) { 365 /* Fast-path if already in coroutine context */ 366 bdrv_create_co_entry(&cco); 367 } else { 368 co = qemu_coroutine_create(bdrv_create_co_entry); 369 qemu_coroutine_enter(co, &cco); 370 while (cco.ret == NOT_DONE) { 371 aio_poll(qemu_get_aio_context(), true); 372 } 373 } 374 375 ret = cco.ret; 376 if (ret < 0) { 
377 if (cco.err) { 378 error_propagate(errp, cco.err); 379 } else { 380 error_setg_errno(errp, -ret, "Could not create image"); 381 } 382 } 383 384 out: 385 g_free(cco.filename); 386 return ret; 387 } 388 389 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 390 { 391 BlockDriver *drv; 392 Error *local_err = NULL; 393 int ret; 394 395 drv = bdrv_find_protocol(filename, true, errp); 396 if (drv == NULL) { 397 return -ENOENT; 398 } 399 400 ret = bdrv_create(drv, filename, opts, &local_err); 401 if (local_err) { 402 error_propagate(errp, local_err); 403 } 404 return ret; 405 } 406 407 /** 408 * Try to get @bs's logical and physical block size. 409 * On success, store them in @bsz struct and return 0. 410 * On failure return -errno. 411 * @bs must not be empty. 412 */ 413 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 414 { 415 BlockDriver *drv = bs->drv; 416 417 if (drv && drv->bdrv_probe_blocksizes) { 418 return drv->bdrv_probe_blocksizes(bs, bsz); 419 } 420 421 return -ENOTSUP; 422 } 423 424 /** 425 * Try to get @bs's geometry (cyls, heads, sectors). 426 * On success, store them in @geo struct and return 0. 427 * On failure return -errno. 428 * @bs must not be empty. 429 */ 430 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 431 { 432 BlockDriver *drv = bs->drv; 433 434 if (drv && drv->bdrv_probe_geometry) { 435 return drv->bdrv_probe_geometry(bs, geo); 436 } 437 438 return -ENOTSUP; 439 } 440 441 /* 442 * Create a uniquely-named empty temporary file. 443 * Return 0 upon success, otherwise a negative errno value. 444 */ 445 int get_tmp_filename(char *filename, int size) 446 { 447 #ifdef _WIN32 448 char temp_dir[MAX_PATH]; 449 /* GetTempFileName requires that its output buffer (4th param) 450 have length MAX_PATH or greater. */ 451 assert(size >= MAX_PATH); 452 return (GetTempPath(MAX_PATH, temp_dir) 453 && GetTempFileName(temp_dir, "qem", 0, filename) 454 ? 
0 : -GetLastError()); 455 #else 456 int fd; 457 const char *tmpdir; 458 tmpdir = getenv("TMPDIR"); 459 if (!tmpdir) { 460 tmpdir = "/var/tmp"; 461 } 462 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 463 return -EOVERFLOW; 464 } 465 fd = mkstemp(filename); 466 if (fd < 0) { 467 return -errno; 468 } 469 if (close(fd) != 0) { 470 unlink(filename); 471 return -errno; 472 } 473 return 0; 474 #endif 475 } 476 477 /* 478 * Detect host devices. By convention, /dev/cdrom[N] is always 479 * recognized as a host CDROM. 480 */ 481 static BlockDriver *find_hdev_driver(const char *filename) 482 { 483 int score_max = 0, score; 484 BlockDriver *drv = NULL, *d; 485 486 QLIST_FOREACH(d, &bdrv_drivers, list) { 487 if (d->bdrv_probe_device) { 488 score = d->bdrv_probe_device(filename); 489 if (score > score_max) { 490 score_max = score; 491 drv = d; 492 } 493 } 494 } 495 496 return drv; 497 } 498 499 BlockDriver *bdrv_find_protocol(const char *filename, 500 bool allow_protocol_prefix, 501 Error **errp) 502 { 503 BlockDriver *drv1; 504 char protocol[128]; 505 int len; 506 const char *p; 507 508 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 509 510 /* 511 * XXX(hch): we really should not let host device detection 512 * override an explicit protocol specification, but moving this 513 * later breaks access to device names with colons in them. 514 * Thanks to the brain-dead persistent naming schemes on udev- 515 * based Linux systems those actually are quite common. 
516 */ 517 drv1 = find_hdev_driver(filename); 518 if (drv1) { 519 return drv1; 520 } 521 522 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 523 return &bdrv_file; 524 } 525 526 p = strchr(filename, ':'); 527 assert(p != NULL); 528 len = p - filename; 529 if (len > sizeof(protocol) - 1) 530 len = sizeof(protocol) - 1; 531 memcpy(protocol, filename, len); 532 protocol[len] = '\0'; 533 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 534 if (drv1->protocol_name && 535 !strcmp(drv1->protocol_name, protocol)) { 536 return drv1; 537 } 538 } 539 540 error_setg(errp, "Unknown protocol '%s'", protocol); 541 return NULL; 542 } 543 544 /* 545 * Guess image format by probing its contents. 546 * This is not a good idea when your image is raw (CVE-2008-2004), but 547 * we do it anyway for backward compatibility. 548 * 549 * @buf contains the image's first @buf_size bytes. 550 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 551 * but can be smaller if the image file is smaller) 552 * @filename is its filename. 553 * 554 * For all block drivers, call the bdrv_probe() method to get its 555 * probing score. 556 * Return the first block driver with the highest probing score. 
557 */ 558 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 559 const char *filename) 560 { 561 int score_max = 0, score; 562 BlockDriver *drv = NULL, *d; 563 564 QLIST_FOREACH(d, &bdrv_drivers, list) { 565 if (d->bdrv_probe) { 566 score = d->bdrv_probe(buf, buf_size, filename); 567 if (score > score_max) { 568 score_max = score; 569 drv = d; 570 } 571 } 572 } 573 574 return drv; 575 } 576 577 static int find_image_format(BlockDriverState *bs, const char *filename, 578 BlockDriver **pdrv, Error **errp) 579 { 580 BlockDriver *drv; 581 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 582 int ret = 0; 583 584 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 585 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 586 *pdrv = &bdrv_raw; 587 return ret; 588 } 589 590 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 591 if (ret < 0) { 592 error_setg_errno(errp, -ret, "Could not read image for determining its " 593 "format"); 594 *pdrv = NULL; 595 return ret; 596 } 597 598 drv = bdrv_probe_all(buf, ret, filename); 599 if (!drv) { 600 error_setg(errp, "Could not determine image format: No compatible " 601 "driver found"); 602 ret = -ENOENT; 603 } 604 *pdrv = drv; 605 return ret; 606 } 607 608 /** 609 * Set the current 'total_sectors' value 610 * Return 0 on success, -errno on error. 611 */ 612 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 613 { 614 BlockDriver *drv = bs->drv; 615 616 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 617 if (bs->sg) 618 return 0; 619 620 /* query actual device if possible, otherwise just trust the hint */ 621 if (drv->bdrv_getlength) { 622 int64_t length = drv->bdrv_getlength(bs); 623 if (length < 0) { 624 return length; 625 } 626 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 627 } 628 629 bs->total_sectors = hint; 630 return 0; 631 } 632 633 /** 634 * Set open flags for a given discard mode 635 * 636 * Return 0 on success, -1 if the discard mode was invalid. 
637 */ 638 int bdrv_parse_discard_flags(const char *mode, int *flags) 639 { 640 *flags &= ~BDRV_O_UNMAP; 641 642 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 643 /* do nothing */ 644 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 645 *flags |= BDRV_O_UNMAP; 646 } else { 647 return -1; 648 } 649 650 return 0; 651 } 652 653 /** 654 * Set open flags for a given cache mode 655 * 656 * Return 0 on success, -1 if the cache mode was invalid. 657 */ 658 int bdrv_parse_cache_flags(const char *mode, int *flags) 659 { 660 *flags &= ~BDRV_O_CACHE_MASK; 661 662 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 663 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 664 } else if (!strcmp(mode, "directsync")) { 665 *flags |= BDRV_O_NOCACHE; 666 } else if (!strcmp(mode, "writeback")) { 667 *flags |= BDRV_O_CACHE_WB; 668 } else if (!strcmp(mode, "unsafe")) { 669 *flags |= BDRV_O_CACHE_WB; 670 *flags |= BDRV_O_NO_FLUSH; 671 } else if (!strcmp(mode, "writethrough")) { 672 /* this is the default */ 673 } else { 674 return -1; 675 } 676 677 return 0; 678 } 679 680 /* 681 * Returns the flags that a temporary snapshot should get, based on the 682 * originally requested flags (the originally requested image will have flags 683 * like a backing file) 684 */ 685 static int bdrv_temp_snapshot_flags(int flags) 686 { 687 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 688 } 689 690 /* 691 * Returns the flags that bs->file should get if a protocol driver is expected, 692 * based on the given flags for the parent BDS 693 */ 694 static int bdrv_inherited_flags(int flags) 695 { 696 /* Enable protocol handling, disable format probing for bs->file */ 697 flags |= BDRV_O_PROTOCOL; 698 699 /* Our block drivers take care to send flushes and respect unmap policy, 700 * so we can enable both unconditionally on lower layers. 
*/ 701 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 702 703 /* Clear flags that only apply to the top layer */ 704 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 705 706 return flags; 707 } 708 709 const BdrvChildRole child_file = { 710 .inherit_flags = bdrv_inherited_flags, 711 }; 712 713 /* 714 * Returns the flags that bs->file should get if the use of formats (and not 715 * only protocols) is permitted for it, based on the given flags for the parent 716 * BDS 717 */ 718 static int bdrv_inherited_fmt_flags(int parent_flags) 719 { 720 int flags = child_file.inherit_flags(parent_flags); 721 return flags & ~BDRV_O_PROTOCOL; 722 } 723 724 const BdrvChildRole child_format = { 725 .inherit_flags = bdrv_inherited_fmt_flags, 726 }; 727 728 /* 729 * Returns the flags that bs->backing_hd should get, based on the given flags 730 * for the parent BDS 731 */ 732 static int bdrv_backing_flags(int flags) 733 { 734 /* backing files always opened read-only */ 735 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 736 737 /* snapshot=on is handled on the top layer */ 738 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 739 740 return flags; 741 } 742 743 static const BdrvChildRole child_backing = { 744 .inherit_flags = bdrv_backing_flags, 745 }; 746 747 static int bdrv_open_flags(BlockDriverState *bs, int flags) 748 { 749 int open_flags = flags | BDRV_O_CACHE_WB; 750 751 /* 752 * Clear flags that are internal to the block layer before opening the 753 * image. 754 */ 755 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 756 757 /* 758 * Snapshots should be writable. 
759 */ 760 if (flags & BDRV_O_TEMPORARY) { 761 open_flags |= BDRV_O_RDWR; 762 } 763 764 return open_flags; 765 } 766 767 static void bdrv_assign_node_name(BlockDriverState *bs, 768 const char *node_name, 769 Error **errp) 770 { 771 if (!node_name) { 772 return; 773 } 774 775 /* Check for empty string or invalid characters */ 776 if (!id_wellformed(node_name)) { 777 error_setg(errp, "Invalid node name"); 778 return; 779 } 780 781 /* takes care of avoiding namespaces collisions */ 782 if (blk_by_name(node_name)) { 783 error_setg(errp, "node-name=%s is conflicting with a device id", 784 node_name); 785 return; 786 } 787 788 /* takes care of avoiding duplicates node names */ 789 if (bdrv_find_node(node_name)) { 790 error_setg(errp, "Duplicate node name"); 791 return; 792 } 793 794 /* copy node name into the bs and insert it into the graph list */ 795 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 796 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 797 } 798 799 static QemuOptsList bdrv_runtime_opts = { 800 .name = "bdrv_common", 801 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 802 .desc = { 803 { 804 .name = "node-name", 805 .type = QEMU_OPT_STRING, 806 .help = "Node name of the block device node", 807 }, 808 { /* end of list */ } 809 }, 810 }; 811 812 /* 813 * Common part for opening disk images and files 814 * 815 * Removes all processed options from *options. 
/*
 * Common part for opening disk images and files
 *
 * @bs is the node being opened; it must not have bs->file set yet.
 * @file is the already opened underlying node, or NULL when @drv opens the
 * image itself through its bdrv_file_open callback.
 * @options must be non-NULL and must not be bs->options.
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success.  On failure a negative errno value is returned,
 * @errp is set, and bs->drv, bs->opaque and bs->file are reset to NULL if
 * they had already been assigned.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The filename comes from the underlying node if there is one */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Take the options handled here (node-name) out of the QDict */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Pick the message depending on whether read-only use would work */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from ret */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Undo the driver assignment; the file node itself is not touched here */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}

/*
 * Parse a "json:" pseudo-protocol filename into a flattened options QDict.
 *
 * @filename must start with "json:".  Returns the new QDict, or NULL with
 * @errp set if the remainder is not valid JSON or is not a JSON object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    /* Skip the "json:" prefix; filename now points at the JSON text */
    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 * block driver has been specified explicitly.
 *
 * A "json:" filename is parsed into *options and *pfilename is then set to
 * NULL.
 *
 * Returns 0 on success, a negative errno value with @errp set on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename,
                             int *flags, BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = *flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    BlockDriver *tmp_drv;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    drvname = qdict_get_try_str(*options, "driver");

    /* If the user has explicitly specified the driver, this choice should
     * override the BDRV_O_PROTOCOL flag */
    tmp_drv = drv;
    if (!tmp_drv && drvname) {
        tmp_drv = bdrv_find_format(drvname);
    }
    if (tmp_drv) {
        protocol = tmp_drv->bdrv_file_open;
    }

    if (protocol) {
        *flags |= BDRV_O_PROTOCOL;
    } else {
        *flags &= ~BDRV_O_PROTOCOL;
    }

    /* Store the legacy file name in the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Only a legacy file name is subject to driver-specific parsing */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* A protocol-level open always has a driver at this point */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

/*
 * Make @backing_hd the backing file of @bs, or detach the current backing
 * file if @backing_hd is NULL.
 *
 * The previous backing file, if any, has the operations blocked on behalf of
 * @bs unblocked again.  All operations are then blocked on the new backing
 * file, with the exception of BLOCK_OP_TYPE_COMMIT_TARGET.  @bs's
 * backing_file and backing_format strings and its limits are updated.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing file for this node: create the blocker reason */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling this function.
 *
 * Returns 0 on success, including when a backing file is already open or
 * when there is none to open; a negative errno value with @errp set
 * otherwise.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The options name the file; do not derive a name from bs */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file name and no options: nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* Use the recorded backing format unless a driver is given explicitly */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open_inherit(&backing_hd,
                            *backing_filename ? backing_filename : NULL,
                            NULL, options, 0, bs, &child_backing,
                            NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1224 */ 1225 int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1226 QDict *options, const char *bdref_key, 1227 BlockDriverState* parent, const BdrvChildRole *child_role, 1228 bool allow_none, Error **errp) 1229 { 1230 QDict *image_options; 1231 int ret; 1232 char *bdref_key_dot; 1233 const char *reference; 1234 1235 assert(pbs); 1236 assert(*pbs == NULL); 1237 1238 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1239 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1240 g_free(bdref_key_dot); 1241 1242 reference = qdict_get_try_str(options, bdref_key); 1243 if (!filename && !reference && !qdict_size(image_options)) { 1244 if (allow_none) { 1245 ret = 0; 1246 } else { 1247 error_setg(errp, "A block device must be specified for \"%s\"", 1248 bdref_key); 1249 ret = -EINVAL; 1250 } 1251 QDECREF(image_options); 1252 goto done; 1253 } 1254 1255 ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, 1256 parent, child_role, NULL, errp); 1257 1258 done: 1259 qdict_del(options, bdref_key); 1260 return ret; 1261 } 1262 1263 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1264 { 1265 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. 
*/ 1266 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1267 int64_t total_size; 1268 QemuOpts *opts = NULL; 1269 QDict *snapshot_options; 1270 BlockDriverState *bs_snapshot; 1271 Error *local_err; 1272 int ret; 1273 1274 /* if snapshot, we create a temporary backing file and open it 1275 instead of opening 'filename' directly */ 1276 1277 /* Get the required size from the image */ 1278 total_size = bdrv_getlength(bs); 1279 if (total_size < 0) { 1280 ret = total_size; 1281 error_setg_errno(errp, -total_size, "Could not get image size"); 1282 goto out; 1283 } 1284 1285 /* Create the temporary image */ 1286 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1287 if (ret < 0) { 1288 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1289 goto out; 1290 } 1291 1292 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1293 &error_abort); 1294 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1295 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 1296 qemu_opts_del(opts); 1297 if (ret < 0) { 1298 error_setg_errno(errp, -ret, "Could not create temporary overlay " 1299 "'%s': %s", tmp_filename, 1300 error_get_pretty(local_err)); 1301 error_free(local_err); 1302 goto out; 1303 } 1304 1305 /* Prepare a new options QDict for the temporary file */ 1306 snapshot_options = qdict_new(); 1307 qdict_put(snapshot_options, "file.driver", 1308 qstring_from_str("file")); 1309 qdict_put(snapshot_options, "file.filename", 1310 qstring_from_str(tmp_filename)); 1311 1312 bs_snapshot = bdrv_new(); 1313 1314 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1315 flags, &bdrv_qcow2, &local_err); 1316 if (ret < 0) { 1317 error_propagate(errp, local_err); 1318 goto out; 1319 } 1320 1321 bdrv_append(bs_snapshot, bs); 1322 1323 out: 1324 g_free(tmp_filename); 1325 return ret; 1326 } 1327 1328 /* 1329 * Opens a disk image (raw, qcow2, vmdk, ...) 
1330 * 1331 * options is a QDict of options to pass to the block drivers, or NULL for an 1332 * empty set of options. The reference to the QDict belongs to the block layer 1333 * after the call (even on failure), so if the caller intends to reuse the 1334 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1335 * 1336 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1337 * If it is not NULL, the referenced BDS will be reused. 1338 * 1339 * The reference parameter may be used to specify an existing block device which 1340 * should be opened. If specified, neither options nor a filename may be given, 1341 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1342 */ 1343 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 1344 const char *reference, QDict *options, int flags, 1345 BlockDriverState *parent, 1346 const BdrvChildRole *child_role, 1347 BlockDriver *drv, Error **errp) 1348 { 1349 int ret; 1350 BlockDriverState *file = NULL, *bs; 1351 const char *drvname; 1352 Error *local_err = NULL; 1353 int snapshot_flags = 0; 1354 1355 assert(pbs); 1356 assert(!child_role || !flags); 1357 assert(!child_role == !parent); 1358 1359 if (reference) { 1360 bool options_non_empty = options ? 
qdict_size(options) : false; 1361 QDECREF(options); 1362 1363 if (*pbs) { 1364 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1365 "another block device"); 1366 return -EINVAL; 1367 } 1368 1369 if (filename || options_non_empty) { 1370 error_setg(errp, "Cannot reference an existing block device with " 1371 "additional options or a new filename"); 1372 return -EINVAL; 1373 } 1374 1375 bs = bdrv_lookup_bs(reference, reference, errp); 1376 if (!bs) { 1377 return -ENODEV; 1378 } 1379 bdrv_ref(bs); 1380 *pbs = bs; 1381 return 0; 1382 } 1383 1384 if (*pbs) { 1385 bs = *pbs; 1386 } else { 1387 bs = bdrv_new(); 1388 } 1389 1390 /* NULL means an empty set of options */ 1391 if (options == NULL) { 1392 options = qdict_new(); 1393 } 1394 1395 if (child_role) { 1396 flags = child_role->inherit_flags(parent->open_flags); 1397 } 1398 1399 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); 1400 if (local_err) { 1401 goto fail; 1402 } 1403 1404 /* Find the right image format driver */ 1405 drv = NULL; 1406 drvname = qdict_get_try_str(options, "driver"); 1407 if (drvname) { 1408 drv = bdrv_find_format(drvname); 1409 qdict_del(options, "driver"); 1410 if (!drv) { 1411 error_setg(errp, "Unknown driver: '%s'", drvname); 1412 ret = -EINVAL; 1413 goto fail; 1414 } 1415 } 1416 1417 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1418 1419 bs->open_flags = flags; 1420 bs->options = options; 1421 options = qdict_clone_shallow(options); 1422 1423 /* Open image file without format layer */ 1424 if ((flags & BDRV_O_PROTOCOL) == 0) { 1425 if (flags & BDRV_O_RDWR) { 1426 flags |= BDRV_O_ALLOW_RDWR; 1427 } 1428 if (flags & BDRV_O_SNAPSHOT) { 1429 snapshot_flags = bdrv_temp_snapshot_flags(flags); 1430 flags = bdrv_backing_flags(flags); 1431 } 1432 1433 assert(file == NULL); 1434 bs->open_flags = flags; 1435 ret = bdrv_open_image(&file, filename, options, "file", 1436 bs, &child_file, true, &local_err); 1437 if (ret < 0) { 1438 goto fail; 1439 } 1440 } 1441 
1442 /* Image format probing */ 1443 bs->probed = !drv; 1444 if (!drv && file) { 1445 ret = find_image_format(file, filename, &drv, &local_err); 1446 if (ret < 0) { 1447 goto fail; 1448 } 1449 } else if (!drv) { 1450 error_setg(errp, "Must specify either driver or file"); 1451 ret = -EINVAL; 1452 goto fail; 1453 } 1454 1455 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 1456 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 1457 /* file must be NULL if a protocol BDS is about to be created 1458 * (the inverse results in an error message from bdrv_open_common()) */ 1459 assert(!(flags & BDRV_O_PROTOCOL) || !file); 1460 1461 /* Open the image */ 1462 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1463 if (ret < 0) { 1464 goto fail; 1465 } 1466 1467 if (file && (bs->file != file)) { 1468 bdrv_unref(file); 1469 file = NULL; 1470 } 1471 1472 /* If there is a backing file, use it */ 1473 if ((flags & BDRV_O_NO_BACKING) == 0) { 1474 QDict *backing_options; 1475 1476 qdict_extract_subqdict(options, &backing_options, "backing."); 1477 ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1478 if (ret < 0) { 1479 goto close_and_fail; 1480 } 1481 } 1482 1483 bdrv_refresh_filename(bs); 1484 1485 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1486 * temporary snapshot afterwards. 
*/ 1487 if (snapshot_flags) { 1488 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1489 if (local_err) { 1490 goto close_and_fail; 1491 } 1492 } 1493 1494 /* Check if any unknown options were used */ 1495 if (options && (qdict_size(options) != 0)) { 1496 const QDictEntry *entry = qdict_first(options); 1497 if (flags & BDRV_O_PROTOCOL) { 1498 error_setg(errp, "Block protocol '%s' doesn't support the option " 1499 "'%s'", drv->format_name, entry->key); 1500 } else { 1501 error_setg(errp, "Block format '%s' used by device '%s' doesn't " 1502 "support the option '%s'", drv->format_name, 1503 bdrv_get_device_name(bs), entry->key); 1504 } 1505 1506 ret = -EINVAL; 1507 goto close_and_fail; 1508 } 1509 1510 if (!bdrv_key_required(bs)) { 1511 if (bs->blk) { 1512 blk_dev_change_media_cb(bs->blk, true); 1513 } 1514 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1515 && !runstate_check(RUN_STATE_INMIGRATE) 1516 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1517 error_setg(errp, 1518 "Guest must be stopped for opening of encrypted image"); 1519 ret = -EBUSY; 1520 goto close_and_fail; 1521 } 1522 1523 QDECREF(options); 1524 *pbs = bs; 1525 return 0; 1526 1527 fail: 1528 if (file != NULL) { 1529 bdrv_unref(file); 1530 } 1531 QDECREF(bs->options); 1532 QDECREF(options); 1533 bs->options = NULL; 1534 if (!*pbs) { 1535 /* If *pbs is NULL, a new BDS has been created in this function and 1536 needs to be freed now. Otherwise, it does not need to be closed, 1537 since it has not really been opened yet. 
*/ 1538 bdrv_unref(bs); 1539 } 1540 if (local_err) { 1541 error_propagate(errp, local_err); 1542 } 1543 return ret; 1544 1545 close_and_fail: 1546 /* See fail path, but now the BDS has to be always closed */ 1547 if (*pbs) { 1548 bdrv_close(bs); 1549 } else { 1550 bdrv_unref(bs); 1551 } 1552 QDECREF(options); 1553 if (local_err) { 1554 error_propagate(errp, local_err); 1555 } 1556 return ret; 1557 } 1558 1559 int bdrv_open(BlockDriverState **pbs, const char *filename, 1560 const char *reference, QDict *options, int flags, 1561 BlockDriver *drv, Error **errp) 1562 { 1563 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, 1564 NULL, drv, errp); 1565 } 1566 1567 typedef struct BlockReopenQueueEntry { 1568 bool prepared; 1569 BDRVReopenState state; 1570 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1571 } BlockReopenQueueEntry; 1572 1573 /* 1574 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1575 * reopen of multiple devices. 1576 * 1577 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1578 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1579 * be created and initialized. This newly created BlockReopenQueue should be 1580 * passed back in for subsequent calls that are intended to be of the same 1581 * atomic 'set'. 1582 * 1583 * bs is the BlockDriverState to add to the reopen queue. 1584 * 1585 * flags contains the open flags for the associated bs 1586 * 1587 * returns a pointer to bs_queue, which is either the newly allocated 1588 * bs_queue, or the existing bs_queue being used. 
1589 * 1590 */ 1591 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1592 BlockDriverState *bs, int flags) 1593 { 1594 assert(bs != NULL); 1595 1596 BlockReopenQueueEntry *bs_entry; 1597 if (bs_queue == NULL) { 1598 bs_queue = g_new0(BlockReopenQueue, 1); 1599 QSIMPLEQ_INIT(bs_queue); 1600 } 1601 1602 /* bdrv_open() masks this flag out */ 1603 flags &= ~BDRV_O_PROTOCOL; 1604 1605 if (bs->file) { 1606 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1607 } 1608 1609 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1610 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1611 1612 bs_entry->state.bs = bs; 1613 bs_entry->state.flags = flags; 1614 1615 return bs_queue; 1616 } 1617 1618 /* 1619 * Reopen multiple BlockDriverStates atomically & transactionally. 1620 * 1621 * The queue passed in (bs_queue) must have been built up previous 1622 * via bdrv_reopen_queue(). 1623 * 1624 * Reopens all BDS specified in the queue, with the appropriate 1625 * flags. All devices are prepared for reopen, and failure of any 1626 * device will cause all device changes to be abandonded, and intermediate 1627 * data cleaned up. 1628 * 1629 * If all devices prepare successfully, then the changes are committed 1630 * to all devices. 
1631 * 1632 */ 1633 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1634 { 1635 int ret = -1; 1636 BlockReopenQueueEntry *bs_entry, *next; 1637 Error *local_err = NULL; 1638 1639 assert(bs_queue != NULL); 1640 1641 bdrv_drain_all(); 1642 1643 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1644 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1645 error_propagate(errp, local_err); 1646 goto cleanup; 1647 } 1648 bs_entry->prepared = true; 1649 } 1650 1651 /* If we reach this point, we have success and just need to apply the 1652 * changes 1653 */ 1654 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1655 bdrv_reopen_commit(&bs_entry->state); 1656 } 1657 1658 ret = 0; 1659 1660 cleanup: 1661 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1662 if (ret && bs_entry->prepared) { 1663 bdrv_reopen_abort(&bs_entry->state); 1664 } 1665 g_free(bs_entry); 1666 } 1667 g_free(bs_queue); 1668 return ret; 1669 } 1670 1671 1672 /* Reopen a single BlockDriverState with the specified flags. */ 1673 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1674 { 1675 int ret = -1; 1676 Error *local_err = NULL; 1677 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1678 1679 ret = bdrv_reopen_multiple(queue, &local_err); 1680 if (local_err != NULL) { 1681 error_propagate(errp, local_err); 1682 } 1683 return ret; 1684 } 1685 1686 1687 /* 1688 * Prepares a BlockDriverState for reopen. All changes are staged in the 1689 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1690 * the block driver layer .bdrv_reopen_prepare() 1691 * 1692 * bs is the BlockDriverState to reopen 1693 * flags are the new open flags 1694 * queue is the reopen queue 1695 * 1696 * Returns 0 on success, non-zero on error. On error errp will be set 1697 * as well. 1698 * 1699 * On failure, bdrv_reopen_abort() will be called to clean up any data. 
1700 * It is the responsibility of the caller to then call the abort() or 1701 * commit() for any other BDS that have been left in a prepare() state 1702 * 1703 */ 1704 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1705 Error **errp) 1706 { 1707 int ret = -1; 1708 Error *local_err = NULL; 1709 BlockDriver *drv; 1710 1711 assert(reopen_state != NULL); 1712 assert(reopen_state->bs->drv != NULL); 1713 drv = reopen_state->bs->drv; 1714 1715 /* if we are to stay read-only, do not allow permission change 1716 * to r/w */ 1717 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1718 reopen_state->flags & BDRV_O_RDWR) { 1719 error_setg(errp, "Node '%s' is read only", 1720 bdrv_get_device_or_node_name(reopen_state->bs)); 1721 goto error; 1722 } 1723 1724 1725 ret = bdrv_flush(reopen_state->bs); 1726 if (ret) { 1727 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1728 strerror(-ret)); 1729 goto error; 1730 } 1731 1732 if (drv->bdrv_reopen_prepare) { 1733 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1734 if (ret) { 1735 if (local_err != NULL) { 1736 error_propagate(errp, local_err); 1737 } else { 1738 error_setg(errp, "failed while preparing to reopen image '%s'", 1739 reopen_state->bs->filename); 1740 } 1741 goto error; 1742 } 1743 } else { 1744 /* It is currently mandatory to have a bdrv_reopen_prepare() 1745 * handler for each supported drv. */ 1746 error_setg(errp, "Block format '%s' used by node '%s' " 1747 "does not support reopening files", drv->format_name, 1748 bdrv_get_device_or_node_name(reopen_state->bs)); 1749 ret = -1; 1750 goto error; 1751 } 1752 1753 ret = 0; 1754 1755 error: 1756 return ret; 1757 } 1758 1759 /* 1760 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1761 * makes them final by swapping the staging BlockDriverState contents into 1762 * the active BlockDriverState contents. 
1763 */ 1764 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1765 { 1766 BlockDriver *drv; 1767 1768 assert(reopen_state != NULL); 1769 drv = reopen_state->bs->drv; 1770 assert(drv != NULL); 1771 1772 /* If there are any driver level actions to take */ 1773 if (drv->bdrv_reopen_commit) { 1774 drv->bdrv_reopen_commit(reopen_state); 1775 } 1776 1777 /* set BDS specific flags now */ 1778 reopen_state->bs->open_flags = reopen_state->flags; 1779 reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1780 BDRV_O_CACHE_WB); 1781 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1782 1783 bdrv_refresh_limits(reopen_state->bs, NULL); 1784 } 1785 1786 /* 1787 * Abort the reopen, and delete and free the staged changes in 1788 * reopen_state 1789 */ 1790 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1791 { 1792 BlockDriver *drv; 1793 1794 assert(reopen_state != NULL); 1795 drv = reopen_state->bs->drv; 1796 assert(drv != NULL); 1797 1798 if (drv->bdrv_reopen_abort) { 1799 drv->bdrv_reopen_abort(reopen_state); 1800 } 1801 } 1802 1803 1804 void bdrv_close(BlockDriverState *bs) 1805 { 1806 BdrvAioNotifier *ban, *ban_next; 1807 1808 if (bs->job) { 1809 block_job_cancel_sync(bs->job); 1810 } 1811 bdrv_drain_all(); /* complete I/O */ 1812 bdrv_flush(bs); 1813 bdrv_drain_all(); /* in case flush left pending I/O */ 1814 notifier_list_notify(&bs->close_notifiers, bs); 1815 1816 if (bs->drv) { 1817 if (bs->backing_hd) { 1818 BlockDriverState *backing_hd = bs->backing_hd; 1819 bdrv_set_backing_hd(bs, NULL); 1820 bdrv_unref(backing_hd); 1821 } 1822 bs->drv->bdrv_close(bs); 1823 g_free(bs->opaque); 1824 bs->opaque = NULL; 1825 bs->drv = NULL; 1826 bs->copy_on_read = 0; 1827 bs->backing_file[0] = '\0'; 1828 bs->backing_format[0] = '\0'; 1829 bs->total_sectors = 0; 1830 bs->encrypted = 0; 1831 bs->valid_key = 0; 1832 bs->sg = 0; 1833 bs->zero_beyond_eof = false; 1834 QDECREF(bs->options); 1835 bs->options = NULL; 1836 QDECREF(bs->full_open_options); 
1837 bs->full_open_options = NULL; 1838 1839 if (bs->file != NULL) { 1840 bdrv_unref(bs->file); 1841 bs->file = NULL; 1842 } 1843 } 1844 1845 if (bs->blk) { 1846 blk_dev_change_media_cb(bs->blk, false); 1847 } 1848 1849 /*throttling disk I/O limits*/ 1850 if (bs->io_limits_enabled) { 1851 bdrv_io_limits_disable(bs); 1852 } 1853 1854 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 1855 g_free(ban); 1856 } 1857 QLIST_INIT(&bs->aio_notifiers); 1858 } 1859 1860 void bdrv_close_all(void) 1861 { 1862 BlockDriverState *bs; 1863 1864 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1865 AioContext *aio_context = bdrv_get_aio_context(bs); 1866 1867 aio_context_acquire(aio_context); 1868 bdrv_close(bs); 1869 aio_context_release(aio_context); 1870 } 1871 } 1872 1873 /* make a BlockDriverState anonymous by removing from bdrv_state and 1874 * graph_bdrv_state list. 1875 Also, NULL terminate the device_name to prevent double remove */ 1876 void bdrv_make_anon(BlockDriverState *bs) 1877 { 1878 /* 1879 * Take care to remove bs from bdrv_states only when it's actually 1880 * in it. Note that bs->device_list.tqe_prev is initially null, 1881 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 1882 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 1883 * resetting it to null on remove. 
1884 */ 1885 if (bs->device_list.tqe_prev) { 1886 QTAILQ_REMOVE(&bdrv_states, bs, device_list); 1887 bs->device_list.tqe_prev = NULL; 1888 } 1889 if (bs->node_name[0] != '\0') { 1890 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 1891 } 1892 bs->node_name[0] = '\0'; 1893 } 1894 1895 static void bdrv_rebind(BlockDriverState *bs) 1896 { 1897 if (bs->drv && bs->drv->bdrv_rebind) { 1898 bs->drv->bdrv_rebind(bs); 1899 } 1900 } 1901 1902 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 1903 BlockDriverState *bs_src) 1904 { 1905 /* move some fields that need to stay attached to the device */ 1906 1907 /* dev info */ 1908 bs_dest->guest_block_size = bs_src->guest_block_size; 1909 bs_dest->copy_on_read = bs_src->copy_on_read; 1910 1911 bs_dest->enable_write_cache = bs_src->enable_write_cache; 1912 1913 /* i/o throttled req */ 1914 memcpy(&bs_dest->throttle_state, 1915 &bs_src->throttle_state, 1916 sizeof(ThrottleState)); 1917 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 1918 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 1919 bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 1920 1921 /* r/w error */ 1922 bs_dest->on_read_error = bs_src->on_read_error; 1923 bs_dest->on_write_error = bs_src->on_write_error; 1924 1925 /* i/o status */ 1926 bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 1927 bs_dest->iostatus = bs_src->iostatus; 1928 1929 /* dirty bitmap */ 1930 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 1931 1932 /* reference count */ 1933 bs_dest->refcnt = bs_src->refcnt; 1934 1935 /* job */ 1936 bs_dest->job = bs_src->job; 1937 1938 /* keep the same entry in bdrv_states */ 1939 bs_dest->device_list = bs_src->device_list; 1940 bs_dest->blk = bs_src->blk; 1941 1942 memcpy(bs_dest->op_blockers, bs_src->op_blockers, 1943 sizeof(bs_dest->op_blockers)); 1944 } 1945 1946 /* 1947 * Swap bs contents for two image chains while they are live, 1948 * while keeping required fields on the BlockDriverState that is 1949 * actually 
attached to a device. 1950 * 1951 * This will modify the BlockDriverState fields, and swap contents 1952 * between bs_new and bs_old. Both bs_new and bs_old are modified. 1953 * 1954 * bs_new must not be attached to a BlockBackend. 1955 * 1956 * This function does not create any image files. 1957 */ 1958 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 1959 { 1960 BlockDriverState tmp; 1961 1962 bdrv_drain(bs_new); 1963 bdrv_drain(bs_old); 1964 1965 /* The code needs to swap the node_name but simply swapping node_list won't 1966 * work so first remove the nodes from the graph list, do the swap then 1967 * insert them back if needed. 1968 */ 1969 if (bs_new->node_name[0] != '\0') { 1970 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 1971 } 1972 if (bs_old->node_name[0] != '\0') { 1973 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 1974 } 1975 1976 /* bs_new must be unattached and shouldn't have anything fancy enabled */ 1977 assert(!bs_new->blk); 1978 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 1979 assert(bs_new->job == NULL); 1980 assert(bs_new->io_limits_enabled == false); 1981 assert(!throttle_have_timer(&bs_new->throttle_state)); 1982 1983 tmp = *bs_new; 1984 *bs_new = *bs_old; 1985 *bs_old = tmp; 1986 1987 /* there are some fields that should not be swapped, move them back */ 1988 bdrv_move_feature_fields(&tmp, bs_old); 1989 bdrv_move_feature_fields(bs_old, bs_new); 1990 bdrv_move_feature_fields(bs_new, &tmp); 1991 1992 /* bs_new must remain unattached */ 1993 assert(!bs_new->blk); 1994 1995 /* Check a few fields that should remain attached to the device */ 1996 assert(bs_new->job == NULL); 1997 assert(bs_new->io_limits_enabled == false); 1998 assert(!throttle_have_timer(&bs_new->throttle_state)); 1999 2000 /* insert the nodes back into the graph node list if needed */ 2001 if (bs_new->node_name[0] != '\0') { 2002 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 2003 } 2004 if (bs_old->node_name[0] != '\0') { 2005 
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 2006 } 2007 2008 assert(QLIST_EMPTY(&bs_old->tracked_requests)); 2009 assert(QLIST_EMPTY(&bs_new->tracked_requests)); 2010 2011 bdrv_rebind(bs_new); 2012 bdrv_rebind(bs_old); 2013 } 2014 2015 /* 2016 * Add new bs contents at the top of an image chain while the chain is 2017 * live, while keeping required fields on the top layer. 2018 * 2019 * This will modify the BlockDriverState fields, and swap contents 2020 * between bs_new and bs_top. Both bs_new and bs_top are modified. 2021 * 2022 * bs_new must not be attached to a BlockBackend. 2023 * 2024 * This function does not create any image files. 2025 */ 2026 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 2027 { 2028 bdrv_swap(bs_new, bs_top); 2029 2030 /* The contents of 'tmp' will become bs_top, as we are 2031 * swapping bs_new and bs_top contents. */ 2032 bdrv_set_backing_hd(bs_top, bs_new); 2033 } 2034 2035 static void bdrv_delete(BlockDriverState *bs) 2036 { 2037 assert(!bs->job); 2038 assert(bdrv_op_blocker_is_empty(bs)); 2039 assert(!bs->refcnt); 2040 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 2041 2042 bdrv_close(bs); 2043 2044 /* remove from list, if necessary */ 2045 bdrv_make_anon(bs); 2046 2047 g_free(bs); 2048 } 2049 2050 /* 2051 * Run consistency checks on an image 2052 * 2053 * Returns 0 if the check could be completed (it doesn't mean that the image is 2054 * free of errors) or -errno when an internal error occurred. The results of the 2055 * check are stored in res. 
2056 */ 2057 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2058 { 2059 if (bs->drv == NULL) { 2060 return -ENOMEDIUM; 2061 } 2062 if (bs->drv->bdrv_check == NULL) { 2063 return -ENOTSUP; 2064 } 2065 2066 memset(res, 0, sizeof(*res)); 2067 return bs->drv->bdrv_check(bs, res, fix); 2068 } 2069 2070 #define COMMIT_BUF_SECTORS 2048 2071 2072 /* commit COW file into the raw image */ 2073 int bdrv_commit(BlockDriverState *bs) 2074 { 2075 BlockDriver *drv = bs->drv; 2076 int64_t sector, total_sectors, length, backing_length; 2077 int n, ro, open_flags; 2078 int ret = 0; 2079 uint8_t *buf = NULL; 2080 2081 if (!drv) 2082 return -ENOMEDIUM; 2083 2084 if (!bs->backing_hd) { 2085 return -ENOTSUP; 2086 } 2087 2088 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2089 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 2090 return -EBUSY; 2091 } 2092 2093 ro = bs->backing_hd->read_only; 2094 open_flags = bs->backing_hd->open_flags; 2095 2096 if (ro) { 2097 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 2098 return -EACCES; 2099 } 2100 } 2101 2102 length = bdrv_getlength(bs); 2103 if (length < 0) { 2104 ret = length; 2105 goto ro_cleanup; 2106 } 2107 2108 backing_length = bdrv_getlength(bs->backing_hd); 2109 if (backing_length < 0) { 2110 ret = backing_length; 2111 goto ro_cleanup; 2112 } 2113 2114 /* If our top snapshot is larger than the backing file image, 2115 * grow the backing file image if possible. If not possible, 2116 * we must return an error */ 2117 if (length > backing_length) { 2118 ret = bdrv_truncate(bs->backing_hd, length); 2119 if (ret < 0) { 2120 goto ro_cleanup; 2121 } 2122 } 2123 2124 total_sectors = length >> BDRV_SECTOR_BITS; 2125 2126 /* qemu_try_blockalign() for bs will choose an alignment that works for 2127 * bs->backing_hd as well, so no need to compare the alignment manually. 
*/ 2128 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2129 if (buf == NULL) { 2130 ret = -ENOMEM; 2131 goto ro_cleanup; 2132 } 2133 2134 for (sector = 0; sector < total_sectors; sector += n) { 2135 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2136 if (ret < 0) { 2137 goto ro_cleanup; 2138 } 2139 if (ret) { 2140 ret = bdrv_read(bs, sector, buf, n); 2141 if (ret < 0) { 2142 goto ro_cleanup; 2143 } 2144 2145 ret = bdrv_write(bs->backing_hd, sector, buf, n); 2146 if (ret < 0) { 2147 goto ro_cleanup; 2148 } 2149 } 2150 } 2151 2152 if (drv->bdrv_make_empty) { 2153 ret = drv->bdrv_make_empty(bs); 2154 if (ret < 0) { 2155 goto ro_cleanup; 2156 } 2157 bdrv_flush(bs); 2158 } 2159 2160 /* 2161 * Make sure all data we wrote to the backing device is actually 2162 * stable on disk. 2163 */ 2164 if (bs->backing_hd) { 2165 bdrv_flush(bs->backing_hd); 2166 } 2167 2168 ret = 0; 2169 ro_cleanup: 2170 qemu_vfree(buf); 2171 2172 if (ro) { 2173 /* ignoring error return here */ 2174 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 2175 } 2176 2177 return ret; 2178 } 2179 2180 int bdrv_commit_all(void) 2181 { 2182 BlockDriverState *bs; 2183 2184 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2185 AioContext *aio_context = bdrv_get_aio_context(bs); 2186 2187 aio_context_acquire(aio_context); 2188 if (bs->drv && bs->backing_hd) { 2189 int ret = bdrv_commit(bs); 2190 if (ret < 0) { 2191 aio_context_release(aio_context); 2192 return ret; 2193 } 2194 } 2195 aio_context_release(aio_context); 2196 } 2197 return 0; 2198 } 2199 2200 /* 2201 * Return values: 2202 * 0 - success 2203 * -EINVAL - backing format specified, but no file 2204 * -ENOSPC - can't update the backing file because no space is left in the 2205 * image file header 2206 * -ENOTSUP - format driver doesn't support changing the backing file 2207 */ 2208 int bdrv_change_backing_file(BlockDriverState *bs, 2209 const char *backing_file, const char *backing_fmt) 2210 { 2211 
BlockDriver *drv = bs->drv; 2212 int ret; 2213 2214 /* Backing file format doesn't make sense without a backing file */ 2215 if (backing_fmt && !backing_file) { 2216 return -EINVAL; 2217 } 2218 2219 if (drv->bdrv_change_backing_file != NULL) { 2220 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2221 } else { 2222 ret = -ENOTSUP; 2223 } 2224 2225 if (ret == 0) { 2226 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2227 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2228 } 2229 return ret; 2230 } 2231 2232 /* 2233 * Finds the image layer in the chain that has 'bs' as its backing file. 2234 * 2235 * active is the current topmost image. 2236 * 2237 * Returns NULL if bs is not found in active's image chain, 2238 * or if active == bs. 2239 * 2240 * Returns the bottommost base image if bs == NULL. 2241 */ 2242 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2243 BlockDriverState *bs) 2244 { 2245 while (active && bs != active->backing_hd) { 2246 active = active->backing_hd; 2247 } 2248 2249 return active; 2250 } 2251 2252 /* Given a BDS, searches for the base layer. */ 2253 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2254 { 2255 return bdrv_find_overlay(bs, NULL); 2256 } 2257 2258 typedef struct BlkIntermediateStates { 2259 BlockDriverState *bs; 2260 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 2261 } BlkIntermediateStates; 2262 2263 2264 /* 2265 * Drops images above 'base' up to and including 'top', and sets the image 2266 * above 'top' to have base as its backing file. 2267 * 2268 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2269 * information in 'bs' can be properly updated. 
2270 * 2271 * E.g., this will convert the following chain: 2272 * bottom <- base <- intermediate <- top <- active 2273 * 2274 * to 2275 * 2276 * bottom <- base <- active 2277 * 2278 * It is allowed for bottom==base, in which case it converts: 2279 * 2280 * base <- intermediate <- top <- active 2281 * 2282 * to 2283 * 2284 * base <- active 2285 * 2286 * If backing_file_str is non-NULL, it will be used when modifying top's 2287 * overlay image metadata. 2288 * 2289 * Error conditions: 2290 * if active == top, that is considered an error 2291 * 2292 */ 2293 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2294 BlockDriverState *base, const char *backing_file_str) 2295 { 2296 BlockDriverState *intermediate; 2297 BlockDriverState *base_bs = NULL; 2298 BlockDriverState *new_top_bs = NULL; 2299 BlkIntermediateStates *intermediate_state, *next; 2300 int ret = -EIO; 2301 2302 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 2303 QSIMPLEQ_INIT(&states_to_delete); 2304 2305 if (!top->drv || !base->drv) { 2306 goto exit; 2307 } 2308 2309 new_top_bs = bdrv_find_overlay(active, top); 2310 2311 if (new_top_bs == NULL) { 2312 /* we could not find the image above 'top', this is an error */ 2313 goto exit; 2314 } 2315 2316 /* special case of new_top_bs->backing_hd already pointing to base - nothing 2317 * to do, no intermediate images */ 2318 if (new_top_bs->backing_hd == base) { 2319 ret = 0; 2320 goto exit; 2321 } 2322 2323 intermediate = top; 2324 2325 /* now we will go down through the list, and add each BDS we find 2326 * into our deletion queue, until we hit the 'base' 2327 */ 2328 while (intermediate) { 2329 intermediate_state = g_new0(BlkIntermediateStates, 1); 2330 intermediate_state->bs = intermediate; 2331 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 2332 2333 if (intermediate->backing_hd == base) { 2334 base_bs = intermediate->backing_hd; 2335 break; 2336 } 2337 intermediate = 
intermediate->backing_hd; 2338 } 2339 if (base_bs == NULL) { 2340 /* something went wrong, we did not end at the base. safely 2341 * unravel everything, and exit with error */ 2342 goto exit; 2343 } 2344 2345 /* success - we can delete the intermediate states, and link top->base */ 2346 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 2347 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2348 base_bs->drv ? base_bs->drv->format_name : ""); 2349 if (ret) { 2350 goto exit; 2351 } 2352 bdrv_set_backing_hd(new_top_bs, base_bs); 2353 2354 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2355 /* so that bdrv_close() does not recursively close the chain */ 2356 bdrv_set_backing_hd(intermediate_state->bs, NULL); 2357 bdrv_unref(intermediate_state->bs); 2358 } 2359 ret = 0; 2360 2361 exit: 2362 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2363 g_free(intermediate_state); 2364 } 2365 return ret; 2366 } 2367 2368 /** 2369 * Truncate file to 'offset' bytes (needed only for file protocols) 2370 */ 2371 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2372 { 2373 BlockDriver *drv = bs->drv; 2374 int ret; 2375 if (!drv) 2376 return -ENOMEDIUM; 2377 if (!drv->bdrv_truncate) 2378 return -ENOTSUP; 2379 if (bs->read_only) 2380 return -EACCES; 2381 2382 ret = drv->bdrv_truncate(bs, offset); 2383 if (ret == 0) { 2384 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2385 bdrv_dirty_bitmap_truncate(bs); 2386 if (bs->blk) { 2387 blk_dev_resize_cb(bs->blk); 2388 } 2389 } 2390 return ret; 2391 } 2392 2393 /** 2394 * Length of a allocated file in bytes. Sparse files are counted by actual 2395 * allocated space. Return < 0 if error or unknown. 
2396 */ 2397 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2398 { 2399 BlockDriver *drv = bs->drv; 2400 if (!drv) { 2401 return -ENOMEDIUM; 2402 } 2403 if (drv->bdrv_get_allocated_file_size) { 2404 return drv->bdrv_get_allocated_file_size(bs); 2405 } 2406 if (bs->file) { 2407 return bdrv_get_allocated_file_size(bs->file); 2408 } 2409 return -ENOTSUP; 2410 } 2411 2412 /** 2413 * Return number of sectors on success, -errno on error. 2414 */ 2415 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2416 { 2417 BlockDriver *drv = bs->drv; 2418 2419 if (!drv) 2420 return -ENOMEDIUM; 2421 2422 if (drv->has_variable_length) { 2423 int ret = refresh_total_sectors(bs, bs->total_sectors); 2424 if (ret < 0) { 2425 return ret; 2426 } 2427 } 2428 return bs->total_sectors; 2429 } 2430 2431 /** 2432 * Return length in bytes on success, -errno on error. 2433 * The length is always a multiple of BDRV_SECTOR_SIZE. 2434 */ 2435 int64_t bdrv_getlength(BlockDriverState *bs) 2436 { 2437 int64_t ret = bdrv_nb_sectors(bs); 2438 2439 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2440 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2441 } 2442 2443 /* return 0 as number of sectors if no device present or error */ 2444 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2445 { 2446 int64_t nb_sectors = bdrv_nb_sectors(bs); 2447 2448 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2449 } 2450 2451 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2452 BlockdevOnError on_write_error) 2453 { 2454 bs->on_read_error = on_read_error; 2455 bs->on_write_error = on_write_error; 2456 } 2457 2458 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2459 { 2460 return is_read ? bs->on_read_error : bs->on_write_error; 2461 } 2462 2463 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2464 { 2465 BlockdevOnError on_err = is_read ? 
bs->on_read_error : bs->on_write_error; 2466 2467 switch (on_err) { 2468 case BLOCKDEV_ON_ERROR_ENOSPC: 2469 return (error == ENOSPC) ? 2470 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2471 case BLOCKDEV_ON_ERROR_STOP: 2472 return BLOCK_ERROR_ACTION_STOP; 2473 case BLOCKDEV_ON_ERROR_REPORT: 2474 return BLOCK_ERROR_ACTION_REPORT; 2475 case BLOCKDEV_ON_ERROR_IGNORE: 2476 return BLOCK_ERROR_ACTION_IGNORE; 2477 default: 2478 abort(); 2479 } 2480 } 2481 2482 static void send_qmp_error_event(BlockDriverState *bs, 2483 BlockErrorAction action, 2484 bool is_read, int error) 2485 { 2486 IoOperationType optype; 2487 2488 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2489 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 2490 bdrv_iostatus_is_enabled(bs), 2491 error == ENOSPC, strerror(error), 2492 &error_abort); 2493 } 2494 2495 /* This is done by device models because, while the block layer knows 2496 * about the error, it does not know whether an operation comes from 2497 * the device or the block layer (from a job, for example). 2498 */ 2499 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2500 bool is_read, int error) 2501 { 2502 assert(error >= 0); 2503 2504 if (action == BLOCK_ERROR_ACTION_STOP) { 2505 /* First set the iostatus, so that "info block" returns an iostatus 2506 * that matches the events raised so far (an additional error iostatus 2507 * is fine, but not a lost one). 2508 */ 2509 bdrv_iostatus_set_err(bs, error); 2510 2511 /* Then raise the request to stop the VM and the event. 2512 * qemu_system_vmstop_request_prepare has two effects. First, 2513 * it ensures that the STOP event always comes after the 2514 * BLOCK_IO_ERROR event. Second, it ensures that even if management 2515 * can observe the STOP event and do a "cont" before the STOP 2516 * event is issued, the VM will not stop. In this case, vm_start() 2517 * also ensures that the STOP/RESUME pair of events is emitted. 
2518 */ 2519 qemu_system_vmstop_request_prepare(); 2520 send_qmp_error_event(bs, action, is_read, error); 2521 qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 2522 } else { 2523 send_qmp_error_event(bs, action, is_read, error); 2524 } 2525 } 2526 2527 int bdrv_is_read_only(BlockDriverState *bs) 2528 { 2529 return bs->read_only; 2530 } 2531 2532 int bdrv_is_sg(BlockDriverState *bs) 2533 { 2534 return bs->sg; 2535 } 2536 2537 int bdrv_enable_write_cache(BlockDriverState *bs) 2538 { 2539 return bs->enable_write_cache; 2540 } 2541 2542 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2543 { 2544 bs->enable_write_cache = wce; 2545 2546 /* so a reopen() will preserve wce */ 2547 if (wce) { 2548 bs->open_flags |= BDRV_O_CACHE_WB; 2549 } else { 2550 bs->open_flags &= ~BDRV_O_CACHE_WB; 2551 } 2552 } 2553 2554 int bdrv_is_encrypted(BlockDriverState *bs) 2555 { 2556 if (bs->backing_hd && bs->backing_hd->encrypted) 2557 return 1; 2558 return bs->encrypted; 2559 } 2560 2561 int bdrv_key_required(BlockDriverState *bs) 2562 { 2563 BlockDriverState *backing_hd = bs->backing_hd; 2564 2565 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 2566 return 1; 2567 return (bs->encrypted && !bs->valid_key); 2568 } 2569 2570 int bdrv_set_key(BlockDriverState *bs, const char *key) 2571 { 2572 int ret; 2573 if (bs->backing_hd && bs->backing_hd->encrypted) { 2574 ret = bdrv_set_key(bs->backing_hd, key); 2575 if (ret < 0) 2576 return ret; 2577 if (!bs->encrypted) 2578 return 0; 2579 } 2580 if (!bs->encrypted) { 2581 return -EINVAL; 2582 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2583 return -ENOMEDIUM; 2584 } 2585 ret = bs->drv->bdrv_set_key(bs, key); 2586 if (ret < 0) { 2587 bs->valid_key = 0; 2588 } else if (!bs->valid_key) { 2589 bs->valid_key = 1; 2590 if (bs->blk) { 2591 /* call the change callback now, we skipped it on open */ 2592 blk_dev_change_media_cb(bs->blk, true); 2593 } 2594 } 2595 return ret; 2596 } 2597 2598 /* 2599 * Provide an encryption 
key for @bs. 2600 * If @key is non-null: 2601 * If @bs is not encrypted, fail. 2602 * Else if the key is invalid, fail. 2603 * Else set @bs's key to @key, replacing the existing key, if any. 2604 * If @key is null: 2605 * If @bs is encrypted and still lacks a key, fail. 2606 * Else do nothing. 2607 * On failure, store an error object through @errp if non-null. 2608 */ 2609 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2610 { 2611 if (key) { 2612 if (!bdrv_is_encrypted(bs)) { 2613 error_setg(errp, "Node '%s' is not encrypted", 2614 bdrv_get_device_or_node_name(bs)); 2615 } else if (bdrv_set_key(bs, key) < 0) { 2616 error_set(errp, QERR_INVALID_PASSWORD); 2617 } 2618 } else { 2619 if (bdrv_key_required(bs)) { 2620 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2621 "'%s' (%s) is encrypted", 2622 bdrv_get_device_or_node_name(bs), 2623 bdrv_get_encrypted_filename(bs)); 2624 } 2625 } 2626 } 2627 2628 const char *bdrv_get_format_name(BlockDriverState *bs) 2629 { 2630 return bs->drv ? 
bs->drv->format_name : NULL; 2631 } 2632 2633 static int qsort_strcmp(const void *a, const void *b) 2634 { 2635 return strcmp(a, b); 2636 } 2637 2638 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2639 void *opaque) 2640 { 2641 BlockDriver *drv; 2642 int count = 0; 2643 int i; 2644 const char **formats = NULL; 2645 2646 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2647 if (drv->format_name) { 2648 bool found = false; 2649 int i = count; 2650 while (formats && i && !found) { 2651 found = !strcmp(formats[--i], drv->format_name); 2652 } 2653 2654 if (!found) { 2655 formats = g_renew(const char *, formats, count + 1); 2656 formats[count++] = drv->format_name; 2657 } 2658 } 2659 } 2660 2661 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2662 2663 for (i = 0; i < count; i++) { 2664 it(opaque, formats[i]); 2665 } 2666 2667 g_free(formats); 2668 } 2669 2670 /* This function is to find a node in the bs graph */ 2671 BlockDriverState *bdrv_find_node(const char *node_name) 2672 { 2673 BlockDriverState *bs; 2674 2675 assert(node_name); 2676 2677 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2678 if (!strcmp(node_name, bs->node_name)) { 2679 return bs; 2680 } 2681 } 2682 return NULL; 2683 } 2684 2685 /* Put this QMP function here so it can access the static graph_bdrv_states. 
*/ 2686 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2687 { 2688 BlockDeviceInfoList *list, *entry; 2689 BlockDriverState *bs; 2690 2691 list = NULL; 2692 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2693 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 2694 if (!info) { 2695 qapi_free_BlockDeviceInfoList(list); 2696 return NULL; 2697 } 2698 entry = g_malloc0(sizeof(*entry)); 2699 entry->value = info; 2700 entry->next = list; 2701 list = entry; 2702 } 2703 2704 return list; 2705 } 2706 2707 BlockDriverState *bdrv_lookup_bs(const char *device, 2708 const char *node_name, 2709 Error **errp) 2710 { 2711 BlockBackend *blk; 2712 BlockDriverState *bs; 2713 2714 if (device) { 2715 blk = blk_by_name(device); 2716 2717 if (blk) { 2718 return blk_bs(blk); 2719 } 2720 } 2721 2722 if (node_name) { 2723 bs = bdrv_find_node(node_name); 2724 2725 if (bs) { 2726 return bs; 2727 } 2728 } 2729 2730 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2731 device ? device : "", 2732 node_name ? node_name : ""); 2733 return NULL; 2734 } 2735 2736 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2737 * return false. If either argument is NULL, return false. 
*/ 2738 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2739 { 2740 while (top && top != base) { 2741 top = top->backing_hd; 2742 } 2743 2744 return top != NULL; 2745 } 2746 2747 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2748 { 2749 if (!bs) { 2750 return QTAILQ_FIRST(&graph_bdrv_states); 2751 } 2752 return QTAILQ_NEXT(bs, node_list); 2753 } 2754 2755 BlockDriverState *bdrv_next(BlockDriverState *bs) 2756 { 2757 if (!bs) { 2758 return QTAILQ_FIRST(&bdrv_states); 2759 } 2760 return QTAILQ_NEXT(bs, device_list); 2761 } 2762 2763 const char *bdrv_get_node_name(const BlockDriverState *bs) 2764 { 2765 return bs->node_name; 2766 } 2767 2768 /* TODO check what callers really want: bs->node_name or blk_name() */ 2769 const char *bdrv_get_device_name(const BlockDriverState *bs) 2770 { 2771 return bs->blk ? blk_name(bs->blk) : ""; 2772 } 2773 2774 /* This can be used to identify nodes that might not have a device 2775 * name associated. Since node and device names live in the same 2776 * namespace, the result is unambiguous. The exception is if both are 2777 * absent, then this returns an empty (non-null) string. */ 2778 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2779 { 2780 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2781 } 2782 2783 int bdrv_get_flags(BlockDriverState *bs) 2784 { 2785 return bs->open_flags; 2786 } 2787 2788 int bdrv_has_zero_init_1(BlockDriverState *bs) 2789 { 2790 return 1; 2791 } 2792 2793 int bdrv_has_zero_init(BlockDriverState *bs) 2794 { 2795 assert(bs->drv); 2796 2797 /* If BS is a copy on write image, it is initialized to 2798 the contents of the base image, which may not be zeroes. 
*/ 2799 if (bs->backing_hd) { 2800 return 0; 2801 } 2802 if (bs->drv->bdrv_has_zero_init) { 2803 return bs->drv->bdrv_has_zero_init(bs); 2804 } 2805 2806 /* safe default */ 2807 return 0; 2808 } 2809 2810 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2811 { 2812 BlockDriverInfo bdi; 2813 2814 if (bs->backing_hd) { 2815 return false; 2816 } 2817 2818 if (bdrv_get_info(bs, &bdi) == 0) { 2819 return bdi.unallocated_blocks_are_zero; 2820 } 2821 2822 return false; 2823 } 2824 2825 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 2826 { 2827 BlockDriverInfo bdi; 2828 2829 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 2830 return false; 2831 } 2832 2833 if (bdrv_get_info(bs, &bdi) == 0) { 2834 return bdi.can_write_zeroes_with_unmap; 2835 } 2836 2837 return false; 2838 } 2839 2840 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2841 { 2842 if (bs->backing_hd && bs->backing_hd->encrypted) 2843 return bs->backing_file; 2844 else if (bs->encrypted) 2845 return bs->filename; 2846 else 2847 return NULL; 2848 } 2849 2850 void bdrv_get_backing_filename(BlockDriverState *bs, 2851 char *filename, int filename_size) 2852 { 2853 pstrcpy(filename, filename_size, bs->backing_file); 2854 } 2855 2856 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2857 { 2858 BlockDriver *drv = bs->drv; 2859 if (!drv) 2860 return -ENOMEDIUM; 2861 if (!drv->bdrv_get_info) 2862 return -ENOTSUP; 2863 memset(bdi, 0, sizeof(*bdi)); 2864 return drv->bdrv_get_info(bs, bdi); 2865 } 2866 2867 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 2868 { 2869 BlockDriver *drv = bs->drv; 2870 if (drv && drv->bdrv_get_specific_info) { 2871 return drv->bdrv_get_specific_info(bs); 2872 } 2873 return NULL; 2874 } 2875 2876 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2877 { 2878 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 2879 return; 2880 } 2881 2882 bs->drv->bdrv_debug_event(bs, event); 2883 } 2884 2885 int 
bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 2886 const char *tag) 2887 { 2888 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 2889 bs = bs->file; 2890 } 2891 2892 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 2893 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 2894 } 2895 2896 return -ENOTSUP; 2897 } 2898 2899 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 2900 { 2901 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 2902 bs = bs->file; 2903 } 2904 2905 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 2906 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 2907 } 2908 2909 return -ENOTSUP; 2910 } 2911 2912 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 2913 { 2914 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 2915 bs = bs->file; 2916 } 2917 2918 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 2919 return bs->drv->bdrv_debug_resume(bs, tag); 2920 } 2921 2922 return -ENOTSUP; 2923 } 2924 2925 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 2926 { 2927 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 2928 bs = bs->file; 2929 } 2930 2931 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 2932 return bs->drv->bdrv_debug_is_suspended(bs, tag); 2933 } 2934 2935 return false; 2936 } 2937 2938 int bdrv_is_snapshot(BlockDriverState *bs) 2939 { 2940 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2941 } 2942 2943 /* backing_file can either be relative, or absolute, or a protocol. If it is 2944 * relative, it must be relative to the chain. So, passing in bs->filename 2945 * from a BDS as backing_file should not be done, as that may be relative to 2946 * the CWD rather than the chain. 
*/ 2947 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 2948 const char *backing_file) 2949 { 2950 char *filename_full = NULL; 2951 char *backing_file_full = NULL; 2952 char *filename_tmp = NULL; 2953 int is_protocol = 0; 2954 BlockDriverState *curr_bs = NULL; 2955 BlockDriverState *retval = NULL; 2956 2957 if (!bs || !bs->drv || !backing_file) { 2958 return NULL; 2959 } 2960 2961 filename_full = g_malloc(PATH_MAX); 2962 backing_file_full = g_malloc(PATH_MAX); 2963 filename_tmp = g_malloc(PATH_MAX); 2964 2965 is_protocol = path_has_protocol(backing_file); 2966 2967 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 2968 2969 /* If either of the filename paths is actually a protocol, then 2970 * compare unmodified paths; otherwise make paths relative */ 2971 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 2972 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 2973 retval = curr_bs->backing_hd; 2974 break; 2975 } 2976 } else { 2977 /* If not an absolute filename path, make it relative to the current 2978 * image's filename path */ 2979 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2980 backing_file); 2981 2982 /* We are going to compare absolute pathnames */ 2983 if (!realpath(filename_tmp, filename_full)) { 2984 continue; 2985 } 2986 2987 /* We need to make sure the backing filename we are comparing against 2988 * is relative to the current image filename (or absolute) */ 2989 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2990 curr_bs->backing_file); 2991 2992 if (!realpath(filename_tmp, backing_file_full)) { 2993 continue; 2994 } 2995 2996 if (strcmp(backing_file_full, filename_full) == 0) { 2997 retval = curr_bs->backing_hd; 2998 break; 2999 } 3000 } 3001 } 3002 3003 g_free(filename_full); 3004 g_free(backing_file_full); 3005 g_free(filename_tmp); 3006 return retval; 3007 } 3008 3009 int bdrv_get_backing_file_depth(BlockDriverState *bs) 3010 { 3011 if (!bs->drv) { 3012 return 0; 3013 } 
3014 3015 if (!bs->backing_hd) { 3016 return 0; 3017 } 3018 3019 return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 3020 } 3021 3022 void bdrv_init(void) 3023 { 3024 module_call_init(MODULE_INIT_BLOCK); 3025 } 3026 3027 void bdrv_init_with_whitelist(void) 3028 { 3029 use_bdrv_whitelist = 1; 3030 bdrv_init(); 3031 } 3032 3033 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 3034 { 3035 Error *local_err = NULL; 3036 int ret; 3037 3038 if (!bs->drv) { 3039 return; 3040 } 3041 3042 if (!(bs->open_flags & BDRV_O_INCOMING)) { 3043 return; 3044 } 3045 bs->open_flags &= ~BDRV_O_INCOMING; 3046 3047 if (bs->drv->bdrv_invalidate_cache) { 3048 bs->drv->bdrv_invalidate_cache(bs, &local_err); 3049 } else if (bs->file) { 3050 bdrv_invalidate_cache(bs->file, &local_err); 3051 } 3052 if (local_err) { 3053 error_propagate(errp, local_err); 3054 return; 3055 } 3056 3057 ret = refresh_total_sectors(bs, bs->total_sectors); 3058 if (ret < 0) { 3059 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 3060 return; 3061 } 3062 } 3063 3064 void bdrv_invalidate_cache_all(Error **errp) 3065 { 3066 BlockDriverState *bs; 3067 Error *local_err = NULL; 3068 3069 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3070 AioContext *aio_context = bdrv_get_aio_context(bs); 3071 3072 aio_context_acquire(aio_context); 3073 bdrv_invalidate_cache(bs, &local_err); 3074 aio_context_release(aio_context); 3075 if (local_err) { 3076 error_propagate(errp, local_err); 3077 return; 3078 } 3079 } 3080 } 3081 3082 /**************************************************************/ 3083 /* removable device support */ 3084 3085 /** 3086 * Return TRUE if the media is present 3087 */ 3088 int bdrv_is_inserted(BlockDriverState *bs) 3089 { 3090 BlockDriver *drv = bs->drv; 3091 3092 if (!drv) 3093 return 0; 3094 if (!drv->bdrv_is_inserted) 3095 return 1; 3096 return drv->bdrv_is_inserted(bs); 3097 } 3098 3099 /** 3100 * Return whether the media changed since the last call to this 3101 * 
function, or -ENOTSUP if we don't know. Most drivers don't know. 3102 */ 3103 int bdrv_media_changed(BlockDriverState *bs) 3104 { 3105 BlockDriver *drv = bs->drv; 3106 3107 if (drv && drv->bdrv_media_changed) { 3108 return drv->bdrv_media_changed(bs); 3109 } 3110 return -ENOTSUP; 3111 } 3112 3113 /** 3114 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3115 */ 3116 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3117 { 3118 BlockDriver *drv = bs->drv; 3119 const char *device_name; 3120 3121 if (drv && drv->bdrv_eject) { 3122 drv->bdrv_eject(bs, eject_flag); 3123 } 3124 3125 device_name = bdrv_get_device_name(bs); 3126 if (device_name[0] != '\0') { 3127 qapi_event_send_device_tray_moved(device_name, 3128 eject_flag, &error_abort); 3129 } 3130 } 3131 3132 /** 3133 * Lock or unlock the media (if it is locked, the user won't be able 3134 * to eject it manually). 3135 */ 3136 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3137 { 3138 BlockDriver *drv = bs->drv; 3139 3140 trace_bdrv_lock_medium(bs, locked); 3141 3142 if (drv && drv->bdrv_lock_medium) { 3143 drv->bdrv_lock_medium(bs, locked); 3144 } 3145 } 3146 3147 void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 3148 { 3149 bs->guest_block_size = align; 3150 } 3151 3152 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 3153 { 3154 BdrvDirtyBitmap *bm; 3155 3156 assert(name); 3157 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3158 if (bm->name && !strcmp(name, bm->name)) { 3159 return bm; 3160 } 3161 } 3162 return NULL; 3163 } 3164 3165 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 3166 { 3167 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3168 g_free(bitmap->name); 3169 bitmap->name = NULL; 3170 } 3171 3172 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 3173 uint32_t granularity, 3174 const char *name, 3175 Error **errp) 3176 { 3177 int64_t bitmap_size; 3178 BdrvDirtyBitmap *bitmap; 3179 uint32_t 
sector_granularity; 3180 3181 assert((granularity & (granularity - 1)) == 0); 3182 3183 if (name && bdrv_find_dirty_bitmap(bs, name)) { 3184 error_setg(errp, "Bitmap already exists: %s", name); 3185 return NULL; 3186 } 3187 sector_granularity = granularity >> BDRV_SECTOR_BITS; 3188 assert(sector_granularity); 3189 bitmap_size = bdrv_nb_sectors(bs); 3190 if (bitmap_size < 0) { 3191 error_setg_errno(errp, -bitmap_size, "could not get length of device"); 3192 errno = -bitmap_size; 3193 return NULL; 3194 } 3195 bitmap = g_new0(BdrvDirtyBitmap, 1); 3196 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 3197 bitmap->size = bitmap_size; 3198 bitmap->name = g_strdup(name); 3199 bitmap->disabled = false; 3200 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 3201 return bitmap; 3202 } 3203 3204 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 3205 { 3206 return bitmap->successor; 3207 } 3208 3209 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 3210 { 3211 return !(bitmap->disabled || bitmap->successor); 3212 } 3213 3214 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) 3215 { 3216 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3217 return DIRTY_BITMAP_STATUS_FROZEN; 3218 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3219 return DIRTY_BITMAP_STATUS_DISABLED; 3220 } else { 3221 return DIRTY_BITMAP_STATUS_ACTIVE; 3222 } 3223 } 3224 3225 /** 3226 * Create a successor bitmap destined to replace this bitmap after an operation. 3227 * Requires that the bitmap is not frozen and has no successor. 
3228 */ 3229 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 3230 BdrvDirtyBitmap *bitmap, Error **errp) 3231 { 3232 uint64_t granularity; 3233 BdrvDirtyBitmap *child; 3234 3235 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3236 error_setg(errp, "Cannot create a successor for a bitmap that is " 3237 "currently frozen"); 3238 return -1; 3239 } 3240 assert(!bitmap->successor); 3241 3242 /* Create an anonymous successor */ 3243 granularity = bdrv_dirty_bitmap_granularity(bitmap); 3244 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 3245 if (!child) { 3246 return -1; 3247 } 3248 3249 /* Successor will be on or off based on our current state. */ 3250 child->disabled = bitmap->disabled; 3251 3252 /* Install the successor and freeze the parent */ 3253 bitmap->successor = child; 3254 return 0; 3255 } 3256 3257 /** 3258 * For a bitmap with a successor, yield our name to the successor, 3259 * delete the old bitmap, and return a handle to the new bitmap. 3260 */ 3261 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 3262 BdrvDirtyBitmap *bitmap, 3263 Error **errp) 3264 { 3265 char *name; 3266 BdrvDirtyBitmap *successor = bitmap->successor; 3267 3268 if (successor == NULL) { 3269 error_setg(errp, "Cannot relinquish control if " 3270 "there's no successor present"); 3271 return NULL; 3272 } 3273 3274 name = bitmap->name; 3275 bitmap->name = NULL; 3276 successor->name = name; 3277 bitmap->successor = NULL; 3278 bdrv_release_dirty_bitmap(bs, bitmap); 3279 3280 return successor; 3281 } 3282 3283 /** 3284 * In cases of failure where we can no longer safely delete the parent, 3285 * we may wish to re-join the parent and child/successor. 3286 * The merged parent will be un-frozen, but not explicitly re-enabled. 
3287 */ 3288 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 3289 BdrvDirtyBitmap *parent, 3290 Error **errp) 3291 { 3292 BdrvDirtyBitmap *successor = parent->successor; 3293 3294 if (!successor) { 3295 error_setg(errp, "Cannot reclaim a successor when none is present"); 3296 return NULL; 3297 } 3298 3299 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 3300 error_setg(errp, "Merging of parent and successor bitmap failed"); 3301 return NULL; 3302 } 3303 bdrv_release_dirty_bitmap(bs, successor); 3304 parent->successor = NULL; 3305 3306 return parent; 3307 } 3308 3309 /** 3310 * Truncates _all_ bitmaps attached to a BDS. 3311 */ 3312 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 3313 { 3314 BdrvDirtyBitmap *bitmap; 3315 uint64_t size = bdrv_nb_sectors(bs); 3316 3317 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3318 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3319 hbitmap_truncate(bitmap->bitmap, size); 3320 bitmap->size = size; 3321 } 3322 } 3323 3324 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 3325 { 3326 BdrvDirtyBitmap *bm, *next; 3327 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 3328 if (bm == bitmap) { 3329 assert(!bdrv_dirty_bitmap_frozen(bm)); 3330 QLIST_REMOVE(bitmap, list); 3331 hbitmap_free(bitmap->bitmap); 3332 g_free(bitmap->name); 3333 g_free(bitmap); 3334 return; 3335 } 3336 } 3337 } 3338 3339 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3340 { 3341 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3342 bitmap->disabled = true; 3343 } 3344 3345 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3346 { 3347 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3348 bitmap->disabled = false; 3349 } 3350 3351 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 3352 { 3353 BdrvDirtyBitmap *bm; 3354 BlockDirtyInfoList *list = NULL; 3355 BlockDirtyInfoList **plist = &list; 3356 3357 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3358 BlockDirtyInfo *info = 
g_new0(BlockDirtyInfo, 1); 3359 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 3360 info->count = bdrv_get_dirty_count(bm); 3361 info->granularity = bdrv_dirty_bitmap_granularity(bm); 3362 info->has_name = !!bm->name; 3363 info->name = g_strdup(bm->name); 3364 info->status = bdrv_dirty_bitmap_status(bm); 3365 entry->value = info; 3366 *plist = entry; 3367 plist = &entry->next; 3368 } 3369 3370 return list; 3371 } 3372 3373 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 3374 { 3375 if (bitmap) { 3376 return hbitmap_get(bitmap->bitmap, sector); 3377 } else { 3378 return 0; 3379 } 3380 } 3381 3382 /** 3383 * Chooses a default granularity based on the existing cluster size, 3384 * but clamped between [4K, 64K]. Defaults to 64K in the case that there 3385 * is no cluster size information available. 3386 */ 3387 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 3388 { 3389 BlockDriverInfo bdi; 3390 uint32_t granularity; 3391 3392 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 3393 granularity = MAX(4096, bdi.cluster_size); 3394 granularity = MIN(65536, granularity); 3395 } else { 3396 granularity = 65536; 3397 } 3398 3399 return granularity; 3400 } 3401 3402 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 3403 { 3404 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 3405 } 3406 3407 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 3408 { 3409 hbitmap_iter_init(hbi, bitmap->bitmap, 0); 3410 } 3411 3412 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3413 int64_t cur_sector, int nr_sectors) 3414 { 3415 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3416 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3417 } 3418 3419 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3420 int64_t cur_sector, int nr_sectors) 3421 { 3422 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3423 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3424 } 3425 3426 
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3427 { 3428 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3429 hbitmap_reset(bitmap->bitmap, 0, bitmap->size); 3430 } 3431 3432 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 3433 int nr_sectors) 3434 { 3435 BdrvDirtyBitmap *bitmap; 3436 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3437 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3438 continue; 3439 } 3440 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3441 } 3442 } 3443 3444 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3445 int nr_sectors) 3446 { 3447 BdrvDirtyBitmap *bitmap; 3448 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3449 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3450 continue; 3451 } 3452 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3453 } 3454 } 3455 3456 /** 3457 * Advance an HBitmapIter to an arbitrary offset. 3458 */ 3459 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 3460 { 3461 assert(hbi->hb); 3462 hbitmap_iter_init(hbi, hbi->hb, offset); 3463 } 3464 3465 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 3466 { 3467 return hbitmap_count(bitmap->bitmap); 3468 } 3469 3470 /* Get a reference to bs */ 3471 void bdrv_ref(BlockDriverState *bs) 3472 { 3473 bs->refcnt++; 3474 } 3475 3476 /* Release a previously grabbed reference to bs. 3477 * If after releasing, reference count is zero, the BlockDriverState is 3478 * deleted. 
*/ 3479 void bdrv_unref(BlockDriverState *bs) 3480 { 3481 if (!bs) { 3482 return; 3483 } 3484 assert(bs->refcnt > 0); 3485 if (--bs->refcnt == 0) { 3486 bdrv_delete(bs); 3487 } 3488 } 3489 3490 struct BdrvOpBlocker { 3491 Error *reason; 3492 QLIST_ENTRY(BdrvOpBlocker) list; 3493 }; 3494 3495 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 3496 { 3497 BdrvOpBlocker *blocker; 3498 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3499 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 3500 blocker = QLIST_FIRST(&bs->op_blockers[op]); 3501 if (errp) { 3502 error_setg(errp, "Node '%s' is busy: %s", 3503 bdrv_get_device_or_node_name(bs), 3504 error_get_pretty(blocker->reason)); 3505 } 3506 return true; 3507 } 3508 return false; 3509 } 3510 3511 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 3512 { 3513 BdrvOpBlocker *blocker; 3514 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3515 3516 blocker = g_new0(BdrvOpBlocker, 1); 3517 blocker->reason = reason; 3518 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 3519 } 3520 3521 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 3522 { 3523 BdrvOpBlocker *blocker, *next; 3524 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3525 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 3526 if (blocker->reason == reason) { 3527 QLIST_REMOVE(blocker, list); 3528 g_free(blocker); 3529 } 3530 } 3531 } 3532 3533 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 3534 { 3535 int i; 3536 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3537 bdrv_op_block(bs, i, reason); 3538 } 3539 } 3540 3541 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 3542 { 3543 int i; 3544 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3545 bdrv_op_unblock(bs, i, reason); 3546 } 3547 } 3548 3549 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 3550 { 3551 int i; 3552 3553 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3554 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 3555 
return false; 3556 } 3557 } 3558 return true; 3559 } 3560 3561 void bdrv_iostatus_enable(BlockDriverState *bs) 3562 { 3563 bs->iostatus_enabled = true; 3564 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3565 } 3566 3567 /* The I/O status is only enabled if the drive explicitly 3568 * enables it _and_ the VM is configured to stop on errors */ 3569 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3570 { 3571 return (bs->iostatus_enabled && 3572 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 3573 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 3574 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 3575 } 3576 3577 void bdrv_iostatus_disable(BlockDriverState *bs) 3578 { 3579 bs->iostatus_enabled = false; 3580 } 3581 3582 void bdrv_iostatus_reset(BlockDriverState *bs) 3583 { 3584 if (bdrv_iostatus_is_enabled(bs)) { 3585 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3586 if (bs->job) { 3587 block_job_iostatus_reset(bs->job); 3588 } 3589 } 3590 } 3591 3592 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3593 { 3594 assert(bdrv_iostatus_is_enabled(bs)); 3595 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3596 bs->iostatus = error == ENOSPC ? 
BLOCK_DEVICE_IO_STATUS_NOSPACE : 3597 BLOCK_DEVICE_IO_STATUS_FAILED; 3598 } 3599 } 3600 3601 void bdrv_img_create(const char *filename, const char *fmt, 3602 const char *base_filename, const char *base_fmt, 3603 char *options, uint64_t img_size, int flags, 3604 Error **errp, bool quiet) 3605 { 3606 QemuOptsList *create_opts = NULL; 3607 QemuOpts *opts = NULL; 3608 const char *backing_fmt, *backing_file; 3609 int64_t size; 3610 BlockDriver *drv, *proto_drv; 3611 BlockDriver *backing_drv = NULL; 3612 Error *local_err = NULL; 3613 int ret = 0; 3614 3615 /* Find driver and parse its options */ 3616 drv = bdrv_find_format(fmt); 3617 if (!drv) { 3618 error_setg(errp, "Unknown file format '%s'", fmt); 3619 return; 3620 } 3621 3622 proto_drv = bdrv_find_protocol(filename, true, errp); 3623 if (!proto_drv) { 3624 return; 3625 } 3626 3627 if (!drv->create_opts) { 3628 error_setg(errp, "Format driver '%s' does not support image creation", 3629 drv->format_name); 3630 return; 3631 } 3632 3633 if (!proto_drv->create_opts) { 3634 error_setg(errp, "Protocol driver '%s' does not support image creation", 3635 proto_drv->format_name); 3636 return; 3637 } 3638 3639 create_opts = qemu_opts_append(create_opts, drv->create_opts); 3640 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 3641 3642 /* Create parameter list with default values */ 3643 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 3644 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 3645 3646 /* Parse -o options */ 3647 if (options) { 3648 qemu_opts_do_parse(opts, options, NULL, &local_err); 3649 if (local_err) { 3650 error_report_err(local_err); 3651 local_err = NULL; 3652 error_setg(errp, "Invalid options for file format '%s'", fmt); 3653 goto out; 3654 } 3655 } 3656 3657 if (base_filename) { 3658 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 3659 if (local_err) { 3660 error_setg(errp, "Backing file not supported for file format '%s'", 3661 fmt); 
3662 goto out; 3663 } 3664 } 3665 3666 if (base_fmt) { 3667 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 3668 if (local_err) { 3669 error_setg(errp, "Backing file format not supported for file " 3670 "format '%s'", fmt); 3671 goto out; 3672 } 3673 } 3674 3675 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3676 if (backing_file) { 3677 if (!strcmp(filename, backing_file)) { 3678 error_setg(errp, "Error: Trying to create an image with the " 3679 "same filename as the backing file"); 3680 goto out; 3681 } 3682 } 3683 3684 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3685 if (backing_fmt) { 3686 backing_drv = bdrv_find_format(backing_fmt); 3687 if (!backing_drv) { 3688 error_setg(errp, "Unknown backing file format '%s'", 3689 backing_fmt); 3690 goto out; 3691 } 3692 } 3693 3694 // The size for the image must always be specified, with one exception: 3695 // If we are using a backing file, we can obtain the size from there 3696 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3697 if (size == -1) { 3698 if (backing_file) { 3699 BlockDriverState *bs; 3700 char *full_backing = g_new0(char, PATH_MAX); 3701 int64_t size; 3702 int back_flags; 3703 3704 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 3705 full_backing, PATH_MAX, 3706 &local_err); 3707 if (local_err) { 3708 g_free(full_backing); 3709 goto out; 3710 } 3711 3712 /* backing files always opened read-only */ 3713 back_flags = 3714 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 3715 3716 bs = NULL; 3717 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 3718 backing_drv, &local_err); 3719 g_free(full_backing); 3720 if (ret < 0) { 3721 goto out; 3722 } 3723 size = bdrv_getlength(bs); 3724 if (size < 0) { 3725 error_setg_errno(errp, -size, "Could not get size of '%s'", 3726 backing_file); 3727 bdrv_unref(bs); 3728 goto out; 3729 } 3730 3731 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 3732 3733 bdrv_unref(bs); 
3734 } else { 3735 error_setg(errp, "Image creation needs a size parameter"); 3736 goto out; 3737 } 3738 } 3739 3740 if (!quiet) { 3741 printf("Formatting '%s', fmt=%s", filename, fmt); 3742 qemu_opts_print(opts, " "); 3743 puts(""); 3744 } 3745 3746 ret = bdrv_create(drv, filename, opts, &local_err); 3747 3748 if (ret == -EFBIG) { 3749 /* This is generally a better message than whatever the driver would 3750 * deliver (especially because of the cluster_size_hint), since that 3751 * is most probably not much different from "image too large". */ 3752 const char *cluster_size_hint = ""; 3753 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 3754 cluster_size_hint = " (try using a larger cluster size)"; 3755 } 3756 error_setg(errp, "The image size is too large for file format '%s'" 3757 "%s", fmt, cluster_size_hint); 3758 error_free(local_err); 3759 local_err = NULL; 3760 } 3761 3762 out: 3763 qemu_opts_del(opts); 3764 qemu_opts_free(create_opts); 3765 if (local_err) { 3766 error_propagate(errp, local_err); 3767 } 3768 } 3769 3770 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 3771 { 3772 return bs->aio_context; 3773 } 3774 3775 void bdrv_detach_aio_context(BlockDriverState *bs) 3776 { 3777 BdrvAioNotifier *baf; 3778 3779 if (!bs->drv) { 3780 return; 3781 } 3782 3783 QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 3784 baf->detach_aio_context(baf->opaque); 3785 } 3786 3787 if (bs->io_limits_enabled) { 3788 throttle_detach_aio_context(&bs->throttle_state); 3789 } 3790 if (bs->drv->bdrv_detach_aio_context) { 3791 bs->drv->bdrv_detach_aio_context(bs); 3792 } 3793 if (bs->file) { 3794 bdrv_detach_aio_context(bs->file); 3795 } 3796 if (bs->backing_hd) { 3797 bdrv_detach_aio_context(bs->backing_hd); 3798 } 3799 3800 bs->aio_context = NULL; 3801 } 3802 3803 void bdrv_attach_aio_context(BlockDriverState *bs, 3804 AioContext *new_context) 3805 { 3806 BdrvAioNotifier *ban; 3807 3808 if (!bs->drv) { 3809 return; 3810 } 3811 3812 bs->aio_context = new_context; 3813 
3814 if (bs->backing_hd) { 3815 bdrv_attach_aio_context(bs->backing_hd, new_context); 3816 } 3817 if (bs->file) { 3818 bdrv_attach_aio_context(bs->file, new_context); 3819 } 3820 if (bs->drv->bdrv_attach_aio_context) { 3821 bs->drv->bdrv_attach_aio_context(bs, new_context); 3822 } 3823 if (bs->io_limits_enabled) { 3824 throttle_attach_aio_context(&bs->throttle_state, new_context); 3825 } 3826 3827 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3828 ban->attached_aio_context(new_context, ban->opaque); 3829 } 3830 } 3831 3832 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3833 { 3834 bdrv_drain_all(); /* ensure there are no in-flight requests */ 3835 3836 bdrv_detach_aio_context(bs); 3837 3838 /* This function executes in the old AioContext so acquire the new one in 3839 * case it runs in a different thread. 3840 */ 3841 aio_context_acquire(new_context); 3842 bdrv_attach_aio_context(bs, new_context); 3843 aio_context_release(new_context); 3844 } 3845 3846 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3847 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3848 void (*detach_aio_context)(void *opaque), void *opaque) 3849 { 3850 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3851 *ban = (BdrvAioNotifier){ 3852 .attached_aio_context = attached_aio_context, 3853 .detach_aio_context = detach_aio_context, 3854 .opaque = opaque 3855 }; 3856 3857 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3858 } 3859 3860 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3861 void (*attached_aio_context)(AioContext *, 3862 void *), 3863 void (*detach_aio_context)(void *), 3864 void *opaque) 3865 { 3866 BdrvAioNotifier *ban, *ban_next; 3867 3868 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3869 if (ban->attached_aio_context == attached_aio_context && 3870 ban->detach_aio_context == detach_aio_context && 3871 ban->opaque == opaque) 3872 { 3873 QLIST_REMOVE(ban, list); 3874 g_free(ban); 3875 3876 return; 
3877 } 3878 } 3879 3880 abort(); 3881 } 3882 3883 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3884 BlockDriverAmendStatusCB *status_cb) 3885 { 3886 if (!bs->drv->bdrv_amend_options) { 3887 return -ENOTSUP; 3888 } 3889 return bs->drv->bdrv_amend_options(bs, opts, status_cb); 3890 } 3891 3892 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3893 * of block filter and by bdrv_is_first_non_filter. 3894 * It is used to test if the given bs is the candidate or recurse more in the 3895 * node graph. 3896 */ 3897 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3898 BlockDriverState *candidate) 3899 { 3900 /* return false if basic checks fails */ 3901 if (!bs || !bs->drv) { 3902 return false; 3903 } 3904 3905 /* the code reached a non block filter driver -> check if the bs is 3906 * the same as the candidate. It's the recursion termination condition. 3907 */ 3908 if (!bs->drv->is_filter) { 3909 return bs == candidate; 3910 } 3911 /* Down this path the driver is a block filter driver */ 3912 3913 /* If the block filter recursion method is defined use it to recurse down 3914 * the node graph. 3915 */ 3916 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3917 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3918 } 3919 3920 /* the driver is a block filter but don't allow to recurse -> return false 3921 */ 3922 return false; 3923 } 3924 3925 /* This function checks if the candidate is the first non filter bs down it's 3926 * bs chain. Since we don't have pointers to parents it explore all bs chains 3927 * from the top. Some filters can choose not to pass down the recursion. 
3928 */ 3929 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 3930 { 3931 BlockDriverState *bs; 3932 3933 /* walk down the bs forest recursively */ 3934 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3935 bool perm; 3936 3937 /* try to recurse in this top level bs */ 3938 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 3939 3940 /* candidate is the first non filter */ 3941 if (perm) { 3942 return true; 3943 } 3944 } 3945 3946 return false; 3947 } 3948 3949 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 3950 { 3951 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 3952 AioContext *aio_context; 3953 3954 if (!to_replace_bs) { 3955 error_setg(errp, "Node name '%s' not found", node_name); 3956 return NULL; 3957 } 3958 3959 aio_context = bdrv_get_aio_context(to_replace_bs); 3960 aio_context_acquire(aio_context); 3961 3962 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 3963 to_replace_bs = NULL; 3964 goto out; 3965 } 3966 3967 /* We don't want arbitrary node of the BDS chain to be replaced only the top 3968 * most non filter in order to prevent data corruption. 3969 * Another benefit is that this tests exclude backing files which are 3970 * blocked by the backing blockers. 
3971 */ 3972 if (!bdrv_is_first_non_filter(to_replace_bs)) { 3973 error_setg(errp, "Only top most non filter can be replaced"); 3974 to_replace_bs = NULL; 3975 goto out; 3976 } 3977 3978 out: 3979 aio_context_release(aio_context); 3980 return to_replace_bs; 3981 } 3982 3983 static bool append_open_options(QDict *d, BlockDriverState *bs) 3984 { 3985 const QDictEntry *entry; 3986 bool found_any = false; 3987 3988 for (entry = qdict_first(bs->options); entry; 3989 entry = qdict_next(bs->options, entry)) 3990 { 3991 /* Only take options for this level and exclude all non-driver-specific 3992 * options */ 3993 if (!strchr(qdict_entry_key(entry), '.') && 3994 strcmp(qdict_entry_key(entry), "node-name")) 3995 { 3996 qobject_incref(qdict_entry_value(entry)); 3997 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 3998 found_any = true; 3999 } 4000 } 4001 4002 return found_any; 4003 } 4004 4005 /* Updates the following BDS fields: 4006 * - exact_filename: A filename which may be used for opening a block device 4007 * which (mostly) equals the given BDS (even without any 4008 * other options; so reading and writing must return the same 4009 * results, but caching etc. may be different) 4010 * - full_open_options: Options which, when given when opening a block device 4011 * (without a filename), result in a BDS (mostly) 4012 * equalling the given one 4013 * - filename: If exact_filename is set, it is copied here. Otherwise, 4014 * full_open_options is converted to a JSON object, prefixed with 4015 * "json:" (for use through the JSON pseudo protocol) and put here. 
4016 */ 4017 void bdrv_refresh_filename(BlockDriverState *bs) 4018 { 4019 BlockDriver *drv = bs->drv; 4020 QDict *opts; 4021 4022 if (!drv) { 4023 return; 4024 } 4025 4026 /* This BDS's file name will most probably depend on its file's name, so 4027 * refresh that first */ 4028 if (bs->file) { 4029 bdrv_refresh_filename(bs->file); 4030 } 4031 4032 if (drv->bdrv_refresh_filename) { 4033 /* Obsolete information is of no use here, so drop the old file name 4034 * information before refreshing it */ 4035 bs->exact_filename[0] = '\0'; 4036 if (bs->full_open_options) { 4037 QDECREF(bs->full_open_options); 4038 bs->full_open_options = NULL; 4039 } 4040 4041 drv->bdrv_refresh_filename(bs); 4042 } else if (bs->file) { 4043 /* Try to reconstruct valid information from the underlying file */ 4044 bool has_open_options; 4045 4046 bs->exact_filename[0] = '\0'; 4047 if (bs->full_open_options) { 4048 QDECREF(bs->full_open_options); 4049 bs->full_open_options = NULL; 4050 } 4051 4052 opts = qdict_new(); 4053 has_open_options = append_open_options(opts, bs); 4054 4055 /* If no specific options have been given for this BDS, the filename of 4056 * the underlying file should suffice for this one as well */ 4057 if (bs->file->exact_filename[0] && !has_open_options) { 4058 strcpy(bs->exact_filename, bs->file->exact_filename); 4059 } 4060 /* Reconstructing the full options QDict is simple for most format block 4061 * drivers, as long as the full options are known for the underlying 4062 * file BDS. The full options QDict of that file BDS should somehow 4063 * contain a representation of the filename, therefore the following 4064 * suffices without querying the (exact_)filename of this BDS. 
*/ 4065 if (bs->file->full_open_options) { 4066 qdict_put_obj(opts, "driver", 4067 QOBJECT(qstring_from_str(drv->format_name))); 4068 QINCREF(bs->file->full_open_options); 4069 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 4070 4071 bs->full_open_options = opts; 4072 } else { 4073 QDECREF(opts); 4074 } 4075 } else if (!bs->full_open_options && qdict_size(bs->options)) { 4076 /* There is no underlying file BDS (at least referenced by BDS.file), 4077 * so the full options QDict should be equal to the options given 4078 * specifically for this block device when it was opened (plus the 4079 * driver specification). 4080 * Because those options don't change, there is no need to update 4081 * full_open_options when it's already set. */ 4082 4083 opts = qdict_new(); 4084 append_open_options(opts, bs); 4085 qdict_put_obj(opts, "driver", 4086 QOBJECT(qstring_from_str(drv->format_name))); 4087 4088 if (bs->exact_filename[0]) { 4089 /* This may not work for all block protocol drivers (some may 4090 * require this filename to be parsed), but we have to find some 4091 * default solution here, so just include it. If some block driver 4092 * does not support pure options without any filename at all or 4093 * needs some special format of the options QDict, it needs to 4094 * implement the driver-specific bdrv_refresh_filename() function. 
4095 */ 4096 qdict_put_obj(opts, "filename", 4097 QOBJECT(qstring_from_str(bs->exact_filename))); 4098 } 4099 4100 bs->full_open_options = opts; 4101 } 4102 4103 if (bs->exact_filename[0]) { 4104 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4105 } else if (bs->full_open_options) { 4106 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4107 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4108 qstring_get_str(json)); 4109 QDECREF(json); 4110 } 4111 } 4112 4113 /* This accessor function purpose is to allow the device models to access the 4114 * BlockAcctStats structure embedded inside a BlockDriverState without being 4115 * aware of the BlockDriverState structure layout. 4116 * It will go away when the BlockAcctStats structure will be moved inside 4117 * the device models. 4118 */ 4119 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 4120 { 4121 return &bs->stats; 4122 } 4123