1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "qemu/module.h" 30 #include "qapi/qmp/qjson.h" 31 #include "sysemu/block-backend.h" 32 #include "sysemu/sysemu.h" 33 #include "qemu/notify.h" 34 #include "block/coroutine.h" 35 #include "block/qapi.h" 36 #include "qmp-commands.h" 37 #include "qemu/timer.h" 38 #include "qapi-event.h" 39 40 #ifdef CONFIG_BSD 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <sys/ioctl.h> 44 #include <sys/queue.h> 45 #ifndef __DragonFly__ 46 #include <sys/disk.h> 47 #endif 48 #endif 49 50 #ifdef _WIN32 51 #include <windows.h> 52 #endif 53 54 /** 55 * A BdrvDirtyBitmap can be in three possible states: 56 * (1) successor is NULL and disabled is false: full r/w mode 57 * (2) successor is NULL and disabled is true: read only mode ("disabled") 58 * (3) successor is set: frozen mode. 59 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 60 * or enabled. A frozen bitmap can only abdicate() or reclaim(). 
61 */ 62 struct BdrvDirtyBitmap { 63 HBitmap *bitmap; /* Dirty sector bitmap implementation */ 64 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 65 char *name; /* Optional non-empty unique ID */ 66 int64_t size; /* Size of the bitmap (Number of sectors) */ 67 bool disabled; /* Bitmap is read-only */ 68 QLIST_ENTRY(BdrvDirtyBitmap) list; 69 }; 70 71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 72 73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 74 QTAILQ_HEAD_INITIALIZER(bdrv_states); 75 76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 78 79 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 80 QLIST_HEAD_INITIALIZER(bdrv_drivers); 81 82 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 83 const char *reference, QDict *options, int flags, 84 BlockDriverState *parent, 85 const BdrvChildRole *child_role, 86 BlockDriver *drv, Error **errp); 87 88 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 89 /* If non-zero, use only whitelisted block drivers */ 90 static int use_bdrv_whitelist; 91 92 #ifdef _WIN32 93 static int is_windows_drive_prefix(const char *filename) 94 { 95 return (((filename[0] >= 'a' && filename[0] <= 'z') || 96 (filename[0] >= 'A' && filename[0] <= 'Z')) && 97 filename[1] == ':'); 98 } 99 100 int is_windows_drive(const char *filename) 101 { 102 if (is_windows_drive_prefix(filename) && 103 filename[2] == '\0') 104 return 1; 105 if (strstart(filename, "\\\\.\\", NULL) || 106 strstart(filename, "//./", NULL)) 107 return 1; 108 return 0; 109 } 110 #endif 111 112 size_t bdrv_opt_mem_align(BlockDriverState *bs) 113 { 114 if (!bs || !bs->drv) { 115 /* page size or 4k (hdd sector size) should be on the safe side */ 116 return MAX(4096, getpagesize()); 117 } 118 119 return bs->bl.opt_mem_alignment; 120 } 121 122 size_t bdrv_min_mem_align(BlockDriverState *bs) 123 { 124 if (!bs || !bs->drv) { 125 /* 
page size or 4k (hdd sector size) should be on the safe side */ 126 return MAX(4096, getpagesize()); 127 } 128 129 return bs->bl.min_mem_alignment; 130 } 131 132 /* check if the path starts with "<protocol>:" */ 133 int path_has_protocol(const char *path) 134 { 135 const char *p; 136 137 #ifdef _WIN32 138 if (is_windows_drive(path) || 139 is_windows_drive_prefix(path)) { 140 return 0; 141 } 142 p = path + strcspn(path, ":/\\"); 143 #else 144 p = path + strcspn(path, ":/"); 145 #endif 146 147 return *p == ':'; 148 } 149 150 int path_is_absolute(const char *path) 151 { 152 #ifdef _WIN32 153 /* specific case for names like: "\\.\d:" */ 154 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 155 return 1; 156 } 157 return (*path == '/' || *path == '\\'); 158 #else 159 return (*path == '/'); 160 #endif 161 } 162 163 /* if filename is absolute, just copy it to dest. Otherwise, build a 164 path to it by considering it is relative to base_path. URL are 165 supported. */ 166 void path_combine(char *dest, int dest_size, 167 const char *base_path, 168 const char *filename) 169 { 170 const char *p, *p1; 171 int len; 172 173 if (dest_size <= 0) 174 return; 175 if (path_is_absolute(filename)) { 176 pstrcpy(dest, dest_size, filename); 177 } else { 178 p = strchr(base_path, ':'); 179 if (p) 180 p++; 181 else 182 p = base_path; 183 p1 = strrchr(base_path, '/'); 184 #ifdef _WIN32 185 { 186 const char *p2; 187 p2 = strrchr(base_path, '\\'); 188 if (!p1 || p2 > p1) 189 p1 = p2; 190 } 191 #endif 192 if (p1) 193 p1++; 194 else 195 p1 = base_path; 196 if (p1 > p) 197 p = p1; 198 len = p - base_path; 199 if (len > dest_size - 1) 200 len = dest_size - 1; 201 memcpy(dest, base_path, len); 202 dest[len] = '\0'; 203 pstrcat(dest, dest_size, filename); 204 } 205 } 206 207 void bdrv_get_full_backing_filename_from_filename(const char *backed, 208 const char *backing, 209 char *dest, size_t sz, 210 Error **errp) 211 { 212 if (backing[0] == '\0' || path_has_protocol(backing) || 213 
path_is_absolute(backing)) 214 { 215 pstrcpy(dest, sz, backing); 216 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 217 error_setg(errp, "Cannot use relative backing file names for '%s'", 218 backed); 219 } else { 220 path_combine(dest, sz, backed, backing); 221 } 222 } 223 224 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 225 Error **errp) 226 { 227 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 228 229 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 230 dest, sz, errp); 231 } 232 233 void bdrv_register(BlockDriver *bdrv) 234 { 235 bdrv_setup_io_funcs(bdrv); 236 237 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 238 } 239 240 BlockDriverState *bdrv_new_root(void) 241 { 242 BlockDriverState *bs = bdrv_new(); 243 244 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 245 return bs; 246 } 247 248 BlockDriverState *bdrv_new(void) 249 { 250 BlockDriverState *bs; 251 int i; 252 253 bs = g_new0(BlockDriverState, 1); 254 QLIST_INIT(&bs->dirty_bitmaps); 255 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 256 QLIST_INIT(&bs->op_blockers[i]); 257 } 258 bdrv_iostatus_disable(bs); 259 notifier_list_init(&bs->close_notifiers); 260 notifier_with_return_list_init(&bs->before_write_notifiers); 261 qemu_co_queue_init(&bs->throttled_reqs[0]); 262 qemu_co_queue_init(&bs->throttled_reqs[1]); 263 bs->refcnt = 1; 264 bs->aio_context = qemu_get_aio_context(); 265 266 return bs; 267 } 268 269 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 270 { 271 notifier_list_add(&bs->close_notifiers, notify); 272 } 273 274 BlockDriver *bdrv_find_format(const char *format_name) 275 { 276 BlockDriver *drv1; 277 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 278 if (!strcmp(drv1->format_name, format_name)) { 279 return drv1; 280 } 281 } 282 return NULL; 283 } 284 285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 286 { 287 static const char *whitelist_rw[] = { 288 
CONFIG_BDRV_RW_WHITELIST 289 }; 290 static const char *whitelist_ro[] = { 291 CONFIG_BDRV_RO_WHITELIST 292 }; 293 const char **p; 294 295 if (!whitelist_rw[0] && !whitelist_ro[0]) { 296 return 1; /* no whitelist, anything goes */ 297 } 298 299 for (p = whitelist_rw; *p; p++) { 300 if (!strcmp(drv->format_name, *p)) { 301 return 1; 302 } 303 } 304 if (read_only) { 305 for (p = whitelist_ro; *p; p++) { 306 if (!strcmp(drv->format_name, *p)) { 307 return 1; 308 } 309 } 310 } 311 return 0; 312 } 313 314 BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 315 bool read_only) 316 { 317 BlockDriver *drv = bdrv_find_format(format_name); 318 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 319 } 320 321 typedef struct CreateCo { 322 BlockDriver *drv; 323 char *filename; 324 QemuOpts *opts; 325 int ret; 326 Error *err; 327 } CreateCo; 328 329 static void coroutine_fn bdrv_create_co_entry(void *opaque) 330 { 331 Error *local_err = NULL; 332 int ret; 333 334 CreateCo *cco = opaque; 335 assert(cco->drv); 336 337 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 338 if (local_err) { 339 error_propagate(&cco->err, local_err); 340 } 341 cco->ret = ret; 342 } 343 344 int bdrv_create(BlockDriver *drv, const char* filename, 345 QemuOpts *opts, Error **errp) 346 { 347 int ret; 348 349 Coroutine *co; 350 CreateCo cco = { 351 .drv = drv, 352 .filename = g_strdup(filename), 353 .opts = opts, 354 .ret = NOT_DONE, 355 .err = NULL, 356 }; 357 358 if (!drv->bdrv_create) { 359 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 360 ret = -ENOTSUP; 361 goto out; 362 } 363 364 if (qemu_in_coroutine()) { 365 /* Fast-path if already in coroutine context */ 366 bdrv_create_co_entry(&cco); 367 } else { 368 co = qemu_coroutine_create(bdrv_create_co_entry); 369 qemu_coroutine_enter(co, &cco); 370 while (cco.ret == NOT_DONE) { 371 aio_poll(qemu_get_aio_context(), true); 372 } 373 } 374 375 ret = cco.ret; 376 if (ret < 0) { 
377 if (cco.err) { 378 error_propagate(errp, cco.err); 379 } else { 380 error_setg_errno(errp, -ret, "Could not create image"); 381 } 382 } 383 384 out: 385 g_free(cco.filename); 386 return ret; 387 } 388 389 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 390 { 391 BlockDriver *drv; 392 Error *local_err = NULL; 393 int ret; 394 395 drv = bdrv_find_protocol(filename, true, errp); 396 if (drv == NULL) { 397 return -ENOENT; 398 } 399 400 ret = bdrv_create(drv, filename, opts, &local_err); 401 if (local_err) { 402 error_propagate(errp, local_err); 403 } 404 return ret; 405 } 406 407 /** 408 * Try to get @bs's logical and physical block size. 409 * On success, store them in @bsz struct and return 0. 410 * On failure return -errno. 411 * @bs must not be empty. 412 */ 413 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 414 { 415 BlockDriver *drv = bs->drv; 416 417 if (drv && drv->bdrv_probe_blocksizes) { 418 return drv->bdrv_probe_blocksizes(bs, bsz); 419 } 420 421 return -ENOTSUP; 422 } 423 424 /** 425 * Try to get @bs's geometry (cyls, heads, sectors). 426 * On success, store them in @geo struct and return 0. 427 * On failure return -errno. 428 * @bs must not be empty. 429 */ 430 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 431 { 432 BlockDriver *drv = bs->drv; 433 434 if (drv && drv->bdrv_probe_geometry) { 435 return drv->bdrv_probe_geometry(bs, geo); 436 } 437 438 return -ENOTSUP; 439 } 440 441 /* 442 * Create a uniquely-named empty temporary file. 443 * Return 0 upon success, otherwise a negative errno value. 444 */ 445 int get_tmp_filename(char *filename, int size) 446 { 447 #ifdef _WIN32 448 char temp_dir[MAX_PATH]; 449 /* GetTempFileName requires that its output buffer (4th param) 450 have length MAX_PATH or greater. */ 451 assert(size >= MAX_PATH); 452 return (GetTempPath(MAX_PATH, temp_dir) 453 && GetTempFileName(temp_dir, "qem", 0, filename) 454 ? 
0 : -GetLastError()); 455 #else 456 int fd; 457 const char *tmpdir; 458 tmpdir = getenv("TMPDIR"); 459 if (!tmpdir) { 460 tmpdir = "/var/tmp"; 461 } 462 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 463 return -EOVERFLOW; 464 } 465 fd = mkstemp(filename); 466 if (fd < 0) { 467 return -errno; 468 } 469 if (close(fd) != 0) { 470 unlink(filename); 471 return -errno; 472 } 473 return 0; 474 #endif 475 } 476 477 /* 478 * Detect host devices. By convention, /dev/cdrom[N] is always 479 * recognized as a host CDROM. 480 */ 481 static BlockDriver *find_hdev_driver(const char *filename) 482 { 483 int score_max = 0, score; 484 BlockDriver *drv = NULL, *d; 485 486 QLIST_FOREACH(d, &bdrv_drivers, list) { 487 if (d->bdrv_probe_device) { 488 score = d->bdrv_probe_device(filename); 489 if (score > score_max) { 490 score_max = score; 491 drv = d; 492 } 493 } 494 } 495 496 return drv; 497 } 498 499 BlockDriver *bdrv_find_protocol(const char *filename, 500 bool allow_protocol_prefix, 501 Error **errp) 502 { 503 BlockDriver *drv1; 504 char protocol[128]; 505 int len; 506 const char *p; 507 508 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 509 510 /* 511 * XXX(hch): we really should not let host device detection 512 * override an explicit protocol specification, but moving this 513 * later breaks access to device names with colons in them. 514 * Thanks to the brain-dead persistent naming schemes on udev- 515 * based Linux systems those actually are quite common. 
516 */ 517 drv1 = find_hdev_driver(filename); 518 if (drv1) { 519 return drv1; 520 } 521 522 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 523 return &bdrv_file; 524 } 525 526 p = strchr(filename, ':'); 527 assert(p != NULL); 528 len = p - filename; 529 if (len > sizeof(protocol) - 1) 530 len = sizeof(protocol) - 1; 531 memcpy(protocol, filename, len); 532 protocol[len] = '\0'; 533 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 534 if (drv1->protocol_name && 535 !strcmp(drv1->protocol_name, protocol)) { 536 return drv1; 537 } 538 } 539 540 error_setg(errp, "Unknown protocol '%s'", protocol); 541 return NULL; 542 } 543 544 /* 545 * Guess image format by probing its contents. 546 * This is not a good idea when your image is raw (CVE-2008-2004), but 547 * we do it anyway for backward compatibility. 548 * 549 * @buf contains the image's first @buf_size bytes. 550 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 551 * but can be smaller if the image file is smaller) 552 * @filename is its filename. 553 * 554 * For all block drivers, call the bdrv_probe() method to get its 555 * probing score. 556 * Return the first block driver with the highest probing score. 
557 */ 558 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 559 const char *filename) 560 { 561 int score_max = 0, score; 562 BlockDriver *drv = NULL, *d; 563 564 QLIST_FOREACH(d, &bdrv_drivers, list) { 565 if (d->bdrv_probe) { 566 score = d->bdrv_probe(buf, buf_size, filename); 567 if (score > score_max) { 568 score_max = score; 569 drv = d; 570 } 571 } 572 } 573 574 return drv; 575 } 576 577 static int find_image_format(BlockDriverState *bs, const char *filename, 578 BlockDriver **pdrv, Error **errp) 579 { 580 BlockDriver *drv; 581 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 582 int ret = 0; 583 584 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 585 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 586 *pdrv = &bdrv_raw; 587 return ret; 588 } 589 590 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 591 if (ret < 0) { 592 error_setg_errno(errp, -ret, "Could not read image for determining its " 593 "format"); 594 *pdrv = NULL; 595 return ret; 596 } 597 598 drv = bdrv_probe_all(buf, ret, filename); 599 if (!drv) { 600 error_setg(errp, "Could not determine image format: No compatible " 601 "driver found"); 602 ret = -ENOENT; 603 } 604 *pdrv = drv; 605 return ret; 606 } 607 608 /** 609 * Set the current 'total_sectors' value 610 * Return 0 on success, -errno on error. 611 */ 612 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 613 { 614 BlockDriver *drv = bs->drv; 615 616 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 617 if (bs->sg) 618 return 0; 619 620 /* query actual device if possible, otherwise just trust the hint */ 621 if (drv->bdrv_getlength) { 622 int64_t length = drv->bdrv_getlength(bs); 623 if (length < 0) { 624 return length; 625 } 626 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 627 } 628 629 bs->total_sectors = hint; 630 return 0; 631 } 632 633 /** 634 * Set open flags for a given discard mode 635 * 636 * Return 0 on success, -1 if the discard mode was invalid. 
637 */ 638 int bdrv_parse_discard_flags(const char *mode, int *flags) 639 { 640 *flags &= ~BDRV_O_UNMAP; 641 642 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 643 /* do nothing */ 644 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 645 *flags |= BDRV_O_UNMAP; 646 } else { 647 return -1; 648 } 649 650 return 0; 651 } 652 653 /** 654 * Set open flags for a given cache mode 655 * 656 * Return 0 on success, -1 if the cache mode was invalid. 657 */ 658 int bdrv_parse_cache_flags(const char *mode, int *flags) 659 { 660 *flags &= ~BDRV_O_CACHE_MASK; 661 662 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 663 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 664 } else if (!strcmp(mode, "directsync")) { 665 *flags |= BDRV_O_NOCACHE; 666 } else if (!strcmp(mode, "writeback")) { 667 *flags |= BDRV_O_CACHE_WB; 668 } else if (!strcmp(mode, "unsafe")) { 669 *flags |= BDRV_O_CACHE_WB; 670 *flags |= BDRV_O_NO_FLUSH; 671 } else if (!strcmp(mode, "writethrough")) { 672 /* this is the default */ 673 } else { 674 return -1; 675 } 676 677 return 0; 678 } 679 680 /* 681 * Returns the flags that a temporary snapshot should get, based on the 682 * originally requested flags (the originally requested image will have flags 683 * like a backing file) 684 */ 685 static int bdrv_temp_snapshot_flags(int flags) 686 { 687 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 688 } 689 690 /* 691 * Returns the flags that bs->file should get if a protocol driver is expected, 692 * based on the given flags for the parent BDS 693 */ 694 static int bdrv_inherited_flags(int flags) 695 { 696 /* Enable protocol handling, disable format probing for bs->file */ 697 flags |= BDRV_O_PROTOCOL; 698 699 /* Our block drivers take care to send flushes and respect unmap policy, 700 * so we can enable both unconditionally on lower layers. 
*/ 701 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 702 703 /* Clear flags that only apply to the top layer */ 704 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 705 706 return flags; 707 } 708 709 const BdrvChildRole child_file = { 710 .inherit_flags = bdrv_inherited_flags, 711 }; 712 713 /* 714 * Returns the flags that bs->file should get if the use of formats (and not 715 * only protocols) is permitted for it, based on the given flags for the parent 716 * BDS 717 */ 718 static int bdrv_inherited_fmt_flags(int parent_flags) 719 { 720 int flags = child_file.inherit_flags(parent_flags); 721 return flags & ~BDRV_O_PROTOCOL; 722 } 723 724 const BdrvChildRole child_format = { 725 .inherit_flags = bdrv_inherited_fmt_flags, 726 }; 727 728 /* 729 * Returns the flags that bs->backing_hd should get, based on the given flags 730 * for the parent BDS 731 */ 732 static int bdrv_backing_flags(int flags) 733 { 734 /* backing files always opened read-only */ 735 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 736 737 /* snapshot=on is handled on the top layer */ 738 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 739 740 return flags; 741 } 742 743 static const BdrvChildRole child_backing = { 744 .inherit_flags = bdrv_backing_flags, 745 }; 746 747 static int bdrv_open_flags(BlockDriverState *bs, int flags) 748 { 749 int open_flags = flags | BDRV_O_CACHE_WB; 750 751 /* 752 * Clear flags that are internal to the block layer before opening the 753 * image. 754 */ 755 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 756 757 /* 758 * Snapshots should be writable. 
759 */ 760 if (flags & BDRV_O_TEMPORARY) { 761 open_flags |= BDRV_O_RDWR; 762 } 763 764 return open_flags; 765 } 766 767 static void bdrv_assign_node_name(BlockDriverState *bs, 768 const char *node_name, 769 Error **errp) 770 { 771 if (!node_name) { 772 return; 773 } 774 775 /* Check for empty string or invalid characters */ 776 if (!id_wellformed(node_name)) { 777 error_setg(errp, "Invalid node name"); 778 return; 779 } 780 781 /* takes care of avoiding namespaces collisions */ 782 if (blk_by_name(node_name)) { 783 error_setg(errp, "node-name=%s is conflicting with a device id", 784 node_name); 785 return; 786 } 787 788 /* takes care of avoiding duplicates node names */ 789 if (bdrv_find_node(node_name)) { 790 error_setg(errp, "Duplicate node name"); 791 return; 792 } 793 794 /* copy node name into the bs and insert it into the graph list */ 795 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 796 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 797 } 798 799 static QemuOptsList bdrv_runtime_opts = { 800 .name = "bdrv_common", 801 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 802 .desc = { 803 { 804 .name = "node-name", 805 .type = QEMU_OPT_STRING, 806 .help = "Node name of the block device node", 807 }, 808 { /* end of list */ } 809 }, 810 }; 811 812 /* 813 * Common part for opening disk images and files 814 * 815 * Removes all processed options from *options. 
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from @file if given, else from the options */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Move the options described by bdrv_runtime_opts from @options to opts */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are set before the driver's open callback runs */
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /*
         * Pick the more specific message if the driver would have been
         * acceptable for read-only access.
         */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail to undo this */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from ret */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Detach @file (it is not released here) and drop the driver state */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}

/*
 * Parse a "json:" pseudo-filename into a flattened QDict of options.
 *
 * @filename must start with "json:" (asserted); the rest of the string is
 * parsed as JSON.  Returns the new QDict, or NULL with @errp set if the text
 * is not valid JSON or is not a JSON object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    /* Advances @filename past the "json:" prefix */
    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 * block driver has been specified explicitly.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename,
                             int *flags, BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = *flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    BlockDriver *tmp_drv;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        /* The pseudo-filename is fully consumed; the caller sees NULL */
        *pfilename = filename = NULL;
    }

    drvname = qdict_get_try_str(*options, "driver");

    /* If the user has explicitly specified the driver, this choice should
     * override the BDRV_O_PROTOCOL flag */
    tmp_drv = drv;
    if (!tmp_drv && drvname) {
        tmp_drv = bdrv_find_format(drvname);
    }
    if (tmp_drv) {
        /* A driver counts as a protocol iff it implements bdrv_file_open */
        protocol = tmp_drv->bdrv_file_open;
    }

    if (protocol) {
        *flags |= BDRV_O_PROTOCOL;
    } else {
        *flags &= ~BDRV_O_PROTOCOL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Only a legacy filename is subject to driver-specific parsing */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

/*
 * Make @backing_hd the backing file of @bs, or detach the current backing
 * file if @backing_hd is NULL.
 *
 * bs->backing_blocker is used to block all operations on the backing node
 * except BLOCK_OP_TYPE_COMMIT_TARGET.  It is lifted from the previous backing
 * node, and freed when @backing_hd is NULL.  bs->backing_file and
 * bs->backing_format are updated from @backing_hd, and the limits of @bs are
 * refreshed in either case.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Lift the blocker from the old backing node; it is reused below */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 *
 * Returns 0 on success, including the cases where a backing file is already
 * open or where there is nothing to open; a negative errno with errp set
 * otherwise.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Nothing to do if a backing file is already attached */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The options name the file; no filename argument is passed on */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* Neither a recorded backing file name nor options: nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* An explicit "driver" option takes precedence over the backing format */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    /* options is not released on any path after this call */
    ret = bdrv_open_inherit(&backing_hd,
                            *backing_filename ? backing_filename : NULL,
                            NULL, options, 0, bs, &child_backing,
                            NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1224 */ 1225 int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1226 QDict *options, const char *bdref_key, 1227 BlockDriverState* parent, const BdrvChildRole *child_role, 1228 bool allow_none, Error **errp) 1229 { 1230 QDict *image_options; 1231 int ret; 1232 char *bdref_key_dot; 1233 const char *reference; 1234 1235 assert(pbs); 1236 assert(*pbs == NULL); 1237 1238 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1239 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1240 g_free(bdref_key_dot); 1241 1242 reference = qdict_get_try_str(options, bdref_key); 1243 if (!filename && !reference && !qdict_size(image_options)) { 1244 if (allow_none) { 1245 ret = 0; 1246 } else { 1247 error_setg(errp, "A block device must be specified for \"%s\"", 1248 bdref_key); 1249 ret = -EINVAL; 1250 } 1251 QDECREF(image_options); 1252 goto done; 1253 } 1254 1255 ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, 1256 parent, child_role, NULL, errp); 1257 1258 done: 1259 qdict_del(options, bdref_key); 1260 return ret; 1261 } 1262 1263 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1264 { 1265 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. 
*/ 1266 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1267 int64_t total_size; 1268 QemuOpts *opts = NULL; 1269 QDict *snapshot_options; 1270 BlockDriverState *bs_snapshot; 1271 Error *local_err; 1272 int ret; 1273 1274 /* if snapshot, we create a temporary backing file and open it 1275 instead of opening 'filename' directly */ 1276 1277 /* Get the required size from the image */ 1278 total_size = bdrv_getlength(bs); 1279 if (total_size < 0) { 1280 ret = total_size; 1281 error_setg_errno(errp, -total_size, "Could not get image size"); 1282 goto out; 1283 } 1284 1285 /* Create the temporary image */ 1286 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1287 if (ret < 0) { 1288 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1289 goto out; 1290 } 1291 1292 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1293 &error_abort); 1294 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1295 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 1296 qemu_opts_del(opts); 1297 if (ret < 0) { 1298 error_setg_errno(errp, -ret, "Could not create temporary overlay " 1299 "'%s': %s", tmp_filename, 1300 error_get_pretty(local_err)); 1301 error_free(local_err); 1302 goto out; 1303 } 1304 1305 /* Prepare a new options QDict for the temporary file */ 1306 snapshot_options = qdict_new(); 1307 qdict_put(snapshot_options, "file.driver", 1308 qstring_from_str("file")); 1309 qdict_put(snapshot_options, "file.filename", 1310 qstring_from_str(tmp_filename)); 1311 1312 bs_snapshot = bdrv_new(); 1313 1314 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1315 flags, &bdrv_qcow2, &local_err); 1316 if (ret < 0) { 1317 error_propagate(errp, local_err); 1318 goto out; 1319 } 1320 1321 bdrv_append(bs_snapshot, bs); 1322 1323 out: 1324 g_free(tmp_filename); 1325 return ret; 1326 } 1327 1328 /* 1329 * Opens a disk image (raw, qcow2, vmdk, ...) 
1330 * 1331 * options is a QDict of options to pass to the block drivers, or NULL for an 1332 * empty set of options. The reference to the QDict belongs to the block layer 1333 * after the call (even on failure), so if the caller intends to reuse the 1334 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1335 * 1336 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1337 * If it is not NULL, the referenced BDS will be reused. 1338 * 1339 * The reference parameter may be used to specify an existing block device which 1340 * should be opened. If specified, neither options nor a filename may be given, 1341 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1342 */ 1343 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 1344 const char *reference, QDict *options, int flags, 1345 BlockDriverState *parent, 1346 const BdrvChildRole *child_role, 1347 BlockDriver *drv, Error **errp) 1348 { 1349 int ret; 1350 BlockDriverState *file = NULL, *bs; 1351 const char *drvname; 1352 Error *local_err = NULL; 1353 int snapshot_flags = 0; 1354 1355 assert(pbs); 1356 assert(!child_role || !flags); 1357 assert(!child_role == !parent); 1358 1359 if (reference) { 1360 bool options_non_empty = options ? 
qdict_size(options) : false; 1361 QDECREF(options); 1362 1363 if (*pbs) { 1364 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1365 "another block device"); 1366 return -EINVAL; 1367 } 1368 1369 if (filename || options_non_empty) { 1370 error_setg(errp, "Cannot reference an existing block device with " 1371 "additional options or a new filename"); 1372 return -EINVAL; 1373 } 1374 1375 bs = bdrv_lookup_bs(reference, reference, errp); 1376 if (!bs) { 1377 return -ENODEV; 1378 } 1379 bdrv_ref(bs); 1380 *pbs = bs; 1381 return 0; 1382 } 1383 1384 if (*pbs) { 1385 bs = *pbs; 1386 } else { 1387 bs = bdrv_new(); 1388 } 1389 1390 /* NULL means an empty set of options */ 1391 if (options == NULL) { 1392 options = qdict_new(); 1393 } 1394 1395 if (child_role) { 1396 flags = child_role->inherit_flags(parent->open_flags); 1397 } 1398 1399 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); 1400 if (local_err) { 1401 goto fail; 1402 } 1403 1404 /* Find the right image format driver */ 1405 drv = NULL; 1406 drvname = qdict_get_try_str(options, "driver"); 1407 if (drvname) { 1408 drv = bdrv_find_format(drvname); 1409 qdict_del(options, "driver"); 1410 if (!drv) { 1411 error_setg(errp, "Unknown driver: '%s'", drvname); 1412 ret = -EINVAL; 1413 goto fail; 1414 } 1415 } 1416 1417 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1418 1419 bs->open_flags = flags; 1420 bs->options = options; 1421 options = qdict_clone_shallow(options); 1422 1423 /* Open image file without format layer */ 1424 if ((flags & BDRV_O_PROTOCOL) == 0) { 1425 if (flags & BDRV_O_RDWR) { 1426 flags |= BDRV_O_ALLOW_RDWR; 1427 } 1428 if (flags & BDRV_O_SNAPSHOT) { 1429 snapshot_flags = bdrv_temp_snapshot_flags(flags); 1430 flags = bdrv_backing_flags(flags); 1431 } 1432 1433 assert(file == NULL); 1434 bs->open_flags = flags; 1435 ret = bdrv_open_image(&file, filename, options, "file", 1436 bs, &child_file, true, &local_err); 1437 if (ret < 0) { 1438 goto fail; 1439 } 1440 } 1441 
1442 /* Image format probing */ 1443 bs->probed = !drv; 1444 if (!drv && file) { 1445 ret = find_image_format(file, filename, &drv, &local_err); 1446 if (ret < 0) { 1447 goto fail; 1448 } 1449 } else if (!drv) { 1450 error_setg(errp, "Must specify either driver or file"); 1451 ret = -EINVAL; 1452 goto fail; 1453 } 1454 1455 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 1456 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 1457 /* file must be NULL if a protocol BDS is about to be created 1458 * (the inverse results in an error message from bdrv_open_common()) */ 1459 assert(!(flags & BDRV_O_PROTOCOL) || !file); 1460 1461 /* Open the image */ 1462 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1463 if (ret < 0) { 1464 goto fail; 1465 } 1466 1467 if (file && (bs->file != file)) { 1468 bdrv_unref(file); 1469 file = NULL; 1470 } 1471 1472 /* If there is a backing file, use it */ 1473 if ((flags & BDRV_O_NO_BACKING) == 0) { 1474 QDict *backing_options; 1475 1476 qdict_extract_subqdict(options, &backing_options, "backing."); 1477 ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1478 if (ret < 0) { 1479 goto close_and_fail; 1480 } 1481 } 1482 1483 bdrv_refresh_filename(bs); 1484 1485 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1486 * temporary snapshot afterwards. 
*/ 1487 if (snapshot_flags) { 1488 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1489 if (local_err) { 1490 goto close_and_fail; 1491 } 1492 } 1493 1494 /* Check if any unknown options were used */ 1495 if (options && (qdict_size(options) != 0)) { 1496 const QDictEntry *entry = qdict_first(options); 1497 if (flags & BDRV_O_PROTOCOL) { 1498 error_setg(errp, "Block protocol '%s' doesn't support the option " 1499 "'%s'", drv->format_name, entry->key); 1500 } else { 1501 error_setg(errp, "Block format '%s' used by device '%s' doesn't " 1502 "support the option '%s'", drv->format_name, 1503 bdrv_get_device_name(bs), entry->key); 1504 } 1505 1506 ret = -EINVAL; 1507 goto close_and_fail; 1508 } 1509 1510 if (!bdrv_key_required(bs)) { 1511 if (bs->blk) { 1512 blk_dev_change_media_cb(bs->blk, true); 1513 } 1514 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1515 && !runstate_check(RUN_STATE_INMIGRATE) 1516 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1517 error_setg(errp, 1518 "Guest must be stopped for opening of encrypted image"); 1519 ret = -EBUSY; 1520 goto close_and_fail; 1521 } 1522 1523 QDECREF(options); 1524 *pbs = bs; 1525 return 0; 1526 1527 fail: 1528 if (file != NULL) { 1529 bdrv_unref(file); 1530 } 1531 QDECREF(bs->options); 1532 QDECREF(options); 1533 bs->options = NULL; 1534 if (!*pbs) { 1535 /* If *pbs is NULL, a new BDS has been created in this function and 1536 needs to be freed now. Otherwise, it does not need to be closed, 1537 since it has not really been opened yet. 
*/ 1538 bdrv_unref(bs); 1539 } 1540 if (local_err) { 1541 error_propagate(errp, local_err); 1542 } 1543 return ret; 1544 1545 close_and_fail: 1546 /* See fail path, but now the BDS has to be always closed */ 1547 if (*pbs) { 1548 bdrv_close(bs); 1549 } else { 1550 bdrv_unref(bs); 1551 } 1552 QDECREF(options); 1553 if (local_err) { 1554 error_propagate(errp, local_err); 1555 } 1556 return ret; 1557 } 1558 1559 int bdrv_open(BlockDriverState **pbs, const char *filename, 1560 const char *reference, QDict *options, int flags, 1561 BlockDriver *drv, Error **errp) 1562 { 1563 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, 1564 NULL, drv, errp); 1565 } 1566 1567 typedef struct BlockReopenQueueEntry { 1568 bool prepared; 1569 BDRVReopenState state; 1570 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1571 } BlockReopenQueueEntry; 1572 1573 /* 1574 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1575 * reopen of multiple devices. 1576 * 1577 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1578 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1579 * be created and initialized. This newly created BlockReopenQueue should be 1580 * passed back in for subsequent calls that are intended to be of the same 1581 * atomic 'set'. 1582 * 1583 * bs is the BlockDriverState to add to the reopen queue. 1584 * 1585 * flags contains the open flags for the associated bs 1586 * 1587 * returns a pointer to bs_queue, which is either the newly allocated 1588 * bs_queue, or the existing bs_queue being used. 
1589 * 1590 */ 1591 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1592 BlockDriverState *bs, int flags) 1593 { 1594 assert(bs != NULL); 1595 1596 BlockReopenQueueEntry *bs_entry; 1597 if (bs_queue == NULL) { 1598 bs_queue = g_new0(BlockReopenQueue, 1); 1599 QSIMPLEQ_INIT(bs_queue); 1600 } 1601 1602 /* bdrv_open() masks this flag out */ 1603 flags &= ~BDRV_O_PROTOCOL; 1604 1605 if (bs->file) { 1606 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1607 } 1608 1609 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1610 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1611 1612 bs_entry->state.bs = bs; 1613 bs_entry->state.flags = flags; 1614 1615 return bs_queue; 1616 } 1617 1618 /* 1619 * Reopen multiple BlockDriverStates atomically & transactionally. 1620 * 1621 * The queue passed in (bs_queue) must have been built up previous 1622 * via bdrv_reopen_queue(). 1623 * 1624 * Reopens all BDS specified in the queue, with the appropriate 1625 * flags. All devices are prepared for reopen, and failure of any 1626 * device will cause all device changes to be abandonded, and intermediate 1627 * data cleaned up. 1628 * 1629 * If all devices prepare successfully, then the changes are committed 1630 * to all devices. 
1631 * 1632 */ 1633 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1634 { 1635 int ret = -1; 1636 BlockReopenQueueEntry *bs_entry, *next; 1637 Error *local_err = NULL; 1638 1639 assert(bs_queue != NULL); 1640 1641 bdrv_drain_all(); 1642 1643 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1644 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1645 error_propagate(errp, local_err); 1646 goto cleanup; 1647 } 1648 bs_entry->prepared = true; 1649 } 1650 1651 /* If we reach this point, we have success and just need to apply the 1652 * changes 1653 */ 1654 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1655 bdrv_reopen_commit(&bs_entry->state); 1656 } 1657 1658 ret = 0; 1659 1660 cleanup: 1661 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1662 if (ret && bs_entry->prepared) { 1663 bdrv_reopen_abort(&bs_entry->state); 1664 } 1665 g_free(bs_entry); 1666 } 1667 g_free(bs_queue); 1668 return ret; 1669 } 1670 1671 1672 /* Reopen a single BlockDriverState with the specified flags. */ 1673 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1674 { 1675 int ret = -1; 1676 Error *local_err = NULL; 1677 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1678 1679 ret = bdrv_reopen_multiple(queue, &local_err); 1680 if (local_err != NULL) { 1681 error_propagate(errp, local_err); 1682 } 1683 return ret; 1684 } 1685 1686 1687 /* 1688 * Prepares a BlockDriverState for reopen. All changes are staged in the 1689 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1690 * the block driver layer .bdrv_reopen_prepare() 1691 * 1692 * bs is the BlockDriverState to reopen 1693 * flags are the new open flags 1694 * queue is the reopen queue 1695 * 1696 * Returns 0 on success, non-zero on error. On error errp will be set 1697 * as well. 1698 * 1699 * On failure, bdrv_reopen_abort() will be called to clean up any data. 
1700 * It is the responsibility of the caller to then call the abort() or 1701 * commit() for any other BDS that have been left in a prepare() state 1702 * 1703 */ 1704 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1705 Error **errp) 1706 { 1707 int ret = -1; 1708 Error *local_err = NULL; 1709 BlockDriver *drv; 1710 1711 assert(reopen_state != NULL); 1712 assert(reopen_state->bs->drv != NULL); 1713 drv = reopen_state->bs->drv; 1714 1715 /* if we are to stay read-only, do not allow permission change 1716 * to r/w */ 1717 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1718 reopen_state->flags & BDRV_O_RDWR) { 1719 error_setg(errp, "Node '%s' is read only", 1720 bdrv_get_device_or_node_name(reopen_state->bs)); 1721 goto error; 1722 } 1723 1724 1725 ret = bdrv_flush(reopen_state->bs); 1726 if (ret) { 1727 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1728 strerror(-ret)); 1729 goto error; 1730 } 1731 1732 if (drv->bdrv_reopen_prepare) { 1733 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1734 if (ret) { 1735 if (local_err != NULL) { 1736 error_propagate(errp, local_err); 1737 } else { 1738 error_setg(errp, "failed while preparing to reopen image '%s'", 1739 reopen_state->bs->filename); 1740 } 1741 goto error; 1742 } 1743 } else { 1744 /* It is currently mandatory to have a bdrv_reopen_prepare() 1745 * handler for each supported drv. */ 1746 error_setg(errp, "Block format '%s' used by node '%s' " 1747 "does not support reopening files", drv->format_name, 1748 bdrv_get_device_or_node_name(reopen_state->bs)); 1749 ret = -1; 1750 goto error; 1751 } 1752 1753 ret = 0; 1754 1755 error: 1756 return ret; 1757 } 1758 1759 /* 1760 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1761 * makes them final by swapping the staging BlockDriverState contents into 1762 * the active BlockDriverState contents. 
1763 */ 1764 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1765 { 1766 BlockDriver *drv; 1767 1768 assert(reopen_state != NULL); 1769 drv = reopen_state->bs->drv; 1770 assert(drv != NULL); 1771 1772 /* If there are any driver level actions to take */ 1773 if (drv->bdrv_reopen_commit) { 1774 drv->bdrv_reopen_commit(reopen_state); 1775 } 1776 1777 /* set BDS specific flags now */ 1778 reopen_state->bs->open_flags = reopen_state->flags; 1779 reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1780 BDRV_O_CACHE_WB); 1781 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1782 1783 bdrv_refresh_limits(reopen_state->bs, NULL); 1784 } 1785 1786 /* 1787 * Abort the reopen, and delete and free the staged changes in 1788 * reopen_state 1789 */ 1790 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1791 { 1792 BlockDriver *drv; 1793 1794 assert(reopen_state != NULL); 1795 drv = reopen_state->bs->drv; 1796 assert(drv != NULL); 1797 1798 if (drv->bdrv_reopen_abort) { 1799 drv->bdrv_reopen_abort(reopen_state); 1800 } 1801 } 1802 1803 1804 void bdrv_close(BlockDriverState *bs) 1805 { 1806 BdrvAioNotifier *ban, *ban_next; 1807 1808 if (bs->job) { 1809 block_job_cancel_sync(bs->job); 1810 } 1811 bdrv_drain_all(); /* complete I/O */ 1812 bdrv_flush(bs); 1813 bdrv_drain_all(); /* in case flush left pending I/O */ 1814 notifier_list_notify(&bs->close_notifiers, bs); 1815 1816 if (bs->drv) { 1817 if (bs->backing_hd) { 1818 BlockDriverState *backing_hd = bs->backing_hd; 1819 bdrv_set_backing_hd(bs, NULL); 1820 bdrv_unref(backing_hd); 1821 } 1822 bs->drv->bdrv_close(bs); 1823 g_free(bs->opaque); 1824 bs->opaque = NULL; 1825 bs->drv = NULL; 1826 bs->copy_on_read = 0; 1827 bs->backing_file[0] = '\0'; 1828 bs->backing_format[0] = '\0'; 1829 bs->total_sectors = 0; 1830 bs->encrypted = 0; 1831 bs->valid_key = 0; 1832 bs->sg = 0; 1833 bs->zero_beyond_eof = false; 1834 QDECREF(bs->options); 1835 bs->options = NULL; 1836 QDECREF(bs->full_open_options); 
1837 bs->full_open_options = NULL; 1838 1839 if (bs->file != NULL) { 1840 bdrv_unref(bs->file); 1841 bs->file = NULL; 1842 } 1843 } 1844 1845 if (bs->blk) { 1846 blk_dev_change_media_cb(bs->blk, false); 1847 } 1848 1849 /*throttling disk I/O limits*/ 1850 if (bs->io_limits_enabled) { 1851 bdrv_io_limits_disable(bs); 1852 } 1853 1854 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 1855 g_free(ban); 1856 } 1857 QLIST_INIT(&bs->aio_notifiers); 1858 } 1859 1860 void bdrv_close_all(void) 1861 { 1862 BlockDriverState *bs; 1863 1864 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1865 AioContext *aio_context = bdrv_get_aio_context(bs); 1866 1867 aio_context_acquire(aio_context); 1868 bdrv_close(bs); 1869 aio_context_release(aio_context); 1870 } 1871 } 1872 1873 /* make a BlockDriverState anonymous by removing from bdrv_state and 1874 * graph_bdrv_state list. 1875 Also, NULL terminate the device_name to prevent double remove */ 1876 void bdrv_make_anon(BlockDriverState *bs) 1877 { 1878 /* 1879 * Take care to remove bs from bdrv_states only when it's actually 1880 * in it. Note that bs->device_list.tqe_prev is initially null, 1881 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 1882 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 1883 * resetting it to null on remove. 
1884 */ 1885 if (bs->device_list.tqe_prev) { 1886 QTAILQ_REMOVE(&bdrv_states, bs, device_list); 1887 bs->device_list.tqe_prev = NULL; 1888 } 1889 if (bs->node_name[0] != '\0') { 1890 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 1891 } 1892 bs->node_name[0] = '\0'; 1893 } 1894 1895 static void bdrv_rebind(BlockDriverState *bs) 1896 { 1897 if (bs->drv && bs->drv->bdrv_rebind) { 1898 bs->drv->bdrv_rebind(bs); 1899 } 1900 } 1901 1902 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 1903 BlockDriverState *bs_src) 1904 { 1905 /* move some fields that need to stay attached to the device */ 1906 1907 /* dev info */ 1908 bs_dest->guest_block_size = bs_src->guest_block_size; 1909 bs_dest->copy_on_read = bs_src->copy_on_read; 1910 1911 bs_dest->enable_write_cache = bs_src->enable_write_cache; 1912 1913 /* i/o throttled req */ 1914 memcpy(&bs_dest->throttle_state, 1915 &bs_src->throttle_state, 1916 sizeof(ThrottleState)); 1917 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 1918 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 1919 bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 1920 1921 /* r/w error */ 1922 bs_dest->on_read_error = bs_src->on_read_error; 1923 bs_dest->on_write_error = bs_src->on_write_error; 1924 1925 /* i/o status */ 1926 bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 1927 bs_dest->iostatus = bs_src->iostatus; 1928 1929 /* dirty bitmap */ 1930 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 1931 1932 /* reference count */ 1933 bs_dest->refcnt = bs_src->refcnt; 1934 1935 /* job */ 1936 bs_dest->job = bs_src->job; 1937 1938 /* keep the same entry in bdrv_states */ 1939 bs_dest->device_list = bs_src->device_list; 1940 bs_dest->blk = bs_src->blk; 1941 1942 memcpy(bs_dest->op_blockers, bs_src->op_blockers, 1943 sizeof(bs_dest->op_blockers)); 1944 } 1945 1946 /* 1947 * Swap bs contents for two image chains while they are live, 1948 * while keeping required fields on the BlockDriverState that is 1949 * actually 
attached to a device. 1950 * 1951 * This will modify the BlockDriverState fields, and swap contents 1952 * between bs_new and bs_old. Both bs_new and bs_old are modified. 1953 * 1954 * bs_new must not be attached to a BlockBackend. 1955 * 1956 * This function does not create any image files. 1957 */ 1958 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 1959 { 1960 BlockDriverState tmp; 1961 1962 /* The code needs to swap the node_name but simply swapping node_list won't 1963 * work so first remove the nodes from the graph list, do the swap then 1964 * insert them back if needed. 1965 */ 1966 if (bs_new->node_name[0] != '\0') { 1967 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 1968 } 1969 if (bs_old->node_name[0] != '\0') { 1970 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 1971 } 1972 1973 /* bs_new must be unattached and shouldn't have anything fancy enabled */ 1974 assert(!bs_new->blk); 1975 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 1976 assert(bs_new->job == NULL); 1977 assert(bs_new->io_limits_enabled == false); 1978 assert(!throttle_have_timer(&bs_new->throttle_state)); 1979 1980 tmp = *bs_new; 1981 *bs_new = *bs_old; 1982 *bs_old = tmp; 1983 1984 /* there are some fields that should not be swapped, move them back */ 1985 bdrv_move_feature_fields(&tmp, bs_old); 1986 bdrv_move_feature_fields(bs_old, bs_new); 1987 bdrv_move_feature_fields(bs_new, &tmp); 1988 1989 /* bs_new must remain unattached */ 1990 assert(!bs_new->blk); 1991 1992 /* Check a few fields that should remain attached to the device */ 1993 assert(bs_new->job == NULL); 1994 assert(bs_new->io_limits_enabled == false); 1995 assert(!throttle_have_timer(&bs_new->throttle_state)); 1996 1997 /* insert the nodes back into the graph node list if needed */ 1998 if (bs_new->node_name[0] != '\0') { 1999 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 2000 } 2001 if (bs_old->node_name[0] != '\0') { 2002 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 
2003 } 2004 2005 bdrv_rebind(bs_new); 2006 bdrv_rebind(bs_old); 2007 } 2008 2009 /* 2010 * Add new bs contents at the top of an image chain while the chain is 2011 * live, while keeping required fields on the top layer. 2012 * 2013 * This will modify the BlockDriverState fields, and swap contents 2014 * between bs_new and bs_top. Both bs_new and bs_top are modified. 2015 * 2016 * bs_new must not be attached to a BlockBackend. 2017 * 2018 * This function does not create any image files. 2019 */ 2020 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 2021 { 2022 bdrv_swap(bs_new, bs_top); 2023 2024 /* The contents of 'tmp' will become bs_top, as we are 2025 * swapping bs_new and bs_top contents. */ 2026 bdrv_set_backing_hd(bs_top, bs_new); 2027 } 2028 2029 static void bdrv_delete(BlockDriverState *bs) 2030 { 2031 assert(!bs->job); 2032 assert(bdrv_op_blocker_is_empty(bs)); 2033 assert(!bs->refcnt); 2034 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 2035 2036 bdrv_close(bs); 2037 2038 /* remove from list, if necessary */ 2039 bdrv_make_anon(bs); 2040 2041 g_free(bs); 2042 } 2043 2044 /* 2045 * Run consistency checks on an image 2046 * 2047 * Returns 0 if the check could be completed (it doesn't mean that the image is 2048 * free of errors) or -errno when an internal error occurred. The results of the 2049 * check are stored in res. 
2050 */ 2051 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2052 { 2053 if (bs->drv == NULL) { 2054 return -ENOMEDIUM; 2055 } 2056 if (bs->drv->bdrv_check == NULL) { 2057 return -ENOTSUP; 2058 } 2059 2060 memset(res, 0, sizeof(*res)); 2061 return bs->drv->bdrv_check(bs, res, fix); 2062 } 2063 2064 #define COMMIT_BUF_SECTORS 2048 2065 2066 /* commit COW file into the raw image */ 2067 int bdrv_commit(BlockDriverState *bs) 2068 { 2069 BlockDriver *drv = bs->drv; 2070 int64_t sector, total_sectors, length, backing_length; 2071 int n, ro, open_flags; 2072 int ret = 0; 2073 uint8_t *buf = NULL; 2074 2075 if (!drv) 2076 return -ENOMEDIUM; 2077 2078 if (!bs->backing_hd) { 2079 return -ENOTSUP; 2080 } 2081 2082 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2083 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 2084 return -EBUSY; 2085 } 2086 2087 ro = bs->backing_hd->read_only; 2088 open_flags = bs->backing_hd->open_flags; 2089 2090 if (ro) { 2091 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 2092 return -EACCES; 2093 } 2094 } 2095 2096 length = bdrv_getlength(bs); 2097 if (length < 0) { 2098 ret = length; 2099 goto ro_cleanup; 2100 } 2101 2102 backing_length = bdrv_getlength(bs->backing_hd); 2103 if (backing_length < 0) { 2104 ret = backing_length; 2105 goto ro_cleanup; 2106 } 2107 2108 /* If our top snapshot is larger than the backing file image, 2109 * grow the backing file image if possible. If not possible, 2110 * we must return an error */ 2111 if (length > backing_length) { 2112 ret = bdrv_truncate(bs->backing_hd, length); 2113 if (ret < 0) { 2114 goto ro_cleanup; 2115 } 2116 } 2117 2118 total_sectors = length >> BDRV_SECTOR_BITS; 2119 2120 /* qemu_try_blockalign() for bs will choose an alignment that works for 2121 * bs->backing_hd as well, so no need to compare the alignment manually. 
*/ 2122 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2123 if (buf == NULL) { 2124 ret = -ENOMEM; 2125 goto ro_cleanup; 2126 } 2127 2128 for (sector = 0; sector < total_sectors; sector += n) { 2129 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2130 if (ret < 0) { 2131 goto ro_cleanup; 2132 } 2133 if (ret) { 2134 ret = bdrv_read(bs, sector, buf, n); 2135 if (ret < 0) { 2136 goto ro_cleanup; 2137 } 2138 2139 ret = bdrv_write(bs->backing_hd, sector, buf, n); 2140 if (ret < 0) { 2141 goto ro_cleanup; 2142 } 2143 } 2144 } 2145 2146 if (drv->bdrv_make_empty) { 2147 ret = drv->bdrv_make_empty(bs); 2148 if (ret < 0) { 2149 goto ro_cleanup; 2150 } 2151 bdrv_flush(bs); 2152 } 2153 2154 /* 2155 * Make sure all data we wrote to the backing device is actually 2156 * stable on disk. 2157 */ 2158 if (bs->backing_hd) { 2159 bdrv_flush(bs->backing_hd); 2160 } 2161 2162 ret = 0; 2163 ro_cleanup: 2164 qemu_vfree(buf); 2165 2166 if (ro) { 2167 /* ignoring error return here */ 2168 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 2169 } 2170 2171 return ret; 2172 } 2173 2174 int bdrv_commit_all(void) 2175 { 2176 BlockDriverState *bs; 2177 2178 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2179 AioContext *aio_context = bdrv_get_aio_context(bs); 2180 2181 aio_context_acquire(aio_context); 2182 if (bs->drv && bs->backing_hd) { 2183 int ret = bdrv_commit(bs); 2184 if (ret < 0) { 2185 aio_context_release(aio_context); 2186 return ret; 2187 } 2188 } 2189 aio_context_release(aio_context); 2190 } 2191 return 0; 2192 } 2193 2194 /* 2195 * Return values: 2196 * 0 - success 2197 * -EINVAL - backing format specified, but no file 2198 * -ENOSPC - can't update the backing file because no space is left in the 2199 * image file header 2200 * -ENOTSUP - format driver doesn't support changing the backing file 2201 */ 2202 int bdrv_change_backing_file(BlockDriverState *bs, 2203 const char *backing_file, const char *backing_fmt) 2204 { 2205 
BlockDriver *drv = bs->drv; 2206 int ret; 2207 2208 /* Backing file format doesn't make sense without a backing file */ 2209 if (backing_fmt && !backing_file) { 2210 return -EINVAL; 2211 } 2212 2213 if (drv->bdrv_change_backing_file != NULL) { 2214 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2215 } else { 2216 ret = -ENOTSUP; 2217 } 2218 2219 if (ret == 0) { 2220 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2221 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2222 } 2223 return ret; 2224 } 2225 2226 /* 2227 * Finds the image layer in the chain that has 'bs' as its backing file. 2228 * 2229 * active is the current topmost image. 2230 * 2231 * Returns NULL if bs is not found in active's image chain, 2232 * or if active == bs. 2233 * 2234 * Returns the bottommost base image if bs == NULL. 2235 */ 2236 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2237 BlockDriverState *bs) 2238 { 2239 while (active && bs != active->backing_hd) { 2240 active = active->backing_hd; 2241 } 2242 2243 return active; 2244 } 2245 2246 /* Given a BDS, searches for the base layer. */ 2247 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2248 { 2249 return bdrv_find_overlay(bs, NULL); 2250 } 2251 2252 typedef struct BlkIntermediateStates { 2253 BlockDriverState *bs; 2254 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 2255 } BlkIntermediateStates; 2256 2257 2258 /* 2259 * Drops images above 'base' up to and including 'top', and sets the image 2260 * above 'top' to have base as its backing file. 2261 * 2262 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2263 * information in 'bs' can be properly updated. 
2264 * 2265 * E.g., this will convert the following chain: 2266 * bottom <- base <- intermediate <- top <- active 2267 * 2268 * to 2269 * 2270 * bottom <- base <- active 2271 * 2272 * It is allowed for bottom==base, in which case it converts: 2273 * 2274 * base <- intermediate <- top <- active 2275 * 2276 * to 2277 * 2278 * base <- active 2279 * 2280 * If backing_file_str is non-NULL, it will be used when modifying top's 2281 * overlay image metadata. 2282 * 2283 * Error conditions: 2284 * if active == top, that is considered an error 2285 * 2286 */ 2287 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2288 BlockDriverState *base, const char *backing_file_str) 2289 { 2290 BlockDriverState *intermediate; 2291 BlockDriverState *base_bs = NULL; 2292 BlockDriverState *new_top_bs = NULL; 2293 BlkIntermediateStates *intermediate_state, *next; 2294 int ret = -EIO; 2295 2296 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 2297 QSIMPLEQ_INIT(&states_to_delete); 2298 2299 if (!top->drv || !base->drv) { 2300 goto exit; 2301 } 2302 2303 new_top_bs = bdrv_find_overlay(active, top); 2304 2305 if (new_top_bs == NULL) { 2306 /* we could not find the image above 'top', this is an error */ 2307 goto exit; 2308 } 2309 2310 /* special case of new_top_bs->backing_hd already pointing to base - nothing 2311 * to do, no intermediate images */ 2312 if (new_top_bs->backing_hd == base) { 2313 ret = 0; 2314 goto exit; 2315 } 2316 2317 intermediate = top; 2318 2319 /* now we will go down through the list, and add each BDS we find 2320 * into our deletion queue, until we hit the 'base' 2321 */ 2322 while (intermediate) { 2323 intermediate_state = g_new0(BlkIntermediateStates, 1); 2324 intermediate_state->bs = intermediate; 2325 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 2326 2327 if (intermediate->backing_hd == base) { 2328 base_bs = intermediate->backing_hd; 2329 break; 2330 } 2331 intermediate = 
intermediate->backing_hd; 2332 } 2333 if (base_bs == NULL) { 2334 /* something went wrong, we did not end at the base. safely 2335 * unravel everything, and exit with error */ 2336 goto exit; 2337 } 2338 2339 /* success - we can delete the intermediate states, and link top->base */ 2340 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 2341 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2342 base_bs->drv ? base_bs->drv->format_name : ""); 2343 if (ret) { 2344 goto exit; 2345 } 2346 bdrv_set_backing_hd(new_top_bs, base_bs); 2347 2348 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2349 /* so that bdrv_close() does not recursively close the chain */ 2350 bdrv_set_backing_hd(intermediate_state->bs, NULL); 2351 bdrv_unref(intermediate_state->bs); 2352 } 2353 ret = 0; 2354 2355 exit: 2356 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 2357 g_free(intermediate_state); 2358 } 2359 return ret; 2360 } 2361 2362 /** 2363 * Truncate file to 'offset' bytes (needed only for file protocols) 2364 */ 2365 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2366 { 2367 BlockDriver *drv = bs->drv; 2368 int ret; 2369 if (!drv) 2370 return -ENOMEDIUM; 2371 if (!drv->bdrv_truncate) 2372 return -ENOTSUP; 2373 if (bs->read_only) 2374 return -EACCES; 2375 2376 ret = drv->bdrv_truncate(bs, offset); 2377 if (ret == 0) { 2378 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2379 bdrv_dirty_bitmap_truncate(bs); 2380 if (bs->blk) { 2381 blk_dev_resize_cb(bs->blk); 2382 } 2383 } 2384 return ret; 2385 } 2386 2387 /** 2388 * Length of a allocated file in bytes. Sparse files are counted by actual 2389 * allocated space. Return < 0 if error or unknown. 
2390 */ 2391 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2392 { 2393 BlockDriver *drv = bs->drv; 2394 if (!drv) { 2395 return -ENOMEDIUM; 2396 } 2397 if (drv->bdrv_get_allocated_file_size) { 2398 return drv->bdrv_get_allocated_file_size(bs); 2399 } 2400 if (bs->file) { 2401 return bdrv_get_allocated_file_size(bs->file); 2402 } 2403 return -ENOTSUP; 2404 } 2405 2406 /** 2407 * Return number of sectors on success, -errno on error. 2408 */ 2409 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2410 { 2411 BlockDriver *drv = bs->drv; 2412 2413 if (!drv) 2414 return -ENOMEDIUM; 2415 2416 if (drv->has_variable_length) { 2417 int ret = refresh_total_sectors(bs, bs->total_sectors); 2418 if (ret < 0) { 2419 return ret; 2420 } 2421 } 2422 return bs->total_sectors; 2423 } 2424 2425 /** 2426 * Return length in bytes on success, -errno on error. 2427 * The length is always a multiple of BDRV_SECTOR_SIZE. 2428 */ 2429 int64_t bdrv_getlength(BlockDriverState *bs) 2430 { 2431 int64_t ret = bdrv_nb_sectors(bs); 2432 2433 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2434 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2435 } 2436 2437 /* return 0 as number of sectors if no device present or error */ 2438 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2439 { 2440 int64_t nb_sectors = bdrv_nb_sectors(bs); 2441 2442 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2443 } 2444 2445 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2446 BlockdevOnError on_write_error) 2447 { 2448 bs->on_read_error = on_read_error; 2449 bs->on_write_error = on_write_error; 2450 } 2451 2452 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2453 { 2454 return is_read ? bs->on_read_error : bs->on_write_error; 2455 } 2456 2457 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2458 { 2459 BlockdevOnError on_err = is_read ? 
bs->on_read_error : bs->on_write_error; 2460 2461 switch (on_err) { 2462 case BLOCKDEV_ON_ERROR_ENOSPC: 2463 return (error == ENOSPC) ? 2464 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2465 case BLOCKDEV_ON_ERROR_STOP: 2466 return BLOCK_ERROR_ACTION_STOP; 2467 case BLOCKDEV_ON_ERROR_REPORT: 2468 return BLOCK_ERROR_ACTION_REPORT; 2469 case BLOCKDEV_ON_ERROR_IGNORE: 2470 return BLOCK_ERROR_ACTION_IGNORE; 2471 default: 2472 abort(); 2473 } 2474 } 2475 2476 static void send_qmp_error_event(BlockDriverState *bs, 2477 BlockErrorAction action, 2478 bool is_read, int error) 2479 { 2480 IoOperationType optype; 2481 2482 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2483 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 2484 bdrv_iostatus_is_enabled(bs), 2485 error == ENOSPC, strerror(error), 2486 &error_abort); 2487 } 2488 2489 /* This is done by device models because, while the block layer knows 2490 * about the error, it does not know whether an operation comes from 2491 * the device or the block layer (from a job, for example). 2492 */ 2493 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2494 bool is_read, int error) 2495 { 2496 assert(error >= 0); 2497 2498 if (action == BLOCK_ERROR_ACTION_STOP) { 2499 /* First set the iostatus, so that "info block" returns an iostatus 2500 * that matches the events raised so far (an additional error iostatus 2501 * is fine, but not a lost one). 2502 */ 2503 bdrv_iostatus_set_err(bs, error); 2504 2505 /* Then raise the request to stop the VM and the event. 2506 * qemu_system_vmstop_request_prepare has two effects. First, 2507 * it ensures that the STOP event always comes after the 2508 * BLOCK_IO_ERROR event. Second, it ensures that even if management 2509 * can observe the STOP event and do a "cont" before the STOP 2510 * event is issued, the VM will not stop. In this case, vm_start() 2511 * also ensures that the STOP/RESUME pair of events is emitted. 
2512 */ 2513 qemu_system_vmstop_request_prepare(); 2514 send_qmp_error_event(bs, action, is_read, error); 2515 qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 2516 } else { 2517 send_qmp_error_event(bs, action, is_read, error); 2518 } 2519 } 2520 2521 int bdrv_is_read_only(BlockDriverState *bs) 2522 { 2523 return bs->read_only; 2524 } 2525 2526 int bdrv_is_sg(BlockDriverState *bs) 2527 { 2528 return bs->sg; 2529 } 2530 2531 int bdrv_enable_write_cache(BlockDriverState *bs) 2532 { 2533 return bs->enable_write_cache; 2534 } 2535 2536 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2537 { 2538 bs->enable_write_cache = wce; 2539 2540 /* so a reopen() will preserve wce */ 2541 if (wce) { 2542 bs->open_flags |= BDRV_O_CACHE_WB; 2543 } else { 2544 bs->open_flags &= ~BDRV_O_CACHE_WB; 2545 } 2546 } 2547 2548 int bdrv_is_encrypted(BlockDriverState *bs) 2549 { 2550 if (bs->backing_hd && bs->backing_hd->encrypted) 2551 return 1; 2552 return bs->encrypted; 2553 } 2554 2555 int bdrv_key_required(BlockDriverState *bs) 2556 { 2557 BlockDriverState *backing_hd = bs->backing_hd; 2558 2559 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 2560 return 1; 2561 return (bs->encrypted && !bs->valid_key); 2562 } 2563 2564 int bdrv_set_key(BlockDriverState *bs, const char *key) 2565 { 2566 int ret; 2567 if (bs->backing_hd && bs->backing_hd->encrypted) { 2568 ret = bdrv_set_key(bs->backing_hd, key); 2569 if (ret < 0) 2570 return ret; 2571 if (!bs->encrypted) 2572 return 0; 2573 } 2574 if (!bs->encrypted) { 2575 return -EINVAL; 2576 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2577 return -ENOMEDIUM; 2578 } 2579 ret = bs->drv->bdrv_set_key(bs, key); 2580 if (ret < 0) { 2581 bs->valid_key = 0; 2582 } else if (!bs->valid_key) { 2583 bs->valid_key = 1; 2584 if (bs->blk) { 2585 /* call the change callback now, we skipped it on open */ 2586 blk_dev_change_media_cb(bs->blk, true); 2587 } 2588 } 2589 return ret; 2590 } 2591 2592 /* 2593 * Provide an encryption 
key for @bs. 2594 * If @key is non-null: 2595 * If @bs is not encrypted, fail. 2596 * Else if the key is invalid, fail. 2597 * Else set @bs's key to @key, replacing the existing key, if any. 2598 * If @key is null: 2599 * If @bs is encrypted and still lacks a key, fail. 2600 * Else do nothing. 2601 * On failure, store an error object through @errp if non-null. 2602 */ 2603 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2604 { 2605 if (key) { 2606 if (!bdrv_is_encrypted(bs)) { 2607 error_setg(errp, "Node '%s' is not encrypted", 2608 bdrv_get_device_or_node_name(bs)); 2609 } else if (bdrv_set_key(bs, key) < 0) { 2610 error_set(errp, QERR_INVALID_PASSWORD); 2611 } 2612 } else { 2613 if (bdrv_key_required(bs)) { 2614 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2615 "'%s' (%s) is encrypted", 2616 bdrv_get_device_or_node_name(bs), 2617 bdrv_get_encrypted_filename(bs)); 2618 } 2619 } 2620 } 2621 2622 const char *bdrv_get_format_name(BlockDriverState *bs) 2623 { 2624 return bs->drv ? 
bs->drv->format_name : NULL; 2625 } 2626 2627 static int qsort_strcmp(const void *a, const void *b) 2628 { 2629 return strcmp(a, b); 2630 } 2631 2632 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2633 void *opaque) 2634 { 2635 BlockDriver *drv; 2636 int count = 0; 2637 int i; 2638 const char **formats = NULL; 2639 2640 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2641 if (drv->format_name) { 2642 bool found = false; 2643 int i = count; 2644 while (formats && i && !found) { 2645 found = !strcmp(formats[--i], drv->format_name); 2646 } 2647 2648 if (!found) { 2649 formats = g_renew(const char *, formats, count + 1); 2650 formats[count++] = drv->format_name; 2651 } 2652 } 2653 } 2654 2655 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2656 2657 for (i = 0; i < count; i++) { 2658 it(opaque, formats[i]); 2659 } 2660 2661 g_free(formats); 2662 } 2663 2664 /* This function is to find a node in the bs graph */ 2665 BlockDriverState *bdrv_find_node(const char *node_name) 2666 { 2667 BlockDriverState *bs; 2668 2669 assert(node_name); 2670 2671 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2672 if (!strcmp(node_name, bs->node_name)) { 2673 return bs; 2674 } 2675 } 2676 return NULL; 2677 } 2678 2679 /* Put this QMP function here so it can access the static graph_bdrv_states. 
*/ 2680 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2681 { 2682 BlockDeviceInfoList *list, *entry; 2683 BlockDriverState *bs; 2684 2685 list = NULL; 2686 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2687 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 2688 if (!info) { 2689 qapi_free_BlockDeviceInfoList(list); 2690 return NULL; 2691 } 2692 entry = g_malloc0(sizeof(*entry)); 2693 entry->value = info; 2694 entry->next = list; 2695 list = entry; 2696 } 2697 2698 return list; 2699 } 2700 2701 BlockDriverState *bdrv_lookup_bs(const char *device, 2702 const char *node_name, 2703 Error **errp) 2704 { 2705 BlockBackend *blk; 2706 BlockDriverState *bs; 2707 2708 if (device) { 2709 blk = blk_by_name(device); 2710 2711 if (blk) { 2712 return blk_bs(blk); 2713 } 2714 } 2715 2716 if (node_name) { 2717 bs = bdrv_find_node(node_name); 2718 2719 if (bs) { 2720 return bs; 2721 } 2722 } 2723 2724 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2725 device ? device : "", 2726 node_name ? node_name : ""); 2727 return NULL; 2728 } 2729 2730 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2731 * return false. If either argument is NULL, return false. 
*/ 2732 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2733 { 2734 while (top && top != base) { 2735 top = top->backing_hd; 2736 } 2737 2738 return top != NULL; 2739 } 2740 2741 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2742 { 2743 if (!bs) { 2744 return QTAILQ_FIRST(&graph_bdrv_states); 2745 } 2746 return QTAILQ_NEXT(bs, node_list); 2747 } 2748 2749 BlockDriverState *bdrv_next(BlockDriverState *bs) 2750 { 2751 if (!bs) { 2752 return QTAILQ_FIRST(&bdrv_states); 2753 } 2754 return QTAILQ_NEXT(bs, device_list); 2755 } 2756 2757 const char *bdrv_get_node_name(const BlockDriverState *bs) 2758 { 2759 return bs->node_name; 2760 } 2761 2762 /* TODO check what callers really want: bs->node_name or blk_name() */ 2763 const char *bdrv_get_device_name(const BlockDriverState *bs) 2764 { 2765 return bs->blk ? blk_name(bs->blk) : ""; 2766 } 2767 2768 /* This can be used to identify nodes that might not have a device 2769 * name associated. Since node and device names live in the same 2770 * namespace, the result is unambiguous. The exception is if both are 2771 * absent, then this returns an empty (non-null) string. */ 2772 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2773 { 2774 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2775 } 2776 2777 int bdrv_get_flags(BlockDriverState *bs) 2778 { 2779 return bs->open_flags; 2780 } 2781 2782 int bdrv_has_zero_init_1(BlockDriverState *bs) 2783 { 2784 return 1; 2785 } 2786 2787 int bdrv_has_zero_init(BlockDriverState *bs) 2788 { 2789 assert(bs->drv); 2790 2791 /* If BS is a copy on write image, it is initialized to 2792 the contents of the base image, which may not be zeroes. 
*/ 2793 if (bs->backing_hd) { 2794 return 0; 2795 } 2796 if (bs->drv->bdrv_has_zero_init) { 2797 return bs->drv->bdrv_has_zero_init(bs); 2798 } 2799 2800 /* safe default */ 2801 return 0; 2802 } 2803 2804 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2805 { 2806 BlockDriverInfo bdi; 2807 2808 if (bs->backing_hd) { 2809 return false; 2810 } 2811 2812 if (bdrv_get_info(bs, &bdi) == 0) { 2813 return bdi.unallocated_blocks_are_zero; 2814 } 2815 2816 return false; 2817 } 2818 2819 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 2820 { 2821 BlockDriverInfo bdi; 2822 2823 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 2824 return false; 2825 } 2826 2827 if (bdrv_get_info(bs, &bdi) == 0) { 2828 return bdi.can_write_zeroes_with_unmap; 2829 } 2830 2831 return false; 2832 } 2833 2834 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2835 { 2836 if (bs->backing_hd && bs->backing_hd->encrypted) 2837 return bs->backing_file; 2838 else if (bs->encrypted) 2839 return bs->filename; 2840 else 2841 return NULL; 2842 } 2843 2844 void bdrv_get_backing_filename(BlockDriverState *bs, 2845 char *filename, int filename_size) 2846 { 2847 pstrcpy(filename, filename_size, bs->backing_file); 2848 } 2849 2850 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2851 { 2852 BlockDriver *drv = bs->drv; 2853 if (!drv) 2854 return -ENOMEDIUM; 2855 if (!drv->bdrv_get_info) 2856 return -ENOTSUP; 2857 memset(bdi, 0, sizeof(*bdi)); 2858 return drv->bdrv_get_info(bs, bdi); 2859 } 2860 2861 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 2862 { 2863 BlockDriver *drv = bs->drv; 2864 if (drv && drv->bdrv_get_specific_info) { 2865 return drv->bdrv_get_specific_info(bs); 2866 } 2867 return NULL; 2868 } 2869 2870 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2871 { 2872 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 2873 return; 2874 } 2875 2876 bs->drv->bdrv_debug_event(bs, event); 2877 } 2878 2879 int 
bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 2880 const char *tag) 2881 { 2882 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 2883 bs = bs->file; 2884 } 2885 2886 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 2887 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 2888 } 2889 2890 return -ENOTSUP; 2891 } 2892 2893 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 2894 { 2895 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 2896 bs = bs->file; 2897 } 2898 2899 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 2900 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 2901 } 2902 2903 return -ENOTSUP; 2904 } 2905 2906 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 2907 { 2908 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 2909 bs = bs->file; 2910 } 2911 2912 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 2913 return bs->drv->bdrv_debug_resume(bs, tag); 2914 } 2915 2916 return -ENOTSUP; 2917 } 2918 2919 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 2920 { 2921 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 2922 bs = bs->file; 2923 } 2924 2925 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 2926 return bs->drv->bdrv_debug_is_suspended(bs, tag); 2927 } 2928 2929 return false; 2930 } 2931 2932 int bdrv_is_snapshot(BlockDriverState *bs) 2933 { 2934 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2935 } 2936 2937 /* backing_file can either be relative, or absolute, or a protocol. If it is 2938 * relative, it must be relative to the chain. So, passing in bs->filename 2939 * from a BDS as backing_file should not be done, as that may be relative to 2940 * the CWD rather than the chain. 
*/ 2941 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 2942 const char *backing_file) 2943 { 2944 char *filename_full = NULL; 2945 char *backing_file_full = NULL; 2946 char *filename_tmp = NULL; 2947 int is_protocol = 0; 2948 BlockDriverState *curr_bs = NULL; 2949 BlockDriverState *retval = NULL; 2950 2951 if (!bs || !bs->drv || !backing_file) { 2952 return NULL; 2953 } 2954 2955 filename_full = g_malloc(PATH_MAX); 2956 backing_file_full = g_malloc(PATH_MAX); 2957 filename_tmp = g_malloc(PATH_MAX); 2958 2959 is_protocol = path_has_protocol(backing_file); 2960 2961 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 2962 2963 /* If either of the filename paths is actually a protocol, then 2964 * compare unmodified paths; otherwise make paths relative */ 2965 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 2966 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 2967 retval = curr_bs->backing_hd; 2968 break; 2969 } 2970 } else { 2971 /* If not an absolute filename path, make it relative to the current 2972 * image's filename path */ 2973 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2974 backing_file); 2975 2976 /* We are going to compare absolute pathnames */ 2977 if (!realpath(filename_tmp, filename_full)) { 2978 continue; 2979 } 2980 2981 /* We need to make sure the backing filename we are comparing against 2982 * is relative to the current image filename (or absolute) */ 2983 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 2984 curr_bs->backing_file); 2985 2986 if (!realpath(filename_tmp, backing_file_full)) { 2987 continue; 2988 } 2989 2990 if (strcmp(backing_file_full, filename_full) == 0) { 2991 retval = curr_bs->backing_hd; 2992 break; 2993 } 2994 } 2995 } 2996 2997 g_free(filename_full); 2998 g_free(backing_file_full); 2999 g_free(filename_tmp); 3000 return retval; 3001 } 3002 3003 int bdrv_get_backing_file_depth(BlockDriverState *bs) 3004 { 3005 if (!bs->drv) { 3006 return 0; 3007 } 
3008 3009 if (!bs->backing_hd) { 3010 return 0; 3011 } 3012 3013 return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 3014 } 3015 3016 void bdrv_init(void) 3017 { 3018 module_call_init(MODULE_INIT_BLOCK); 3019 } 3020 3021 void bdrv_init_with_whitelist(void) 3022 { 3023 use_bdrv_whitelist = 1; 3024 bdrv_init(); 3025 } 3026 3027 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 3028 { 3029 Error *local_err = NULL; 3030 int ret; 3031 3032 if (!bs->drv) { 3033 return; 3034 } 3035 3036 if (!(bs->open_flags & BDRV_O_INCOMING)) { 3037 return; 3038 } 3039 bs->open_flags &= ~BDRV_O_INCOMING; 3040 3041 if (bs->drv->bdrv_invalidate_cache) { 3042 bs->drv->bdrv_invalidate_cache(bs, &local_err); 3043 } else if (bs->file) { 3044 bdrv_invalidate_cache(bs->file, &local_err); 3045 } 3046 if (local_err) { 3047 error_propagate(errp, local_err); 3048 return; 3049 } 3050 3051 ret = refresh_total_sectors(bs, bs->total_sectors); 3052 if (ret < 0) { 3053 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 3054 return; 3055 } 3056 } 3057 3058 void bdrv_invalidate_cache_all(Error **errp) 3059 { 3060 BlockDriverState *bs; 3061 Error *local_err = NULL; 3062 3063 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3064 AioContext *aio_context = bdrv_get_aio_context(bs); 3065 3066 aio_context_acquire(aio_context); 3067 bdrv_invalidate_cache(bs, &local_err); 3068 aio_context_release(aio_context); 3069 if (local_err) { 3070 error_propagate(errp, local_err); 3071 return; 3072 } 3073 } 3074 } 3075 3076 /**************************************************************/ 3077 /* removable device support */ 3078 3079 /** 3080 * Return TRUE if the media is present 3081 */ 3082 int bdrv_is_inserted(BlockDriverState *bs) 3083 { 3084 BlockDriver *drv = bs->drv; 3085 3086 if (!drv) 3087 return 0; 3088 if (!drv->bdrv_is_inserted) 3089 return 1; 3090 return drv->bdrv_is_inserted(bs); 3091 } 3092 3093 /** 3094 * Return whether the media changed since the last call to this 3095 * 
function, or -ENOTSUP if we don't know. Most drivers don't know. 3096 */ 3097 int bdrv_media_changed(BlockDriverState *bs) 3098 { 3099 BlockDriver *drv = bs->drv; 3100 3101 if (drv && drv->bdrv_media_changed) { 3102 return drv->bdrv_media_changed(bs); 3103 } 3104 return -ENOTSUP; 3105 } 3106 3107 /** 3108 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3109 */ 3110 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3111 { 3112 BlockDriver *drv = bs->drv; 3113 const char *device_name; 3114 3115 if (drv && drv->bdrv_eject) { 3116 drv->bdrv_eject(bs, eject_flag); 3117 } 3118 3119 device_name = bdrv_get_device_name(bs); 3120 if (device_name[0] != '\0') { 3121 qapi_event_send_device_tray_moved(device_name, 3122 eject_flag, &error_abort); 3123 } 3124 } 3125 3126 /** 3127 * Lock or unlock the media (if it is locked, the user won't be able 3128 * to eject it manually). 3129 */ 3130 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3131 { 3132 BlockDriver *drv = bs->drv; 3133 3134 trace_bdrv_lock_medium(bs, locked); 3135 3136 if (drv && drv->bdrv_lock_medium) { 3137 drv->bdrv_lock_medium(bs, locked); 3138 } 3139 } 3140 3141 void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 3142 { 3143 bs->guest_block_size = align; 3144 } 3145 3146 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 3147 { 3148 BdrvDirtyBitmap *bm; 3149 3150 assert(name); 3151 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3152 if (bm->name && !strcmp(name, bm->name)) { 3153 return bm; 3154 } 3155 } 3156 return NULL; 3157 } 3158 3159 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 3160 { 3161 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3162 g_free(bitmap->name); 3163 bitmap->name = NULL; 3164 } 3165 3166 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 3167 uint32_t granularity, 3168 const char *name, 3169 Error **errp) 3170 { 3171 int64_t bitmap_size; 3172 BdrvDirtyBitmap *bitmap; 3173 uint32_t 
sector_granularity; 3174 3175 assert((granularity & (granularity - 1)) == 0); 3176 3177 if (name && bdrv_find_dirty_bitmap(bs, name)) { 3178 error_setg(errp, "Bitmap already exists: %s", name); 3179 return NULL; 3180 } 3181 sector_granularity = granularity >> BDRV_SECTOR_BITS; 3182 assert(sector_granularity); 3183 bitmap_size = bdrv_nb_sectors(bs); 3184 if (bitmap_size < 0) { 3185 error_setg_errno(errp, -bitmap_size, "could not get length of device"); 3186 errno = -bitmap_size; 3187 return NULL; 3188 } 3189 bitmap = g_new0(BdrvDirtyBitmap, 1); 3190 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 3191 bitmap->size = bitmap_size; 3192 bitmap->name = g_strdup(name); 3193 bitmap->disabled = false; 3194 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 3195 return bitmap; 3196 } 3197 3198 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 3199 { 3200 return bitmap->successor; 3201 } 3202 3203 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 3204 { 3205 return !(bitmap->disabled || bitmap->successor); 3206 } 3207 3208 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) 3209 { 3210 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3211 return DIRTY_BITMAP_STATUS_FROZEN; 3212 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3213 return DIRTY_BITMAP_STATUS_DISABLED; 3214 } else { 3215 return DIRTY_BITMAP_STATUS_ACTIVE; 3216 } 3217 } 3218 3219 /** 3220 * Create a successor bitmap destined to replace this bitmap after an operation. 3221 * Requires that the bitmap is not frozen and has no successor. 
3222 */ 3223 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 3224 BdrvDirtyBitmap *bitmap, Error **errp) 3225 { 3226 uint64_t granularity; 3227 BdrvDirtyBitmap *child; 3228 3229 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3230 error_setg(errp, "Cannot create a successor for a bitmap that is " 3231 "currently frozen"); 3232 return -1; 3233 } 3234 assert(!bitmap->successor); 3235 3236 /* Create an anonymous successor */ 3237 granularity = bdrv_dirty_bitmap_granularity(bitmap); 3238 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 3239 if (!child) { 3240 return -1; 3241 } 3242 3243 /* Successor will be on or off based on our current state. */ 3244 child->disabled = bitmap->disabled; 3245 3246 /* Install the successor and freeze the parent */ 3247 bitmap->successor = child; 3248 return 0; 3249 } 3250 3251 /** 3252 * For a bitmap with a successor, yield our name to the successor, 3253 * delete the old bitmap, and return a handle to the new bitmap. 3254 */ 3255 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 3256 BdrvDirtyBitmap *bitmap, 3257 Error **errp) 3258 { 3259 char *name; 3260 BdrvDirtyBitmap *successor = bitmap->successor; 3261 3262 if (successor == NULL) { 3263 error_setg(errp, "Cannot relinquish control if " 3264 "there's no successor present"); 3265 return NULL; 3266 } 3267 3268 name = bitmap->name; 3269 bitmap->name = NULL; 3270 successor->name = name; 3271 bitmap->successor = NULL; 3272 bdrv_release_dirty_bitmap(bs, bitmap); 3273 3274 return successor; 3275 } 3276 3277 /** 3278 * In cases of failure where we can no longer safely delete the parent, 3279 * we may wish to re-join the parent and child/successor. 3280 * The merged parent will be un-frozen, but not explicitly re-enabled. 
3281 */ 3282 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 3283 BdrvDirtyBitmap *parent, 3284 Error **errp) 3285 { 3286 BdrvDirtyBitmap *successor = parent->successor; 3287 3288 if (!successor) { 3289 error_setg(errp, "Cannot reclaim a successor when none is present"); 3290 return NULL; 3291 } 3292 3293 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 3294 error_setg(errp, "Merging of parent and successor bitmap failed"); 3295 return NULL; 3296 } 3297 bdrv_release_dirty_bitmap(bs, successor); 3298 parent->successor = NULL; 3299 3300 return parent; 3301 } 3302 3303 /** 3304 * Truncates _all_ bitmaps attached to a BDS. 3305 */ 3306 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 3307 { 3308 BdrvDirtyBitmap *bitmap; 3309 uint64_t size = bdrv_nb_sectors(bs); 3310 3311 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3312 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3313 hbitmap_truncate(bitmap->bitmap, size); 3314 bitmap->size = size; 3315 } 3316 } 3317 3318 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 3319 { 3320 BdrvDirtyBitmap *bm, *next; 3321 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 3322 if (bm == bitmap) { 3323 assert(!bdrv_dirty_bitmap_frozen(bm)); 3324 QLIST_REMOVE(bitmap, list); 3325 hbitmap_free(bitmap->bitmap); 3326 g_free(bitmap->name); 3327 g_free(bitmap); 3328 return; 3329 } 3330 } 3331 } 3332 3333 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3334 { 3335 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3336 bitmap->disabled = true; 3337 } 3338 3339 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3340 { 3341 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3342 bitmap->disabled = false; 3343 } 3344 3345 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 3346 { 3347 BdrvDirtyBitmap *bm; 3348 BlockDirtyInfoList *list = NULL; 3349 BlockDirtyInfoList **plist = &list; 3350 3351 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3352 BlockDirtyInfo *info = 
g_new0(BlockDirtyInfo, 1); 3353 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 3354 info->count = bdrv_get_dirty_count(bm); 3355 info->granularity = bdrv_dirty_bitmap_granularity(bm); 3356 info->has_name = !!bm->name; 3357 info->name = g_strdup(bm->name); 3358 info->status = bdrv_dirty_bitmap_status(bm); 3359 entry->value = info; 3360 *plist = entry; 3361 plist = &entry->next; 3362 } 3363 3364 return list; 3365 } 3366 3367 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 3368 { 3369 if (bitmap) { 3370 return hbitmap_get(bitmap->bitmap, sector); 3371 } else { 3372 return 0; 3373 } 3374 } 3375 3376 /** 3377 * Chooses a default granularity based on the existing cluster size, 3378 * but clamped between [4K, 64K]. Defaults to 64K in the case that there 3379 * is no cluster size information available. 3380 */ 3381 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 3382 { 3383 BlockDriverInfo bdi; 3384 uint32_t granularity; 3385 3386 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 3387 granularity = MAX(4096, bdi.cluster_size); 3388 granularity = MIN(65536, granularity); 3389 } else { 3390 granularity = 65536; 3391 } 3392 3393 return granularity; 3394 } 3395 3396 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 3397 { 3398 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 3399 } 3400 3401 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 3402 { 3403 hbitmap_iter_init(hbi, bitmap->bitmap, 0); 3404 } 3405 3406 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3407 int64_t cur_sector, int nr_sectors) 3408 { 3409 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3410 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3411 } 3412 3413 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3414 int64_t cur_sector, int nr_sectors) 3415 { 3416 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3417 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3418 } 3419 3420 
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3421 { 3422 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3423 hbitmap_reset(bitmap->bitmap, 0, bitmap->size); 3424 } 3425 3426 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 3427 int nr_sectors) 3428 { 3429 BdrvDirtyBitmap *bitmap; 3430 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3431 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3432 continue; 3433 } 3434 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3435 } 3436 } 3437 3438 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3439 int nr_sectors) 3440 { 3441 BdrvDirtyBitmap *bitmap; 3442 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3443 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3444 continue; 3445 } 3446 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3447 } 3448 } 3449 3450 /** 3451 * Advance an HBitmapIter to an arbitrary offset. 3452 */ 3453 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 3454 { 3455 assert(hbi->hb); 3456 hbitmap_iter_init(hbi, hbi->hb, offset); 3457 } 3458 3459 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 3460 { 3461 return hbitmap_count(bitmap->bitmap); 3462 } 3463 3464 /* Get a reference to bs */ 3465 void bdrv_ref(BlockDriverState *bs) 3466 { 3467 bs->refcnt++; 3468 } 3469 3470 /* Release a previously grabbed reference to bs. 3471 * If after releasing, reference count is zero, the BlockDriverState is 3472 * deleted. 
*/ 3473 void bdrv_unref(BlockDriverState *bs) 3474 { 3475 if (!bs) { 3476 return; 3477 } 3478 assert(bs->refcnt > 0); 3479 if (--bs->refcnt == 0) { 3480 bdrv_delete(bs); 3481 } 3482 } 3483 3484 struct BdrvOpBlocker { 3485 Error *reason; 3486 QLIST_ENTRY(BdrvOpBlocker) list; 3487 }; 3488 3489 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 3490 { 3491 BdrvOpBlocker *blocker; 3492 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3493 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 3494 blocker = QLIST_FIRST(&bs->op_blockers[op]); 3495 if (errp) { 3496 error_setg(errp, "Node '%s' is busy: %s", 3497 bdrv_get_device_or_node_name(bs), 3498 error_get_pretty(blocker->reason)); 3499 } 3500 return true; 3501 } 3502 return false; 3503 } 3504 3505 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 3506 { 3507 BdrvOpBlocker *blocker; 3508 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3509 3510 blocker = g_new0(BdrvOpBlocker, 1); 3511 blocker->reason = reason; 3512 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 3513 } 3514 3515 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 3516 { 3517 BdrvOpBlocker *blocker, *next; 3518 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3519 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 3520 if (blocker->reason == reason) { 3521 QLIST_REMOVE(blocker, list); 3522 g_free(blocker); 3523 } 3524 } 3525 } 3526 3527 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 3528 { 3529 int i; 3530 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3531 bdrv_op_block(bs, i, reason); 3532 } 3533 } 3534 3535 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 3536 { 3537 int i; 3538 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3539 bdrv_op_unblock(bs, i, reason); 3540 } 3541 } 3542 3543 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 3544 { 3545 int i; 3546 3547 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3548 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 3549 
return false; 3550 } 3551 } 3552 return true; 3553 } 3554 3555 void bdrv_iostatus_enable(BlockDriverState *bs) 3556 { 3557 bs->iostatus_enabled = true; 3558 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3559 } 3560 3561 /* The I/O status is only enabled if the drive explicitly 3562 * enables it _and_ the VM is configured to stop on errors */ 3563 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3564 { 3565 return (bs->iostatus_enabled && 3566 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 3567 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 3568 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 3569 } 3570 3571 void bdrv_iostatus_disable(BlockDriverState *bs) 3572 { 3573 bs->iostatus_enabled = false; 3574 } 3575 3576 void bdrv_iostatus_reset(BlockDriverState *bs) 3577 { 3578 if (bdrv_iostatus_is_enabled(bs)) { 3579 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3580 if (bs->job) { 3581 block_job_iostatus_reset(bs->job); 3582 } 3583 } 3584 } 3585 3586 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3587 { 3588 assert(bdrv_iostatus_is_enabled(bs)); 3589 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3590 bs->iostatus = error == ENOSPC ? 
BLOCK_DEVICE_IO_STATUS_NOSPACE : 3591 BLOCK_DEVICE_IO_STATUS_FAILED; 3592 } 3593 } 3594 3595 void bdrv_img_create(const char *filename, const char *fmt, 3596 const char *base_filename, const char *base_fmt, 3597 char *options, uint64_t img_size, int flags, 3598 Error **errp, bool quiet) 3599 { 3600 QemuOptsList *create_opts = NULL; 3601 QemuOpts *opts = NULL; 3602 const char *backing_fmt, *backing_file; 3603 int64_t size; 3604 BlockDriver *drv, *proto_drv; 3605 BlockDriver *backing_drv = NULL; 3606 Error *local_err = NULL; 3607 int ret = 0; 3608 3609 /* Find driver and parse its options */ 3610 drv = bdrv_find_format(fmt); 3611 if (!drv) { 3612 error_setg(errp, "Unknown file format '%s'", fmt); 3613 return; 3614 } 3615 3616 proto_drv = bdrv_find_protocol(filename, true, errp); 3617 if (!proto_drv) { 3618 return; 3619 } 3620 3621 if (!drv->create_opts) { 3622 error_setg(errp, "Format driver '%s' does not support image creation", 3623 drv->format_name); 3624 return; 3625 } 3626 3627 if (!proto_drv->create_opts) { 3628 error_setg(errp, "Protocol driver '%s' does not support image creation", 3629 proto_drv->format_name); 3630 return; 3631 } 3632 3633 create_opts = qemu_opts_append(create_opts, drv->create_opts); 3634 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 3635 3636 /* Create parameter list with default values */ 3637 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 3638 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 3639 3640 /* Parse -o options */ 3641 if (options) { 3642 qemu_opts_do_parse(opts, options, NULL, &local_err); 3643 if (local_err) { 3644 error_report_err(local_err); 3645 local_err = NULL; 3646 error_setg(errp, "Invalid options for file format '%s'", fmt); 3647 goto out; 3648 } 3649 } 3650 3651 if (base_filename) { 3652 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 3653 if (local_err) { 3654 error_setg(errp, "Backing file not supported for file format '%s'", 3655 fmt); 
3656 goto out; 3657 } 3658 } 3659 3660 if (base_fmt) { 3661 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 3662 if (local_err) { 3663 error_setg(errp, "Backing file format not supported for file " 3664 "format '%s'", fmt); 3665 goto out; 3666 } 3667 } 3668 3669 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3670 if (backing_file) { 3671 if (!strcmp(filename, backing_file)) { 3672 error_setg(errp, "Error: Trying to create an image with the " 3673 "same filename as the backing file"); 3674 goto out; 3675 } 3676 } 3677 3678 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3679 if (backing_fmt) { 3680 backing_drv = bdrv_find_format(backing_fmt); 3681 if (!backing_drv) { 3682 error_setg(errp, "Unknown backing file format '%s'", 3683 backing_fmt); 3684 goto out; 3685 } 3686 } 3687 3688 // The size for the image must always be specified, with one exception: 3689 // If we are using a backing file, we can obtain the size from there 3690 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3691 if (size == -1) { 3692 if (backing_file) { 3693 BlockDriverState *bs; 3694 char *full_backing = g_new0(char, PATH_MAX); 3695 int64_t size; 3696 int back_flags; 3697 3698 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 3699 full_backing, PATH_MAX, 3700 &local_err); 3701 if (local_err) { 3702 g_free(full_backing); 3703 goto out; 3704 } 3705 3706 /* backing files always opened read-only */ 3707 back_flags = 3708 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 3709 3710 bs = NULL; 3711 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 3712 backing_drv, &local_err); 3713 g_free(full_backing); 3714 if (ret < 0) { 3715 goto out; 3716 } 3717 size = bdrv_getlength(bs); 3718 if (size < 0) { 3719 error_setg_errno(errp, -size, "Could not get size of '%s'", 3720 backing_file); 3721 bdrv_unref(bs); 3722 goto out; 3723 } 3724 3725 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 3726 3727 bdrv_unref(bs); 
3728 } else { 3729 error_setg(errp, "Image creation needs a size parameter"); 3730 goto out; 3731 } 3732 } 3733 3734 if (!quiet) { 3735 printf("Formatting '%s', fmt=%s", filename, fmt); 3736 qemu_opts_print(opts, " "); 3737 puts(""); 3738 } 3739 3740 ret = bdrv_create(drv, filename, opts, &local_err); 3741 3742 if (ret == -EFBIG) { 3743 /* This is generally a better message than whatever the driver would 3744 * deliver (especially because of the cluster_size_hint), since that 3745 * is most probably not much different from "image too large". */ 3746 const char *cluster_size_hint = ""; 3747 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 3748 cluster_size_hint = " (try using a larger cluster size)"; 3749 } 3750 error_setg(errp, "The image size is too large for file format '%s'" 3751 "%s", fmt, cluster_size_hint); 3752 error_free(local_err); 3753 local_err = NULL; 3754 } 3755 3756 out: 3757 qemu_opts_del(opts); 3758 qemu_opts_free(create_opts); 3759 if (local_err) { 3760 error_propagate(errp, local_err); 3761 } 3762 } 3763 3764 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 3765 { 3766 return bs->aio_context; 3767 } 3768 3769 void bdrv_detach_aio_context(BlockDriverState *bs) 3770 { 3771 BdrvAioNotifier *baf; 3772 3773 if (!bs->drv) { 3774 return; 3775 } 3776 3777 QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 3778 baf->detach_aio_context(baf->opaque); 3779 } 3780 3781 if (bs->io_limits_enabled) { 3782 throttle_detach_aio_context(&bs->throttle_state); 3783 } 3784 if (bs->drv->bdrv_detach_aio_context) { 3785 bs->drv->bdrv_detach_aio_context(bs); 3786 } 3787 if (bs->file) { 3788 bdrv_detach_aio_context(bs->file); 3789 } 3790 if (bs->backing_hd) { 3791 bdrv_detach_aio_context(bs->backing_hd); 3792 } 3793 3794 bs->aio_context = NULL; 3795 } 3796 3797 void bdrv_attach_aio_context(BlockDriverState *bs, 3798 AioContext *new_context) 3799 { 3800 BdrvAioNotifier *ban; 3801 3802 if (!bs->drv) { 3803 return; 3804 } 3805 3806 bs->aio_context = new_context; 3807 
3808 if (bs->backing_hd) { 3809 bdrv_attach_aio_context(bs->backing_hd, new_context); 3810 } 3811 if (bs->file) { 3812 bdrv_attach_aio_context(bs->file, new_context); 3813 } 3814 if (bs->drv->bdrv_attach_aio_context) { 3815 bs->drv->bdrv_attach_aio_context(bs, new_context); 3816 } 3817 if (bs->io_limits_enabled) { 3818 throttle_attach_aio_context(&bs->throttle_state, new_context); 3819 } 3820 3821 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3822 ban->attached_aio_context(new_context, ban->opaque); 3823 } 3824 } 3825 3826 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3827 { 3828 bdrv_drain_all(); /* ensure there are no in-flight requests */ 3829 3830 bdrv_detach_aio_context(bs); 3831 3832 /* This function executes in the old AioContext so acquire the new one in 3833 * case it runs in a different thread. 3834 */ 3835 aio_context_acquire(new_context); 3836 bdrv_attach_aio_context(bs, new_context); 3837 aio_context_release(new_context); 3838 } 3839 3840 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3841 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3842 void (*detach_aio_context)(void *opaque), void *opaque) 3843 { 3844 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3845 *ban = (BdrvAioNotifier){ 3846 .attached_aio_context = attached_aio_context, 3847 .detach_aio_context = detach_aio_context, 3848 .opaque = opaque 3849 }; 3850 3851 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3852 } 3853 3854 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3855 void (*attached_aio_context)(AioContext *, 3856 void *), 3857 void (*detach_aio_context)(void *), 3858 void *opaque) 3859 { 3860 BdrvAioNotifier *ban, *ban_next; 3861 3862 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3863 if (ban->attached_aio_context == attached_aio_context && 3864 ban->detach_aio_context == detach_aio_context && 3865 ban->opaque == opaque) 3866 { 3867 QLIST_REMOVE(ban, list); 3868 g_free(ban); 3869 3870 return; 
3871 } 3872 } 3873 3874 abort(); 3875 } 3876 3877 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3878 BlockDriverAmendStatusCB *status_cb) 3879 { 3880 if (!bs->drv->bdrv_amend_options) { 3881 return -ENOTSUP; 3882 } 3883 return bs->drv->bdrv_amend_options(bs, opts, status_cb); 3884 } 3885 3886 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3887 * of block filter and by bdrv_is_first_non_filter. 3888 * It is used to test if the given bs is the candidate or recurse more in the 3889 * node graph. 3890 */ 3891 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3892 BlockDriverState *candidate) 3893 { 3894 /* return false if basic checks fails */ 3895 if (!bs || !bs->drv) { 3896 return false; 3897 } 3898 3899 /* the code reached a non block filter driver -> check if the bs is 3900 * the same as the candidate. It's the recursion termination condition. 3901 */ 3902 if (!bs->drv->is_filter) { 3903 return bs == candidate; 3904 } 3905 /* Down this path the driver is a block filter driver */ 3906 3907 /* If the block filter recursion method is defined use it to recurse down 3908 * the node graph. 3909 */ 3910 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3911 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3912 } 3913 3914 /* the driver is a block filter but don't allow to recurse -> return false 3915 */ 3916 return false; 3917 } 3918 3919 /* This function checks if the candidate is the first non filter bs down it's 3920 * bs chain. Since we don't have pointers to parents it explore all bs chains 3921 * from the top. Some filters can choose not to pass down the recursion. 
3922 */ 3923 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 3924 { 3925 BlockDriverState *bs; 3926 3927 /* walk down the bs forest recursively */ 3928 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3929 bool perm; 3930 3931 /* try to recurse in this top level bs */ 3932 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 3933 3934 /* candidate is the first non filter */ 3935 if (perm) { 3936 return true; 3937 } 3938 } 3939 3940 return false; 3941 } 3942 3943 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 3944 { 3945 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 3946 AioContext *aio_context; 3947 3948 if (!to_replace_bs) { 3949 error_setg(errp, "Node name '%s' not found", node_name); 3950 return NULL; 3951 } 3952 3953 aio_context = bdrv_get_aio_context(to_replace_bs); 3954 aio_context_acquire(aio_context); 3955 3956 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 3957 to_replace_bs = NULL; 3958 goto out; 3959 } 3960 3961 /* We don't want arbitrary node of the BDS chain to be replaced only the top 3962 * most non filter in order to prevent data corruption. 3963 * Another benefit is that this tests exclude backing files which are 3964 * blocked by the backing blockers. 
3965 */ 3966 if (!bdrv_is_first_non_filter(to_replace_bs)) { 3967 error_setg(errp, "Only top most non filter can be replaced"); 3968 to_replace_bs = NULL; 3969 goto out; 3970 } 3971 3972 out: 3973 aio_context_release(aio_context); 3974 return to_replace_bs; 3975 } 3976 3977 static bool append_open_options(QDict *d, BlockDriverState *bs) 3978 { 3979 const QDictEntry *entry; 3980 bool found_any = false; 3981 3982 for (entry = qdict_first(bs->options); entry; 3983 entry = qdict_next(bs->options, entry)) 3984 { 3985 /* Only take options for this level and exclude all non-driver-specific 3986 * options */ 3987 if (!strchr(qdict_entry_key(entry), '.') && 3988 strcmp(qdict_entry_key(entry), "node-name")) 3989 { 3990 qobject_incref(qdict_entry_value(entry)); 3991 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 3992 found_any = true; 3993 } 3994 } 3995 3996 return found_any; 3997 } 3998 3999 /* Updates the following BDS fields: 4000 * - exact_filename: A filename which may be used for opening a block device 4001 * which (mostly) equals the given BDS (even without any 4002 * other options; so reading and writing must return the same 4003 * results, but caching etc. may be different) 4004 * - full_open_options: Options which, when given when opening a block device 4005 * (without a filename), result in a BDS (mostly) 4006 * equalling the given one 4007 * - filename: If exact_filename is set, it is copied here. Otherwise, 4008 * full_open_options is converted to a JSON object, prefixed with 4009 * "json:" (for use through the JSON pseudo protocol) and put here. 
4010 */ 4011 void bdrv_refresh_filename(BlockDriverState *bs) 4012 { 4013 BlockDriver *drv = bs->drv; 4014 QDict *opts; 4015 4016 if (!drv) { 4017 return; 4018 } 4019 4020 /* This BDS's file name will most probably depend on its file's name, so 4021 * refresh that first */ 4022 if (bs->file) { 4023 bdrv_refresh_filename(bs->file); 4024 } 4025 4026 if (drv->bdrv_refresh_filename) { 4027 /* Obsolete information is of no use here, so drop the old file name 4028 * information before refreshing it */ 4029 bs->exact_filename[0] = '\0'; 4030 if (bs->full_open_options) { 4031 QDECREF(bs->full_open_options); 4032 bs->full_open_options = NULL; 4033 } 4034 4035 drv->bdrv_refresh_filename(bs); 4036 } else if (bs->file) { 4037 /* Try to reconstruct valid information from the underlying file */ 4038 bool has_open_options; 4039 4040 bs->exact_filename[0] = '\0'; 4041 if (bs->full_open_options) { 4042 QDECREF(bs->full_open_options); 4043 bs->full_open_options = NULL; 4044 } 4045 4046 opts = qdict_new(); 4047 has_open_options = append_open_options(opts, bs); 4048 4049 /* If no specific options have been given for this BDS, the filename of 4050 * the underlying file should suffice for this one as well */ 4051 if (bs->file->exact_filename[0] && !has_open_options) { 4052 strcpy(bs->exact_filename, bs->file->exact_filename); 4053 } 4054 /* Reconstructing the full options QDict is simple for most format block 4055 * drivers, as long as the full options are known for the underlying 4056 * file BDS. The full options QDict of that file BDS should somehow 4057 * contain a representation of the filename, therefore the following 4058 * suffices without querying the (exact_)filename of this BDS. 
*/ 4059 if (bs->file->full_open_options) { 4060 qdict_put_obj(opts, "driver", 4061 QOBJECT(qstring_from_str(drv->format_name))); 4062 QINCREF(bs->file->full_open_options); 4063 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 4064 4065 bs->full_open_options = opts; 4066 } else { 4067 QDECREF(opts); 4068 } 4069 } else if (!bs->full_open_options && qdict_size(bs->options)) { 4070 /* There is no underlying file BDS (at least referenced by BDS.file), 4071 * so the full options QDict should be equal to the options given 4072 * specifically for this block device when it was opened (plus the 4073 * driver specification). 4074 * Because those options don't change, there is no need to update 4075 * full_open_options when it's already set. */ 4076 4077 opts = qdict_new(); 4078 append_open_options(opts, bs); 4079 qdict_put_obj(opts, "driver", 4080 QOBJECT(qstring_from_str(drv->format_name))); 4081 4082 if (bs->exact_filename[0]) { 4083 /* This may not work for all block protocol drivers (some may 4084 * require this filename to be parsed), but we have to find some 4085 * default solution here, so just include it. If some block driver 4086 * does not support pure options without any filename at all or 4087 * needs some special format of the options QDict, it needs to 4088 * implement the driver-specific bdrv_refresh_filename() function. 
4089 */ 4090 qdict_put_obj(opts, "filename", 4091 QOBJECT(qstring_from_str(bs->exact_filename))); 4092 } 4093 4094 bs->full_open_options = opts; 4095 } 4096 4097 if (bs->exact_filename[0]) { 4098 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4099 } else if (bs->full_open_options) { 4100 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4101 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4102 qstring_get_str(json)); 4103 QDECREF(json); 4104 } 4105 } 4106 4107 /* This accessor function purpose is to allow the device models to access the 4108 * BlockAcctStats structure embedded inside a BlockDriverState without being 4109 * aware of the BlockDriverState structure layout. 4110 * It will go away when the BlockAcctStats structure will be moved inside 4111 * the device models. 4112 */ 4113 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 4114 { 4115 return &bs->stats; 4116 } 4117