1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "block/block_int.h" 28 #include "block/blockjob.h" 29 #include "qemu/error-report.h" 30 #include "qemu/module.h" 31 #include "qapi/qmp/qerror.h" 32 #include "qapi/qmp/qjson.h" 33 #include "sysemu/block-backend.h" 34 #include "sysemu/sysemu.h" 35 #include "qemu/notify.h" 36 #include "block/coroutine.h" 37 #include "block/qapi.h" 38 #include "qmp-commands.h" 39 #include "qemu/timer.h" 40 #include "qapi-event.h" 41 #include "block/throttle-groups.h" 42 43 #ifdef CONFIG_BSD 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <sys/ioctl.h> 47 #include <sys/queue.h> 48 #ifndef __DragonFly__ 49 #include <sys/disk.h> 50 #endif 51 #endif 52 53 #ifdef _WIN32 54 #include <windows.h> 55 #endif 56 57 /** 58 * A BdrvDirtyBitmap can be in three possible states: 59 * (1) successor is NULL and disabled is false: full r/w mode 60 * (2) successor is NULL and disabled is true: read only mode ("disabled") 61 * (3) successor is set: frozen mode. 62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 63 * or enabled. A frozen bitmap can only abdicate() or reclaim(). 64 */ 65 struct BdrvDirtyBitmap { 66 HBitmap *bitmap; /* Dirty sector bitmap implementation */ 67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 68 char *name; /* Optional non-empty unique ID */ 69 int64_t size; /* Size of the bitmap (Number of sectors) */ 70 bool disabled; /* Bitmap is read-only */ 71 QLIST_ENTRY(BdrvDirtyBitmap) list; 72 }; 73 74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 75 76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(bdrv_states); 78 79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 81 82 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 83 QLIST_HEAD_INITIALIZER(bdrv_drivers); 84 85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 86 const char *reference, QDict *options, int flags, 87 BlockDriverState *parent, 88 const BdrvChildRole *child_role, Error **errp); 89 90 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 91 /* If non-zero, use only whitelisted block drivers */ 92 static int use_bdrv_whitelist; 93 94 #ifdef _WIN32 95 static int is_windows_drive_prefix(const char *filename) 96 { 97 return (((filename[0] >= 'a' && filename[0] <= 'z') || 98 (filename[0] >= 'A' && filename[0] <= 'Z')) && 99 filename[1] == ':'); 100 } 101 102 int is_windows_drive(const char *filename) 103 { 104 if (is_windows_drive_prefix(filename) && 105 filename[2] == '\0') 106 return 1; 107 if (strstart(filename, "\\\\.\\", NULL) || 108 strstart(filename, "//./", NULL)) 109 return 1; 110 return 0; 111 } 112 #endif 113 114 size_t bdrv_opt_mem_align(BlockDriverState *bs) 115 { 116 if (!bs || !bs->drv) { 117 /* page size or 4k (hdd sector size) should be on the safe side */ 118 return MAX(4096, getpagesize()); 119 } 120 121 return bs->bl.opt_mem_alignment; 122 } 123 124 size_t bdrv_min_mem_align(BlockDriverState *bs) 125 { 126 if (!bs || !bs->drv) { 127 /* page size or 4k (hdd sector size) should be on the safe side */ 128 return MAX(4096, getpagesize()); 129 } 130 131 return bs->bl.min_mem_alignment; 132 } 133 134 /* check if the path starts with "<protocol>:" */ 135 int path_has_protocol(const char *path) 136 { 137 const char *p; 138 139 #ifdef _WIN32 140 if (is_windows_drive(path) || 141 is_windows_drive_prefix(path)) { 142 return 0; 143 } 144 p = path + strcspn(path, ":/\\"); 145 #else 146 p = path + strcspn(path, ":/"); 147 #endif 148 149 return *p == ':'; 150 } 151 152 int path_is_absolute(const char *path) 153 { 154 #ifdef _WIN32 155 /* specific case for names like: "\\.\d:" */ 156 if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 157 return 1; 158 } 159 return (*path == '/' || *path == '\\'); 160 #else 161 return (*path == '/'); 162 #endif 163 } 164 165 /* if filename is absolute, just copy it to dest. Otherwise, build a 166 path to it by considering it is relative to base_path. URL are 167 supported. */ 168 void path_combine(char *dest, int dest_size, 169 const char *base_path, 170 const char *filename) 171 { 172 const char *p, *p1; 173 int len; 174 175 if (dest_size <= 0) 176 return; 177 if (path_is_absolute(filename)) { 178 pstrcpy(dest, dest_size, filename); 179 } else { 180 p = strchr(base_path, ':'); 181 if (p) 182 p++; 183 else 184 p = base_path; 185 p1 = strrchr(base_path, '/'); 186 #ifdef _WIN32 187 { 188 const char *p2; 189 p2 = strrchr(base_path, '\\'); 190 if (!p1 || p2 > p1) 191 p1 = p2; 192 } 193 #endif 194 if (p1) 195 p1++; 196 else 197 p1 = base_path; 198 if (p1 > p) 199 p = p1; 200 len = p - base_path; 201 if (len > dest_size - 1) 202 len = dest_size - 1; 203 memcpy(dest, base_path, len); 204 dest[len] = '\0'; 205 pstrcat(dest, dest_size, filename); 206 } 207 } 208 209 void bdrv_get_full_backing_filename_from_filename(const char *backed, 210 const char *backing, 211 char *dest, size_t sz, 212 Error **errp) 213 { 214 if (backing[0] == '\0' || path_has_protocol(backing) || 215 path_is_absolute(backing)) 216 { 217 pstrcpy(dest, sz, backing); 218 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 219 error_setg(errp, "Cannot use relative backing file names for '%s'", 220 backed); 221 } else { 222 path_combine(dest, sz, backed, backing); 223 } 224 } 225 226 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 227 Error **errp) 228 { 229 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 230 231 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 232 dest, sz, errp); 233 } 234 235 void bdrv_register(BlockDriver *bdrv) 236 { 237 bdrv_setup_io_funcs(bdrv); 238 239 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 240 } 241 242 BlockDriverState *bdrv_new_root(void) 243 { 244 BlockDriverState *bs = bdrv_new(); 245 246 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 247 return bs; 248 } 249 250 BlockDriverState *bdrv_new(void) 251 { 252 BlockDriverState *bs; 253 int i; 254 255 bs = g_new0(BlockDriverState, 1); 256 QLIST_INIT(&bs->dirty_bitmaps); 257 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 258 QLIST_INIT(&bs->op_blockers[i]); 259 } 260 bdrv_iostatus_disable(bs); 261 notifier_list_init(&bs->close_notifiers); 262 notifier_with_return_list_init(&bs->before_write_notifiers); 263 qemu_co_queue_init(&bs->throttled_reqs[0]); 264 qemu_co_queue_init(&bs->throttled_reqs[1]); 265 bs->refcnt = 1; 266 bs->aio_context = qemu_get_aio_context(); 267 268 return bs; 269 } 270 271 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 272 { 273 notifier_list_add(&bs->close_notifiers, notify); 274 } 275 276 BlockDriver *bdrv_find_format(const char *format_name) 277 { 278 BlockDriver *drv1; 279 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 280 if (!strcmp(drv1->format_name, format_name)) { 281 return drv1; 282 } 283 } 284 return NULL; 285 } 286 287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 288 { 289 static const char *whitelist_rw[] = { 290 CONFIG_BDRV_RW_WHITELIST 291 }; 292 static const char *whitelist_ro[] = { 293 CONFIG_BDRV_RO_WHITELIST 294 }; 295 const char **p; 296 297 if (!whitelist_rw[0] && !whitelist_ro[0]) { 298 return 1; /* no whitelist, anything goes */ 299 } 300 301 for (p = whitelist_rw; *p; p++) { 302 if (!strcmp(drv->format_name, *p)) { 303 return 1; 304 } 305 } 306 if (read_only) { 307 for (p = whitelist_ro; *p; p++) { 308 if (!strcmp(drv->format_name, *p)) { 309 return 1; 310 } 311 } 312 } 313 return 0; 314 } 315 316 typedef struct CreateCo { 317 BlockDriver *drv; 318 char *filename; 319 QemuOpts *opts; 320 int ret; 321 Error *err; 322 } CreateCo; 323 324 static void coroutine_fn bdrv_create_co_entry(void *opaque) 325 { 326 Error *local_err = NULL; 327 int ret; 328 329 CreateCo *cco = opaque; 330 assert(cco->drv); 331 332 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 333 if (local_err) { 334 error_propagate(&cco->err, local_err); 335 } 336 cco->ret = ret; 337 } 338 339 int bdrv_create(BlockDriver *drv, const char* filename, 340 QemuOpts *opts, Error **errp) 341 { 342 int ret; 343 344 Coroutine *co; 345 CreateCo cco = { 346 .drv = drv, 347 .filename = g_strdup(filename), 348 .opts = opts, 349 .ret = NOT_DONE, 350 .err = NULL, 351 }; 352 353 if (!drv->bdrv_create) { 354 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 355 ret = -ENOTSUP; 356 goto out; 357 } 358 359 if (qemu_in_coroutine()) { 360 /* Fast-path if already in coroutine context */ 361 bdrv_create_co_entry(&cco); 362 } else { 363 co = qemu_coroutine_create(bdrv_create_co_entry); 364 qemu_coroutine_enter(co, &cco); 365 while (cco.ret == NOT_DONE) { 366 aio_poll(qemu_get_aio_context(), true); 367 } 368 } 369 370 ret = cco.ret; 371 if (ret < 0) { 372 if (cco.err) { 373 error_propagate(errp, cco.err); 374 } else { 375 error_setg_errno(errp, -ret, "Could not create image"); 376 } 377 } 378 379 out: 380 g_free(cco.filename); 381 return ret; 382 } 383 384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 385 { 386 BlockDriver *drv; 387 Error *local_err = NULL; 388 int ret; 389 390 drv = bdrv_find_protocol(filename, true, errp); 391 if (drv == NULL) { 392 return -ENOENT; 393 } 394 395 ret = bdrv_create(drv, filename, opts, &local_err); 396 if (local_err) { 397 error_propagate(errp, local_err); 398 } 399 return ret; 400 } 401 402 /** 403 * Try to get @bs's logical and physical block size. 404 * On success, store them in @bsz struct and return 0. 405 * On failure return -errno. 406 * @bs must not be empty. 407 */ 408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 409 { 410 BlockDriver *drv = bs->drv; 411 412 if (drv && drv->bdrv_probe_blocksizes) { 413 return drv->bdrv_probe_blocksizes(bs, bsz); 414 } 415 416 return -ENOTSUP; 417 } 418 419 /** 420 * Try to get @bs's geometry (cyls, heads, sectors). 421 * On success, store them in @geo struct and return 0. 422 * On failure return -errno. 423 * @bs must not be empty. 424 */ 425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 426 { 427 BlockDriver *drv = bs->drv; 428 429 if (drv && drv->bdrv_probe_geometry) { 430 return drv->bdrv_probe_geometry(bs, geo); 431 } 432 433 return -ENOTSUP; 434 } 435 436 /* 437 * Create a uniquely-named empty temporary file. 438 * Return 0 upon success, otherwise a negative errno value. 439 */ 440 int get_tmp_filename(char *filename, int size) 441 { 442 #ifdef _WIN32 443 char temp_dir[MAX_PATH]; 444 /* GetTempFileName requires that its output buffer (4th param) 445 have length MAX_PATH or greater. */ 446 assert(size >= MAX_PATH); 447 return (GetTempPath(MAX_PATH, temp_dir) 448 && GetTempFileName(temp_dir, "qem", 0, filename) 449 ? 0 : -GetLastError()); 450 #else 451 int fd; 452 const char *tmpdir; 453 tmpdir = getenv("TMPDIR"); 454 if (!tmpdir) { 455 tmpdir = "/var/tmp"; 456 } 457 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 458 return -EOVERFLOW; 459 } 460 fd = mkstemp(filename); 461 if (fd < 0) { 462 return -errno; 463 } 464 if (close(fd) != 0) { 465 unlink(filename); 466 return -errno; 467 } 468 return 0; 469 #endif 470 } 471 472 /* 473 * Detect host devices. By convention, /dev/cdrom[N] is always 474 * recognized as a host CDROM. 475 */ 476 static BlockDriver *find_hdev_driver(const char *filename) 477 { 478 int score_max = 0, score; 479 BlockDriver *drv = NULL, *d; 480 481 QLIST_FOREACH(d, &bdrv_drivers, list) { 482 if (d->bdrv_probe_device) { 483 score = d->bdrv_probe_device(filename); 484 if (score > score_max) { 485 score_max = score; 486 drv = d; 487 } 488 } 489 } 490 491 return drv; 492 } 493 494 BlockDriver *bdrv_find_protocol(const char *filename, 495 bool allow_protocol_prefix, 496 Error **errp) 497 { 498 BlockDriver *drv1; 499 char protocol[128]; 500 int len; 501 const char *p; 502 503 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 504 505 /* 506 * XXX(hch): we really should not let host device detection 507 * override an explicit protocol specification, but moving this 508 * later breaks access to device names with colons in them. 509 * Thanks to the brain-dead persistent naming schemes on udev- 510 * based Linux systems those actually are quite common. 511 */ 512 drv1 = find_hdev_driver(filename); 513 if (drv1) { 514 return drv1; 515 } 516 517 if (!path_has_protocol(filename) || !allow_protocol_prefix) { 518 return &bdrv_file; 519 } 520 521 p = strchr(filename, ':'); 522 assert(p != NULL); 523 len = p - filename; 524 if (len > sizeof(protocol) - 1) 525 len = sizeof(protocol) - 1; 526 memcpy(protocol, filename, len); 527 protocol[len] = '\0'; 528 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 529 if (drv1->protocol_name && 530 !strcmp(drv1->protocol_name, protocol)) { 531 return drv1; 532 } 533 } 534 535 error_setg(errp, "Unknown protocol '%s'", protocol); 536 return NULL; 537 } 538 539 /* 540 * Guess image format by probing its contents. 541 * This is not a good idea when your image is raw (CVE-2008-2004), but 542 * we do it anyway for backward compatibility. 543 * 544 * @buf contains the image's first @buf_size bytes. 545 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 546 * but can be smaller if the image file is smaller) 547 * @filename is its filename. 548 * 549 * For all block drivers, call the bdrv_probe() method to get its 550 * probing score. 551 * Return the first block driver with the highest probing score. 552 */ 553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 554 const char *filename) 555 { 556 int score_max = 0, score; 557 BlockDriver *drv = NULL, *d; 558 559 QLIST_FOREACH(d, &bdrv_drivers, list) { 560 if (d->bdrv_probe) { 561 score = d->bdrv_probe(buf, buf_size, filename); 562 if (score > score_max) { 563 score_max = score; 564 drv = d; 565 } 566 } 567 } 568 569 return drv; 570 } 571 572 static int find_image_format(BlockDriverState *bs, const char *filename, 573 BlockDriver **pdrv, Error **errp) 574 { 575 BlockDriver *drv; 576 uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 577 int ret = 0; 578 579 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 580 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 581 *pdrv = &bdrv_raw; 582 return ret; 583 } 584 585 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 586 if (ret < 0) { 587 error_setg_errno(errp, -ret, "Could not read image for determining its " 588 "format"); 589 *pdrv = NULL; 590 return ret; 591 } 592 593 drv = bdrv_probe_all(buf, ret, filename); 594 if (!drv) { 595 error_setg(errp, "Could not determine image format: No compatible " 596 "driver found"); 597 ret = -ENOENT; 598 } 599 *pdrv = drv; 600 return ret; 601 } 602 603 /** 604 * Set the current 'total_sectors' value 605 * Return 0 on success, -errno on error. 606 */ 607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 608 { 609 BlockDriver *drv = bs->drv; 610 611 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 612 if (bdrv_is_sg(bs)) 613 return 0; 614 615 /* query actual device if possible, otherwise just trust the hint */ 616 if (drv->bdrv_getlength) { 617 int64_t length = drv->bdrv_getlength(bs); 618 if (length < 0) { 619 return length; 620 } 621 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 622 } 623 624 bs->total_sectors = hint; 625 return 0; 626 } 627 628 /** 629 * Set open flags for a given discard mode 630 * 631 * Return 0 on success, -1 if the discard mode was invalid. 632 */ 633 int bdrv_parse_discard_flags(const char *mode, int *flags) 634 { 635 *flags &= ~BDRV_O_UNMAP; 636 637 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 638 /* do nothing */ 639 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 640 *flags |= BDRV_O_UNMAP; 641 } else { 642 return -1; 643 } 644 645 return 0; 646 } 647 648 /** 649 * Set open flags for a given cache mode 650 * 651 * Return 0 on success, -1 if the cache mode was invalid. 652 */ 653 int bdrv_parse_cache_flags(const char *mode, int *flags) 654 { 655 *flags &= ~BDRV_O_CACHE_MASK; 656 657 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 658 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 659 } else if (!strcmp(mode, "directsync")) { 660 *flags |= BDRV_O_NOCACHE; 661 } else if (!strcmp(mode, "writeback")) { 662 *flags |= BDRV_O_CACHE_WB; 663 } else if (!strcmp(mode, "unsafe")) { 664 *flags |= BDRV_O_CACHE_WB; 665 *flags |= BDRV_O_NO_FLUSH; 666 } else if (!strcmp(mode, "writethrough")) { 667 /* this is the default */ 668 } else { 669 return -1; 670 } 671 672 return 0; 673 } 674 675 /* 676 * Returns the flags that a temporary snapshot should get, based on the 677 * originally requested flags (the originally requested image will have flags 678 * like a backing file) 679 */ 680 static int bdrv_temp_snapshot_flags(int flags) 681 { 682 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 683 } 684 685 /* 686 * Returns the flags that bs->file should get if a protocol driver is expected, 687 * based on the given flags for the parent BDS 688 */ 689 static int bdrv_inherited_flags(int flags) 690 { 691 /* Enable protocol handling, disable format probing for bs->file */ 692 flags |= BDRV_O_PROTOCOL; 693 694 /* Our block drivers take care to send flushes and respect unmap policy, 695 * so we can enable both unconditionally on lower layers. */ 696 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 697 698 /* Clear flags that only apply to the top layer */ 699 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 700 701 return flags; 702 } 703 704 const BdrvChildRole child_file = { 705 .inherit_flags = bdrv_inherited_flags, 706 }; 707 708 /* 709 * Returns the flags that bs->file should get if the use of formats (and not 710 * only protocols) is permitted for it, based on the given flags for the parent 711 * BDS 712 */ 713 static int bdrv_inherited_fmt_flags(int parent_flags) 714 { 715 int flags = child_file.inherit_flags(parent_flags); 716 return flags & ~BDRV_O_PROTOCOL; 717 } 718 719 const BdrvChildRole child_format = { 720 .inherit_flags = bdrv_inherited_fmt_flags, 721 }; 722 723 /* 724 * Returns the flags that bs->backing should get, based on the given flags 725 * for the parent BDS 726 */ 727 static int bdrv_backing_flags(int flags) 728 { 729 /* backing files always opened read-only */ 730 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 731 732 /* snapshot=on is handled on the top layer */ 733 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 734 735 return flags; 736 } 737 738 static const BdrvChildRole child_backing = { 739 .inherit_flags = bdrv_backing_flags, 740 }; 741 742 static int bdrv_open_flags(BlockDriverState *bs, int flags) 743 { 744 int open_flags = flags | BDRV_O_CACHE_WB; 745 746 /* 747 * Clear flags that are internal to the block layer before opening the 748 * image. 749 */ 750 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 751 752 /* 753 * Snapshots should be writable. 754 */ 755 if (flags & BDRV_O_TEMPORARY) { 756 open_flags |= BDRV_O_RDWR; 757 } 758 759 return open_flags; 760 } 761 762 static void bdrv_assign_node_name(BlockDriverState *bs, 763 const char *node_name, 764 Error **errp) 765 { 766 if (!node_name) { 767 return; 768 } 769 770 /* Check for empty string or invalid characters */ 771 if (!id_wellformed(node_name)) { 772 error_setg(errp, "Invalid node name"); 773 return; 774 } 775 776 /* takes care of avoiding namespaces collisions */ 777 if (blk_by_name(node_name)) { 778 error_setg(errp, "node-name=%s is conflicting with a device id", 779 node_name); 780 return; 781 } 782 783 /* takes care of avoiding duplicates node names */ 784 if (bdrv_find_node(node_name)) { 785 error_setg(errp, "Duplicate node name"); 786 return; 787 } 788 789 /* copy node name into the bs and insert it into the graph list */ 790 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 791 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 792 } 793 794 static QemuOptsList bdrv_runtime_opts = { 795 .name = "bdrv_common", 796 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), 797 .desc = { 798 { 799 .name = "node-name", 800 .type = QEMU_OPT_STRING, 801 .help = "Node name of the block device node", 802 }, 803 { /* end of list */ } 804 }, 805 }; 806 807 /* 808 * Common part for opening disk images and files 809 * 810 * Removes all processed options from *options. 811 */ 812 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, 813 QDict *options, int flags, BlockDriver *drv, Error **errp) 814 { 815 int ret, open_flags; 816 const char *filename; 817 const char *node_name = NULL; 818 QemuOpts *opts; 819 Error *local_err = NULL; 820 821 assert(drv != NULL); 822 assert(bs->file == NULL); 823 assert(options != NULL && bs->options != options); 824 825 if (file != NULL) { 826 filename = file->bs->filename; 827 } else { 828 filename = qdict_get_try_str(options, "filename"); 829 } 830 831 if (drv->bdrv_needs_filename && !filename) { 832 error_setg(errp, "The '%s' block driver requires a file name", 833 drv->format_name); 834 return -EINVAL; 835 } 836 837 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); 838 839 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); 840 qemu_opts_absorb_qdict(opts, options, &local_err); 841 if (local_err) { 842 error_propagate(errp, local_err); 843 ret = -EINVAL; 844 goto fail_opts; 845 } 846 847 node_name = qemu_opt_get(opts, "node-name"); 848 bdrv_assign_node_name(bs, node_name, &local_err); 849 if (local_err) { 850 error_propagate(errp, local_err); 851 ret = -EINVAL; 852 goto fail_opts; 853 } 854 855 bs->guest_block_size = 512; 856 bs->request_alignment = 512; 857 bs->zero_beyond_eof = true; 858 open_flags = bdrv_open_flags(bs, flags); 859 bs->read_only = !(open_flags & BDRV_O_RDWR); 860 861 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 862 error_setg(errp, 863 !bs->read_only && bdrv_is_whitelisted(drv, true) 864 ? "Driver '%s' can only be used for read-only devices" 865 : "Driver '%s' is not whitelisted", 866 drv->format_name); 867 ret = -ENOTSUP; 868 goto fail_opts; 869 } 870 871 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 872 if (flags & BDRV_O_COPY_ON_READ) { 873 if (!bs->read_only) { 874 bdrv_enable_copy_on_read(bs); 875 } else { 876 error_setg(errp, "Can't use copy-on-read on read-only device"); 877 ret = -EINVAL; 878 goto fail_opts; 879 } 880 } 881 882 if (filename != NULL) { 883 pstrcpy(bs->filename, sizeof(bs->filename), filename); 884 } else { 885 bs->filename[0] = '\0'; 886 } 887 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 888 889 bs->drv = drv; 890 bs->opaque = g_malloc0(drv->instance_size); 891 892 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); 893 894 /* Open the image, either directly or using a protocol */ 895 if (drv->bdrv_file_open) { 896 assert(file == NULL); 897 assert(!drv->bdrv_needs_filename || filename != NULL); 898 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 899 } else { 900 if (file == NULL) { 901 error_setg(errp, "Can't use '%s' as a block driver for the " 902 "protocol level", drv->format_name); 903 ret = -EINVAL; 904 goto free_and_fail; 905 } 906 bs->file = file; 907 ret = drv->bdrv_open(bs, options, open_flags, &local_err); 908 } 909 910 if (ret < 0) { 911 if (local_err) { 912 error_propagate(errp, local_err); 913 } else if (bs->filename[0]) { 914 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 915 } else { 916 error_setg_errno(errp, -ret, "Could not open image"); 917 } 918 goto free_and_fail; 919 } 920 921 if (bs->encrypted) { 922 error_report("Encrypted images are deprecated"); 923 error_printf("Support for them will be removed in a future release.\n" 924 "You can use 'qemu-img convert' to convert your image" 925 " to an unencrypted one.\n"); 926 } 927 928 ret = refresh_total_sectors(bs, bs->total_sectors); 929 if (ret < 0) { 930 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 931 goto free_and_fail; 932 } 933 934 bdrv_refresh_limits(bs, &local_err); 935 if (local_err) { 936 error_propagate(errp, local_err); 937 ret = -EINVAL; 938 goto free_and_fail; 939 } 940 941 assert(bdrv_opt_mem_align(bs) != 0); 942 assert(bdrv_min_mem_align(bs) != 0); 943 assert((bs->request_alignment != 0) || bdrv_is_sg(bs)); 944 945 qemu_opts_del(opts); 946 return 0; 947 948 free_and_fail: 949 bs->file = NULL; 950 g_free(bs->opaque); 951 bs->opaque = NULL; 952 bs->drv = NULL; 953 fail_opts: 954 qemu_opts_del(opts); 955 return ret; 956 } 957 958 static QDict *parse_json_filename(const char *filename, Error **errp) 959 { 960 QObject *options_obj; 961 QDict *options; 962 int ret; 963 964 ret = strstart(filename, "json:", &filename); 965 assert(ret); 966 967 options_obj = qobject_from_json(filename); 968 if (!options_obj) { 969 error_setg(errp, "Could not parse the JSON options"); 970 return NULL; 971 } 972 973 if (qobject_type(options_obj) != QTYPE_QDICT) { 974 qobject_decref(options_obj); 975 error_setg(errp, "Invalid JSON object given"); 976 return NULL; 977 } 978 979 options = qobject_to_qdict(options_obj); 980 qdict_flatten(options); 981 982 return options; 983 } 984 985 /* 986 * Fills in default options for opening images and converts the legacy 987 * filename/flags pair to option QDict entries. 988 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a 989 * block driver has been specified explicitly. 990 */ 991 static int bdrv_fill_options(QDict **options, const char **pfilename, 992 int *flags, Error **errp) 993 { 994 const char *filename = *pfilename; 995 const char *drvname; 996 bool protocol = *flags & BDRV_O_PROTOCOL; 997 bool parse_filename = false; 998 BlockDriver *drv = NULL; 999 Error *local_err = NULL; 1000 1001 /* Parse json: pseudo-protocol */ 1002 if (filename && g_str_has_prefix(filename, "json:")) { 1003 QDict *json_options = parse_json_filename(filename, &local_err); 1004 if (local_err) { 1005 error_propagate(errp, local_err); 1006 return -EINVAL; 1007 } 1008 1009 /* Options given in the filename have lower priority than options 1010 * specified directly */ 1011 qdict_join(*options, json_options, false); 1012 QDECREF(json_options); 1013 *pfilename = filename = NULL; 1014 } 1015 1016 drvname = qdict_get_try_str(*options, "driver"); 1017 if (drvname) { 1018 drv = bdrv_find_format(drvname); 1019 if (!drv) { 1020 error_setg(errp, "Unknown driver '%s'", drvname); 1021 return -ENOENT; 1022 } 1023 /* If the user has explicitly specified the driver, this choice should 1024 * override the BDRV_O_PROTOCOL flag */ 1025 protocol = drv->bdrv_file_open; 1026 } 1027 1028 if (protocol) { 1029 *flags |= BDRV_O_PROTOCOL; 1030 } else { 1031 *flags &= ~BDRV_O_PROTOCOL; 1032 } 1033 1034 /* Fetch the file name from the options QDict if necessary */ 1035 if (protocol && filename) { 1036 if (!qdict_haskey(*options, "filename")) { 1037 qdict_put(*options, "filename", qstring_from_str(filename)); 1038 parse_filename = true; 1039 } else { 1040 error_setg(errp, "Can't specify 'file' and 'filename' options at " 1041 "the same time"); 1042 return -EINVAL; 1043 } 1044 } 1045 1046 /* Find the right block driver */ 1047 filename = qdict_get_try_str(*options, "filename"); 1048 1049 if (!drvname && protocol) { 1050 if (filename) { 1051 drv = bdrv_find_protocol(filename, parse_filename, errp); 1052 if (!drv) { 1053 return -EINVAL; 1054 } 1055 1056 drvname = drv->format_name; 1057 qdict_put(*options, "driver", qstring_from_str(drvname)); 1058 } else { 1059 error_setg(errp, "Must specify either driver or file"); 1060 return -EINVAL; 1061 } 1062 } 1063 1064 assert(drv || !protocol); 1065 1066 /* Driver-specific filename parsing */ 1067 if (drv && drv->bdrv_parse_filename && parse_filename) { 1068 drv->bdrv_parse_filename(filename, *options, &local_err); 1069 if (local_err) { 1070 error_propagate(errp, local_err); 1071 return -EINVAL; 1072 } 1073 1074 if (!drv->bdrv_needs_filename) { 1075 qdict_del(*options, "filename"); 1076 } 1077 } 1078 1079 return 0; 1080 } 1081 1082 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, 1083 BlockDriverState *child_bs, 1084 const BdrvChildRole *child_role) 1085 { 1086 BdrvChild *child = g_new(BdrvChild, 1); 1087 *child = (BdrvChild) { 1088 .bs = child_bs, 1089 .role = child_role, 1090 }; 1091 1092 QLIST_INSERT_HEAD(&parent_bs->children, child, next); 1093 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent); 1094 1095 return child; 1096 } 1097 1098 static void bdrv_detach_child(BdrvChild *child) 1099 { 1100 QLIST_REMOVE(child, next); 1101 QLIST_REMOVE(child, next_parent); 1102 g_free(child); 1103 } 1104 1105 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) 1106 { 1107 BlockDriverState *child_bs; 1108 1109 if (child == NULL) { 1110 return; 1111 } 1112 1113 if (child->bs->inherits_from == parent) { 1114 child->bs->inherits_from = NULL; 1115 } 1116 1117 child_bs = child->bs; 1118 bdrv_detach_child(child); 1119 bdrv_unref(child_bs); 1120 } 1121 1122 /* 1123 * Sets the backing file link of a BDS. A new reference is created; callers 1124 * which don't need their own reference any more must call bdrv_unref(). 1125 */ 1126 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 1127 { 1128 if (backing_hd) { 1129 bdrv_ref(backing_hd); 1130 } 1131 1132 if (bs->backing) { 1133 assert(bs->backing_blocker); 1134 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); 1135 bdrv_unref_child(bs, bs->backing); 1136 } else if (backing_hd) { 1137 error_setg(&bs->backing_blocker, 1138 "node is used as backing hd of '%s'", 1139 bdrv_get_device_or_node_name(bs)); 1140 } 1141 1142 if (!backing_hd) { 1143 error_free(bs->backing_blocker); 1144 bs->backing_blocker = NULL; 1145 bs->backing = NULL; 1146 goto out; 1147 } 1148 bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing); 1149 bs->open_flags &= ~BDRV_O_NO_BACKING; 1150 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 1151 pstrcpy(bs->backing_format, sizeof(bs->backing_format), 1152 backing_hd->drv ? backing_hd->drv->format_name : ""); 1153 1154 bdrv_op_block_all(backing_hd, bs->backing_blocker); 1155 /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1156 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1157 bs->backing_blocker); 1158 out: 1159 bdrv_refresh_limits(bs, NULL); 1160 } 1161 1162 /* 1163 * Opens the backing file for a BlockDriverState if not yet open 1164 * 1165 * options is a QDict of options to pass to the block drivers, or NULL for an 1166 * empty set of options. The reference to the QDict is transferred to this 1167 * function (even on failure), so if the caller intends to reuse the dictionary, 1168 * it needs to use QINCREF() before calling bdrv_file_open. 1169 */ 1170 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 1171 { 1172 char *backing_filename = g_malloc0(PATH_MAX); 1173 int ret = 0; 1174 BlockDriverState *backing_hd; 1175 Error *local_err = NULL; 1176 1177 if (bs->backing != NULL) { 1178 QDECREF(options); 1179 goto free_exit; 1180 } 1181 1182 /* NULL means an empty set of options */ 1183 if (options == NULL) { 1184 options = qdict_new(); 1185 } 1186 1187 bs->open_flags &= ~BDRV_O_NO_BACKING; 1188 if (qdict_haskey(options, "file.filename")) { 1189 backing_filename[0] = '\0'; 1190 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 1191 QDECREF(options); 1192 goto free_exit; 1193 } else { 1194 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 1195 &local_err); 1196 if (local_err) { 1197 ret = -EINVAL; 1198 error_propagate(errp, local_err); 1199 QDECREF(options); 1200 goto free_exit; 1201 } 1202 } 1203 1204 if (!bs->drv || !bs->drv->supports_backing) { 1205 ret = -EINVAL; 1206 error_setg(errp, "Driver doesn't support backing files"); 1207 QDECREF(options); 1208 goto free_exit; 1209 } 1210 1211 backing_hd = bdrv_new(); 1212 1213 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1214 qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 1215 } 1216 1217 assert(bs->backing == NULL); 1218 ret = bdrv_open_inherit(&backing_hd, 1219 *backing_filename ? backing_filename : NULL, 1220 NULL, options, 0, bs, &child_backing, &local_err); 1221 if (ret < 0) { 1222 bdrv_unref(backing_hd); 1223 backing_hd = NULL; 1224 bs->open_flags |= BDRV_O_NO_BACKING; 1225 error_setg(errp, "Could not open backing file: %s", 1226 error_get_pretty(local_err)); 1227 error_free(local_err); 1228 goto free_exit; 1229 } 1230 1231 /* Hook up the backing file link; drop our reference, bs owns the 1232 * backing_hd reference now */ 1233 bdrv_set_backing_hd(bs, backing_hd); 1234 bdrv_unref(backing_hd); 1235 1236 free_exit: 1237 g_free(backing_filename); 1238 return ret; 1239 } 1240 1241 /* 1242 * Opens a disk image whose options are given as BlockdevRef in another block 1243 * device's options. 1244 * 1245 * If allow_none is true, no image will be opened if filename is false and no 1246 * BlockdevRef is given. NULL will be returned, but errp remains unset. 1247 * 1248 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1249 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1250 * itself, all options starting with "${bdref_key}." are considered part of the 1251 * BlockdevRef. 1252 * 1253 * The BlockdevRef will be removed from the options QDict. 1254 */ 1255 BdrvChild *bdrv_open_child(const char *filename, 1256 QDict *options, const char *bdref_key, 1257 BlockDriverState* parent, 1258 const BdrvChildRole *child_role, 1259 bool allow_none, Error **errp) 1260 { 1261 BdrvChild *c = NULL; 1262 BlockDriverState *bs; 1263 QDict *image_options; 1264 int ret; 1265 char *bdref_key_dot; 1266 const char *reference; 1267 1268 assert(child_role != NULL); 1269 1270 bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1271 qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1272 g_free(bdref_key_dot); 1273 1274 reference = qdict_get_try_str(options, bdref_key); 1275 if (!filename && !reference && !qdict_size(image_options)) { 1276 if (!allow_none) { 1277 error_setg(errp, "A block device must be specified for \"%s\"", 1278 bdref_key); 1279 } 1280 QDECREF(image_options); 1281 goto done; 1282 } 1283 1284 bs = NULL; 1285 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, 1286 parent, child_role, errp); 1287 if (ret < 0) { 1288 goto done; 1289 } 1290 1291 c = bdrv_attach_child(parent, bs, child_role); 1292 1293 done: 1294 qdict_del(options, bdref_key); 1295 return c; 1296 } 1297 1298 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1299 { 1300 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 1301 char *tmp_filename = g_malloc0(PATH_MAX + 1); 1302 int64_t total_size; 1303 QemuOpts *opts = NULL; 1304 QDict *snapshot_options; 1305 BlockDriverState *bs_snapshot; 1306 Error *local_err = NULL; 1307 int ret; 1308 1309 /* if snapshot, we create a temporary backing file and open it 1310 instead of opening 'filename' directly */ 1311 1312 /* Get the required size from the image */ 1313 total_size = bdrv_getlength(bs); 1314 if (total_size < 0) { 1315 ret = total_size; 1316 error_setg_errno(errp, -total_size, "Could not get image size"); 1317 goto out; 1318 } 1319 1320 /* Create the temporary image */ 1321 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1322 if (ret < 0) { 1323 error_setg_errno(errp, -ret, "Could not get temporary filename"); 1324 goto out; 1325 } 1326 1327 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1328 &error_abort); 1329 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1330 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 1331 qemu_opts_del(opts); 1332 if (ret < 0) { 1333 error_setg_errno(errp, -ret, "Could not create temporary overlay " 1334 "'%s': %s", tmp_filename, 1335 error_get_pretty(local_err)); 1336 error_free(local_err); 1337 goto out; 1338 } 1339 1340 /* Prepare a new options QDict for the temporary file */ 1341 snapshot_options = qdict_new(); 1342 qdict_put(snapshot_options, "file.driver", 1343 qstring_from_str("file")); 1344 qdict_put(snapshot_options, "file.filename", 1345 qstring_from_str(tmp_filename)); 1346 qdict_put(snapshot_options, "driver", 1347 qstring_from_str("qcow2")); 1348 1349 bs_snapshot = bdrv_new(); 1350 1351 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1352 flags, &local_err); 1353 if (ret < 0) { 1354 error_propagate(errp, local_err); 1355 goto out; 1356 } 1357 1358 bdrv_append(bs_snapshot, bs); 1359 1360 out: 1361 g_free(tmp_filename); 1362 return ret; 1363 } 1364 1365 /* 1366 * Opens a disk image (raw, qcow2, vmdk, ...) 1367 * 1368 * options is a QDict of options to pass to the block drivers, or NULL for an 1369 * empty set of options. The reference to the QDict belongs to the block layer 1370 * after the call (even on failure), so if the caller intends to reuse the 1371 * dictionary, it needs to use QINCREF() before calling bdrv_open. 1372 * 1373 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1374 * If it is not NULL, the referenced BDS will be reused. 1375 * 1376 * The reference parameter may be used to specify an existing block device which 1377 * should be opened. If specified, neither options nor a filename may be given, 1378 * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1379 */ 1380 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, 1381 const char *reference, QDict *options, int flags, 1382 BlockDriverState *parent, 1383 const BdrvChildRole *child_role, Error **errp) 1384 { 1385 int ret; 1386 BdrvChild *file = NULL; 1387 BlockDriverState *bs; 1388 BlockDriver *drv = NULL; 1389 const char *drvname; 1390 Error *local_err = NULL; 1391 int snapshot_flags = 0; 1392 1393 assert(pbs); 1394 assert(!child_role || !flags); 1395 assert(!child_role == !parent); 1396 1397 if (reference) { 1398 bool options_non_empty = options ? qdict_size(options) : false; 1399 QDECREF(options); 1400 1401 if (*pbs) { 1402 error_setg(errp, "Cannot reuse an existing BDS when referencing " 1403 "another block device"); 1404 return -EINVAL; 1405 } 1406 1407 if (filename || options_non_empty) { 1408 error_setg(errp, "Cannot reference an existing block device with " 1409 "additional options or a new filename"); 1410 return -EINVAL; 1411 } 1412 1413 bs = bdrv_lookup_bs(reference, reference, errp); 1414 if (!bs) { 1415 return -ENODEV; 1416 } 1417 bdrv_ref(bs); 1418 *pbs = bs; 1419 return 0; 1420 } 1421 1422 if (*pbs) { 1423 bs = *pbs; 1424 } else { 1425 bs = bdrv_new(); 1426 } 1427 1428 /* NULL means an empty set of options */ 1429 if (options == NULL) { 1430 options = qdict_new(); 1431 } 1432 1433 if (child_role) { 1434 bs->inherits_from = parent; 1435 flags = child_role->inherit_flags(parent->open_flags); 1436 } 1437 1438 ret = bdrv_fill_options(&options, &filename, &flags, &local_err); 1439 if (local_err) { 1440 goto fail; 1441 } 1442 1443 /* Find the right image format driver */ 1444 drvname = qdict_get_try_str(options, "driver"); 1445 if (drvname) { 1446 drv = bdrv_find_format(drvname); 1447 qdict_del(options, "driver"); 1448 if (!drv) { 1449 error_setg(errp, "Unknown driver: '%s'", drvname); 1450 ret = -EINVAL; 1451 goto fail; 1452 } 1453 } 1454 1455 assert(drvname || !(flags & BDRV_O_PROTOCOL)); 1456 1457 bs->open_flags = flags; 1458 bs->options = options; 1459 options = qdict_clone_shallow(options); 1460 1461 /* Open image file without format layer */ 1462 if ((flags & BDRV_O_PROTOCOL) == 0) { 1463 if (flags & BDRV_O_RDWR) { 1464 flags |= BDRV_O_ALLOW_RDWR; 1465 } 1466 if (flags & BDRV_O_SNAPSHOT) { 1467 snapshot_flags = bdrv_temp_snapshot_flags(flags); 1468 flags = bdrv_backing_flags(flags); 1469 } 1470 1471 bs->open_flags = flags; 1472 1473 file = bdrv_open_child(filename, options, "file", bs, 1474 &child_file, true, &local_err); 1475 if (local_err) { 1476 ret = -EINVAL; 1477 goto fail; 1478 } 1479 } 1480 1481 /* Image format probing */ 1482 bs->probed = !drv; 1483 if (!drv && file) { 1484 ret = find_image_format(file->bs, filename, &drv, &local_err); 1485 if (ret < 0) { 1486 goto fail; 1487 } 1488 } else if (!drv) { 1489 error_setg(errp, "Must specify either driver or file"); 1490 ret = -EINVAL; 1491 goto fail; 1492 } 1493 1494 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ 1495 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); 1496 /* file must be NULL if a protocol BDS is about to be created 1497 * (the inverse results in an error message from bdrv_open_common()) */ 1498 assert(!(flags & BDRV_O_PROTOCOL) || !file); 1499 1500 /* Open the image */ 1501 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1502 if (ret < 0) { 1503 goto fail; 1504 } 1505 1506 if (file && (bs->file != file)) { 1507 bdrv_unref_child(bs, file); 1508 file = NULL; 1509 } 1510 1511 /* If there is a backing file, use it */ 1512 if ((flags & BDRV_O_NO_BACKING) == 0) { 1513 QDict *backing_options; 1514 1515 qdict_extract_subqdict(options, &backing_options, "backing."); 1516 ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1517 if (ret < 0) { 1518 goto close_and_fail; 1519 } 1520 } 1521 1522 bdrv_refresh_filename(bs); 1523 1524 /* Check if any unknown options were used */ 1525 if (options && (qdict_size(options) != 0)) { 1526 const QDictEntry *entry = qdict_first(options); 1527 if (flags & BDRV_O_PROTOCOL) { 1528 error_setg(errp, "Block protocol '%s' doesn't support the option " 1529 "'%s'", drv->format_name, entry->key); 1530 } else { 1531 error_setg(errp, "Block format '%s' used by device '%s' doesn't " 1532 "support the option '%s'", drv->format_name, 1533 bdrv_get_device_name(bs), entry->key); 1534 } 1535 1536 ret = -EINVAL; 1537 goto close_and_fail; 1538 } 1539 1540 if (!bdrv_key_required(bs)) { 1541 if (bs->blk) { 1542 blk_dev_change_media_cb(bs->blk, true); 1543 } 1544 } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1545 && !runstate_check(RUN_STATE_INMIGRATE) 1546 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1547 error_setg(errp, 1548 "Guest must be stopped for opening of encrypted image"); 1549 ret = -EBUSY; 1550 goto close_and_fail; 1551 } 1552 1553 QDECREF(options); 1554 *pbs = bs; 1555 1556 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1557 * temporary snapshot afterwards. */ 1558 if (snapshot_flags) { 1559 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1560 if (local_err) { 1561 goto close_and_fail; 1562 } 1563 } 1564 1565 return 0; 1566 1567 fail: 1568 if (file != NULL) { 1569 bdrv_unref_child(bs, file); 1570 } 1571 QDECREF(bs->options); 1572 QDECREF(options); 1573 bs->options = NULL; 1574 if (!*pbs) { 1575 /* If *pbs is NULL, a new BDS has been created in this function and 1576 needs to be freed now. Otherwise, it does not need to be closed, 1577 since it has not really been opened yet. */ 1578 bdrv_unref(bs); 1579 } 1580 if (local_err) { 1581 error_propagate(errp, local_err); 1582 } 1583 return ret; 1584 1585 close_and_fail: 1586 /* See fail path, but now the BDS has to be always closed */ 1587 if (*pbs) { 1588 bdrv_close(bs); 1589 } else { 1590 bdrv_unref(bs); 1591 } 1592 QDECREF(options); 1593 if (local_err) { 1594 error_propagate(errp, local_err); 1595 } 1596 return ret; 1597 } 1598 1599 int bdrv_open(BlockDriverState **pbs, const char *filename, 1600 const char *reference, QDict *options, int flags, Error **errp) 1601 { 1602 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, 1603 NULL, errp); 1604 } 1605 1606 typedef struct BlockReopenQueueEntry { 1607 bool prepared; 1608 BDRVReopenState state; 1609 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1610 } BlockReopenQueueEntry; 1611 1612 /* 1613 * Adds a BlockDriverState to a simple queue for an atomic, transactional 1614 * reopen of multiple devices. 1615 * 1616 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1617 * already performed, or alternatively may be NULL a new BlockReopenQueue will 1618 * be created and initialized. This newly created BlockReopenQueue should be 1619 * passed back in for subsequent calls that are intended to be of the same 1620 * atomic 'set'. 1621 * 1622 * bs is the BlockDriverState to add to the reopen queue. 1623 * 1624 * options contains the changed options for the associated bs 1625 * (the BlockReopenQueue takes ownership) 1626 * 1627 * flags contains the open flags for the associated bs 1628 * 1629 * returns a pointer to bs_queue, which is either the newly allocated 1630 * bs_queue, or the existing bs_queue being used. 1631 * 1632 */ 1633 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1634 BlockDriverState *bs, 1635 QDict *options, int flags) 1636 { 1637 assert(bs != NULL); 1638 1639 BlockReopenQueueEntry *bs_entry; 1640 BdrvChild *child; 1641 QDict *old_options; 1642 1643 if (bs_queue == NULL) { 1644 bs_queue = g_new0(BlockReopenQueue, 1); 1645 QSIMPLEQ_INIT(bs_queue); 1646 } 1647 1648 if (!options) { 1649 options = qdict_new(); 1650 } 1651 1652 old_options = qdict_clone_shallow(bs->options); 1653 qdict_join(options, old_options, false); 1654 QDECREF(old_options); 1655 1656 /* bdrv_open() masks this flag out */ 1657 flags &= ~BDRV_O_PROTOCOL; 1658 1659 QLIST_FOREACH(child, &bs->children, next) { 1660 int child_flags; 1661 1662 if (child->bs->inherits_from != bs) { 1663 continue; 1664 } 1665 1666 child_flags = child->role->inherit_flags(flags); 1667 /* TODO Pass down child flags (backing.*, extents.*, ...) */ 1668 bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags); 1669 } 1670 1671 bs_entry = g_new0(BlockReopenQueueEntry, 1); 1672 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1673 1674 bs_entry->state.bs = bs; 1675 bs_entry->state.options = options; 1676 bs_entry->state.flags = flags; 1677 1678 return bs_queue; 1679 } 1680 1681 /* 1682 * Reopen multiple BlockDriverStates atomically & transactionally. 1683 * 1684 * The queue passed in (bs_queue) must have been built up previous 1685 * via bdrv_reopen_queue(). 1686 * 1687 * Reopens all BDS specified in the queue, with the appropriate 1688 * flags. All devices are prepared for reopen, and failure of any 1689 * device will cause all device changes to be abandonded, and intermediate 1690 * data cleaned up. 1691 * 1692 * If all devices prepare successfully, then the changes are committed 1693 * to all devices. 1694 * 1695 */ 1696 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1697 { 1698 int ret = -1; 1699 BlockReopenQueueEntry *bs_entry, *next; 1700 Error *local_err = NULL; 1701 1702 assert(bs_queue != NULL); 1703 1704 bdrv_drain_all(); 1705 1706 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1707 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1708 error_propagate(errp, local_err); 1709 goto cleanup; 1710 } 1711 bs_entry->prepared = true; 1712 } 1713 1714 /* If we reach this point, we have success and just need to apply the 1715 * changes 1716 */ 1717 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1718 bdrv_reopen_commit(&bs_entry->state); 1719 } 1720 1721 ret = 0; 1722 1723 cleanup: 1724 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1725 if (ret && bs_entry->prepared) { 1726 bdrv_reopen_abort(&bs_entry->state); 1727 } 1728 QDECREF(bs_entry->state.options); 1729 g_free(bs_entry); 1730 } 1731 g_free(bs_queue); 1732 return ret; 1733 } 1734 1735 1736 /* Reopen a single BlockDriverState with the specified flags. */ 1737 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1738 { 1739 int ret = -1; 1740 Error *local_err = NULL; 1741 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); 1742 1743 ret = bdrv_reopen_multiple(queue, &local_err); 1744 if (local_err != NULL) { 1745 error_propagate(errp, local_err); 1746 } 1747 return ret; 1748 } 1749 1750 1751 /* 1752 * Prepares a BlockDriverState for reopen. All changes are staged in the 1753 * 'opaque' field of the BDRVReopenState, which is used and allocated by 1754 * the block driver layer .bdrv_reopen_prepare() 1755 * 1756 * bs is the BlockDriverState to reopen 1757 * flags are the new open flags 1758 * queue is the reopen queue 1759 * 1760 * Returns 0 on success, non-zero on error. On error errp will be set 1761 * as well. 1762 * 1763 * On failure, bdrv_reopen_abort() will be called to clean up any data. 1764 * It is the responsibility of the caller to then call the abort() or 1765 * commit() for any other BDS that have been left in a prepare() state 1766 * 1767 */ 1768 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1769 Error **errp) 1770 { 1771 int ret = -1; 1772 Error *local_err = NULL; 1773 BlockDriver *drv; 1774 1775 assert(reopen_state != NULL); 1776 assert(reopen_state->bs->drv != NULL); 1777 drv = reopen_state->bs->drv; 1778 1779 /* if we are to stay read-only, do not allow permission change 1780 * to r/w */ 1781 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1782 reopen_state->flags & BDRV_O_RDWR) { 1783 error_setg(errp, "Node '%s' is read only", 1784 bdrv_get_device_or_node_name(reopen_state->bs)); 1785 goto error; 1786 } 1787 1788 1789 ret = bdrv_flush(reopen_state->bs); 1790 if (ret) { 1791 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1792 strerror(-ret)); 1793 goto error; 1794 } 1795 1796 if (drv->bdrv_reopen_prepare) { 1797 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1798 if (ret) { 1799 if (local_err != NULL) { 1800 error_propagate(errp, local_err); 1801 } else { 1802 error_setg(errp, "failed while preparing to reopen image '%s'", 1803 reopen_state->bs->filename); 1804 } 1805 goto error; 1806 } 1807 } else { 1808 /* It is currently mandatory to have a bdrv_reopen_prepare() 1809 * handler for each supported drv. */ 1810 error_setg(errp, "Block format '%s' used by node '%s' " 1811 "does not support reopening files", drv->format_name, 1812 bdrv_get_device_or_node_name(reopen_state->bs)); 1813 ret = -1; 1814 goto error; 1815 } 1816 1817 /* Options that are not handled are only okay if they are unchanged 1818 * compared to the old state. It is expected that some options are only 1819 * used for the initial open, but not reopen (e.g. filename) */ 1820 if (qdict_size(reopen_state->options)) { 1821 const QDictEntry *entry = qdict_first(reopen_state->options); 1822 1823 do { 1824 QString *new_obj = qobject_to_qstring(entry->value); 1825 const char *new = qstring_get_str(new_obj); 1826 const char *old = qdict_get_try_str(reopen_state->bs->options, 1827 entry->key); 1828 1829 if (!old || strcmp(new, old)) { 1830 error_setg(errp, "Cannot change the option '%s'", entry->key); 1831 ret = -EINVAL; 1832 goto error; 1833 } 1834 } while ((entry = qdict_next(reopen_state->options, entry))); 1835 } 1836 1837 ret = 0; 1838 1839 error: 1840 return ret; 1841 } 1842 1843 /* 1844 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1845 * makes them final by swapping the staging BlockDriverState contents into 1846 * the active BlockDriverState contents. 1847 */ 1848 void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1849 { 1850 BlockDriver *drv; 1851 1852 assert(reopen_state != NULL); 1853 drv = reopen_state->bs->drv; 1854 assert(drv != NULL); 1855 1856 /* If there are any driver level actions to take */ 1857 if (drv->bdrv_reopen_commit) { 1858 drv->bdrv_reopen_commit(reopen_state); 1859 } 1860 1861 /* set BDS specific flags now */ 1862 reopen_state->bs->open_flags = reopen_state->flags; 1863 reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1864 BDRV_O_CACHE_WB); 1865 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1866 1867 bdrv_refresh_limits(reopen_state->bs, NULL); 1868 } 1869 1870 /* 1871 * Abort the reopen, and delete and free the staged changes in 1872 * reopen_state 1873 */ 1874 void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1875 { 1876 BlockDriver *drv; 1877 1878 assert(reopen_state != NULL); 1879 drv = reopen_state->bs->drv; 1880 assert(drv != NULL); 1881 1882 if (drv->bdrv_reopen_abort) { 1883 drv->bdrv_reopen_abort(reopen_state); 1884 } 1885 } 1886 1887 1888 void bdrv_close(BlockDriverState *bs) 1889 { 1890 BdrvAioNotifier *ban, *ban_next; 1891 1892 if (bs->job) { 1893 block_job_cancel_sync(bs->job); 1894 } 1895 1896 /* Disable I/O limits and drain all pending throttled requests */ 1897 if (bs->io_limits_enabled) { 1898 bdrv_io_limits_disable(bs); 1899 } 1900 1901 bdrv_drain(bs); /* complete I/O */ 1902 bdrv_flush(bs); 1903 bdrv_drain(bs); /* in case flush left pending I/O */ 1904 notifier_list_notify(&bs->close_notifiers, bs); 1905 1906 if (bs->drv) { 1907 BdrvChild *child, *next; 1908 1909 bs->drv->bdrv_close(bs); 1910 bs->drv = NULL; 1911 1912 bdrv_set_backing_hd(bs, NULL); 1913 1914 if (bs->file != NULL) { 1915 bdrv_unref_child(bs, bs->file); 1916 bs->file = NULL; 1917 } 1918 1919 QLIST_FOREACH_SAFE(child, &bs->children, next, next) { 1920 /* TODO Remove bdrv_unref() from drivers' close function and use 1921 * bdrv_unref_child() here */ 1922 if (child->bs->inherits_from == bs) { 1923 child->bs->inherits_from = NULL; 1924 } 1925 bdrv_detach_child(child); 1926 } 1927 1928 g_free(bs->opaque); 1929 bs->opaque = NULL; 1930 bs->copy_on_read = 0; 1931 bs->backing_file[0] = '\0'; 1932 bs->backing_format[0] = '\0'; 1933 bs->total_sectors = 0; 1934 bs->encrypted = 0; 1935 bs->valid_key = 0; 1936 bs->sg = 0; 1937 bs->zero_beyond_eof = false; 1938 QDECREF(bs->options); 1939 bs->options = NULL; 1940 QDECREF(bs->full_open_options); 1941 bs->full_open_options = NULL; 1942 } 1943 1944 if (bs->blk) { 1945 blk_dev_change_media_cb(bs->blk, false); 1946 } 1947 1948 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 1949 g_free(ban); 1950 } 1951 QLIST_INIT(&bs->aio_notifiers); 1952 } 1953 1954 void bdrv_close_all(void) 1955 { 1956 BlockDriverState *bs; 1957 1958 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1959 AioContext *aio_context = bdrv_get_aio_context(bs); 1960 1961 aio_context_acquire(aio_context); 1962 bdrv_close(bs); 1963 aio_context_release(aio_context); 1964 } 1965 } 1966 1967 /* make a BlockDriverState anonymous by removing from bdrv_state and 1968 * graph_bdrv_state list. 1969 Also, NULL terminate the device_name to prevent double remove */ 1970 void bdrv_make_anon(BlockDriverState *bs) 1971 { 1972 /* 1973 * Take care to remove bs from bdrv_states only when it's actually 1974 * in it. Note that bs->device_list.tqe_prev is initially null, 1975 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 1976 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 1977 * resetting it to null on remove. 1978 */ 1979 if (bs->device_list.tqe_prev) { 1980 QTAILQ_REMOVE(&bdrv_states, bs, device_list); 1981 bs->device_list.tqe_prev = NULL; 1982 } 1983 if (bs->node_name[0] != '\0') { 1984 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 1985 } 1986 bs->node_name[0] = '\0'; 1987 } 1988 1989 /* Fields that need to stay with the top-level BDS */ 1990 static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 1991 BlockDriverState *bs_src) 1992 { 1993 /* move some fields that need to stay attached to the device */ 1994 1995 /* dev info */ 1996 bs_dest->guest_block_size = bs_src->guest_block_size; 1997 bs_dest->copy_on_read = bs_src->copy_on_read; 1998 1999 bs_dest->enable_write_cache = bs_src->enable_write_cache; 2000 2001 /* r/w error */ 2002 bs_dest->on_read_error = bs_src->on_read_error; 2003 bs_dest->on_write_error = bs_src->on_write_error; 2004 2005 /* i/o status */ 2006 bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 2007 bs_dest->iostatus = bs_src->iostatus; 2008 2009 /* dirty bitmap */ 2010 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 2011 } 2012 2013 static void change_parent_backing_link(BlockDriverState *from, 2014 BlockDriverState *to) 2015 { 2016 BdrvChild *c, *next; 2017 2018 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { 2019 assert(c->role != &child_backing); 2020 c->bs = to; 2021 QLIST_REMOVE(c, next_parent); 2022 QLIST_INSERT_HEAD(&to->parents, c, next_parent); 2023 bdrv_ref(to); 2024 bdrv_unref(from); 2025 } 2026 if (from->blk) { 2027 blk_set_bs(from->blk, to); 2028 if (!to->device_list.tqe_prev) { 2029 QTAILQ_INSERT_BEFORE(from, to, device_list); 2030 } 2031 QTAILQ_REMOVE(&bdrv_states, from, device_list); 2032 } 2033 } 2034 2035 static void swap_feature_fields(BlockDriverState *bs_top, 2036 BlockDriverState *bs_new) 2037 { 2038 BlockDriverState tmp; 2039 2040 bdrv_move_feature_fields(&tmp, bs_top); 2041 bdrv_move_feature_fields(bs_top, bs_new); 2042 bdrv_move_feature_fields(bs_new, &tmp); 2043 2044 assert(!bs_new->throttle_state); 2045 if (bs_top->throttle_state) { 2046 assert(bs_top->io_limits_enabled); 2047 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top)); 2048 bdrv_io_limits_disable(bs_top); 2049 } 2050 } 2051 2052 /* 2053 * Add new bs contents at the top of an image chain while the chain is 2054 * live, while keeping required fields on the top layer. 2055 * 2056 * This will modify the BlockDriverState fields, and swap contents 2057 * between bs_new and bs_top. Both bs_new and bs_top are modified. 2058 * 2059 * bs_new must not be attached to a BlockBackend. 2060 * 2061 * This function does not create any image files. 2062 * 2063 * bdrv_append() takes ownership of a bs_new reference and unrefs it because 2064 * that's what the callers commonly need. bs_new will be referenced by the old 2065 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a 2066 * reference of its own, it must call bdrv_ref(). 2067 */ 2068 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 2069 { 2070 assert(!bdrv_requests_pending(bs_top)); 2071 assert(!bdrv_requests_pending(bs_new)); 2072 2073 bdrv_ref(bs_top); 2074 change_parent_backing_link(bs_top, bs_new); 2075 2076 /* Some fields always stay on top of the backing file chain */ 2077 swap_feature_fields(bs_top, bs_new); 2078 2079 bdrv_set_backing_hd(bs_new, bs_top); 2080 bdrv_unref(bs_top); 2081 2082 /* bs_new is now referenced by its new parents, we don't need the 2083 * additional reference any more. */ 2084 bdrv_unref(bs_new); 2085 } 2086 2087 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new) 2088 { 2089 assert(!bdrv_requests_pending(old)); 2090 assert(!bdrv_requests_pending(new)); 2091 2092 bdrv_ref(old); 2093 2094 if (old->blk) { 2095 /* As long as these fields aren't in BlockBackend, but in the top-level 2096 * BlockDriverState, it's not possible for a BDS to have two BBs. 2097 * 2098 * We really want to copy the fields from old to new, but we go for a 2099 * swap instead so that pointers aren't duplicated and cause trouble. 2100 * (Also, bdrv_swap() used to do the same.) */ 2101 assert(!new->blk); 2102 swap_feature_fields(old, new); 2103 } 2104 change_parent_backing_link(old, new); 2105 2106 /* Change backing files if a previously independent node is added to the 2107 * chain. For active commit, we replace top by its own (indirect) backing 2108 * file and don't do anything here so we don't build a loop. */ 2109 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) { 2110 bdrv_set_backing_hd(new, backing_bs(old)); 2111 bdrv_set_backing_hd(old, NULL); 2112 } 2113 2114 bdrv_unref(old); 2115 } 2116 2117 static void bdrv_delete(BlockDriverState *bs) 2118 { 2119 assert(!bs->job); 2120 assert(bdrv_op_blocker_is_empty(bs)); 2121 assert(!bs->refcnt); 2122 assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 2123 2124 bdrv_close(bs); 2125 2126 /* remove from list, if necessary */ 2127 bdrv_make_anon(bs); 2128 2129 g_free(bs); 2130 } 2131 2132 /* 2133 * Run consistency checks on an image 2134 * 2135 * Returns 0 if the check could be completed (it doesn't mean that the image is 2136 * free of errors) or -errno when an internal error occurred. The results of the 2137 * check are stored in res. 2138 */ 2139 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2140 { 2141 if (bs->drv == NULL) { 2142 return -ENOMEDIUM; 2143 } 2144 if (bs->drv->bdrv_check == NULL) { 2145 return -ENOTSUP; 2146 } 2147 2148 memset(res, 0, sizeof(*res)); 2149 return bs->drv->bdrv_check(bs, res, fix); 2150 } 2151 2152 #define COMMIT_BUF_SECTORS 2048 2153 2154 /* commit COW file into the raw image */ 2155 int bdrv_commit(BlockDriverState *bs) 2156 { 2157 BlockDriver *drv = bs->drv; 2158 int64_t sector, total_sectors, length, backing_length; 2159 int n, ro, open_flags; 2160 int ret = 0; 2161 uint8_t *buf = NULL; 2162 2163 if (!drv) 2164 return -ENOMEDIUM; 2165 2166 if (!bs->backing) { 2167 return -ENOTSUP; 2168 } 2169 2170 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2171 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 2172 return -EBUSY; 2173 } 2174 2175 ro = bs->backing->bs->read_only; 2176 open_flags = bs->backing->bs->open_flags; 2177 2178 if (ro) { 2179 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) { 2180 return -EACCES; 2181 } 2182 } 2183 2184 length = bdrv_getlength(bs); 2185 if (length < 0) { 2186 ret = length; 2187 goto ro_cleanup; 2188 } 2189 2190 backing_length = bdrv_getlength(bs->backing->bs); 2191 if (backing_length < 0) { 2192 ret = backing_length; 2193 goto ro_cleanup; 2194 } 2195 2196 /* If our top snapshot is larger than the backing file image, 2197 * grow the backing file image if possible. If not possible, 2198 * we must return an error */ 2199 if (length > backing_length) { 2200 ret = bdrv_truncate(bs->backing->bs, length); 2201 if (ret < 0) { 2202 goto ro_cleanup; 2203 } 2204 } 2205 2206 total_sectors = length >> BDRV_SECTOR_BITS; 2207 2208 /* qemu_try_blockalign() for bs will choose an alignment that works for 2209 * bs->backing->bs as well, so no need to compare the alignment manually. */ 2210 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2211 if (buf == NULL) { 2212 ret = -ENOMEM; 2213 goto ro_cleanup; 2214 } 2215 2216 for (sector = 0; sector < total_sectors; sector += n) { 2217 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2218 if (ret < 0) { 2219 goto ro_cleanup; 2220 } 2221 if (ret) { 2222 ret = bdrv_read(bs, sector, buf, n); 2223 if (ret < 0) { 2224 goto ro_cleanup; 2225 } 2226 2227 ret = bdrv_write(bs->backing->bs, sector, buf, n); 2228 if (ret < 0) { 2229 goto ro_cleanup; 2230 } 2231 } 2232 } 2233 2234 if (drv->bdrv_make_empty) { 2235 ret = drv->bdrv_make_empty(bs); 2236 if (ret < 0) { 2237 goto ro_cleanup; 2238 } 2239 bdrv_flush(bs); 2240 } 2241 2242 /* 2243 * Make sure all data we wrote to the backing device is actually 2244 * stable on disk. 2245 */ 2246 if (bs->backing) { 2247 bdrv_flush(bs->backing->bs); 2248 } 2249 2250 ret = 0; 2251 ro_cleanup: 2252 qemu_vfree(buf); 2253 2254 if (ro) { 2255 /* ignoring error return here */ 2256 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL); 2257 } 2258 2259 return ret; 2260 } 2261 2262 int bdrv_commit_all(void) 2263 { 2264 BlockDriverState *bs; 2265 2266 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2267 AioContext *aio_context = bdrv_get_aio_context(bs); 2268 2269 aio_context_acquire(aio_context); 2270 if (bs->drv && bs->backing) { 2271 int ret = bdrv_commit(bs); 2272 if (ret < 0) { 2273 aio_context_release(aio_context); 2274 return ret; 2275 } 2276 } 2277 aio_context_release(aio_context); 2278 } 2279 return 0; 2280 } 2281 2282 /* 2283 * Return values: 2284 * 0 - success 2285 * -EINVAL - backing format specified, but no file 2286 * -ENOSPC - can't update the backing file because no space is left in the 2287 * image file header 2288 * -ENOTSUP - format driver doesn't support changing the backing file 2289 */ 2290 int bdrv_change_backing_file(BlockDriverState *bs, 2291 const char *backing_file, const char *backing_fmt) 2292 { 2293 BlockDriver *drv = bs->drv; 2294 int ret; 2295 2296 /* Backing file format doesn't make sense without a backing file */ 2297 if (backing_fmt && !backing_file) { 2298 return -EINVAL; 2299 } 2300 2301 if (drv->bdrv_change_backing_file != NULL) { 2302 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2303 } else { 2304 ret = -ENOTSUP; 2305 } 2306 2307 if (ret == 0) { 2308 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2309 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2310 } 2311 return ret; 2312 } 2313 2314 /* 2315 * Finds the image layer in the chain that has 'bs' as its backing file. 2316 * 2317 * active is the current topmost image. 2318 * 2319 * Returns NULL if bs is not found in active's image chain, 2320 * or if active == bs. 2321 * 2322 * Returns the bottommost base image if bs == NULL. 2323 */ 2324 BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 2325 BlockDriverState *bs) 2326 { 2327 while (active && bs != backing_bs(active)) { 2328 active = backing_bs(active); 2329 } 2330 2331 return active; 2332 } 2333 2334 /* Given a BDS, searches for the base layer. */ 2335 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 2336 { 2337 return bdrv_find_overlay(bs, NULL); 2338 } 2339 2340 /* 2341 * Drops images above 'base' up to and including 'top', and sets the image 2342 * above 'top' to have base as its backing file. 2343 * 2344 * Requires that the overlay to 'top' is opened r/w, so that the backing file 2345 * information in 'bs' can be properly updated. 2346 * 2347 * E.g., this will convert the following chain: 2348 * bottom <- base <- intermediate <- top <- active 2349 * 2350 * to 2351 * 2352 * bottom <- base <- active 2353 * 2354 * It is allowed for bottom==base, in which case it converts: 2355 * 2356 * base <- intermediate <- top <- active 2357 * 2358 * to 2359 * 2360 * base <- active 2361 * 2362 * If backing_file_str is non-NULL, it will be used when modifying top's 2363 * overlay image metadata. 2364 * 2365 * Error conditions: 2366 * if active == top, that is considered an error 2367 * 2368 */ 2369 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 2370 BlockDriverState *base, const char *backing_file_str) 2371 { 2372 BlockDriverState *new_top_bs = NULL; 2373 int ret = -EIO; 2374 2375 if (!top->drv || !base->drv) { 2376 goto exit; 2377 } 2378 2379 new_top_bs = bdrv_find_overlay(active, top); 2380 2381 if (new_top_bs == NULL) { 2382 /* we could not find the image above 'top', this is an error */ 2383 goto exit; 2384 } 2385 2386 /* special case of new_top_bs->backing->bs already pointing to base - nothing 2387 * to do, no intermediate images */ 2388 if (backing_bs(new_top_bs) == base) { 2389 ret = 0; 2390 goto exit; 2391 } 2392 2393 /* Make sure that base is in the backing chain of top */ 2394 if (!bdrv_chain_contains(top, base)) { 2395 goto exit; 2396 } 2397 2398 /* success - we can delete the intermediate states, and link top->base */ 2399 backing_file_str = backing_file_str ? backing_file_str : base->filename; 2400 ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 2401 base->drv ? base->drv->format_name : ""); 2402 if (ret) { 2403 goto exit; 2404 } 2405 bdrv_set_backing_hd(new_top_bs, base); 2406 2407 ret = 0; 2408 exit: 2409 return ret; 2410 } 2411 2412 /** 2413 * Truncate file to 'offset' bytes (needed only for file protocols) 2414 */ 2415 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2416 { 2417 BlockDriver *drv = bs->drv; 2418 int ret; 2419 if (!drv) 2420 return -ENOMEDIUM; 2421 if (!drv->bdrv_truncate) 2422 return -ENOTSUP; 2423 if (bs->read_only) 2424 return -EACCES; 2425 2426 ret = drv->bdrv_truncate(bs, offset); 2427 if (ret == 0) { 2428 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2429 bdrv_dirty_bitmap_truncate(bs); 2430 if (bs->blk) { 2431 blk_dev_resize_cb(bs->blk); 2432 } 2433 } 2434 return ret; 2435 } 2436 2437 /** 2438 * Length of a allocated file in bytes. Sparse files are counted by actual 2439 * allocated space. Return < 0 if error or unknown. 2440 */ 2441 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2442 { 2443 BlockDriver *drv = bs->drv; 2444 if (!drv) { 2445 return -ENOMEDIUM; 2446 } 2447 if (drv->bdrv_get_allocated_file_size) { 2448 return drv->bdrv_get_allocated_file_size(bs); 2449 } 2450 if (bs->file) { 2451 return bdrv_get_allocated_file_size(bs->file->bs); 2452 } 2453 return -ENOTSUP; 2454 } 2455 2456 /** 2457 * Return number of sectors on success, -errno on error. 2458 */ 2459 int64_t bdrv_nb_sectors(BlockDriverState *bs) 2460 { 2461 BlockDriver *drv = bs->drv; 2462 2463 if (!drv) 2464 return -ENOMEDIUM; 2465 2466 if (drv->has_variable_length) { 2467 int ret = refresh_total_sectors(bs, bs->total_sectors); 2468 if (ret < 0) { 2469 return ret; 2470 } 2471 } 2472 return bs->total_sectors; 2473 } 2474 2475 /** 2476 * Return length in bytes on success, -errno on error. 2477 * The length is always a multiple of BDRV_SECTOR_SIZE. 2478 */ 2479 int64_t bdrv_getlength(BlockDriverState *bs) 2480 { 2481 int64_t ret = bdrv_nb_sectors(bs); 2482 2483 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; 2484 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 2485 } 2486 2487 /* return 0 as number of sectors if no device present or error */ 2488 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2489 { 2490 int64_t nb_sectors = bdrv_nb_sectors(bs); 2491 2492 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 2493 } 2494 2495 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2496 BlockdevOnError on_write_error) 2497 { 2498 bs->on_read_error = on_read_error; 2499 bs->on_write_error = on_write_error; 2500 } 2501 2502 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2503 { 2504 return is_read ? bs->on_read_error : bs->on_write_error; 2505 } 2506 2507 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2508 { 2509 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 2510 2511 switch (on_err) { 2512 case BLOCKDEV_ON_ERROR_ENOSPC: 2513 return (error == ENOSPC) ? 2514 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 2515 case BLOCKDEV_ON_ERROR_STOP: 2516 return BLOCK_ERROR_ACTION_STOP; 2517 case BLOCKDEV_ON_ERROR_REPORT: 2518 return BLOCK_ERROR_ACTION_REPORT; 2519 case BLOCKDEV_ON_ERROR_IGNORE: 2520 return BLOCK_ERROR_ACTION_IGNORE; 2521 default: 2522 abort(); 2523 } 2524 } 2525 2526 static void send_qmp_error_event(BlockDriverState *bs, 2527 BlockErrorAction action, 2528 bool is_read, int error) 2529 { 2530 IoOperationType optype; 2531 2532 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 2533 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 2534 bdrv_iostatus_is_enabled(bs), 2535 error == ENOSPC, strerror(error), 2536 &error_abort); 2537 } 2538 2539 /* This is done by device models because, while the block layer knows 2540 * about the error, it does not know whether an operation comes from 2541 * the device or the block layer (from a job, for example). 2542 */ 2543 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2544 bool is_read, int error) 2545 { 2546 assert(error >= 0); 2547 2548 if (action == BLOCK_ERROR_ACTION_STOP) { 2549 /* First set the iostatus, so that "info block" returns an iostatus 2550 * that matches the events raised so far (an additional error iostatus 2551 * is fine, but not a lost one). 2552 */ 2553 bdrv_iostatus_set_err(bs, error); 2554 2555 /* Then raise the request to stop the VM and the event. 2556 * qemu_system_vmstop_request_prepare has two effects. First, 2557 * it ensures that the STOP event always comes after the 2558 * BLOCK_IO_ERROR event. Second, it ensures that even if management 2559 * can observe the STOP event and do a "cont" before the STOP 2560 * event is issued, the VM will not stop. In this case, vm_start() 2561 * also ensures that the STOP/RESUME pair of events is emitted. 2562 */ 2563 qemu_system_vmstop_request_prepare(); 2564 send_qmp_error_event(bs, action, is_read, error); 2565 qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 2566 } else { 2567 send_qmp_error_event(bs, action, is_read, error); 2568 } 2569 } 2570 2571 int bdrv_is_read_only(BlockDriverState *bs) 2572 { 2573 return bs->read_only; 2574 } 2575 2576 int bdrv_is_sg(BlockDriverState *bs) 2577 { 2578 return bs->sg; 2579 } 2580 2581 int bdrv_enable_write_cache(BlockDriverState *bs) 2582 { 2583 return bs->enable_write_cache; 2584 } 2585 2586 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2587 { 2588 bs->enable_write_cache = wce; 2589 2590 /* so a reopen() will preserve wce */ 2591 if (wce) { 2592 bs->open_flags |= BDRV_O_CACHE_WB; 2593 } else { 2594 bs->open_flags &= ~BDRV_O_CACHE_WB; 2595 } 2596 } 2597 2598 int bdrv_is_encrypted(BlockDriverState *bs) 2599 { 2600 if (bs->backing && bs->backing->bs->encrypted) { 2601 return 1; 2602 } 2603 return bs->encrypted; 2604 } 2605 2606 int bdrv_key_required(BlockDriverState *bs) 2607 { 2608 BdrvChild *backing = bs->backing; 2609 2610 if (backing && backing->bs->encrypted && !backing->bs->valid_key) { 2611 return 1; 2612 } 2613 return (bs->encrypted && !bs->valid_key); 2614 } 2615 2616 int bdrv_set_key(BlockDriverState *bs, const char *key) 2617 { 2618 int ret; 2619 if (bs->backing && bs->backing->bs->encrypted) { 2620 ret = bdrv_set_key(bs->backing->bs, key); 2621 if (ret < 0) 2622 return ret; 2623 if (!bs->encrypted) 2624 return 0; 2625 } 2626 if (!bs->encrypted) { 2627 return -EINVAL; 2628 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2629 return -ENOMEDIUM; 2630 } 2631 ret = bs->drv->bdrv_set_key(bs, key); 2632 if (ret < 0) { 2633 bs->valid_key = 0; 2634 } else if (!bs->valid_key) { 2635 bs->valid_key = 1; 2636 if (bs->blk) { 2637 /* call the change callback now, we skipped it on open */ 2638 blk_dev_change_media_cb(bs->blk, true); 2639 } 2640 } 2641 return ret; 2642 } 2643 2644 /* 2645 * Provide an encryption key for @bs. 2646 * If @key is non-null: 2647 * If @bs is not encrypted, fail. 2648 * Else if the key is invalid, fail. 2649 * Else set @bs's key to @key, replacing the existing key, if any. 2650 * If @key is null: 2651 * If @bs is encrypted and still lacks a key, fail. 2652 * Else do nothing. 2653 * On failure, store an error object through @errp if non-null. 2654 */ 2655 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 2656 { 2657 if (key) { 2658 if (!bdrv_is_encrypted(bs)) { 2659 error_setg(errp, "Node '%s' is not encrypted", 2660 bdrv_get_device_or_node_name(bs)); 2661 } else if (bdrv_set_key(bs, key) < 0) { 2662 error_setg(errp, QERR_INVALID_PASSWORD); 2663 } 2664 } else { 2665 if (bdrv_key_required(bs)) { 2666 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 2667 "'%s' (%s) is encrypted", 2668 bdrv_get_device_or_node_name(bs), 2669 bdrv_get_encrypted_filename(bs)); 2670 } 2671 } 2672 } 2673 2674 const char *bdrv_get_format_name(BlockDriverState *bs) 2675 { 2676 return bs->drv ? bs->drv->format_name : NULL; 2677 } 2678 2679 static int qsort_strcmp(const void *a, const void *b) 2680 { 2681 return strcmp(a, b); 2682 } 2683 2684 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2685 void *opaque) 2686 { 2687 BlockDriver *drv; 2688 int count = 0; 2689 int i; 2690 const char **formats = NULL; 2691 2692 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2693 if (drv->format_name) { 2694 bool found = false; 2695 int i = count; 2696 while (formats && i && !found) { 2697 found = !strcmp(formats[--i], drv->format_name); 2698 } 2699 2700 if (!found) { 2701 formats = g_renew(const char *, formats, count + 1); 2702 formats[count++] = drv->format_name; 2703 } 2704 } 2705 } 2706 2707 qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 2708 2709 for (i = 0; i < count; i++) { 2710 it(opaque, formats[i]); 2711 } 2712 2713 g_free(formats); 2714 } 2715 2716 /* This function is to find a node in the bs graph */ 2717 BlockDriverState *bdrv_find_node(const char *node_name) 2718 { 2719 BlockDriverState *bs; 2720 2721 assert(node_name); 2722 2723 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2724 if (!strcmp(node_name, bs->node_name)) { 2725 return bs; 2726 } 2727 } 2728 return NULL; 2729 } 2730 2731 /* Put this QMP function here so it can access the static graph_bdrv_states. */ 2732 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 2733 { 2734 BlockDeviceInfoList *list, *entry; 2735 BlockDriverState *bs; 2736 2737 list = NULL; 2738 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 2739 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 2740 if (!info) { 2741 qapi_free_BlockDeviceInfoList(list); 2742 return NULL; 2743 } 2744 entry = g_malloc0(sizeof(*entry)); 2745 entry->value = info; 2746 entry->next = list; 2747 list = entry; 2748 } 2749 2750 return list; 2751 } 2752 2753 BlockDriverState *bdrv_lookup_bs(const char *device, 2754 const char *node_name, 2755 Error **errp) 2756 { 2757 BlockBackend *blk; 2758 BlockDriverState *bs; 2759 2760 if (device) { 2761 blk = blk_by_name(device); 2762 2763 if (blk) { 2764 return blk_bs(blk); 2765 } 2766 } 2767 2768 if (node_name) { 2769 bs = bdrv_find_node(node_name); 2770 2771 if (bs) { 2772 return bs; 2773 } 2774 } 2775 2776 error_setg(errp, "Cannot find device=%s nor node_name=%s", 2777 device ? device : "", 2778 node_name ? node_name : ""); 2779 return NULL; 2780 } 2781 2782 /* If 'base' is in the same chain as 'top', return true. Otherwise, 2783 * return false. If either argument is NULL, return false. */ 2784 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 2785 { 2786 while (top && top != base) { 2787 top = backing_bs(top); 2788 } 2789 2790 return top != NULL; 2791 } 2792 2793 BlockDriverState *bdrv_next_node(BlockDriverState *bs) 2794 { 2795 if (!bs) { 2796 return QTAILQ_FIRST(&graph_bdrv_states); 2797 } 2798 return QTAILQ_NEXT(bs, node_list); 2799 } 2800 2801 BlockDriverState *bdrv_next(BlockDriverState *bs) 2802 { 2803 if (!bs) { 2804 return QTAILQ_FIRST(&bdrv_states); 2805 } 2806 return QTAILQ_NEXT(bs, device_list); 2807 } 2808 2809 const char *bdrv_get_node_name(const BlockDriverState *bs) 2810 { 2811 return bs->node_name; 2812 } 2813 2814 /* TODO check what callers really want: bs->node_name or blk_name() */ 2815 const char *bdrv_get_device_name(const BlockDriverState *bs) 2816 { 2817 return bs->blk ? blk_name(bs->blk) : ""; 2818 } 2819 2820 /* This can be used to identify nodes that might not have a device 2821 * name associated. Since node and device names live in the same 2822 * namespace, the result is unambiguous. The exception is if both are 2823 * absent, then this returns an empty (non-null) string. */ 2824 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 2825 { 2826 return bs->blk ? blk_name(bs->blk) : bs->node_name; 2827 } 2828 2829 int bdrv_get_flags(BlockDriverState *bs) 2830 { 2831 return bs->open_flags; 2832 } 2833 2834 int bdrv_has_zero_init_1(BlockDriverState *bs) 2835 { 2836 return 1; 2837 } 2838 2839 int bdrv_has_zero_init(BlockDriverState *bs) 2840 { 2841 assert(bs->drv); 2842 2843 /* If BS is a copy on write image, it is initialized to 2844 the contents of the base image, which may not be zeroes. */ 2845 if (bs->backing) { 2846 return 0; 2847 } 2848 if (bs->drv->bdrv_has_zero_init) { 2849 return bs->drv->bdrv_has_zero_init(bs); 2850 } 2851 2852 /* safe default */ 2853 return 0; 2854 } 2855 2856 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 2857 { 2858 BlockDriverInfo bdi; 2859 2860 if (bs->backing) { 2861 return false; 2862 } 2863 2864 if (bdrv_get_info(bs, &bdi) == 0) { 2865 return bdi.unallocated_blocks_are_zero; 2866 } 2867 2868 return false; 2869 } 2870 2871 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 2872 { 2873 BlockDriverInfo bdi; 2874 2875 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) { 2876 return false; 2877 } 2878 2879 if (bdrv_get_info(bs, &bdi) == 0) { 2880 return bdi.can_write_zeroes_with_unmap; 2881 } 2882 2883 return false; 2884 } 2885 2886 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2887 { 2888 if (bs->backing && bs->backing->bs->encrypted) 2889 return bs->backing_file; 2890 else if (bs->encrypted) 2891 return bs->filename; 2892 else 2893 return NULL; 2894 } 2895 2896 void bdrv_get_backing_filename(BlockDriverState *bs, 2897 char *filename, int filename_size) 2898 { 2899 pstrcpy(filename, filename_size, bs->backing_file); 2900 } 2901 2902 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2903 { 2904 BlockDriver *drv = bs->drv; 2905 if (!drv) 2906 return -ENOMEDIUM; 2907 if (!drv->bdrv_get_info) 2908 return -ENOTSUP; 2909 memset(bdi, 0, sizeof(*bdi)); 2910 return drv->bdrv_get_info(bs, bdi); 2911 } 2912 2913 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 2914 { 2915 BlockDriver *drv = bs->drv; 2916 if (drv && drv->bdrv_get_specific_info) { 2917 return drv->bdrv_get_specific_info(bs); 2918 } 2919 return NULL; 2920 } 2921 2922 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2923 { 2924 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 2925 return; 2926 } 2927 2928 bs->drv->bdrv_debug_event(bs, event); 2929 } 2930 2931 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 2932 const char *tag) 2933 { 2934 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 2935 bs = bs->file ? bs->file->bs : NULL; 2936 } 2937 2938 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 2939 return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 2940 } 2941 2942 return -ENOTSUP; 2943 } 2944 2945 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 2946 { 2947 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 2948 bs = bs->file ? bs->file->bs : NULL; 2949 } 2950 2951 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 2952 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 2953 } 2954 2955 return -ENOTSUP; 2956 } 2957 2958 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 2959 { 2960 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 2961 bs = bs->file ? bs->file->bs : NULL; 2962 } 2963 2964 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 2965 return bs->drv->bdrv_debug_resume(bs, tag); 2966 } 2967 2968 return -ENOTSUP; 2969 } 2970 2971 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 2972 { 2973 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 2974 bs = bs->file ? bs->file->bs : NULL; 2975 } 2976 2977 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 2978 return bs->drv->bdrv_debug_is_suspended(bs, tag); 2979 } 2980 2981 return false; 2982 } 2983 2984 int bdrv_is_snapshot(BlockDriverState *bs) 2985 { 2986 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2987 } 2988 2989 /* backing_file can either be relative, or absolute, or a protocol. If it is 2990 * relative, it must be relative to the chain. So, passing in bs->filename 2991 * from a BDS as backing_file should not be done, as that may be relative to 2992 * the CWD rather than the chain. */ 2993 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 2994 const char *backing_file) 2995 { 2996 char *filename_full = NULL; 2997 char *backing_file_full = NULL; 2998 char *filename_tmp = NULL; 2999 int is_protocol = 0; 3000 BlockDriverState *curr_bs = NULL; 3001 BlockDriverState *retval = NULL; 3002 3003 if (!bs || !bs->drv || !backing_file) { 3004 return NULL; 3005 } 3006 3007 filename_full = g_malloc(PATH_MAX); 3008 backing_file_full = g_malloc(PATH_MAX); 3009 filename_tmp = g_malloc(PATH_MAX); 3010 3011 is_protocol = path_has_protocol(backing_file); 3012 3013 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) { 3014 3015 /* If either of the filename paths is actually a protocol, then 3016 * compare unmodified paths; otherwise make paths relative */ 3017 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 3018 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 3019 retval = curr_bs->backing->bs; 3020 break; 3021 } 3022 } else { 3023 /* If not an absolute filename path, make it relative to the current 3024 * image's filename path */ 3025 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3026 backing_file); 3027 3028 /* We are going to compare absolute pathnames */ 3029 if (!realpath(filename_tmp, filename_full)) { 3030 continue; 3031 } 3032 3033 /* We need to make sure the backing filename we are comparing against 3034 * is relative to the current image filename (or absolute) */ 3035 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3036 curr_bs->backing_file); 3037 3038 if (!realpath(filename_tmp, backing_file_full)) { 3039 continue; 3040 } 3041 3042 if (strcmp(backing_file_full, filename_full) == 0) { 3043 retval = curr_bs->backing->bs; 3044 break; 3045 } 3046 } 3047 } 3048 3049 g_free(filename_full); 3050 g_free(backing_file_full); 3051 g_free(filename_tmp); 3052 return retval; 3053 } 3054 3055 int bdrv_get_backing_file_depth(BlockDriverState *bs) 3056 { 3057 if (!bs->drv) { 3058 return 0; 3059 } 3060 3061 if (!bs->backing) { 3062 return 0; 3063 } 3064 3065 return 1 + bdrv_get_backing_file_depth(bs->backing->bs); 3066 } 3067 3068 void bdrv_init(void) 3069 { 3070 module_call_init(MODULE_INIT_BLOCK); 3071 } 3072 3073 void bdrv_init_with_whitelist(void) 3074 { 3075 use_bdrv_whitelist = 1; 3076 bdrv_init(); 3077 } 3078 3079 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 3080 { 3081 Error *local_err = NULL; 3082 int ret; 3083 3084 if (!bs->drv) { 3085 return; 3086 } 3087 3088 if (!(bs->open_flags & BDRV_O_INCOMING)) { 3089 return; 3090 } 3091 bs->open_flags &= ~BDRV_O_INCOMING; 3092 3093 if (bs->drv->bdrv_invalidate_cache) { 3094 bs->drv->bdrv_invalidate_cache(bs, &local_err); 3095 } else if (bs->file) { 3096 bdrv_invalidate_cache(bs->file->bs, &local_err); 3097 } 3098 if (local_err) { 3099 error_propagate(errp, local_err); 3100 return; 3101 } 3102 3103 ret = refresh_total_sectors(bs, bs->total_sectors); 3104 if (ret < 0) { 3105 error_setg_errno(errp, -ret, "Could not refresh total sector count"); 3106 return; 3107 } 3108 } 3109 3110 void bdrv_invalidate_cache_all(Error **errp) 3111 { 3112 BlockDriverState *bs; 3113 Error *local_err = NULL; 3114 3115 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3116 AioContext *aio_context = bdrv_get_aio_context(bs); 3117 3118 aio_context_acquire(aio_context); 3119 bdrv_invalidate_cache(bs, &local_err); 3120 aio_context_release(aio_context); 3121 if (local_err) { 3122 error_propagate(errp, local_err); 3123 return; 3124 } 3125 } 3126 } 3127 3128 /**************************************************************/ 3129 /* removable device support */ 3130 3131 /** 3132 * Return TRUE if the media is present 3133 */ 3134 int bdrv_is_inserted(BlockDriverState *bs) 3135 { 3136 BlockDriver *drv = bs->drv; 3137 3138 if (!drv) 3139 return 0; 3140 if (!drv->bdrv_is_inserted) 3141 return 1; 3142 return drv->bdrv_is_inserted(bs); 3143 } 3144 3145 /** 3146 * Return whether the media changed since the last call to this 3147 * function, or -ENOTSUP if we don't know. Most drivers don't know. 3148 */ 3149 int bdrv_media_changed(BlockDriverState *bs) 3150 { 3151 BlockDriver *drv = bs->drv; 3152 3153 if (drv && drv->bdrv_media_changed) { 3154 return drv->bdrv_media_changed(bs); 3155 } 3156 return -ENOTSUP; 3157 } 3158 3159 /** 3160 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3161 */ 3162 void bdrv_eject(BlockDriverState *bs, bool eject_flag) 3163 { 3164 BlockDriver *drv = bs->drv; 3165 const char *device_name; 3166 3167 if (drv && drv->bdrv_eject) { 3168 drv->bdrv_eject(bs, eject_flag); 3169 } 3170 3171 device_name = bdrv_get_device_name(bs); 3172 if (device_name[0] != '\0') { 3173 qapi_event_send_device_tray_moved(device_name, 3174 eject_flag, &error_abort); 3175 } 3176 } 3177 3178 /** 3179 * Lock or unlock the media (if it is locked, the user won't be able 3180 * to eject it manually). 3181 */ 3182 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3183 { 3184 BlockDriver *drv = bs->drv; 3185 3186 trace_bdrv_lock_medium(bs, locked); 3187 3188 if (drv && drv->bdrv_lock_medium) { 3189 drv->bdrv_lock_medium(bs, locked); 3190 } 3191 } 3192 3193 void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 3194 { 3195 bs->guest_block_size = align; 3196 } 3197 3198 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 3199 { 3200 BdrvDirtyBitmap *bm; 3201 3202 assert(name); 3203 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3204 if (bm->name && !strcmp(name, bm->name)) { 3205 return bm; 3206 } 3207 } 3208 return NULL; 3209 } 3210 3211 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 3212 { 3213 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3214 g_free(bitmap->name); 3215 bitmap->name = NULL; 3216 } 3217 3218 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 3219 uint32_t granularity, 3220 const char *name, 3221 Error **errp) 3222 { 3223 int64_t bitmap_size; 3224 BdrvDirtyBitmap *bitmap; 3225 uint32_t sector_granularity; 3226 3227 assert((granularity & (granularity - 1)) == 0); 3228 3229 if (name && bdrv_find_dirty_bitmap(bs, name)) { 3230 error_setg(errp, "Bitmap already exists: %s", name); 3231 return NULL; 3232 } 3233 sector_granularity = granularity >> BDRV_SECTOR_BITS; 3234 assert(sector_granularity); 3235 bitmap_size = bdrv_nb_sectors(bs); 3236 if (bitmap_size < 0) { 3237 error_setg_errno(errp, -bitmap_size, "could not get length of device"); 3238 errno = -bitmap_size; 3239 return NULL; 3240 } 3241 bitmap = g_new0(BdrvDirtyBitmap, 1); 3242 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 3243 bitmap->size = bitmap_size; 3244 bitmap->name = g_strdup(name); 3245 bitmap->disabled = false; 3246 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 3247 return bitmap; 3248 } 3249 3250 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 3251 { 3252 return bitmap->successor; 3253 } 3254 3255 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 3256 { 3257 return !(bitmap->disabled || bitmap->successor); 3258 } 3259 3260 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) 3261 { 3262 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3263 return DIRTY_BITMAP_STATUS_FROZEN; 3264 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3265 return DIRTY_BITMAP_STATUS_DISABLED; 3266 } else { 3267 return DIRTY_BITMAP_STATUS_ACTIVE; 3268 } 3269 } 3270 3271 /** 3272 * Create a successor bitmap destined to replace this bitmap after an operation. 3273 * Requires that the bitmap is not frozen and has no successor. 3274 */ 3275 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 3276 BdrvDirtyBitmap *bitmap, Error **errp) 3277 { 3278 uint64_t granularity; 3279 BdrvDirtyBitmap *child; 3280 3281 if (bdrv_dirty_bitmap_frozen(bitmap)) { 3282 error_setg(errp, "Cannot create a successor for a bitmap that is " 3283 "currently frozen"); 3284 return -1; 3285 } 3286 assert(!bitmap->successor); 3287 3288 /* Create an anonymous successor */ 3289 granularity = bdrv_dirty_bitmap_granularity(bitmap); 3290 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 3291 if (!child) { 3292 return -1; 3293 } 3294 3295 /* Successor will be on or off based on our current state. */ 3296 child->disabled = bitmap->disabled; 3297 3298 /* Install the successor and freeze the parent */ 3299 bitmap->successor = child; 3300 return 0; 3301 } 3302 3303 /** 3304 * For a bitmap with a successor, yield our name to the successor, 3305 * delete the old bitmap, and return a handle to the new bitmap. 3306 */ 3307 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 3308 BdrvDirtyBitmap *bitmap, 3309 Error **errp) 3310 { 3311 char *name; 3312 BdrvDirtyBitmap *successor = bitmap->successor; 3313 3314 if (successor == NULL) { 3315 error_setg(errp, "Cannot relinquish control if " 3316 "there's no successor present"); 3317 return NULL; 3318 } 3319 3320 name = bitmap->name; 3321 bitmap->name = NULL; 3322 successor->name = name; 3323 bitmap->successor = NULL; 3324 bdrv_release_dirty_bitmap(bs, bitmap); 3325 3326 return successor; 3327 } 3328 3329 /** 3330 * In cases of failure where we can no longer safely delete the parent, 3331 * we may wish to re-join the parent and child/successor. 3332 * The merged parent will be un-frozen, but not explicitly re-enabled. 3333 */ 3334 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 3335 BdrvDirtyBitmap *parent, 3336 Error **errp) 3337 { 3338 BdrvDirtyBitmap *successor = parent->successor; 3339 3340 if (!successor) { 3341 error_setg(errp, "Cannot reclaim a successor when none is present"); 3342 return NULL; 3343 } 3344 3345 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 3346 error_setg(errp, "Merging of parent and successor bitmap failed"); 3347 return NULL; 3348 } 3349 bdrv_release_dirty_bitmap(bs, successor); 3350 parent->successor = NULL; 3351 3352 return parent; 3353 } 3354 3355 /** 3356 * Truncates _all_ bitmaps attached to a BDS. 3357 */ 3358 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 3359 { 3360 BdrvDirtyBitmap *bitmap; 3361 uint64_t size = bdrv_nb_sectors(bs); 3362 3363 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3364 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3365 hbitmap_truncate(bitmap->bitmap, size); 3366 bitmap->size = size; 3367 } 3368 } 3369 3370 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 3371 { 3372 BdrvDirtyBitmap *bm, *next; 3373 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 3374 if (bm == bitmap) { 3375 assert(!bdrv_dirty_bitmap_frozen(bm)); 3376 QLIST_REMOVE(bitmap, list); 3377 hbitmap_free(bitmap->bitmap); 3378 g_free(bitmap->name); 3379 g_free(bitmap); 3380 return; 3381 } 3382 } 3383 } 3384 3385 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3386 { 3387 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3388 bitmap->disabled = true; 3389 } 3390 3391 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3392 { 3393 assert(!bdrv_dirty_bitmap_frozen(bitmap)); 3394 bitmap->disabled = false; 3395 } 3396 3397 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 3398 { 3399 BdrvDirtyBitmap *bm; 3400 BlockDirtyInfoList *list = NULL; 3401 BlockDirtyInfoList **plist = &list; 3402 3403 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 3404 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 3405 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 3406 info->count = bdrv_get_dirty_count(bm); 3407 info->granularity = bdrv_dirty_bitmap_granularity(bm); 3408 info->has_name = !!bm->name; 3409 info->name = g_strdup(bm->name); 3410 info->status = bdrv_dirty_bitmap_status(bm); 3411 entry->value = info; 3412 *plist = entry; 3413 plist = &entry->next; 3414 } 3415 3416 return list; 3417 } 3418 3419 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 3420 { 3421 if (bitmap) { 3422 return hbitmap_get(bitmap->bitmap, sector); 3423 } else { 3424 return 0; 3425 } 3426 } 3427 3428 /** 3429 * Chooses a default granularity based on the existing cluster size, 3430 * but clamped between [4K, 64K]. Defaults to 64K in the case that there 3431 * is no cluster size information available. 3432 */ 3433 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 3434 { 3435 BlockDriverInfo bdi; 3436 uint32_t granularity; 3437 3438 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 3439 granularity = MAX(4096, bdi.cluster_size); 3440 granularity = MIN(65536, granularity); 3441 } else { 3442 granularity = 65536; 3443 } 3444 3445 return granularity; 3446 } 3447 3448 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 3449 { 3450 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 3451 } 3452 3453 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 3454 { 3455 hbitmap_iter_init(hbi, bitmap->bitmap, 0); 3456 } 3457 3458 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3459 int64_t cur_sector, int nr_sectors) 3460 { 3461 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3462 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3463 } 3464 3465 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 3466 int64_t cur_sector, int nr_sectors) 3467 { 3468 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3469 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 3470 } 3471 3472 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 3473 { 3474 assert(bdrv_dirty_bitmap_enabled(bitmap)); 3475 hbitmap_reset_all(bitmap->bitmap); 3476 } 3477 3478 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 3479 int nr_sectors) 3480 { 3481 BdrvDirtyBitmap *bitmap; 3482 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 3483 if (!bdrv_dirty_bitmap_enabled(bitmap)) { 3484 continue; 3485 } 3486 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 3487 } 3488 } 3489 3490 /** 3491 * Advance an HBitmapIter to an arbitrary offset. 3492 */ 3493 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 3494 { 3495 assert(hbi->hb); 3496 hbitmap_iter_init(hbi, hbi->hb, offset); 3497 } 3498 3499 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 3500 { 3501 return hbitmap_count(bitmap->bitmap); 3502 } 3503 3504 /* Get a reference to bs */ 3505 void bdrv_ref(BlockDriverState *bs) 3506 { 3507 bs->refcnt++; 3508 } 3509 3510 /* Release a previously grabbed reference to bs. 3511 * If after releasing, reference count is zero, the BlockDriverState is 3512 * deleted. */ 3513 void bdrv_unref(BlockDriverState *bs) 3514 { 3515 if (!bs) { 3516 return; 3517 } 3518 assert(bs->refcnt > 0); 3519 if (--bs->refcnt == 0) { 3520 bdrv_delete(bs); 3521 } 3522 } 3523 3524 struct BdrvOpBlocker { 3525 Error *reason; 3526 QLIST_ENTRY(BdrvOpBlocker) list; 3527 }; 3528 3529 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 3530 { 3531 BdrvOpBlocker *blocker; 3532 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3533 if (!QLIST_EMPTY(&bs->op_blockers[op])) { 3534 blocker = QLIST_FIRST(&bs->op_blockers[op]); 3535 if (errp) { 3536 error_setg(errp, "Node '%s' is busy: %s", 3537 bdrv_get_device_or_node_name(bs), 3538 error_get_pretty(blocker->reason)); 3539 } 3540 return true; 3541 } 3542 return false; 3543 } 3544 3545 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 3546 { 3547 BdrvOpBlocker *blocker; 3548 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3549 3550 blocker = g_new0(BdrvOpBlocker, 1); 3551 blocker->reason = reason; 3552 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 3553 } 3554 3555 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 3556 { 3557 BdrvOpBlocker *blocker, *next; 3558 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 3559 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 3560 if (blocker->reason == reason) { 3561 QLIST_REMOVE(blocker, list); 3562 g_free(blocker); 3563 } 3564 } 3565 } 3566 3567 void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 3568 { 3569 int i; 3570 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3571 bdrv_op_block(bs, i, reason); 3572 } 3573 } 3574 3575 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 3576 { 3577 int i; 3578 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3579 bdrv_op_unblock(bs, i, reason); 3580 } 3581 } 3582 3583 bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 3584 { 3585 int i; 3586 3587 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 3588 if (!QLIST_EMPTY(&bs->op_blockers[i])) { 3589 return false; 3590 } 3591 } 3592 return true; 3593 } 3594 3595 void bdrv_iostatus_enable(BlockDriverState *bs) 3596 { 3597 bs->iostatus_enabled = true; 3598 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3599 } 3600 3601 /* The I/O status is only enabled if the drive explicitly 3602 * enables it _and_ the VM is configured to stop on errors */ 3603 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3604 { 3605 return (bs->iostatus_enabled && 3606 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 3607 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 3608 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 3609 } 3610 3611 void bdrv_iostatus_disable(BlockDriverState *bs) 3612 { 3613 bs->iostatus_enabled = false; 3614 } 3615 3616 void bdrv_iostatus_reset(BlockDriverState *bs) 3617 { 3618 if (bdrv_iostatus_is_enabled(bs)) { 3619 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3620 if (bs->job) { 3621 block_job_iostatus_reset(bs->job); 3622 } 3623 } 3624 } 3625 3626 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3627 { 3628 assert(bdrv_iostatus_is_enabled(bs)); 3629 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3630 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 3631 BLOCK_DEVICE_IO_STATUS_FAILED; 3632 } 3633 } 3634 3635 void bdrv_img_create(const char *filename, const char *fmt, 3636 const char *base_filename, const char *base_fmt, 3637 char *options, uint64_t img_size, int flags, 3638 Error **errp, bool quiet) 3639 { 3640 QemuOptsList *create_opts = NULL; 3641 QemuOpts *opts = NULL; 3642 const char *backing_fmt, *backing_file; 3643 int64_t size; 3644 BlockDriver *drv, *proto_drv; 3645 Error *local_err = NULL; 3646 int ret = 0; 3647 3648 /* Find driver and parse its options */ 3649 drv = bdrv_find_format(fmt); 3650 if (!drv) { 3651 error_setg(errp, "Unknown file format '%s'", fmt); 3652 return; 3653 } 3654 3655 proto_drv = bdrv_find_protocol(filename, true, errp); 3656 if (!proto_drv) { 3657 return; 3658 } 3659 3660 if (!drv->create_opts) { 3661 error_setg(errp, "Format driver '%s' does not support image creation", 3662 drv->format_name); 3663 return; 3664 } 3665 3666 if (!proto_drv->create_opts) { 3667 error_setg(errp, "Protocol driver '%s' does not support image creation", 3668 proto_drv->format_name); 3669 return; 3670 } 3671 3672 create_opts = qemu_opts_append(create_opts, drv->create_opts); 3673 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 3674 3675 /* Create parameter list with default values */ 3676 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 3677 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 3678 3679 /* Parse -o options */ 3680 if (options) { 3681 qemu_opts_do_parse(opts, options, NULL, &local_err); 3682 if (local_err) { 3683 error_report_err(local_err); 3684 local_err = NULL; 3685 error_setg(errp, "Invalid options for file format '%s'", fmt); 3686 goto out; 3687 } 3688 } 3689 3690 if (base_filename) { 3691 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 3692 if (local_err) { 3693 error_setg(errp, "Backing file not supported for file format '%s'", 3694 fmt); 3695 goto out; 3696 } 3697 } 3698 3699 if (base_fmt) { 3700 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 3701 if (local_err) { 3702 error_setg(errp, "Backing file format not supported for file " 3703 "format '%s'", fmt); 3704 goto out; 3705 } 3706 } 3707 3708 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3709 if (backing_file) { 3710 if (!strcmp(filename, backing_file)) { 3711 error_setg(errp, "Error: Trying to create an image with the " 3712 "same filename as the backing file"); 3713 goto out; 3714 } 3715 } 3716 3717 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3718 3719 // The size for the image must always be specified, with one exception: 3720 // If we are using a backing file, we can obtain the size from there 3721 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3722 if (size == -1) { 3723 if (backing_file) { 3724 BlockDriverState *bs; 3725 char *full_backing = g_new0(char, PATH_MAX); 3726 int64_t size; 3727 int back_flags; 3728 QDict *backing_options = NULL; 3729 3730 bdrv_get_full_backing_filename_from_filename(filename, backing_file, 3731 full_backing, PATH_MAX, 3732 &local_err); 3733 if (local_err) { 3734 g_free(full_backing); 3735 goto out; 3736 } 3737 3738 /* backing files always opened read-only */ 3739 back_flags = 3740 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 3741 3742 if (backing_fmt) { 3743 backing_options = qdict_new(); 3744 qdict_put(backing_options, "driver", 3745 qstring_from_str(backing_fmt)); 3746 } 3747 3748 bs = NULL; 3749 ret = bdrv_open(&bs, full_backing, NULL, backing_options, 3750 back_flags, &local_err); 3751 g_free(full_backing); 3752 if (ret < 0) { 3753 goto out; 3754 } 3755 size = bdrv_getlength(bs); 3756 if (size < 0) { 3757 error_setg_errno(errp, -size, "Could not get size of '%s'", 3758 backing_file); 3759 bdrv_unref(bs); 3760 goto out; 3761 } 3762 3763 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 3764 3765 bdrv_unref(bs); 3766 } else { 3767 error_setg(errp, "Image creation needs a size parameter"); 3768 goto out; 3769 } 3770 } 3771 3772 if (!quiet) { 3773 printf("Formatting '%s', fmt=%s ", filename, fmt); 3774 qemu_opts_print(opts, " "); 3775 puts(""); 3776 } 3777 3778 ret = bdrv_create(drv, filename, opts, &local_err); 3779 3780 if (ret == -EFBIG) { 3781 /* This is generally a better message than whatever the driver would 3782 * deliver (especially because of the cluster_size_hint), since that 3783 * is most probably not much different from "image too large". */ 3784 const char *cluster_size_hint = ""; 3785 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 3786 cluster_size_hint = " (try using a larger cluster size)"; 3787 } 3788 error_setg(errp, "The image size is too large for file format '%s'" 3789 "%s", fmt, cluster_size_hint); 3790 error_free(local_err); 3791 local_err = NULL; 3792 } 3793 3794 out: 3795 qemu_opts_del(opts); 3796 qemu_opts_free(create_opts); 3797 if (local_err) { 3798 error_propagate(errp, local_err); 3799 } 3800 } 3801 3802 AioContext *bdrv_get_aio_context(BlockDriverState *bs) 3803 { 3804 return bs->aio_context; 3805 } 3806 3807 void bdrv_detach_aio_context(BlockDriverState *bs) 3808 { 3809 BdrvAioNotifier *baf; 3810 3811 if (!bs->drv) { 3812 return; 3813 } 3814 3815 QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 3816 baf->detach_aio_context(baf->opaque); 3817 } 3818 3819 if (bs->io_limits_enabled) { 3820 throttle_timers_detach_aio_context(&bs->throttle_timers); 3821 } 3822 if (bs->drv->bdrv_detach_aio_context) { 3823 bs->drv->bdrv_detach_aio_context(bs); 3824 } 3825 if (bs->file) { 3826 bdrv_detach_aio_context(bs->file->bs); 3827 } 3828 if (bs->backing) { 3829 bdrv_detach_aio_context(bs->backing->bs); 3830 } 3831 3832 bs->aio_context = NULL; 3833 } 3834 3835 void bdrv_attach_aio_context(BlockDriverState *bs, 3836 AioContext *new_context) 3837 { 3838 BdrvAioNotifier *ban; 3839 3840 if (!bs->drv) { 3841 return; 3842 } 3843 3844 bs->aio_context = new_context; 3845 3846 if (bs->backing) { 3847 bdrv_attach_aio_context(bs->backing->bs, new_context); 3848 } 3849 if (bs->file) { 3850 bdrv_attach_aio_context(bs->file->bs, new_context); 3851 } 3852 if (bs->drv->bdrv_attach_aio_context) { 3853 bs->drv->bdrv_attach_aio_context(bs, new_context); 3854 } 3855 if (bs->io_limits_enabled) { 3856 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); 3857 } 3858 3859 QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 3860 ban->attached_aio_context(new_context, ban->opaque); 3861 } 3862 } 3863 3864 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 3865 { 3866 bdrv_drain(bs); /* ensure there are no in-flight requests */ 3867 3868 bdrv_detach_aio_context(bs); 3869 3870 /* This function executes in the old AioContext so acquire the new one in 3871 * case it runs in a different thread. 3872 */ 3873 aio_context_acquire(new_context); 3874 bdrv_attach_aio_context(bs, new_context); 3875 aio_context_release(new_context); 3876 } 3877 3878 void bdrv_add_aio_context_notifier(BlockDriverState *bs, 3879 void (*attached_aio_context)(AioContext *new_context, void *opaque), 3880 void (*detach_aio_context)(void *opaque), void *opaque) 3881 { 3882 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 3883 *ban = (BdrvAioNotifier){ 3884 .attached_aio_context = attached_aio_context, 3885 .detach_aio_context = detach_aio_context, 3886 .opaque = opaque 3887 }; 3888 3889 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 3890 } 3891 3892 void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 3893 void (*attached_aio_context)(AioContext *, 3894 void *), 3895 void (*detach_aio_context)(void *), 3896 void *opaque) 3897 { 3898 BdrvAioNotifier *ban, *ban_next; 3899 3900 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 3901 if (ban->attached_aio_context == attached_aio_context && 3902 ban->detach_aio_context == detach_aio_context && 3903 ban->opaque == opaque) 3904 { 3905 QLIST_REMOVE(ban, list); 3906 g_free(ban); 3907 3908 return; 3909 } 3910 } 3911 3912 abort(); 3913 } 3914 3915 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 3916 BlockDriverAmendStatusCB *status_cb) 3917 { 3918 if (!bs->drv->bdrv_amend_options) { 3919 return -ENOTSUP; 3920 } 3921 return bs->drv->bdrv_amend_options(bs, opts, status_cb); 3922 } 3923 3924 /* This function will be called by the bdrv_recurse_is_first_non_filter method 3925 * of block filter and by bdrv_is_first_non_filter. 3926 * It is used to test if the given bs is the candidate or recurse more in the 3927 * node graph. 3928 */ 3929 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 3930 BlockDriverState *candidate) 3931 { 3932 /* return false if basic checks fails */ 3933 if (!bs || !bs->drv) { 3934 return false; 3935 } 3936 3937 /* the code reached a non block filter driver -> check if the bs is 3938 * the same as the candidate. It's the recursion termination condition. 3939 */ 3940 if (!bs->drv->is_filter) { 3941 return bs == candidate; 3942 } 3943 /* Down this path the driver is a block filter driver */ 3944 3945 /* If the block filter recursion method is defined use it to recurse down 3946 * the node graph. 3947 */ 3948 if (bs->drv->bdrv_recurse_is_first_non_filter) { 3949 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 3950 } 3951 3952 /* the driver is a block filter but don't allow to recurse -> return false 3953 */ 3954 return false; 3955 } 3956 3957 /* This function checks if the candidate is the first non filter bs down it's 3958 * bs chain. Since we don't have pointers to parents it explore all bs chains 3959 * from the top. Some filters can choose not to pass down the recursion. 3960 */ 3961 bool bdrv_is_first_non_filter(BlockDriverState *candidate) 3962 { 3963 BlockDriverState *bs; 3964 3965 /* walk down the bs forest recursively */ 3966 QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 3967 bool perm; 3968 3969 /* try to recurse in this top level bs */ 3970 perm = bdrv_recurse_is_first_non_filter(bs, candidate); 3971 3972 /* candidate is the first non filter */ 3973 if (perm) { 3974 return true; 3975 } 3976 } 3977 3978 return false; 3979 } 3980 3981 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, 3982 const char *node_name, Error **errp) 3983 { 3984 BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 3985 AioContext *aio_context; 3986 3987 if (!to_replace_bs) { 3988 error_setg(errp, "Node name '%s' not found", node_name); 3989 return NULL; 3990 } 3991 3992 aio_context = bdrv_get_aio_context(to_replace_bs); 3993 aio_context_acquire(aio_context); 3994 3995 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 3996 to_replace_bs = NULL; 3997 goto out; 3998 } 3999 4000 /* We don't want arbitrary node of the BDS chain to be replaced only the top 4001 * most non filter in order to prevent data corruption. 4002 * Another benefit is that this tests exclude backing files which are 4003 * blocked by the backing blockers. 4004 */ 4005 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) { 4006 error_setg(errp, "Only top most non filter can be replaced"); 4007 to_replace_bs = NULL; 4008 goto out; 4009 } 4010 4011 out: 4012 aio_context_release(aio_context); 4013 return to_replace_bs; 4014 } 4015 4016 static bool append_open_options(QDict *d, BlockDriverState *bs) 4017 { 4018 const QDictEntry *entry; 4019 bool found_any = false; 4020 4021 for (entry = qdict_first(bs->options); entry; 4022 entry = qdict_next(bs->options, entry)) 4023 { 4024 /* Only take options for this level and exclude all non-driver-specific 4025 * options */ 4026 if (!strchr(qdict_entry_key(entry), '.') && 4027 strcmp(qdict_entry_key(entry), "node-name")) 4028 { 4029 qobject_incref(qdict_entry_value(entry)); 4030 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 4031 found_any = true; 4032 } 4033 } 4034 4035 return found_any; 4036 } 4037 4038 /* Updates the following BDS fields: 4039 * - exact_filename: A filename which may be used for opening a block device 4040 * which (mostly) equals the given BDS (even without any 4041 * other options; so reading and writing must return the same 4042 * results, but caching etc. may be different) 4043 * - full_open_options: Options which, when given when opening a block device 4044 * (without a filename), result in a BDS (mostly) 4045 * equalling the given one 4046 * - filename: If exact_filename is set, it is copied here. Otherwise, 4047 * full_open_options is converted to a JSON object, prefixed with 4048 * "json:" (for use through the JSON pseudo protocol) and put here. 4049 */ 4050 void bdrv_refresh_filename(BlockDriverState *bs) 4051 { 4052 BlockDriver *drv = bs->drv; 4053 QDict *opts; 4054 4055 if (!drv) { 4056 return; 4057 } 4058 4059 /* This BDS's file name will most probably depend on its file's name, so 4060 * refresh that first */ 4061 if (bs->file) { 4062 bdrv_refresh_filename(bs->file->bs); 4063 } 4064 4065 if (drv->bdrv_refresh_filename) { 4066 /* Obsolete information is of no use here, so drop the old file name 4067 * information before refreshing it */ 4068 bs->exact_filename[0] = '\0'; 4069 if (bs->full_open_options) { 4070 QDECREF(bs->full_open_options); 4071 bs->full_open_options = NULL; 4072 } 4073 4074 drv->bdrv_refresh_filename(bs); 4075 } else if (bs->file) { 4076 /* Try to reconstruct valid information from the underlying file */ 4077 bool has_open_options; 4078 4079 bs->exact_filename[0] = '\0'; 4080 if (bs->full_open_options) { 4081 QDECREF(bs->full_open_options); 4082 bs->full_open_options = NULL; 4083 } 4084 4085 opts = qdict_new(); 4086 has_open_options = append_open_options(opts, bs); 4087 4088 /* If no specific options have been given for this BDS, the filename of 4089 * the underlying file should suffice for this one as well */ 4090 if (bs->file->bs->exact_filename[0] && !has_open_options) { 4091 strcpy(bs->exact_filename, bs->file->bs->exact_filename); 4092 } 4093 /* Reconstructing the full options QDict is simple for most format block 4094 * drivers, as long as the full options are known for the underlying 4095 * file BDS. The full options QDict of that file BDS should somehow 4096 * contain a representation of the filename, therefore the following 4097 * suffices without querying the (exact_)filename of this BDS. */ 4098 if (bs->file->bs->full_open_options) { 4099 qdict_put_obj(opts, "driver", 4100 QOBJECT(qstring_from_str(drv->format_name))); 4101 QINCREF(bs->file->bs->full_open_options); 4102 qdict_put_obj(opts, "file", 4103 QOBJECT(bs->file->bs->full_open_options)); 4104 4105 bs->full_open_options = opts; 4106 } else { 4107 QDECREF(opts); 4108 } 4109 } else if (!bs->full_open_options && qdict_size(bs->options)) { 4110 /* There is no underlying file BDS (at least referenced by BDS.file), 4111 * so the full options QDict should be equal to the options given 4112 * specifically for this block device when it was opened (plus the 4113 * driver specification). 4114 * Because those options don't change, there is no need to update 4115 * full_open_options when it's already set. */ 4116 4117 opts = qdict_new(); 4118 append_open_options(opts, bs); 4119 qdict_put_obj(opts, "driver", 4120 QOBJECT(qstring_from_str(drv->format_name))); 4121 4122 if (bs->exact_filename[0]) { 4123 /* This may not work for all block protocol drivers (some may 4124 * require this filename to be parsed), but we have to find some 4125 * default solution here, so just include it. If some block driver 4126 * does not support pure options without any filename at all or 4127 * needs some special format of the options QDict, it needs to 4128 * implement the driver-specific bdrv_refresh_filename() function. 4129 */ 4130 qdict_put_obj(opts, "filename", 4131 QOBJECT(qstring_from_str(bs->exact_filename))); 4132 } 4133 4134 bs->full_open_options = opts; 4135 } 4136 4137 if (bs->exact_filename[0]) { 4138 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 4139 } else if (bs->full_open_options) { 4140 QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 4141 snprintf(bs->filename, sizeof(bs->filename), "json:%s", 4142 qstring_get_str(json)); 4143 QDECREF(json); 4144 } 4145 } 4146 4147 /* This accessor function purpose is to allow the device models to access the 4148 * BlockAcctStats structure embedded inside a BlockDriverState without being 4149 * aware of the BlockDriverState structure layout. 4150 * It will go away when the BlockAcctStats structure will be moved inside 4151 * the device models. 4152 */ 4153 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 4154 { 4155 return &bs->stats; 4156 } 4157