/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor/monitor.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "qmp-commands.h"
#include "qemu/timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end = 0;
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
        || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
        || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
        || io_limits->iops[BLOCK_IO_LIMIT_READ]
        || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
        || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are served in FIFO order. The next throttled request is not
     * dequeued until the current one has been allowed to proceed, so if the
     * current request still exceeds the limits, it is re-inserted at the
     * head of the queue and all requests behind it stay queued.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
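
/*
 * Example: combining a relative backing file name with the path of its
 * overlay picks up the overlay's directory (the paths are illustrative):
 *
 *   char dest[PATH_MAX];
 *   path_combine(dest, sizeof(dest), "/images/overlay.qcow2", "base.raw");
 *   // dest now holds "/images/base.raw"
 *
 * An absolute filename is copied to dest unchanged.
 */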

void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
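
/*
 * Summary of the mapping implemented above:
 *
 *   mode           BDRV_O_NOCACHE   BDRV_O_CACHE_WB   BDRV_O_NO_FLUSH
 *   off/none             set              set                -
 *   directsync           set               -                 -
 *   writeback             -               set                -
 *   unsafe                -               set               set
 *   writethrough          -                -                 -
 */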
583 * 584 * Return 0 on success, -1 if the discard mode was invalid. 585 */ 586 int bdrv_parse_discard_flags(const char *mode, int *flags) 587 { 588 *flags &= ~BDRV_O_UNMAP; 589 590 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 591 /* do nothing */ 592 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 593 *flags |= BDRV_O_UNMAP; 594 } else { 595 return -1; 596 } 597 598 return 0; 599 } 600 601 /** 602 * Set open flags for a given cache mode 603 * 604 * Return 0 on success, -1 if the cache mode was invalid. 605 */ 606 int bdrv_parse_cache_flags(const char *mode, int *flags) 607 { 608 *flags &= ~BDRV_O_CACHE_MASK; 609 610 if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 611 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 612 } else if (!strcmp(mode, "directsync")) { 613 *flags |= BDRV_O_NOCACHE; 614 } else if (!strcmp(mode, "writeback")) { 615 *flags |= BDRV_O_CACHE_WB; 616 } else if (!strcmp(mode, "unsafe")) { 617 *flags |= BDRV_O_CACHE_WB; 618 *flags |= BDRV_O_NO_FLUSH; 619 } else if (!strcmp(mode, "writethrough")) { 620 /* this is the default */ 621 } else { 622 return -1; 623 } 624 625 return 0; 626 } 627 628 /** 629 * The copy-on-read flag is actually a reference count so multiple users may 630 * use the feature without worrying about clobbering its previous state. 631 * Copy-on-read stays enabled until all users have called to disable it. 632 */ 633 void bdrv_enable_copy_on_read(BlockDriverState *bs) 634 { 635 bs->copy_on_read++; 636 } 637 638 void bdrv_disable_copy_on_read(BlockDriverState *bs) 639 { 640 assert(bs->copy_on_read > 0); 641 bs->copy_on_read--; 642 } 643 644 static int bdrv_open_flags(BlockDriverState *bs, int flags) 645 { 646 int open_flags = flags | BDRV_O_CACHE_WB; 647 648 /* 649 * Clear flags that are internal to the block layer before opening the 650 * image. 651 */ 652 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 653 654 /* 655 * Snapshots should be writable. 656 */ 657 if (bs->is_temporary) { 658 open_flags |= BDRV_O_RDWR; 659 } 660 661 return open_flags; 662 } 663 664 /* 665 * Common part for opening disk images and files 666 * 667 * Removes all processed options from *options. 668 */ 669 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, 670 QDict *options, int flags, BlockDriver *drv) 671 { 672 int ret, open_flags; 673 const char *filename; 674 675 assert(drv != NULL); 676 assert(bs->file == NULL); 677 assert(options != NULL && bs->options != options); 678 679 trace_bdrv_open_common(bs, filename, flags, drv->format_name); 680 681 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) { 682 return -ENOTSUP; 683 } 684 685 /* bdrv_open() with directly using a protocol as drv. This layer is already 686 * opened, so assign it to bs (while file becomes a closed BlockDriverState) 687 * and return immediately. 

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv)
{
    int ret, open_flags;
    const char *filename;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    /* bdrv_open() was called directly with a protocol as drv. This layer is
     * already opened, so assign it to bs (while file becomes a closed
     * BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
    open_flags = bdrv_open_flags(bs, flags);

    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(drv->bdrv_parse_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags);
    } else {
        if (file == NULL) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
                          "block driver for the protocol level",
                          drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        assert(file != NULL);
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags);
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        assert(filename != NULL);
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
                   QDict *options, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    const char *drvname;
    int ret;

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs = bdrv_new("");
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                      "'filename' options at the same time");
        ret = -EINVAL;
        goto fail;
    }

    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_whitelisted_format(drvname);
        qdict_del(options, "driver");
    } else if (filename) {
        drv = bdrv_find_protocol(filename);
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
        Error *local_err = NULL;
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
            qerror_report_err(local_err);
            error_free(local_err);
            ret = -EINVAL;
            goto fail;
        }
        qdict_del(options, "filename");
    } else if (!drv->bdrv_parse_filename && !filename) {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "The '%s' block driver requires a file name",
                      drv->format_name);
        ret = -EINVAL;
        goto fail;
    }

    ret = bdrv_open_common(bs, NULL, options, flags, drv);
    if (ret < 0) {
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
                      "support the option '%s'",
                      drv->format_name, entry->key);
        ret = -EINVAL;
        goto fail;
    }
    QDECREF(options);

    bs->growable = 1;
    *pbs = bs;
    return 0;

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
    bdrv_delete(bs);
    return ret;
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    }

    bs->backing_hd = bdrv_new("");
    bdrv_get_full_backing_filename(bs, backing_filename,
                                   sizeof(backing_filename));

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);

    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
                    back_flags, back_drv);
    if (ret < 0) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        return ret;
    }
    return 0;
}

static void extract_subqdict(QDict *src, QDict **dst, const char *start)
{
    const QDictEntry *entry, *next;
    const char *p;

    *dst = qdict_new();
    entry = qdict_first(src);

    while (entry != NULL) {
        next = qdict_next(src, entry);
        if (strstart(entry->key, start, &p)) {
            qobject_incref(entry->value);
            qdict_put_obj(*dst, p, entry->value);
            qdict_del(src, entry->key);
        }
        entry = next;
    }
}
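
/*
 * Example: given a src QDict of
 *   { "file.driver": "file", "file.filename": "a.img", "size": "1M" }
 * extract_subqdict(src, &dst, "file.") leaves src as { "size": "1M" } and
 * fills dst with { "driver": "file", "filename": "a.img" }. bdrv_open()
 * below uses this to route "file." and "backing." prefixed options to the
 * protocol layer and the backing file, respectively.
 */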

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
              int flags, BlockDriver *drv)
{
    int ret;
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
    BlockDriverState *file = NULL;
    QDict *file_options = NULL;

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
    options = qdict_clone_shallow(options);

    /* For snapshot=on, create a temporary qcow2 overlay */
    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *create_options;
        char backing_filename[PATH_MAX];

        if (qdict_size(options) != 0) {
            error_report("Can't use snapshot=on with driver-specific options");
            ret = -EINVAL;
            goto fail;
        }
        assert(filename != NULL);

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, NULL, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            goto fail;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        bdrv_delete(bs1);

        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
            goto fail;
        }

        /* Real path is meaningless for protocols */
        if (path_has_protocol(filename)) {
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        } else if (!realpath(filename, backing_filename)) {
            ret = -errno;
            goto fail;
        }

        bdrv_qcow2 = bdrv_find_format("qcow2");
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);

        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
                             backing_filename);
        if (drv) {
            set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
                                 drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
        free_option_parameters(create_options);
        if (ret < 0) {
            goto fail;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

    extract_subqdict(options, &file_options, "file.");

    ret = bdrv_file_open(&file, filename, file_options,
                         bdrv_open_flags(bs, flags));
    if (ret < 0) {
        goto fail;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(file, filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    if (bs->file != file) {
        bdrv_delete(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
                      "device '%s' doesn't support the option '%s'",
                      drv->format_name, bs->device_name, entry->key);

        ret = -EINVAL;
        goto close_and_fail;
    }
    QDECREF(options);

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (file != NULL) {
        bdrv_delete(file);
    }
    if (bs->is_temporary) {
        unlink(filename);
    }
fail:
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    return ret;

close_and_fail:
    bdrv_close(bs);
    QDECREF(options);
    return ret;
}

typedef struct BlockReopenQueueEntry {
    bool prepared;
    BDRVReopenState state;
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had
 * QSIMPLEQ_INIT already performed, or it may be NULL, in which case a new
 * BlockReopenQueue will be created and initialized. This newly created
 * BlockReopenQueue should be passed back in for subsequent calls that are
 * intended to be of the same atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * flags contains the open flags for the associated bs
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                    BlockDriverState *bs, int flags)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

    if (bs->file) {
        bdrv_reopen_queue(bs_queue, bs->file, flags);
    }

    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
    bs_entry->state.flags = flags;

    return bs_queue;
}

/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Error *local_err = NULL;

    assert(bs_queue != NULL);

    bdrv_drain_all();

    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
            error_propagate(errp, local_err);
            goto cleanup;
        }
        bs_entry->prepared = true;
    }

    /* If we reach this point, we have success and just need to apply the
     * changes
     */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    ret = 0;

cleanup:
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
        g_free(bs_entry);
    }
    g_free(bs_queue);
    return ret;
}


/* Reopen a single BlockDriverState with the specified flags. */
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);

    ret = bdrv_reopen_multiple(queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
    return ret;
}
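
/*
 * Usage sketch (illustrative; bs_a/bs_b and the flags are hypothetical):
 * reopening two devices as one transaction, so that either both get the
 * new flags or neither does:
 *
 *   Error *local_err = NULL;
 *   BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs_a, new_flags_a);
 *   queue = bdrv_reopen_queue(queue, bs_b, new_flags_b);
 *   if (bdrv_reopen_multiple(queue, &local_err) < 0) {
 *       // no device was changed; local_err describes the failure
 *   }
 *
 * bdrv_reopen_multiple() frees the queue and its entries in either case.
 */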

/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver's .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  reopen_state->bs->device_name);
        goto error;
    }


    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_set(errp, QERR_OPEN_FILE_FAILED,
                          reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, reopen_state->bs->device_name,
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    reopen_state->bs->open_flags = reopen_state->flags;
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}


void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all();
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        QDECREF(bs->options);
        bs->options = NULL;

        if (bs->file != NULL) {
            bdrv_delete(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;
    bool busy;

    do {
        busy = qemu_aio_wait();

        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
    } while (busy);

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NUL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}

static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */
    bs_dest->open_flags         = bs_src->open_flags;

    /* dev info */
    bs_dest->dev_ops            = bs_src->dev_ops;
    bs_dest->dev_opaque         = bs_src->dev_opaque;
    bs_dest->dev                = bs_src->dev;
    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o timing parameters */
    bs_dest->slice_start        = bs_src->slice_start;
    bs_dest->slice_end          = bs_src->slice_end;
    bs_dest->slice_submitted    = bs_src->slice_submitted;
    bs_dest->io_limits          = bs_src->io_limits;
    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
    bs_dest->block_timer        = bs_src->block_timer;
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;

    /* job */
    bs_dest->in_use             = bs_src->in_use;
    bs_dest->job                = bs_src->job;

    /* keep the same entry in bdrv_states */
    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
            bs_src->device_name);
    bs_dest->list = bs_src->list;
}

/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(bs_new->dirty_bitmap == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(bs_new->block_timer == NULL);

    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap! */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(bs_new->block_timer == NULL);

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bs_top->backing_hd = bs_new;
    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
            bs_new->filename);
    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
            bs_new->drv ? bs_new->drv->format_name : "");
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               enum MonitorEvent ev,
                               BlockErrorAction action, bool is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(ev, data);

    qobject_decref(data);
}

static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res, fix);
}

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf;
    char filename[PATH_MAX];

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        return -EBUSY;
    }

    ro = bs->backing_hd->read_only;
    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}

int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                return ret;
            }
        }
    }
    return 0;
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t sector_num, int nb_sectors,
                            int64_t *cluster_sector_num,
                            int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
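
/*
 * Worked example: with a 64 KiB cluster size, c = 65536 / 512 = 128
 * sectors per cluster. For sector_num = 130 and nb_sectors = 4:
 *
 *   *cluster_sector_num = QEMU_ALIGN_DOWN(130, 128)          = 128
 *   *cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 4, 128)  = 128
 *
 * i.e. the 4-sector region is widened to the whole cluster [128, 256).
 */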

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /* aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;
    int ret;

    /* Backing file format doesn't make sense without a backing file */
    if (backing_fmt && !backing_file) {
        return -EINVAL;
    }

    if (drv->bdrv_change_backing_file != NULL) {
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        ret = -ENOTSUP;
    }

    if (ret == 0) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
    }
    return ret;
}

/*
 * Finds the image layer in the chain that has 'bs' as its backing file.
 *
 * active is the current topmost image.
 *
 * Returns NULL if bs is not found in active's image chain,
 * or if active == bs.
 */
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
{
    BlockDriverState *overlay = NULL;
    BlockDriverState *intermediate;

    assert(active != NULL);
    assert(bs != NULL);

    /* if bs is the same as active, then by definition it has no overlay
     */
    if (active == bs) {
        return NULL;
    }

    intermediate = active;
    while (intermediate->backing_hd) {
        if (intermediate->backing_hd == bs) {
            overlay = intermediate;
            break;
        }
        intermediate = intermediate->backing_hd;
    }

    return overlay;
}

typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;


/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    new_top_bs->backing_hd = base_bs;


    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        intermediate_state->bs->backing_hd = NULL;
        bdrv_delete(intermediate_state->bs);
    }
    ret = 0;

exit:
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}


static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov, 0);
    }
}

/*
 * Process a vectored synchronous request using coroutines
 */
static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
                       QEMUIOVector *qiov, bool is_write)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };
    assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}

/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
                          uint8_t *buf, int nb_sectors)
{
    bool enabled;
    int ret;

    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bs->io_limits_enabled = enabled;
    return ret;
}

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}

int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
{
    return bdrv_rwv_co(bs, sector_num, qiov, true);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
See bdrv_write() for the return codes */ 2236 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 2237 uint8_t *buf, int nb_sectors) 2238 { 2239 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false); 2240 } 2241 2242 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */ 2243 int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, 2244 uint8_t *buf, int nb_sectors) 2245 { 2246 bool enabled; 2247 int ret; 2248 2249 enabled = bs->io_limits_enabled; 2250 bs->io_limits_enabled = false; 2251 ret = bdrv_read(bs, sector_num, buf, nb_sectors); 2252 bs->io_limits_enabled = enabled; 2253 return ret; 2254 } 2255 2256 /* Return < 0 if error. Important errors are: 2257 -EIO generic I/O error (may happen for all errors) 2258 -ENOMEDIUM No media inserted. 2259 -EINVAL Invalid sector number or nb_sectors 2260 -EACCES Trying to write to a read-only device 2261 */ 2262 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 2263 const uint8_t *buf, int nb_sectors) 2264 { 2265 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true); 2266 } 2267 2268 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) 2269 { 2270 return bdrv_rwv_co(bs, sector_num, qiov, true); 2271 } 2272 2273 int bdrv_pread(BlockDriverState *bs, int64_t offset, 2274 void *buf, int count1) 2275 { 2276 uint8_t tmp_buf[BDRV_SECTOR_SIZE]; 2277 int len, nb_sectors, count; 2278 int64_t sector_num; 2279 int ret; 2280 2281 count = count1; 2282 /* first read to align to sector start */ 2283 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); 2284 if (len > count) 2285 len = count; 2286 sector_num = offset >> BDRV_SECTOR_BITS; 2287 if (len > 0) { 2288 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 2289 return ret; 2290 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len); 2291 count -= len; 2292 if (count == 0) 2293 return count1; 2294 sector_num++; 2295 buf += len; 2296 } 2297 2298 /* read the sectors "in place" */ 2299 nb_sectors = count >> BDRV_SECTOR_BITS; 2300 if (nb_sectors > 0) { 2301 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0) 2302 return ret; 2303 sector_num += nb_sectors; 2304 len = nb_sectors << BDRV_SECTOR_BITS; 2305 buf += len; 2306 count -= len; 2307 } 2308 2309 /* add data from the last sector */ 2310 if (count > 0) { 2311 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 2312 return ret; 2313 memcpy(buf, tmp_buf, count); 2314 } 2315 return count1; 2316 } 2317 2318 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) 2319 { 2320 uint8_t tmp_buf[BDRV_SECTOR_SIZE]; 2321 int len, nb_sectors, count; 2322 int64_t sector_num; 2323 int ret; 2324 2325 count = qiov->size; 2326 2327 /* first write to align to sector start */ 2328 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); 2329 if (len > count) 2330 len = count; 2331 sector_num = offset >> BDRV_SECTOR_BITS; 2332 if (len > 0) { 2333 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 2334 return ret; 2335 qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), 2336 len); 2337 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) 2338 return ret; 2339 count -= len; 2340 if (count == 0) 2341 return qiov->size; 2342 sector_num++; 2343 } 2344 2345 /* write the sectors "in place" */ 2346 nb_sectors = count >> BDRV_SECTOR_BITS; 2347 if (nb_sectors > 0) { 2348 QEMUIOVector qiov_inplace; 2349 2350 qemu_iovec_init(&qiov_inplace, qiov->niov); 2351 qemu_iovec_concat(&qiov_inplace, qiov, len, 2352 nb_sectors << BDRV_SECTOR_BITS); 2353 ret =
bdrv_writev(bs, sector_num, &qiov_inplace); 2354 qemu_iovec_destroy(&qiov_inplace); 2355 if (ret < 0) { 2356 return ret; 2357 } 2358 2359 sector_num += nb_sectors; 2360 len = nb_sectors << BDRV_SECTOR_BITS; 2361 count -= len; 2362 } 2363 2364 /* add data from the last sector */ 2365 if (count > 0) { 2366 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 2367 return ret; 2368 qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count); 2369 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) 2370 return ret; 2371 } 2372 return qiov->size; 2373 } 2374 2375 int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 2376 const void *buf, int count1) 2377 { 2378 QEMUIOVector qiov; 2379 struct iovec iov = { 2380 .iov_base = (void *) buf, 2381 .iov_len = count1, 2382 }; 2383 2384 qemu_iovec_init_external(&qiov, &iov, 1); 2385 return bdrv_pwritev(bs, offset, &qiov); 2386 } 2387 2388 /* 2389 * Writes to the file and ensures that no writes are reordered across this 2390 * request (acts as a barrier) 2391 * 2392 * Returns 0 on success, -errno in error cases. 2393 */ 2394 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 2395 const void *buf, int count) 2396 { 2397 int ret; 2398 2399 ret = bdrv_pwrite(bs, offset, buf, count); 2400 if (ret < 0) { 2401 return ret; 2402 } 2403 2404 /* No flush needed for cache modes that already do it */ 2405 if (bs->enable_write_cache) { 2406 bdrv_flush(bs); 2407 } 2408 2409 return 0; 2410 } 2411 2412 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, 2413 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 2414 { 2415 /* Perform I/O through a temporary buffer so that users who scribble over 2416 * their read buffer while the operation is in progress do not end up 2417 * modifying the image file. This is critical for zero-copy guest I/O 2418 * where anything might happen inside guest memory. 2419 */ 2420 void *bounce_buffer; 2421 2422 BlockDriver *drv = bs->drv; 2423 struct iovec iov; 2424 QEMUIOVector bounce_qiov; 2425 int64_t cluster_sector_num; 2426 int cluster_nb_sectors; 2427 size_t skip_bytes; 2428 int ret; 2429 2430 /* Cover entire cluster so no additional backing file I/O is required when 2431 * allocating cluster in the image file. 2432 */ 2433 bdrv_round_to_clusters(bs, sector_num, nb_sectors, 2434 &cluster_sector_num, &cluster_nb_sectors); 2435 2436 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, 2437 cluster_sector_num, cluster_nb_sectors); 2438 2439 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; 2440 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len); 2441 qemu_iovec_init_external(&bounce_qiov, &iov, 1); 2442 2443 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, 2444 &bounce_qiov); 2445 if (ret < 0) { 2446 goto err; 2447 } 2448 2449 if (drv->bdrv_co_write_zeroes && 2450 buffer_is_zero(bounce_buffer, iov.iov_len)) { 2451 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, 2452 cluster_nb_sectors); 2453 } else { 2454 /* This does not change the data on the disk, it is not necessary 2455 * to flush even in cache=writethrough mode. 2456 */ 2457 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, 2458 &bounce_qiov); 2459 } 2460 2461 if (ret < 0) { 2462 /* It might be okay to ignore write errors for guest requests. If this 2463 * is a deliberate copy-on-read then we don't want to ignore the error. 2464 * Simply report it in all cases. 
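 *
 * (Ignoring the failure would amount to "ret = 0;" here, serving the
 * guest read from the bounce buffer anyway; but this function cannot
 * tell a guest-triggered copy-on-read from a deliberate one, so the
 * error is reported unconditionally.)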
2465 */ 2466 goto err; 2467 } 2468 2469 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; 2470 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, 2471 nb_sectors * BDRV_SECTOR_SIZE); 2472 2473 err: 2474 qemu_vfree(bounce_buffer); 2475 return ret; 2476 } 2477 2478 /* 2479 * Handle a read request in coroutine context 2480 */ 2481 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, 2482 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 2483 BdrvRequestFlags flags) 2484 { 2485 BlockDriver *drv = bs->drv; 2486 BdrvTrackedRequest req; 2487 int ret; 2488 2489 if (!drv) { 2490 return -ENOMEDIUM; 2491 } 2492 if (bdrv_check_request(bs, sector_num, nb_sectors)) { 2493 return -EIO; 2494 } 2495 2496 /* throttling disk read I/O */ 2497 if (bs->io_limits_enabled) { 2498 bdrv_io_limits_intercept(bs, false, nb_sectors); 2499 } 2500 2501 if (bs->copy_on_read) { 2502 flags |= BDRV_REQ_COPY_ON_READ; 2503 } 2504 if (flags & BDRV_REQ_COPY_ON_READ) { 2505 bs->copy_on_read_in_flight++; 2506 } 2507 2508 if (bs->copy_on_read_in_flight) { 2509 wait_for_overlapping_requests(bs, sector_num, nb_sectors); 2510 } 2511 2512 tracked_request_begin(&req, bs, sector_num, nb_sectors, false); 2513 2514 if (flags & BDRV_REQ_COPY_ON_READ) { 2515 int pnum; 2516 2517 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum); 2518 if (ret < 0) { 2519 goto out; 2520 } 2521 2522 if (!ret || pnum != nb_sectors) { 2523 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); 2524 goto out; 2525 } 2526 } 2527 2528 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 2529 2530 out: 2531 tracked_request_end(&req); 2532 2533 if (flags & BDRV_REQ_COPY_ON_READ) { 2534 bs->copy_on_read_in_flight--; 2535 } 2536 2537 return ret; 2538 } 2539 2540 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, 2541 int nb_sectors, QEMUIOVector *qiov) 2542 { 2543 trace_bdrv_co_readv(bs, sector_num, nb_sectors); 2544 2545 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); 2546 } 2547 2548 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, 2549 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 2550 { 2551 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); 2552 2553 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 2554 BDRV_REQ_COPY_ON_READ); 2555 } 2556 2557 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 2558 int64_t sector_num, int nb_sectors) 2559 { 2560 BlockDriver *drv = bs->drv; 2561 QEMUIOVector qiov; 2562 struct iovec iov; 2563 int ret; 2564 2565 /* TODO Emulate only part of misaligned requests instead of letting block 2566 * drivers return -ENOTSUP and emulate everything */ 2567 2568 /* First try the efficient write zeroes operation */ 2569 if (drv->bdrv_co_write_zeroes) { 2570 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); 2571 if (ret != -ENOTSUP) { 2572 return ret; 2573 } 2574 } 2575 2576 /* Fall back to bounce buffer if write zeroes is unsupported */ 2577 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2578 iov.iov_base = qemu_blockalign(bs, iov.iov_len); 2579 memset(iov.iov_base, 0, iov.iov_len); 2580 qemu_iovec_init_external(&qiov, &iov, 1); 2581 2582 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); 2583 2584 qemu_vfree(iov.iov_base); 2585 return ret; 2586 } 2587 2588 /* 2589 * Handle a write request in coroutine context 2590 */ 2591 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, 2592 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 2593 BdrvRequestFlags 
flags) 2594 { 2595 BlockDriver *drv = bs->drv; 2596 BdrvTrackedRequest req; 2597 int ret; 2598 2599 if (!bs->drv) { 2600 return -ENOMEDIUM; 2601 } 2602 if (bs->read_only) { 2603 return -EACCES; 2604 } 2605 if (bdrv_check_request(bs, sector_num, nb_sectors)) { 2606 return -EIO; 2607 } 2608 2609 /* throttling disk write I/O */ 2610 if (bs->io_limits_enabled) { 2611 bdrv_io_limits_intercept(bs, true, nb_sectors); 2612 } 2613 2614 if (bs->copy_on_read_in_flight) { 2615 wait_for_overlapping_requests(bs, sector_num, nb_sectors); 2616 } 2617 2618 tracked_request_begin(&req, bs, sector_num, nb_sectors, true); 2619 2620 if (flags & BDRV_REQ_ZERO_WRITE) { 2621 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); 2622 } else { 2623 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 2624 } 2625 2626 if (ret == 0 && !bs->enable_write_cache) { 2627 ret = bdrv_co_flush(bs); 2628 } 2629 2630 if (bs->dirty_bitmap) { 2631 bdrv_set_dirty(bs, sector_num, nb_sectors); 2632 } 2633 2634 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 2635 bs->wr_highest_sector = sector_num + nb_sectors - 1; 2636 } 2637 2638 tracked_request_end(&req); 2639 2640 return ret; 2641 } 2642 2643 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 2644 int nb_sectors, QEMUIOVector *qiov) 2645 { 2646 trace_bdrv_co_writev(bs, sector_num, nb_sectors); 2647 2648 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); 2649 } 2650 2651 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, 2652 int64_t sector_num, int nb_sectors) 2653 { 2654 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); 2655 2656 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, 2657 BDRV_REQ_ZERO_WRITE); 2658 } 2659 2660 /** 2661 * Truncate file to 'offset' bytes (needed only for file protocols) 2662 */ 2663 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 2664 { 2665 BlockDriver *drv = bs->drv; 2666 int ret; 2667 if (!drv) 2668 return -ENOMEDIUM; 2669 if (!drv->bdrv_truncate) 2670 return -ENOTSUP; 2671 if (bs->read_only) 2672 return -EACCES; 2673 if (bdrv_in_use(bs)) 2674 return -EBUSY; 2675 ret = drv->bdrv_truncate(bs, offset); 2676 if (ret == 0) { 2677 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 2678 bdrv_dev_resize_cb(bs); 2679 } 2680 return ret; 2681 } 2682 2683 /** 2684 * Length of an allocated file in bytes. Sparse files are counted by actual 2685 * allocated space. Return < 0 if error or unknown. 2686 */ 2687 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 2688 { 2689 BlockDriver *drv = bs->drv; 2690 if (!drv) { 2691 return -ENOMEDIUM; 2692 } 2693 if (drv->bdrv_get_allocated_file_size) { 2694 return drv->bdrv_get_allocated_file_size(bs); 2695 } 2696 if (bs->file) { 2697 return bdrv_get_allocated_file_size(bs->file); 2698 } 2699 return -ENOTSUP; 2700 } 2701 2702 /** 2703 * Length of a file in bytes. Return < 0 if error or unknown.
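 *
 * A typical call site (sketch only) checks the sign before converting
 * the result to sectors:
 *
 *   int64_t len = bdrv_getlength(bs);
 *   if (len < 0) {
 *       return len;    ... -ENOMEDIUM and friends ...
 *   }
 *   nb_sectors = len >> BDRV_SECTOR_BITS;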
2704 */ 2705 int64_t bdrv_getlength(BlockDriverState *bs) 2706 { 2707 BlockDriver *drv = bs->drv; 2708 if (!drv) 2709 return -ENOMEDIUM; 2710 2711 if (bs->growable || bdrv_dev_has_removable_media(bs)) { 2712 if (drv->bdrv_getlength) { 2713 return drv->bdrv_getlength(bs); 2714 } 2715 } 2716 return bs->total_sectors * BDRV_SECTOR_SIZE; 2717 } 2718 2719 /* return 0 as number of sectors if no device present or error */ 2720 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 2721 { 2722 int64_t length; 2723 length = bdrv_getlength(bs); 2724 if (length < 0) 2725 length = 0; 2726 else 2727 length = length >> BDRV_SECTOR_BITS; 2728 *nb_sectors_ptr = length; 2729 } 2730 2731 /* throttling disk io limits */ 2732 void bdrv_set_io_limits(BlockDriverState *bs, 2733 BlockIOLimit *io_limits) 2734 { 2735 bs->io_limits = *io_limits; 2736 bs->io_limits_enabled = bdrv_io_limits_enabled(bs); 2737 } 2738 2739 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 2740 BlockdevOnError on_write_error) 2741 { 2742 bs->on_read_error = on_read_error; 2743 bs->on_write_error = on_write_error; 2744 } 2745 2746 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 2747 { 2748 return is_read ? bs->on_read_error : bs->on_write_error; 2749 } 2750 2751 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 2752 { 2753 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 2754 2755 switch (on_err) { 2756 case BLOCKDEV_ON_ERROR_ENOSPC: 2757 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; 2758 case BLOCKDEV_ON_ERROR_STOP: 2759 return BDRV_ACTION_STOP; 2760 case BLOCKDEV_ON_ERROR_REPORT: 2761 return BDRV_ACTION_REPORT; 2762 case BLOCKDEV_ON_ERROR_IGNORE: 2763 return BDRV_ACTION_IGNORE; 2764 default: 2765 abort(); 2766 } 2767 } 2768 2769 /* This is done by device models because, while the block layer knows 2770 * about the error, it does not know whether an operation comes from 2771 * the device or the block layer (from a job, for example). 
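 *
 * The expected calling convention in a device model is roughly (a
 * sketch; 'ret' stands for the negative errno of the failed request):
 *
 *   BlockErrorAction action = bdrv_get_error_action(bs, is_read, -ret);
 *   if (action == BDRV_ACTION_STOP) {
 *       ... queue the request so it can be retried after the VM resumes ...
 *   }
 *   bdrv_error_action(bs, action, is_read, -ret);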
2772 */ 2773 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 2774 bool is_read, int error) 2775 { 2776 assert(error >= 0); 2777 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); 2778 if (action == BDRV_ACTION_STOP) { 2779 vm_stop(RUN_STATE_IO_ERROR); 2780 bdrv_iostatus_set_err(bs, error); 2781 } 2782 } 2783 2784 int bdrv_is_read_only(BlockDriverState *bs) 2785 { 2786 return bs->read_only; 2787 } 2788 2789 int bdrv_is_sg(BlockDriverState *bs) 2790 { 2791 return bs->sg; 2792 } 2793 2794 int bdrv_enable_write_cache(BlockDriverState *bs) 2795 { 2796 return bs->enable_write_cache; 2797 } 2798 2799 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 2800 { 2801 bs->enable_write_cache = wce; 2802 2803 /* so a reopen() will preserve wce */ 2804 if (wce) { 2805 bs->open_flags |= BDRV_O_CACHE_WB; 2806 } else { 2807 bs->open_flags &= ~BDRV_O_CACHE_WB; 2808 } 2809 } 2810 2811 int bdrv_is_encrypted(BlockDriverState *bs) 2812 { 2813 if (bs->backing_hd && bs->backing_hd->encrypted) 2814 return 1; 2815 return bs->encrypted; 2816 } 2817 2818 int bdrv_key_required(BlockDriverState *bs) 2819 { 2820 BlockDriverState *backing_hd = bs->backing_hd; 2821 2822 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 2823 return 1; 2824 return (bs->encrypted && !bs->valid_key); 2825 } 2826 2827 int bdrv_set_key(BlockDriverState *bs, const char *key) 2828 { 2829 int ret; 2830 if (bs->backing_hd && bs->backing_hd->encrypted) { 2831 ret = bdrv_set_key(bs->backing_hd, key); 2832 if (ret < 0) 2833 return ret; 2834 if (!bs->encrypted) 2835 return 0; 2836 } 2837 if (!bs->encrypted) { 2838 return -EINVAL; 2839 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 2840 return -ENOMEDIUM; 2841 } 2842 ret = bs->drv->bdrv_set_key(bs, key); 2843 if (ret < 0) { 2844 bs->valid_key = 0; 2845 } else if (!bs->valid_key) { 2846 bs->valid_key = 1; 2847 /* call the change callback now, we skipped it on open */ 2848 bdrv_dev_change_media_cb(bs, true); 2849 } 2850 return ret; 2851 } 2852 2853 const char *bdrv_get_format_name(BlockDriverState *bs) 2854 { 2855 return bs->drv ? 
bs->drv->format_name : NULL; 2856 } 2857 2858 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 2859 void *opaque) 2860 { 2861 BlockDriver *drv; 2862 2863 QLIST_FOREACH(drv, &bdrv_drivers, list) { 2864 it(opaque, drv->format_name); 2865 } 2866 } 2867 2868 BlockDriverState *bdrv_find(const char *name) 2869 { 2870 BlockDriverState *bs; 2871 2872 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2873 if (!strcmp(name, bs->device_name)) { 2874 return bs; 2875 } 2876 } 2877 return NULL; 2878 } 2879 2880 BlockDriverState *bdrv_next(BlockDriverState *bs) 2881 { 2882 if (!bs) { 2883 return QTAILQ_FIRST(&bdrv_states); 2884 } 2885 return QTAILQ_NEXT(bs, list); 2886 } 2887 2888 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque) 2889 { 2890 BlockDriverState *bs; 2891 2892 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2893 it(opaque, bs); 2894 } 2895 } 2896 2897 const char *bdrv_get_device_name(BlockDriverState *bs) 2898 { 2899 return bs->device_name; 2900 } 2901 2902 int bdrv_get_flags(BlockDriverState *bs) 2903 { 2904 return bs->open_flags; 2905 } 2906 2907 void bdrv_flush_all(void) 2908 { 2909 BlockDriverState *bs; 2910 2911 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2912 bdrv_flush(bs); 2913 } 2914 } 2915 2916 int bdrv_has_zero_init(BlockDriverState *bs) 2917 { 2918 assert(bs->drv); 2919 2920 if (bs->drv->bdrv_has_zero_init) { 2921 return bs->drv->bdrv_has_zero_init(bs); 2922 } 2923 2924 return 1; 2925 } 2926 2927 typedef struct BdrvCoIsAllocatedData { 2928 BlockDriverState *bs; 2929 BlockDriverState *base; 2930 int64_t sector_num; 2931 int nb_sectors; 2932 int *pnum; 2933 int ret; 2934 bool done; 2935 } BdrvCoIsAllocatedData; 2936 2937 /* 2938 * Returns true iff the specified sector is present in the disk image. Drivers 2939 * not implementing the functionality are assumed to not support backing files, 2940 * hence all their sectors are reported as allocated. 2941 * 2942 * If 'sector_num' is beyond the end of the disk image the return value is 0 2943 * and 'pnum' is set to 0. 2944 * 2945 * 'pnum' is set to the number of sectors (including and immediately following 2946 * the specified sector) that are known to be in the same 2947 * allocated/unallocated state. 2948 * 2949 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes 2950 * beyond the end of the disk image it will be clamped. 2951 */ 2952 int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, 2953 int nb_sectors, int *pnum) 2954 { 2955 int64_t n; 2956 2957 if (sector_num >= bs->total_sectors) { 2958 *pnum = 0; 2959 return 0; 2960 } 2961 2962 n = bs->total_sectors - sector_num; 2963 if (n < nb_sectors) { 2964 nb_sectors = n; 2965 } 2966 2967 if (!bs->drv->bdrv_co_is_allocated) { 2968 *pnum = nb_sectors; 2969 return 1; 2970 } 2971 2972 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum); 2973 } 2974 2975 /* Coroutine wrapper for bdrv_is_allocated() */ 2976 static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque) 2977 { 2978 BdrvCoIsAllocatedData *data = opaque; 2979 BlockDriverState *bs = data->bs; 2980 2981 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors, 2982 data->pnum); 2983 data->done = true; 2984 } 2985 2986 /* 2987 * Synchronous wrapper around bdrv_co_is_allocated(). 2988 * 2989 * See bdrv_co_is_allocated() for details. 
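 *
 * Usage sketch, asking how much of a range is allocated in 'bs' itself
 * (the backing chain is not consulted):
 *
 *   int pnum;
 *   if (bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum) == 1) {
 *       ... the first pnum sectors are allocated in this image ...
 *   }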
2990 */ 2991 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, 2992 int *pnum) 2993 { 2994 Coroutine *co; 2995 BdrvCoIsAllocatedData data = { 2996 .bs = bs, 2997 .sector_num = sector_num, 2998 .nb_sectors = nb_sectors, 2999 .pnum = pnum, 3000 .done = false, 3001 }; 3002 3003 co = qemu_coroutine_create(bdrv_is_allocated_co_entry); 3004 qemu_coroutine_enter(co, &data); 3005 while (!data.done) { 3006 qemu_aio_wait(); 3007 } 3008 return data.ret; 3009 } 3010 3011 /* 3012 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] 3013 * 3014 * Return true if the given sector is allocated in any image between 3015 * BASE and TOP (inclusive). BASE can be NULL to check if the given 3016 * sector is allocated in any image of the chain. Return false otherwise. 3017 * 3018 * 'pnum' is set to the number of sectors (including and immediately following 3019 * the specified sector) that are known to be in the same 3020 * allocated/unallocated state. 3021 * 3022 */ 3023 int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, 3024 BlockDriverState *base, 3025 int64_t sector_num, 3026 int nb_sectors, int *pnum) 3027 { 3028 BlockDriverState *intermediate; 3029 int ret, n = nb_sectors; 3030 3031 intermediate = top; 3032 while (intermediate && intermediate != base) { 3033 int pnum_inter; 3034 ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors, 3035 &pnum_inter); 3036 if (ret < 0) { 3037 return ret; 3038 } else if (ret) { 3039 *pnum = pnum_inter; 3040 return 1; 3041 } 3042 3043 /* 3044 * [sector_num, nb_sectors] is unallocated on top but intermediate 3045 * might have 3046 * 3047 * [sector_num+x, nr_sectors] allocated. 3048 */ 3049 if (n > pnum_inter && 3050 (intermediate == top || 3051 sector_num + pnum_inter < intermediate->total_sectors)) { 3052 n = pnum_inter; 3053 } 3054 3055 intermediate = intermediate->backing_hd; 3056 } 3057 3058 *pnum = n; 3059 return 0; 3060 } 3061 3062 /* Coroutine wrapper for bdrv_is_allocated_above() */ 3063 static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque) 3064 { 3065 BdrvCoIsAllocatedData *data = opaque; 3066 BlockDriverState *top = data->bs; 3067 BlockDriverState *base = data->base; 3068 3069 data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num, 3070 data->nb_sectors, data->pnum); 3071 data->done = true; 3072 } 3073 3074 /* 3075 * Synchronous wrapper around bdrv_co_is_allocated_above(). 3076 * 3077 * See bdrv_co_is_allocated_above() for details. 
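 *
 * Sketch: passing base == NULL asks whether a sector is allocated
 * anywhere in the chain below 'top':
 *
 *   int pnum;
 *   if (bdrv_is_allocated_above(top, NULL, sector_num,
 *                               nb_sectors, &pnum) == 1) {
 *       ... some image in the chain provides these pnum sectors ...
 *   }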
3078 */ 3079 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, 3080 int64_t sector_num, int nb_sectors, int *pnum) 3081 { 3082 Coroutine *co; 3083 BdrvCoIsAllocatedData data = { 3084 .bs = top, 3085 .base = base, 3086 .sector_num = sector_num, 3087 .nb_sectors = nb_sectors, 3088 .pnum = pnum, 3089 .done = false, 3090 }; 3091 3092 co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry); 3093 qemu_coroutine_enter(co, &data); 3094 while (!data.done) { 3095 qemu_aio_wait(); 3096 } 3097 return data.ret; 3098 } 3099 3100 BlockInfo *bdrv_query_info(BlockDriverState *bs) 3101 { 3102 BlockInfo *info = g_malloc0(sizeof(*info)); 3103 info->device = g_strdup(bs->device_name); 3104 info->type = g_strdup("unknown"); 3105 info->locked = bdrv_dev_is_medium_locked(bs); 3106 info->removable = bdrv_dev_has_removable_media(bs); 3107 3108 if (bdrv_dev_has_removable_media(bs)) { 3109 info->has_tray_open = true; 3110 info->tray_open = bdrv_dev_is_tray_open(bs); 3111 } 3112 3113 if (bdrv_iostatus_is_enabled(bs)) { 3114 info->has_io_status = true; 3115 info->io_status = bs->iostatus; 3116 } 3117 3118 if (bs->dirty_bitmap) { 3119 info->has_dirty = true; 3120 info->dirty = g_malloc0(sizeof(*info->dirty)); 3121 info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE; 3122 info->dirty->granularity = 3123 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap)); 3124 } 3125 3126 if (bs->drv) { 3127 info->has_inserted = true; 3128 info->inserted = g_malloc0(sizeof(*info->inserted)); 3129 info->inserted->file = g_strdup(bs->filename); 3130 info->inserted->ro = bs->read_only; 3131 info->inserted->drv = g_strdup(bs->drv->format_name); 3132 info->inserted->encrypted = bs->encrypted; 3133 info->inserted->encryption_key_missing = bdrv_key_required(bs); 3134 3135 if (bs->backing_file[0]) { 3136 info->inserted->has_backing_file = true; 3137 info->inserted->backing_file = g_strdup(bs->backing_file); 3138 } 3139 3140 info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); 3141 3142 if (bs->io_limits_enabled) { 3143 info->inserted->bps = 3144 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; 3145 info->inserted->bps_rd = 3146 bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; 3147 info->inserted->bps_wr = 3148 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; 3149 info->inserted->iops = 3150 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; 3151 info->inserted->iops_rd = 3152 bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; 3153 info->inserted->iops_wr = 3154 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; 3155 } 3156 } 3157 return info; 3158 } 3159 3160 BlockInfoList *qmp_query_block(Error **errp) 3161 { 3162 BlockInfoList *head = NULL, **p_next = &head; 3163 BlockDriverState *bs; 3164 3165 QTAILQ_FOREACH(bs, &bdrv_states, list) { 3166 BlockInfoList *info = g_malloc0(sizeof(*info)); 3167 info->value = bdrv_query_info(bs); 3168 3169 *p_next = info; 3170 p_next = &info->next; 3171 } 3172 3173 return head; 3174 } 3175 3176 BlockStats *bdrv_query_stats(const BlockDriverState *bs) 3177 { 3178 BlockStats *s; 3179 3180 s = g_malloc0(sizeof(*s)); 3181 3182 if (bs->device_name[0]) { 3183 s->has_device = true; 3184 s->device = g_strdup(bs->device_name); 3185 } 3186 3187 s->stats = g_malloc0(sizeof(*s->stats)); 3188 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ]; 3189 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE]; 3190 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ]; 3191 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE]; 3192 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE; 3193 
s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH]; 3194 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE]; 3195 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ]; 3196 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH]; 3197 3198 if (bs->file) { 3199 s->has_parent = true; 3200 s->parent = bdrv_query_stats(bs->file); 3201 } 3202 3203 return s; 3204 } 3205 3206 BlockStatsList *qmp_query_blockstats(Error **errp) 3207 { 3208 BlockStatsList *head = NULL, **p_next = &head; 3209 BlockDriverState *bs; 3210 3211 QTAILQ_FOREACH(bs, &bdrv_states, list) { 3212 BlockStatsList *info = g_malloc0(sizeof(*info)); 3213 info->value = bdrv_query_stats(bs); 3214 3215 *p_next = info; 3216 p_next = &info->next; 3217 } 3218 3219 return head; 3220 } 3221 3222 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 3223 { 3224 if (bs->backing_hd && bs->backing_hd->encrypted) 3225 return bs->backing_file; 3226 else if (bs->encrypted) 3227 return bs->filename; 3228 else 3229 return NULL; 3230 } 3231 3232 void bdrv_get_backing_filename(BlockDriverState *bs, 3233 char *filename, int filename_size) 3234 { 3235 pstrcpy(filename, filename_size, bs->backing_file); 3236 } 3237 3238 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 3239 const uint8_t *buf, int nb_sectors) 3240 { 3241 BlockDriver *drv = bs->drv; 3242 if (!drv) 3243 return -ENOMEDIUM; 3244 if (!drv->bdrv_write_compressed) 3245 return -ENOTSUP; 3246 if (bdrv_check_request(bs, sector_num, nb_sectors)) 3247 return -EIO; 3248 3249 assert(!bs->dirty_bitmap); 3250 3251 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 3252 } 3253 3254 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 3255 { 3256 BlockDriver *drv = bs->drv; 3257 if (!drv) 3258 return -ENOMEDIUM; 3259 if (!drv->bdrv_get_info) 3260 return -ENOTSUP; 3261 memset(bdi, 0, sizeof(*bdi)); 3262 return drv->bdrv_get_info(bs, bdi); 3263 } 3264 3265 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 3266 int64_t pos, int size) 3267 { 3268 QEMUIOVector qiov; 3269 struct iovec iov = { 3270 .iov_base = (void *) buf, 3271 .iov_len = size, 3272 }; 3273 3274 qemu_iovec_init_external(&qiov, &iov, 1); 3275 return bdrv_writev_vmstate(bs, &qiov, pos); 3276 } 3277 3278 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) 3279 { 3280 BlockDriver *drv = bs->drv; 3281 3282 if (!drv) { 3283 return -ENOMEDIUM; 3284 } else if (drv->bdrv_save_vmstate) { 3285 return drv->bdrv_save_vmstate(bs, qiov, pos); 3286 } else if (bs->file) { 3287 return bdrv_writev_vmstate(bs->file, qiov, pos); 3288 } 3289 3290 return -ENOTSUP; 3291 } 3292 3293 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 3294 int64_t pos, int size) 3295 { 3296 BlockDriver *drv = bs->drv; 3297 if (!drv) 3298 return -ENOMEDIUM; 3299 if (drv->bdrv_load_vmstate) 3300 return drv->bdrv_load_vmstate(bs, buf, pos, size); 3301 if (bs->file) 3302 return bdrv_load_vmstate(bs->file, buf, pos, size); 3303 return -ENOTSUP; 3304 } 3305 3306 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 3307 { 3308 BlockDriver *drv = bs->drv; 3309 3310 if (!drv || !drv->bdrv_debug_event) { 3311 return; 3312 } 3313 3314 drv->bdrv_debug_event(bs, event); 3315 } 3316 3317 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 3318 const char *tag) 3319 { 3320 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 3321 bs = bs->file; 3322 } 3323 3324 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 3325 return 
bs->drv->bdrv_debug_breakpoint(bs, event, tag); 3326 } 3327 3328 return -ENOTSUP; 3329 } 3330 3331 int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 3332 { 3333 while (bs && bs->drv && !bs->drv->bdrv_debug_resume) { 3334 bs = bs->file; 3335 } 3336 3337 if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 3338 return bs->drv->bdrv_debug_resume(bs, tag); 3339 } 3340 3341 return -ENOTSUP; 3342 } 3343 3344 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 3345 { 3346 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 3347 bs = bs->file; 3348 } 3349 3350 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 3351 return bs->drv->bdrv_debug_is_suspended(bs, tag); 3352 } 3353 3354 return false; 3355 } 3356 3357 /**************************************************************/ 3358 /* handling of snapshots */ 3359 3360 int bdrv_can_snapshot(BlockDriverState *bs) 3361 { 3362 BlockDriver *drv = bs->drv; 3363 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 3364 return 0; 3365 } 3366 3367 if (!drv->bdrv_snapshot_create) { 3368 if (bs->file != NULL) { 3369 return bdrv_can_snapshot(bs->file); 3370 } 3371 return 0; 3372 } 3373 3374 return 1; 3375 } 3376 3377 int bdrv_is_snapshot(BlockDriverState *bs) 3378 { 3379 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 3380 } 3381 3382 BlockDriverState *bdrv_snapshots(void) 3383 { 3384 BlockDriverState *bs; 3385 3386 if (bs_snapshots) { 3387 return bs_snapshots; 3388 } 3389 3390 bs = NULL; 3391 while ((bs = bdrv_next(bs))) { 3392 if (bdrv_can_snapshot(bs)) { 3393 bs_snapshots = bs; 3394 return bs; 3395 } 3396 } 3397 return NULL; 3398 } 3399 3400 int bdrv_snapshot_create(BlockDriverState *bs, 3401 QEMUSnapshotInfo *sn_info) 3402 { 3403 BlockDriver *drv = bs->drv; 3404 if (!drv) 3405 return -ENOMEDIUM; 3406 if (drv->bdrv_snapshot_create) 3407 return drv->bdrv_snapshot_create(bs, sn_info); 3408 if (bs->file) 3409 return bdrv_snapshot_create(bs->file, sn_info); 3410 return -ENOTSUP; 3411 } 3412 3413 int bdrv_snapshot_goto(BlockDriverState *bs, 3414 const char *snapshot_id) 3415 { 3416 BlockDriver *drv = bs->drv; 3417 int ret, open_ret; 3418 3419 if (!drv) 3420 return -ENOMEDIUM; 3421 if (drv->bdrv_snapshot_goto) 3422 return drv->bdrv_snapshot_goto(bs, snapshot_id); 3423 3424 if (bs->file) { 3425 drv->bdrv_close(bs); 3426 ret = bdrv_snapshot_goto(bs->file, snapshot_id); 3427 open_ret = drv->bdrv_open(bs, NULL, bs->open_flags); 3428 if (open_ret < 0) { 3429 bdrv_delete(bs->file); 3430 bs->drv = NULL; 3431 return open_ret; 3432 } 3433 return ret; 3434 } 3435 3436 return -ENOTSUP; 3437 } 3438 3439 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) 3440 { 3441 BlockDriver *drv = bs->drv; 3442 if (!drv) 3443 return -ENOMEDIUM; 3444 if (drv->bdrv_snapshot_delete) 3445 return drv->bdrv_snapshot_delete(bs, snapshot_id); 3446 if (bs->file) 3447 return bdrv_snapshot_delete(bs->file, snapshot_id); 3448 return -ENOTSUP; 3449 } 3450 3451 int bdrv_snapshot_list(BlockDriverState *bs, 3452 QEMUSnapshotInfo **psn_info) 3453 { 3454 BlockDriver *drv = bs->drv; 3455 if (!drv) 3456 return -ENOMEDIUM; 3457 if (drv->bdrv_snapshot_list) 3458 return drv->bdrv_snapshot_list(bs, psn_info); 3459 if (bs->file) 3460 return bdrv_snapshot_list(bs->file, psn_info); 3461 return -ENOTSUP; 3462 } 3463 3464 int bdrv_snapshot_load_tmp(BlockDriverState *bs, 3465 const char *snapshot_name) 3466 { 3467 BlockDriver *drv = bs->drv; 3468 if (!drv) { 3469 return -ENOMEDIUM; 3470 } 3471 if (!bs->read_only) { 3472 return -EINVAL; 3473 } 3474 if 
(drv->bdrv_snapshot_load_tmp) { 3475 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); 3476 } 3477 return -ENOTSUP; 3478 } 3479 3480 /* backing_file can either be relative, or absolute, or a protocol. If it is 3481 * relative, it must be relative to the chain. So, passing in bs->filename 3482 * from a BDS as backing_file should not be done, as that may be relative to 3483 * the CWD rather than the chain. */ 3484 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 3485 const char *backing_file) 3486 { 3487 char *filename_full = NULL; 3488 char *backing_file_full = NULL; 3489 char *filename_tmp = NULL; 3490 int is_protocol = 0; 3491 BlockDriverState *curr_bs = NULL; 3492 BlockDriverState *retval = NULL; 3493 3494 if (!bs || !bs->drv || !backing_file) { 3495 return NULL; 3496 } 3497 3498 filename_full = g_malloc(PATH_MAX); 3499 backing_file_full = g_malloc(PATH_MAX); 3500 filename_tmp = g_malloc(PATH_MAX); 3501 3502 is_protocol = path_has_protocol(backing_file); 3503 3504 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 3505 3506 /* If either of the filename paths is actually a protocol, then 3507 * compare unmodified paths; otherwise make paths relative */ 3508 if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 3509 if (strcmp(backing_file, curr_bs->backing_file) == 0) { 3510 retval = curr_bs->backing_hd; 3511 break; 3512 } 3513 } else { 3514 /* If not an absolute filename path, make it relative to the current 3515 * image's filename path */ 3516 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3517 backing_file); 3518 3519 /* We are going to compare absolute pathnames */ 3520 if (!realpath(filename_tmp, filename_full)) { 3521 continue; 3522 } 3523 3524 /* We need to make sure the backing filename we are comparing against 3525 * is relative to the current image filename (or absolute) */ 3526 path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 3527 curr_bs->backing_file); 3528 3529 if (!realpath(filename_tmp, backing_file_full)) { 3530 continue; 3531 } 3532 3533 if (strcmp(backing_file_full, filename_full) == 0) { 3534 retval = curr_bs->backing_hd; 3535 break; 3536 } 3537 } 3538 } 3539 3540 g_free(filename_full); 3541 g_free(backing_file_full); 3542 g_free(filename_tmp); 3543 return retval; 3544 } 3545 3546 int bdrv_get_backing_file_depth(BlockDriverState *bs) 3547 { 3548 if (!bs->drv) { 3549 return 0; 3550 } 3551 3552 if (!bs->backing_hd) { 3553 return 0; 3554 } 3555 3556 return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 3557 } 3558 3559 BlockDriverState *bdrv_find_base(BlockDriverState *bs) 3560 { 3561 BlockDriverState *curr_bs = NULL; 3562 3563 if (!bs) { 3564 return NULL; 3565 } 3566 3567 curr_bs = bs; 3568 3569 while (curr_bs->backing_hd) { 3570 curr_bs = curr_bs->backing_hd; 3571 } 3572 return curr_bs; 3573 } 3574 3575 #define NB_SUFFIXES 4 3576 3577 char *get_human_readable_size(char *buf, int buf_size, int64_t size) 3578 { 3579 static const char suffixes[NB_SUFFIXES] = "KMGT"; 3580 int64_t base; 3581 int i; 3582 3583 if (size <= 999) { 3584 snprintf(buf, buf_size, "%" PRId64, size); 3585 } else { 3586 base = 1024; 3587 for(i = 0; i < NB_SUFFIXES; i++) { 3588 if (size < (10 * base)) { 3589 snprintf(buf, buf_size, "%0.1f%c", 3590 (double)size / base, 3591 suffixes[i]); 3592 break; 3593 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) { 3594 snprintf(buf, buf_size, "%" PRId64 "%c", 3595 ((size + (base >> 1)) / base), 3596 suffixes[i]); 3597 break; 3598 } 3599 base = base * 1024; 3600 } 3601 } 3602 return buf; 
3603 } 3604 3605 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) 3606 { 3607 char buf1[128], date_buf[128], clock_buf[128]; 3608 struct tm tm; 3609 time_t ti; 3610 int64_t secs; 3611 3612 if (!sn) { 3613 snprintf(buf, buf_size, 3614 "%-10s%-20s%7s%20s%15s", 3615 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK"); 3616 } else { 3617 ti = sn->date_sec; 3618 localtime_r(&ti, &tm); 3619 strftime(date_buf, sizeof(date_buf), 3620 "%Y-%m-%d %H:%M:%S", &tm); 3621 secs = sn->vm_clock_nsec / 1000000000; 3622 snprintf(clock_buf, sizeof(clock_buf), 3623 "%02d:%02d:%02d.%03d", 3624 (int)(secs / 3600), 3625 (int)((secs / 60) % 60), 3626 (int)(secs % 60), 3627 (int)((sn->vm_clock_nsec / 1000000) % 1000)); 3628 snprintf(buf, buf_size, 3629 "%-10s%-20s%7s%20s%15s", 3630 sn->id_str, sn->name, 3631 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size), 3632 date_buf, 3633 clock_buf); 3634 } 3635 return buf; 3636 } 3637 3638 /**************************************************************/ 3639 /* async I/Os */ 3640 3641 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 3642 QEMUIOVector *qiov, int nb_sectors, 3643 BlockDriverCompletionFunc *cb, void *opaque) 3644 { 3645 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 3646 3647 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 3648 cb, opaque, false); 3649 } 3650 3651 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 3652 QEMUIOVector *qiov, int nb_sectors, 3653 BlockDriverCompletionFunc *cb, void *opaque) 3654 { 3655 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 3656 3657 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 3658 cb, opaque, true); 3659 } 3660 3661 3662 typedef struct MultiwriteCB { 3663 int error; 3664 int num_requests; 3665 int num_callbacks; 3666 struct { 3667 BlockDriverCompletionFunc *cb; 3668 void *opaque; 3669 QEMUIOVector *free_qiov; 3670 } callbacks[]; 3671 } MultiwriteCB; 3672 3673 static void multiwrite_user_cb(MultiwriteCB *mcb) 3674 { 3675 int i; 3676 3677 for (i = 0; i < mcb->num_callbacks; i++) { 3678 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 3679 if (mcb->callbacks[i].free_qiov) { 3680 qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 3681 } 3682 g_free(mcb->callbacks[i].free_qiov); 3683 } 3684 } 3685 3686 static void multiwrite_cb(void *opaque, int ret) 3687 { 3688 MultiwriteCB *mcb = opaque; 3689 3690 trace_multiwrite_cb(mcb, ret); 3691 3692 if (ret < 0 && !mcb->error) { 3693 mcb->error = ret; 3694 } 3695 3696 mcb->num_requests--; 3697 if (mcb->num_requests == 0) { 3698 multiwrite_user_cb(mcb); 3699 g_free(mcb); 3700 } 3701 } 3702 3703 static int multiwrite_req_compare(const void *a, const void *b) 3704 { 3705 const BlockRequest *req1 = a, *req2 = b; 3706 3707 /* 3708 * Note that we can't simply subtract req2->sector from req1->sector 3709 * here as that could overflow the return value. 3710 */ 3711 if (req1->sector > req2->sector) { 3712 return 1; 3713 } else if (req1->sector < req2->sector) { 3714 return -1; 3715 } else { 3716 return 0; 3717 } 3718 } 3719 3720 /* 3721 * Takes a bunch of requests and tries to merge them. Returns the number of 3722 * requests that remain after merging. 
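 *
 * Worked example (sketch): after sorting, the two requests
 * {sector 0, 8 sectors} and {sector 8, 8 sectors} are exactly
 * sequential, so their qiovs are concatenated into a single request
 * {sector 0, 16 sectors} and 1 is returned instead of 2.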
3723 */ 3724 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 3725 int num_reqs, MultiwriteCB *mcb) 3726 { 3727 int i, outidx; 3728 3729 // Sort requests by start sector 3730 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 3731 3732 // Combine adjacent requests if they are exactly sequential or overlapping; 3733 // gaps are never zero-filled (the merge condition rules them out). 3734 outidx = 0; 3735 for (i = 1; i < num_reqs; i++) { 3736 int merge = 0; 3737 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 3738 3739 // Handle exactly sequential writes and overlapping writes. 3740 if (reqs[i].sector <= oldreq_last) { 3741 merge = 1; 3742 } 3743 3744 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 3745 merge = 0; 3746 } 3747 3748 if (merge) { 3749 size_t size; 3750 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 3751 qemu_iovec_init(qiov, 3752 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 3753 3754 // Add the first request to the merged one. If the requests are 3755 // overlapping, drop the last sectors of the first request. 3756 size = (reqs[i].sector - reqs[outidx].sector) << 9; 3757 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); 3758 3759 // We should not need to add any zeros between the two requests 3760 assert (reqs[i].sector <= oldreq_last); 3761 3762 // Add the second request 3763 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); 3764 3765 reqs[outidx].nb_sectors = qiov->size >> 9; 3766 reqs[outidx].qiov = qiov; 3767 3768 mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 3769 } else { 3770 outidx++; 3771 reqs[outidx].sector = reqs[i].sector; 3772 reqs[outidx].nb_sectors = reqs[i].nb_sectors; 3773 reqs[outidx].qiov = reqs[i].qiov; 3774 } 3775 } 3776 3777 return outidx + 1; 3778 } 3779 3780 /* 3781 * Submit multiple AIO write requests at once. 3782 * 3783 * On success, the function returns 0 and all requests in the reqs array have 3784 * been submitted. On error, this function returns -1, and any of the 3785 * requests may or may not be submitted yet. In particular, this means that the 3786 * callback will be called for some of the requests, for others it won't. The 3787 * caller must check the error field of the BlockRequest to wait for the right 3788 * callbacks (if error != 0, no callback will be called). 3789 * 3790 * The implementation may modify the contents of the reqs array, e.g. to merge 3791 * requests. However, the fields opaque and error are left unmodified as they 3792 * are used to signal failure for a single request to the caller. 3793 */ 3794 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 3795 { 3796 MultiwriteCB *mcb; 3797 int i; 3798 3799 /* don't submit writes if we don't have a medium */ 3800 if (bs->drv == NULL) { 3801 for (i = 0; i < num_reqs; i++) { 3802 reqs[i].error = -ENOMEDIUM; 3803 } 3804 return -1; 3805 } 3806 3807 if (num_reqs == 0) { 3808 return 0; 3809 } 3810 3811 // Create MultiwriteCB structure 3812 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 3813 mcb->num_requests = 0; 3814 mcb->num_callbacks = num_reqs; 3815 3816 for (i = 0; i < num_reqs; i++) { 3817 mcb->callbacks[i].cb = reqs[i].cb; 3818 mcb->callbacks[i].opaque = reqs[i].opaque; 3819 } 3820 3821 // Check for mergeable requests 3822 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 3823 3824 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 3825 3826 /* Run the aio requests.
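 * num_requests is set to the full count before the first submission so
 * that a callback completing synchronously cannot see the counter drop
 * to zero and free mcb while later requests are still being submitted.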
*/ 3827 mcb->num_requests = num_reqs; 3828 for (i = 0; i < num_reqs; i++) { 3829 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, 3830 reqs[i].nb_sectors, multiwrite_cb, mcb); 3831 } 3832 3833 return 0; 3834 } 3835 3836 void bdrv_aio_cancel(BlockDriverAIOCB *acb) 3837 { 3838 acb->aiocb_info->cancel(acb); 3839 } 3840 3841 /* block I/O throttling */ 3842 static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, 3843 bool is_write, double elapsed_time, uint64_t *wait) 3844 { 3845 uint64_t bps_limit = 0; 3846 uint64_t extension; 3847 double bytes_limit, bytes_base, bytes_res; 3848 double slice_time, wait_time; 3849 3850 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { 3851 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; 3852 } else if (bs->io_limits.bps[is_write]) { 3853 bps_limit = bs->io_limits.bps[is_write]; 3854 } else { 3855 if (wait) { 3856 *wait = 0; 3857 } 3858 3859 return false; 3860 } 3861 3862 slice_time = bs->slice_end - bs->slice_start; 3863 slice_time /= (NANOSECONDS_PER_SECOND); 3864 bytes_limit = bps_limit * slice_time; 3865 bytes_base = bs->slice_submitted.bytes[is_write]; 3866 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { 3867 bytes_base += bs->slice_submitted.bytes[!is_write]; 3868 } 3869 3870 /* bytes_base: the bytes of data which have already been read/written; 3871 * it is obtained from the history statistics. 3872 * bytes_res: the remaining bytes of data which need to be read/written. 3873 * (bytes_base + bytes_res) / bps_limit: used to calculate 3874 * the total time for completing reading/writing all data. 3875 */ 3876 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; 3877 3878 if (bytes_base + bytes_res <= bytes_limit) { 3879 if (wait) { 3880 *wait = 0; 3881 } 3882 3883 return false; 3884 } 3885 3886 /* Calc approx time to dispatch */ 3887 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; 3888 3889 /* When the I/O rate at runtime exceeds the limits, 3890 * bs->slice_end needs to be extended so that the current statistics 3891 * are kept until the timer fires; the extension is increased and tuned 3892 * based on experimental results.
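 *
 * Worked example (sketch): with bps_limit = 10 MiB/s, bytes_base = 8 MiB
 * already submitted in this slice, a 4 MiB request (bytes_res) and
 * elapsed_time = 0.5 s, the request may complete no earlier than
 * (8 + 4) MiB / (10 MiB/s) = 1.2 s into the slice; wait_time is then
 * 1.2 s - 0.5 s = 0.7 s, and slice_end is pushed out by that amount
 * rounded up to a whole multiple of BLOCK_IO_SLICE_TIME.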
3893 */ 3894 extension = wait_time * NANOSECONDS_PER_SECOND; 3895 extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * 3896 BLOCK_IO_SLICE_TIME; 3897 bs->slice_end += extension; 3898 if (wait) { 3899 *wait = wait_time * NANOSECONDS_PER_SECOND; 3900 } 3901 3902 return true; 3903 } 3904 3905 static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, 3906 double elapsed_time, uint64_t *wait) 3907 { 3908 uint64_t iops_limit = 0; 3909 double ios_limit, ios_base; 3910 double slice_time, wait_time; 3911 3912 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { 3913 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; 3914 } else if (bs->io_limits.iops[is_write]) { 3915 iops_limit = bs->io_limits.iops[is_write]; 3916 } else { 3917 if (wait) { 3918 *wait = 0; 3919 } 3920 3921 return false; 3922 } 3923 3924 slice_time = bs->slice_end - bs->slice_start; 3925 slice_time /= (NANOSECONDS_PER_SECOND); 3926 ios_limit = iops_limit * slice_time; 3927 ios_base = bs->slice_submitted.ios[is_write]; 3928 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { 3929 ios_base += bs->slice_submitted.ios[!is_write]; 3930 } 3931 3932 if (ios_base + 1 <= ios_limit) { 3933 if (wait) { 3934 *wait = 0; 3935 } 3936 3937 return false; 3938 } 3939 3940 /* Calc approx time to dispatch, in seconds */ 3941 wait_time = (ios_base + 1) / iops_limit; 3942 if (wait_time > elapsed_time) { 3943 wait_time = wait_time - elapsed_time; 3944 } else { 3945 wait_time = 0; 3946 } 3947 3948 /* Exceeded current slice, extend it by another slice time */ 3949 bs->slice_end += BLOCK_IO_SLICE_TIME; 3950 if (wait) { 3951 *wait = wait_time * NANOSECONDS_PER_SECOND; 3952 } 3953 3954 return true; 3955 } 3956 3957 static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, 3958 bool is_write, int64_t *wait) 3959 { 3960 int64_t now, max_wait; 3961 uint64_t bps_wait = 0, iops_wait = 0; 3962 double elapsed_time; 3963 int bps_ret, iops_ret; 3964 3965 now = qemu_get_clock_ns(vm_clock); 3966 if (now > bs->slice_end) { 3967 bs->slice_start = now; 3968 bs->slice_end = now + BLOCK_IO_SLICE_TIME; 3969 memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); 3970 } 3971 3972 elapsed_time = now - bs->slice_start; 3973 elapsed_time /= (NANOSECONDS_PER_SECOND); 3974 3975 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, 3976 is_write, elapsed_time, &bps_wait); 3977 iops_ret = bdrv_exceed_iops_limits(bs, is_write, 3978 elapsed_time, &iops_wait); 3979 if (bps_ret || iops_ret) { 3980 max_wait = bps_wait > iops_wait ? 
bps_wait : iops_wait; 3981 if (wait) { 3982 *wait = max_wait; 3983 } 3984 3985 now = qemu_get_clock_ns(vm_clock); 3986 if (bs->slice_end < now + max_wait) { 3987 bs->slice_end = now + max_wait; 3988 } 3989 3990 return true; 3991 } 3992 3993 if (wait) { 3994 *wait = 0; 3995 } 3996 3997 bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * 3998 BDRV_SECTOR_SIZE; 3999 bs->slice_submitted.ios[is_write]++; 4000 4001 return false; 4002 } 4003 4004 /**************************************************************/ 4005 /* async block device emulation */ 4006 4007 typedef struct BlockDriverAIOCBSync { 4008 BlockDriverAIOCB common; 4009 QEMUBH *bh; 4010 int ret; 4011 /* vector translation state */ 4012 QEMUIOVector *qiov; 4013 uint8_t *bounce; 4014 int is_write; 4015 } BlockDriverAIOCBSync; 4016 4017 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) 4018 { 4019 BlockDriverAIOCBSync *acb = 4020 container_of(blockacb, BlockDriverAIOCBSync, common); 4021 qemu_bh_delete(acb->bh); 4022 acb->bh = NULL; 4023 qemu_aio_release(acb); 4024 } 4025 4026 static const AIOCBInfo bdrv_em_aiocb_info = { 4027 .aiocb_size = sizeof(BlockDriverAIOCBSync), 4028 .cancel = bdrv_aio_cancel_em, 4029 }; 4030 4031 static void bdrv_aio_bh_cb(void *opaque) 4032 { 4033 BlockDriverAIOCBSync *acb = opaque; 4034 4035 if (!acb->is_write) 4036 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 4037 qemu_vfree(acb->bounce); 4038 acb->common.cb(acb->common.opaque, acb->ret); 4039 qemu_bh_delete(acb->bh); 4040 acb->bh = NULL; 4041 qemu_aio_release(acb); 4042 } 4043 4044 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 4045 int64_t sector_num, 4046 QEMUIOVector *qiov, 4047 int nb_sectors, 4048 BlockDriverCompletionFunc *cb, 4049 void *opaque, 4050 int is_write) 4051 4052 { 4053 BlockDriverAIOCBSync *acb; 4054 4055 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); 4056 acb->is_write = is_write; 4057 acb->qiov = qiov; 4058 acb->bounce = qemu_blockalign(bs, qiov->size); 4059 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 4060 4061 if (is_write) { 4062 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 4063 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 4064 } else { 4065 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 4066 } 4067 4068 qemu_bh_schedule(acb->bh); 4069 4070 return &acb->common; 4071 } 4072 4073 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 4074 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4075 BlockDriverCompletionFunc *cb, void *opaque) 4076 { 4077 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 4078 } 4079 4080 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 4081 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4082 BlockDriverCompletionFunc *cb, void *opaque) 4083 { 4084 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 4085 } 4086 4087 4088 typedef struct BlockDriverAIOCBCoroutine { 4089 BlockDriverAIOCB common; 4090 BlockRequest req; 4091 bool is_write; 4092 bool *done; 4093 QEMUBH* bh; 4094 } BlockDriverAIOCBCoroutine; 4095 4096 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) 4097 { 4098 BlockDriverAIOCBCoroutine *acb = 4099 container_of(blockacb, BlockDriverAIOCBCoroutine, common); 4100 bool done = false; 4101 4102 acb->done = &done; 4103 while (!done) { 4104 qemu_aio_wait(); 4105 } 4106 } 4107 4108 static const AIOCBInfo bdrv_em_co_aiocb_info = { 4109 .aiocb_size = 
sizeof(BlockDriverAIOCBCoroutine), 4110 .cancel = bdrv_aio_co_cancel_em, 4111 }; 4112 4113 static void bdrv_co_em_bh(void *opaque) 4114 { 4115 BlockDriverAIOCBCoroutine *acb = opaque; 4116 4117 acb->common.cb(acb->common.opaque, acb->req.error); 4118 4119 if (acb->done) { 4120 *acb->done = true; 4121 } 4122 4123 qemu_bh_delete(acb->bh); 4124 qemu_aio_release(acb); 4125 } 4126 4127 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 4128 static void coroutine_fn bdrv_co_do_rw(void *opaque) 4129 { 4130 BlockDriverAIOCBCoroutine *acb = opaque; 4131 BlockDriverState *bs = acb->common.bs; 4132 4133 if (!acb->is_write) { 4134 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 4135 acb->req.nb_sectors, acb->req.qiov, 0); 4136 } else { 4137 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 4138 acb->req.nb_sectors, acb->req.qiov, 0); 4139 } 4140 4141 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 4142 qemu_bh_schedule(acb->bh); 4143 } 4144 4145 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 4146 int64_t sector_num, 4147 QEMUIOVector *qiov, 4148 int nb_sectors, 4149 BlockDriverCompletionFunc *cb, 4150 void *opaque, 4151 bool is_write) 4152 { 4153 Coroutine *co; 4154 BlockDriverAIOCBCoroutine *acb; 4155 4156 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 4157 acb->req.sector = sector_num; 4158 acb->req.nb_sectors = nb_sectors; 4159 acb->req.qiov = qiov; 4160 acb->is_write = is_write; 4161 acb->done = NULL; 4162 4163 co = qemu_coroutine_create(bdrv_co_do_rw); 4164 qemu_coroutine_enter(co, acb); 4165 4166 return &acb->common; 4167 } 4168 4169 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 4170 { 4171 BlockDriverAIOCBCoroutine *acb = opaque; 4172 BlockDriverState *bs = acb->common.bs; 4173 4174 acb->req.error = bdrv_co_flush(bs); 4175 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 4176 qemu_bh_schedule(acb->bh); 4177 } 4178 4179 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 4180 BlockDriverCompletionFunc *cb, void *opaque) 4181 { 4182 trace_bdrv_aio_flush(bs, opaque); 4183 4184 Coroutine *co; 4185 BlockDriverAIOCBCoroutine *acb; 4186 4187 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 4188 acb->done = NULL; 4189 4190 co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 4191 qemu_coroutine_enter(co, acb); 4192 4193 return &acb->common; 4194 } 4195 4196 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 4197 { 4198 BlockDriverAIOCBCoroutine *acb = opaque; 4199 BlockDriverState *bs = acb->common.bs; 4200 4201 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 4202 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 4203 qemu_bh_schedule(acb->bh); 4204 } 4205 4206 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, 4207 int64_t sector_num, int nb_sectors, 4208 BlockDriverCompletionFunc *cb, void *opaque) 4209 { 4210 Coroutine *co; 4211 BlockDriverAIOCBCoroutine *acb; 4212 4213 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 4214 4215 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 4216 acb->req.sector = sector_num; 4217 acb->req.nb_sectors = nb_sectors; 4218 acb->done = NULL; 4219 co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 4220 qemu_coroutine_enter(co, acb); 4221 4222 return &acb->common; 4223 } 4224 4225 void bdrv_init(void) 4226 { 4227 module_call_init(MODULE_INIT_BLOCK); 4228 } 4229 4230 void bdrv_init_with_whitelist(void) 4231 { 4232 use_bdrv_whitelist = 1; 4233 bdrv_init(); 4234 } 4235 4236 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    acb = g_slice_alloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = p;
    g_slice_free1(acb->aiocb_info->aiocb_size, acb);
}

/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}

static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
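/*
 * Illustrative sketch of the yield/re-enter handshake above from a caller's
 * point of view (my_copy_sectors is hypothetical): code that already runs in
 * coroutine context simply calls the bdrv_co_* interfaces; the emulation
 * yields in bdrv_co_io_em() and bdrv_co_io_em_complete() re-enters the
 * coroutine once the driver's AIO callback fires.
 *
 *     static int coroutine_fn my_copy_sectors(BlockDriverState *src,
 *                                             BlockDriverState *dst,
 *                                             int64_t sector_num)
 *     {
 *         uint8_t buf[BDRV_SECTOR_SIZE];
 *         struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *         QEMUIOVector qiov;
 *         int ret;
 *
 *         qemu_iovec_init_external(&qiov, &iov, 1);
 *         ret = bdrv_co_readv(src, sector_num, 1, &qiov);
 *         if (ret < 0) {
 *             return ret;
 *         }
 *         return bdrv_co_writev(dst, sector_num, 1, &qiov);
 *     }
 */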
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and therefore don't support bdrv_flush.  Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk.  Returning an error doesn't work
         * because that would break guests even if the server operates in
         * writethrough mode.
         *
         * Let's hope the user knows what they're doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * set in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}

void bdrv_invalidate_cache(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs);
    }
}

void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}

void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
    }
}

int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
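/*
 * The RwCo + qemu_in_coroutine() dance in bdrv_flush() above is the standard
 * recipe for exposing a coroutine_fn synchronously.  A minimal sketch for an
 * arbitrary operation (MyCo, my_co_entry and my_sync_op are hypothetical):
 *
 *     typedef struct MyCo {
 *         BlockDriverState *bs;
 *         int ret;
 *     } MyCo;
 *
 *     static void coroutine_fn my_co_entry(void *opaque)
 *     {
 *         MyCo *mc = opaque;
 *         mc->ret = bdrv_co_flush(mc->bs);   // any coroutine_fn would do
 *     }
 *
 *     static int my_sync_op(BlockDriverState *bs)
 *     {
 *         MyCo mc = { .bs = bs, .ret = NOT_DONE };
 *
 *         if (qemu_in_coroutine()) {
 *             my_co_entry(&mc);              // already in coroutine context
 *         } else {
 *             Coroutine *co = qemu_coroutine_create(my_co_entry);
 *             qemu_coroutine_enter(co, &mc);
 *             while (mc.ret == NOT_DONE) {
 *                 qemu_aio_wait();           // run completions until done
 *             }
 *         }
 *         return mc.ret;
 *     }
 */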
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}

int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    }

    if (bs->dirty_bitmap) {
        bdrv_reset_dirty(bs, sector_num, nb_sectors);
    }

    /* Do nothing if disabled. */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return 0;
    }
    if (!drv->bdrv_is_inserted) {
        return 1;
    }
    return drv->bdrv_is_inserted(bs);
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}

/**
 * If eject_flag is TRUE, eject the media.  Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}

/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl) {
        return drv->bdrv_ioctl(bs, req, buf);
    }
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl) {
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    }
    return NULL;
}

void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}
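/*
 * A minimal usage sketch, assuming a 4096-byte buffer_alignment (as seen on
 * O_DIRECT hosts); my_read_aligned is hypothetical.  Buffers obtained from
 * qemu_blockalign() must be released with qemu_vfree(), not g_free():
 *
 *     static int coroutine_fn my_read_aligned(BlockDriverState *bs)
 *     {
 *         void *buf = qemu_blockalign(bs, 4096);   // suitably aligned
 *         struct iovec iov = { .iov_base = buf, .iov_len = 4096 };
 *         QEMUIOVector qiov;
 *         int ret;
 *
 *         qemu_iovec_init_external(&qiov, &iov, 1);
 *         ret = bdrv_co_readv(bs, 0, 4096 >> BDRV_SECTOR_BITS, &qiov);
 *         qemu_vfree(buf);
 *         return ret;
 *     }
 */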
/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
            return false;
        }
    }

    return true;
}

void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
{
    int64_t bitmap_size;

    assert((granularity & (granularity - 1)) == 0);

    if (granularity) {
        granularity >>= BDRV_SECTOR_BITS;
        assert(!bs->dirty_bitmap);
        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
    } else {
        if (bs->dirty_bitmap) {
            hbitmap_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}

int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    if (bs->dirty_bitmap) {
        return hbitmap_get(bs->dirty_bitmap, sector);
    } else {
        return 0;
    }
}

void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
}

void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    if (bs->dirty_bitmap) {
        return hbitmap_count(bs->dirty_bitmap);
    } else {
        return 0;
    }
}
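/*
 * Illustrative sketch of consuming the dirty bitmap (my_sync_dirty is
 * hypothetical; block jobs such as drive-mirror follow this shape): enable
 * tracking with a power-of-two granularity in bytes, then walk the dirty
 * sectors with an HBitmapIter.
 *
 *     static void my_sync_dirty(BlockDriverState *bs)
 *     {
 *         int64_t chunk = 65536 >> BDRV_SECTOR_BITS;  // 64 KiB in sectors
 *         HBitmapIter hbi;
 *         int64_t sector;
 *
 *         bdrv_set_dirty_tracking(bs, 65536);     // 64 KiB granularity
 *         // ... writes happen, marking sectors dirty ...
 *         bdrv_dirty_iter_init(bs, &hbi);
 *         while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
 *             // copy out 'chunk' sectors at 'sector', then clear them
 *             bdrv_reset_dirty(bs, sector, chunk);
 *         }
 *         bdrv_set_dirty_tracking(bs, 0);         // tear down the bitmap
 *     }
 */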
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}

void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = (error == ENOSPC) ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                           BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
                enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
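/*
 * Typical accounting usage from a device model (my_ide_flush is
 * hypothetical): bracket each guest request with a start/done pair so
 * query-blockstats sees bytes, ops and latency per I/O type.
 *
 *     static int my_ide_flush(BlockDriverState *bs)
 *     {
 *         BlockAcctCookie cookie;
 *         int ret;
 *
 *         bdrv_acct_start(bs, &cookie, 0, BDRV_ACCT_FLUSH);
 *         ret = bdrv_flush(bs);
 *         bdrv_acct_done(bs, &cookie);
 *         return ret;
 *     }
 */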
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_setg(errp, "Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt->value.s);
            goto out;
        }
    }

    /* The size for the image must always be specified, with one exception:
     * if we are using a backing file, we can obtain the size from there.
     */
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t backing_size;
            char buf[32];
            int back_flags;

            /* backing files are always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
                            backing_drv);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Could not open '%s'",
                                 backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &backing_size);
            backing_size *= BDRV_SECTOR_SIZE;

            snprintf(buf, sizeof(buf), "%" PRIu64, backing_size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        print_option_parameters(param);
        puts("");
    }
    ret = bdrv_create(drv, filename, param);
    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_setg(errp, "Formatting or formatting option not supported "
                             "for file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_setg(errp, "The image size is too large for file format '%s'",
                       fmt);
        } else {
            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
                       strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }
}

AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    /* Currently BlockDriverState always uses the main loop AioContext */
    return qemu_get_aio_context();
}
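/*
 * Caller-side sketch of bdrv_img_create(), roughly what qemu-img create
 * does (the filename, size and flags here are made up for illustration):
 *
 *     Error *local_err = NULL;
 *
 *     bdrv_img_create("test.qcow2", "qcow2",
 *                     NULL, NULL,            // no backing file/format
 *                     NULL,                  // no -o option string
 *                     16 * 1024 * 1024,      // 16 MiB
 *                     0,                     // default open flags
 *                     &local_err, false);
 *     if (local_err) {
 *         error_report("%s", error_get_pretty(local_err));
 *         error_free(local_err);
 *     }
 */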