/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
        || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
        || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
        || io_limits->iops[BLOCK_IO_LIMIT_READ]
        || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
        || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
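/*
 * Illustrative sketch (not compiled): enabling throttling from a caller's
 * point of view.  The limit values below are arbitrary examples; real
 * callers fill BlockIOLimit from -drive options.  bdrv_set_io_limits() is
 * defined later in this file.
 */
#if 0
static void example_enable_throttling(BlockDriverState *bs)
{
    BlockIOLimit io_limits;

    memset(&io_limits, 0, sizeof(io_limits));
    io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  = 1024 * 1024; /* 1 MB/s overall */
    io_limits.iops[BLOCK_IO_LIMIT_TOTAL] = 100;         /* 100 req/s overall */

    /* copies the limits and flips bs->io_limits_enabled if any are set */
    bdrv_set_io_limits(bs, &io_limits);
}
#endif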
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* We aim to preserve the FIFO order of requests.  The next throttled
     * request is not dequeued until the current one has been allowed to
     * proceed, so if the current request still exceeds the limits it is
     * re-inserted at the head of the queue, and every request behind it
     * stays queued in throttled_reqs.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by treating it as relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
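/*
 * Illustrative sketch (not compiled): what path_combine() produces for a
 * few typical inputs.  The file names are made up for the example.
 */
#if 0
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative backing file: resolved next to the base image */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "backing.raw");
    /* dest == "/images/backing.raw" */

    /* absolute backing file: copied through unchanged */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "/mnt/other.raw");
    /* dest == "/mnt/other.raw" */
}
#endif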
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
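/*
 * Illustrative sketch (not compiled): the shape of a minimal format-driver
 * registration.  "dummy" and its callback are hypothetical; only
 * BlockDriver fields used elsewhere in this file are shown.
 */
#if 0
static int dummy_open(BlockDriverState *bs, int flags)
{
    return 0;
}

static BlockDriver bdrv_dummy = {
    .format_name   = "dummy",
    .instance_size = 0,        /* size of the per-image opaque state */
    .bdrv_open     = dummy_open,
    /* no .bdrv_co_readv/.bdrv_aio_readv here: bdrv_register() installs
     * the coroutine and AIO emulation layers automatically */
};

static void dummy_block_init(void)
{
    bdrv_register(&bdrv_dummy);
}
#endif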
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
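/*
 * Illustrative sketch (not compiled): the flag combinations the cache
 * modes above translate to.
 */
#if 0
static void example_cache_modes(void)
{
    int flags = 0;

    bdrv_parse_cache_flags("none", &flags);
    /* flags == BDRV_O_NOCACHE | BDRV_O_CACHE_WB: bypass the host page
     * cache, guest-visible write cache enabled */

    flags = 0;
    bdrv_parse_cache_flags("unsafe", &flags);
    /* flags == BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH: flush requests ignored */

    flags = 0;
    bdrv_parse_cache_flags("writethrough", &flags);
    /* flags == 0: the default, every write is stable on completion */
}
#endif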
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
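/*
 * Illustrative sketch (not compiled): typical open sequence for a named
 * drive.  "hd0" and "disk.qcow2" are example values; passing drv == NULL
 * lets find_image_format() probe the image format.
 */
#if 0
static BlockDriverState *example_open_image(void)
{
    BlockDriverState *bs = bdrv_new("hd0");
    int ret;

    ret = bdrv_open(bs, "disk.qcow2", BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL);
    if (ret < 0) {
        bdrv_delete(bs);
        return NULL;
    }
    return bs;
}
#endif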
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}
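/*
 * Illustrative sketch (not compiled): how a device model hooks the
 * callbacks above.  "MyDevice" and the callback body are hypothetical;
 * the BlockDevOps fields are the ones dispatched in this file.
 */
#if 0
typedef struct MyDevice {
    BlockDriverState *bs;
    bool media_present;
} MyDevice;

static void my_change_media_cb(void *opaque, bool load)
{
    MyDevice *d = opaque;
    d->media_present = load;
}

static const BlockDevOps my_dev_ops = {
    .change_media_cb = my_change_media_cb,
    /* .eject_request_cb, .is_tray_open, .is_medium_locked and .resize_cb
     * are optional and may be left NULL */
};

static void my_device_realize(MyDevice *d, BlockDriverState *bs)
{
    bdrv_attach_dev_nofail(bs, d);
    bdrv_set_dev_ops(bs, &my_dev_ops, d);
    d->bs = bs;
}
#endif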
false; 938 } 939 940 /* 941 * Run consistency checks on an image 942 * 943 * Returns 0 if the check could be completed (it doesn't mean that the image is 944 * free of errors) or -errno when an internal error occurred. The results of the 945 * check are stored in res. 946 */ 947 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res) 948 { 949 if (bs->drv->bdrv_check == NULL) { 950 return -ENOTSUP; 951 } 952 953 memset(res, 0, sizeof(*res)); 954 return bs->drv->bdrv_check(bs, res); 955 } 956 957 #define COMMIT_BUF_SECTORS 2048 958 959 /* commit COW file into the raw image */ 960 int bdrv_commit(BlockDriverState *bs) 961 { 962 BlockDriver *drv = bs->drv; 963 BlockDriver *backing_drv; 964 int64_t sector, total_sectors; 965 int n, ro, open_flags; 966 int ret = 0, rw_ret = 0; 967 uint8_t *buf; 968 char filename[1024]; 969 BlockDriverState *bs_rw, *bs_ro; 970 971 if (!drv) 972 return -ENOMEDIUM; 973 974 if (!bs->backing_hd) { 975 return -ENOTSUP; 976 } 977 978 if (bs->backing_hd->keep_read_only) { 979 return -EACCES; 980 } 981 982 backing_drv = bs->backing_hd->drv; 983 ro = bs->backing_hd->read_only; 984 strncpy(filename, bs->backing_hd->filename, sizeof(filename)); 985 open_flags = bs->backing_hd->open_flags; 986 987 if (ro) { 988 /* re-open as RW */ 989 bdrv_delete(bs->backing_hd); 990 bs->backing_hd = NULL; 991 bs_rw = bdrv_new(""); 992 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, 993 backing_drv); 994 if (rw_ret < 0) { 995 bdrv_delete(bs_rw); 996 /* try to re-open read-only */ 997 bs_ro = bdrv_new(""); 998 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, 999 backing_drv); 1000 if (ret < 0) { 1001 bdrv_delete(bs_ro); 1002 /* drive not functional anymore */ 1003 bs->drv = NULL; 1004 return ret; 1005 } 1006 bs->backing_hd = bs_ro; 1007 return rw_ret; 1008 } 1009 bs->backing_hd = bs_rw; 1010 } 1011 1012 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; 1013 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 1014 1015 for (sector = 0; sector < total_sectors; sector += n) { 1016 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { 1017 1018 if (bdrv_read(bs, sector, buf, n) != 0) { 1019 ret = -EIO; 1020 goto ro_cleanup; 1021 } 1022 1023 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) { 1024 ret = -EIO; 1025 goto ro_cleanup; 1026 } 1027 } 1028 } 1029 1030 if (drv->bdrv_make_empty) { 1031 ret = drv->bdrv_make_empty(bs); 1032 bdrv_flush(bs); 1033 } 1034 1035 /* 1036 * Make sure all data we wrote to the backing device is actually 1037 * stable on disk. 
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
    };

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
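/*
 * Illustrative sketch (not compiled): synchronous sector I/O from
 * non-coroutine context.  Reading sector 0 (the MBR on a DOS-partitioned
 * disk) is just an example payload.
 */
#if 0
static int example_read_mbr(BlockDriverState *bs)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret;

    ret = bdrv_read(bs, 0, buf, 1);   /* one 512-byte sector */
    if (ret < 0) {
        return ret;                   /* -EIO, -ENOMEDIUM, ... */
    }
    return buf[510] == 0x55 && buf[511] == 0xaa;
}
#endif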
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
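/*
 * Illustrative sketch (not compiled): byte-granularity access on top of
 * the sector API.  bdrv_pread()/bdrv_pwrite() perform the read-modify-write
 * of the unaligned head and tail sectors internally, so callers may use
 * arbitrary offsets; offset 510 (the MBR signature) is just an example.
 */
#if 0
static int example_patch_mbr_signature(BlockDriverState *bs)
{
    const uint8_t sig[2] = { 0x55, 0xaa };

    /* unaligned 2-byte write: sector 0 is read, patched and rewritten;
     * returns the byte count (2) on success, -errno on error */
    return bdrv_pwrite(bs, 510, sig, sizeof(sig));
}
#endif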
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}

/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    tracked_request_end(&req);
    return ret;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}

/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
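/*
 * Illustrative sketch (not compiled): issuing vectored I/O from coroutine
 * context.  The single-element iovec mirrors what bdrv_rw_co() builds
 * above; callers may pass any scatter/gather list.
 */
#if 0
static int coroutine_fn example_co_read(BlockDriverState *bs, uint8_t *buf)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = buf,
        .iov_len  = 4 * BDRV_SECTOR_SIZE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    /* reads 4 sectors starting at sector 0, honoring I/O throttling */
    return bdrv_co_readv(bs, 0, 4, &qiov);
}
#endif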
/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}

struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;

/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
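/*
 * Worked example (illustrative numbers): for a 4 GiB image,
 * nb_sectors = 4 GiB / 512 = 8388608.  With the default physical geometry
 * of 16 heads and 63 sectors/track used below, cylinders =
 * 8388608 / (16 * 63) = 8322, which is within the [2, 16383] clamp, so
 * the resulting geometry would be CHS 8322/16/63.
 */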
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}

/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};

void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}

int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
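/*
 * Worked example (illustrative): the first fd_formats entry,
 * { FDRIVE_DRV_144, 18, 80, 1 }, matches images of
 * (max_head + 1) * max_track * last_sect = 2 * 80 * 18 = 2880 sectors,
 * i.e. 2880 * 512 = 1474560 bytes: a standard 1.44 MB 3"1/2 floppy.
 */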
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}

typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    if (!bs->drv->bdrv_co_is_allocated) {
        int64_t n;
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}

/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
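/*
 * Illustrative sketch (not compiled): walking an image in
 * allocated/unallocated runs, the same pattern bdrv_commit() uses above.
 */
#if 0
static void example_scan_allocation(BlockDriverState *bs)
{
    int64_t sector, total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int n;

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
            /* sectors [sector, sector + n) are allocated in this layer */
        } else {
            /* sectors [sector, sector + n) come from the backing file */
        }
    }
}
#endif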
"read" : "write"); 2033 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data); 2034 2035 qobject_decref(data); 2036 } 2037 2038 BlockInfoList *qmp_query_block(Error **errp) 2039 { 2040 BlockInfoList *head = NULL, *cur_item = NULL; 2041 BlockDriverState *bs; 2042 2043 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2044 BlockInfoList *info = g_malloc0(sizeof(*info)); 2045 2046 info->value = g_malloc0(sizeof(*info->value)); 2047 info->value->device = g_strdup(bs->device_name); 2048 info->value->type = g_strdup("unknown"); 2049 info->value->locked = bdrv_dev_is_medium_locked(bs); 2050 info->value->removable = bdrv_dev_has_removable_media(bs); 2051 2052 if (bdrv_dev_has_removable_media(bs)) { 2053 info->value->has_tray_open = true; 2054 info->value->tray_open = bdrv_dev_is_tray_open(bs); 2055 } 2056 2057 if (bdrv_iostatus_is_enabled(bs)) { 2058 info->value->has_io_status = true; 2059 info->value->io_status = bs->iostatus; 2060 } 2061 2062 if (bs->drv) { 2063 info->value->has_inserted = true; 2064 info->value->inserted = g_malloc0(sizeof(*info->value->inserted)); 2065 info->value->inserted->file = g_strdup(bs->filename); 2066 info->value->inserted->ro = bs->read_only; 2067 info->value->inserted->drv = g_strdup(bs->drv->format_name); 2068 info->value->inserted->encrypted = bs->encrypted; 2069 if (bs->backing_file[0]) { 2070 info->value->inserted->has_backing_file = true; 2071 info->value->inserted->backing_file = g_strdup(bs->backing_file); 2072 } 2073 2074 if (bs->io_limits_enabled) { 2075 info->value->inserted->bps = 2076 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; 2077 info->value->inserted->bps_rd = 2078 bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; 2079 info->value->inserted->bps_wr = 2080 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; 2081 info->value->inserted->iops = 2082 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; 2083 info->value->inserted->iops_rd = 2084 bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; 2085 info->value->inserted->iops_wr = 2086 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; 2087 } 2088 } 2089 2090 /* XXX: waiting for the qapi to support GSList */ 2091 if (!cur_item) { 2092 head = cur_item = info; 2093 } else { 2094 cur_item->next = info; 2095 cur_item = info; 2096 } 2097 } 2098 2099 return head; 2100 } 2101 2102 /* Consider exposing this as a full fledged QMP command */ 2103 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) 2104 { 2105 BlockStats *s; 2106 2107 s = g_malloc0(sizeof(*s)); 2108 2109 if (bs->device_name[0]) { 2110 s->has_device = true; 2111 s->device = g_strdup(bs->device_name); 2112 } 2113 2114 s->stats = g_malloc0(sizeof(*s->stats)); 2115 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ]; 2116 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE]; 2117 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ]; 2118 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE]; 2119 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE; 2120 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH]; 2121 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE]; 2122 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ]; 2123 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH]; 2124 2125 if (bs->file) { 2126 s->has_parent = true; 2127 s->parent = qmp_query_blockstat(bs->file, NULL); 2128 } 2129 2130 return s; 2131 } 2132 2133 BlockStatsList *qmp_query_blockstats(Error **errp) 2134 { 2135 BlockStatsList *head = NULL, *cur_item = NULL; 2136 BlockDriverState *bs; 2137 2138 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2139 BlockStatsList 
*info = g_malloc0(sizeof(*info)); 2140 info->value = qmp_query_blockstat(bs, NULL); 2141 2142 /* XXX: waiting for the qapi to support GSList */ 2143 if (!cur_item) { 2144 head = cur_item = info; 2145 } else { 2146 cur_item->next = info; 2147 cur_item = info; 2148 } 2149 } 2150 2151 return head; 2152 } 2153 2154 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 2155 { 2156 if (bs->backing_hd && bs->backing_hd->encrypted) 2157 return bs->backing_file; 2158 else if (bs->encrypted) 2159 return bs->filename; 2160 else 2161 return NULL; 2162 } 2163 2164 void bdrv_get_backing_filename(BlockDriverState *bs, 2165 char *filename, int filename_size) 2166 { 2167 pstrcpy(filename, filename_size, bs->backing_file); 2168 } 2169 2170 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 2171 const uint8_t *buf, int nb_sectors) 2172 { 2173 BlockDriver *drv = bs->drv; 2174 if (!drv) 2175 return -ENOMEDIUM; 2176 if (!drv->bdrv_write_compressed) 2177 return -ENOTSUP; 2178 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2179 return -EIO; 2180 2181 if (bs->dirty_bitmap) { 2182 set_dirty_bitmap(bs, sector_num, nb_sectors, 1); 2183 } 2184 2185 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 2186 } 2187 2188 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2189 { 2190 BlockDriver *drv = bs->drv; 2191 if (!drv) 2192 return -ENOMEDIUM; 2193 if (!drv->bdrv_get_info) 2194 return -ENOTSUP; 2195 memset(bdi, 0, sizeof(*bdi)); 2196 return drv->bdrv_get_info(bs, bdi); 2197 } 2198 2199 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 2200 int64_t pos, int size) 2201 { 2202 BlockDriver *drv = bs->drv; 2203 if (!drv) 2204 return -ENOMEDIUM; 2205 if (drv->bdrv_save_vmstate) 2206 return drv->bdrv_save_vmstate(bs, buf, pos, size); 2207 if (bs->file) 2208 return bdrv_save_vmstate(bs->file, buf, pos, size); 2209 return -ENOTSUP; 2210 } 2211 2212 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 2213 int64_t pos, int size) 2214 { 2215 BlockDriver *drv = bs->drv; 2216 if (!drv) 2217 return -ENOMEDIUM; 2218 if (drv->bdrv_load_vmstate) 2219 return drv->bdrv_load_vmstate(bs, buf, pos, size); 2220 if (bs->file) 2221 return bdrv_load_vmstate(bs->file, buf, pos, size); 2222 return -ENOTSUP; 2223 } 2224 2225 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2226 { 2227 BlockDriver *drv = bs->drv; 2228 2229 if (!drv || !drv->bdrv_debug_event) { 2230 return; 2231 } 2232 2233 return drv->bdrv_debug_event(bs, event); 2234 2235 } 2236 2237 /**************************************************************/ 2238 /* handling of snapshots */ 2239 2240 int bdrv_can_snapshot(BlockDriverState *bs) 2241 { 2242 BlockDriver *drv = bs->drv; 2243 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 2244 return 0; 2245 } 2246 2247 if (!drv->bdrv_snapshot_create) { 2248 if (bs->file != NULL) { 2249 return bdrv_can_snapshot(bs->file); 2250 } 2251 return 0; 2252 } 2253 2254 return 1; 2255 } 2256 2257 int bdrv_is_snapshot(BlockDriverState *bs) 2258 { 2259 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2260 } 2261 2262 BlockDriverState *bdrv_snapshots(void) 2263 { 2264 BlockDriverState *bs; 2265 2266 if (bs_snapshots) { 2267 return bs_snapshots; 2268 } 2269 2270 bs = NULL; 2271 while ((bs = bdrv_next(bs))) { 2272 if (bdrv_can_snapshot(bs)) { 2273 bs_snapshots = bs; 2274 return bs; 2275 } 2276 } 2277 return NULL; 2278 } 2279 2280 int bdrv_snapshot_create(BlockDriverState *bs, 2281 QEMUSnapshotInfo *sn_info) 2282 { 2283 BlockDriver *drv = bs->drv; 2284 if 
(!drv) 2285 return -ENOMEDIUM; 2286 if (drv->bdrv_snapshot_create) 2287 return drv->bdrv_snapshot_create(bs, sn_info); 2288 if (bs->file) 2289 return bdrv_snapshot_create(bs->file, sn_info); 2290 return -ENOTSUP; 2291 } 2292 2293 int bdrv_snapshot_goto(BlockDriverState *bs, 2294 const char *snapshot_id) 2295 { 2296 BlockDriver *drv = bs->drv; 2297 int ret, open_ret; 2298 2299 if (!drv) 2300 return -ENOMEDIUM; 2301 if (drv->bdrv_snapshot_goto) 2302 return drv->bdrv_snapshot_goto(bs, snapshot_id); 2303 2304 if (bs->file) { 2305 drv->bdrv_close(bs); 2306 ret = bdrv_snapshot_goto(bs->file, snapshot_id); 2307 open_ret = drv->bdrv_open(bs, bs->open_flags); 2308 if (open_ret < 0) { 2309 bdrv_delete(bs->file); 2310 bs->drv = NULL; 2311 return open_ret; 2312 } 2313 return ret; 2314 } 2315 2316 return -ENOTSUP; 2317 } 2318 2319 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) 2320 { 2321 BlockDriver *drv = bs->drv; 2322 if (!drv) 2323 return -ENOMEDIUM; 2324 if (drv->bdrv_snapshot_delete) 2325 return drv->bdrv_snapshot_delete(bs, snapshot_id); 2326 if (bs->file) 2327 return bdrv_snapshot_delete(bs->file, snapshot_id); 2328 return -ENOTSUP; 2329 } 2330 2331 int bdrv_snapshot_list(BlockDriverState *bs, 2332 QEMUSnapshotInfo **psn_info) 2333 { 2334 BlockDriver *drv = bs->drv; 2335 if (!drv) 2336 return -ENOMEDIUM; 2337 if (drv->bdrv_snapshot_list) 2338 return drv->bdrv_snapshot_list(bs, psn_info); 2339 if (bs->file) 2340 return bdrv_snapshot_list(bs->file, psn_info); 2341 return -ENOTSUP; 2342 } 2343 2344 int bdrv_snapshot_load_tmp(BlockDriverState *bs, 2345 const char *snapshot_name) 2346 { 2347 BlockDriver *drv = bs->drv; 2348 if (!drv) { 2349 return -ENOMEDIUM; 2350 } 2351 if (!bs->read_only) { 2352 return -EINVAL; 2353 } 2354 if (drv->bdrv_snapshot_load_tmp) { 2355 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); 2356 } 2357 return -ENOTSUP; 2358 } 2359 2360 #define NB_SUFFIXES 4 2361 2362 char *get_human_readable_size(char *buf, int buf_size, int64_t size) 2363 { 2364 static const char suffixes[NB_SUFFIXES] = "KMGT"; 2365 int64_t base; 2366 int i; 2367 2368 if (size <= 999) { 2369 snprintf(buf, buf_size, "%" PRId64, size); 2370 } else { 2371 base = 1024; 2372 for(i = 0; i < NB_SUFFIXES; i++) { 2373 if (size < (10 * base)) { 2374 snprintf(buf, buf_size, "%0.1f%c", 2375 (double)size / base, 2376 suffixes[i]); 2377 break; 2378 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) { 2379 snprintf(buf, buf_size, "%" PRId64 "%c", 2380 ((size + (base >> 1)) / base), 2381 suffixes[i]); 2382 break; 2383 } 2384 base = base * 1024; 2385 } 2386 } 2387 return buf; 2388 } 2389 2390 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) 2391 { 2392 char buf1[128], date_buf[128], clock_buf[128]; 2393 #ifdef _WIN32 2394 struct tm *ptm; 2395 #else 2396 struct tm tm; 2397 #endif 2398 time_t ti; 2399 int64_t secs; 2400 2401 if (!sn) { 2402 snprintf(buf, buf_size, 2403 "%-10s%-20s%7s%20s%15s", 2404 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK"); 2405 } else { 2406 ti = sn->date_sec; 2407 #ifdef _WIN32 2408 ptm = localtime(&ti); 2409 strftime(date_buf, sizeof(date_buf), 2410 "%Y-%m-%d %H:%M:%S", ptm); 2411 #else 2412 localtime_r(&ti, &tm); 2413 strftime(date_buf, sizeof(date_buf), 2414 "%Y-%m-%d %H:%M:%S", &tm); 2415 #endif 2416 secs = sn->vm_clock_nsec / 1000000000; 2417 snprintf(clock_buf, sizeof(clock_buf), 2418 "%02d:%02d:%02d.%03d", 2419 (int)(secs / 3600), 2420 (int)((secs / 60) % 60), 2421 (int)(secs % 60), 2422 (int)((sn->vm_clock_nsec / 1000000) % 1000)); 2423 
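/* Emit one table row; the column widths below must stay in sync with
 * the header row printed in the !sn branch above. */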
snprintf(buf, buf_size, 2424 "%-10s%-20s%7s%20s%15s", 2425 sn->id_str, sn->name, 2426 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size), 2427 date_buf, 2428 clock_buf); 2429 } 2430 return buf; 2431 } 2432 2433 /**************************************************************/ 2434 /* async I/Os */ 2435 2436 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 2437 QEMUIOVector *qiov, int nb_sectors, 2438 BlockDriverCompletionFunc *cb, void *opaque) 2439 { 2440 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 2441 2442 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 2443 cb, opaque, false); 2444 } 2445 2446 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 2447 QEMUIOVector *qiov, int nb_sectors, 2448 BlockDriverCompletionFunc *cb, void *opaque) 2449 { 2450 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 2451 2452 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 2453 cb, opaque, true); 2454 } 2455 2456 2457 typedef struct MultiwriteCB { 2458 int error; 2459 int num_requests; 2460 int num_callbacks; 2461 struct { 2462 BlockDriverCompletionFunc *cb; 2463 void *opaque; 2464 QEMUIOVector *free_qiov; 2465 void *free_buf; 2466 } callbacks[]; 2467 } MultiwriteCB; 2468 2469 static void multiwrite_user_cb(MultiwriteCB *mcb) 2470 { 2471 int i; 2472 2473 for (i = 0; i < mcb->num_callbacks; i++) { 2474 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 2475 if (mcb->callbacks[i].free_qiov) { 2476 qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 2477 } 2478 g_free(mcb->callbacks[i].free_qiov); 2479 qemu_vfree(mcb->callbacks[i].free_buf); 2480 } 2481 } 2482 2483 static void multiwrite_cb(void *opaque, int ret) 2484 { 2485 MultiwriteCB *mcb = opaque; 2486 2487 trace_multiwrite_cb(mcb, ret); 2488 2489 if (ret < 0 && !mcb->error) { 2490 mcb->error = ret; 2491 } 2492 2493 mcb->num_requests--; 2494 if (mcb->num_requests == 0) { 2495 multiwrite_user_cb(mcb); 2496 g_free(mcb); 2497 } 2498 } 2499 2500 static int multiwrite_req_compare(const void *a, const void *b) 2501 { 2502 const BlockRequest *req1 = a, *req2 = b; 2503 2504 /* 2505 * Note that we can't simply subtract req2->sector from req1->sector 2506 * here as that could overflow the return value. 2507 */ 2508 if (req1->sector > req2->sector) { 2509 return 1; 2510 } else if (req1->sector < req2->sector) { 2511 return -1; 2512 } else { 2513 return 0; 2514 } 2515 } 2516 2517 /* 2518 * Takes a bunch of requests and tries to merge them. Returns the number of 2519 * requests that remain after merging. 2520 */ 2521 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 2522 int num_reqs, MultiwriteCB *mcb) 2523 { 2524 int i, outidx; 2525 2526 // Sort requests by start sector 2527 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 2528 2529 // Check if adjacent requests touch the same clusters. If so, combine them, 2530 // filling up gaps with zero sectors. 2531 outidx = 0; 2532 for (i = 1; i < num_reqs; i++) { 2533 int merge = 0; 2534 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 2535 2536 // This handles the cases that are valid for all block drivers, namely 2537 // exactly sequential writes and overlapping writes. 2538 if (reqs[i].sector <= oldreq_last) { 2539 merge = 1; 2540 } 2541 2542 // The block driver may decide that it makes sense to combine requests 2543 // even if there is a gap of some sectors between them. 
In this case, 2544 // the gap is filled with zeros (therefore only applicable to as-yet 2545 // unused space in formats like qcow2). 2546 if (!merge && bs->drv->bdrv_merge_requests) { 2547 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]); 2548 } 2549 2550 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 2551 merge = 0; 2552 } 2553 2554 if (merge) { 2555 size_t size; 2556 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 2557 qemu_iovec_init(qiov, 2558 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 2559 2560 // Add the first request to the merged one. If the requests are 2561 // overlapping, drop the last sectors of the first request. 2562 size = (reqs[i].sector - reqs[outidx].sector) << 9; 2563 qemu_iovec_concat(qiov, reqs[outidx].qiov, size); 2564 2565 // We might need to add some zeros between the two requests 2566 if (reqs[i].sector > oldreq_last) { 2567 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9; 2568 uint8_t *buf = qemu_blockalign(bs, zero_bytes); 2569 memset(buf, 0, zero_bytes); 2570 qemu_iovec_add(qiov, buf, zero_bytes); 2571 mcb->callbacks[i].free_buf = buf; 2572 } 2573 2574 // Add the second request 2575 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); 2576 2577 reqs[outidx].nb_sectors = qiov->size >> 9; 2578 reqs[outidx].qiov = qiov; 2579 2580 mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 2581 } else { 2582 outidx++; 2583 reqs[outidx].sector = reqs[i].sector; 2584 reqs[outidx].nb_sectors = reqs[i].nb_sectors; 2585 reqs[outidx].qiov = reqs[i].qiov; 2586 } 2587 } 2588 2589 return outidx + 1; 2590 } 2591 2592 /* 2593 * Submit multiple AIO write requests at once. 2594 * 2595 * On success, the function returns 0 and all requests in the reqs array have 2596 * been submitted. On error, this function returns -1, and any of the 2597 * requests may or may not have been submitted yet. In particular, this means that the 2598 * callback will be called for some of the requests, but not for others. The 2599 * caller must check the error field of the BlockRequest to wait for the right 2600 * callbacks (if error != 0, no callback will be called). 2601 * 2602 * The implementation may modify the contents of the reqs array, e.g. to merge 2603 * requests. However, the fields opaque and error are left unmodified as they 2604 * are used to signal failure for a single request to the caller. 2605 */ 2606 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 2607 { 2608 BlockDriverAIOCB *acb; 2609 MultiwriteCB *mcb; 2610 int i; 2611 2612 /* don't submit writes if we don't have a medium */ 2613 if (bs->drv == NULL) { 2614 for (i = 0; i < num_reqs; i++) { 2615 reqs[i].error = -ENOMEDIUM; 2616 } 2617 return -1; 2618 } 2619 2620 if (num_reqs == 0) { 2621 return 0; 2622 } 2623 2624 // Create MultiwriteCB structure 2625 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 2626 mcb->num_requests = 0; 2627 mcb->num_callbacks = num_reqs; 2628 2629 for (i = 0; i < num_reqs; i++) { 2630 mcb->callbacks[i].cb = reqs[i].cb; 2631 mcb->callbacks[i].opaque = reqs[i].opaque; 2632 } 2633 2634 // Check for mergeable requests 2635 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 2636 2637 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 2638 2639 /* 2640 * Run the aio requests.
As soon as one request can't be submitted 2641 * successfully, fail all requests that are not yet submitted (we must 2642 * return failure for all requests anyway). 2643 * 2644 * num_requests cannot be set to the right value immediately: If 2645 * bdrv_aio_writev fails for some request, num_requests would be too high 2646 * and therefore multiwrite_cb() would never recognize the multiwrite 2647 * request as completed. We also cannot use the loop variable i to set it 2648 * when the first request fails because the callback may already have been 2649 * called for previously submitted requests. Thus, num_requests must be 2650 * incremented for each request that is submitted. 2651 * 2652 * The problem that callbacks may be called early also means that we need 2653 * to take care that num_requests doesn't become 0 before all requests are 2654 * submitted - otherwise multiwrite_cb() would consider the multiwrite request 2655 * completed too early. A dummy request that is "completed" by a manual call to 2656 * multiwrite_cb() takes care of this. 2657 */ 2658 mcb->num_requests = 1; 2659 2660 // Run the aio requests 2661 for (i = 0; i < num_reqs; i++) { 2662 mcb->num_requests++; 2663 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, 2664 reqs[i].nb_sectors, multiwrite_cb, mcb); 2665 2666 if (acb == NULL) { 2667 // We can only fail the whole thing if no request has been 2668 // submitted yet. Otherwise we'll wait for the submitted AIOs to 2669 // complete and report the error in the callback. 2670 if (i == 0) { 2671 trace_bdrv_aio_multiwrite_earlyfail(mcb); 2672 goto fail; 2673 } else { 2674 trace_bdrv_aio_multiwrite_latefail(mcb, i); 2675 multiwrite_cb(mcb, -EIO); 2676 break; 2677 } 2678 } 2679 } 2680 2681 /* Complete the dummy request */ 2682 multiwrite_cb(mcb, 0); 2683 2684 return 0; 2685 2686 fail: 2687 for (i = 0; i < mcb->num_callbacks; i++) { 2688 reqs[i].error = -EIO; 2689 } 2690 g_free(mcb); 2691 return -1; 2692 } 2693 2694 void bdrv_aio_cancel(BlockDriverAIOCB *acb) 2695 { 2696 acb->pool->cancel(acb); 2697 } 2698 2699 /* block I/O throttling */ 2700 static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, 2701 bool is_write, double elapsed_time, uint64_t *wait) 2702 { 2703 uint64_t bps_limit = 0; 2704 double bytes_limit, bytes_base, bytes_res; 2705 double slice_time, wait_time; 2706 2707 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { 2708 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; 2709 } else if (bs->io_limits.bps[is_write]) { 2710 bps_limit = bs->io_limits.bps[is_write]; 2711 } else { 2712 if (wait) { 2713 *wait = 0; 2714 } 2715 2716 return false; 2717 } 2718 2719 slice_time = bs->slice_end - bs->slice_start; 2720 slice_time /= (NANOSECONDS_PER_SECOND); 2721 bytes_limit = bps_limit * slice_time; 2722 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write]; 2723 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { 2724 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write]; 2725 } 2726 2727 /* bytes_base: the number of bytes which have already been read/written, 2728 * obtained from the accumulated statistics. 2729 * bytes_res: the remaining bytes of data which need to be read/written. 2730 * (bytes_base + bytes_res) / bps_limit: used to calculate 2731 * the total time for completing the reading/writing of all data.
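 * For example (illustrative numbers only): with bps_limit = 1000000 bytes/s,
 * bytes_base = 600000, bytes_res = 200000 and elapsed_time = 0.5s, all data
 * would be transferred at (600000 + 200000) / 1000000 = 0.8s, so the request
 * still has to wait roughly 0.8 - 0.5 = 0.3s (the wait_time computed below).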
2732 */ 2733 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; 2734 2735 if (bytes_base + bytes_res <= bytes_limit) { 2736 if (wait) { 2737 *wait = 0; 2738 } 2739 2740 return false; 2741 } 2742 2743 /* Calc approx time to dispatch */ 2744 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; 2745 2746 /* When the I/O rate at runtime exceeds the limits, 2747 * bs->slice_end needs to be extended so that the current statistic 2748 * info can be kept until the timer fires; the extension is increased and 2749 * tuned based on experimental results. 2750 */ 2751 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; 2752 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; 2753 if (wait) { 2754 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; 2755 } 2756 2757 return true; 2758 } 2759 2760 static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, 2761 double elapsed_time, uint64_t *wait) 2762 { 2763 uint64_t iops_limit = 0; 2764 double ios_limit, ios_base; 2765 double slice_time, wait_time; 2766 2767 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { 2768 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; 2769 } else if (bs->io_limits.iops[is_write]) { 2770 iops_limit = bs->io_limits.iops[is_write]; 2771 } else { 2772 if (wait) { 2773 *wait = 0; 2774 } 2775 2776 return false; 2777 } 2778 2779 slice_time = bs->slice_end - bs->slice_start; 2780 slice_time /= (NANOSECONDS_PER_SECOND); 2781 ios_limit = iops_limit * slice_time; 2782 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write]; 2783 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { 2784 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write]; 2785 } 2786 2787 if (ios_base + 1 <= ios_limit) { 2788 if (wait) { 2789 *wait = 0; 2790 } 2791 2792 return false; 2793 } 2794 2795 /* Calc approx time to dispatch */ 2796 wait_time = (ios_base + 1) / iops_limit; 2797 if (wait_time > elapsed_time) { 2798 wait_time = wait_time - elapsed_time; 2799 } else { 2800 wait_time = 0; 2801 } 2802 2803 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; 2804 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; 2805 if (wait) { 2806 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; 2807 } 2808 2809 return true; 2810 } 2811 2812 static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, 2813 bool is_write, int64_t *wait) 2814 { 2815 int64_t now, max_wait; 2816 uint64_t bps_wait = 0, iops_wait = 0; 2817 double elapsed_time; 2818 int bps_ret, iops_ret; 2819 2820 now = qemu_get_clock_ns(vm_clock); 2821 if ((bs->slice_start < now) 2822 && (bs->slice_end > now)) { 2823 bs->slice_end = now + bs->slice_time; 2824 } else { 2825 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME; 2826 bs->slice_start = now; 2827 bs->slice_end = now + bs->slice_time; 2828 2829 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write]; 2830 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write]; 2831 2832 bs->io_base.ios[is_write] = bs->nr_ops[is_write]; 2833 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write]; 2834 } 2835 2836 elapsed_time = now - bs->slice_start; 2837 elapsed_time /= (NANOSECONDS_PER_SECOND); 2838 2839 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, 2840 is_write, elapsed_time, &bps_wait); 2841 iops_ret = bdrv_exceed_iops_limits(bs, is_write, 2842 elapsed_time, &iops_wait); 2843 if (bps_ret || iops_ret) { 2844 max_wait = bps_wait > iops_wait ?
bps_wait : iops_wait; 2845 if (wait) { 2846 *wait = max_wait; 2847 } 2848 2849 now = qemu_get_clock_ns(vm_clock); 2850 if (bs->slice_end < now + max_wait) { 2851 bs->slice_end = now + max_wait; 2852 } 2853 2854 return true; 2855 } 2856 2857 if (wait) { 2858 *wait = 0; 2859 } 2860 2861 return false; 2862 } 2863 2864 /**************************************************************/ 2865 /* async block device emulation */ 2866 2867 typedef struct BlockDriverAIOCBSync { 2868 BlockDriverAIOCB common; 2869 QEMUBH *bh; 2870 int ret; 2871 /* vector translation state */ 2872 QEMUIOVector *qiov; 2873 uint8_t *bounce; 2874 int is_write; 2875 } BlockDriverAIOCBSync; 2876 2877 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) 2878 { 2879 BlockDriverAIOCBSync *acb = 2880 container_of(blockacb, BlockDriverAIOCBSync, common); 2881 qemu_bh_delete(acb->bh); 2882 acb->bh = NULL; 2883 qemu_aio_release(acb); 2884 } 2885 2886 static AIOPool bdrv_em_aio_pool = { 2887 .aiocb_size = sizeof(BlockDriverAIOCBSync), 2888 .cancel = bdrv_aio_cancel_em, 2889 }; 2890 2891 static void bdrv_aio_bh_cb(void *opaque) 2892 { 2893 BlockDriverAIOCBSync *acb = opaque; 2894 2895 if (!acb->is_write) 2896 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); 2897 qemu_vfree(acb->bounce); 2898 acb->common.cb(acb->common.opaque, acb->ret); 2899 qemu_bh_delete(acb->bh); 2900 acb->bh = NULL; 2901 qemu_aio_release(acb); 2902 } 2903 2904 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 2905 int64_t sector_num, 2906 QEMUIOVector *qiov, 2907 int nb_sectors, 2908 BlockDriverCompletionFunc *cb, 2909 void *opaque, 2910 int is_write) 2911 2912 { 2913 BlockDriverAIOCBSync *acb; 2914 2915 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2916 acb->is_write = is_write; 2917 acb->qiov = qiov; 2918 acb->bounce = qemu_blockalign(bs, qiov->size); 2919 2920 if (!acb->bh) 2921 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2922 2923 if (is_write) { 2924 qemu_iovec_to_buffer(acb->qiov, acb->bounce); 2925 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 2926 } else { 2927 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 2928 } 2929 2930 qemu_bh_schedule(acb->bh); 2931 2932 return &acb->common; 2933 } 2934 2935 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 2936 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2937 BlockDriverCompletionFunc *cb, void *opaque) 2938 { 2939 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 2940 } 2941 2942 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 2943 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2944 BlockDriverCompletionFunc *cb, void *opaque) 2945 { 2946 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 2947 } 2948 2949 2950 typedef struct BlockDriverAIOCBCoroutine { 2951 BlockDriverAIOCB common; 2952 BlockRequest req; 2953 bool is_write; 2954 QEMUBH* bh; 2955 } BlockDriverAIOCBCoroutine; 2956 2957 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) 2958 { 2959 qemu_aio_flush(); 2960 } 2961 2962 static AIOPool bdrv_em_co_aio_pool = { 2963 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), 2964 .cancel = bdrv_aio_co_cancel_em, 2965 }; 2966 2967 static void bdrv_co_em_bh(void *opaque) 2968 { 2969 BlockDriverAIOCBCoroutine *acb = opaque; 2970 2971 acb->common.cb(acb->common.opaque, acb->req.error); 2972 qemu_bh_delete(acb->bh); 2973 qemu_aio_release(acb); 2974 } 2975 2976 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 2977 
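/* Coroutine entry point used by bdrv_co_aio_rw_vector(): the request is
 * carried out through the coroutine read/write path, and completion is then
 * deferred to a bottom half (bdrv_co_em_bh) so that the caller's callback is
 * never invoked from coroutine context. */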
static void coroutine_fn bdrv_co_do_rw(void *opaque) 2978 { 2979 BlockDriverAIOCBCoroutine *acb = opaque; 2980 BlockDriverState *bs = acb->common.bs; 2981 2982 if (!acb->is_write) { 2983 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 2984 acb->req.nb_sectors, acb->req.qiov); 2985 } else { 2986 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 2987 acb->req.nb_sectors, acb->req.qiov); 2988 } 2989 2990 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 2991 qemu_bh_schedule(acb->bh); 2992 } 2993 2994 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 2995 int64_t sector_num, 2996 QEMUIOVector *qiov, 2997 int nb_sectors, 2998 BlockDriverCompletionFunc *cb, 2999 void *opaque, 3000 bool is_write) 3001 { 3002 Coroutine *co; 3003 BlockDriverAIOCBCoroutine *acb; 3004 3005 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); 3006 acb->req.sector = sector_num; 3007 acb->req.nb_sectors = nb_sectors; 3008 acb->req.qiov = qiov; 3009 acb->is_write = is_write; 3010 3011 co = qemu_coroutine_create(bdrv_co_do_rw); 3012 qemu_coroutine_enter(co, acb); 3013 3014 return &acb->common; 3015 } 3016 3017 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 3018 { 3019 BlockDriverAIOCBCoroutine *acb = opaque; 3020 BlockDriverState *bs = acb->common.bs; 3021 3022 acb->req.error = bdrv_co_flush(bs); 3023 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 3024 qemu_bh_schedule(acb->bh); 3025 } 3026 3027 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 3028 BlockDriverCompletionFunc *cb, void *opaque) 3029 { 3030 trace_bdrv_aio_flush(bs, opaque); 3031 3032 Coroutine *co; 3033 BlockDriverAIOCBCoroutine *acb; 3034 3035 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); 3036 co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 3037 qemu_coroutine_enter(co, acb); 3038 3039 return &acb->common; 3040 } 3041 3042 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 3043 { 3044 BlockDriverAIOCBCoroutine *acb = opaque; 3045 BlockDriverState *bs = acb->common.bs; 3046 3047 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 3048 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); 3049 qemu_bh_schedule(acb->bh); 3050 } 3051 3052 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, 3053 int64_t sector_num, int nb_sectors, 3054 BlockDriverCompletionFunc *cb, void *opaque) 3055 { 3056 Coroutine *co; 3057 BlockDriverAIOCBCoroutine *acb; 3058 3059 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 3060 3061 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); 3062 acb->req.sector = sector_num; 3063 acb->req.nb_sectors = nb_sectors; 3064 co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 3065 qemu_coroutine_enter(co, acb); 3066 3067 return &acb->common; 3068 } 3069 3070 void bdrv_init(void) 3071 { 3072 module_call_init(MODULE_INIT_BLOCK); 3073 } 3074 3075 void bdrv_init_with_whitelist(void) 3076 { 3077 use_bdrv_whitelist = 1; 3078 bdrv_init(); 3079 } 3080 3081 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, 3082 BlockDriverCompletionFunc *cb, void *opaque) 3083 { 3084 BlockDriverAIOCB *acb; 3085 3086 if (pool->free_aiocb) { 3087 acb = pool->free_aiocb; 3088 pool->free_aiocb = acb->next; 3089 } else { 3090 acb = g_malloc0(pool->aiocb_size); 3091 acb->pool = pool; 3092 } 3093 acb->bs = bs; 3094 acb->cb = cb; 3095 acb->opaque = opaque; 3096 return acb; 3097 } 3098 3099 void qemu_aio_release(void *p) 3100 { 3101 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p; 3102 AIOPool *pool = acb->pool; 3103 acb->next = pool->free_aiocb; 3104 
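/* make the released AIOCB the head of the pool's free list so that the
 * next qemu_aio_get() on this pool can reuse it */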
pool->free_aiocb = acb; 3105 } 3106 3107 /**************************************************************/ 3108 /* Coroutine block device emulation */ 3109 3110 typedef struct CoroutineIOCompletion { 3111 Coroutine *coroutine; 3112 int ret; 3113 } CoroutineIOCompletion; 3114 3115 static void bdrv_co_io_em_complete(void *opaque, int ret) 3116 { 3117 CoroutineIOCompletion *co = opaque; 3118 3119 co->ret = ret; 3120 qemu_coroutine_enter(co->coroutine, NULL); 3121 } 3122 3123 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 3124 int nb_sectors, QEMUIOVector *iov, 3125 bool is_write) 3126 { 3127 CoroutineIOCompletion co = { 3128 .coroutine = qemu_coroutine_self(), 3129 }; 3130 BlockDriverAIOCB *acb; 3131 3132 if (is_write) { 3133 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 3134 bdrv_co_io_em_complete, &co); 3135 } else { 3136 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 3137 bdrv_co_io_em_complete, &co); 3138 } 3139 3140 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); 3141 if (!acb) { 3142 return -EIO; 3143 } 3144 qemu_coroutine_yield(); 3145 3146 return co.ret; 3147 } 3148 3149 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 3150 int64_t sector_num, int nb_sectors, 3151 QEMUIOVector *iov) 3152 { 3153 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 3154 } 3155 3156 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 3157 int64_t sector_num, int nb_sectors, 3158 QEMUIOVector *iov) 3159 { 3160 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 3161 } 3162 3163 static void coroutine_fn bdrv_flush_co_entry(void *opaque) 3164 { 3165 RwCo *rwco = opaque; 3166 3167 rwco->ret = bdrv_co_flush(rwco->bs); 3168 } 3169 3170 int coroutine_fn bdrv_co_flush(BlockDriverState *bs) 3171 { 3172 int ret; 3173 3174 if (!bs->drv) { 3175 return 0; 3176 } 3177 3178 /* Write back cached data to the OS even with cache=unsafe */ 3179 if (bs->drv->bdrv_co_flush_to_os) { 3180 ret = bs->drv->bdrv_co_flush_to_os(bs); 3181 if (ret < 0) { 3182 return ret; 3183 } 3184 } 3185 3186 /* But don't actually force it to the disk with cache=unsafe */ 3187 if (bs->open_flags & BDRV_O_NO_FLUSH) { 3188 return 0; 3189 } 3190 3191 if (bs->drv->bdrv_co_flush_to_disk) { 3192 return bs->drv->bdrv_co_flush_to_disk(bs); 3193 } else if (bs->drv->bdrv_aio_flush) { 3194 BlockDriverAIOCB *acb; 3195 CoroutineIOCompletion co = { 3196 .coroutine = qemu_coroutine_self(), 3197 }; 3198 3199 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 3200 if (acb == NULL) { 3201 return -EIO; 3202 } else { 3203 qemu_coroutine_yield(); 3204 return co.ret; 3205 } 3206 } else { 3207 /* 3208 * Some block drivers always operate in either writethrough or unsafe 3209 * mode and therefore don't support bdrv_flush. Usually qemu doesn't 3210 * know how the server works (because the behaviour is hardcoded or 3211 * depends on server-side configuration), so we can't ensure that 3212 * everything is safe on disk. Returning an error doesn't work because 3213 * that would break guests even if the server operates in writethrough 3214 * mode. 3215 * 3216 * Let's hope the user knows what he's doing.
3217 */ 3218 return 0; 3219 } 3220 } 3221 3222 void bdrv_invalidate_cache(BlockDriverState *bs) 3223 { 3224 if (bs->drv && bs->drv->bdrv_invalidate_cache) { 3225 bs->drv->bdrv_invalidate_cache(bs); 3226 } 3227 } 3228 3229 void bdrv_invalidate_cache_all(void) 3230 { 3231 BlockDriverState *bs; 3232 3233 QTAILQ_FOREACH(bs, &bdrv_states, list) { 3234 bdrv_invalidate_cache(bs); 3235 } 3236 } 3237 3238 int bdrv_flush(BlockDriverState *bs) 3239 { 3240 Coroutine *co; 3241 RwCo rwco = { 3242 .bs = bs, 3243 .ret = NOT_DONE, 3244 }; 3245 3246 if (qemu_in_coroutine()) { 3247 /* Fast-path if already in coroutine context */ 3248 bdrv_flush_co_entry(&rwco); 3249 } else { 3250 co = qemu_coroutine_create(bdrv_flush_co_entry); 3251 qemu_coroutine_enter(co, &rwco); 3252 while (rwco.ret == NOT_DONE) { 3253 qemu_aio_wait(); 3254 } 3255 } 3256 3257 return rwco.ret; 3258 } 3259 3260 static void coroutine_fn bdrv_discard_co_entry(void *opaque) 3261 { 3262 RwCo *rwco = opaque; 3263 3264 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); 3265 } 3266 3267 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, 3268 int nb_sectors) 3269 { 3270 if (!bs->drv) { 3271 return -ENOMEDIUM; 3272 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) { 3273 return -EIO; 3274 } else if (bs->read_only) { 3275 return -EROFS; 3276 } else if (bs->drv->bdrv_co_discard) { 3277 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors); 3278 } else if (bs->drv->bdrv_aio_discard) { 3279 BlockDriverAIOCB *acb; 3280 CoroutineIOCompletion co = { 3281 .coroutine = qemu_coroutine_self(), 3282 }; 3283 3284 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, 3285 bdrv_co_io_em_complete, &co); 3286 if (acb == NULL) { 3287 return -EIO; 3288 } else { 3289 qemu_coroutine_yield(); 3290 return co.ret; 3291 } 3292 } else { 3293 return 0; 3294 } 3295 } 3296 3297 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 3298 { 3299 Coroutine *co; 3300 RwCo rwco = { 3301 .bs = bs, 3302 .sector_num = sector_num, 3303 .nb_sectors = nb_sectors, 3304 .ret = NOT_DONE, 3305 }; 3306 3307 if (qemu_in_coroutine()) { 3308 /* Fast-path if already in coroutine context */ 3309 bdrv_discard_co_entry(&rwco); 3310 } else { 3311 co = qemu_coroutine_create(bdrv_discard_co_entry); 3312 qemu_coroutine_enter(co, &rwco); 3313 while (rwco.ret == NOT_DONE) { 3314 qemu_aio_wait(); 3315 } 3316 } 3317 3318 return rwco.ret; 3319 } 3320 3321 /**************************************************************/ 3322 /* removable device support */ 3323 3324 /** 3325 * Return TRUE if the media is present 3326 */ 3327 int bdrv_is_inserted(BlockDriverState *bs) 3328 { 3329 BlockDriver *drv = bs->drv; 3330 3331 if (!drv) 3332 return 0; 3333 if (!drv->bdrv_is_inserted) 3334 return 1; 3335 return drv->bdrv_is_inserted(bs); 3336 } 3337 3338 /** 3339 * Return whether the media changed since the last call to this 3340 * function, or -ENOTSUP if we don't know. Most drivers don't know. 3341 */ 3342 int bdrv_media_changed(BlockDriverState *bs) 3343 { 3344 BlockDriver *drv = bs->drv; 3345 3346 if (drv && drv->bdrv_media_changed) { 3347 return drv->bdrv_media_changed(bs); 3348 } 3349 return -ENOTSUP; 3350 } 3351 3352 /** 3353 * If eject_flag is TRUE, eject the media. 
Otherwise, close the tray 3354 */ 3355 void bdrv_eject(BlockDriverState *bs, int eject_flag) 3356 { 3357 BlockDriver *drv = bs->drv; 3358 3359 if (drv && drv->bdrv_eject) { 3360 drv->bdrv_eject(bs, eject_flag); 3361 } 3362 } 3363 3364 /** 3365 * Lock or unlock the media (if it is locked, the user won't be able 3366 * to eject it manually). 3367 */ 3368 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3369 { 3370 BlockDriver *drv = bs->drv; 3371 3372 trace_bdrv_lock_medium(bs, locked); 3373 3374 if (drv && drv->bdrv_lock_medium) { 3375 drv->bdrv_lock_medium(bs, locked); 3376 } 3377 } 3378 3379 /* needed for generic scsi interface */ 3380 3381 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 3382 { 3383 BlockDriver *drv = bs->drv; 3384 3385 if (drv && drv->bdrv_ioctl) 3386 return drv->bdrv_ioctl(bs, req, buf); 3387 return -ENOTSUP; 3388 } 3389 3390 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 3391 unsigned long int req, void *buf, 3392 BlockDriverCompletionFunc *cb, void *opaque) 3393 { 3394 BlockDriver *drv = bs->drv; 3395 3396 if (drv && drv->bdrv_aio_ioctl) 3397 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 3398 return NULL; 3399 } 3400 3401 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align) 3402 { 3403 bs->buffer_alignment = align; 3404 } 3405 3406 void *qemu_blockalign(BlockDriverState *bs, size_t size) 3407 { 3408 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size); 3409 } 3410 3411 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) 3412 { 3413 int64_t bitmap_size; 3414 3415 bs->dirty_count = 0; 3416 if (enable) { 3417 if (!bs->dirty_bitmap) { 3418 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + 3419 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; 3420 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; 3421 3422 bs->dirty_bitmap = g_malloc0(bitmap_size); 3423 } 3424 } else { 3425 if (bs->dirty_bitmap) { 3426 g_free(bs->dirty_bitmap); 3427 bs->dirty_bitmap = NULL; 3428 } 3429 } 3430 } 3431 3432 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) 3433 { 3434 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; 3435 3436 if (bs->dirty_bitmap && 3437 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { 3438 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & 3439 (1UL << (chunk % (sizeof(unsigned long) * 8)))); 3440 } else { 3441 return 0; 3442 } 3443 } 3444 3445 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3446 int nr_sectors) 3447 { 3448 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); 3449 } 3450 3451 int64_t bdrv_get_dirty_count(BlockDriverState *bs) 3452 { 3453 return bs->dirty_count; 3454 } 3455 3456 void bdrv_set_in_use(BlockDriverState *bs, int in_use) 3457 { 3458 assert(bs->in_use != in_use); 3459 bs->in_use = in_use; 3460 } 3461 3462 int bdrv_in_use(BlockDriverState *bs) 3463 { 3464 return bs->in_use; 3465 } 3466 3467 void bdrv_iostatus_enable(BlockDriverState *bs) 3468 { 3469 bs->iostatus_enabled = true; 3470 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3471 } 3472 3473 /* The I/O status is only enabled if the drive explicitly 3474 * enables it _and_ the VM is configured to stop on errors */ 3475 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 3476 { 3477 return (bs->iostatus_enabled && 3478 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC || 3479 bs->on_write_error == BLOCK_ERR_STOP_ANY || 3480 bs->on_read_error == BLOCK_ERR_STOP_ANY)); 3481 } 3482 3483 void bdrv_iostatus_disable(BlockDriverState *bs) 3484 { 3485 
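/* bs->iostatus itself keeps its last value until bdrv_iostatus_enable()
 * resets it to BLOCK_DEVICE_IO_STATUS_OK */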
bs->iostatus_enabled = false; 3486 } 3487 3488 void bdrv_iostatus_reset(BlockDriverState *bs) 3489 { 3490 if (bdrv_iostatus_is_enabled(bs)) { 3491 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 3492 } 3493 } 3494 3495 /* XXX: Today this is set by device models because it makes the implementation 3496 quite simple. However, the block layer knows about the error, so it's 3497 possible to implement this without device models being involved */ 3498 void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 3499 { 3500 if (bdrv_iostatus_is_enabled(bs) && 3501 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 3502 assert(error >= 0); 3503 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 3504 BLOCK_DEVICE_IO_STATUS_FAILED; 3505 } 3506 } 3507 3508 void 3509 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes, 3510 enum BlockAcctType type) 3511 { 3512 assert(type < BDRV_MAX_IOTYPE); 3513 3514 cookie->bytes = bytes; 3515 cookie->start_time_ns = get_clock(); 3516 cookie->type = type; 3517 } 3518 3519 void 3520 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) 3521 { 3522 assert(cookie->type < BDRV_MAX_IOTYPE); 3523 3524 bs->nr_bytes[cookie->type] += cookie->bytes; 3525 bs->nr_ops[cookie->type]++; 3526 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; 3527 } 3528 3529 int bdrv_img_create(const char *filename, const char *fmt, 3530 const char *base_filename, const char *base_fmt, 3531 char *options, uint64_t img_size, int flags) 3532 { 3533 QEMUOptionParameter *param = NULL, *create_options = NULL; 3534 QEMUOptionParameter *backing_fmt, *backing_file, *size; 3535 BlockDriverState *bs = NULL; 3536 BlockDriver *drv, *proto_drv; 3537 BlockDriver *backing_drv = NULL; 3538 int ret = 0; 3539 3540 /* Find driver and parse its options */ 3541 drv = bdrv_find_format(fmt); 3542 if (!drv) { 3543 error_report("Unknown file format '%s'", fmt); 3544 ret = -EINVAL; 3545 goto out; 3546 } 3547 3548 proto_drv = bdrv_find_protocol(filename); 3549 if (!proto_drv) { 3550 error_report("Unknown protocol '%s'", filename); 3551 ret = -EINVAL; 3552 goto out; 3553 } 3554 3555 create_options = append_option_parameters(create_options, 3556 drv->create_options); 3557 create_options = append_option_parameters(create_options, 3558 proto_drv->create_options); 3559 3560 /* Create parameter list with default values */ 3561 param = parse_option_parameters("", create_options, param); 3562 3563 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); 3564 3565 /* Parse -o options */ 3566 if (options) { 3567 param = parse_option_parameters(options, create_options, param); 3568 if (param == NULL) { 3569 error_report("Invalid options for file format '%s'.", fmt); 3570 ret = -EINVAL; 3571 goto out; 3572 } 3573 } 3574 3575 if (base_filename) { 3576 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, 3577 base_filename)) { 3578 error_report("Backing file not supported for file format '%s'", 3579 fmt); 3580 ret = -EINVAL; 3581 goto out; 3582 } 3583 } 3584 3585 if (base_fmt) { 3586 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { 3587 error_report("Backing file format not supported for file " 3588 "format '%s'", fmt); 3589 ret = -EINVAL; 3590 goto out; 3591 } 3592 } 3593 3594 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); 3595 if (backing_file && backing_file->value.s) { 3596 if (!strcmp(filename, backing_file->value.s)) { 3597 error_report("Error: Trying to create an image with the " 3598 "same filename as the backing file"); 3599 ret 
= -EINVAL; 3600 goto out; 3601 } 3602 } 3603 3604 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); 3605 if (backing_fmt && backing_fmt->value.s) { 3606 backing_drv = bdrv_find_format(backing_fmt->value.s); 3607 if (!backing_drv) { 3608 error_report("Unknown backing file format '%s'", 3609 backing_fmt->value.s); 3610 ret = -EINVAL; 3611 goto out; 3612 } 3613 } 3614 3615 // The size for the image must always be specified, with one exception: 3616 // If we are using a backing file, we can obtain the size from there 3617 size = get_option_parameter(param, BLOCK_OPT_SIZE); 3618 if (size && size->value.n == -1) { 3619 if (backing_file && backing_file->value.s) { 3620 uint64_t size; 3621 char buf[32]; 3622 3623 bs = bdrv_new(""); 3624 3625 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv); 3626 if (ret < 0) { 3627 error_report("Could not open '%s'", backing_file->value.s); 3628 goto out; 3629 } 3630 bdrv_get_geometry(bs, &size); 3631 size *= 512; 3632 3633 snprintf(buf, sizeof(buf), "%" PRId64, size); 3634 set_option_parameter(param, BLOCK_OPT_SIZE, buf); 3635 } else { 3636 error_report("Image creation needs a size parameter"); 3637 ret = -EINVAL; 3638 goto out; 3639 } 3640 } 3641 3642 printf("Formatting '%s', fmt=%s ", filename, fmt); 3643 print_option_parameters(param); 3644 puts(""); 3645 3646 ret = bdrv_create(drv, filename, param); 3647 3648 if (ret < 0) { 3649 if (ret == -ENOTSUP) { 3650 error_report("Formatting or formatting option not supported for " 3651 "file format '%s'", fmt); 3652 } else if (ret == -EFBIG) { 3653 error_report("The image size is too large for file format '%s'", 3654 fmt); 3655 } else { 3656 error_report("%s: error while creating %s: %s", filename, fmt, 3657 strerror(-ret)); 3658 } 3659 } 3660 3661 out: 3662 free_option_parameters(create_options); 3663 free_option_parameters(param); 3664 3665 if (bs) { 3666 bdrv_delete(bs); 3667 } 3668 3669 return ret; 3670 } 3671
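
/*
 * Illustrative usage sketch, not part of the original sources: creating a
 * 10 GiB qcow2 image on top of a backing file, roughly what
 * "qemu-img create -f qcow2 -b base.qcow2 overlay.qcow2 10G" boils down to.
 * The file names are examples and error handling is reduced to the return
 * value.
 *
 *   int ret = bdrv_img_create("overlay.qcow2", "qcow2",
 *                             "base.qcow2", NULL,   // base filename / format
 *                             NULL,                 // no -o option string
 *                             (uint64_t)10 << 30,   // 10 GiB
 *                             0);                   // open flags
 *   if (ret < 0) {
 *       error_report("image creation failed: %s", strerror(-ret));
 *   }
 */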