1 /* 2 * QEMU System Emulator block driver 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 #include "config-host.h" 25 #include "qemu-common.h" 26 #include "trace.h" 27 #include "monitor.h" 28 #include "block_int.h" 29 #include "module.h" 30 #include "qemu-objects.h" 31 #include "qemu-coroutine.h" 32 33 #ifdef CONFIG_BSD 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <sys/ioctl.h> 37 #include <sys/queue.h> 38 #ifndef __DragonFly__ 39 #include <sys/disk.h> 40 #endif 41 #endif 42 43 #ifdef _WIN32 44 #include <windows.h> 45 #endif 46 47 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); 48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 50 BlockDriverCompletionFunc *cb, void *opaque); 51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 53 BlockDriverCompletionFunc *cb, void *opaque); 54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, 55 BlockDriverCompletionFunc *cb, void *opaque); 56 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, 57 BlockDriverCompletionFunc *cb, void *opaque); 58 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 59 uint8_t *buf, int nb_sectors); 60 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, 61 const uint8_t *buf, int nb_sectors); 62 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, 63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 64 BlockDriverCompletionFunc *cb, void *opaque); 65 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, 66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 67 BlockDriverCompletionFunc *cb, void *opaque); 68 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 69 int64_t sector_num, int nb_sectors, 70 QEMUIOVector *iov); 71 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 72 int64_t sector_num, int nb_sectors, 73 QEMUIOVector *iov); 74 static int coroutine_fn 
bdrv_co_flush_em(BlockDriverState *bs); 75 76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 77 QTAILQ_HEAD_INITIALIZER(bdrv_states); 78 79 static QLIST_HEAD(, BlockDriver) bdrv_drivers = 80 QLIST_HEAD_INITIALIZER(bdrv_drivers); 81 82 /* The device to use for VM snapshots */ 83 static BlockDriverState *bs_snapshots; 84 85 /* If non-zero, use only whitelisted block drivers */ 86 static int use_bdrv_whitelist; 87 88 #ifdef _WIN32 89 static int is_windows_drive_prefix(const char *filename) 90 { 91 return (((filename[0] >= 'a' && filename[0] <= 'z') || 92 (filename[0] >= 'A' && filename[0] <= 'Z')) && 93 filename[1] == ':'); 94 } 95 96 int is_windows_drive(const char *filename) 97 { 98 if (is_windows_drive_prefix(filename) && 99 filename[2] == '\0') 100 return 1; 101 if (strstart(filename, "\\\\.\\", NULL) || 102 strstart(filename, "//./", NULL)) 103 return 1; 104 return 0; 105 } 106 #endif 107 108 /* check if the path starts with "<protocol>:" */ 109 static int path_has_protocol(const char *path) 110 { 111 #ifdef _WIN32 112 if (is_windows_drive(path) || 113 is_windows_drive_prefix(path)) { 114 return 0; 115 } 116 #endif 117 118 return strchr(path, ':') != NULL; 119 } 120 121 int path_is_absolute(const char *path) 122 { 123 const char *p; 124 #ifdef _WIN32 125 /* specific case for names like: "\\.\d:" */ 126 if (*path == '/' || *path == '\\') 127 return 1; 128 #endif 129 p = strchr(path, ':'); 130 if (p) 131 p++; 132 else 133 p = path; 134 #ifdef _WIN32 135 return (*p == '/' || *p == '\\'); 136 #else 137 return (*p == '/'); 138 #endif 139 } 140 141 /* if filename is absolute, just copy it to dest. Otherwise, build a 142 path to it by considering it is relative to base_path. URL are 143 supported. 
*/ 144 void path_combine(char *dest, int dest_size, 145 const char *base_path, 146 const char *filename) 147 { 148 const char *p, *p1; 149 int len; 150 151 if (dest_size <= 0) 152 return; 153 if (path_is_absolute(filename)) { 154 pstrcpy(dest, dest_size, filename); 155 } else { 156 p = strchr(base_path, ':'); 157 if (p) 158 p++; 159 else 160 p = base_path; 161 p1 = strrchr(base_path, '/'); 162 #ifdef _WIN32 163 { 164 const char *p2; 165 p2 = strrchr(base_path, '\\'); 166 if (!p1 || p2 > p1) 167 p1 = p2; 168 } 169 #endif 170 if (p1) 171 p1++; 172 else 173 p1 = base_path; 174 if (p1 > p) 175 p = p1; 176 len = p - base_path; 177 if (len > dest_size - 1) 178 len = dest_size - 1; 179 memcpy(dest, base_path, len); 180 dest[len] = '\0'; 181 pstrcat(dest, dest_size, filename); 182 } 183 } 184 185 void bdrv_register(BlockDriver *bdrv) 186 { 187 if (bdrv->bdrv_co_readv) { 188 /* Emulate AIO by coroutines, and sync by AIO */ 189 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em; 190 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em; 191 bdrv->bdrv_read = bdrv_read_em; 192 bdrv->bdrv_write = bdrv_write_em; 193 } else { 194 bdrv->bdrv_co_readv = bdrv_co_readv_em; 195 bdrv->bdrv_co_writev = bdrv_co_writev_em; 196 197 if (!bdrv->bdrv_aio_readv) { 198 /* add AIO emulation layer */ 199 bdrv->bdrv_aio_readv = bdrv_aio_readv_em; 200 bdrv->bdrv_aio_writev = bdrv_aio_writev_em; 201 } else if (!bdrv->bdrv_read) { 202 /* add synchronous IO emulation layer */ 203 bdrv->bdrv_read = bdrv_read_em; 204 bdrv->bdrv_write = bdrv_write_em; 205 } 206 } 207 208 if (!bdrv->bdrv_aio_flush) 209 bdrv->bdrv_aio_flush = bdrv_aio_flush_em; 210 211 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 212 } 213 214 /* create a new block device (by default it is empty) */ 215 BlockDriverState *bdrv_new(const char *device_name) 216 { 217 BlockDriverState *bs; 218 219 bs = g_malloc0(sizeof(BlockDriverState)); 220 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); 221 if (device_name[0] != '\0') { 222 
QTAILQ_INSERT_TAIL(&bdrv_states, bs, list); 223 } 224 return bs; 225 } 226 227 BlockDriver *bdrv_find_format(const char *format_name) 228 { 229 BlockDriver *drv1; 230 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 231 if (!strcmp(drv1->format_name, format_name)) { 232 return drv1; 233 } 234 } 235 return NULL; 236 } 237 238 static int bdrv_is_whitelisted(BlockDriver *drv) 239 { 240 static const char *whitelist[] = { 241 CONFIG_BDRV_WHITELIST 242 }; 243 const char **p; 244 245 if (!whitelist[0]) 246 return 1; /* no whitelist, anything goes */ 247 248 for (p = whitelist; *p; p++) { 249 if (!strcmp(drv->format_name, *p)) { 250 return 1; 251 } 252 } 253 return 0; 254 } 255 256 BlockDriver *bdrv_find_whitelisted_format(const char *format_name) 257 { 258 BlockDriver *drv = bdrv_find_format(format_name); 259 return drv && bdrv_is_whitelisted(drv) ? drv : NULL; 260 } 261 262 int bdrv_create(BlockDriver *drv, const char* filename, 263 QEMUOptionParameter *options) 264 { 265 if (!drv->bdrv_create) 266 return -ENOTSUP; 267 268 return drv->bdrv_create(filename, options); 269 } 270 271 int bdrv_create_file(const char* filename, QEMUOptionParameter *options) 272 { 273 BlockDriver *drv; 274 275 drv = bdrv_find_protocol(filename); 276 if (drv == NULL) { 277 return -ENOENT; 278 } 279 280 return bdrv_create(drv, filename, options); 281 } 282 283 #ifdef _WIN32 284 void get_tmp_filename(char *filename, int size) 285 { 286 char temp_dir[MAX_PATH]; 287 288 GetTempPath(MAX_PATH, temp_dir); 289 GetTempFileName(temp_dir, "qem", 0, filename); 290 } 291 #else 292 void get_tmp_filename(char *filename, int size) 293 { 294 int fd; 295 const char *tmpdir; 296 /* XXX: race condition possible */ 297 tmpdir = getenv("TMPDIR"); 298 if (!tmpdir) 299 tmpdir = "/tmp"; 300 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir); 301 fd = mkstemp(filename); 302 close(fd); 303 } 304 #endif 305 306 /* 307 * Detect host devices. By convention, /dev/cdrom[N] is always 308 * recognized as a host CDROM. 
309 */ 310 static BlockDriver *find_hdev_driver(const char *filename) 311 { 312 int score_max = 0, score; 313 BlockDriver *drv = NULL, *d; 314 315 QLIST_FOREACH(d, &bdrv_drivers, list) { 316 if (d->bdrv_probe_device) { 317 score = d->bdrv_probe_device(filename); 318 if (score > score_max) { 319 score_max = score; 320 drv = d; 321 } 322 } 323 } 324 325 return drv; 326 } 327 328 BlockDriver *bdrv_find_protocol(const char *filename) 329 { 330 BlockDriver *drv1; 331 char protocol[128]; 332 int len; 333 const char *p; 334 335 /* TODO Drivers without bdrv_file_open must be specified explicitly */ 336 337 /* 338 * XXX(hch): we really should not let host device detection 339 * override an explicit protocol specification, but moving this 340 * later breaks access to device names with colons in them. 341 * Thanks to the brain-dead persistent naming schemes on udev- 342 * based Linux systems those actually are quite common. 343 */ 344 drv1 = find_hdev_driver(filename); 345 if (drv1) { 346 return drv1; 347 } 348 349 if (!path_has_protocol(filename)) { 350 return bdrv_find_format("file"); 351 } 352 p = strchr(filename, ':'); 353 assert(p != NULL); 354 len = p - filename; 355 if (len > sizeof(protocol) - 1) 356 len = sizeof(protocol) - 1; 357 memcpy(protocol, filename, len); 358 protocol[len] = '\0'; 359 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 360 if (drv1->protocol_name && 361 !strcmp(drv1->protocol_name, protocol)) { 362 return drv1; 363 } 364 } 365 return NULL; 366 } 367 368 static int find_image_format(const char *filename, BlockDriver **pdrv) 369 { 370 int ret, score, score_max; 371 BlockDriver *drv1, *drv; 372 uint8_t buf[2048]; 373 BlockDriverState *bs; 374 375 ret = bdrv_file_open(&bs, filename, 0); 376 if (ret < 0) { 377 *pdrv = NULL; 378 return ret; 379 } 380 381 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 382 if (bs->sg || !bdrv_is_inserted(bs)) { 383 bdrv_delete(bs); 384 drv = bdrv_find_format("raw"); 385 if (!drv) { 386 ret = 
-ENOENT; 387 } 388 *pdrv = drv; 389 return ret; 390 } 391 392 ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 393 bdrv_delete(bs); 394 if (ret < 0) { 395 *pdrv = NULL; 396 return ret; 397 } 398 399 score_max = 0; 400 drv = NULL; 401 QLIST_FOREACH(drv1, &bdrv_drivers, list) { 402 if (drv1->bdrv_probe) { 403 score = drv1->bdrv_probe(buf, ret, filename); 404 if (score > score_max) { 405 score_max = score; 406 drv = drv1; 407 } 408 } 409 } 410 if (!drv) { 411 ret = -ENOENT; 412 } 413 *pdrv = drv; 414 return ret; 415 } 416 417 /** 418 * Set the current 'total_sectors' value 419 */ 420 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 421 { 422 BlockDriver *drv = bs->drv; 423 424 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 425 if (bs->sg) 426 return 0; 427 428 /* query actual device if possible, otherwise just trust the hint */ 429 if (drv->bdrv_getlength) { 430 int64_t length = drv->bdrv_getlength(bs); 431 if (length < 0) { 432 return length; 433 } 434 hint = length >> BDRV_SECTOR_BITS; 435 } 436 437 bs->total_sectors = hint; 438 return 0; 439 } 440 441 /** 442 * Set open flags for a given cache mode 443 * 444 * Return 0 on success, -1 if the cache mode was invalid. 
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    /* the cache bits are cleared even when mode turns out to be invalid */
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/*
 * Common part for opening disk images and files
 *
 * Resets the per-open state of bs, attaches drv and opens the image either
 * through drv->bdrv_file_open() or, for format drivers, by first opening
 * the underlying file into bs->file and then calling drv->bdrv_open().
 *
 * Returns 0 on success.  Returns -ENOTSUP when the whitelist is enforced
 * and drv is not on it, otherwise the negative value returned by the
 * failing step.  On failure bs->file, bs->opaque and bs->drv are reset.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->open_flags = flags;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);

    /* nothing has been allocated yet, so a plain return is enough here */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    if (flags & BDRV_O_CACHE_WB)
        bs->enable_write_cache = 1;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    /* read-only state is derived from the flags actually used to open */
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* the temporary image is unlinked right after it has been opened */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * On success stores a newly allocated, anonymous and growable
 * BlockDriverState in *pbs and returns 0.  Returns -ENOENT when no protocol
 * driver matches filename, or the error from bdrv_open_common(); *pbs is
 * left untouched on failure.
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
576 */ 577 int bdrv_open(BlockDriverState *bs, const char *filename, int flags, 578 BlockDriver *drv) 579 { 580 int ret; 581 582 if (flags & BDRV_O_SNAPSHOT) { 583 BlockDriverState *bs1; 584 int64_t total_size; 585 int is_protocol = 0; 586 BlockDriver *bdrv_qcow2; 587 QEMUOptionParameter *options; 588 char tmp_filename[PATH_MAX]; 589 char backing_filename[PATH_MAX]; 590 591 /* if snapshot, we create a temporary backing file and open it 592 instead of opening 'filename' directly */ 593 594 /* if there is a backing file, use it */ 595 bs1 = bdrv_new(""); 596 ret = bdrv_open(bs1, filename, 0, drv); 597 if (ret < 0) { 598 bdrv_delete(bs1); 599 return ret; 600 } 601 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK; 602 603 if (bs1->drv && bs1->drv->protocol_name) 604 is_protocol = 1; 605 606 bdrv_delete(bs1); 607 608 get_tmp_filename(tmp_filename, sizeof(tmp_filename)); 609 610 /* Real path is meaningless for protocols */ 611 if (is_protocol) 612 snprintf(backing_filename, sizeof(backing_filename), 613 "%s", filename); 614 else if (!realpath(filename, backing_filename)) 615 return -errno; 616 617 bdrv_qcow2 = bdrv_find_format("qcow2"); 618 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL); 619 620 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size); 621 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename); 622 if (drv) { 623 set_option_parameter(options, BLOCK_OPT_BACKING_FMT, 624 drv->format_name); 625 } 626 627 ret = bdrv_create(bdrv_qcow2, tmp_filename, options); 628 free_option_parameters(options); 629 if (ret < 0) { 630 return ret; 631 } 632 633 filename = tmp_filename; 634 drv = bdrv_qcow2; 635 bs->is_temporary = 1; 636 } 637 638 /* Find the right image format driver */ 639 if (!drv) { 640 ret = find_image_format(filename, &drv); 641 } 642 643 if (!drv) { 644 goto unlink_and_fail; 645 } 646 647 /* Open the image */ 648 ret = bdrv_open_common(bs, filename, flags, drv); 649 if (ret < 0) { 650 goto 
unlink_and_fail; 651 } 652 653 /* If there is a backing file, use it */ 654 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') { 655 char backing_filename[PATH_MAX]; 656 int back_flags; 657 BlockDriver *back_drv = NULL; 658 659 bs->backing_hd = bdrv_new(""); 660 661 if (path_has_protocol(bs->backing_file)) { 662 pstrcpy(backing_filename, sizeof(backing_filename), 663 bs->backing_file); 664 } else { 665 path_combine(backing_filename, sizeof(backing_filename), 666 filename, bs->backing_file); 667 } 668 669 if (bs->backing_format[0] != '\0') { 670 back_drv = bdrv_find_format(bs->backing_format); 671 } 672 673 /* backing files always opened read-only */ 674 back_flags = 675 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 676 677 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv); 678 if (ret < 0) { 679 bdrv_close(bs); 680 return ret; 681 } 682 if (bs->is_temporary) { 683 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR); 684 } else { 685 /* base image inherits from "parent" */ 686 bs->backing_hd->keep_read_only = bs->keep_read_only; 687 } 688 } 689 690 if (!bdrv_key_required(bs)) { 691 bdrv_dev_change_media_cb(bs, true); 692 } 693 694 return 0; 695 696 unlink_and_fail: 697 if (bs->is_temporary) { 698 unlink(filename); 699 } 700 return ret; 701 } 702 703 void bdrv_close(BlockDriverState *bs) 704 { 705 if (bs->drv) { 706 if (bs == bs_snapshots) { 707 bs_snapshots = NULL; 708 } 709 if (bs->backing_hd) { 710 bdrv_delete(bs->backing_hd); 711 bs->backing_hd = NULL; 712 } 713 bs->drv->bdrv_close(bs); 714 g_free(bs->opaque); 715 #ifdef _WIN32 716 if (bs->is_temporary) { 717 unlink(bs->filename); 718 } 719 #endif 720 bs->opaque = NULL; 721 bs->drv = NULL; 722 723 if (bs->file != NULL) { 724 bdrv_close(bs->file); 725 } 726 727 bdrv_dev_change_media_cb(bs, false); 728 } 729 } 730 731 void bdrv_close_all(void) 732 { 733 BlockDriverState *bs; 734 735 QTAILQ_FOREACH(bs, &bdrv_states, list) { 736 bdrv_close(bs); 737 } 738 } 
739 740 /* make a BlockDriverState anonymous by removing from bdrv_state list. 741 Also, NULL terminate the device_name to prevent double remove */ 742 void bdrv_make_anon(BlockDriverState *bs) 743 { 744 if (bs->device_name[0] != '\0') { 745 QTAILQ_REMOVE(&bdrv_states, bs, list); 746 } 747 bs->device_name[0] = '\0'; 748 } 749 750 void bdrv_delete(BlockDriverState *bs) 751 { 752 assert(!bs->dev); 753 754 /* remove from list, if necessary */ 755 bdrv_make_anon(bs); 756 757 bdrv_close(bs); 758 if (bs->file != NULL) { 759 bdrv_delete(bs->file); 760 } 761 762 assert(bs != bs_snapshots); 763 g_free(bs); 764 } 765 766 int bdrv_attach_dev(BlockDriverState *bs, void *dev) 767 /* TODO change to DeviceState *dev when all users are qdevified */ 768 { 769 if (bs->dev) { 770 return -EBUSY; 771 } 772 bs->dev = dev; 773 return 0; 774 } 775 776 /* TODO qdevified devices don't use this, remove when devices are qdevified */ 777 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev) 778 { 779 if (bdrv_attach_dev(bs, dev) < 0) { 780 abort(); 781 } 782 } 783 784 void bdrv_detach_dev(BlockDriverState *bs, void *dev) 785 /* TODO change to DeviceState *dev when all users are qdevified */ 786 { 787 assert(bs->dev == dev); 788 bs->dev = NULL; 789 bs->dev_ops = NULL; 790 bs->dev_opaque = NULL; 791 bs->buffer_alignment = 512; 792 } 793 794 /* TODO change to return DeviceState * when all users are qdevified */ 795 void *bdrv_get_attached_dev(BlockDriverState *bs) 796 { 797 return bs->dev; 798 } 799 800 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, 801 void *opaque) 802 { 803 bs->dev_ops = ops; 804 bs->dev_opaque = opaque; 805 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) { 806 bs_snapshots = NULL; 807 } 808 } 809 810 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load) 811 { 812 if (bs->dev_ops && bs->dev_ops->change_media_cb) { 813 bs->dev_ops->change_media_cb(bs->dev_opaque, load); 814 } 815 } 816 817 bool 
bdrv_dev_has_removable_media(BlockDriverState *bs) 818 { 819 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb); 820 } 821 822 bool bdrv_dev_is_tray_open(BlockDriverState *bs) 823 { 824 if (bs->dev_ops && bs->dev_ops->is_tray_open) { 825 return bs->dev_ops->is_tray_open(bs->dev_opaque); 826 } 827 return false; 828 } 829 830 static void bdrv_dev_resize_cb(BlockDriverState *bs) 831 { 832 if (bs->dev_ops && bs->dev_ops->resize_cb) { 833 bs->dev_ops->resize_cb(bs->dev_opaque); 834 } 835 } 836 837 bool bdrv_dev_is_medium_locked(BlockDriverState *bs) 838 { 839 if (bs->dev_ops && bs->dev_ops->is_medium_locked) { 840 return bs->dev_ops->is_medium_locked(bs->dev_opaque); 841 } 842 return false; 843 } 844 845 /* 846 * Run consistency checks on an image 847 * 848 * Returns 0 if the check could be completed (it doesn't mean that the image is 849 * free of errors) or -errno when an internal error occurred. The results of the 850 * check are stored in res. 851 */ 852 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res) 853 { 854 if (bs->drv->bdrv_check == NULL) { 855 return -ENOTSUP; 856 } 857 858 memset(res, 0, sizeof(*res)); 859 return bs->drv->bdrv_check(bs, res); 860 } 861 862 #define COMMIT_BUF_SECTORS 2048 863 864 /* commit COW file into the raw image */ 865 int bdrv_commit(BlockDriverState *bs) 866 { 867 BlockDriver *drv = bs->drv; 868 BlockDriver *backing_drv; 869 int64_t sector, total_sectors; 870 int n, ro, open_flags; 871 int ret = 0, rw_ret = 0; 872 uint8_t *buf; 873 char filename[1024]; 874 BlockDriverState *bs_rw, *bs_ro; 875 876 if (!drv) 877 return -ENOMEDIUM; 878 879 if (!bs->backing_hd) { 880 return -ENOTSUP; 881 } 882 883 if (bs->backing_hd->keep_read_only) { 884 return -EACCES; 885 } 886 887 backing_drv = bs->backing_hd->drv; 888 ro = bs->backing_hd->read_only; 889 strncpy(filename, bs->backing_hd->filename, sizeof(filename)); 890 open_flags = bs->backing_hd->open_flags; 891 892 if (ro) { 893 /* re-open as RW */ 894 
bdrv_delete(bs->backing_hd); 895 bs->backing_hd = NULL; 896 bs_rw = bdrv_new(""); 897 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, 898 backing_drv); 899 if (rw_ret < 0) { 900 bdrv_delete(bs_rw); 901 /* try to re-open read-only */ 902 bs_ro = bdrv_new(""); 903 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, 904 backing_drv); 905 if (ret < 0) { 906 bdrv_delete(bs_ro); 907 /* drive not functional anymore */ 908 bs->drv = NULL; 909 return ret; 910 } 911 bs->backing_hd = bs_ro; 912 return rw_ret; 913 } 914 bs->backing_hd = bs_rw; 915 } 916 917 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; 918 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 919 920 for (sector = 0; sector < total_sectors; sector += n) { 921 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { 922 923 if (bdrv_read(bs, sector, buf, n) != 0) { 924 ret = -EIO; 925 goto ro_cleanup; 926 } 927 928 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) { 929 ret = -EIO; 930 goto ro_cleanup; 931 } 932 } 933 } 934 935 if (drv->bdrv_make_empty) { 936 ret = drv->bdrv_make_empty(bs); 937 bdrv_flush(bs); 938 } 939 940 /* 941 * Make sure all data we wrote to the backing device is actually 942 * stable on disk. 
943 */ 944 if (bs->backing_hd) 945 bdrv_flush(bs->backing_hd); 946 947 ro_cleanup: 948 g_free(buf); 949 950 if (ro) { 951 /* re-open as RO */ 952 bdrv_delete(bs->backing_hd); 953 bs->backing_hd = NULL; 954 bs_ro = bdrv_new(""); 955 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, 956 backing_drv); 957 if (ret < 0) { 958 bdrv_delete(bs_ro); 959 /* drive not functional anymore */ 960 bs->drv = NULL; 961 return ret; 962 } 963 bs->backing_hd = bs_ro; 964 bs->backing_hd->keep_read_only = 0; 965 } 966 967 return ret; 968 } 969 970 void bdrv_commit_all(void) 971 { 972 BlockDriverState *bs; 973 974 QTAILQ_FOREACH(bs, &bdrv_states, list) { 975 bdrv_commit(bs); 976 } 977 } 978 979 /* 980 * Return values: 981 * 0 - success 982 * -EINVAL - backing format specified, but no file 983 * -ENOSPC - can't update the backing file because no space is left in the 984 * image file header 985 * -ENOTSUP - format driver doesn't support changing the backing file 986 */ 987 int bdrv_change_backing_file(BlockDriverState *bs, 988 const char *backing_file, const char *backing_fmt) 989 { 990 BlockDriver *drv = bs->drv; 991 992 if (drv->bdrv_change_backing_file != NULL) { 993 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 994 } else { 995 return -ENOTSUP; 996 } 997 } 998 999 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, 1000 size_t size) 1001 { 1002 int64_t len; 1003 1004 if (!bdrv_is_inserted(bs)) 1005 return -ENOMEDIUM; 1006 1007 if (bs->growable) 1008 return 0; 1009 1010 len = bdrv_getlength(bs); 1011 1012 if (offset < 0) 1013 return -EIO; 1014 1015 if ((offset > len) || (len - offset < size)) 1016 return -EIO; 1017 1018 return 0; 1019 } 1020 1021 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, 1022 int nb_sectors) 1023 { 1024 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE, 1025 nb_sectors * BDRV_SECTOR_SIZE); 1026 } 1027 1028 static inline bool bdrv_has_async_rw(BlockDriver *drv) 1029 { 1030 
return drv->bdrv_co_readv != bdrv_co_readv_em 1031 || drv->bdrv_aio_readv != bdrv_aio_readv_em; 1032 } 1033 1034 static inline bool bdrv_has_async_flush(BlockDriver *drv) 1035 { 1036 return drv->bdrv_aio_flush != bdrv_aio_flush_em; 1037 } 1038 1039 /* return < 0 if error. See bdrv_write() for the return codes */ 1040 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 1041 uint8_t *buf, int nb_sectors) 1042 { 1043 BlockDriver *drv = bs->drv; 1044 1045 if (!drv) 1046 return -ENOMEDIUM; 1047 1048 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { 1049 QEMUIOVector qiov; 1050 struct iovec iov = { 1051 .iov_base = (void *)buf, 1052 .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 1053 }; 1054 1055 qemu_iovec_init_external(&qiov, &iov, 1); 1056 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov); 1057 } 1058 1059 if (bdrv_check_request(bs, sector_num, nb_sectors)) 1060 return -EIO; 1061 1062 return drv->bdrv_read(bs, sector_num, buf, nb_sectors); 1063 } 1064 1065 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num, 1066 int nb_sectors, int dirty) 1067 { 1068 int64_t start, end; 1069 unsigned long val, idx, bit; 1070 1071 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; 1072 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK; 1073 1074 for (; start <= end; start++) { 1075 idx = start / (sizeof(unsigned long) * 8); 1076 bit = start % (sizeof(unsigned long) * 8); 1077 val = bs->dirty_bitmap[idx]; 1078 if (dirty) { 1079 if (!(val & (1UL << bit))) { 1080 bs->dirty_count++; 1081 val |= 1UL << bit; 1082 } 1083 } else { 1084 if (val & (1UL << bit)) { 1085 bs->dirty_count--; 1086 val &= ~(1UL << bit); 1087 } 1088 } 1089 bs->dirty_bitmap[idx] = val; 1090 } 1091 } 1092 1093 /* Return < 0 if error. Important errors are: 1094 -EIO generic I/O error (may happen for all errors) 1095 -ENOMEDIUM No media inserted. 
1096 -EINVAL Invalid sector number or nb_sectors 1097 -EACCES Trying to write a read-only device 1098 */ 1099 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 1100 const uint8_t *buf, int nb_sectors) 1101 { 1102 BlockDriver *drv = bs->drv; 1103 1104 if (!bs->drv) 1105 return -ENOMEDIUM; 1106 1107 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { 1108 QEMUIOVector qiov; 1109 struct iovec iov = { 1110 .iov_base = (void *)buf, 1111 .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 1112 }; 1113 1114 qemu_iovec_init_external(&qiov, &iov, 1); 1115 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); 1116 } 1117 1118 if (bs->read_only) 1119 return -EACCES; 1120 if (bdrv_check_request(bs, sector_num, nb_sectors)) 1121 return -EIO; 1122 1123 if (bs->dirty_bitmap) { 1124 set_dirty_bitmap(bs, sector_num, nb_sectors, 1); 1125 } 1126 1127 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 1128 bs->wr_highest_sector = sector_num + nb_sectors - 1; 1129 } 1130 1131 return drv->bdrv_write(bs, sector_num, buf, nb_sectors); 1132 } 1133 1134 int bdrv_pread(BlockDriverState *bs, int64_t offset, 1135 void *buf, int count1) 1136 { 1137 uint8_t tmp_buf[BDRV_SECTOR_SIZE]; 1138 int len, nb_sectors, count; 1139 int64_t sector_num; 1140 int ret; 1141 1142 count = count1; 1143 /* first read to align to sector start */ 1144 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); 1145 if (len > count) 1146 len = count; 1147 sector_num = offset >> BDRV_SECTOR_BITS; 1148 if (len > 0) { 1149 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 1150 return ret; 1151 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len); 1152 count -= len; 1153 if (count == 0) 1154 return count1; 1155 sector_num++; 1156 buf += len; 1157 } 1158 1159 /* read the sectors "in place" */ 1160 nb_sectors = count >> BDRV_SECTOR_BITS; 1161 if (nb_sectors > 0) { 1162 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0) 1163 return ret; 1164 sector_num += nb_sectors; 1165 len = nb_sectors << 
BDRV_SECTOR_BITS; 1166 buf += len; 1167 count -= len; 1168 } 1169 1170 /* add data from the last sector */ 1171 if (count > 0) { 1172 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 1173 return ret; 1174 memcpy(buf, tmp_buf, count); 1175 } 1176 return count1; 1177 } 1178 1179 int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 1180 const void *buf, int count1) 1181 { 1182 uint8_t tmp_buf[BDRV_SECTOR_SIZE]; 1183 int len, nb_sectors, count; 1184 int64_t sector_num; 1185 int ret; 1186 1187 count = count1; 1188 /* first write to align to sector start */ 1189 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); 1190 if (len > count) 1191 len = count; 1192 sector_num = offset >> BDRV_SECTOR_BITS; 1193 if (len > 0) { 1194 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 1195 return ret; 1196 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len); 1197 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) 1198 return ret; 1199 count -= len; 1200 if (count == 0) 1201 return count1; 1202 sector_num++; 1203 buf += len; 1204 } 1205 1206 /* write the sectors "in place" */ 1207 nb_sectors = count >> BDRV_SECTOR_BITS; 1208 if (nb_sectors > 0) { 1209 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0) 1210 return ret; 1211 sector_num += nb_sectors; 1212 len = nb_sectors << BDRV_SECTOR_BITS; 1213 buf += len; 1214 count -= len; 1215 } 1216 1217 /* add data from the last sector */ 1218 if (count > 0) { 1219 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) 1220 return ret; 1221 memcpy(tmp_buf, buf, count); 1222 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) 1223 return ret; 1224 } 1225 return count1; 1226 } 1227 1228 /* 1229 * Writes to the file and ensures that no writes are reordered across this 1230 * request (acts as a barrier) 1231 * 1232 * Returns 0 on success, -errno in error cases. 
1233 */ 1234 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 1235 const void *buf, int count) 1236 { 1237 int ret; 1238 1239 ret = bdrv_pwrite(bs, offset, buf, count); 1240 if (ret < 0) { 1241 return ret; 1242 } 1243 1244 /* No flush needed for cache modes that use O_DSYNC */ 1245 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) { 1246 bdrv_flush(bs); 1247 } 1248 1249 return 0; 1250 } 1251 1252 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, 1253 int nb_sectors, QEMUIOVector *qiov) 1254 { 1255 BlockDriver *drv = bs->drv; 1256 1257 trace_bdrv_co_readv(bs, sector_num, nb_sectors); 1258 1259 if (!drv) { 1260 return -ENOMEDIUM; 1261 } 1262 if (bdrv_check_request(bs, sector_num, nb_sectors)) { 1263 return -EIO; 1264 } 1265 1266 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 1267 } 1268 1269 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 1270 int nb_sectors, QEMUIOVector *qiov) 1271 { 1272 BlockDriver *drv = bs->drv; 1273 1274 trace_bdrv_co_writev(bs, sector_num, nb_sectors); 1275 1276 if (!bs->drv) { 1277 return -ENOMEDIUM; 1278 } 1279 if (bs->read_only) { 1280 return -EACCES; 1281 } 1282 if (bdrv_check_request(bs, sector_num, nb_sectors)) { 1283 return -EIO; 1284 } 1285 1286 if (bs->dirty_bitmap) { 1287 set_dirty_bitmap(bs, sector_num, nb_sectors, 1); 1288 } 1289 1290 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 1291 bs->wr_highest_sector = sector_num + nb_sectors - 1; 1292 } 1293 1294 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 1295 } 1296 1297 /** 1298 * Truncate file to 'offset' bytes (needed only for file protocols) 1299 */ 1300 int bdrv_truncate(BlockDriverState *bs, int64_t offset) 1301 { 1302 BlockDriver *drv = bs->drv; 1303 int ret; 1304 if (!drv) 1305 return -ENOMEDIUM; 1306 if (!drv->bdrv_truncate) 1307 return -ENOTSUP; 1308 if (bs->read_only) 1309 return -EACCES; 1310 if (bdrv_in_use(bs)) 1311 return -EBUSY; 1312 ret = drv->bdrv_truncate(bs, offset); 1313 
if (ret == 0) { 1314 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 1315 bdrv_dev_resize_cb(bs); 1316 } 1317 return ret; 1318 } 1319 1320 /** 1321 * Length of a allocated file in bytes. Sparse files are counted by actual 1322 * allocated space. Return < 0 if error or unknown. 1323 */ 1324 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 1325 { 1326 BlockDriver *drv = bs->drv; 1327 if (!drv) { 1328 return -ENOMEDIUM; 1329 } 1330 if (drv->bdrv_get_allocated_file_size) { 1331 return drv->bdrv_get_allocated_file_size(bs); 1332 } 1333 if (bs->file) { 1334 return bdrv_get_allocated_file_size(bs->file); 1335 } 1336 return -ENOTSUP; 1337 } 1338 1339 /** 1340 * Length of a file in bytes. Return < 0 if error or unknown. 1341 */ 1342 int64_t bdrv_getlength(BlockDriverState *bs) 1343 { 1344 BlockDriver *drv = bs->drv; 1345 if (!drv) 1346 return -ENOMEDIUM; 1347 1348 if (bs->growable || bdrv_dev_has_removable_media(bs)) { 1349 if (drv->bdrv_getlength) { 1350 return drv->bdrv_getlength(bs); 1351 } 1352 } 1353 return bs->total_sectors * BDRV_SECTOR_SIZE; 1354 } 1355 1356 /* return 0 as number of sectors if no device present or error */ 1357 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 1358 { 1359 int64_t length; 1360 length = bdrv_getlength(bs); 1361 if (length < 0) 1362 length = 0; 1363 else 1364 length = length >> BDRV_SECTOR_BITS; 1365 *nb_sectors_ptr = length; 1366 } 1367 1368 struct partition { 1369 uint8_t boot_ind; /* 0x80 - active */ 1370 uint8_t head; /* starting head */ 1371 uint8_t sector; /* starting sector */ 1372 uint8_t cyl; /* starting cylinder */ 1373 uint8_t sys_ind; /* What partition type */ 1374 uint8_t end_head; /* end head */ 1375 uint8_t end_sector; /* end sector */ 1376 uint8_t end_cyl; /* end cylinder */ 1377 uint32_t start_sect; /* starting sector counting from 0 */ 1378 uint32_t nr_sects; /* nr of sectors in partition */ 1379 } QEMU_PACKED; 1380 1381 /* try to guess the disk logical geometry from the 
MSDOS partition table. Return 0 if OK, -1 if could not guess */ 1382 static int guess_disk_lchs(BlockDriverState *bs, 1383 int *pcylinders, int *pheads, int *psectors) 1384 { 1385 uint8_t buf[BDRV_SECTOR_SIZE]; 1386 int ret, i, heads, sectors, cylinders; 1387 struct partition *p; 1388 uint32_t nr_sects; 1389 uint64_t nb_sectors; 1390 1391 bdrv_get_geometry(bs, &nb_sectors); 1392 1393 ret = bdrv_read(bs, 0, buf, 1); 1394 if (ret < 0) 1395 return -1; 1396 /* test msdos magic */ 1397 if (buf[510] != 0x55 || buf[511] != 0xaa) 1398 return -1; 1399 for(i = 0; i < 4; i++) { 1400 p = ((struct partition *)(buf + 0x1be)) + i; 1401 nr_sects = le32_to_cpu(p->nr_sects); 1402 if (nr_sects && p->end_head) { 1403 /* We make the assumption that the partition terminates on 1404 a cylinder boundary */ 1405 heads = p->end_head + 1; 1406 sectors = p->end_sector & 63; 1407 if (sectors == 0) 1408 continue; 1409 cylinders = nb_sectors / (heads * sectors); 1410 if (cylinders < 1 || cylinders > 16383) 1411 continue; 1412 *pheads = heads; 1413 *psectors = sectors; 1414 *pcylinders = cylinders; 1415 #if 0 1416 printf("guessed geometry: LCHS=%d %d %d\n", 1417 cylinders, heads, sectors); 1418 #endif 1419 return 0; 1420 } 1421 } 1422 return -1; 1423 } 1424 1425 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs) 1426 { 1427 int translation, lba_detected = 0; 1428 int cylinders, heads, secs; 1429 uint64_t nb_sectors; 1430 1431 /* if a geometry hint is available, use it */ 1432 bdrv_get_geometry(bs, &nb_sectors); 1433 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs); 1434 translation = bdrv_get_translation_hint(bs); 1435 if (cylinders != 0) { 1436 *pcyls = cylinders; 1437 *pheads = heads; 1438 *psecs = secs; 1439 } else { 1440 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) { 1441 if (heads > 16) { 1442 /* if heads > 16, it means that a BIOS LBA 1443 translation was active, so the default 1444 hardware geometry is OK */ 1445 lba_detected = 1; 1446 goto 
default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* clamp the cylinder count to the range [2, 16383] */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* only pick a translation when the partition table showed more
               than 16 heads and the user left the translation on "auto" */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the result so the next call takes the hint path */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

/* Store a cylinders/heads/sectors geometry hint on the device */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

/* Store the BIOS translation hint on the device */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

/* Read back the stored cylinders/heads/sectors geometry hint */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;   /* used as sectors per track in the size match */
    uint8_t max_track;   /* used as track count in the size match */
    uint8_t max_head;    /* head count minus one */
} FDFormat;

/* Each row is { drive, last_sect, max_track, max_head } */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    {
FDRIVE_DRV_144, 22, 80, 1, },
    /* rows are { drive, last_sect, max_track, max_head } */
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2...
*/ 1555 { FDRIVE_DRV_144, 9, 80, 0, }, 1556 /* end */ 1557 { FDRIVE_DRV_NONE, -1, -1, 0, }, 1558 }; 1559 1560 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads, 1561 int *max_track, int *last_sect, 1562 FDriveType drive_in, FDriveType *drive) 1563 { 1564 const FDFormat *parse; 1565 uint64_t nb_sectors, size; 1566 int i, first_match, match; 1567 1568 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect); 1569 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) { 1570 /* User defined disk */ 1571 } else { 1572 bdrv_get_geometry(bs, &nb_sectors); 1573 match = -1; 1574 first_match = -1; 1575 for (i = 0; ; i++) { 1576 parse = &fd_formats[i]; 1577 if (parse->drive == FDRIVE_DRV_NONE) { 1578 break; 1579 } 1580 if (drive_in == parse->drive || 1581 drive_in == FDRIVE_DRV_NONE) { 1582 size = (parse->max_head + 1) * parse->max_track * 1583 parse->last_sect; 1584 if (nb_sectors == size) { 1585 match = i; 1586 break; 1587 } 1588 if (first_match == -1) { 1589 first_match = i; 1590 } 1591 } 1592 } 1593 if (match == -1) { 1594 if (first_match == -1) { 1595 match = 1; 1596 } else { 1597 match = first_match; 1598 } 1599 parse = &fd_formats[match]; 1600 } 1601 *nb_heads = parse->max_head + 1; 1602 *max_track = parse->max_track; 1603 *last_sect = parse->last_sect; 1604 *drive = parse->drive; 1605 } 1606 } 1607 1608 int bdrv_get_translation_hint(BlockDriverState *bs) 1609 { 1610 return bs->translation; 1611 } 1612 1613 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, 1614 BlockErrorAction on_write_error) 1615 { 1616 bs->on_read_error = on_read_error; 1617 bs->on_write_error = on_write_error; 1618 } 1619 1620 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read) 1621 { 1622 return is_read ? 
bs->on_read_error : bs->on_write_error; 1623 } 1624 1625 int bdrv_is_read_only(BlockDriverState *bs) 1626 { 1627 return bs->read_only; 1628 } 1629 1630 int bdrv_is_sg(BlockDriverState *bs) 1631 { 1632 return bs->sg; 1633 } 1634 1635 int bdrv_enable_write_cache(BlockDriverState *bs) 1636 { 1637 return bs->enable_write_cache; 1638 } 1639 1640 int bdrv_is_encrypted(BlockDriverState *bs) 1641 { 1642 if (bs->backing_hd && bs->backing_hd->encrypted) 1643 return 1; 1644 return bs->encrypted; 1645 } 1646 1647 int bdrv_key_required(BlockDriverState *bs) 1648 { 1649 BlockDriverState *backing_hd = bs->backing_hd; 1650 1651 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 1652 return 1; 1653 return (bs->encrypted && !bs->valid_key); 1654 } 1655 1656 int bdrv_set_key(BlockDriverState *bs, const char *key) 1657 { 1658 int ret; 1659 if (bs->backing_hd && bs->backing_hd->encrypted) { 1660 ret = bdrv_set_key(bs->backing_hd, key); 1661 if (ret < 0) 1662 return ret; 1663 if (!bs->encrypted) 1664 return 0; 1665 } 1666 if (!bs->encrypted) { 1667 return -EINVAL; 1668 } else if (!bs->drv || !bs->drv->bdrv_set_key) { 1669 return -ENOMEDIUM; 1670 } 1671 ret = bs->drv->bdrv_set_key(bs, key); 1672 if (ret < 0) { 1673 bs->valid_key = 0; 1674 } else if (!bs->valid_key) { 1675 bs->valid_key = 1; 1676 /* call the change callback now, we skipped it on open */ 1677 bdrv_dev_change_media_cb(bs, true); 1678 } 1679 return ret; 1680 } 1681 1682 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size) 1683 { 1684 if (!bs->drv) { 1685 buf[0] = '\0'; 1686 } else { 1687 pstrcpy(buf, buf_size, bs->drv->format_name); 1688 } 1689 } 1690 1691 void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 1692 void *opaque) 1693 { 1694 BlockDriver *drv; 1695 1696 QLIST_FOREACH(drv, &bdrv_drivers, list) { 1697 it(opaque, drv->format_name); 1698 } 1699 } 1700 1701 BlockDriverState *bdrv_find(const char *name) 1702 { 1703 BlockDriverState *bs; 1704 1705 QTAILQ_FOREACH(bs, 
&bdrv_states, list) { 1706 if (!strcmp(name, bs->device_name)) { 1707 return bs; 1708 } 1709 } 1710 return NULL; 1711 } 1712 1713 BlockDriverState *bdrv_next(BlockDriverState *bs) 1714 { 1715 if (!bs) { 1716 return QTAILQ_FIRST(&bdrv_states); 1717 } 1718 return QTAILQ_NEXT(bs, list); 1719 } 1720 1721 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque) 1722 { 1723 BlockDriverState *bs; 1724 1725 QTAILQ_FOREACH(bs, &bdrv_states, list) { 1726 it(opaque, bs); 1727 } 1728 } 1729 1730 const char *bdrv_get_device_name(BlockDriverState *bs) 1731 { 1732 return bs->device_name; 1733 } 1734 1735 int bdrv_flush(BlockDriverState *bs) 1736 { 1737 if (bs->open_flags & BDRV_O_NO_FLUSH) { 1738 return 0; 1739 } 1740 1741 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) { 1742 return bdrv_co_flush_em(bs); 1743 } 1744 1745 if (bs->drv && bs->drv->bdrv_flush) { 1746 return bs->drv->bdrv_flush(bs); 1747 } 1748 1749 /* 1750 * Some block drivers always operate in either writethrough or unsafe mode 1751 * and don't support bdrv_flush therefore. Usually qemu doesn't know how 1752 * the server works (because the behaviour is hardcoded or depends on 1753 * server-side configuration), so we can't ensure that everything is safe 1754 * on disk. Returning an error doesn't work because that would break guests 1755 * even if the server operates in writethrough mode. 1756 * 1757 * Let's hope the user knows what he's doing. 
1758 */ 1759 return 0; 1760 } 1761 1762 void bdrv_flush_all(void) 1763 { 1764 BlockDriverState *bs; 1765 1766 QTAILQ_FOREACH(bs, &bdrv_states, list) { 1767 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) { 1768 bdrv_flush(bs); 1769 } 1770 } 1771 } 1772 1773 int bdrv_has_zero_init(BlockDriverState *bs) 1774 { 1775 assert(bs->drv); 1776 1777 if (bs->drv->bdrv_has_zero_init) { 1778 return bs->drv->bdrv_has_zero_init(bs); 1779 } 1780 1781 return 1; 1782 } 1783 1784 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 1785 { 1786 if (!bs->drv) { 1787 return -ENOMEDIUM; 1788 } 1789 if (!bs->drv->bdrv_discard) { 1790 return 0; 1791 } 1792 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors); 1793 } 1794 1795 /* 1796 * Returns true iff the specified sector is present in the disk image. Drivers 1797 * not implementing the functionality are assumed to not support backing files, 1798 * hence all their sectors are reported as allocated. 1799 * 1800 * 'pnum' is set to the number of sectors (including and immediately following 1801 * the specified sector) that are known to be in the same 1802 * allocated/unallocated state. 1803 * 1804 * 'nb_sectors' is the max value 'pnum' should be set to. 1805 */ 1806 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, 1807 int *pnum) 1808 { 1809 int64_t n; 1810 if (!bs->drv->bdrv_is_allocated) { 1811 if (sector_num >= bs->total_sectors) { 1812 *pnum = 0; 1813 return 0; 1814 } 1815 n = bs->total_sectors - sector_num; 1816 *pnum = (n < nb_sectors) ? 
(n) : (nb_sectors); 1817 return 1; 1818 } 1819 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum); 1820 } 1821 1822 void bdrv_mon_event(const BlockDriverState *bdrv, 1823 BlockMonEventAction action, int is_read) 1824 { 1825 QObject *data; 1826 const char *action_str; 1827 1828 switch (action) { 1829 case BDRV_ACTION_REPORT: 1830 action_str = "report"; 1831 break; 1832 case BDRV_ACTION_IGNORE: 1833 action_str = "ignore"; 1834 break; 1835 case BDRV_ACTION_STOP: 1836 action_str = "stop"; 1837 break; 1838 default: 1839 abort(); 1840 } 1841 1842 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }", 1843 bdrv->device_name, 1844 action_str, 1845 is_read ? "read" : "write"); 1846 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data); 1847 1848 qobject_decref(data); 1849 } 1850 1851 static void bdrv_print_dict(QObject *obj, void *opaque) 1852 { 1853 QDict *bs_dict; 1854 Monitor *mon = opaque; 1855 1856 bs_dict = qobject_to_qdict(obj); 1857 1858 monitor_printf(mon, "%s: removable=%d", 1859 qdict_get_str(bs_dict, "device"), 1860 qdict_get_bool(bs_dict, "removable")); 1861 1862 if (qdict_get_bool(bs_dict, "removable")) { 1863 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked")); 1864 monitor_printf(mon, " tray-open=%d", 1865 qdict_get_bool(bs_dict, "tray-open")); 1866 } 1867 if (qdict_haskey(bs_dict, "inserted")) { 1868 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted")); 1869 1870 monitor_printf(mon, " file="); 1871 monitor_print_filename(mon, qdict_get_str(qdict, "file")); 1872 if (qdict_haskey(qdict, "backing_file")) { 1873 monitor_printf(mon, " backing_file="); 1874 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file")); 1875 } 1876 monitor_printf(mon, " ro=%d drv=%s encrypted=%d", 1877 qdict_get_bool(qdict, "ro"), 1878 qdict_get_str(qdict, "drv"), 1879 qdict_get_bool(qdict, "encrypted")); 1880 } else { 1881 monitor_printf(mon, " [not inserted]"); 1882 } 1883 1884 monitor_printf(mon, "\n"); 1885 } 
1886 1887 void bdrv_info_print(Monitor *mon, const QObject *data) 1888 { 1889 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon); 1890 } 1891 1892 void bdrv_info(Monitor *mon, QObject **ret_data) 1893 { 1894 QList *bs_list; 1895 BlockDriverState *bs; 1896 1897 bs_list = qlist_new(); 1898 1899 QTAILQ_FOREACH(bs, &bdrv_states, list) { 1900 QObject *bs_obj; 1901 QDict *bs_dict; 1902 1903 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', " 1904 "'removable': %i, 'locked': %i }", 1905 bs->device_name, 1906 bdrv_dev_has_removable_media(bs), 1907 bdrv_dev_is_medium_locked(bs)); 1908 bs_dict = qobject_to_qdict(bs_obj); 1909 1910 if (bdrv_dev_has_removable_media(bs)) { 1911 qdict_put(bs_dict, "tray-open", 1912 qbool_from_int(bdrv_dev_is_tray_open(bs))); 1913 } 1914 if (bs->drv) { 1915 QObject *obj; 1916 1917 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, " 1918 "'encrypted': %i }", 1919 bs->filename, bs->read_only, 1920 bs->drv->format_name, 1921 bdrv_is_encrypted(bs)); 1922 if (bs->backing_file[0] != '\0') { 1923 QDict *qdict = qobject_to_qdict(obj); 1924 qdict_put(qdict, "backing_file", 1925 qstring_from_str(bs->backing_file)); 1926 } 1927 1928 qdict_put_obj(bs_dict, "inserted", obj); 1929 } 1930 qlist_append_obj(bs_list, bs_obj); 1931 } 1932 1933 *ret_data = QOBJECT(bs_list); 1934 } 1935 1936 static void bdrv_stats_iter(QObject *data, void *opaque) 1937 { 1938 QDict *qdict; 1939 Monitor *mon = opaque; 1940 1941 qdict = qobject_to_qdict(data); 1942 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device")); 1943 1944 qdict = qobject_to_qdict(qdict_get(qdict, "stats")); 1945 monitor_printf(mon, " rd_bytes=%" PRId64 1946 " wr_bytes=%" PRId64 1947 " rd_operations=%" PRId64 1948 " wr_operations=%" PRId64 1949 " flush_operations=%" PRId64 1950 " wr_total_time_ns=%" PRId64 1951 " rd_total_time_ns=%" PRId64 1952 " flush_total_time_ns=%" PRId64 1953 "\n", 1954 qdict_get_int(qdict, "rd_bytes"), 1955 qdict_get_int(qdict, "wr_bytes"), 1956 
qdict_get_int(qdict, "rd_operations"), 1957 qdict_get_int(qdict, "wr_operations"), 1958 qdict_get_int(qdict, "flush_operations"), 1959 qdict_get_int(qdict, "wr_total_time_ns"), 1960 qdict_get_int(qdict, "rd_total_time_ns"), 1961 qdict_get_int(qdict, "flush_total_time_ns")); 1962 } 1963 1964 void bdrv_stats_print(Monitor *mon, const QObject *data) 1965 { 1966 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon); 1967 } 1968 1969 static QObject* bdrv_info_stats_bs(BlockDriverState *bs) 1970 { 1971 QObject *res; 1972 QDict *dict; 1973 1974 res = qobject_from_jsonf("{ 'stats': {" 1975 "'rd_bytes': %" PRId64 "," 1976 "'wr_bytes': %" PRId64 "," 1977 "'rd_operations': %" PRId64 "," 1978 "'wr_operations': %" PRId64 "," 1979 "'wr_highest_offset': %" PRId64 "," 1980 "'flush_operations': %" PRId64 "," 1981 "'wr_total_time_ns': %" PRId64 "," 1982 "'rd_total_time_ns': %" PRId64 "," 1983 "'flush_total_time_ns': %" PRId64 1984 "} }", 1985 bs->nr_bytes[BDRV_ACCT_READ], 1986 bs->nr_bytes[BDRV_ACCT_WRITE], 1987 bs->nr_ops[BDRV_ACCT_READ], 1988 bs->nr_ops[BDRV_ACCT_WRITE], 1989 bs->wr_highest_sector * 1990 (uint64_t)BDRV_SECTOR_SIZE, 1991 bs->nr_ops[BDRV_ACCT_FLUSH], 1992 bs->total_time_ns[BDRV_ACCT_WRITE], 1993 bs->total_time_ns[BDRV_ACCT_READ], 1994 bs->total_time_ns[BDRV_ACCT_FLUSH]); 1995 dict = qobject_to_qdict(res); 1996 1997 if (*bs->device_name) { 1998 qdict_put(dict, "device", qstring_from_str(bs->device_name)); 1999 } 2000 2001 if (bs->file) { 2002 QObject *parent = bdrv_info_stats_bs(bs->file); 2003 qdict_put_obj(dict, "parent", parent); 2004 } 2005 2006 return res; 2007 } 2008 2009 void bdrv_info_stats(Monitor *mon, QObject **ret_data) 2010 { 2011 QObject *obj; 2012 QList *devices; 2013 BlockDriverState *bs; 2014 2015 devices = qlist_new(); 2016 2017 QTAILQ_FOREACH(bs, &bdrv_states, list) { 2018 obj = bdrv_info_stats_bs(bs); 2019 qlist_append_obj(devices, obj); 2020 } 2021 2022 *ret_data = QOBJECT(devices); 2023 } 2024 2025 const char 
*bdrv_get_encrypted_filename(BlockDriverState *bs) 2026 { 2027 if (bs->backing_hd && bs->backing_hd->encrypted) 2028 return bs->backing_file; 2029 else if (bs->encrypted) 2030 return bs->filename; 2031 else 2032 return NULL; 2033 } 2034 2035 void bdrv_get_backing_filename(BlockDriverState *bs, 2036 char *filename, int filename_size) 2037 { 2038 if (!bs->backing_file) { 2039 pstrcpy(filename, filename_size, ""); 2040 } else { 2041 pstrcpy(filename, filename_size, bs->backing_file); 2042 } 2043 } 2044 2045 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 2046 const uint8_t *buf, int nb_sectors) 2047 { 2048 BlockDriver *drv = bs->drv; 2049 if (!drv) 2050 return -ENOMEDIUM; 2051 if (!drv->bdrv_write_compressed) 2052 return -ENOTSUP; 2053 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2054 return -EIO; 2055 2056 if (bs->dirty_bitmap) { 2057 set_dirty_bitmap(bs, sector_num, nb_sectors, 1); 2058 } 2059 2060 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 2061 } 2062 2063 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2064 { 2065 BlockDriver *drv = bs->drv; 2066 if (!drv) 2067 return -ENOMEDIUM; 2068 if (!drv->bdrv_get_info) 2069 return -ENOTSUP; 2070 memset(bdi, 0, sizeof(*bdi)); 2071 return drv->bdrv_get_info(bs, bdi); 2072 } 2073 2074 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 2075 int64_t pos, int size) 2076 { 2077 BlockDriver *drv = bs->drv; 2078 if (!drv) 2079 return -ENOMEDIUM; 2080 if (drv->bdrv_save_vmstate) 2081 return drv->bdrv_save_vmstate(bs, buf, pos, size); 2082 if (bs->file) 2083 return bdrv_save_vmstate(bs->file, buf, pos, size); 2084 return -ENOTSUP; 2085 } 2086 2087 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 2088 int64_t pos, int size) 2089 { 2090 BlockDriver *drv = bs->drv; 2091 if (!drv) 2092 return -ENOMEDIUM; 2093 if (drv->bdrv_load_vmstate) 2094 return drv->bdrv_load_vmstate(bs, buf, pos, size); 2095 if (bs->file) 2096 return bdrv_load_vmstate(bs->file, 
buf, pos, size); 2097 return -ENOTSUP; 2098 } 2099 2100 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 2101 { 2102 BlockDriver *drv = bs->drv; 2103 2104 if (!drv || !drv->bdrv_debug_event) { 2105 return; 2106 } 2107 2108 return drv->bdrv_debug_event(bs, event); 2109 2110 } 2111 2112 /**************************************************************/ 2113 /* handling of snapshots */ 2114 2115 int bdrv_can_snapshot(BlockDriverState *bs) 2116 { 2117 BlockDriver *drv = bs->drv; 2118 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 2119 return 0; 2120 } 2121 2122 if (!drv->bdrv_snapshot_create) { 2123 if (bs->file != NULL) { 2124 return bdrv_can_snapshot(bs->file); 2125 } 2126 return 0; 2127 } 2128 2129 return 1; 2130 } 2131 2132 int bdrv_is_snapshot(BlockDriverState *bs) 2133 { 2134 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 2135 } 2136 2137 BlockDriverState *bdrv_snapshots(void) 2138 { 2139 BlockDriverState *bs; 2140 2141 if (bs_snapshots) { 2142 return bs_snapshots; 2143 } 2144 2145 bs = NULL; 2146 while ((bs = bdrv_next(bs))) { 2147 if (bdrv_can_snapshot(bs)) { 2148 bs_snapshots = bs; 2149 return bs; 2150 } 2151 } 2152 return NULL; 2153 } 2154 2155 int bdrv_snapshot_create(BlockDriverState *bs, 2156 QEMUSnapshotInfo *sn_info) 2157 { 2158 BlockDriver *drv = bs->drv; 2159 if (!drv) 2160 return -ENOMEDIUM; 2161 if (drv->bdrv_snapshot_create) 2162 return drv->bdrv_snapshot_create(bs, sn_info); 2163 if (bs->file) 2164 return bdrv_snapshot_create(bs->file, sn_info); 2165 return -ENOTSUP; 2166 } 2167 2168 int bdrv_snapshot_goto(BlockDriverState *bs, 2169 const char *snapshot_id) 2170 { 2171 BlockDriver *drv = bs->drv; 2172 int ret, open_ret; 2173 2174 if (!drv) 2175 return -ENOMEDIUM; 2176 if (drv->bdrv_snapshot_goto) 2177 return drv->bdrv_snapshot_goto(bs, snapshot_id); 2178 2179 if (bs->file) { 2180 drv->bdrv_close(bs); 2181 ret = bdrv_snapshot_goto(bs->file, snapshot_id); 2182 open_ret = drv->bdrv_open(bs, bs->open_flags); 2183 if (open_ret 
< 0) { 2184 bdrv_delete(bs->file); 2185 bs->drv = NULL; 2186 return open_ret; 2187 } 2188 return ret; 2189 } 2190 2191 return -ENOTSUP; 2192 } 2193 2194 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) 2195 { 2196 BlockDriver *drv = bs->drv; 2197 if (!drv) 2198 return -ENOMEDIUM; 2199 if (drv->bdrv_snapshot_delete) 2200 return drv->bdrv_snapshot_delete(bs, snapshot_id); 2201 if (bs->file) 2202 return bdrv_snapshot_delete(bs->file, snapshot_id); 2203 return -ENOTSUP; 2204 } 2205 2206 int bdrv_snapshot_list(BlockDriverState *bs, 2207 QEMUSnapshotInfo **psn_info) 2208 { 2209 BlockDriver *drv = bs->drv; 2210 if (!drv) 2211 return -ENOMEDIUM; 2212 if (drv->bdrv_snapshot_list) 2213 return drv->bdrv_snapshot_list(bs, psn_info); 2214 if (bs->file) 2215 return bdrv_snapshot_list(bs->file, psn_info); 2216 return -ENOTSUP; 2217 } 2218 2219 int bdrv_snapshot_load_tmp(BlockDriverState *bs, 2220 const char *snapshot_name) 2221 { 2222 BlockDriver *drv = bs->drv; 2223 if (!drv) { 2224 return -ENOMEDIUM; 2225 } 2226 if (!bs->read_only) { 2227 return -EINVAL; 2228 } 2229 if (drv->bdrv_snapshot_load_tmp) { 2230 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); 2231 } 2232 return -ENOTSUP; 2233 } 2234 2235 #define NB_SUFFIXES 4 2236 2237 char *get_human_readable_size(char *buf, int buf_size, int64_t size) 2238 { 2239 static const char suffixes[NB_SUFFIXES] = "KMGT"; 2240 int64_t base; 2241 int i; 2242 2243 if (size <= 999) { 2244 snprintf(buf, buf_size, "%" PRId64, size); 2245 } else { 2246 base = 1024; 2247 for(i = 0; i < NB_SUFFIXES; i++) { 2248 if (size < (10 * base)) { 2249 snprintf(buf, buf_size, "%0.1f%c", 2250 (double)size / base, 2251 suffixes[i]); 2252 break; 2253 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) { 2254 snprintf(buf, buf_size, "%" PRId64 "%c", 2255 ((size + (base >> 1)) / base), 2256 suffixes[i]); 2257 break; 2258 } 2259 base = base * 1024; 2260 } 2261 } 2262 return buf; 2263 } 2264 2265 char *bdrv_snapshot_dump(char *buf, 
int buf_size, QEMUSnapshotInfo *sn) 2266 { 2267 char buf1[128], date_buf[128], clock_buf[128]; 2268 #ifdef _WIN32 2269 struct tm *ptm; 2270 #else 2271 struct tm tm; 2272 #endif 2273 time_t ti; 2274 int64_t secs; 2275 2276 if (!sn) { 2277 snprintf(buf, buf_size, 2278 "%-10s%-20s%7s%20s%15s", 2279 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK"); 2280 } else { 2281 ti = sn->date_sec; 2282 #ifdef _WIN32 2283 ptm = localtime(&ti); 2284 strftime(date_buf, sizeof(date_buf), 2285 "%Y-%m-%d %H:%M:%S", ptm); 2286 #else 2287 localtime_r(&ti, &tm); 2288 strftime(date_buf, sizeof(date_buf), 2289 "%Y-%m-%d %H:%M:%S", &tm); 2290 #endif 2291 secs = sn->vm_clock_nsec / 1000000000; 2292 snprintf(clock_buf, sizeof(clock_buf), 2293 "%02d:%02d:%02d.%03d", 2294 (int)(secs / 3600), 2295 (int)((secs / 60) % 60), 2296 (int)(secs % 60), 2297 (int)((sn->vm_clock_nsec / 1000000) % 1000)); 2298 snprintf(buf, buf_size, 2299 "%-10s%-20s%7s%20s%15s", 2300 sn->id_str, sn->name, 2301 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size), 2302 date_buf, 2303 clock_buf); 2304 } 2305 return buf; 2306 } 2307 2308 /**************************************************************/ 2309 /* async I/Os */ 2310 2311 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 2312 QEMUIOVector *qiov, int nb_sectors, 2313 BlockDriverCompletionFunc *cb, void *opaque) 2314 { 2315 BlockDriver *drv = bs->drv; 2316 2317 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 2318 2319 if (!drv) 2320 return NULL; 2321 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2322 return NULL; 2323 2324 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, 2325 cb, opaque); 2326 } 2327 2328 typedef struct BlockCompleteData { 2329 BlockDriverCompletionFunc *cb; 2330 void *opaque; 2331 BlockDriverState *bs; 2332 int64_t sector_num; 2333 int nb_sectors; 2334 } BlockCompleteData; 2335 2336 static void block_complete_cb(void *opaque, int ret) 2337 { 2338 BlockCompleteData *b = opaque; 2339 2340 if 
(b->bs->dirty_bitmap) { 2341 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1); 2342 } 2343 b->cb(b->opaque, ret); 2344 g_free(b); 2345 } 2346 2347 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs, 2348 int64_t sector_num, 2349 int nb_sectors, 2350 BlockDriverCompletionFunc *cb, 2351 void *opaque) 2352 { 2353 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData)); 2354 2355 blkdata->bs = bs; 2356 blkdata->cb = cb; 2357 blkdata->opaque = opaque; 2358 blkdata->sector_num = sector_num; 2359 blkdata->nb_sectors = nb_sectors; 2360 2361 return blkdata; 2362 } 2363 2364 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 2365 QEMUIOVector *qiov, int nb_sectors, 2366 BlockDriverCompletionFunc *cb, void *opaque) 2367 { 2368 BlockDriver *drv = bs->drv; 2369 BlockDriverAIOCB *ret; 2370 BlockCompleteData *blk_cb_data; 2371 2372 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 2373 2374 if (!drv) 2375 return NULL; 2376 if (bs->read_only) 2377 return NULL; 2378 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2379 return NULL; 2380 2381 if (bs->dirty_bitmap) { 2382 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb, 2383 opaque); 2384 cb = &block_complete_cb; 2385 opaque = blk_cb_data; 2386 } 2387 2388 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors, 2389 cb, opaque); 2390 2391 if (ret) { 2392 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 2393 bs->wr_highest_sector = sector_num + nb_sectors - 1; 2394 } 2395 } 2396 2397 return ret; 2398 } 2399 2400 2401 typedef struct MultiwriteCB { 2402 int error; 2403 int num_requests; 2404 int num_callbacks; 2405 struct { 2406 BlockDriverCompletionFunc *cb; 2407 void *opaque; 2408 QEMUIOVector *free_qiov; 2409 void *free_buf; 2410 } callbacks[]; 2411 } MultiwriteCB; 2412 2413 static void multiwrite_user_cb(MultiwriteCB *mcb) 2414 { 2415 int i; 2416 2417 for (i = 0; i < mcb->num_callbacks; i++) { 2418 
mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 2419 if (mcb->callbacks[i].free_qiov) { 2420 qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 2421 } 2422 g_free(mcb->callbacks[i].free_qiov); 2423 qemu_vfree(mcb->callbacks[i].free_buf); 2424 } 2425 } 2426 2427 static void multiwrite_cb(void *opaque, int ret) 2428 { 2429 MultiwriteCB *mcb = opaque; 2430 2431 trace_multiwrite_cb(mcb, ret); 2432 2433 if (ret < 0 && !mcb->error) { 2434 mcb->error = ret; 2435 } 2436 2437 mcb->num_requests--; 2438 if (mcb->num_requests == 0) { 2439 multiwrite_user_cb(mcb); 2440 g_free(mcb); 2441 } 2442 } 2443 2444 static int multiwrite_req_compare(const void *a, const void *b) 2445 { 2446 const BlockRequest *req1 = a, *req2 = b; 2447 2448 /* 2449 * Note that we can't simply subtract req2->sector from req1->sector 2450 * here as that could overflow the return value. 2451 */ 2452 if (req1->sector > req2->sector) { 2453 return 1; 2454 } else if (req1->sector < req2->sector) { 2455 return -1; 2456 } else { 2457 return 0; 2458 } 2459 } 2460 2461 /* 2462 * Takes a bunch of requests and tries to merge them. Returns the number of 2463 * requests that remain after merging. 2464 */ 2465 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 2466 int num_reqs, MultiwriteCB *mcb) 2467 { 2468 int i, outidx; 2469 2470 // Sort requests by start sector 2471 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 2472 2473 // Check if adjacent requests touch the same clusters. If so, combine them, 2474 // filling up gaps with zero sectors. 2475 outidx = 0; 2476 for (i = 1; i < num_reqs; i++) { 2477 int merge = 0; 2478 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 2479 2480 // This handles the cases that are valid for all block drivers, namely 2481 // exactly sequential writes and overlapping writes. 
if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // Never build a merged vector with more than IOV_MAX entries; the
        // "+ 1" accounts for the optional zero-fill buffer added below.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            // ("<< 9" converts a sector count into bytes.)
            // NOTE(review): if reqs[i] ends before oldreq_last, the part of
            // the first request beyond the end of reqs[i] does not appear to
            // be added back anywhere - confirm this cannot happen.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                // multiwrite_user_cb() frees this buffer
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // multiwrite_user_cb() destroys and frees the merged vector
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // Not mergeable: move request i down to the next output slot
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    return outidx + 1;
}

/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted.
In error case this function returns -1, and any of the 2541 * requests may or may not be submitted yet. In particular, this means that the 2542 * callback will be called for some of the requests, for others it won't. The 2543 * caller must check the error field of the BlockRequest to wait for the right 2544 * callbacks (if error != 0, no callback will be called). 2545 * 2546 * The implementation may modify the contents of the reqs array, e.g. to merge 2547 * requests. However, the fields opaque and error are left unmodified as they 2548 * are used to signal failure for a single request to the caller. 2549 */ 2550 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 2551 { 2552 BlockDriverAIOCB *acb; 2553 MultiwriteCB *mcb; 2554 int i; 2555 2556 /* don't submit writes if we don't have a medium */ 2557 if (bs->drv == NULL) { 2558 for (i = 0; i < num_reqs; i++) { 2559 reqs[i].error = -ENOMEDIUM; 2560 } 2561 return -1; 2562 } 2563 2564 if (num_reqs == 0) { 2565 return 0; 2566 } 2567 2568 // Create MultiwriteCB structure 2569 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 2570 mcb->num_requests = 0; 2571 mcb->num_callbacks = num_reqs; 2572 2573 for (i = 0; i < num_reqs; i++) { 2574 mcb->callbacks[i].cb = reqs[i].cb; 2575 mcb->callbacks[i].opaque = reqs[i].opaque; 2576 } 2577 2578 // Check for mergable requests 2579 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 2580 2581 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 2582 2583 /* 2584 * Run the aio requests. As soon as one request can't be submitted 2585 * successfully, fail all requests that are not yet submitted (we must 2586 * return failure for all requests anyway) 2587 * 2588 * num_requests cannot be set to the right value immediately: If 2589 * bdrv_aio_writev fails for some request, num_requests would be too high 2590 * and therefore multiwrite_cb() would never recognize the multiwrite 2591 * request as completed. 
We also cannot use the loop variable i to set it 2592 * when the first request fails because the callback may already have been 2593 * called for previously submitted requests. Thus, num_requests must be 2594 * incremented for each request that is submitted. 2595 * 2596 * The problem that callbacks may be called early also means that we need 2597 * to take care that num_requests doesn't become 0 before all requests are 2598 * submitted - multiwrite_cb() would consider the multiwrite request 2599 * completed. A dummy request that is "completed" by a manual call to 2600 * multiwrite_cb() takes care of this. 2601 */ 2602 mcb->num_requests = 1; 2603 2604 // Run the aio requests 2605 for (i = 0; i < num_reqs; i++) { 2606 mcb->num_requests++; 2607 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, 2608 reqs[i].nb_sectors, multiwrite_cb, mcb); 2609 2610 if (acb == NULL) { 2611 // We can only fail the whole thing if no request has been 2612 // submitted yet. Otherwise we'll wait for the submitted AIOs to 2613 // complete and report the error in the callback. 
2614 if (i == 0) { 2615 trace_bdrv_aio_multiwrite_earlyfail(mcb); 2616 goto fail; 2617 } else { 2618 trace_bdrv_aio_multiwrite_latefail(mcb, i); 2619 multiwrite_cb(mcb, -EIO); 2620 break; 2621 } 2622 } 2623 } 2624 2625 /* Complete the dummy request */ 2626 multiwrite_cb(mcb, 0); 2627 2628 return 0; 2629 2630 fail: 2631 for (i = 0; i < mcb->num_callbacks; i++) { 2632 reqs[i].error = -EIO; 2633 } 2634 g_free(mcb); 2635 return -1; 2636 } 2637 2638 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 2639 BlockDriverCompletionFunc *cb, void *opaque) 2640 { 2641 BlockDriver *drv = bs->drv; 2642 2643 trace_bdrv_aio_flush(bs, opaque); 2644 2645 if (bs->open_flags & BDRV_O_NO_FLUSH) { 2646 return bdrv_aio_noop_em(bs, cb, opaque); 2647 } 2648 2649 if (!drv) 2650 return NULL; 2651 return drv->bdrv_aio_flush(bs, cb, opaque); 2652 } 2653 2654 void bdrv_aio_cancel(BlockDriverAIOCB *acb) 2655 { 2656 acb->pool->cancel(acb); 2657 } 2658 2659 2660 /**************************************************************/ 2661 /* async block device emulation */ 2662 2663 typedef struct BlockDriverAIOCBSync { 2664 BlockDriverAIOCB common; 2665 QEMUBH *bh; 2666 int ret; 2667 /* vector translation state */ 2668 QEMUIOVector *qiov; 2669 uint8_t *bounce; 2670 int is_write; 2671 } BlockDriverAIOCBSync; 2672 2673 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) 2674 { 2675 BlockDriverAIOCBSync *acb = 2676 container_of(blockacb, BlockDriverAIOCBSync, common); 2677 qemu_bh_delete(acb->bh); 2678 acb->bh = NULL; 2679 qemu_aio_release(acb); 2680 } 2681 2682 static AIOPool bdrv_em_aio_pool = { 2683 .aiocb_size = sizeof(BlockDriverAIOCBSync), 2684 .cancel = bdrv_aio_cancel_em, 2685 }; 2686 2687 static void bdrv_aio_bh_cb(void *opaque) 2688 { 2689 BlockDriverAIOCBSync *acb = opaque; 2690 2691 if (!acb->is_write) 2692 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); 2693 qemu_vfree(acb->bounce); 2694 acb->common.cb(acb->common.opaque, acb->ret); 2695 qemu_bh_delete(acb->bh); 2696 
acb->bh = NULL; 2697 qemu_aio_release(acb); 2698 } 2699 2700 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 2701 int64_t sector_num, 2702 QEMUIOVector *qiov, 2703 int nb_sectors, 2704 BlockDriverCompletionFunc *cb, 2705 void *opaque, 2706 int is_write) 2707 2708 { 2709 BlockDriverAIOCBSync *acb; 2710 2711 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2712 acb->is_write = is_write; 2713 acb->qiov = qiov; 2714 acb->bounce = qemu_blockalign(bs, qiov->size); 2715 2716 if (!acb->bh) 2717 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2718 2719 if (is_write) { 2720 qemu_iovec_to_buffer(acb->qiov, acb->bounce); 2721 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 2722 } else { 2723 acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 2724 } 2725 2726 qemu_bh_schedule(acb->bh); 2727 2728 return &acb->common; 2729 } 2730 2731 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 2732 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2733 BlockDriverCompletionFunc *cb, void *opaque) 2734 { 2735 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 2736 } 2737 2738 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 2739 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2740 BlockDriverCompletionFunc *cb, void *opaque) 2741 { 2742 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 2743 } 2744 2745 2746 typedef struct BlockDriverAIOCBCoroutine { 2747 BlockDriverAIOCB common; 2748 BlockRequest req; 2749 bool is_write; 2750 QEMUBH* bh; 2751 } BlockDriverAIOCBCoroutine; 2752 2753 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) 2754 { 2755 qemu_aio_flush(); 2756 } 2757 2758 static AIOPool bdrv_em_co_aio_pool = { 2759 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), 2760 .cancel = bdrv_aio_co_cancel_em, 2761 }; 2762 2763 static void bdrv_co_rw_bh(void *opaque) 2764 { 2765 BlockDriverAIOCBCoroutine *acb = opaque; 2766 2767 
acb->common.cb(acb->common.opaque, acb->req.error); 2768 qemu_bh_delete(acb->bh); 2769 qemu_aio_release(acb); 2770 } 2771 2772 static void coroutine_fn bdrv_co_rw(void *opaque) 2773 { 2774 BlockDriverAIOCBCoroutine *acb = opaque; 2775 BlockDriverState *bs = acb->common.bs; 2776 2777 if (!acb->is_write) { 2778 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector, 2779 acb->req.nb_sectors, acb->req.qiov); 2780 } else { 2781 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector, 2782 acb->req.nb_sectors, acb->req.qiov); 2783 } 2784 2785 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb); 2786 qemu_bh_schedule(acb->bh); 2787 } 2788 2789 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 2790 int64_t sector_num, 2791 QEMUIOVector *qiov, 2792 int nb_sectors, 2793 BlockDriverCompletionFunc *cb, 2794 void *opaque, 2795 bool is_write) 2796 { 2797 Coroutine *co; 2798 BlockDriverAIOCBCoroutine *acb; 2799 2800 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); 2801 acb->req.sector = sector_num; 2802 acb->req.nb_sectors = nb_sectors; 2803 acb->req.qiov = qiov; 2804 acb->is_write = is_write; 2805 2806 co = qemu_coroutine_create(bdrv_co_rw); 2807 qemu_coroutine_enter(co, acb); 2808 2809 return &acb->common; 2810 } 2811 2812 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, 2813 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2814 BlockDriverCompletionFunc *cb, void *opaque) 2815 { 2816 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 2817 false); 2818 } 2819 2820 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, 2821 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2822 BlockDriverCompletionFunc *cb, void *opaque) 2823 { 2824 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 2825 true); 2826 } 2827 2828 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, 2829 BlockDriverCompletionFunc *cb, void *opaque) 2830 { 2831 BlockDriverAIOCBSync 
*acb; 2832 2833 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2834 acb->is_write = 1; /* don't bounce in the completion hadler */ 2835 acb->qiov = NULL; 2836 acb->bounce = NULL; 2837 acb->ret = 0; 2838 2839 if (!acb->bh) 2840 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2841 2842 bdrv_flush(bs); 2843 qemu_bh_schedule(acb->bh); 2844 return &acb->common; 2845 } 2846 2847 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, 2848 BlockDriverCompletionFunc *cb, void *opaque) 2849 { 2850 BlockDriverAIOCBSync *acb; 2851 2852 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2853 acb->is_write = 1; /* don't bounce in the completion handler */ 2854 acb->qiov = NULL; 2855 acb->bounce = NULL; 2856 acb->ret = 0; 2857 2858 if (!acb->bh) { 2859 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2860 } 2861 2862 qemu_bh_schedule(acb->bh); 2863 return &acb->common; 2864 } 2865 2866 /**************************************************************/ 2867 /* sync block device emulation */ 2868 2869 static void bdrv_rw_em_cb(void *opaque, int ret) 2870 { 2871 *(int *)opaque = ret; 2872 } 2873 2874 #define NOT_DONE 0x7fffffff 2875 2876 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 2877 uint8_t *buf, int nb_sectors) 2878 { 2879 int async_ret; 2880 BlockDriverAIOCB *acb; 2881 struct iovec iov; 2882 QEMUIOVector qiov; 2883 2884 async_ret = NOT_DONE; 2885 iov.iov_base = (void *)buf; 2886 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2887 qemu_iovec_init_external(&qiov, &iov, 1); 2888 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, 2889 bdrv_rw_em_cb, &async_ret); 2890 if (acb == NULL) { 2891 async_ret = -1; 2892 goto fail; 2893 } 2894 2895 while (async_ret == NOT_DONE) { 2896 qemu_aio_wait(); 2897 } 2898 2899 2900 fail: 2901 return async_ret; 2902 } 2903 2904 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, 2905 const uint8_t *buf, int nb_sectors) 2906 { 2907 int async_ret; 2908 BlockDriverAIOCB *acb; 2909 struct iovec iov; 2910 
QEMUIOVector qiov; 2911 2912 async_ret = NOT_DONE; 2913 iov.iov_base = (void *)buf; 2914 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2915 qemu_iovec_init_external(&qiov, &iov, 1); 2916 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, 2917 bdrv_rw_em_cb, &async_ret); 2918 if (acb == NULL) { 2919 async_ret = -1; 2920 goto fail; 2921 } 2922 while (async_ret == NOT_DONE) { 2923 qemu_aio_wait(); 2924 } 2925 2926 fail: 2927 return async_ret; 2928 } 2929 2930 void bdrv_init(void) 2931 { 2932 module_call_init(MODULE_INIT_BLOCK); 2933 } 2934 2935 void bdrv_init_with_whitelist(void) 2936 { 2937 use_bdrv_whitelist = 1; 2938 bdrv_init(); 2939 } 2940 2941 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, 2942 BlockDriverCompletionFunc *cb, void *opaque) 2943 { 2944 BlockDriverAIOCB *acb; 2945 2946 if (pool->free_aiocb) { 2947 acb = pool->free_aiocb; 2948 pool->free_aiocb = acb->next; 2949 } else { 2950 acb = g_malloc0(pool->aiocb_size); 2951 acb->pool = pool; 2952 } 2953 acb->bs = bs; 2954 acb->cb = cb; 2955 acb->opaque = opaque; 2956 return acb; 2957 } 2958 2959 void qemu_aio_release(void *p) 2960 { 2961 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p; 2962 AIOPool *pool = acb->pool; 2963 acb->next = pool->free_aiocb; 2964 pool->free_aiocb = acb; 2965 } 2966 2967 /**************************************************************/ 2968 /* Coroutine block device emulation */ 2969 2970 typedef struct CoroutineIOCompletion { 2971 Coroutine *coroutine; 2972 int ret; 2973 } CoroutineIOCompletion; 2974 2975 static void bdrv_co_io_em_complete(void *opaque, int ret) 2976 { 2977 CoroutineIOCompletion *co = opaque; 2978 2979 co->ret = ret; 2980 qemu_coroutine_enter(co->coroutine, NULL); 2981 } 2982 2983 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 2984 int nb_sectors, QEMUIOVector *iov, 2985 bool is_write) 2986 { 2987 CoroutineIOCompletion co = { 2988 .coroutine = qemu_coroutine_self(), 2989 }; 2990 BlockDriverAIOCB *acb; 2991 2992 if 
(is_write) { 2993 acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 2994 bdrv_co_io_em_complete, &co); 2995 } else { 2996 acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 2997 bdrv_co_io_em_complete, &co); 2998 } 2999 3000 trace_bdrv_co_io(is_write, acb); 3001 if (!acb) { 3002 return -EIO; 3003 } 3004 qemu_coroutine_yield(); 3005 3006 return co.ret; 3007 } 3008 3009 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 3010 int64_t sector_num, int nb_sectors, 3011 QEMUIOVector *iov) 3012 { 3013 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 3014 } 3015 3016 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 3017 int64_t sector_num, int nb_sectors, 3018 QEMUIOVector *iov) 3019 { 3020 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 3021 } 3022 3023 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs) 3024 { 3025 CoroutineIOCompletion co = { 3026 .coroutine = qemu_coroutine_self(), 3027 }; 3028 BlockDriverAIOCB *acb; 3029 3030 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 3031 if (!acb) { 3032 return -EIO; 3033 } 3034 qemu_coroutine_yield(); 3035 return co.ret; 3036 } 3037 3038 /**************************************************************/ 3039 /* removable device support */ 3040 3041 /** 3042 * Return TRUE if the media is present 3043 */ 3044 int bdrv_is_inserted(BlockDriverState *bs) 3045 { 3046 BlockDriver *drv = bs->drv; 3047 3048 if (!drv) 3049 return 0; 3050 if (!drv->bdrv_is_inserted) 3051 return 1; 3052 return drv->bdrv_is_inserted(bs); 3053 } 3054 3055 /** 3056 * Return whether the media changed since the last call to this 3057 * function, or -ENOTSUP if we don't know. Most drivers don't know. 3058 */ 3059 int bdrv_media_changed(BlockDriverState *bs) 3060 { 3061 BlockDriver *drv = bs->drv; 3062 3063 if (drv && drv->bdrv_media_changed) { 3064 return drv->bdrv_media_changed(bs); 3065 } 3066 return -ENOTSUP; 3067 } 3068 3069 /** 3070 * If eject_flag is TRUE, eject the media. 
Otherwise, close the tray 3071 */ 3072 void bdrv_eject(BlockDriverState *bs, int eject_flag) 3073 { 3074 BlockDriver *drv = bs->drv; 3075 3076 if (drv && drv->bdrv_eject) { 3077 drv->bdrv_eject(bs, eject_flag); 3078 } 3079 } 3080 3081 /** 3082 * Lock or unlock the media (if it is locked, the user won't be able 3083 * to eject it manually). 3084 */ 3085 void bdrv_lock_medium(BlockDriverState *bs, bool locked) 3086 { 3087 BlockDriver *drv = bs->drv; 3088 3089 trace_bdrv_lock_medium(bs, locked); 3090 3091 if (drv && drv->bdrv_lock_medium) { 3092 drv->bdrv_lock_medium(bs, locked); 3093 } 3094 } 3095 3096 /* needed for generic scsi interface */ 3097 3098 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 3099 { 3100 BlockDriver *drv = bs->drv; 3101 3102 if (drv && drv->bdrv_ioctl) 3103 return drv->bdrv_ioctl(bs, req, buf); 3104 return -ENOTSUP; 3105 } 3106 3107 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 3108 unsigned long int req, void *buf, 3109 BlockDriverCompletionFunc *cb, void *opaque) 3110 { 3111 BlockDriver *drv = bs->drv; 3112 3113 if (drv && drv->bdrv_aio_ioctl) 3114 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 3115 return NULL; 3116 } 3117 3118 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align) 3119 { 3120 bs->buffer_alignment = align; 3121 } 3122 3123 void *qemu_blockalign(BlockDriverState *bs, size_t size) 3124 { 3125 return qemu_memalign((bs && bs->buffer_alignment) ? 
bs->buffer_alignment : 512, size); 3126 } 3127 3128 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) 3129 { 3130 int64_t bitmap_size; 3131 3132 bs->dirty_count = 0; 3133 if (enable) { 3134 if (!bs->dirty_bitmap) { 3135 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + 3136 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; 3137 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; 3138 3139 bs->dirty_bitmap = g_malloc0(bitmap_size); 3140 } 3141 } else { 3142 if (bs->dirty_bitmap) { 3143 g_free(bs->dirty_bitmap); 3144 bs->dirty_bitmap = NULL; 3145 } 3146 } 3147 } 3148 3149 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) 3150 { 3151 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; 3152 3153 if (bs->dirty_bitmap && 3154 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { 3155 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & 3156 (1UL << (chunk % (sizeof(unsigned long) * 8)))); 3157 } else { 3158 return 0; 3159 } 3160 } 3161 3162 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3163 int nr_sectors) 3164 { 3165 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); 3166 } 3167 3168 int64_t bdrv_get_dirty_count(BlockDriverState *bs) 3169 { 3170 return bs->dirty_count; 3171 } 3172 3173 void bdrv_set_in_use(BlockDriverState *bs, int in_use) 3174 { 3175 assert(bs->in_use != in_use); 3176 bs->in_use = in_use; 3177 } 3178 3179 int bdrv_in_use(BlockDriverState *bs) 3180 { 3181 return bs->in_use; 3182 } 3183 3184 void 3185 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes, 3186 enum BlockAcctType type) 3187 { 3188 assert(type < BDRV_MAX_IOTYPE); 3189 3190 cookie->bytes = bytes; 3191 cookie->start_time_ns = get_clock(); 3192 cookie->type = type; 3193 } 3194 3195 void 3196 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) 3197 { 3198 assert(cookie->type < BDRV_MAX_IOTYPE); 3199 3200 bs->nr_bytes[cookie->type] += cookie->bytes; 3201 bs->nr_ops[cookie->type]++; 3202 
bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; 3203 } 3204 3205 int bdrv_img_create(const char *filename, const char *fmt, 3206 const char *base_filename, const char *base_fmt, 3207 char *options, uint64_t img_size, int flags) 3208 { 3209 QEMUOptionParameter *param = NULL, *create_options = NULL; 3210 QEMUOptionParameter *backing_fmt, *backing_file, *size; 3211 BlockDriverState *bs = NULL; 3212 BlockDriver *drv, *proto_drv; 3213 BlockDriver *backing_drv = NULL; 3214 int ret = 0; 3215 3216 /* Find driver and parse its options */ 3217 drv = bdrv_find_format(fmt); 3218 if (!drv) { 3219 error_report("Unknown file format '%s'", fmt); 3220 ret = -EINVAL; 3221 goto out; 3222 } 3223 3224 proto_drv = bdrv_find_protocol(filename); 3225 if (!proto_drv) { 3226 error_report("Unknown protocol '%s'", filename); 3227 ret = -EINVAL; 3228 goto out; 3229 } 3230 3231 create_options = append_option_parameters(create_options, 3232 drv->create_options); 3233 create_options = append_option_parameters(create_options, 3234 proto_drv->create_options); 3235 3236 /* Create parameter list with default values */ 3237 param = parse_option_parameters("", create_options, param); 3238 3239 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); 3240 3241 /* Parse -o options */ 3242 if (options) { 3243 param = parse_option_parameters(options, create_options, param); 3244 if (param == NULL) { 3245 error_report("Invalid options for file format '%s'.", fmt); 3246 ret = -EINVAL; 3247 goto out; 3248 } 3249 } 3250 3251 if (base_filename) { 3252 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, 3253 base_filename)) { 3254 error_report("Backing file not supported for file format '%s'", 3255 fmt); 3256 ret = -EINVAL; 3257 goto out; 3258 } 3259 } 3260 3261 if (base_fmt) { 3262 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { 3263 error_report("Backing file format not supported for file " 3264 "format '%s'", fmt); 3265 ret = -EINVAL; 3266 goto out; 3267 } 
3268 } 3269 3270 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); 3271 if (backing_file && backing_file->value.s) { 3272 if (!strcmp(filename, backing_file->value.s)) { 3273 error_report("Error: Trying to create an image with the " 3274 "same filename as the backing file"); 3275 ret = -EINVAL; 3276 goto out; 3277 } 3278 } 3279 3280 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); 3281 if (backing_fmt && backing_fmt->value.s) { 3282 backing_drv = bdrv_find_format(backing_fmt->value.s); 3283 if (!backing_drv) { 3284 error_report("Unknown backing file format '%s'", 3285 backing_fmt->value.s); 3286 ret = -EINVAL; 3287 goto out; 3288 } 3289 } 3290 3291 // The size for the image must always be specified, with one exception: 3292 // If we are using a backing file, we can obtain the size from there 3293 size = get_option_parameter(param, BLOCK_OPT_SIZE); 3294 if (size && size->value.n == -1) { 3295 if (backing_file && backing_file->value.s) { 3296 uint64_t size; 3297 char buf[32]; 3298 3299 bs = bdrv_new(""); 3300 3301 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv); 3302 if (ret < 0) { 3303 error_report("Could not open '%s'", backing_file->value.s); 3304 goto out; 3305 } 3306 bdrv_get_geometry(bs, &size); 3307 size *= 512; 3308 3309 snprintf(buf, sizeof(buf), "%" PRId64, size); 3310 set_option_parameter(param, BLOCK_OPT_SIZE, buf); 3311 } else { 3312 error_report("Image creation needs a size parameter"); 3313 ret = -EINVAL; 3314 goto out; 3315 } 3316 } 3317 3318 printf("Formatting '%s', fmt=%s ", filename, fmt); 3319 print_option_parameters(param); 3320 puts(""); 3321 3322 ret = bdrv_create(drv, filename, param); 3323 3324 if (ret < 0) { 3325 if (ret == -ENOTSUP) { 3326 error_report("Formatting or formatting option not supported for " 3327 "file format '%s'", fmt); 3328 } else if (ret == -EFBIG) { 3329 error_report("The image size is too large for file format '%s'", 3330 fmt); 3331 } else { 3332 error_report("%s: error 
while creating %s: %s", filename, fmt, 3333 strerror(-ret)); 3334 } 3335 } 3336 3337 out: 3338 free_option_parameters(create_options); 3339 free_option_parameters(param); 3340 3341 if (bs) { 3342 bdrv_delete(bs); 3343 } 3344 3345 return ret; 3346 } 3347