/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qemu-objects.h"
#include "qemu-coroutine.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
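/*
 * The *_em() prototypes above are emulation helpers: bdrv_register() wires
 * them into a BlockDriver whenever the driver only implements one of the
 * synchronous, AIO or coroutine interfaces, so that all three entry points
 * are always available regardless of what the driver natively provides.
 */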
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    if (bdrv->bdrv_co_readv) {
        /* Emulate AIO by coroutines, and sync by AIO */
        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
        bdrv->bdrv_read = bdrv_read_em;
        bdrv->bdrv_write = bdrv_write_em;
    } else {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        } else if (!bdrv->bdrv_read) {
            /* add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    }

    if (!bdrv->bdrv_aio_flush)
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
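/*
 * Illustrative use (a sketch, not a call site in this file): callers that
 * know the format name look the driver up directly and fall back to probing
 * when the lookup fails, e.g.
 *
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     if (!drv) {
 *         // unknown format name; probe the image contents instead
 *     }
 */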
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
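/*
 * A sketch of the kind of strings bdrv_find_protocol() below deals with
 * (examples only; the exact set depends on which drivers were compiled in):
 *
 *     "/images/disk.img"        no "<protocol>:" prefix -> the "file" driver
 *     "nbd:localhost:10809"     prefix "nbd" -> the nbd protocol driver
 *     "c:\images\disk.img"      Windows drive prefix, not a protocol
 */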
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
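/*
 * Example (sketch) of how a -drive cache= option maps onto the flags set by
 * bdrv_parse_cache_flags() below:
 *
 *     int flags = 0;
 *     if (bdrv_parse_cache_flags("none", &flags) < 0) {
 *         // report "invalid cache option" to the user
 *     }
 *     // "none" selects BDRV_O_NOCACHE | BDRV_O_CACHE_WB,
 *     // "writethrough" leaves the cache bits clear.
 */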
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->open_flags = flags;
    /* buffer_alignment defaulted to 512, drivers can change this value */
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    if (flags & BDRV_O_CACHE_WB)
        bs->enable_write_cache = 1;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char tmp_filename[PATH_MAX];
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        /* call the change callback */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
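/*
 * Example (sketch, not an actual call site here): opening an image
 * read/write with an explicit format, roughly the way drive setup code uses
 * this function:
 *
 *     BlockDriverState *bs = bdrv_new("mydisk");
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     if (bdrv_open(bs, "/images/disk.qcow2", BDRV_O_RDWR, drv) < 0) {
 *         bdrv_delete(bs);
 *         // report the error
 *     }
 *
 * Passing drv == NULL lets find_image_format() probe the image instead.
 */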
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        /* call the change callback */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->peer);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (bs->peer) {
        return -EBUSY;
    }
    bs->peer = qdev;
    return 0;
}

void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(bs->peer == qdev);
    bs->peer = NULL;
    bs->change_cb = NULL;
    bs->change_opaque = NULL;
}

DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    return bs->peer;
}
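/*
 * Example (sketch) of driving the consistency check below, e.g. from an
 * image tool:
 *
 *     BdrvCheckResult result;
 *     if (bdrv_check(bs, &result) == 0) {
 *         // inspect the corruption/leak counters in 'result'
 *         // (see BdrvCheckResult in the block headers)
 *     }
 */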
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    /* pstrcpy rather than strncpy so the copy is always NUL terminated */
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
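/*
 * The two helpers below detect whether a driver has a native coroutine/AIO
 * implementation or is only using the emulation wrappers installed by
 * bdrv_register().  The synchronous bdrv_read()/bdrv_write()/bdrv_flush()
 * entry points use them to decide whether to dispatch through the coroutine
 * path when called from coroutine context.
 */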
static inline bool bdrv_has_async_rw(BlockDriver *drv)
{
    return drv->bdrv_co_readv != bdrv_co_readv_em
        || drv->bdrv_aio_readv != bdrv_aio_readv_em;
}

static inline bool bdrv_has_async_flush(BlockDriver *drv)
{
    return drv->bdrv_aio_flush != bdrv_aio_flush_em;
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return -ENOMEDIUM;

    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
        QEMUIOVector qiov;
        struct iovec iov = {
            .iov_base = (void *)buf,
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
        };

        qemu_iovec_init_external(&qiov, &iov, 1);
        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
    }

    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!bs->drv)
        return -ENOMEDIUM;

    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
        QEMUIOVector qiov;
        struct iovec iov = {
            .iov_base = (void *)buf,
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
        };

        qemu_iovec_init_external(&qiov, &iov, 1);
        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
    }

    if (bs->read_only)
        return -EACCES;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
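/*
 * Example (sketch): reading a small header at an unaligned byte offset with
 * the byte-based helpers above; bdrv_pread()/bdrv_pwrite() handle the sector
 * alignment internally via a bounce buffer:
 *
 *     uint8_t header[512];
 *     if (bdrv_pread(bs, 17, header, sizeof(header)) < 0) {
 *         // handle the I/O error
 *     }
 */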
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
        }
    }
    return ret;
}

/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
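/*
 * Illustrative distinction (example numbers only): for a sparse 10 GB raw
 * file that only contains 1 GB of data, bdrv_getlength() below reports the
 * full 10 GB virtual size, while bdrv_get_allocated_file_size() above
 * reports roughly the 1 GB actually allocated on the host filesystem.
 */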
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bs->removable) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}

struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));

/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}

void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2...
     */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};

void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}

int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;
    if (removable && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

/* XXX: no longer used */
void bdrv_set_change_cb(BlockDriverState *bs,
                        void (*change_cb)(void *opaque, int reason),
                        void *opaque)
{
    bs->change_cb = change_cb;
    bs->change_opaque = opaque;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
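/*
 * Example (sketch) of the usual key-handling sequence around bdrv_set_key()
 * below, similar to what the monitor's block_passwd handling does:
 *
 *     if (bdrv_key_required(bs)) {
 *         if (bdrv_set_key(bs, password) < 0) {
 *             // wrong or unusable password, ask again
 *         }
 *     }
 */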
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

int bdrv_flush(BlockDriverState *bs)
{
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
        return bdrv_co_flush_em(bs);
    }

    if (bs->drv && bs->drv->bdrv_flush) {
        return bs->drv->bdrv_flush(bs);
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and therefore don't support bdrv_flush. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (bs->drv && !bdrv_is_read_only(bs) &&
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (!bs->drv->bdrv_discard) {
        return 0;
    }
    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
}
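/*
 * Example (sketch): walking an image in chunks with bdrv_is_allocated()
 * below, as an image copy tool might do:
 *
 *     int64_t sector = 0;
 *     while (sector < total_sectors) {
 *         int num;
 *         int allocated = bdrv_is_allocated(bs, sector, 2048, &num);
 *         // copy or skip 'num' sectors depending on 'allocated'
 *         sector += num;
 *     }
 */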
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    int *pnum)
{
    int64_t n;
    if (!bs->drv->bdrv_is_allocated) {
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
}

void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}

static void bdrv_print_dict(QObject *obj, void *opaque)
{
    QDict *bs_dict;
    Monitor *mon = opaque;

    bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_bool(bs_dict, "removable"));

    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (qdict_haskey(bs_dict, "inserted")) {
        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
        if (qdict_haskey(qdict, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(qdict, "ro"),
                            qdict_get_str(qdict, "drv"),
                            qdict_get_bool(qdict, "encrypted"));
    } else {
        monitor_printf(mon, " [not inserted]");
    }

    monitor_printf(mon, "\n");
}

void bdrv_info_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
}

void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list;
    BlockDriverState *bs;

    bs_list = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, bs->removable,
                                    bs->locked);

        if (bs->drv) {
            QObject *obj;
            QDict *bs_dict = qobject_to_qdict(bs_obj);

            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                     "'encrypted': %i }",
                                     bs->filename, bs->read_only,
                                     bs->drv->format_name,
                                     bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *qdict = qobject_to_qdict(obj);
                qdict_put(qdict, "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", obj);
        }
        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
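/*
 * The list built by bdrv_info() above has (roughly) one entry of this shape
 * per device; illustrative only, the QMP documentation is authoritative:
 *
 *   { "device": "ide0-hd0", "type": "unknown", "removable": 0, "locked": 0,
 *     "inserted": { "file": "/images/disk.qcow2", "ro": 0, "drv": "qcow2",
 *                   "encrypted": 0 } }
 */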
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    QDict *qdict;
    Monitor *mon = opaque;

    qdict = qobject_to_qdict(data);
    monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));

    qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        " flush_operations=%" PRId64
                        " wr_total_time_ns=%" PRId64
                        " rd_total_time_ns=%" PRId64
                        " flush_total_time_ns=%" PRId64
                        "\n",
                        qdict_get_int(qdict, "rd_bytes"),
                        qdict_get_int(qdict, "wr_bytes"),
                        qdict_get_int(qdict, "rd_operations"),
                        qdict_get_int(qdict, "wr_operations"),
                        qdict_get_int(qdict, "flush_operations"),
                        qdict_get_int(qdict, "wr_total_time_ns"),
                        qdict_get_int(qdict, "rd_total_time_ns"),
                        qdict_get_int(qdict, "flush_total_time_ns"));
}

void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
}

static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *res;
    QDict *dict;

    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64 ","
                             "'flush_operations': %" PRId64 ","
                             "'wr_total_time_ns': %" PRId64 ","
                             "'rd_total_time_ns': %" PRId64 ","
                             "'flush_total_time_ns': %" PRId64
                             "} }",
                             bs->nr_bytes[BDRV_ACCT_READ],
                             bs->nr_bytes[BDRV_ACCT_WRITE],
                             bs->nr_ops[BDRV_ACCT_READ],
                             bs->nr_ops[BDRV_ACCT_WRITE],
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE,
                             bs->nr_ops[BDRV_ACCT_FLUSH],
                             bs->total_time_ns[BDRV_ACCT_WRITE],
                             bs->total_time_ns[BDRV_ACCT_READ],
                             bs->total_time_ns[BDRV_ACCT_FLUSH]);
    dict = qobject_to_qdict(res);

    if (*bs->device_name) {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        QObject *parent = bdrv_info_stats_bs(bs->file);
        qdict_put_obj(dict, "parent", parent);
    }

    return res;
}

void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QObject *obj;
    QList *devices;
    BlockDriverState *bs;

    devices = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        obj = bdrv_info_stats_bs(bs);
        qlist_append_obj(devices, obj);
    }

    *ret_data = QOBJECT(devices);
}

const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}

void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    if (!bs->backing_file) {
        pstrcpy(filename, filename_size, "");
    } else {
        pstrcpy(filename, filename_size, bs->backing_file);
    }
}

int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
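/*
 * Example (sketch): querying format information, e.g. the cluster size,
 * through bdrv_get_info() below:
 *
 *     BlockDriverInfo bdi;
 *     if (bdrv_get_info(bs, &bdi) == 0 && bdi.cluster_size > 0) {
 *         // align requests to bdi.cluster_size
 *     }
 */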
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    return drv->bdrv_debug_event(bs, event);
}

/**************************************************************/
/* handling of snapshots */

int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}

int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}

BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}

int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}

int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}

int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}
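/*
 * Example (sketch) of listing snapshots with bdrv_snapshot_list() below,
 * roughly as the savevm/"info snapshots" code does:
 *
 *     QEMUSnapshotInfo *sn_tab;
 *     int nb_sns = bdrv_snapshot_list(bs, &sn_tab);
 *     // on success, iterate over nb_sns entries (bdrv_snapshot_dump()
 *     // formats one line per snapshot) and free sn_tab afterwards
 */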
return -ENOMEDIUM; 2181 if (drv->bdrv_snapshot_list) 2182 return drv->bdrv_snapshot_list(bs, psn_info); 2183 if (bs->file) 2184 return bdrv_snapshot_list(bs->file, psn_info); 2185 return -ENOTSUP; 2186 } 2187 2188 int bdrv_snapshot_load_tmp(BlockDriverState *bs, 2189 const char *snapshot_name) 2190 { 2191 BlockDriver *drv = bs->drv; 2192 if (!drv) { 2193 return -ENOMEDIUM; 2194 } 2195 if (!bs->read_only) { 2196 return -EINVAL; 2197 } 2198 if (drv->bdrv_snapshot_load_tmp) { 2199 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); 2200 } 2201 return -ENOTSUP; 2202 } 2203 2204 #define NB_SUFFIXES 4 2205 2206 char *get_human_readable_size(char *buf, int buf_size, int64_t size) 2207 { 2208 static const char suffixes[NB_SUFFIXES] = "KMGT"; 2209 int64_t base; 2210 int i; 2211 2212 if (size <= 999) { 2213 snprintf(buf, buf_size, "%" PRId64, size); 2214 } else { 2215 base = 1024; 2216 for(i = 0; i < NB_SUFFIXES; i++) { 2217 if (size < (10 * base)) { 2218 snprintf(buf, buf_size, "%0.1f%c", 2219 (double)size / base, 2220 suffixes[i]); 2221 break; 2222 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) { 2223 snprintf(buf, buf_size, "%" PRId64 "%c", 2224 ((size + (base >> 1)) / base), 2225 suffixes[i]); 2226 break; 2227 } 2228 base = base * 1024; 2229 } 2230 } 2231 return buf; 2232 } 2233 2234 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) 2235 { 2236 char buf1[128], date_buf[128], clock_buf[128]; 2237 #ifdef _WIN32 2238 struct tm *ptm; 2239 #else 2240 struct tm tm; 2241 #endif 2242 time_t ti; 2243 int64_t secs; 2244 2245 if (!sn) { 2246 snprintf(buf, buf_size, 2247 "%-10s%-20s%7s%20s%15s", 2248 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK"); 2249 } else { 2250 ti = sn->date_sec; 2251 #ifdef _WIN32 2252 ptm = localtime(&ti); 2253 strftime(date_buf, sizeof(date_buf), 2254 "%Y-%m-%d %H:%M:%S", ptm); 2255 #else 2256 localtime_r(&ti, &tm); 2257 strftime(date_buf, sizeof(date_buf), 2258 "%Y-%m-%d %H:%M:%S", &tm); 2259 #endif 2260 secs = sn->vm_clock_nsec / 1000000000; 2261 snprintf(clock_buf, sizeof(clock_buf), 2262 "%02d:%02d:%02d.%03d", 2263 (int)(secs / 3600), 2264 (int)((secs / 60) % 60), 2265 (int)(secs % 60), 2266 (int)((sn->vm_clock_nsec / 1000000) % 1000)); 2267 snprintf(buf, buf_size, 2268 "%-10s%-20s%7s%20s%15s", 2269 sn->id_str, sn->name, 2270 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size), 2271 date_buf, 2272 clock_buf); 2273 } 2274 return buf; 2275 } 2276 2277 /**************************************************************/ 2278 /* async I/Os */ 2279 2280 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 2281 QEMUIOVector *qiov, int nb_sectors, 2282 BlockDriverCompletionFunc *cb, void *opaque) 2283 { 2284 BlockDriver *drv = bs->drv; 2285 2286 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 2287 2288 if (!drv) 2289 return NULL; 2290 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2291 return NULL; 2292 2293 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, 2294 cb, opaque); 2295 } 2296 2297 typedef struct BlockCompleteData { 2298 BlockDriverCompletionFunc *cb; 2299 void *opaque; 2300 BlockDriverState *bs; 2301 int64_t sector_num; 2302 int nb_sectors; 2303 } BlockCompleteData; 2304 2305 static void block_complete_cb(void *opaque, int ret) 2306 { 2307 BlockCompleteData *b = opaque; 2308 2309 if (b->bs->dirty_bitmap) { 2310 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1); 2311 } 2312 b->cb(b->opaque, ret); 2313 g_free(b); 2314 } 2315 2316 static BlockCompleteData 
*blk_dirty_cb_alloc(BlockDriverState *bs, 2317 int64_t sector_num, 2318 int nb_sectors, 2319 BlockDriverCompletionFunc *cb, 2320 void *opaque) 2321 { 2322 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData)); 2323 2324 blkdata->bs = bs; 2325 blkdata->cb = cb; 2326 blkdata->opaque = opaque; 2327 blkdata->sector_num = sector_num; 2328 blkdata->nb_sectors = nb_sectors; 2329 2330 return blkdata; 2331 } 2332 2333 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 2334 QEMUIOVector *qiov, int nb_sectors, 2335 BlockDriverCompletionFunc *cb, void *opaque) 2336 { 2337 BlockDriver *drv = bs->drv; 2338 BlockDriverAIOCB *ret; 2339 BlockCompleteData *blk_cb_data; 2340 2341 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 2342 2343 if (!drv) 2344 return NULL; 2345 if (bs->read_only) 2346 return NULL; 2347 if (bdrv_check_request(bs, sector_num, nb_sectors)) 2348 return NULL; 2349 2350 if (bs->dirty_bitmap) { 2351 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb, 2352 opaque); 2353 cb = &block_complete_cb; 2354 opaque = blk_cb_data; 2355 } 2356 2357 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors, 2358 cb, opaque); 2359 2360 if (ret) { 2361 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 2362 bs->wr_highest_sector = sector_num + nb_sectors - 1; 2363 } 2364 } 2365 2366 return ret; 2367 } 2368 2369 2370 typedef struct MultiwriteCB { 2371 int error; 2372 int num_requests; 2373 int num_callbacks; 2374 struct { 2375 BlockDriverCompletionFunc *cb; 2376 void *opaque; 2377 QEMUIOVector *free_qiov; 2378 void *free_buf; 2379 } callbacks[]; 2380 } MultiwriteCB; 2381 2382 static void multiwrite_user_cb(MultiwriteCB *mcb) 2383 { 2384 int i; 2385 2386 for (i = 0; i < mcb->num_callbacks; i++) { 2387 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 2388 if (mcb->callbacks[i].free_qiov) { 2389 qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 2390 } 2391 g_free(mcb->callbacks[i].free_qiov); 2392 qemu_vfree(mcb->callbacks[i].free_buf); 2393 } 2394 } 2395 2396 static void multiwrite_cb(void *opaque, int ret) 2397 { 2398 MultiwriteCB *mcb = opaque; 2399 2400 trace_multiwrite_cb(mcb, ret); 2401 2402 if (ret < 0 && !mcb->error) { 2403 mcb->error = ret; 2404 } 2405 2406 mcb->num_requests--; 2407 if (mcb->num_requests == 0) { 2408 multiwrite_user_cb(mcb); 2409 g_free(mcb); 2410 } 2411 } 2412 2413 static int multiwrite_req_compare(const void *a, const void *b) 2414 { 2415 const BlockRequest *req1 = a, *req2 = b; 2416 2417 /* 2418 * Note that we can't simply subtract req2->sector from req1->sector 2419 * here as that could overflow the return value. 2420 */ 2421 if (req1->sector > req2->sector) { 2422 return 1; 2423 } else if (req1->sector < req2->sector) { 2424 return -1; 2425 } else { 2426 return 0; 2427 } 2428 } 2429 2430 /* 2431 * Takes a bunch of requests and tries to merge them. Returns the number of 2432 * requests that remain after merging. 2433 */ 2434 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 2435 int num_reqs, MultiwriteCB *mcb) 2436 { 2437 int i, outidx; 2438 2439 // Sort requests by start sector 2440 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 2441 2442 // Check if adjacent requests touch the same clusters. If so, combine them, 2443 // filling up gaps with zero sectors. 
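    // Illustrative walk-through (added commentary, not from the original
    // source): with 512-byte sectors, a write covering sectors [0, 8)
    // followed by one covering [8, 16) is exactly sequential
    // (reqs[i].sector == oldreq_last) and is merged into a single
    // 16-sector request. If the second write instead started at sector 10,
    // the merge only happens when the driver's bdrv_merge_requests hook
    // allows it, and sectors 8 and 9 are then covered by the zero-filled
    // bounce buffer added below.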
2444     outidx = 0;
2445     for (i = 1; i < num_reqs; i++) {
2446         int merge = 0;
2447         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2448
2449         // This handles the cases that are valid for all block drivers, namely
2450         // exactly sequential writes and overlapping writes.
2451         if (reqs[i].sector <= oldreq_last) {
2452             merge = 1;
2453         }
2454
2455         // The block driver may decide that it makes sense to combine requests
2456         // even if there is a gap of some sectors between them. In this case,
2457         // the gap is filled with zeros (therefore only applicable for
2458         // still-unused space in formats like qcow2).
2459         if (!merge && bs->drv->bdrv_merge_requests) {
2460             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2461         }
2462
2463         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2464             merge = 0;
2465         }
2466
2467         if (merge) {
2468             size_t size;
2469             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2470             qemu_iovec_init(qiov,
2471                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2472
2473             // Add the first request to the merged one. If the requests are
2474             // overlapping, drop the last sectors of the first request.
2475             size = (reqs[i].sector - reqs[outidx].sector) << 9;
2476             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2477
2478             // We might need to add some zeros between the two requests
2479             if (reqs[i].sector > oldreq_last) {
2480                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2481                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2482                 memset(buf, 0, zero_bytes);
2483                 qemu_iovec_add(qiov, buf, zero_bytes);
2484                 mcb->callbacks[i].free_buf = buf;
2485             }
2486
2487             // Add the second request
2488             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2489
2490             reqs[outidx].nb_sectors = qiov->size >> 9;
2491             reqs[outidx].qiov = qiov;
2492
2493             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2494         } else {
2495             outidx++;
2496             reqs[outidx].sector = reqs[i].sector;
2497             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2498             reqs[outidx].qiov = reqs[i].qiov;
2499         }
2500     }
2501
2502     return outidx + 1;
2503 }
2504
2505 /*
2506  * Submit multiple AIO write requests at once.
2507  *
2508  * On success, the function returns 0 and all requests in the reqs array have
2509  * been submitted. On error, this function returns -1, and any of the
2510  * requests may or may not be submitted yet. In particular, this means that the
2511  * callback will be called for some of the requests and not for others. The
2512  * caller must check the error field of the BlockRequest to wait for the right
2513  * callbacks (if error != 0, no callback will be called).
2514  *
2515  * The implementation may modify the contents of the reqs array, e.g. to merge
2516  * requests. However, the fields opaque and error are left unmodified as they
2517  * are used to signal failure for a single request to the caller.
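 *
 * Minimal caller sketch (added for illustration; bs, qiov0, qiov1, my_dev,
 * my_write_cb and handle_failed_request are assumed to exist in the caller
 * and are not part of this file):
 *
 *   BlockRequest reqs[2];
 *   int i;
 *
 *   reqs[0].sector = 0;  reqs[0].nb_sectors = 8;  reqs[0].qiov = qiov0;
 *   reqs[0].cb = my_write_cb;  reqs[0].opaque = my_dev;
 *   reqs[1].sector = 8;  reqs[1].nb_sectors = 8;  reqs[1].qiov = qiov1;
 *   reqs[1].cb = my_write_cb;  reqs[1].opaque = my_dev;
 *
 *   if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *       for (i = 0; i < 2; i++) {
 *           if (reqs[i].error) {
 *               handle_failed_request(my_dev, i);
 *           }
 *       }
 *   }
 *
 * Requests whose error field was set will never receive a callback and must
 * be completed by hand as sketched above; any remaining requests complete
 * through my_write_cb as usual.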
2518  */
2519 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2520 {
2521     BlockDriverAIOCB *acb;
2522     MultiwriteCB *mcb;
2523     int i;
2524
2525     /* don't submit writes if we don't have a medium */
2526     if (bs->drv == NULL) {
2527         for (i = 0; i < num_reqs; i++) {
2528             reqs[i].error = -ENOMEDIUM;
2529         }
2530         return -1;
2531     }
2532
2533     if (num_reqs == 0) {
2534         return 0;
2535     }
2536
2537     // Create MultiwriteCB structure
2538     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2539     mcb->num_requests = 0;
2540     mcb->num_callbacks = num_reqs;
2541
2542     for (i = 0; i < num_reqs; i++) {
2543         mcb->callbacks[i].cb = reqs[i].cb;
2544         mcb->callbacks[i].opaque = reqs[i].opaque;
2545     }
2546
2547     // Check for mergeable requests
2548     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2549
2550     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2551
2552     /*
2553      * Run the aio requests. As soon as one request can't be submitted
2554      * successfully, fail all requests that are not yet submitted (we must
2555      * return failure for all requests anyway).
2556      *
2557      * num_requests cannot be set to the right value immediately: If
2558      * bdrv_aio_writev fails for some request, num_requests would be too high
2559      * and therefore multiwrite_cb() would never recognize the multiwrite
2560      * request as completed. We also cannot use the loop variable i to set it
2561      * when the first request fails because the callback may already have been
2562      * called for previously submitted requests. Thus, num_requests must be
2563      * incremented for each request that is submitted.
2564      *
2565      * The problem that callbacks may be called early also means that we need
2566      * to take care that num_requests doesn't become 0 before all requests are
2567      * submitted - multiwrite_cb() would consider the multiwrite request
2568      * completed. A dummy request that is "completed" by a manual call to
2569      * multiwrite_cb() takes care of this.
2570      */
2571     mcb->num_requests = 1;
2572
2573     // Run the aio requests
2574     for (i = 0; i < num_reqs; i++) {
2575         mcb->num_requests++;
2576         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2577                               reqs[i].nb_sectors, multiwrite_cb, mcb);
2578
2579         if (acb == NULL) {
2580             // We can only fail the whole thing if no request has been
2581             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2582             // complete and report the error in the callback.
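            // Added commentary (not in the original source): when a later
            // request fails to submit, mcb->num_requests was already
            // incremented for it, so the manual multiwrite_cb(mcb, -EIO)
            // below both records -EIO as the multiwrite error and drops
            // that extra count; the AIOs submitted so far keep mcb alive
            // until they complete.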
2583 if (i == 0) { 2584 trace_bdrv_aio_multiwrite_earlyfail(mcb); 2585 goto fail; 2586 } else { 2587 trace_bdrv_aio_multiwrite_latefail(mcb, i); 2588 multiwrite_cb(mcb, -EIO); 2589 break; 2590 } 2591 } 2592 } 2593 2594 /* Complete the dummy request */ 2595 multiwrite_cb(mcb, 0); 2596 2597 return 0; 2598 2599 fail: 2600 for (i = 0; i < mcb->num_callbacks; i++) { 2601 reqs[i].error = -EIO; 2602 } 2603 g_free(mcb); 2604 return -1; 2605 } 2606 2607 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 2608 BlockDriverCompletionFunc *cb, void *opaque) 2609 { 2610 BlockDriver *drv = bs->drv; 2611 2612 trace_bdrv_aio_flush(bs, opaque); 2613 2614 if (bs->open_flags & BDRV_O_NO_FLUSH) { 2615 return bdrv_aio_noop_em(bs, cb, opaque); 2616 } 2617 2618 if (!drv) 2619 return NULL; 2620 return drv->bdrv_aio_flush(bs, cb, opaque); 2621 } 2622 2623 void bdrv_aio_cancel(BlockDriverAIOCB *acb) 2624 { 2625 acb->pool->cancel(acb); 2626 } 2627 2628 2629 /**************************************************************/ 2630 /* async block device emulation */ 2631 2632 typedef struct BlockDriverAIOCBSync { 2633 BlockDriverAIOCB common; 2634 QEMUBH *bh; 2635 int ret; 2636 /* vector translation state */ 2637 QEMUIOVector *qiov; 2638 uint8_t *bounce; 2639 int is_write; 2640 } BlockDriverAIOCBSync; 2641 2642 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) 2643 { 2644 BlockDriverAIOCBSync *acb = 2645 container_of(blockacb, BlockDriverAIOCBSync, common); 2646 qemu_bh_delete(acb->bh); 2647 acb->bh = NULL; 2648 qemu_aio_release(acb); 2649 } 2650 2651 static AIOPool bdrv_em_aio_pool = { 2652 .aiocb_size = sizeof(BlockDriverAIOCBSync), 2653 .cancel = bdrv_aio_cancel_em, 2654 }; 2655 2656 static void bdrv_aio_bh_cb(void *opaque) 2657 { 2658 BlockDriverAIOCBSync *acb = opaque; 2659 2660 if (!acb->is_write) 2661 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); 2662 qemu_vfree(acb->bounce); 2663 acb->common.cb(acb->common.opaque, acb->ret); 2664 qemu_bh_delete(acb->bh); 2665 acb->bh = NULL; 2666 qemu_aio_release(acb); 2667 } 2668 2669 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 2670 int64_t sector_num, 2671 QEMUIOVector *qiov, 2672 int nb_sectors, 2673 BlockDriverCompletionFunc *cb, 2674 void *opaque, 2675 int is_write) 2676 2677 { 2678 BlockDriverAIOCBSync *acb; 2679 2680 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2681 acb->is_write = is_write; 2682 acb->qiov = qiov; 2683 acb->bounce = qemu_blockalign(bs, qiov->size); 2684 2685 if (!acb->bh) 2686 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2687 2688 if (is_write) { 2689 qemu_iovec_to_buffer(acb->qiov, acb->bounce); 2690 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 2691 } else { 2692 acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 2693 } 2694 2695 qemu_bh_schedule(acb->bh); 2696 2697 return &acb->common; 2698 } 2699 2700 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 2701 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2702 BlockDriverCompletionFunc *cb, void *opaque) 2703 { 2704 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 2705 } 2706 2707 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 2708 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2709 BlockDriverCompletionFunc *cb, void *opaque) 2710 { 2711 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 2712 } 2713 2714 2715 typedef struct BlockDriverAIOCBCoroutine { 2716 BlockDriverAIOCB common; 2717 BlockRequest req; 2718 bool 
is_write;
2719     QEMUBH* bh;
2720 } BlockDriverAIOCBCoroutine;
2721
2722 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2723 {
2724     qemu_aio_flush();
2725 }
2726
2727 static AIOPool bdrv_em_co_aio_pool = {
2728     .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2729     .cancel = bdrv_aio_co_cancel_em,
2730 };
2731
2732 static void bdrv_co_rw_bh(void *opaque)
2733 {
2734     BlockDriverAIOCBCoroutine *acb = opaque;
2735
2736     acb->common.cb(acb->common.opaque, acb->req.error);
2737     qemu_bh_delete(acb->bh);
2738     qemu_aio_release(acb);
2739 }
2740
2741 static void coroutine_fn bdrv_co_rw(void *opaque)
2742 {
2743     BlockDriverAIOCBCoroutine *acb = opaque;
2744     BlockDriverState *bs = acb->common.bs;
2745
2746     if (!acb->is_write) {
2747         acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2748             acb->req.nb_sectors, acb->req.qiov);
2749     } else {
2750         acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2751             acb->req.nb_sectors, acb->req.qiov);
2752     }
2753
2754     acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2755     qemu_bh_schedule(acb->bh);
2756 }
2757
2758 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2759                                                int64_t sector_num,
2760                                                QEMUIOVector *qiov,
2761                                                int nb_sectors,
2762                                                BlockDriverCompletionFunc *cb,
2763                                                void *opaque,
2764                                                bool is_write)
2765 {
2766     Coroutine *co;
2767     BlockDriverAIOCBCoroutine *acb;
2768
2769     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2770     acb->req.sector = sector_num;
2771     acb->req.nb_sectors = nb_sectors;
2772     acb->req.qiov = qiov;
2773     acb->is_write = is_write;
2774
2775     co = qemu_coroutine_create(bdrv_co_rw);
2776     qemu_coroutine_enter(co, acb);
2777
2778     return &acb->common;
2779 }
2780
2781 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2782     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2783     BlockDriverCompletionFunc *cb, void *opaque)
2784 {
2785     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2786                                  false);
2787 }
2788
2789 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2790     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2791     BlockDriverCompletionFunc *cb, void *opaque)
2792 {
2793     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2794                                  true);
2795 }
2796
2797 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2798         BlockDriverCompletionFunc *cb, void *opaque)
2799 {
2800     BlockDriverAIOCBSync *acb;
2801
2802     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2803     acb->is_write = 1; /* don't bounce in the completion handler */
2804     acb->qiov = NULL;
2805     acb->bounce = NULL;
2806     acb->ret = 0;
2807
2808     if (!acb->bh)
2809         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2810
2811     bdrv_flush(bs);
2812     qemu_bh_schedule(acb->bh);
2813     return &acb->common;
2814 }
2815
2816 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2817         BlockDriverCompletionFunc *cb, void *opaque)
2818 {
2819     BlockDriverAIOCBSync *acb;
2820
2821     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2822     acb->is_write = 1; /* don't bounce in the completion handler */
2823     acb->qiov = NULL;
2824     acb->bounce = NULL;
2825     acb->ret = 0;
2826
2827     if (!acb->bh) {
2828         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2829     }
2830
2831     qemu_bh_schedule(acb->bh);
2832     return &acb->common;
2833 }
2834
2835 /**************************************************************/
2836 /* sync block device emulation */
2837
2838 static void bdrv_rw_em_cb(void *opaque, int ret)
2839 {
2840     *(int *)opaque = ret;
2841 }
2842
2843 #define
NOT_DONE 0x7fffffff 2844 2845 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 2846 uint8_t *buf, int nb_sectors) 2847 { 2848 int async_ret; 2849 BlockDriverAIOCB *acb; 2850 struct iovec iov; 2851 QEMUIOVector qiov; 2852 2853 async_ret = NOT_DONE; 2854 iov.iov_base = (void *)buf; 2855 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2856 qemu_iovec_init_external(&qiov, &iov, 1); 2857 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, 2858 bdrv_rw_em_cb, &async_ret); 2859 if (acb == NULL) { 2860 async_ret = -1; 2861 goto fail; 2862 } 2863 2864 while (async_ret == NOT_DONE) { 2865 qemu_aio_wait(); 2866 } 2867 2868 2869 fail: 2870 return async_ret; 2871 } 2872 2873 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, 2874 const uint8_t *buf, int nb_sectors) 2875 { 2876 int async_ret; 2877 BlockDriverAIOCB *acb; 2878 struct iovec iov; 2879 QEMUIOVector qiov; 2880 2881 async_ret = NOT_DONE; 2882 iov.iov_base = (void *)buf; 2883 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2884 qemu_iovec_init_external(&qiov, &iov, 1); 2885 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, 2886 bdrv_rw_em_cb, &async_ret); 2887 if (acb == NULL) { 2888 async_ret = -1; 2889 goto fail; 2890 } 2891 while (async_ret == NOT_DONE) { 2892 qemu_aio_wait(); 2893 } 2894 2895 fail: 2896 return async_ret; 2897 } 2898 2899 void bdrv_init(void) 2900 { 2901 module_call_init(MODULE_INIT_BLOCK); 2902 } 2903 2904 void bdrv_init_with_whitelist(void) 2905 { 2906 use_bdrv_whitelist = 1; 2907 bdrv_init(); 2908 } 2909 2910 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, 2911 BlockDriverCompletionFunc *cb, void *opaque) 2912 { 2913 BlockDriverAIOCB *acb; 2914 2915 if (pool->free_aiocb) { 2916 acb = pool->free_aiocb; 2917 pool->free_aiocb = acb->next; 2918 } else { 2919 acb = g_malloc0(pool->aiocb_size); 2920 acb->pool = pool; 2921 } 2922 acb->bs = bs; 2923 acb->cb = cb; 2924 acb->opaque = opaque; 2925 return acb; 2926 } 2927 2928 void qemu_aio_release(void *p) 2929 { 2930 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p; 2931 AIOPool *pool = acb->pool; 2932 acb->next = pool->free_aiocb; 2933 pool->free_aiocb = acb; 2934 } 2935 2936 /**************************************************************/ 2937 /* Coroutine block device emulation */ 2938 2939 typedef struct CoroutineIOCompletion { 2940 Coroutine *coroutine; 2941 int ret; 2942 } CoroutineIOCompletion; 2943 2944 static void bdrv_co_io_em_complete(void *opaque, int ret) 2945 { 2946 CoroutineIOCompletion *co = opaque; 2947 2948 co->ret = ret; 2949 qemu_coroutine_enter(co->coroutine, NULL); 2950 } 2951 2952 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 2953 int nb_sectors, QEMUIOVector *iov, 2954 bool is_write) 2955 { 2956 CoroutineIOCompletion co = { 2957 .coroutine = qemu_coroutine_self(), 2958 }; 2959 BlockDriverAIOCB *acb; 2960 2961 if (is_write) { 2962 acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 2963 bdrv_co_io_em_complete, &co); 2964 } else { 2965 acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 2966 bdrv_co_io_em_complete, &co); 2967 } 2968 2969 trace_bdrv_co_io(is_write, acb); 2970 if (!acb) { 2971 return -EIO; 2972 } 2973 qemu_coroutine_yield(); 2974 2975 return co.ret; 2976 } 2977 2978 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 2979 int64_t sector_num, int nb_sectors, 2980 QEMUIOVector *iov) 2981 { 2982 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 2983 } 2984 2985 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 2986 int64_t 
sector_num, int nb_sectors, 2987 QEMUIOVector *iov) 2988 { 2989 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 2990 } 2991 2992 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs) 2993 { 2994 CoroutineIOCompletion co = { 2995 .coroutine = qemu_coroutine_self(), 2996 }; 2997 BlockDriverAIOCB *acb; 2998 2999 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 3000 if (!acb) { 3001 return -EIO; 3002 } 3003 qemu_coroutine_yield(); 3004 return co.ret; 3005 } 3006 3007 /**************************************************************/ 3008 /* removable device support */ 3009 3010 /** 3011 * Return TRUE if the media is present 3012 */ 3013 int bdrv_is_inserted(BlockDriverState *bs) 3014 { 3015 BlockDriver *drv = bs->drv; 3016 int ret; 3017 if (!drv) 3018 return 0; 3019 if (!drv->bdrv_is_inserted) 3020 return !bs->tray_open; 3021 ret = drv->bdrv_is_inserted(bs); 3022 return ret; 3023 } 3024 3025 /** 3026 * Return TRUE if the media changed since the last call to this 3027 * function. It is currently only used for floppy disks 3028 */ 3029 int bdrv_media_changed(BlockDriverState *bs) 3030 { 3031 BlockDriver *drv = bs->drv; 3032 int ret; 3033 3034 if (!drv || !drv->bdrv_media_changed) 3035 ret = -ENOTSUP; 3036 else 3037 ret = drv->bdrv_media_changed(bs); 3038 if (ret == -ENOTSUP) 3039 ret = bs->media_changed; 3040 bs->media_changed = 0; 3041 return ret; 3042 } 3043 3044 /** 3045 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 3046 */ 3047 int bdrv_eject(BlockDriverState *bs, int eject_flag) 3048 { 3049 BlockDriver *drv = bs->drv; 3050 3051 if (eject_flag && bs->locked) { 3052 return -EBUSY; 3053 } 3054 3055 if (drv && drv->bdrv_eject) { 3056 drv->bdrv_eject(bs, eject_flag); 3057 } 3058 bs->tray_open = eject_flag; 3059 return 0; 3060 } 3061 3062 int bdrv_is_locked(BlockDriverState *bs) 3063 { 3064 return bs->locked; 3065 } 3066 3067 /** 3068 * Lock or unlock the media (if it is locked, the user won't be able 3069 * to eject it manually). 3070 */ 3071 void bdrv_set_locked(BlockDriverState *bs, int locked) 3072 { 3073 BlockDriver *drv = bs->drv; 3074 3075 trace_bdrv_set_locked(bs, locked); 3076 3077 bs->locked = locked; 3078 if (drv && drv->bdrv_set_locked) { 3079 drv->bdrv_set_locked(bs, locked); 3080 } 3081 } 3082 3083 /* needed for generic scsi interface */ 3084 3085 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 3086 { 3087 BlockDriver *drv = bs->drv; 3088 3089 if (drv && drv->bdrv_ioctl) 3090 return drv->bdrv_ioctl(bs, req, buf); 3091 return -ENOTSUP; 3092 } 3093 3094 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 3095 unsigned long int req, void *buf, 3096 BlockDriverCompletionFunc *cb, void *opaque) 3097 { 3098 BlockDriver *drv = bs->drv; 3099 3100 if (drv && drv->bdrv_aio_ioctl) 3101 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 3102 return NULL; 3103 } 3104 3105 3106 3107 void *qemu_blockalign(BlockDriverState *bs, size_t size) 3108 { 3109 return qemu_memalign((bs && bs->buffer_alignment) ? 
bs->buffer_alignment : 512, size); 3110 } 3111 3112 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) 3113 { 3114 int64_t bitmap_size; 3115 3116 bs->dirty_count = 0; 3117 if (enable) { 3118 if (!bs->dirty_bitmap) { 3119 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + 3120 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; 3121 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; 3122 3123 bs->dirty_bitmap = g_malloc0(bitmap_size); 3124 } 3125 } else { 3126 if (bs->dirty_bitmap) { 3127 g_free(bs->dirty_bitmap); 3128 bs->dirty_bitmap = NULL; 3129 } 3130 } 3131 } 3132 3133 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) 3134 { 3135 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; 3136 3137 if (bs->dirty_bitmap && 3138 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { 3139 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & 3140 (1UL << (chunk % (sizeof(unsigned long) * 8)))); 3141 } else { 3142 return 0; 3143 } 3144 } 3145 3146 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 3147 int nr_sectors) 3148 { 3149 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); 3150 } 3151 3152 int64_t bdrv_get_dirty_count(BlockDriverState *bs) 3153 { 3154 return bs->dirty_count; 3155 } 3156 3157 void bdrv_set_in_use(BlockDriverState *bs, int in_use) 3158 { 3159 assert(bs->in_use != in_use); 3160 bs->in_use = in_use; 3161 } 3162 3163 int bdrv_in_use(BlockDriverState *bs) 3164 { 3165 return bs->in_use; 3166 } 3167 3168 void 3169 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes, 3170 enum BlockAcctType type) 3171 { 3172 assert(type < BDRV_MAX_IOTYPE); 3173 3174 cookie->bytes = bytes; 3175 cookie->start_time_ns = get_clock(); 3176 cookie->type = type; 3177 } 3178 3179 void 3180 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) 3181 { 3182 assert(cookie->type < BDRV_MAX_IOTYPE); 3183 3184 bs->nr_bytes[cookie->type] += cookie->bytes; 3185 bs->nr_ops[cookie->type]++; 3186 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; 3187 } 3188 3189 int bdrv_img_create(const char *filename, const char *fmt, 3190 const char *base_filename, const char *base_fmt, 3191 char *options, uint64_t img_size, int flags) 3192 { 3193 QEMUOptionParameter *param = NULL, *create_options = NULL; 3194 QEMUOptionParameter *backing_fmt, *backing_file, *size; 3195 BlockDriverState *bs = NULL; 3196 BlockDriver *drv, *proto_drv; 3197 BlockDriver *backing_drv = NULL; 3198 int ret = 0; 3199 3200 /* Find driver and parse its options */ 3201 drv = bdrv_find_format(fmt); 3202 if (!drv) { 3203 error_report("Unknown file format '%s'", fmt); 3204 ret = -EINVAL; 3205 goto out; 3206 } 3207 3208 proto_drv = bdrv_find_protocol(filename); 3209 if (!proto_drv) { 3210 error_report("Unknown protocol '%s'", filename); 3211 ret = -EINVAL; 3212 goto out; 3213 } 3214 3215 create_options = append_option_parameters(create_options, 3216 drv->create_options); 3217 create_options = append_option_parameters(create_options, 3218 proto_drv->create_options); 3219 3220 /* Create parameter list with default values */ 3221 param = parse_option_parameters("", create_options, param); 3222 3223 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); 3224 3225 /* Parse -o options */ 3226 if (options) { 3227 param = parse_option_parameters(options, create_options, param); 3228 if (param == NULL) { 3229 error_report("Invalid options for file format '%s'.", fmt); 3230 ret = -EINVAL; 3231 goto out; 3232 } 3233 } 3234 3235 if (base_filename) { 3236 if 
(set_option_parameter(param, BLOCK_OPT_BACKING_FILE, 3237 base_filename)) { 3238 error_report("Backing file not supported for file format '%s'", 3239 fmt); 3240 ret = -EINVAL; 3241 goto out; 3242 } 3243 } 3244 3245 if (base_fmt) { 3246 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { 3247 error_report("Backing file format not supported for file " 3248 "format '%s'", fmt); 3249 ret = -EINVAL; 3250 goto out; 3251 } 3252 } 3253 3254 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); 3255 if (backing_file && backing_file->value.s) { 3256 if (!strcmp(filename, backing_file->value.s)) { 3257 error_report("Error: Trying to create an image with the " 3258 "same filename as the backing file"); 3259 ret = -EINVAL; 3260 goto out; 3261 } 3262 } 3263 3264 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); 3265 if (backing_fmt && backing_fmt->value.s) { 3266 backing_drv = bdrv_find_format(backing_fmt->value.s); 3267 if (!backing_drv) { 3268 error_report("Unknown backing file format '%s'", 3269 backing_fmt->value.s); 3270 ret = -EINVAL; 3271 goto out; 3272 } 3273 } 3274 3275 // The size for the image must always be specified, with one exception: 3276 // If we are using a backing file, we can obtain the size from there 3277 size = get_option_parameter(param, BLOCK_OPT_SIZE); 3278 if (size && size->value.n == -1) { 3279 if (backing_file && backing_file->value.s) { 3280 uint64_t size; 3281 char buf[32]; 3282 3283 bs = bdrv_new(""); 3284 3285 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv); 3286 if (ret < 0) { 3287 error_report("Could not open '%s'", backing_file->value.s); 3288 goto out; 3289 } 3290 bdrv_get_geometry(bs, &size); 3291 size *= 512; 3292 3293 snprintf(buf, sizeof(buf), "%" PRId64, size); 3294 set_option_parameter(param, BLOCK_OPT_SIZE, buf); 3295 } else { 3296 error_report("Image creation needs a size parameter"); 3297 ret = -EINVAL; 3298 goto out; 3299 } 3300 } 3301 3302 printf("Formatting '%s', fmt=%s ", filename, fmt); 3303 print_option_parameters(param); 3304 puts(""); 3305 3306 ret = bdrv_create(drv, filename, param); 3307 3308 if (ret < 0) { 3309 if (ret == -ENOTSUP) { 3310 error_report("Formatting or formatting option not supported for " 3311 "file format '%s'", fmt); 3312 } else if (ret == -EFBIG) { 3313 error_report("The image size is too large for file format '%s'", 3314 fmt); 3315 } else { 3316 error_report("%s: error while creating %s: %s", filename, fmt, 3317 strerror(-ret)); 3318 } 3319 } 3320 3321 out: 3322 free_option_parameters(create_options); 3323 free_option_parameters(param); 3324 3325 if (bs) { 3326 bdrv_delete(bs); 3327 } 3328 3329 return ret; 3330 } 3331
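
/*
 * Illustrative usage sketch (added commentary, not part of the original
 * file): a front end similar to "qemu-img create" could drive
 * bdrv_img_create() roughly as follows. The file name, format, size and
 * flags value are made up for the example.
 *
 *   uint64_t img_size = 8 * 1024 * 1024;
 *   int ret;
 *
 *   ret = bdrv_img_create("disk.qcow2", "qcow2",
 *                         NULL, NULL,   // no backing file, no backing format
 *                         NULL,         // no -o option string
 *                         img_size, 0); // flags are only used when a backing
 *                                       // file has to be probed for its size
 *   if (ret < 0) {
 *       // a diagnostic has already been printed via error_report()
 *   }
 */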