/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qemu-objects.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
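/*
 * Illustrative usage sketch (hypothetical caller): combining an image
 * path with a relative backing file name, as bdrv_open() does below:
 *
 *     char backing[PATH_MAX];
 *     path_combine(backing, sizeof(backing),
 *                  "/images/overlay.qcow2", "base.raw");
 *     // backing now holds "/images/base.raw"
 *
 * An absolute filename argument would simply be copied to dest unchanged.
 */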
void bdrv_register(BlockDriver *bdrv)
{
    if (!bdrv->bdrv_aio_readv) {
        /* add AIO emulation layer */
        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
    } else if (!bdrv->bdrv_read) {
        /* add synchronous IO emulation layer */
        bdrv->bdrv_read = bdrv_read_em;
        bdrv->bdrv_write = bdrv_write_em;
    }

    if (!bdrv->bdrv_aio_flush)
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = qemu_mallocz(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
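/*
 * Illustrative note: with 512-byte sectors (BDRV_SECTOR_BITS == 9 is
 * assumed here), an image for which bdrv_getlength() reports
 * 1073741824 bytes (1 GiB) ends up with bs->total_sectors == 2097152
 * after refresh_total_sectors() above.
 */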
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->open_flags = flags;
    /* buffer_alignment defaulted to 512, drivers can change this value */
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = qemu_mallocz(drv->instance_size);

    /*
     * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
     * write cache to the guest. We do need the fdatasync to flush
     * out transactions for block allocations, and we maybe have a
     * volatile write cache in our backing device to deal with.
     */
    if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
        bs->enable_write_cache = 1;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writeable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    qemu_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char tmp_filename[PATH_MAX];
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        /* call the change callback */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}

void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        qemu_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        /* call the change callback */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->peer);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    qemu_free(bs);
}

int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (bs->peer) {
        return -EBUSY;
    }
    bs->peer = qdev;
    return 0;
}

void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(bs->peer == qdev);
    bs->peer = NULL;
}

DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    return bs->peer;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    qemu_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return -ENOMEDIUM;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!bs->drv)
        return -ENOMEDIUM;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
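/*
 * Illustrative sketch of the byte-level interface above: a 4-byte write
 * at byte offset 510 is not sector aligned, so bdrv_pwrite() performs a
 * read-modify-write of sector 0 (2 bytes) and of sector 1 (2 bytes):
 *
 *     uint32_t val = 0xaa55aa55;
 *     if (bdrv_pwrite(bs, 510, &val, sizeof(val)) < 0) {
 *         // handle error
 *     }
 *
 * Only whole sectors in the middle of a request bypass the bounce buffer.
 */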
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache=writethrough, it uses O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
    const uint8_t *buf, int nb_sectors)
{
    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
                            buf, BDRV_SECTOR_SIZE * nb_sectors);
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
        }
    }
    return ret;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    /* Fixed size devices use the total_sectors value for speed instead of
       issuing a length query (like lseek) on each call. Also, legacy block
       drivers don't provide a bdrv_getlength function and must use
       total_sectors. */
    if (!bs->growable || !drv->bdrv_getlength) {
        return bs->total_sectors * BDRV_SECTOR_SIZE;
    }
    return drv->bdrv_getlength(bs);
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}

struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
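/*
 * Note on the layout used below: the four 16-byte MSDOS partition table
 * entries live at byte offset 0x1be of sector 0, which is why
 * guess_disk_lchs() indexes ((struct partition *)(buf + 0x1be)) + i for
 * i in 0..3. The guessed geometry is heads = end_head + 1,
 * sectors = end_sector & 63 and cylinders = nb_sectors / (heads * sectors).
 */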
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}

void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_type_hint(BlockDriverState *bs, int type)
{
    bs->type = type;
    bs->removable = ((type == BDRV_TYPE_CDROM ||
                      type == BDRV_TYPE_FLOPPY));
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};

void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}

int bdrv_get_type_hint(BlockDriverState *bs)
{
    return bs->type;
}

int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
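/*
 * Illustrative sketch (the enum value names are assumptions taken from
 * callers of this API, not defined in this file): a drive configured
 * with rerror=report,werror=stop would be wired up roughly as
 *
 *     bdrv_set_on_error(bs, BLOCK_ERR_REPORT, BLOCK_ERR_STOP_ANY);
 *
 * and the device model later picks the policy for a failed request with
 * bdrv_get_on_error(bs, is_read).
 */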
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;
    if (removable && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

/* XXX: no longer used */
void bdrv_set_change_cb(BlockDriverState *bs,
                        void (*change_cb)(void *opaque, int reason),
                        void *opaque)
{
    bs->change_cb = change_cb;
    bs->change_opaque = opaque;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
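/*
 * Illustrative usage sketch: callers either walk the list with
 * bdrv_next() in a loop or pass a callback to bdrv_iterate(), e.g.
 *
 *     static void count_drive(void *opaque, BlockDriverState *bs)
 *     {
 *         (*(int *)opaque)++;
 *     }
 *
 *     int n = 0;
 *     bdrv_iterate(count_drive, &n);
 */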
int bdrv_flush(BlockDriverState *bs)
{
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv && bs->drv->bdrv_flush) {
        return bs->drv->bdrv_flush(bs);
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (bs->drv && !bdrv_is_read_only(bs) &&
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (!bs->drv->bdrv_discard) {
        return 0;
    }
    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
}

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    int *pnum)
{
    int64_t n;
    if (!bs->drv->bdrv_is_allocated) {
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
}

void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}

static void bdrv_print_dict(QObject *obj, void *opaque)
{
    QDict *bs_dict;
    Monitor *mon = opaque;

    bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: type=%s removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_str(bs_dict, "type"),
                        qdict_get_bool(bs_dict, "removable"));

    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (qdict_haskey(bs_dict, "inserted")) {
        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
        if (qdict_haskey(qdict, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(qdict, "ro"),
                            qdict_get_str(qdict, "drv"),
                            qdict_get_bool(qdict, "encrypted"));
    } else {
        monitor_printf(mon, " [not inserted]");
    }

    monitor_printf(mon, "\n");
}

void bdrv_info_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
}

void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list;
    BlockDriverState *bs;

    bs_list = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;
        const char *type = "unknown";

        switch(bs->type) {
        case BDRV_TYPE_HD:
            type = "hd";
            break;
        case BDRV_TYPE_CDROM:
            type = "cdrom";
            break;
        case BDRV_TYPE_FLOPPY:
            type = "floppy";
            break;
        }

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, type, bs->removable,
                                    bs->locked);

        if (bs->drv) {
            QObject *obj;
            QDict *bs_dict = qobject_to_qdict(bs_obj);

            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                     "'encrypted': %i }",
                                     bs->filename, bs->read_only,
                                     bs->drv->format_name,
                                     bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *qdict = qobject_to_qdict(obj);
                qdict_put(qdict, "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", obj);
        }
        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}

static void bdrv_stats_iter(QObject *data, void *opaque)
{
    QDict *qdict;
    Monitor *mon = opaque;

    qdict = qobject_to_qdict(data);
    monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));

    qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        "\n",
                        qdict_get_int(qdict, "rd_bytes"),
                        qdict_get_int(qdict, "wr_bytes"),
                        qdict_get_int(qdict, "rd_operations"),
                        qdict_get_int(qdict, "wr_operations"));
}

void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
}
'stats': {" 1825 "'rd_bytes': %" PRId64 "," 1826 "'wr_bytes': %" PRId64 "," 1827 "'rd_operations': %" PRId64 "," 1828 "'wr_operations': %" PRId64 "," 1829 "'wr_highest_offset': %" PRId64 1830 "} }", 1831 bs->rd_bytes, bs->wr_bytes, 1832 bs->rd_ops, bs->wr_ops, 1833 bs->wr_highest_sector * 1834 (uint64_t)BDRV_SECTOR_SIZE); 1835 dict = qobject_to_qdict(res); 1836 1837 if (*bs->device_name) { 1838 qdict_put(dict, "device", qstring_from_str(bs->device_name)); 1839 } 1840 1841 if (bs->file) { 1842 QObject *parent = bdrv_info_stats_bs(bs->file); 1843 qdict_put_obj(dict, "parent", parent); 1844 } 1845 1846 return res; 1847 } 1848 1849 void bdrv_info_stats(Monitor *mon, QObject **ret_data) 1850 { 1851 QObject *obj; 1852 QList *devices; 1853 BlockDriverState *bs; 1854 1855 devices = qlist_new(); 1856 1857 QTAILQ_FOREACH(bs, &bdrv_states, list) { 1858 obj = bdrv_info_stats_bs(bs); 1859 qlist_append_obj(devices, obj); 1860 } 1861 1862 *ret_data = QOBJECT(devices); 1863 } 1864 1865 const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 1866 { 1867 if (bs->backing_hd && bs->backing_hd->encrypted) 1868 return bs->backing_file; 1869 else if (bs->encrypted) 1870 return bs->filename; 1871 else 1872 return NULL; 1873 } 1874 1875 void bdrv_get_backing_filename(BlockDriverState *bs, 1876 char *filename, int filename_size) 1877 { 1878 if (!bs->backing_file) { 1879 pstrcpy(filename, filename_size, ""); 1880 } else { 1881 pstrcpy(filename, filename_size, bs->backing_file); 1882 } 1883 } 1884 1885 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 1886 const uint8_t *buf, int nb_sectors) 1887 { 1888 BlockDriver *drv = bs->drv; 1889 if (!drv) 1890 return -ENOMEDIUM; 1891 if (!drv->bdrv_write_compressed) 1892 return -ENOTSUP; 1893 if (bdrv_check_request(bs, sector_num, nb_sectors)) 1894 return -EIO; 1895 1896 if (bs->dirty_bitmap) { 1897 set_dirty_bitmap(bs, sector_num, nb_sectors, 1); 1898 } 1899 1900 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 1901 } 1902 1903 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 1904 { 1905 BlockDriver *drv = bs->drv; 1906 if (!drv) 1907 return -ENOMEDIUM; 1908 if (!drv->bdrv_get_info) 1909 return -ENOTSUP; 1910 memset(bdi, 0, sizeof(*bdi)); 1911 return drv->bdrv_get_info(bs, bdi); 1912 } 1913 1914 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 1915 int64_t pos, int size) 1916 { 1917 BlockDriver *drv = bs->drv; 1918 if (!drv) 1919 return -ENOMEDIUM; 1920 if (drv->bdrv_save_vmstate) 1921 return drv->bdrv_save_vmstate(bs, buf, pos, size); 1922 if (bs->file) 1923 return bdrv_save_vmstate(bs->file, buf, pos, size); 1924 return -ENOTSUP; 1925 } 1926 1927 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 1928 int64_t pos, int size) 1929 { 1930 BlockDriver *drv = bs->drv; 1931 if (!drv) 1932 return -ENOMEDIUM; 1933 if (drv->bdrv_load_vmstate) 1934 return drv->bdrv_load_vmstate(bs, buf, pos, size); 1935 if (bs->file) 1936 return bdrv_load_vmstate(bs->file, buf, pos, size); 1937 return -ENOTSUP; 1938 } 1939 1940 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 1941 { 1942 BlockDriver *drv = bs->drv; 1943 1944 if (!drv || !drv->bdrv_debug_event) { 1945 return; 1946 } 1947 1948 return drv->bdrv_debug_event(bs, event); 1949 1950 } 1951 1952 /**************************************************************/ 1953 /* handling of snapshots */ 1954 1955 int bdrv_can_snapshot(BlockDriverState *bs) 1956 { 1957 BlockDriver *drv = bs->drv; 1958 if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) { 
1959 return 0; 1960 } 1961 1962 if (!drv->bdrv_snapshot_create) { 1963 if (bs->file != NULL) { 1964 return bdrv_can_snapshot(bs->file); 1965 } 1966 return 0; 1967 } 1968 1969 return 1; 1970 } 1971 1972 int bdrv_is_snapshot(BlockDriverState *bs) 1973 { 1974 return !!(bs->open_flags & BDRV_O_SNAPSHOT); 1975 } 1976 1977 BlockDriverState *bdrv_snapshots(void) 1978 { 1979 BlockDriverState *bs; 1980 1981 if (bs_snapshots) { 1982 return bs_snapshots; 1983 } 1984 1985 bs = NULL; 1986 while ((bs = bdrv_next(bs))) { 1987 if (bdrv_can_snapshot(bs)) { 1988 bs_snapshots = bs; 1989 return bs; 1990 } 1991 } 1992 return NULL; 1993 } 1994 1995 int bdrv_snapshot_create(BlockDriverState *bs, 1996 QEMUSnapshotInfo *sn_info) 1997 { 1998 BlockDriver *drv = bs->drv; 1999 if (!drv) 2000 return -ENOMEDIUM; 2001 if (drv->bdrv_snapshot_create) 2002 return drv->bdrv_snapshot_create(bs, sn_info); 2003 if (bs->file) 2004 return bdrv_snapshot_create(bs->file, sn_info); 2005 return -ENOTSUP; 2006 } 2007 2008 int bdrv_snapshot_goto(BlockDriverState *bs, 2009 const char *snapshot_id) 2010 { 2011 BlockDriver *drv = bs->drv; 2012 int ret, open_ret; 2013 2014 if (!drv) 2015 return -ENOMEDIUM; 2016 if (drv->bdrv_snapshot_goto) 2017 return drv->bdrv_snapshot_goto(bs, snapshot_id); 2018 2019 if (bs->file) { 2020 drv->bdrv_close(bs); 2021 ret = bdrv_snapshot_goto(bs->file, snapshot_id); 2022 open_ret = drv->bdrv_open(bs, bs->open_flags); 2023 if (open_ret < 0) { 2024 bdrv_delete(bs->file); 2025 bs->drv = NULL; 2026 return open_ret; 2027 } 2028 return ret; 2029 } 2030 2031 return -ENOTSUP; 2032 } 2033 2034 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) 2035 { 2036 BlockDriver *drv = bs->drv; 2037 if (!drv) 2038 return -ENOMEDIUM; 2039 if (drv->bdrv_snapshot_delete) 2040 return drv->bdrv_snapshot_delete(bs, snapshot_id); 2041 if (bs->file) 2042 return bdrv_snapshot_delete(bs->file, snapshot_id); 2043 return -ENOTSUP; 2044 } 2045 2046 int bdrv_snapshot_list(BlockDriverState *bs, 2047 QEMUSnapshotInfo **psn_info) 2048 { 2049 BlockDriver *drv = bs->drv; 2050 if (!drv) 2051 return -ENOMEDIUM; 2052 if (drv->bdrv_snapshot_list) 2053 return drv->bdrv_snapshot_list(bs, psn_info); 2054 if (bs->file) 2055 return bdrv_snapshot_list(bs->file, psn_info); 2056 return -ENOTSUP; 2057 } 2058 2059 int bdrv_snapshot_load_tmp(BlockDriverState *bs, 2060 const char *snapshot_name) 2061 { 2062 BlockDriver *drv = bs->drv; 2063 if (!drv) { 2064 return -ENOMEDIUM; 2065 } 2066 if (!bs->read_only) { 2067 return -EINVAL; 2068 } 2069 if (drv->bdrv_snapshot_load_tmp) { 2070 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); 2071 } 2072 return -ENOTSUP; 2073 } 2074 2075 #define NB_SUFFIXES 4 2076 2077 char *get_human_readable_size(char *buf, int buf_size, int64_t size) 2078 { 2079 static const char suffixes[NB_SUFFIXES] = "KMGT"; 2080 int64_t base; 2081 int i; 2082 2083 if (size <= 999) { 2084 snprintf(buf, buf_size, "%" PRId64, size); 2085 } else { 2086 base = 1024; 2087 for(i = 0; i < NB_SUFFIXES; i++) { 2088 if (size < (10 * base)) { 2089 snprintf(buf, buf_size, "%0.1f%c", 2090 (double)size / base, 2091 suffixes[i]); 2092 break; 2093 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) { 2094 snprintf(buf, buf_size, "%" PRId64 "%c", 2095 ((size + (base >> 1)) / base), 2096 suffixes[i]); 2097 break; 2098 } 2099 base = base * 1024; 2100 } 2101 } 2102 return buf; 2103 } 2104 2105 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) 2106 { 2107 char buf1[128], date_buf[128], clock_buf[128]; 2108 #ifdef _WIN32 
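/*
 * A few sample outputs of get_human_readable_size() above, for
 * illustration (1024-based scaling):
 *
 *     512        -> "512"
 *     1536       -> "1.5K"   (below 10*base: one decimal place)
 *     20480      -> "20K"    (rounded to the nearest unit)
 *     1073741824 -> "1.0G"
 */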
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}


/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;
    BlockDriverAIOCB *ret;

    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    if (!drv)
        return NULL;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return NULL;

    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
                              cb, opaque);

    if (ret) {
        /* Update stats even though technically transfer has not happened. */
        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
        bs->rd_ops++;
    }

    return ret;
}

typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb;
    void *opaque;
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
} BlockCompleteData;

static void block_complete_cb(void *opaque, int ret)
{
    BlockCompleteData *b = opaque;

    if (b->bs->dirty_bitmap) {
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
    }
    b->cb(b->opaque, ret);
    qemu_free(b);
}

static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
                                             int64_t sector_num,
                                             int nb_sectors,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
{
    BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));

    blkdata->bs = bs;
    blkdata->cb = cb;
    blkdata->opaque = opaque;
    blkdata->sector_num = sector_num;
    blkdata->nb_sectors = nb_sectors;

    return blkdata;
}
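/*
 * Illustrative sketch of the callback-based AIO interface declared
 * above (my_read_done and buffer are hypothetical names):
 *
 *     static void my_read_done(void *opaque, int ret)
 *     {
 *         // ret is 0 on success or a negative errno value
 *     }
 *
 *     qemu_iovec_init(&qiov, 1);
 *     qemu_iovec_add(&qiov, buffer, 512);
 *     acb = bdrv_aio_readv(bs, sector_num, &qiov, 1, my_read_done, NULL);
 *     if (!acb) {
 *         // the request could not be submitted
 *     }
 */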
technically transfer has not happened. */ 2243 bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE; 2244 bs->wr_ops ++; 2245 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { 2246 bs->wr_highest_sector = sector_num + nb_sectors - 1; 2247 } 2248 } 2249 2250 return ret; 2251 } 2252 2253 2254 typedef struct MultiwriteCB { 2255 int error; 2256 int num_requests; 2257 int num_callbacks; 2258 struct { 2259 BlockDriverCompletionFunc *cb; 2260 void *opaque; 2261 QEMUIOVector *free_qiov; 2262 void *free_buf; 2263 } callbacks[]; 2264 } MultiwriteCB; 2265 2266 static void multiwrite_user_cb(MultiwriteCB *mcb) 2267 { 2268 int i; 2269 2270 for (i = 0; i < mcb->num_callbacks; i++) { 2271 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 2272 if (mcb->callbacks[i].free_qiov) { 2273 qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 2274 } 2275 qemu_free(mcb->callbacks[i].free_qiov); 2276 qemu_vfree(mcb->callbacks[i].free_buf); 2277 } 2278 } 2279 2280 static void multiwrite_cb(void *opaque, int ret) 2281 { 2282 MultiwriteCB *mcb = opaque; 2283 2284 trace_multiwrite_cb(mcb, ret); 2285 2286 if (ret < 0 && !mcb->error) { 2287 mcb->error = ret; 2288 } 2289 2290 mcb->num_requests--; 2291 if (mcb->num_requests == 0) { 2292 multiwrite_user_cb(mcb); 2293 qemu_free(mcb); 2294 } 2295 } 2296 2297 static int multiwrite_req_compare(const void *a, const void *b) 2298 { 2299 const BlockRequest *req1 = a, *req2 = b; 2300 2301 /* 2302 * Note that we can't simply subtract req2->sector from req1->sector 2303 * here as that could overflow the return value. 2304 */ 2305 if (req1->sector > req2->sector) { 2306 return 1; 2307 } else if (req1->sector < req2->sector) { 2308 return -1; 2309 } else { 2310 return 0; 2311 } 2312 } 2313 2314 /* 2315 * Takes a bunch of requests and tries to merge them. Returns the number of 2316 * requests that remain after merging. 2317 */ 2318 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 2319 int num_reqs, MultiwriteCB *mcb) 2320 { 2321 int i, outidx; 2322 2323 // Sort requests by start sector 2324 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 2325 2326 // Check if adjacent requests touch the same clusters. If so, combine them, 2327 // filling up gaps with zero sectors. 2328 outidx = 0; 2329 for (i = 1; i < num_reqs; i++) { 2330 int merge = 0; 2331 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 2332 2333 // This handles the cases that are valid for all block drivers, namely 2334 // exactly sequential writes and overlapping writes. 2335 if (reqs[i].sector <= oldreq_last) { 2336 merge = 1; 2337 } 2338 2339 // The block driver may decide that it makes sense to combine requests 2340 // even if there is a gap of some sectors between them. In this case, 2341 // the gap is filled with zeros (therefore only applicable for yet 2342 // unused space in format like qcow2). 2343 if (!merge && bs->drv->bdrv_merge_requests) { 2344 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]); 2345 } 2346 2347 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 2348 merge = 0; 2349 } 2350 2351 if (merge) { 2352 size_t size; 2353 QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov)); 2354 qemu_iovec_init(qiov, 2355 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 2356 2357 // Add the first request to the merged one. If the requests are 2358 // overlapping, drop the last sectors of the first request. 
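            // For example: if the merged request so far covers sectors 0..7
            // and reqs[i] starts at sector 6, only the first six sectors'
            // worth of bytes are taken from the old qiov below, so the
            // overlapping tail is superseded by the newer data.  If reqs[i]
            // starts at sector 10 instead, the old qiov is kept in full and
            // the two-sector gap (sectors 8..9) is filled with zeros.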
2359 size = (reqs[i].sector - reqs[outidx].sector) << 9; 2360 qemu_iovec_concat(qiov, reqs[outidx].qiov, size); 2361 2362 // We might need to add some zeros between the two requests 2363 if (reqs[i].sector > oldreq_last) { 2364 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9; 2365 uint8_t *buf = qemu_blockalign(bs, zero_bytes); 2366 memset(buf, 0, zero_bytes); 2367 qemu_iovec_add(qiov, buf, zero_bytes); 2368 mcb->callbacks[i].free_buf = buf; 2369 } 2370 2371 // Add the second request 2372 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); 2373 2374 reqs[outidx].nb_sectors = qiov->size >> 9; 2375 reqs[outidx].qiov = qiov; 2376 2377 mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 2378 } else { 2379 outidx++; 2380 reqs[outidx].sector = reqs[i].sector; 2381 reqs[outidx].nb_sectors = reqs[i].nb_sectors; 2382 reqs[outidx].qiov = reqs[i].qiov; 2383 } 2384 } 2385 2386 return outidx + 1; 2387 } 2388 2389 /* 2390 * Submit multiple AIO write requests at once. 2391 * 2392 * On success, the function returns 0 and all requests in the reqs array have 2393 * been submitted. In error case this function returns -1, and any of the 2394 * requests may or may not be submitted yet. In particular, this means that the 2395 * callback will be called for some of the requests, for others it won't. The 2396 * caller must check the error field of the BlockRequest to wait for the right 2397 * callbacks (if error != 0, no callback will be called). 2398 * 2399 * The implementation may modify the contents of the reqs array, e.g. to merge 2400 * requests. However, the fields opaque and error are left unmodified as they 2401 * are used to signal failure for a single request to the caller. 2402 */ 2403 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 2404 { 2405 BlockDriverAIOCB *acb; 2406 MultiwriteCB *mcb; 2407 int i; 2408 2409 /* don't submit writes if we don't have a medium */ 2410 if (bs->drv == NULL) { 2411 for (i = 0; i < num_reqs; i++) { 2412 reqs[i].error = -ENOMEDIUM; 2413 } 2414 return -1; 2415 } 2416 2417 if (num_reqs == 0) { 2418 return 0; 2419 } 2420 2421 // Create MultiwriteCB structure 2422 mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 2423 mcb->num_requests = 0; 2424 mcb->num_callbacks = num_reqs; 2425 2426 for (i = 0; i < num_reqs; i++) { 2427 mcb->callbacks[i].cb = reqs[i].cb; 2428 mcb->callbacks[i].opaque = reqs[i].opaque; 2429 } 2430 2431 // Check for mergable requests 2432 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 2433 2434 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 2435 2436 /* 2437 * Run the aio requests. As soon as one request can't be submitted 2438 * successfully, fail all requests that are not yet submitted (we must 2439 * return failure for all requests anyway) 2440 * 2441 * num_requests cannot be set to the right value immediately: If 2442 * bdrv_aio_writev fails for some request, num_requests would be too high 2443 * and therefore multiwrite_cb() would never recognize the multiwrite 2444 * request as completed. We also cannot use the loop variable i to set it 2445 * when the first request fails because the callback may already have been 2446 * called for previously submitted requests. Thus, num_requests must be 2447 * incremented for each request that is submitted. 
2448 * 2449 * The problem that callbacks may be called early also means that we need 2450 * to take care that num_requests doesn't become 0 before all requests are 2451 * submitted - multiwrite_cb() would consider the multiwrite request 2452 * completed. A dummy request that is "completed" by a manual call to 2453 * multiwrite_cb() takes care of this. 2454 */ 2455 mcb->num_requests = 1; 2456 2457 // Run the aio requests 2458 for (i = 0; i < num_reqs; i++) { 2459 mcb->num_requests++; 2460 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, 2461 reqs[i].nb_sectors, multiwrite_cb, mcb); 2462 2463 if (acb == NULL) { 2464 // We can only fail the whole thing if no request has been 2465 // submitted yet. Otherwise we'll wait for the submitted AIOs to 2466 // complete and report the error in the callback. 2467 if (i == 0) { 2468 trace_bdrv_aio_multiwrite_earlyfail(mcb); 2469 goto fail; 2470 } else { 2471 trace_bdrv_aio_multiwrite_latefail(mcb, i); 2472 multiwrite_cb(mcb, -EIO); 2473 break; 2474 } 2475 } 2476 } 2477 2478 /* Complete the dummy request */ 2479 multiwrite_cb(mcb, 0); 2480 2481 return 0; 2482 2483 fail: 2484 for (i = 0; i < mcb->num_callbacks; i++) { 2485 reqs[i].error = -EIO; 2486 } 2487 qemu_free(mcb); 2488 return -1; 2489 } 2490 2491 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 2492 BlockDriverCompletionFunc *cb, void *opaque) 2493 { 2494 BlockDriver *drv = bs->drv; 2495 2496 trace_bdrv_aio_flush(bs, opaque); 2497 2498 if (bs->open_flags & BDRV_O_NO_FLUSH) { 2499 return bdrv_aio_noop_em(bs, cb, opaque); 2500 } 2501 2502 if (!drv) 2503 return NULL; 2504 return drv->bdrv_aio_flush(bs, cb, opaque); 2505 } 2506 2507 void bdrv_aio_cancel(BlockDriverAIOCB *acb) 2508 { 2509 acb->pool->cancel(acb); 2510 } 2511 2512 2513 /**************************************************************/ 2514 /* async block device emulation */ 2515 2516 typedef struct BlockDriverAIOCBSync { 2517 BlockDriverAIOCB common; 2518 QEMUBH *bh; 2519 int ret; 2520 /* vector translation state */ 2521 QEMUIOVector *qiov; 2522 uint8_t *bounce; 2523 int is_write; 2524 } BlockDriverAIOCBSync; 2525 2526 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) 2527 { 2528 BlockDriverAIOCBSync *acb = 2529 container_of(blockacb, BlockDriverAIOCBSync, common); 2530 qemu_bh_delete(acb->bh); 2531 acb->bh = NULL; 2532 qemu_aio_release(acb); 2533 } 2534 2535 static AIOPool bdrv_em_aio_pool = { 2536 .aiocb_size = sizeof(BlockDriverAIOCBSync), 2537 .cancel = bdrv_aio_cancel_em, 2538 }; 2539 2540 static void bdrv_aio_bh_cb(void *opaque) 2541 { 2542 BlockDriverAIOCBSync *acb = opaque; 2543 2544 if (!acb->is_write) 2545 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); 2546 qemu_vfree(acb->bounce); 2547 acb->common.cb(acb->common.opaque, acb->ret); 2548 qemu_bh_delete(acb->bh); 2549 acb->bh = NULL; 2550 qemu_aio_release(acb); 2551 } 2552 2553 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 2554 int64_t sector_num, 2555 QEMUIOVector *qiov, 2556 int nb_sectors, 2557 BlockDriverCompletionFunc *cb, 2558 void *opaque, 2559 int is_write) 2560 2561 { 2562 BlockDriverAIOCBSync *acb; 2563 2564 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2565 acb->is_write = is_write; 2566 acb->qiov = qiov; 2567 acb->bounce = qemu_blockalign(bs, qiov->size); 2568 2569 if (!acb->bh) 2570 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2571 2572 if (is_write) { 2573 qemu_iovec_to_buffer(acb->qiov, acb->bounce); 2574 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 2575 } else { 2576 acb->ret = 
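                   /* Emulated AIO: serve the request with the driver's
                    * synchronous read into the bounce buffer; bdrv_aio_bh_cb()
                    * copies the data back into the caller's qiov and invokes
                    * the completion callback from the bottom half scheduled
                    * below. */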
bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 2577 } 2578 2579 qemu_bh_schedule(acb->bh); 2580 2581 return &acb->common; 2582 } 2583 2584 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 2585 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2586 BlockDriverCompletionFunc *cb, void *opaque) 2587 { 2588 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 2589 } 2590 2591 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 2592 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 2593 BlockDriverCompletionFunc *cb, void *opaque) 2594 { 2595 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 2596 } 2597 2598 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, 2599 BlockDriverCompletionFunc *cb, void *opaque) 2600 { 2601 BlockDriverAIOCBSync *acb; 2602 2603 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2604 acb->is_write = 1; /* don't bounce in the completion handler */ 2605 acb->qiov = NULL; 2606 acb->bounce = NULL; 2607 acb->ret = 0; 2608 2609 if (!acb->bh) 2610 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2611 2612 bdrv_flush(bs); 2613 qemu_bh_schedule(acb->bh); 2614 return &acb->common; 2615 } 2616 2617 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, 2618 BlockDriverCompletionFunc *cb, void *opaque) 2619 { 2620 BlockDriverAIOCBSync *acb; 2621 2622 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); 2623 acb->is_write = 1; /* don't bounce in the completion handler */ 2624 acb->qiov = NULL; 2625 acb->bounce = NULL; 2626 acb->ret = 0; 2627 2628 if (!acb->bh) { 2629 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); 2630 } 2631 2632 qemu_bh_schedule(acb->bh); 2633 return &acb->common; 2634 } 2635 2636 /**************************************************************/ 2637 /* sync block device emulation */ 2638 2639 static void bdrv_rw_em_cb(void *opaque, int ret) 2640 { 2641 *(int *)opaque = ret; 2642 } 2643 2644 #define NOT_DONE 0x7fffffff 2645 2646 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 2647 uint8_t *buf, int nb_sectors) 2648 { 2649 int async_ret; 2650 BlockDriverAIOCB *acb; 2651 struct iovec iov; 2652 QEMUIOVector qiov; 2653 2654 async_context_push(); 2655 2656 async_ret = NOT_DONE; 2657 iov.iov_base = (void *)buf; 2658 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2659 qemu_iovec_init_external(&qiov, &iov, 1); 2660 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, 2661 bdrv_rw_em_cb, &async_ret); 2662 if (acb == NULL) { 2663 async_ret = -1; 2664 goto fail; 2665 } 2666 2667 while (async_ret == NOT_DONE) { 2668 qemu_aio_wait(); 2669 } 2670 2671 2672 fail: 2673 async_context_pop(); 2674 return async_ret; 2675 } 2676 2677 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, 2678 const uint8_t *buf, int nb_sectors) 2679 { 2680 int async_ret; 2681 BlockDriverAIOCB *acb; 2682 struct iovec iov; 2683 QEMUIOVector qiov; 2684 2685 async_context_push(); 2686 2687 async_ret = NOT_DONE; 2688 iov.iov_base = (void *)buf; 2689 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; 2690 qemu_iovec_init_external(&qiov, &iov, 1); 2691 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, 2692 bdrv_rw_em_cb, &async_ret); 2693 if (acb == NULL) { 2694 async_ret = -1; 2695 goto fail; 2696 } 2697 while (async_ret == NOT_DONE) { 2698 qemu_aio_wait(); 2699 } 2700 2701 fail: 2702 async_context_pop(); 2703 return async_ret; 2704 } 2705 2706 void bdrv_init(void) 2707 { 2708 module_call_init(MODULE_INIT_BLOCK); 2709 } 2710 2711 void
bdrv_init_with_whitelist(void) 2712 { 2713 use_bdrv_whitelist = 1; 2714 bdrv_init(); 2715 } 2716 2717 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, 2718 BlockDriverCompletionFunc *cb, void *opaque) 2719 { 2720 BlockDriverAIOCB *acb; 2721 2722 if (pool->free_aiocb) { 2723 acb = pool->free_aiocb; 2724 pool->free_aiocb = acb->next; 2725 } else { 2726 acb = qemu_mallocz(pool->aiocb_size); 2727 acb->pool = pool; 2728 } 2729 acb->bs = bs; 2730 acb->cb = cb; 2731 acb->opaque = opaque; 2732 return acb; 2733 } 2734 2735 void qemu_aio_release(void *p) 2736 { 2737 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p; 2738 AIOPool *pool = acb->pool; 2739 acb->next = pool->free_aiocb; 2740 pool->free_aiocb = acb; 2741 } 2742 2743 /**************************************************************/ 2744 /* removable device support */ 2745 2746 /** 2747 * Return TRUE if the media is present 2748 */ 2749 int bdrv_is_inserted(BlockDriverState *bs) 2750 { 2751 BlockDriver *drv = bs->drv; 2752 int ret; 2753 if (!drv) 2754 return 0; 2755 if (!drv->bdrv_is_inserted) 2756 return !bs->tray_open; 2757 ret = drv->bdrv_is_inserted(bs); 2758 return ret; 2759 } 2760 2761 /** 2762 * Return TRUE if the media changed since the last call to this 2763 * function. It is currently only used for floppy disks 2764 */ 2765 int bdrv_media_changed(BlockDriverState *bs) 2766 { 2767 BlockDriver *drv = bs->drv; 2768 int ret; 2769 2770 if (!drv || !drv->bdrv_media_changed) 2771 ret = -ENOTSUP; 2772 else 2773 ret = drv->bdrv_media_changed(bs); 2774 if (ret == -ENOTSUP) 2775 ret = bs->media_changed; 2776 bs->media_changed = 0; 2777 return ret; 2778 } 2779 2780 /** 2781 * If eject_flag is TRUE, eject the media. Otherwise, close the tray 2782 */ 2783 int bdrv_eject(BlockDriverState *bs, int eject_flag) 2784 { 2785 BlockDriver *drv = bs->drv; 2786 int ret; 2787 2788 if (bs->locked) { 2789 return -EBUSY; 2790 } 2791 2792 if (!drv || !drv->bdrv_eject) { 2793 ret = -ENOTSUP; 2794 } else { 2795 ret = drv->bdrv_eject(bs, eject_flag); 2796 } 2797 if (ret == -ENOTSUP) { 2798 ret = 0; 2799 } 2800 if (ret >= 0) { 2801 bs->tray_open = eject_flag; 2802 } 2803 2804 return ret; 2805 } 2806 2807 int bdrv_is_locked(BlockDriverState *bs) 2808 { 2809 return bs->locked; 2810 } 2811 2812 /** 2813 * Lock or unlock the media (if it is locked, the user won't be able 2814 * to eject it manually). 2815 */ 2816 void bdrv_set_locked(BlockDriverState *bs, int locked) 2817 { 2818 BlockDriver *drv = bs->drv; 2819 2820 trace_bdrv_set_locked(bs, locked); 2821 2822 bs->locked = locked; 2823 if (drv && drv->bdrv_set_locked) { 2824 drv->bdrv_set_locked(bs, locked); 2825 } 2826 } 2827 2828 /* needed for generic scsi interface */ 2829 2830 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 2831 { 2832 BlockDriver *drv = bs->drv; 2833 2834 if (drv && drv->bdrv_ioctl) 2835 return drv->bdrv_ioctl(bs, req, buf); 2836 return -ENOTSUP; 2837 } 2838 2839 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 2840 unsigned long int req, void *buf, 2841 BlockDriverCompletionFunc *cb, void *opaque) 2842 { 2843 BlockDriver *drv = bs->drv; 2844 2845 if (drv && drv->bdrv_aio_ioctl) 2846 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 2847 return NULL; 2848 } 2849 2850 2851 2852 void *qemu_blockalign(BlockDriverState *bs, size_t size) 2853 { 2854 return qemu_memalign((bs && bs->buffer_alignment) ? 
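                          /* honour the buffer alignment the driver set for
                           * this BlockDriverState when there is one,
                           * otherwise fall back to 512-byte (sector)
                           * alignment */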
bs->buffer_alignment : 512, size); 2855 } 2856 2857 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) 2858 { 2859 int64_t bitmap_size; 2860 2861 bs->dirty_count = 0; 2862 if (enable) { 2863 if (!bs->dirty_bitmap) { 2864 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + 2865 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; 2866 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; 2867 2868 bs->dirty_bitmap = qemu_mallocz(bitmap_size); 2869 } 2870 } else { 2871 if (bs->dirty_bitmap) { 2872 qemu_free(bs->dirty_bitmap); 2873 bs->dirty_bitmap = NULL; 2874 } 2875 } 2876 } 2877 2878 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) 2879 { 2880 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; 2881 2882 if (bs->dirty_bitmap && 2883 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { 2884 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & 2885 (1UL << (chunk % (sizeof(unsigned long) * 8)))); 2886 } else { 2887 return 0; 2888 } 2889 } 2890 2891 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 2892 int nr_sectors) 2893 { 2894 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); 2895 } 2896 2897 int64_t bdrv_get_dirty_count(BlockDriverState *bs) 2898 { 2899 return bs->dirty_count; 2900 } 2901 2902 void bdrv_set_in_use(BlockDriverState *bs, int in_use) 2903 { 2904 assert(bs->in_use != in_use); 2905 bs->in_use = in_use; 2906 } 2907 2908 int bdrv_in_use(BlockDriverState *bs) 2909 { 2910 return bs->in_use; 2911 } 2912 2913 int bdrv_img_create(const char *filename, const char *fmt, 2914 const char *base_filename, const char *base_fmt, 2915 char *options, uint64_t img_size, int flags) 2916 { 2917 QEMUOptionParameter *param = NULL, *create_options = NULL; 2918 QEMUOptionParameter *backing_fmt, *backing_file; 2919 BlockDriverState *bs = NULL; 2920 BlockDriver *drv, *proto_drv; 2921 BlockDriver *backing_drv = NULL; 2922 int ret = 0; 2923 2924 /* Find driver and parse its options */ 2925 drv = bdrv_find_format(fmt); 2926 if (!drv) { 2927 error_report("Unknown file format '%s'", fmt); 2928 ret = -EINVAL; 2929 goto out; 2930 } 2931 2932 proto_drv = bdrv_find_protocol(filename); 2933 if (!proto_drv) { 2934 error_report("Unknown protocol '%s'", filename); 2935 ret = -EINVAL; 2936 goto out; 2937 } 2938 2939 create_options = append_option_parameters(create_options, 2940 drv->create_options); 2941 create_options = append_option_parameters(create_options, 2942 proto_drv->create_options); 2943 2944 /* Create parameter list with default values */ 2945 param = parse_option_parameters("", create_options, param); 2946 2947 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); 2948 2949 /* Parse -o options */ 2950 if (options) { 2951 param = parse_option_parameters(options, create_options, param); 2952 if (param == NULL) { 2953 error_report("Invalid options for file format '%s'.", fmt); 2954 ret = -EINVAL; 2955 goto out; 2956 } 2957 } 2958 2959 if (base_filename) { 2960 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, 2961 base_filename)) { 2962 error_report("Backing file not supported for file format '%s'", 2963 fmt); 2964 ret = -EINVAL; 2965 goto out; 2966 } 2967 } 2968 2969 if (base_fmt) { 2970 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { 2971 error_report("Backing file format not supported for file " 2972 "format '%s'", fmt); 2973 ret = -EINVAL; 2974 goto out; 2975 } 2976 } 2977 2978 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); 2979 if (backing_file && backing_file->value.s) { 2980 if (!strcmp(filename, 
backing_file->value.s)) { 2981 error_report("Error: Trying to create an image with the " 2982 "same filename as the backing file"); 2983 ret = -EINVAL; 2984 goto out; 2985 } 2986 } 2987 2988 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); 2989 if (backing_fmt && backing_fmt->value.s) { 2990 backing_drv = bdrv_find_format(backing_fmt->value.s); 2991 if (!backing_drv) { 2992 error_report("Unknown backing file format '%s'", 2993 backing_fmt->value.s); 2994 ret = -EINVAL; 2995 goto out; 2996 } 2997 } 2998 2999 // The size for the image must always be specified, with one exception: 3000 // If we are using a backing file, we can obtain the size from there 3001 if (get_option_parameter(param, BLOCK_OPT_SIZE)->value.n == -1) { 3002 if (backing_file && backing_file->value.s) { 3003 uint64_t size; 3004 char buf[32]; 3005 3006 bs = bdrv_new(""); 3007 3008 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv); 3009 if (ret < 0) { 3010 error_report("Could not open '%s'", backing_file->value.s); 3011 goto out; 3012 } 3013 bdrv_get_geometry(bs, &size); 3014 size *= 512; 3015 3016 snprintf(buf, sizeof(buf), "%" PRId64, size); 3017 set_option_parameter(param, BLOCK_OPT_SIZE, buf); 3018 } else { 3019 error_report("Image creation needs a size parameter"); 3020 ret = -EINVAL; 3021 goto out; 3022 } 3023 } 3024 3025 printf("Formatting '%s', fmt=%s ", filename, fmt); 3026 print_option_parameters(param); 3027 puts(""); 3028 3029 ret = bdrv_create(drv, filename, param); 3030 3031 if (ret < 0) { 3032 if (ret == -ENOTSUP) { 3033 error_report("Formatting or formatting option not supported for " 3034 "file format '%s'", fmt); 3035 } else if (ret == -EFBIG) { 3036 error_report("The image size is too large for file format '%s'", 3037 fmt); 3038 } else { 3039 error_report("%s: error while creating %s: %s", filename, fmt, 3040 strerror(-ret)); 3041 } 3042 } 3043 3044 out: 3045 free_option_parameters(create_options); 3046 free_option_parameters(param); 3047 3048 if (bs) { 3049 bdrv_delete(bs); 3050 } 3051 3052 return ret; 3053 } 3054
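/*
 * Illustrative sketch of a bdrv_aio_multiwrite() caller, kept out of the
 * build with #if 0.  Nothing in QEMU uses it; the ExampleWrite and
 * example_* names are invented for the illustration only.  It exercises the
 * contract documented above bdrv_aio_multiwrite(): fill in sector,
 * nb_sectors, qiov, cb and opaque for each BlockRequest, submit the batch,
 * and if the call returns -1 consult each request's error field -- a
 * request whose error is non-zero will never get its callback.
 */
#if 0
typedef struct ExampleWrite {
    QEMUIOVector qiov;   /* must stay valid until the callback has run */
    int ret;
} ExampleWrite;

static void example_write_cb(void *opaque, int ret)
{
    /* Completion is reported here, asynchronously, once per request. */
    ExampleWrite *w = opaque;
    w->ret = ret;
}

/* 'writes' and the data buffers must remain allocated until every callback
 * has run; the caller destroys the qiovs afterwards. */
static int example_submit_pair(BlockDriverState *bs, ExampleWrite writes[2],
                               uint8_t *buf0, uint8_t *buf1)
{
    BlockRequest reqs[2];
    int i;

    qemu_iovec_init(&writes[0].qiov, 1);
    qemu_iovec_add(&writes[0].qiov, buf0, 8 * BDRV_SECTOR_SIZE);
    qemu_iovec_init(&writes[1].qiov, 1);
    qemu_iovec_add(&writes[1].qiov, buf1, 4 * BDRV_SECTOR_SIZE);

    /* Two adjacent writes that multiwrite_merge() is free to combine. */
    reqs[0].sector     = 0;
    reqs[0].nb_sectors = 8;
    reqs[0].qiov       = &writes[0].qiov;
    reqs[1].sector     = 8;
    reqs[1].nb_sectors = 4;
    reqs[1].qiov       = &writes[1].qiov;

    for (i = 0; i < 2; i++) {
        reqs[i].error  = 0;
        reqs[i].cb     = example_write_cb;
        reqs[i].opaque = &writes[i];
    }

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        /* A request whose error field was set gets no callback; any other
         * request still completes (with an error) via example_write_cb. */
        for (i = 0; i < 2; i++) {
            if (reqs[i].error) {
                writes[i].ret = reqs[i].error;
            }
        }
        return -1;
    }

    return 0;
}
#endif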