1 /* 2 * Block driver for Connectix / Microsoft Virtual PC images 3 * 4 * Copyright (c) 2005 Alex Beregszaszi 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 #include "qemu/osdep.h" 26 #include "qapi/error.h" 27 #include "qemu-common.h" 28 #include "block/block_int.h" 29 #include "sysemu/block-backend.h" 30 #include "qemu/module.h" 31 #include "migration/migration.h" 32 #include "qemu/bswap.h" 33 #if defined(CONFIG_UUID) 34 #include <uuid/uuid.h> 35 #endif 36 37 /**************************************************************/ 38 39 #define HEADER_SIZE 512 40 41 //#define CACHE 42 43 enum vhd_type { 44 VHD_FIXED = 2, 45 VHD_DYNAMIC = 3, 46 VHD_DIFFERENCING = 4, 47 }; 48 49 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ 50 #define VHD_TIMESTAMP_BASE 946684800 51 52 #define VHD_CHS_MAX_C 65535LL 53 #define VHD_CHS_MAX_H 16 54 #define VHD_CHS_MAX_S 255 55 56 #define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */ 57 #define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) 58 59 #define VPC_OPT_FORCE_SIZE "force_size" 60 61 /* always big-endian */ 62 typedef struct vhd_footer { 63 char creator[8]; /* "conectix" */ 64 uint32_t features; 65 uint32_t version; 66 67 /* Offset of next header structure, 0xFFFFFFFF if none */ 68 uint64_t data_offset; 69 70 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ 71 uint32_t timestamp; 72 73 char creator_app[4]; /* e.g., "vpc " */ 74 uint16_t major; 75 uint16_t minor; 76 char creator_os[4]; /* "Wi2k" */ 77 78 uint64_t orig_size; 79 uint64_t current_size; 80 81 uint16_t cyls; 82 uint8_t heads; 83 uint8_t secs_per_cyl; 84 85 uint32_t type; 86 87 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all 88 the bytes in the footer without the checksum field") */ 89 uint32_t checksum; 90 91 /* UUID used to identify a parent hard disk (backing file) */ 92 uint8_t uuid[16]; 93 94 uint8_t in_saved_state; 95 } QEMU_PACKED VHDFooter; 96 97 typedef struct vhd_dyndisk_header { 98 char magic[8]; /* "cxsparse" */ 99 100 /* Offset of next header structure, 0xFFFFFFFF if none */ 101 uint64_t data_offset; 102 103 /* Offset of the Block Allocation Table (BAT) */ 104 uint64_t table_offset; 105 106 uint32_t version; 107 uint32_t max_table_entries; /* 32bit/entry */ 108 109 /* 2 MB by default, must be a power of two */ 110 uint32_t block_size; 111 112 uint32_t checksum; 113 uint8_t parent_uuid[16]; 114 uint32_t parent_timestamp; 115 uint32_t reserved; 116 117 /* Backing file name (in UTF-16) */ 118 uint8_t parent_name[512]; 119 120 struct { 121 uint32_t platform; 122 uint32_t data_space; 123 uint32_t data_length; 124 uint32_t reserved; 125 uint64_t data_offset; 126 } parent_locator[8]; 127 } QEMU_PACKED VHDDynDiskHeader; 128 129 typedef struct BDRVVPCState { 130 CoMutex lock; 131 uint8_t footer_buf[HEADER_SIZE]; 132 uint64_t free_data_block_offset; 133 int max_table_entries; 134 uint32_t *pagetable; 135 uint64_t bat_offset; 136 uint64_t last_bitmap_offset; 137 138 uint32_t block_size; 139 uint32_t bitmap_size; 140 bool force_use_chs; 141 bool force_use_sz; 142 143 #ifdef CACHE 144 uint8_t *pageentry_u8; 145 uint32_t *pageentry_u32; 146 uint16_t *pageentry_u16; 147 148 uint64_t last_bitmap; 149 #endif 150 151 Error *migration_blocker; 152 } BDRVVPCState; 153 154 #define VPC_OPT_SIZE_CALC "force_size_calc" 155 static QemuOptsList vpc_runtime_opts = { 156 .name = "vpc-runtime-opts", 157 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), 158 .desc = { 159 { 160 .name = VPC_OPT_SIZE_CALC, 161 .type = QEMU_OPT_STRING, 162 .help = "Force disk size calculation to use either CHS geometry, " 163 "or use the disk current_size specified in the VHD footer. " 164 "{chs, current_size}" 165 }, 166 { /* end of list */ } 167 } 168 }; 169 170 static uint32_t vpc_checksum(uint8_t* buf, size_t size) 171 { 172 uint32_t res = 0; 173 int i; 174 175 for (i = 0; i < size; i++) 176 res += buf[i]; 177 178 return ~res; 179 } 180 181 182 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) 183 { 184 if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) 185 return 100; 186 return 0; 187 } 188 189 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, 190 Error **errp) 191 { 192 BDRVVPCState *s = bs->opaque; 193 const char *size_calc; 194 195 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); 196 197 if (!size_calc) { 198 /* no override, use autodetect only */ 199 } else if (!strcmp(size_calc, "current_size")) { 200 s->force_use_sz = true; 201 } else if (!strcmp(size_calc, "chs")) { 202 s->force_use_chs = true; 203 } else { 204 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); 205 } 206 } 207 208 static int vpc_open(BlockDriverState *bs, QDict *options, int flags, 209 Error **errp) 210 { 211 BDRVVPCState *s = bs->opaque; 212 int i; 213 VHDFooter *footer; 214 VHDDynDiskHeader *dyndisk_header; 215 QemuOpts *opts = NULL; 216 Error *local_err = NULL; 217 bool use_chs; 218 uint8_t buf[HEADER_SIZE]; 219 uint32_t checksum; 220 uint64_t computed_size; 221 uint64_t pagetable_size; 222 int disk_type = VHD_DYNAMIC; 223 int ret; 224 225 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); 226 qemu_opts_absorb_qdict(opts, options, &local_err); 227 if (local_err) { 228 error_propagate(errp, local_err); 229 ret = -EINVAL; 230 goto fail; 231 } 232 233 vpc_parse_options(bs, opts, &local_err); 234 if (local_err) { 235 error_propagate(errp, local_err); 236 ret = -EINVAL; 237 goto fail; 238 } 239 240 ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); 241 if (ret < 0) { 242 error_setg(errp, "Unable to read VHD header"); 243 goto fail; 244 } 245 246 footer = (VHDFooter *) s->footer_buf; 247 if (strncmp(footer->creator, "conectix", 8)) { 248 int64_t offset = bdrv_getlength(bs->file->bs); 249 if (offset < 0) { 250 ret = offset; 251 error_setg(errp, "Invalid file size"); 252 goto fail; 253 } else if (offset < HEADER_SIZE) { 254 ret = -EINVAL; 255 error_setg(errp, "File too small for a VHD header"); 256 goto fail; 257 } 258 259 /* If a fixed disk, the footer is found only at the end of the file */ 260 ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf, 261 HEADER_SIZE); 262 if (ret < 0) { 263 goto fail; 264 } 265 if (strncmp(footer->creator, "conectix", 8)) { 266 error_setg(errp, "invalid VPC image"); 267 ret = -EINVAL; 268 goto fail; 269 } 270 disk_type = VHD_FIXED; 271 } 272 273 checksum = be32_to_cpu(footer->checksum); 274 footer->checksum = 0; 275 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) 276 fprintf(stderr, "block-vpc: The header checksum of '%s' is " 277 "incorrect.\n", bs->filename); 278 279 /* Write 'checksum' back to footer, or else will leave it with zero. */ 280 footer->checksum = cpu_to_be32(checksum); 281 282 /* The visible size of a image in Virtual PC depends on the geometry 283 rather than on the size stored in the footer (the size in the footer 284 is too large usually) */ 285 bs->total_sectors = (int64_t) 286 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; 287 288 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read 289 * VHD image sizes differently. VPC will rely on CHS geometry, 290 * while Hyper-V and disk2vhd use the size specified in the footer. 291 * 292 * We use a couple of approaches to try and determine the correct method: 293 * look at the Creator App field, and look for images that have CHS 294 * geometry that is the maximum value. 295 * 296 * If the CHS geometry is the maximum CHS geometry, then we assume that 297 * the size is the footer->current_size to avoid truncation. Otherwise, 298 * we follow the table based on footer->creator_app: 299 * 300 * Known creator apps: 301 * 'vpc ' : CHS Virtual PC (uses disk geometry) 302 * 'qemu' : CHS QEMU (uses disk geometry) 303 * 'qem2' : current_size QEMU (uses current_size) 304 * 'win ' : current_size Hyper-V 305 * 'd2v ' : current_size Disk2vhd 306 * 'tap\0' : current_size XenServer 307 * 'CTXS' : current_size XenConverter 308 * 309 * The user can override the table values via drive options, however 310 * even with an override we will still use current_size for images 311 * that have CHS geometry of the maximum size. 312 */ 313 use_chs = (!!strncmp(footer->creator_app, "win ", 4) && 314 !!strncmp(footer->creator_app, "qem2", 4) && 315 !!strncmp(footer->creator_app, "d2v ", 4) && 316 !!strncmp(footer->creator_app, "CTXS", 4) && 317 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs; 318 319 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { 320 bs->total_sectors = be64_to_cpu(footer->current_size) / 321 BDRV_SECTOR_SIZE; 322 } 323 324 /* Allow a maximum disk size of 2040 GiB */ 325 if (bs->total_sectors > VHD_MAX_SECTORS) { 326 ret = -EFBIG; 327 goto fail; 328 } 329 330 if (disk_type == VHD_DYNAMIC) { 331 ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf, 332 HEADER_SIZE); 333 if (ret < 0) { 334 error_setg(errp, "Error reading dynamic VHD header"); 335 goto fail; 336 } 337 338 dyndisk_header = (VHDDynDiskHeader *) buf; 339 340 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { 341 error_setg(errp, "Invalid header magic"); 342 ret = -EINVAL; 343 goto fail; 344 } 345 346 s->block_size = be32_to_cpu(dyndisk_header->block_size); 347 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) { 348 error_setg(errp, "Invalid block size %" PRIu32, s->block_size); 349 ret = -EINVAL; 350 goto fail; 351 } 352 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; 353 354 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); 355 356 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) { 357 error_setg(errp, "Too many blocks"); 358 ret = -EINVAL; 359 goto fail; 360 } 361 362 computed_size = (uint64_t) s->max_table_entries * s->block_size; 363 if (computed_size < bs->total_sectors * 512) { 364 error_setg(errp, "Page table too small"); 365 ret = -EINVAL; 366 goto fail; 367 } 368 369 if (s->max_table_entries > SIZE_MAX / 4 || 370 s->max_table_entries > (int) INT_MAX / 4) { 371 error_setg(errp, "Max Table Entries too large (%" PRId32 ")", 372 s->max_table_entries); 373 ret = -EINVAL; 374 goto fail; 375 } 376 377 pagetable_size = (uint64_t) s->max_table_entries * 4; 378 379 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); 380 if (s->pagetable == NULL) { 381 error_setg(errp, "Unable to allocate memory for page table"); 382 ret = -ENOMEM; 383 goto fail; 384 } 385 386 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); 387 388 ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable, 389 pagetable_size); 390 if (ret < 0) { 391 error_setg(errp, "Error reading pagetable"); 392 goto fail; 393 } 394 395 s->free_data_block_offset = 396 ROUND_UP(s->bat_offset + pagetable_size, 512); 397 398 for (i = 0; i < s->max_table_entries; i++) { 399 be32_to_cpus(&s->pagetable[i]); 400 if (s->pagetable[i] != 0xFFFFFFFF) { 401 int64_t next = (512 * (int64_t) s->pagetable[i]) + 402 s->bitmap_size + s->block_size; 403 404 if (next > s->free_data_block_offset) { 405 s->free_data_block_offset = next; 406 } 407 } 408 } 409 410 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) { 411 error_setg(errp, "block-vpc: free_data_block_offset points after " 412 "the end of file. The image has been truncated."); 413 ret = -EINVAL; 414 goto fail; 415 } 416 417 s->last_bitmap_offset = (int64_t) -1; 418 419 #ifdef CACHE 420 s->pageentry_u8 = g_malloc(512); 421 s->pageentry_u32 = s->pageentry_u8; 422 s->pageentry_u16 = s->pageentry_u8; 423 s->last_pagetable = -1; 424 #endif 425 } 426 427 qemu_co_mutex_init(&s->lock); 428 429 /* Disable migration when VHD images are used */ 430 error_setg(&s->migration_blocker, "The vpc format used by node '%s' " 431 "does not support live migration", 432 bdrv_get_device_or_node_name(bs)); 433 migrate_add_blocker(s->migration_blocker); 434 435 return 0; 436 437 fail: 438 qemu_vfree(s->pagetable); 439 #ifdef CACHE 440 g_free(s->pageentry_u8); 441 #endif 442 return ret; 443 } 444 445 static int vpc_reopen_prepare(BDRVReopenState *state, 446 BlockReopenQueue *queue, Error **errp) 447 { 448 return 0; 449 } 450 451 /* 452 * Returns the absolute byte offset of the given sector in the image file. 453 * If the sector is not allocated, -1 is returned instead. 454 * 455 * The parameter write must be 1 if the offset will be used for a write 456 * operation (the block bitmaps is updated then), 0 otherwise. 457 */ 458 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset, 459 bool write) 460 { 461 BDRVVPCState *s = bs->opaque; 462 uint64_t bitmap_offset, block_offset; 463 uint32_t pagetable_index, offset_in_block; 464 465 pagetable_index = offset / s->block_size; 466 offset_in_block = offset % s->block_size; 467 468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) 469 return -1; /* not allocated */ 470 471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; 472 block_offset = bitmap_offset + s->bitmap_size + offset_in_block; 473 474 /* We must ensure that we don't write to any sectors which are marked as 475 unused in the bitmap. We get away with setting all bits in the block 476 bitmap each time we write to a new block. This might cause Virtual PC to 477 miss sparse read optimization, but it's not a problem in terms of 478 correctness. */ 479 if (write && (s->last_bitmap_offset != bitmap_offset)) { 480 uint8_t bitmap[s->bitmap_size]; 481 482 s->last_bitmap_offset = bitmap_offset; 483 memset(bitmap, 0xff, s->bitmap_size); 484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size); 485 } 486 487 return block_offset; 488 } 489 490 static inline int64_t get_sector_offset(BlockDriverState *bs, 491 int64_t sector_num, bool write) 492 { 493 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write); 494 } 495 496 /* 497 * Writes the footer to the end of the image file. This is needed when the 498 * file grows as it overwrites the old footer 499 * 500 * Returns 0 on success and < 0 on error 501 */ 502 static int rewrite_footer(BlockDriverState* bs) 503 { 504 int ret; 505 BDRVVPCState *s = bs->opaque; 506 int64_t offset = s->free_data_block_offset; 507 508 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE); 509 if (ret < 0) 510 return ret; 511 512 return 0; 513 } 514 515 /* 516 * Allocates a new block. This involves writing a new footer and updating 517 * the Block Allocation Table to use the space at the old end of the image 518 * file (overwriting the old footer) 519 * 520 * Returns the sectors' offset in the image file on success and < 0 on error 521 */ 522 static int64_t alloc_block(BlockDriverState* bs, int64_t offset) 523 { 524 BDRVVPCState *s = bs->opaque; 525 int64_t bat_offset; 526 uint32_t index, bat_value; 527 int ret; 528 uint8_t bitmap[s->bitmap_size]; 529 530 /* Check if sector_num is valid */ 531 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) { 532 return -EINVAL; 533 } 534 535 /* Write entry into in-memory BAT */ 536 index = offset / s->block_size; 537 assert(s->pagetable[index] == 0xFFFFFFFF); 538 s->pagetable[index] = s->free_data_block_offset / 512; 539 540 /* Initialize the block's bitmap */ 541 memset(bitmap, 0xff, s->bitmap_size); 542 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap, 543 s->bitmap_size); 544 if (ret < 0) { 545 return ret; 546 } 547 548 /* Write new footer (the old one will be overwritten) */ 549 s->free_data_block_offset += s->block_size + s->bitmap_size; 550 ret = rewrite_footer(bs); 551 if (ret < 0) 552 goto fail; 553 554 /* Write BAT entry to disk */ 555 bat_offset = s->bat_offset + (4 * index); 556 bat_value = cpu_to_be32(s->pagetable[index]); 557 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4); 558 if (ret < 0) 559 goto fail; 560 561 return get_image_offset(bs, offset, false); 562 563 fail: 564 s->free_data_block_offset -= (s->block_size + s->bitmap_size); 565 return ret; 566 } 567 568 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 569 { 570 BDRVVPCState *s = (BDRVVPCState *)bs->opaque; 571 VHDFooter *footer = (VHDFooter *) s->footer_buf; 572 573 if (be32_to_cpu(footer->type) != VHD_FIXED) { 574 bdi->cluster_size = s->block_size; 575 } 576 577 bdi->unallocated_blocks_are_zero = true; 578 return 0; 579 } 580 581 static int coroutine_fn 582 vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 583 QEMUIOVector *qiov, int flags) 584 { 585 BDRVVPCState *s = bs->opaque; 586 int ret; 587 int64_t image_offset; 588 int64_t n_bytes; 589 int64_t bytes_done = 0; 590 VHDFooter *footer = (VHDFooter *) s->footer_buf; 591 QEMUIOVector local_qiov; 592 593 if (be32_to_cpu(footer->type) == VHD_FIXED) { 594 return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0); 595 } 596 597 qemu_co_mutex_lock(&s->lock); 598 qemu_iovec_init(&local_qiov, qiov->niov); 599 600 while (bytes > 0) { 601 image_offset = get_image_offset(bs, offset, false); 602 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size)); 603 604 if (image_offset == -1) { 605 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); 606 } else { 607 qemu_iovec_reset(&local_qiov); 608 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 609 610 ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes, 611 &local_qiov, 0); 612 if (ret < 0) { 613 goto fail; 614 } 615 } 616 617 bytes -= n_bytes; 618 offset += n_bytes; 619 bytes_done += n_bytes; 620 } 621 622 ret = 0; 623 fail: 624 qemu_iovec_destroy(&local_qiov); 625 qemu_co_mutex_unlock(&s->lock); 626 627 return ret; 628 } 629 630 static int coroutine_fn 631 vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 632 QEMUIOVector *qiov, int flags) 633 { 634 BDRVVPCState *s = bs->opaque; 635 int64_t image_offset; 636 int64_t n_bytes; 637 int64_t bytes_done = 0; 638 int ret; 639 VHDFooter *footer = (VHDFooter *) s->footer_buf; 640 QEMUIOVector local_qiov; 641 642 if (be32_to_cpu(footer->type) == VHD_FIXED) { 643 return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0); 644 } 645 646 qemu_co_mutex_lock(&s->lock); 647 qemu_iovec_init(&local_qiov, qiov->niov); 648 649 while (bytes > 0) { 650 image_offset = get_image_offset(bs, offset, true); 651 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size)); 652 653 if (image_offset == -1) { 654 image_offset = alloc_block(bs, offset); 655 if (image_offset < 0) { 656 ret = image_offset; 657 goto fail; 658 } 659 } 660 661 qemu_iovec_reset(&local_qiov); 662 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 663 664 ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes, 665 &local_qiov, 0); 666 if (ret < 0) { 667 goto fail; 668 } 669 670 bytes -= n_bytes; 671 offset += n_bytes; 672 bytes_done += n_bytes; 673 } 674 675 ret = 0; 676 fail: 677 qemu_iovec_destroy(&local_qiov); 678 qemu_co_mutex_unlock(&s->lock); 679 680 return ret; 681 } 682 683 static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, 684 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) 685 { 686 BDRVVPCState *s = bs->opaque; 687 VHDFooter *footer = (VHDFooter*) s->footer_buf; 688 int64_t start, offset; 689 bool allocated; 690 int n; 691 692 if (be32_to_cpu(footer->type) == VHD_FIXED) { 693 *pnum = nb_sectors; 694 *file = bs->file->bs; 695 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | 696 (sector_num << BDRV_SECTOR_BITS); 697 } 698 699 offset = get_sector_offset(bs, sector_num, 0); 700 start = offset; 701 allocated = (offset != -1); 702 *pnum = 0; 703 704 do { 705 /* All sectors in a block are contiguous (without using the bitmap) */ 706 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) 707 - sector_num; 708 n = MIN(n, nb_sectors); 709 710 *pnum += n; 711 sector_num += n; 712 nb_sectors -= n; 713 /* *pnum can't be greater than one block for allocated 714 * sectors since there is always a bitmap in between. */ 715 if (allocated) { 716 *file = bs->file->bs; 717 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; 718 } 719 if (nb_sectors == 0) { 720 break; 721 } 722 offset = get_sector_offset(bs, sector_num, 0); 723 } while (offset == -1); 724 725 return 0; 726 } 727 728 /* 729 * Calculates the number of cylinders, heads and sectors per cylinder 730 * based on a given number of sectors. This is the algorithm described 731 * in the VHD specification. 732 * 733 * Note that the geometry doesn't always exactly match total_sectors but 734 * may round it down. 735 * 736 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override 737 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) 738 * and instead allow up to 255 heads. 739 */ 740 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, 741 uint8_t* heads, uint8_t* secs_per_cyl) 742 { 743 uint32_t cyls_times_heads; 744 745 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY); 746 747 if (total_sectors >= 65535LL * 16 * 63) { 748 *secs_per_cyl = 255; 749 *heads = 16; 750 cyls_times_heads = total_sectors / *secs_per_cyl; 751 } else { 752 *secs_per_cyl = 17; 753 cyls_times_heads = total_sectors / *secs_per_cyl; 754 *heads = (cyls_times_heads + 1023) / 1024; 755 756 if (*heads < 4) { 757 *heads = 4; 758 } 759 760 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { 761 *secs_per_cyl = 31; 762 *heads = 16; 763 cyls_times_heads = total_sectors / *secs_per_cyl; 764 } 765 766 if (cyls_times_heads >= (*heads * 1024)) { 767 *secs_per_cyl = 63; 768 *heads = 16; 769 cyls_times_heads = total_sectors / *secs_per_cyl; 770 } 771 } 772 773 *cyls = cyls_times_heads / *heads; 774 775 return 0; 776 } 777 778 static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, 779 int64_t total_sectors) 780 { 781 VHDDynDiskHeader *dyndisk_header = 782 (VHDDynDiskHeader *) buf; 783 size_t block_size, num_bat_entries; 784 int i; 785 int ret; 786 int64_t offset = 0; 787 788 /* Write the footer (twice: at the beginning and at the end) */ 789 block_size = 0x200000; 790 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); 791 792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0); 793 if (ret < 0) { 794 goto fail; 795 } 796 797 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); 798 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0); 799 if (ret < 0) { 800 goto fail; 801 } 802 803 /* Write the initial BAT */ 804 offset = 3 * 512; 805 806 memset(buf, 0xFF, 512); 807 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { 808 ret = blk_pwrite(blk, offset, buf, 512, 0); 809 if (ret < 0) { 810 goto fail; 811 } 812 offset += 512; 813 } 814 815 /* Prepare the Dynamic Disk Header */ 816 memset(buf, 0, 1024); 817 818 memcpy(dyndisk_header->magic, "cxsparse", 8); 819 820 /* 821 * Note: The spec is actually wrong here for data_offset, it says 822 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set. 823 */ 824 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); 825 dyndisk_header->table_offset = cpu_to_be64(3 * 512); 826 dyndisk_header->version = cpu_to_be32(0x00010000); 827 dyndisk_header->block_size = cpu_to_be32(block_size); 828 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries); 829 830 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024)); 831 832 /* Write the header */ 833 offset = 512; 834 835 ret = blk_pwrite(blk, offset, buf, 1024, 0); 836 if (ret < 0) { 837 goto fail; 838 } 839 840 fail: 841 return ret; 842 } 843 844 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, 845 int64_t total_size) 846 { 847 int ret; 848 849 /* Add footer to total size */ 850 total_size += HEADER_SIZE; 851 852 ret = blk_truncate(blk, total_size); 853 if (ret < 0) { 854 return ret; 855 } 856 857 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0); 858 if (ret < 0) { 859 return ret; 860 } 861 862 return ret; 863 } 864 865 static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) 866 { 867 uint8_t buf[1024]; 868 VHDFooter *footer = (VHDFooter *) buf; 869 char *disk_type_param; 870 int i; 871 uint16_t cyls = 0; 872 uint8_t heads = 0; 873 uint8_t secs_per_cyl = 0; 874 int64_t total_sectors; 875 int64_t total_size; 876 int disk_type; 877 int ret = -EIO; 878 bool force_size; 879 Error *local_err = NULL; 880 BlockBackend *blk = NULL; 881 882 /* Read out options */ 883 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), 884 BDRV_SECTOR_SIZE); 885 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); 886 if (disk_type_param) { 887 if (!strcmp(disk_type_param, "dynamic")) { 888 disk_type = VHD_DYNAMIC; 889 } else if (!strcmp(disk_type_param, "fixed")) { 890 disk_type = VHD_FIXED; 891 } else { 892 error_setg(errp, "Invalid disk type, %s", disk_type_param); 893 ret = -EINVAL; 894 goto out; 895 } 896 } else { 897 disk_type = VHD_DYNAMIC; 898 } 899 900 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false); 901 902 ret = bdrv_create_file(filename, opts, &local_err); 903 if (ret < 0) { 904 error_propagate(errp, local_err); 905 goto out; 906 } 907 908 blk = blk_new_open(filename, NULL, NULL, 909 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); 910 if (blk == NULL) { 911 error_propagate(errp, local_err); 912 ret = -EIO; 913 goto out; 914 } 915 916 blk_set_allow_write_beyond_eof(blk, true); 917 918 /* 919 * Calculate matching total_size and geometry. Increase the number of 920 * sectors requested until we get enough (or fail). This ensures that 921 * qemu-img convert doesn't truncate images, but rather rounds up. 922 * 923 * If the image size can't be represented by a spec conformant CHS geometry, 924 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use 925 * the image size from the VHD footer to calculate total_sectors. 926 */ 927 if (force_size) { 928 /* This will force the use of total_size for sector count, below */ 929 cyls = VHD_CHS_MAX_C; 930 heads = VHD_CHS_MAX_H; 931 secs_per_cyl = VHD_CHS_MAX_S; 932 } else { 933 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); 934 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { 935 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); 936 } 937 } 938 939 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { 940 total_sectors = total_size / BDRV_SECTOR_SIZE; 941 /* Allow a maximum disk size of 2040 GiB */ 942 if (total_sectors > VHD_MAX_SECTORS) { 943 error_setg(errp, "Disk size is too large, max size is 2040 GiB"); 944 ret = -EFBIG; 945 goto out; 946 } 947 } else { 948 total_sectors = (int64_t)cyls * heads * secs_per_cyl; 949 total_size = total_sectors * BDRV_SECTOR_SIZE; 950 } 951 952 /* Prepare the Hard Disk Footer */ 953 memset(buf, 0, 1024); 954 955 memcpy(footer->creator, "conectix", 8); 956 if (force_size) { 957 memcpy(footer->creator_app, "qem2", 4); 958 } else { 959 memcpy(footer->creator_app, "qemu", 4); 960 } 961 memcpy(footer->creator_os, "Wi2k", 4); 962 963 footer->features = cpu_to_be32(0x02); 964 footer->version = cpu_to_be32(0x00010000); 965 if (disk_type == VHD_DYNAMIC) { 966 footer->data_offset = cpu_to_be64(HEADER_SIZE); 967 } else { 968 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); 969 } 970 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE); 971 972 /* Version of Virtual PC 2007 */ 973 footer->major = cpu_to_be16(0x0005); 974 footer->minor = cpu_to_be16(0x0003); 975 footer->orig_size = cpu_to_be64(total_size); 976 footer->current_size = cpu_to_be64(total_size); 977 footer->cyls = cpu_to_be16(cyls); 978 footer->heads = heads; 979 footer->secs_per_cyl = secs_per_cyl; 980 981 footer->type = cpu_to_be32(disk_type); 982 983 #if defined(CONFIG_UUID) 984 uuid_generate(footer->uuid); 985 #endif 986 987 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); 988 989 if (disk_type == VHD_DYNAMIC) { 990 ret = create_dynamic_disk(blk, buf, total_sectors); 991 } else { 992 ret = create_fixed_disk(blk, buf, total_size); 993 } 994 if (ret < 0) { 995 error_setg(errp, "Unable to create or write VHD header"); 996 } 997 998 out: 999 blk_unref(blk); 1000 g_free(disk_type_param); 1001 return ret; 1002 } 1003 1004 static int vpc_has_zero_init(BlockDriverState *bs) 1005 { 1006 BDRVVPCState *s = bs->opaque; 1007 VHDFooter *footer = (VHDFooter *) s->footer_buf; 1008 1009 if (be32_to_cpu(footer->type) == VHD_FIXED) { 1010 return bdrv_has_zero_init(bs->file->bs); 1011 } else { 1012 return 1; 1013 } 1014 } 1015 1016 static void vpc_close(BlockDriverState *bs) 1017 { 1018 BDRVVPCState *s = bs->opaque; 1019 qemu_vfree(s->pagetable); 1020 #ifdef CACHE 1021 g_free(s->pageentry_u8); 1022 #endif 1023 1024 migrate_del_blocker(s->migration_blocker); 1025 error_free(s->migration_blocker); 1026 } 1027 1028 static QemuOptsList vpc_create_opts = { 1029 .name = "vpc-create-opts", 1030 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head), 1031 .desc = { 1032 { 1033 .name = BLOCK_OPT_SIZE, 1034 .type = QEMU_OPT_SIZE, 1035 .help = "Virtual disk size" 1036 }, 1037 { 1038 .name = BLOCK_OPT_SUBFMT, 1039 .type = QEMU_OPT_STRING, 1040 .help = 1041 "Type of virtual hard disk format. Supported formats are " 1042 "{dynamic (default) | fixed} " 1043 }, 1044 { 1045 .name = VPC_OPT_FORCE_SIZE, 1046 .type = QEMU_OPT_BOOL, 1047 .help = "Force disk size calculation to use the actual size " 1048 "specified, rather than using the nearest CHS-based " 1049 "calculation" 1050 }, 1051 { /* end of list */ } 1052 } 1053 }; 1054 1055 static BlockDriver bdrv_vpc = { 1056 .format_name = "vpc", 1057 .instance_size = sizeof(BDRVVPCState), 1058 1059 .bdrv_probe = vpc_probe, 1060 .bdrv_open = vpc_open, 1061 .bdrv_close = vpc_close, 1062 .bdrv_reopen_prepare = vpc_reopen_prepare, 1063 .bdrv_create = vpc_create, 1064 1065 .bdrv_co_preadv = vpc_co_preadv, 1066 .bdrv_co_pwritev = vpc_co_pwritev, 1067 .bdrv_co_get_block_status = vpc_co_get_block_status, 1068 1069 .bdrv_get_info = vpc_get_info, 1070 1071 .create_opts = &vpc_create_opts, 1072 .bdrv_has_zero_init = vpc_has_zero_init, 1073 }; 1074 1075 static void bdrv_vpc_init(void) 1076 { 1077 bdrv_register(&bdrv_vpc); 1078 } 1079 1080 block_init(bdrv_vpc_init); 1081