1 /* 2 * Block driver for Connectix / Microsoft Virtual PC images 3 * 4 * Copyright (c) 2005 Alex Beregszaszi 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 #include "qemu/osdep.h" 26 #include "qapi/error.h" 27 #include "qemu-common.h" 28 #include "block/block_int.h" 29 #include "sysemu/block-backend.h" 30 #include "qemu/module.h" 31 #include "migration/migration.h" 32 #if defined(CONFIG_UUID) 33 #include <uuid/uuid.h> 34 #endif 35 36 /**************************************************************/ 37 38 #define HEADER_SIZE 512 39 40 //#define CACHE 41 42 enum vhd_type { 43 VHD_FIXED = 2, 44 VHD_DYNAMIC = 3, 45 VHD_DIFFERENCING = 4, 46 }; 47 48 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ 49 #define VHD_TIMESTAMP_BASE 946684800 50 51 #define VHD_CHS_MAX_C 65535LL 52 #define VHD_CHS_MAX_H 16 53 #define VHD_CHS_MAX_S 255 54 55 #define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */ 56 #define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) 57 58 #define VPC_OPT_FORCE_SIZE "force_size" 59 60 /* always big-endian */ 61 typedef struct vhd_footer { 62 char creator[8]; /* "conectix" */ 63 uint32_t features; 64 uint32_t version; 65 66 /* Offset of next header structure, 0xFFFFFFFF if none */ 67 uint64_t data_offset; 68 69 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ 70 uint32_t timestamp; 71 72 char creator_app[4]; /* e.g., "vpc " */ 73 uint16_t major; 74 uint16_t minor; 75 char creator_os[4]; /* "Wi2k" */ 76 77 uint64_t orig_size; 78 uint64_t current_size; 79 80 uint16_t cyls; 81 uint8_t heads; 82 uint8_t secs_per_cyl; 83 84 uint32_t type; 85 86 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all 87 the bytes in the footer without the checksum field") */ 88 uint32_t checksum; 89 90 /* UUID used to identify a parent hard disk (backing file) */ 91 uint8_t uuid[16]; 92 93 uint8_t in_saved_state; 94 } QEMU_PACKED VHDFooter; 95 96 typedef struct vhd_dyndisk_header { 97 char magic[8]; /* "cxsparse" */ 98 99 /* Offset of next header structure, 0xFFFFFFFF if none */ 100 uint64_t data_offset; 101 102 /* Offset of the Block Allocation Table (BAT) */ 103 uint64_t table_offset; 104 105 uint32_t version; 106 uint32_t max_table_entries; /* 32bit/entry */ 107 108 /* 2 MB by default, must be a power of two */ 109 uint32_t block_size; 110 111 uint32_t checksum; 112 uint8_t parent_uuid[16]; 113 uint32_t parent_timestamp; 114 uint32_t reserved; 115 116 /* Backing file name (in UTF-16) */ 117 uint8_t parent_name[512]; 118 119 struct { 120 uint32_t platform; 121 uint32_t data_space; 122 uint32_t data_length; 123 uint32_t reserved; 124 uint64_t data_offset; 125 } parent_locator[8]; 126 } QEMU_PACKED VHDDynDiskHeader; 127 128 typedef struct BDRVVPCState { 129 CoMutex lock; 130 uint8_t footer_buf[HEADER_SIZE]; 131 uint64_t free_data_block_offset; 132 int max_table_entries; 133 uint32_t *pagetable; 134 uint64_t bat_offset; 135 uint64_t last_bitmap_offset; 136 137 uint32_t block_size; 138 uint32_t bitmap_size; 139 bool force_use_chs; 140 bool force_use_sz; 141 142 #ifdef CACHE 143 uint8_t *pageentry_u8; 144 uint32_t *pageentry_u32; 145 uint16_t *pageentry_u16; 146 147 uint64_t last_bitmap; 148 #endif 149 150 Error *migration_blocker; 151 } BDRVVPCState; 152 153 #define VPC_OPT_SIZE_CALC "force_size_calc" 154 static QemuOptsList vpc_runtime_opts = { 155 .name = "vpc-runtime-opts", 156 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), 157 .desc = { 158 { 159 .name = VPC_OPT_SIZE_CALC, 160 .type = QEMU_OPT_STRING, 161 .help = "Force disk size calculation to use either CHS geometry, " 162 "or use the disk current_size specified in the VHD footer. " 163 "{chs, current_size}" 164 }, 165 { /* end of list */ } 166 } 167 }; 168 169 static uint32_t vpc_checksum(uint8_t* buf, size_t size) 170 { 171 uint32_t res = 0; 172 int i; 173 174 for (i = 0; i < size; i++) 175 res += buf[i]; 176 177 return ~res; 178 } 179 180 181 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) 182 { 183 if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) 184 return 100; 185 return 0; 186 } 187 188 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, 189 Error **errp) 190 { 191 BDRVVPCState *s = bs->opaque; 192 const char *size_calc; 193 194 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); 195 196 if (!size_calc) { 197 /* no override, use autodetect only */ 198 } else if (!strcmp(size_calc, "current_size")) { 199 s->force_use_sz = true; 200 } else if (!strcmp(size_calc, "chs")) { 201 s->force_use_chs = true; 202 } else { 203 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); 204 } 205 } 206 207 static int vpc_open(BlockDriverState *bs, QDict *options, int flags, 208 Error **errp) 209 { 210 BDRVVPCState *s = bs->opaque; 211 int i; 212 VHDFooter *footer; 213 VHDDynDiskHeader *dyndisk_header; 214 QemuOpts *opts = NULL; 215 Error *local_err = NULL; 216 bool use_chs; 217 uint8_t buf[HEADER_SIZE]; 218 uint32_t checksum; 219 uint64_t computed_size; 220 uint64_t pagetable_size; 221 int disk_type = VHD_DYNAMIC; 222 int ret; 223 224 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); 225 qemu_opts_absorb_qdict(opts, options, &local_err); 226 if (local_err) { 227 error_propagate(errp, local_err); 228 ret = -EINVAL; 229 goto fail; 230 } 231 232 vpc_parse_options(bs, opts, &local_err); 233 if (local_err) { 234 error_propagate(errp, local_err); 235 ret = -EINVAL; 236 goto fail; 237 } 238 239 ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); 240 if (ret < 0) { 241 error_setg(errp, "Unable to read VHD header"); 242 goto fail; 243 } 244 245 footer = (VHDFooter *) s->footer_buf; 246 if (strncmp(footer->creator, "conectix", 8)) { 247 int64_t offset = bdrv_getlength(bs->file->bs); 248 if (offset < 0) { 249 ret = offset; 250 error_setg(errp, "Invalid file size"); 251 goto fail; 252 } else if (offset < HEADER_SIZE) { 253 ret = -EINVAL; 254 error_setg(errp, "File too small for a VHD header"); 255 goto fail; 256 } 257 258 /* If a fixed disk, the footer is found only at the end of the file */ 259 ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf, 260 HEADER_SIZE); 261 if (ret < 0) { 262 goto fail; 263 } 264 if (strncmp(footer->creator, "conectix", 8)) { 265 error_setg(errp, "invalid VPC image"); 266 ret = -EINVAL; 267 goto fail; 268 } 269 disk_type = VHD_FIXED; 270 } 271 272 checksum = be32_to_cpu(footer->checksum); 273 footer->checksum = 0; 274 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) 275 fprintf(stderr, "block-vpc: The header checksum of '%s' is " 276 "incorrect.\n", bs->filename); 277 278 /* Write 'checksum' back to footer, or else will leave it with zero. */ 279 footer->checksum = cpu_to_be32(checksum); 280 281 /* The visible size of a image in Virtual PC depends on the geometry 282 rather than on the size stored in the footer (the size in the footer 283 is too large usually) */ 284 bs->total_sectors = (int64_t) 285 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; 286 287 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read 288 * VHD image sizes differently. VPC will rely on CHS geometry, 289 * while Hyper-V and disk2vhd use the size specified in the footer. 290 * 291 * We use a couple of approaches to try and determine the correct method: 292 * look at the Creator App field, and look for images that have CHS 293 * geometry that is the maximum value. 294 * 295 * If the CHS geometry is the maximum CHS geometry, then we assume that 296 * the size is the footer->current_size to avoid truncation. Otherwise, 297 * we follow the table based on footer->creator_app: 298 * 299 * Known creator apps: 300 * 'vpc ' : CHS Virtual PC (uses disk geometry) 301 * 'qemu' : CHS QEMU (uses disk geometry) 302 * 'qem2' : current_size QEMU (uses current_size) 303 * 'win ' : current_size Hyper-V 304 * 'd2v ' : current_size Disk2vhd 305 * 'tap\0' : current_size XenServer 306 * 'CTXS' : current_size XenConverter 307 * 308 * The user can override the table values via drive options, however 309 * even with an override we will still use current_size for images 310 * that have CHS geometry of the maximum size. 311 */ 312 use_chs = (!!strncmp(footer->creator_app, "win ", 4) && 313 !!strncmp(footer->creator_app, "qem2", 4) && 314 !!strncmp(footer->creator_app, "d2v ", 4) && 315 !!strncmp(footer->creator_app, "CTXS", 4) && 316 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs; 317 318 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { 319 bs->total_sectors = be64_to_cpu(footer->current_size) / 320 BDRV_SECTOR_SIZE; 321 } 322 323 /* Allow a maximum disk size of 2040 GiB */ 324 if (bs->total_sectors > VHD_MAX_SECTORS) { 325 ret = -EFBIG; 326 goto fail; 327 } 328 329 if (disk_type == VHD_DYNAMIC) { 330 ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf, 331 HEADER_SIZE); 332 if (ret < 0) { 333 error_setg(errp, "Error reading dynamic VHD header"); 334 goto fail; 335 } 336 337 dyndisk_header = (VHDDynDiskHeader *) buf; 338 339 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { 340 error_setg(errp, "Invalid header magic"); 341 ret = -EINVAL; 342 goto fail; 343 } 344 345 s->block_size = be32_to_cpu(dyndisk_header->block_size); 346 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) { 347 error_setg(errp, "Invalid block size %" PRIu32, s->block_size); 348 ret = -EINVAL; 349 goto fail; 350 } 351 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; 352 353 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); 354 355 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) { 356 error_setg(errp, "Too many blocks"); 357 ret = -EINVAL; 358 goto fail; 359 } 360 361 computed_size = (uint64_t) s->max_table_entries * s->block_size; 362 if (computed_size < bs->total_sectors * 512) { 363 error_setg(errp, "Page table too small"); 364 ret = -EINVAL; 365 goto fail; 366 } 367 368 if (s->max_table_entries > SIZE_MAX / 4 || 369 s->max_table_entries > (int) INT_MAX / 4) { 370 error_setg(errp, "Max Table Entries too large (%" PRId32 ")", 371 s->max_table_entries); 372 ret = -EINVAL; 373 goto fail; 374 } 375 376 pagetable_size = (uint64_t) s->max_table_entries * 4; 377 378 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); 379 if (s->pagetable == NULL) { 380 error_setg(errp, "Unable to allocate memory for page table"); 381 ret = -ENOMEM; 382 goto fail; 383 } 384 385 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); 386 387 ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable, 388 pagetable_size); 389 if (ret < 0) { 390 error_setg(errp, "Error reading pagetable"); 391 goto fail; 392 } 393 394 s->free_data_block_offset = 395 ROUND_UP(s->bat_offset + pagetable_size, 512); 396 397 for (i = 0; i < s->max_table_entries; i++) { 398 be32_to_cpus(&s->pagetable[i]); 399 if (s->pagetable[i] != 0xFFFFFFFF) { 400 int64_t next = (512 * (int64_t) s->pagetable[i]) + 401 s->bitmap_size + s->block_size; 402 403 if (next > s->free_data_block_offset) { 404 s->free_data_block_offset = next; 405 } 406 } 407 } 408 409 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) { 410 error_setg(errp, "block-vpc: free_data_block_offset points after " 411 "the end of file. The image has been truncated."); 412 ret = -EINVAL; 413 goto fail; 414 } 415 416 s->last_bitmap_offset = (int64_t) -1; 417 418 #ifdef CACHE 419 s->pageentry_u8 = g_malloc(512); 420 s->pageentry_u32 = s->pageentry_u8; 421 s->pageentry_u16 = s->pageentry_u8; 422 s->last_pagetable = -1; 423 #endif 424 } 425 426 qemu_co_mutex_init(&s->lock); 427 428 /* Disable migration when VHD images are used */ 429 error_setg(&s->migration_blocker, "The vpc format used by node '%s' " 430 "does not support live migration", 431 bdrv_get_device_or_node_name(bs)); 432 migrate_add_blocker(s->migration_blocker); 433 434 return 0; 435 436 fail: 437 qemu_vfree(s->pagetable); 438 #ifdef CACHE 439 g_free(s->pageentry_u8); 440 #endif 441 return ret; 442 } 443 444 static int vpc_reopen_prepare(BDRVReopenState *state, 445 BlockReopenQueue *queue, Error **errp) 446 { 447 return 0; 448 } 449 450 /* 451 * Returns the absolute byte offset of the given sector in the image file. 452 * If the sector is not allocated, -1 is returned instead. 453 * 454 * The parameter write must be 1 if the offset will be used for a write 455 * operation (the block bitmaps is updated then), 0 otherwise. 456 */ 457 static inline int64_t get_sector_offset(BlockDriverState *bs, 458 int64_t sector_num, int write) 459 { 460 BDRVVPCState *s = bs->opaque; 461 uint64_t offset = sector_num * 512; 462 uint64_t bitmap_offset, block_offset; 463 uint32_t pagetable_index, pageentry_index; 464 465 pagetable_index = offset / s->block_size; 466 pageentry_index = (offset % s->block_size) / 512; 467 468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) 469 return -1; /* not allocated */ 470 471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; 472 block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index); 473 474 /* We must ensure that we don't write to any sectors which are marked as 475 unused in the bitmap. We get away with setting all bits in the block 476 bitmap each time we write to a new block. This might cause Virtual PC to 477 miss sparse read optimization, but it's not a problem in terms of 478 correctness. */ 479 if (write && (s->last_bitmap_offset != bitmap_offset)) { 480 uint8_t bitmap[s->bitmap_size]; 481 482 s->last_bitmap_offset = bitmap_offset; 483 memset(bitmap, 0xff, s->bitmap_size); 484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size); 485 } 486 487 return block_offset; 488 } 489 490 /* 491 * Writes the footer to the end of the image file. This is needed when the 492 * file grows as it overwrites the old footer 493 * 494 * Returns 0 on success and < 0 on error 495 */ 496 static int rewrite_footer(BlockDriverState* bs) 497 { 498 int ret; 499 BDRVVPCState *s = bs->opaque; 500 int64_t offset = s->free_data_block_offset; 501 502 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE); 503 if (ret < 0) 504 return ret; 505 506 return 0; 507 } 508 509 /* 510 * Allocates a new block. This involves writing a new footer and updating 511 * the Block Allocation Table to use the space at the old end of the image 512 * file (overwriting the old footer) 513 * 514 * Returns the sectors' offset in the image file on success and < 0 on error 515 */ 516 static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) 517 { 518 BDRVVPCState *s = bs->opaque; 519 int64_t bat_offset; 520 uint32_t index, bat_value; 521 int ret; 522 uint8_t bitmap[s->bitmap_size]; 523 524 /* Check if sector_num is valid */ 525 if ((sector_num < 0) || (sector_num > bs->total_sectors)) 526 return -1; 527 528 /* Write entry into in-memory BAT */ 529 index = (sector_num * 512) / s->block_size; 530 if (s->pagetable[index] != 0xFFFFFFFF) 531 return -1; 532 533 s->pagetable[index] = s->free_data_block_offset / 512; 534 535 /* Initialize the block's bitmap */ 536 memset(bitmap, 0xff, s->bitmap_size); 537 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap, 538 s->bitmap_size); 539 if (ret < 0) { 540 return ret; 541 } 542 543 /* Write new footer (the old one will be overwritten) */ 544 s->free_data_block_offset += s->block_size + s->bitmap_size; 545 ret = rewrite_footer(bs); 546 if (ret < 0) 547 goto fail; 548 549 /* Write BAT entry to disk */ 550 bat_offset = s->bat_offset + (4 * index); 551 bat_value = cpu_to_be32(s->pagetable[index]); 552 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4); 553 if (ret < 0) 554 goto fail; 555 556 return get_sector_offset(bs, sector_num, 0); 557 558 fail: 559 s->free_data_block_offset -= (s->block_size + s->bitmap_size); 560 return -1; 561 } 562 563 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 564 { 565 BDRVVPCState *s = (BDRVVPCState *)bs->opaque; 566 VHDFooter *footer = (VHDFooter *) s->footer_buf; 567 568 if (be32_to_cpu(footer->type) != VHD_FIXED) { 569 bdi->cluster_size = s->block_size; 570 } 571 572 bdi->unallocated_blocks_are_zero = true; 573 return 0; 574 } 575 576 static int vpc_read(BlockDriverState *bs, int64_t sector_num, 577 uint8_t *buf, int nb_sectors) 578 { 579 BDRVVPCState *s = bs->opaque; 580 int ret; 581 int64_t offset; 582 int64_t sectors, sectors_per_block; 583 VHDFooter *footer = (VHDFooter *) s->footer_buf; 584 585 if (be32_to_cpu(footer->type) == VHD_FIXED) { 586 return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors); 587 } 588 while (nb_sectors > 0) { 589 offset = get_sector_offset(bs, sector_num, 0); 590 591 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; 592 sectors = sectors_per_block - (sector_num % sectors_per_block); 593 if (sectors > nb_sectors) { 594 sectors = nb_sectors; 595 } 596 597 if (offset == -1) { 598 memset(buf, 0, sectors * BDRV_SECTOR_SIZE); 599 } else { 600 ret = bdrv_pread(bs->file->bs, offset, buf, 601 sectors * BDRV_SECTOR_SIZE); 602 if (ret != sectors * BDRV_SECTOR_SIZE) { 603 return -1; 604 } 605 } 606 607 nb_sectors -= sectors; 608 sector_num += sectors; 609 buf += sectors * BDRV_SECTOR_SIZE; 610 } 611 return 0; 612 } 613 614 static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num, 615 uint8_t *buf, int nb_sectors) 616 { 617 int ret; 618 BDRVVPCState *s = bs->opaque; 619 qemu_co_mutex_lock(&s->lock); 620 ret = vpc_read(bs, sector_num, buf, nb_sectors); 621 qemu_co_mutex_unlock(&s->lock); 622 return ret; 623 } 624 625 static int vpc_write(BlockDriverState *bs, int64_t sector_num, 626 const uint8_t *buf, int nb_sectors) 627 { 628 BDRVVPCState *s = bs->opaque; 629 int64_t offset; 630 int64_t sectors, sectors_per_block; 631 int ret; 632 VHDFooter *footer = (VHDFooter *) s->footer_buf; 633 634 if (be32_to_cpu(footer->type) == VHD_FIXED) { 635 return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors); 636 } 637 while (nb_sectors > 0) { 638 offset = get_sector_offset(bs, sector_num, 1); 639 640 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; 641 sectors = sectors_per_block - (sector_num % sectors_per_block); 642 if (sectors > nb_sectors) { 643 sectors = nb_sectors; 644 } 645 646 if (offset == -1) { 647 offset = alloc_block(bs, sector_num); 648 if (offset < 0) 649 return -1; 650 } 651 652 ret = bdrv_pwrite(bs->file->bs, offset, buf, 653 sectors * BDRV_SECTOR_SIZE); 654 if (ret != sectors * BDRV_SECTOR_SIZE) { 655 return -1; 656 } 657 658 nb_sectors -= sectors; 659 sector_num += sectors; 660 buf += sectors * BDRV_SECTOR_SIZE; 661 } 662 663 return 0; 664 } 665 666 static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, 667 const uint8_t *buf, int nb_sectors) 668 { 669 int ret; 670 BDRVVPCState *s = bs->opaque; 671 qemu_co_mutex_lock(&s->lock); 672 ret = vpc_write(bs, sector_num, buf, nb_sectors); 673 qemu_co_mutex_unlock(&s->lock); 674 return ret; 675 } 676 677 static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, 678 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) 679 { 680 BDRVVPCState *s = bs->opaque; 681 VHDFooter *footer = (VHDFooter*) s->footer_buf; 682 int64_t start, offset; 683 bool allocated; 684 int n; 685 686 if (be32_to_cpu(footer->type) == VHD_FIXED) { 687 *pnum = nb_sectors; 688 *file = bs->file->bs; 689 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | 690 (sector_num << BDRV_SECTOR_BITS); 691 } 692 693 offset = get_sector_offset(bs, sector_num, 0); 694 start = offset; 695 allocated = (offset != -1); 696 *pnum = 0; 697 698 do { 699 /* All sectors in a block are contiguous (without using the bitmap) */ 700 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) 701 - sector_num; 702 n = MIN(n, nb_sectors); 703 704 *pnum += n; 705 sector_num += n; 706 nb_sectors -= n; 707 /* *pnum can't be greater than one block for allocated 708 * sectors since there is always a bitmap in between. */ 709 if (allocated) { 710 *file = bs->file->bs; 711 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; 712 } 713 if (nb_sectors == 0) { 714 break; 715 } 716 offset = get_sector_offset(bs, sector_num, 0); 717 } while (offset == -1); 718 719 return 0; 720 } 721 722 /* 723 * Calculates the number of cylinders, heads and sectors per cylinder 724 * based on a given number of sectors. This is the algorithm described 725 * in the VHD specification. 726 * 727 * Note that the geometry doesn't always exactly match total_sectors but 728 * may round it down. 729 * 730 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override 731 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) 732 * and instead allow up to 255 heads. 733 */ 734 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, 735 uint8_t* heads, uint8_t* secs_per_cyl) 736 { 737 uint32_t cyls_times_heads; 738 739 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY); 740 741 if (total_sectors >= 65535LL * 16 * 63) { 742 *secs_per_cyl = 255; 743 *heads = 16; 744 cyls_times_heads = total_sectors / *secs_per_cyl; 745 } else { 746 *secs_per_cyl = 17; 747 cyls_times_heads = total_sectors / *secs_per_cyl; 748 *heads = (cyls_times_heads + 1023) / 1024; 749 750 if (*heads < 4) { 751 *heads = 4; 752 } 753 754 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { 755 *secs_per_cyl = 31; 756 *heads = 16; 757 cyls_times_heads = total_sectors / *secs_per_cyl; 758 } 759 760 if (cyls_times_heads >= (*heads * 1024)) { 761 *secs_per_cyl = 63; 762 *heads = 16; 763 cyls_times_heads = total_sectors / *secs_per_cyl; 764 } 765 } 766 767 *cyls = cyls_times_heads / *heads; 768 769 return 0; 770 } 771 772 static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, 773 int64_t total_sectors) 774 { 775 VHDDynDiskHeader *dyndisk_header = 776 (VHDDynDiskHeader *) buf; 777 size_t block_size, num_bat_entries; 778 int i; 779 int ret; 780 int64_t offset = 0; 781 782 /* Write the footer (twice: at the beginning and at the end) */ 783 block_size = 0x200000; 784 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); 785 786 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); 787 if (ret < 0) { 788 goto fail; 789 } 790 791 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); 792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); 793 if (ret < 0) { 794 goto fail; 795 } 796 797 /* Write the initial BAT */ 798 offset = 3 * 512; 799 800 memset(buf, 0xFF, 512); 801 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { 802 ret = blk_pwrite(blk, offset, buf, 512); 803 if (ret < 0) { 804 goto fail; 805 } 806 offset += 512; 807 } 808 809 /* Prepare the Dynamic Disk Header */ 810 memset(buf, 0, 1024); 811 812 memcpy(dyndisk_header->magic, "cxsparse", 8); 813 814 /* 815 * Note: The spec is actually wrong here for data_offset, it says 816 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set. 817 */ 818 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); 819 dyndisk_header->table_offset = cpu_to_be64(3 * 512); 820 dyndisk_header->version = cpu_to_be32(0x00010000); 821 dyndisk_header->block_size = cpu_to_be32(block_size); 822 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries); 823 824 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024)); 825 826 /* Write the header */ 827 offset = 512; 828 829 ret = blk_pwrite(blk, offset, buf, 1024); 830 if (ret < 0) { 831 goto fail; 832 } 833 834 fail: 835 return ret; 836 } 837 838 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, 839 int64_t total_size) 840 { 841 int ret; 842 843 /* Add footer to total size */ 844 total_size += HEADER_SIZE; 845 846 ret = blk_truncate(blk, total_size); 847 if (ret < 0) { 848 return ret; 849 } 850 851 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE); 852 if (ret < 0) { 853 return ret; 854 } 855 856 return ret; 857 } 858 859 static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) 860 { 861 uint8_t buf[1024]; 862 VHDFooter *footer = (VHDFooter *) buf; 863 char *disk_type_param; 864 int i; 865 uint16_t cyls = 0; 866 uint8_t heads = 0; 867 uint8_t secs_per_cyl = 0; 868 int64_t total_sectors; 869 int64_t total_size; 870 int disk_type; 871 int ret = -EIO; 872 bool force_size; 873 Error *local_err = NULL; 874 BlockBackend *blk = NULL; 875 876 /* Read out options */ 877 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), 878 BDRV_SECTOR_SIZE); 879 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); 880 if (disk_type_param) { 881 if (!strcmp(disk_type_param, "dynamic")) { 882 disk_type = VHD_DYNAMIC; 883 } else if (!strcmp(disk_type_param, "fixed")) { 884 disk_type = VHD_FIXED; 885 } else { 886 error_setg(errp, "Invalid disk type, %s", disk_type_param); 887 ret = -EINVAL; 888 goto out; 889 } 890 } else { 891 disk_type = VHD_DYNAMIC; 892 } 893 894 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false); 895 896 ret = bdrv_create_file(filename, opts, &local_err); 897 if (ret < 0) { 898 error_propagate(errp, local_err); 899 goto out; 900 } 901 902 blk = blk_new_open(filename, NULL, NULL, 903 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); 904 if (blk == NULL) { 905 error_propagate(errp, local_err); 906 ret = -EIO; 907 goto out; 908 } 909 910 blk_set_allow_write_beyond_eof(blk, true); 911 912 /* 913 * Calculate matching total_size and geometry. Increase the number of 914 * sectors requested until we get enough (or fail). This ensures that 915 * qemu-img convert doesn't truncate images, but rather rounds up. 916 * 917 * If the image size can't be represented by a spec conformant CHS geometry, 918 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use 919 * the image size from the VHD footer to calculate total_sectors. 920 */ 921 if (force_size) { 922 /* This will force the use of total_size for sector count, below */ 923 cyls = VHD_CHS_MAX_C; 924 heads = VHD_CHS_MAX_H; 925 secs_per_cyl = VHD_CHS_MAX_S; 926 } else { 927 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); 928 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { 929 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); 930 } 931 } 932 933 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { 934 total_sectors = total_size / BDRV_SECTOR_SIZE; 935 /* Allow a maximum disk size of 2040 GiB */ 936 if (total_sectors > VHD_MAX_SECTORS) { 937 error_setg(errp, "Disk size is too large, max size is 2040 GiB"); 938 ret = -EFBIG; 939 goto out; 940 } 941 } else { 942 total_sectors = (int64_t)cyls * heads * secs_per_cyl; 943 total_size = total_sectors * BDRV_SECTOR_SIZE; 944 } 945 946 /* Prepare the Hard Disk Footer */ 947 memset(buf, 0, 1024); 948 949 memcpy(footer->creator, "conectix", 8); 950 if (force_size) { 951 memcpy(footer->creator_app, "qem2", 4); 952 } else { 953 memcpy(footer->creator_app, "qemu", 4); 954 } 955 memcpy(footer->creator_os, "Wi2k", 4); 956 957 footer->features = cpu_to_be32(0x02); 958 footer->version = cpu_to_be32(0x00010000); 959 if (disk_type == VHD_DYNAMIC) { 960 footer->data_offset = cpu_to_be64(HEADER_SIZE); 961 } else { 962 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL); 963 } 964 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE); 965 966 /* Version of Virtual PC 2007 */ 967 footer->major = cpu_to_be16(0x0005); 968 footer->minor = cpu_to_be16(0x0003); 969 footer->orig_size = cpu_to_be64(total_size); 970 footer->current_size = cpu_to_be64(total_size); 971 footer->cyls = cpu_to_be16(cyls); 972 footer->heads = heads; 973 footer->secs_per_cyl = secs_per_cyl; 974 975 footer->type = cpu_to_be32(disk_type); 976 977 #if defined(CONFIG_UUID) 978 uuid_generate(footer->uuid); 979 #endif 980 981 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); 982 983 if (disk_type == VHD_DYNAMIC) { 984 ret = create_dynamic_disk(blk, buf, total_sectors); 985 } else { 986 ret = create_fixed_disk(blk, buf, total_size); 987 } 988 if (ret < 0) { 989 error_setg(errp, "Unable to create or write VHD header"); 990 } 991 992 out: 993 blk_unref(blk); 994 g_free(disk_type_param); 995 return ret; 996 } 997 998 static int vpc_has_zero_init(BlockDriverState *bs) 999 { 1000 BDRVVPCState *s = bs->opaque; 1001 VHDFooter *footer = (VHDFooter *) s->footer_buf; 1002 1003 if (be32_to_cpu(footer->type) == VHD_FIXED) { 1004 return bdrv_has_zero_init(bs->file->bs); 1005 } else { 1006 return 1; 1007 } 1008 } 1009 1010 static void vpc_close(BlockDriverState *bs) 1011 { 1012 BDRVVPCState *s = bs->opaque; 1013 qemu_vfree(s->pagetable); 1014 #ifdef CACHE 1015 g_free(s->pageentry_u8); 1016 #endif 1017 1018 migrate_del_blocker(s->migration_blocker); 1019 error_free(s->migration_blocker); 1020 } 1021 1022 static QemuOptsList vpc_create_opts = { 1023 .name = "vpc-create-opts", 1024 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head), 1025 .desc = { 1026 { 1027 .name = BLOCK_OPT_SIZE, 1028 .type = QEMU_OPT_SIZE, 1029 .help = "Virtual disk size" 1030 }, 1031 { 1032 .name = BLOCK_OPT_SUBFMT, 1033 .type = QEMU_OPT_STRING, 1034 .help = 1035 "Type of virtual hard disk format. Supported formats are " 1036 "{dynamic (default) | fixed} " 1037 }, 1038 { 1039 .name = VPC_OPT_FORCE_SIZE, 1040 .type = QEMU_OPT_BOOL, 1041 .help = "Force disk size calculation to use the actual size " 1042 "specified, rather than using the nearest CHS-based " 1043 "calculation" 1044 }, 1045 { /* end of list */ } 1046 } 1047 }; 1048 1049 static BlockDriver bdrv_vpc = { 1050 .format_name = "vpc", 1051 .instance_size = sizeof(BDRVVPCState), 1052 1053 .bdrv_probe = vpc_probe, 1054 .bdrv_open = vpc_open, 1055 .bdrv_close = vpc_close, 1056 .bdrv_reopen_prepare = vpc_reopen_prepare, 1057 .bdrv_create = vpc_create, 1058 1059 .bdrv_read = vpc_co_read, 1060 .bdrv_write = vpc_co_write, 1061 .bdrv_co_get_block_status = vpc_co_get_block_status, 1062 1063 .bdrv_get_info = vpc_get_info, 1064 1065 .create_opts = &vpc_create_opts, 1066 .bdrv_has_zero_init = vpc_has_zero_init, 1067 }; 1068 1069 static void bdrv_vpc_init(void) 1070 { 1071 bdrv_register(&bdrv_vpc); 1072 } 1073 1074 block_init(bdrv_vpc_init); 1075