1 /* 2 * Block driver for the VMDK format 3 * 4 * Copyright (c) 2004 Fabrice Bellard 5 * Copyright (c) 2005 Filip Navara 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu-common.h" 27 #include "block_int.h" 28 #include "module.h" 29 #include "migration.h" 30 #include <zlib.h> 31 32 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 33 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 34 #define VMDK4_COMPRESSION_DEFLATE 1 35 #define VMDK4_FLAG_RGD (1 << 1) 36 #define VMDK4_FLAG_COMPRESS (1 << 16) 37 #define VMDK4_FLAG_MARKER (1 << 17) 38 #define VMDK4_GD_AT_END 0xffffffffffffffffULL 39 40 typedef struct { 41 uint32_t version; 42 uint32_t flags; 43 uint32_t disk_sectors; 44 uint32_t granularity; 45 uint32_t l1dir_offset; 46 uint32_t l1dir_size; 47 uint32_t file_sectors; 48 uint32_t cylinders; 49 uint32_t heads; 50 uint32_t sectors_per_track; 51 } VMDK3Header; 52 53 typedef struct { 54 uint32_t version; 55 uint32_t flags; 56 int64_t capacity; 57 int64_t granularity; 58 int64_t desc_offset; 59 int64_t desc_size; 60 int32_t num_gtes_per_gte; 61 int64_t rgd_offset; 62 int64_t gd_offset; 63 int64_t grain_offset; 64 char filler[1]; 65 char check_bytes[4]; 66 uint16_t compressAlgorithm; 67 } QEMU_PACKED VMDK4Header; 68 69 #define L2_CACHE_SIZE 16 70 71 typedef struct VmdkExtent { 72 BlockDriverState *file; 73 bool flat; 74 bool compressed; 75 bool has_marker; 76 int64_t sectors; 77 int64_t end_sector; 78 int64_t flat_start_offset; 79 int64_t l1_table_offset; 80 int64_t l1_backup_table_offset; 81 uint32_t *l1_table; 82 uint32_t *l1_backup_table; 83 unsigned int l1_size; 84 uint32_t l1_entry_sectors; 85 86 unsigned int l2_size; 87 uint32_t *l2_cache; 88 uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 89 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 90 91 unsigned int cluster_sectors; 92 } VmdkExtent; 93 94 typedef struct BDRVVmdkState { 95 CoMutex lock; 96 int desc_offset; 97 bool cid_updated; 98 uint32_t parent_cid; 99 int num_extents; 100 /* Extent array with num_extents entries, ascend ordered by address */ 101 VmdkExtent *extents; 102 Error *migration_blocker; 103 } BDRVVmdkState; 104 105 typedef struct VmdkMetaData { 106 uint32_t offset; 107 unsigned int l1_index; 108 unsigned int l2_index; 109 unsigned int l2_offset; 110 int valid; 111 } VmdkMetaData; 112 113 typedef struct VmdkGrainMarker { 114 uint64_t lba; 115 uint32_t size; 116 uint8_t data[0]; 117 } VmdkGrainMarker; 118 119 enum { 120 MARKER_END_OF_STREAM = 0, 121 MARKER_GRAIN_TABLE = 1, 122 MARKER_GRAIN_DIRECTORY = 2, 123 MARKER_FOOTER = 3, 124 }; 125 126 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 127 { 128 uint32_t magic; 129 130 if (buf_size < 4) { 131 return 0; 132 } 133 magic = be32_to_cpu(*(uint32_t *)buf); 134 if (magic == VMDK3_MAGIC || 135 magic == VMDK4_MAGIC) { 136 return 100; 137 } else { 138 const char *p = (const char *)buf; 139 const char *end = p + buf_size; 140 while (p < end) { 141 if (*p == '#') { 142 /* skip comment line */ 143 while (p < end && *p != '\n') { 144 p++; 145 } 146 p++; 147 continue; 148 } 149 if (*p == ' ') { 150 while (p < end && *p == ' ') { 151 p++; 152 } 153 /* skip '\r' if windows line endings used. */ 154 if (p < end && *p == '\r') { 155 p++; 156 } 157 /* only accept blank lines before 'version=' line */ 158 if (p == end || *p != '\n') { 159 return 0; 160 } 161 p++; 162 continue; 163 } 164 if (end - p >= strlen("version=X\n")) { 165 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || 166 strncmp("version=2\n", p, strlen("version=2\n")) == 0) { 167 return 100; 168 } 169 } 170 if (end - p >= strlen("version=X\r\n")) { 171 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || 172 strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) { 173 return 100; 174 } 175 } 176 return 0; 177 } 178 return 0; 179 } 180 } 181 182 #define CHECK_CID 1 183 184 #define SECTOR_SIZE 512 185 #define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */ 186 #define BUF_SIZE 4096 187 #define HEADER_SIZE 512 /* first sector of 512 bytes */ 188 189 static void vmdk_free_extents(BlockDriverState *bs) 190 { 191 int i; 192 BDRVVmdkState *s = bs->opaque; 193 VmdkExtent *e; 194 195 for (i = 0; i < s->num_extents; i++) { 196 e = &s->extents[i]; 197 g_free(e->l1_table); 198 g_free(e->l2_cache); 199 g_free(e->l1_backup_table); 200 if (e->file != bs->file) { 201 bdrv_delete(e->file); 202 } 203 } 204 g_free(s->extents); 205 } 206 207 static void vmdk_free_last_extent(BlockDriverState *bs) 208 { 209 BDRVVmdkState *s = bs->opaque; 210 211 if (s->num_extents == 0) { 212 return; 213 } 214 s->num_extents--; 215 s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent)); 216 } 217 218 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) 219 { 220 char desc[DESC_SIZE]; 221 uint32_t cid = 0xffffffff; 222 const char *p_name, *cid_str; 223 size_t cid_str_size; 224 BDRVVmdkState *s = bs->opaque; 225 int ret; 226 227 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 228 if (ret < 0) { 229 return 0; 230 } 231 232 if (parent) { 233 cid_str = "parentCID"; 234 cid_str_size = sizeof("parentCID"); 235 } else { 236 cid_str = "CID"; 237 cid_str_size = sizeof("CID"); 238 } 239 240 desc[DESC_SIZE - 1] = '\0'; 241 p_name = strstr(desc, cid_str); 242 if (p_name != NULL) { 243 p_name += cid_str_size; 244 sscanf(p_name, "%x", &cid); 245 } 246 247 return cid; 248 } 249 250 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 251 { 252 char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; 253 char *p_name, *tmp_str; 254 BDRVVmdkState *s = bs->opaque; 255 int ret; 256 257 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 258 if (ret < 0) { 259 return ret; 260 } 261 262 desc[DESC_SIZE - 1] = '\0'; 263 tmp_str = strstr(desc, "parentCID"); 264 if (tmp_str == NULL) { 265 return -EINVAL; 266 } 267 268 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); 269 p_name = strstr(desc, "CID"); 270 if (p_name != NULL) { 271 p_name += sizeof("CID"); 272 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); 273 pstrcat(desc, sizeof(desc), tmp_desc); 274 } 275 276 ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE); 277 if (ret < 0) { 278 return ret; 279 } 280 281 return 0; 282 } 283 284 static int vmdk_is_cid_valid(BlockDriverState *bs) 285 { 286 #ifdef CHECK_CID 287 BDRVVmdkState *s = bs->opaque; 288 BlockDriverState *p_bs = bs->backing_hd; 289 uint32_t cur_pcid; 290 291 if (p_bs) { 292 cur_pcid = vmdk_read_cid(p_bs, 0); 293 if (s->parent_cid != cur_pcid) { 294 /* CID not valid */ 295 return 0; 296 } 297 } 298 #endif 299 /* CID valid */ 300 return 1; 301 } 302 303 static int vmdk_parent_open(BlockDriverState *bs) 304 { 305 char *p_name; 306 char desc[DESC_SIZE + 1]; 307 BDRVVmdkState *s = bs->opaque; 308 int ret; 309 310 desc[DESC_SIZE] = '\0'; 311 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 312 if (ret < 0) { 313 return ret; 314 } 315 316 p_name = strstr(desc, "parentFileNameHint"); 317 if (p_name != NULL) { 318 char *end_name; 319 320 p_name += sizeof("parentFileNameHint") + 1; 321 end_name = strchr(p_name, '\"'); 322 if (end_name == NULL) { 323 return -EINVAL; 324 } 325 if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { 326 return -EINVAL; 327 } 328 329 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 330 } 331 332 return 0; 333 } 334 335 /* Create and append extent to the extent array. Return the added VmdkExtent 336 * address. return NULL if allocation failed. */ 337 static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, 338 BlockDriverState *file, bool flat, int64_t sectors, 339 int64_t l1_offset, int64_t l1_backup_offset, 340 uint32_t l1_size, 341 int l2_size, unsigned int cluster_sectors) 342 { 343 VmdkExtent *extent; 344 BDRVVmdkState *s = bs->opaque; 345 346 s->extents = g_realloc(s->extents, 347 (s->num_extents + 1) * sizeof(VmdkExtent)); 348 extent = &s->extents[s->num_extents]; 349 s->num_extents++; 350 351 memset(extent, 0, sizeof(VmdkExtent)); 352 extent->file = file; 353 extent->flat = flat; 354 extent->sectors = sectors; 355 extent->l1_table_offset = l1_offset; 356 extent->l1_backup_table_offset = l1_backup_offset; 357 extent->l1_size = l1_size; 358 extent->l1_entry_sectors = l2_size * cluster_sectors; 359 extent->l2_size = l2_size; 360 extent->cluster_sectors = cluster_sectors; 361 362 if (s->num_extents > 1) { 363 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; 364 } else { 365 extent->end_sector = extent->sectors; 366 } 367 bs->total_sectors = extent->end_sector; 368 return extent; 369 } 370 371 static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) 372 { 373 int ret; 374 int l1_size, i; 375 376 /* read the L1 table */ 377 l1_size = extent->l1_size * sizeof(uint32_t); 378 extent->l1_table = g_malloc(l1_size); 379 ret = bdrv_pread(extent->file, 380 extent->l1_table_offset, 381 extent->l1_table, 382 l1_size); 383 if (ret < 0) { 384 goto fail_l1; 385 } 386 for (i = 0; i < extent->l1_size; i++) { 387 le32_to_cpus(&extent->l1_table[i]); 388 } 389 390 if (extent->l1_backup_table_offset) { 391 extent->l1_backup_table = g_malloc(l1_size); 392 ret = bdrv_pread(extent->file, 393 extent->l1_backup_table_offset, 394 extent->l1_backup_table, 395 l1_size); 396 if (ret < 0) { 397 goto fail_l1b; 398 } 399 for (i = 0; i < extent->l1_size; i++) { 400 le32_to_cpus(&extent->l1_backup_table[i]); 401 } 402 } 403 404 extent->l2_cache = 405 g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); 406 return 0; 407 fail_l1b: 408 g_free(extent->l1_backup_table); 409 fail_l1: 410 g_free(extent->l1_table); 411 return ret; 412 } 413 414 static int vmdk_open_vmdk3(BlockDriverState *bs, 415 BlockDriverState *file, 416 int flags) 417 { 418 int ret; 419 uint32_t magic; 420 VMDK3Header header; 421 VmdkExtent *extent; 422 423 ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 424 if (ret < 0) { 425 return ret; 426 } 427 extent = vmdk_add_extent(bs, 428 bs->file, false, 429 le32_to_cpu(header.disk_sectors), 430 le32_to_cpu(header.l1dir_offset) << 9, 431 0, 1 << 6, 1 << 9, 432 le32_to_cpu(header.granularity)); 433 ret = vmdk_init_tables(bs, extent); 434 if (ret) { 435 /* free extent allocated by vmdk_add_extent */ 436 vmdk_free_last_extent(bs); 437 } 438 return ret; 439 } 440 441 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 442 int64_t desc_offset); 443 444 static int vmdk_open_vmdk4(BlockDriverState *bs, 445 BlockDriverState *file, 446 int flags) 447 { 448 int ret; 449 uint32_t magic; 450 uint32_t l1_size, l1_entry_sectors; 451 VMDK4Header header; 452 VmdkExtent *extent; 453 int64_t l1_backup_offset = 0; 454 455 ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 456 if (ret < 0) { 457 return ret; 458 } 459 if (header.capacity == 0 && header.desc_offset) { 460 return vmdk_open_desc_file(bs, flags, header.desc_offset << 9); 461 } 462 463 if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { 464 /* 465 * The footer takes precedence over the header, so read it in. The 466 * footer starts at offset -1024 from the end: One sector for the 467 * footer, and another one for the end-of-stream marker. 468 */ 469 struct { 470 struct { 471 uint64_t val; 472 uint32_t size; 473 uint32_t type; 474 uint8_t pad[512 - 16]; 475 } QEMU_PACKED footer_marker; 476 477 uint32_t magic; 478 VMDK4Header header; 479 uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; 480 481 struct { 482 uint64_t val; 483 uint32_t size; 484 uint32_t type; 485 uint8_t pad[512 - 16]; 486 } QEMU_PACKED eos_marker; 487 } QEMU_PACKED footer; 488 489 ret = bdrv_pread(file, 490 bs->file->total_sectors * 512 - 1536, 491 &footer, sizeof(footer)); 492 if (ret < 0) { 493 return ret; 494 } 495 496 /* Some sanity checks for the footer */ 497 if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || 498 le32_to_cpu(footer.footer_marker.size) != 0 || 499 le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || 500 le64_to_cpu(footer.eos_marker.val) != 0 || 501 le32_to_cpu(footer.eos_marker.size) != 0 || 502 le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) 503 { 504 return -EINVAL; 505 } 506 507 header = footer.header; 508 } 509 510 l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) 511 * le64_to_cpu(header.granularity); 512 if (l1_entry_sectors == 0) { 513 return -EINVAL; 514 } 515 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1) 516 / l1_entry_sectors; 517 if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { 518 l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; 519 } 520 extent = vmdk_add_extent(bs, file, false, 521 le64_to_cpu(header.capacity), 522 le64_to_cpu(header.gd_offset) << 9, 523 l1_backup_offset, 524 l1_size, 525 le32_to_cpu(header.num_gtes_per_gte), 526 le64_to_cpu(header.granularity)); 527 extent->compressed = 528 le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; 529 extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; 530 ret = vmdk_init_tables(bs, extent); 531 if (ret) { 532 /* free extent allocated by vmdk_add_extent */ 533 vmdk_free_last_extent(bs); 534 } 535 return ret; 536 } 537 538 /* find an option value out of descriptor file */ 539 static int vmdk_parse_description(const char *desc, const char *opt_name, 540 char *buf, int buf_size) 541 { 542 char *opt_pos, *opt_end; 543 const char *end = desc + strlen(desc); 544 545 opt_pos = strstr(desc, opt_name); 546 if (!opt_pos) { 547 return -1; 548 } 549 /* Skip "=\"" following opt_name */ 550 opt_pos += strlen(opt_name) + 2; 551 if (opt_pos >= end) { 552 return -1; 553 } 554 opt_end = opt_pos; 555 while (opt_end < end && *opt_end != '"') { 556 opt_end++; 557 } 558 if (opt_end == end || buf_size < opt_end - opt_pos + 1) { 559 return -1; 560 } 561 pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); 562 return 0; 563 } 564 565 /* Open an extent file and append to bs array */ 566 static int vmdk_open_sparse(BlockDriverState *bs, 567 BlockDriverState *file, 568 int flags) 569 { 570 uint32_t magic; 571 572 if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) { 573 return -EIO; 574 } 575 576 magic = be32_to_cpu(magic); 577 switch (magic) { 578 case VMDK3_MAGIC: 579 return vmdk_open_vmdk3(bs, file, flags); 580 break; 581 case VMDK4_MAGIC: 582 return vmdk_open_vmdk4(bs, file, flags); 583 break; 584 default: 585 return -EINVAL; 586 break; 587 } 588 } 589 590 static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, 591 const char *desc_file_path) 592 { 593 int ret; 594 char access[11]; 595 char type[11]; 596 char fname[512]; 597 const char *p = desc; 598 int64_t sectors = 0; 599 int64_t flat_offset; 600 char extent_path[PATH_MAX]; 601 BlockDriverState *extent_file; 602 603 while (*p) { 604 /* parse extent line: 605 * RW [size in sectors] FLAT "file-name.vmdk" OFFSET 606 * or 607 * RW [size in sectors] SPARSE "file-name.vmdk" 608 */ 609 flat_offset = -1; 610 ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64, 611 access, §ors, type, fname, &flat_offset); 612 if (ret < 4 || strcmp(access, "RW")) { 613 goto next_line; 614 } else if (!strcmp(type, "FLAT")) { 615 if (ret != 5 || flat_offset < 0) { 616 return -EINVAL; 617 } 618 } else if (ret != 4) { 619 return -EINVAL; 620 } 621 622 /* trim the quotation marks around */ 623 if (fname[0] == '"') { 624 memmove(fname, fname + 1, strlen(fname)); 625 if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') { 626 return -EINVAL; 627 } 628 fname[strlen(fname) - 1] = '\0'; 629 } 630 if (sectors <= 0 || 631 (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) || 632 (strcmp(access, "RW"))) { 633 goto next_line; 634 } 635 636 path_combine(extent_path, sizeof(extent_path), 637 desc_file_path, fname); 638 ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags); 639 if (ret) { 640 return ret; 641 } 642 643 /* save to extents array */ 644 if (!strcmp(type, "FLAT")) { 645 /* FLAT extent */ 646 VmdkExtent *extent; 647 648 extent = vmdk_add_extent(bs, extent_file, true, sectors, 649 0, 0, 0, 0, sectors); 650 extent->flat_start_offset = flat_offset << 9; 651 } else if (!strcmp(type, "SPARSE")) { 652 /* SPARSE extent */ 653 ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); 654 if (ret) { 655 bdrv_delete(extent_file); 656 return ret; 657 } 658 } else { 659 fprintf(stderr, 660 "VMDK: Not supported extent type \"%s\""".\n", type); 661 return -ENOTSUP; 662 } 663 next_line: 664 /* move to next line */ 665 while (*p && *p != '\n') { 666 p++; 667 } 668 p++; 669 } 670 return 0; 671 } 672 673 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 674 int64_t desc_offset) 675 { 676 int ret; 677 char buf[2048]; 678 char ct[128]; 679 BDRVVmdkState *s = bs->opaque; 680 681 ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf)); 682 if (ret < 0) { 683 return ret; 684 } 685 buf[2047] = '\0'; 686 if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { 687 return -EINVAL; 688 } 689 if (strcmp(ct, "monolithicFlat") && 690 strcmp(ct, "twoGbMaxExtentSparse") && 691 strcmp(ct, "twoGbMaxExtentFlat")) { 692 fprintf(stderr, 693 "VMDK: Not supported image type \"%s\""".\n", ct); 694 return -ENOTSUP; 695 } 696 s->desc_offset = 0; 697 return vmdk_parse_extents(buf, bs, bs->file->filename); 698 } 699 700 static int vmdk_open(BlockDriverState *bs, int flags) 701 { 702 int ret; 703 BDRVVmdkState *s = bs->opaque; 704 705 if (vmdk_open_sparse(bs, bs->file, flags) == 0) { 706 s->desc_offset = 0x200; 707 } else { 708 ret = vmdk_open_desc_file(bs, flags, 0); 709 if (ret) { 710 goto fail; 711 } 712 } 713 /* try to open parent images, if exist */ 714 ret = vmdk_parent_open(bs); 715 if (ret) { 716 goto fail; 717 } 718 s->parent_cid = vmdk_read_cid(bs, 1); 719 qemu_co_mutex_init(&s->lock); 720 721 /* Disable migration when VMDK images are used */ 722 error_set(&s->migration_blocker, 723 QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, 724 "vmdk", bs->device_name, "live migration"); 725 migrate_add_blocker(s->migration_blocker); 726 727 return 0; 728 729 fail: 730 vmdk_free_extents(bs); 731 return ret; 732 } 733 734 static int get_whole_cluster(BlockDriverState *bs, 735 VmdkExtent *extent, 736 uint64_t cluster_offset, 737 uint64_t offset, 738 bool allocate) 739 { 740 /* 128 sectors * 512 bytes each = grain size 64KB */ 741 uint8_t whole_grain[extent->cluster_sectors * 512]; 742 743 /* we will be here if it's first write on non-exist grain(cluster). 744 * try to read from parent image, if exist */ 745 if (bs->backing_hd) { 746 int ret; 747 748 if (!vmdk_is_cid_valid(bs)) { 749 return -1; 750 } 751 752 /* floor offset to cluster */ 753 offset -= offset % (extent->cluster_sectors * 512); 754 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, 755 extent->cluster_sectors); 756 if (ret < 0) { 757 return -1; 758 } 759 760 /* Write grain only into the active image */ 761 ret = bdrv_write(extent->file, cluster_offset, whole_grain, 762 extent->cluster_sectors); 763 if (ret < 0) { 764 return -1; 765 } 766 } 767 return 0; 768 } 769 770 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) 771 { 772 /* update L2 table */ 773 if (bdrv_pwrite_sync( 774 extent->file, 775 ((int64_t)m_data->l2_offset * 512) 776 + (m_data->l2_index * sizeof(m_data->offset)), 777 &(m_data->offset), 778 sizeof(m_data->offset) 779 ) < 0) { 780 return -1; 781 } 782 /* update backup L2 table */ 783 if (extent->l1_backup_table_offset != 0) { 784 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; 785 if (bdrv_pwrite_sync( 786 extent->file, 787 ((int64_t)m_data->l2_offset * 512) 788 + (m_data->l2_index * sizeof(m_data->offset)), 789 &(m_data->offset), sizeof(m_data->offset) 790 ) < 0) { 791 return -1; 792 } 793 } 794 795 return 0; 796 } 797 798 static int get_cluster_offset(BlockDriverState *bs, 799 VmdkExtent *extent, 800 VmdkMetaData *m_data, 801 uint64_t offset, 802 int allocate, 803 uint64_t *cluster_offset) 804 { 805 unsigned int l1_index, l2_offset, l2_index; 806 int min_index, i, j; 807 uint32_t min_count, *l2_table, tmp = 0; 808 809 if (m_data) { 810 m_data->valid = 0; 811 } 812 if (extent->flat) { 813 *cluster_offset = extent->flat_start_offset; 814 return 0; 815 } 816 817 offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; 818 l1_index = (offset >> 9) / extent->l1_entry_sectors; 819 if (l1_index >= extent->l1_size) { 820 return -1; 821 } 822 l2_offset = extent->l1_table[l1_index]; 823 if (!l2_offset) { 824 return -1; 825 } 826 for (i = 0; i < L2_CACHE_SIZE; i++) { 827 if (l2_offset == extent->l2_cache_offsets[i]) { 828 /* increment the hit count */ 829 if (++extent->l2_cache_counts[i] == 0xffffffff) { 830 for (j = 0; j < L2_CACHE_SIZE; j++) { 831 extent->l2_cache_counts[j] >>= 1; 832 } 833 } 834 l2_table = extent->l2_cache + (i * extent->l2_size); 835 goto found; 836 } 837 } 838 /* not found: load a new entry in the least used one */ 839 min_index = 0; 840 min_count = 0xffffffff; 841 for (i = 0; i < L2_CACHE_SIZE; i++) { 842 if (extent->l2_cache_counts[i] < min_count) { 843 min_count = extent->l2_cache_counts[i]; 844 min_index = i; 845 } 846 } 847 l2_table = extent->l2_cache + (min_index * extent->l2_size); 848 if (bdrv_pread( 849 extent->file, 850 (int64_t)l2_offset * 512, 851 l2_table, 852 extent->l2_size * sizeof(uint32_t) 853 ) != extent->l2_size * sizeof(uint32_t)) { 854 return -1; 855 } 856 857 extent->l2_cache_offsets[min_index] = l2_offset; 858 extent->l2_cache_counts[min_index] = 1; 859 found: 860 l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; 861 *cluster_offset = le32_to_cpu(l2_table[l2_index]); 862 863 if (!*cluster_offset) { 864 if (!allocate) { 865 return -1; 866 } 867 868 /* Avoid the L2 tables update for the images that have snapshots. */ 869 *cluster_offset = bdrv_getlength(extent->file); 870 if (!extent->compressed) { 871 bdrv_truncate( 872 extent->file, 873 *cluster_offset + (extent->cluster_sectors << 9) 874 ); 875 } 876 877 *cluster_offset >>= 9; 878 tmp = cpu_to_le32(*cluster_offset); 879 l2_table[l2_index] = tmp; 880 881 /* First of all we write grain itself, to avoid race condition 882 * that may to corrupt the image. 883 * This problem may occur because of insufficient space on host disk 884 * or inappropriate VM shutdown. 885 */ 886 if (get_whole_cluster( 887 bs, extent, *cluster_offset, offset, allocate) == -1) { 888 return -1; 889 } 890 891 if (m_data) { 892 m_data->offset = tmp; 893 m_data->l1_index = l1_index; 894 m_data->l2_index = l2_index; 895 m_data->l2_offset = l2_offset; 896 m_data->valid = 1; 897 } 898 } 899 *cluster_offset <<= 9; 900 return 0; 901 } 902 903 static VmdkExtent *find_extent(BDRVVmdkState *s, 904 int64_t sector_num, VmdkExtent *start_hint) 905 { 906 VmdkExtent *extent = start_hint; 907 908 if (!extent) { 909 extent = &s->extents[0]; 910 } 911 while (extent < &s->extents[s->num_extents]) { 912 if (sector_num < extent->end_sector) { 913 return extent; 914 } 915 extent++; 916 } 917 return NULL; 918 } 919 920 static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, 921 int64_t sector_num, int nb_sectors, int *pnum) 922 { 923 BDRVVmdkState *s = bs->opaque; 924 int64_t index_in_cluster, n, ret; 925 uint64_t offset; 926 VmdkExtent *extent; 927 928 extent = find_extent(s, sector_num, NULL); 929 if (!extent) { 930 return 0; 931 } 932 qemu_co_mutex_lock(&s->lock); 933 ret = get_cluster_offset(bs, extent, NULL, 934 sector_num * 512, 0, &offset); 935 qemu_co_mutex_unlock(&s->lock); 936 /* get_cluster_offset returning 0 means success */ 937 ret = !ret; 938 939 index_in_cluster = sector_num % extent->cluster_sectors; 940 n = extent->cluster_sectors - index_in_cluster; 941 if (n > nb_sectors) { 942 n = nb_sectors; 943 } 944 *pnum = n; 945 return ret; 946 } 947 948 static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, 949 int64_t offset_in_cluster, const uint8_t *buf, 950 int nb_sectors, int64_t sector_num) 951 { 952 int ret; 953 VmdkGrainMarker *data = NULL; 954 uLongf buf_len; 955 const uint8_t *write_buf = buf; 956 int write_len = nb_sectors * 512; 957 958 if (extent->compressed) { 959 if (!extent->has_marker) { 960 ret = -EINVAL; 961 goto out; 962 } 963 buf_len = (extent->cluster_sectors << 9) * 2; 964 data = g_malloc(buf_len + sizeof(VmdkGrainMarker)); 965 if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK || 966 buf_len == 0) { 967 ret = -EINVAL; 968 goto out; 969 } 970 data->lba = sector_num; 971 data->size = buf_len; 972 write_buf = (uint8_t *)data; 973 write_len = buf_len + sizeof(VmdkGrainMarker); 974 } 975 ret = bdrv_pwrite(extent->file, 976 cluster_offset + offset_in_cluster, 977 write_buf, 978 write_len); 979 if (ret != write_len) { 980 ret = ret < 0 ? ret : -EIO; 981 goto out; 982 } 983 ret = 0; 984 out: 985 g_free(data); 986 return ret; 987 } 988 989 static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, 990 int64_t offset_in_cluster, uint8_t *buf, 991 int nb_sectors) 992 { 993 int ret; 994 int cluster_bytes, buf_bytes; 995 uint8_t *cluster_buf, *compressed_data; 996 uint8_t *uncomp_buf; 997 uint32_t data_len; 998 VmdkGrainMarker *marker; 999 uLongf buf_len; 1000 1001 1002 if (!extent->compressed) { 1003 ret = bdrv_pread(extent->file, 1004 cluster_offset + offset_in_cluster, 1005 buf, nb_sectors * 512); 1006 if (ret == nb_sectors * 512) { 1007 return 0; 1008 } else { 1009 return -EIO; 1010 } 1011 } 1012 cluster_bytes = extent->cluster_sectors * 512; 1013 /* Read two clusters in case GrainMarker + compressed data > one cluster */ 1014 buf_bytes = cluster_bytes * 2; 1015 cluster_buf = g_malloc(buf_bytes); 1016 uncomp_buf = g_malloc(cluster_bytes); 1017 ret = bdrv_pread(extent->file, 1018 cluster_offset, 1019 cluster_buf, buf_bytes); 1020 if (ret < 0) { 1021 goto out; 1022 } 1023 compressed_data = cluster_buf; 1024 buf_len = cluster_bytes; 1025 data_len = cluster_bytes; 1026 if (extent->has_marker) { 1027 marker = (VmdkGrainMarker *)cluster_buf; 1028 compressed_data = marker->data; 1029 data_len = le32_to_cpu(marker->size); 1030 } 1031 if (!data_len || data_len > buf_bytes) { 1032 ret = -EINVAL; 1033 goto out; 1034 } 1035 ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len); 1036 if (ret != Z_OK) { 1037 ret = -EINVAL; 1038 goto out; 1039 1040 } 1041 if (offset_in_cluster < 0 || 1042 offset_in_cluster + nb_sectors * 512 > buf_len) { 1043 ret = -EINVAL; 1044 goto out; 1045 } 1046 memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512); 1047 ret = 0; 1048 1049 out: 1050 g_free(uncomp_buf); 1051 g_free(cluster_buf); 1052 return ret; 1053 } 1054 1055 static int vmdk_read(BlockDriverState *bs, int64_t sector_num, 1056 uint8_t *buf, int nb_sectors) 1057 { 1058 BDRVVmdkState *s = bs->opaque; 1059 int ret; 1060 uint64_t n, index_in_cluster; 1061 VmdkExtent *extent = NULL; 1062 uint64_t cluster_offset; 1063 1064 while (nb_sectors > 0) { 1065 extent = find_extent(s, sector_num, extent); 1066 if (!extent) { 1067 return -EIO; 1068 } 1069 ret = get_cluster_offset( 1070 bs, extent, NULL, 1071 sector_num << 9, 0, &cluster_offset); 1072 index_in_cluster = sector_num % extent->cluster_sectors; 1073 n = extent->cluster_sectors - index_in_cluster; 1074 if (n > nb_sectors) { 1075 n = nb_sectors; 1076 } 1077 if (ret) { 1078 /* if not allocated, try to read from parent image, if exist */ 1079 if (bs->backing_hd) { 1080 if (!vmdk_is_cid_valid(bs)) { 1081 return -EINVAL; 1082 } 1083 ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 1084 if (ret < 0) { 1085 return ret; 1086 } 1087 } else { 1088 memset(buf, 0, 512 * n); 1089 } 1090 } else { 1091 ret = vmdk_read_extent(extent, 1092 cluster_offset, index_in_cluster * 512, 1093 buf, n); 1094 if (ret) { 1095 return ret; 1096 } 1097 } 1098 nb_sectors -= n; 1099 sector_num += n; 1100 buf += n * 512; 1101 } 1102 return 0; 1103 } 1104 1105 static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, 1106 uint8_t *buf, int nb_sectors) 1107 { 1108 int ret; 1109 BDRVVmdkState *s = bs->opaque; 1110 qemu_co_mutex_lock(&s->lock); 1111 ret = vmdk_read(bs, sector_num, buf, nb_sectors); 1112 qemu_co_mutex_unlock(&s->lock); 1113 return ret; 1114 } 1115 1116 static int vmdk_write(BlockDriverState *bs, int64_t sector_num, 1117 const uint8_t *buf, int nb_sectors) 1118 { 1119 BDRVVmdkState *s = bs->opaque; 1120 VmdkExtent *extent = NULL; 1121 int n, ret; 1122 int64_t index_in_cluster; 1123 uint64_t cluster_offset; 1124 VmdkMetaData m_data; 1125 1126 if (sector_num > bs->total_sectors) { 1127 fprintf(stderr, 1128 "(VMDK) Wrong offset: sector_num=0x%" PRIx64 1129 " total_sectors=0x%" PRIx64 "\n", 1130 sector_num, bs->total_sectors); 1131 return -EIO; 1132 } 1133 1134 while (nb_sectors > 0) { 1135 extent = find_extent(s, sector_num, extent); 1136 if (!extent) { 1137 return -EIO; 1138 } 1139 ret = get_cluster_offset( 1140 bs, 1141 extent, 1142 &m_data, 1143 sector_num << 9, !extent->compressed, 1144 &cluster_offset); 1145 if (extent->compressed) { 1146 if (ret == 0) { 1147 /* Refuse write to allocated cluster for streamOptimized */ 1148 fprintf(stderr, 1149 "VMDK: can't write to allocated cluster" 1150 " for streamOptimized\n"); 1151 return -EIO; 1152 } else { 1153 /* allocate */ 1154 ret = get_cluster_offset( 1155 bs, 1156 extent, 1157 &m_data, 1158 sector_num << 9, 1, 1159 &cluster_offset); 1160 } 1161 } 1162 if (ret) { 1163 return -EINVAL; 1164 } 1165 index_in_cluster = sector_num % extent->cluster_sectors; 1166 n = extent->cluster_sectors - index_in_cluster; 1167 if (n > nb_sectors) { 1168 n = nb_sectors; 1169 } 1170 1171 ret = vmdk_write_extent(extent, 1172 cluster_offset, index_in_cluster * 512, 1173 buf, n, sector_num); 1174 if (ret) { 1175 return ret; 1176 } 1177 if (m_data.valid) { 1178 /* update L2 tables */ 1179 if (vmdk_L2update(extent, &m_data) == -1) { 1180 return -EIO; 1181 } 1182 } 1183 nb_sectors -= n; 1184 sector_num += n; 1185 buf += n * 512; 1186 1187 /* update CID on the first write every time the virtual disk is 1188 * opened */ 1189 if (!s->cid_updated) { 1190 ret = vmdk_write_cid(bs, time(NULL)); 1191 if (ret < 0) { 1192 return ret; 1193 } 1194 s->cid_updated = true; 1195 } 1196 } 1197 return 0; 1198 } 1199 1200 static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, 1201 const uint8_t *buf, int nb_sectors) 1202 { 1203 int ret; 1204 BDRVVmdkState *s = bs->opaque; 1205 qemu_co_mutex_lock(&s->lock); 1206 ret = vmdk_write(bs, sector_num, buf, nb_sectors); 1207 qemu_co_mutex_unlock(&s->lock); 1208 return ret; 1209 } 1210 1211 1212 static int vmdk_create_extent(const char *filename, int64_t filesize, 1213 bool flat, bool compress) 1214 { 1215 int ret, i; 1216 int fd = 0; 1217 VMDK4Header header; 1218 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; 1219 1220 fd = qemu_open(filename, 1221 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1222 0644); 1223 if (fd < 0) { 1224 return -errno; 1225 } 1226 if (flat) { 1227 ret = ftruncate(fd, filesize); 1228 if (ret < 0) { 1229 ret = -errno; 1230 } 1231 goto exit; 1232 } 1233 magic = cpu_to_be32(VMDK4_MAGIC); 1234 memset(&header, 0, sizeof(header)); 1235 header.version = 1; 1236 header.flags = 1237 3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0); 1238 header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; 1239 header.capacity = filesize / 512; 1240 header.granularity = 128; 1241 header.num_gtes_per_gte = 512; 1242 1243 grains = (filesize / 512 + header.granularity - 1) / header.granularity; 1244 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; 1245 gt_count = 1246 (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; 1247 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; 1248 1249 header.desc_offset = 1; 1250 header.desc_size = 20; 1251 header.rgd_offset = header.desc_offset + header.desc_size; 1252 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); 1253 header.grain_offset = 1254 ((header.gd_offset + gd_size + (gt_size * gt_count) + 1255 header.granularity - 1) / header.granularity) * 1256 header.granularity; 1257 /* swap endianness for all header fields */ 1258 header.version = cpu_to_le32(header.version); 1259 header.flags = cpu_to_le32(header.flags); 1260 header.capacity = cpu_to_le64(header.capacity); 1261 header.granularity = cpu_to_le64(header.granularity); 1262 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); 1263 header.desc_offset = cpu_to_le64(header.desc_offset); 1264 header.desc_size = cpu_to_le64(header.desc_size); 1265 header.rgd_offset = cpu_to_le64(header.rgd_offset); 1266 header.gd_offset = cpu_to_le64(header.gd_offset); 1267 header.grain_offset = cpu_to_le64(header.grain_offset); 1268 header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm); 1269 1270 header.check_bytes[0] = 0xa; 1271 header.check_bytes[1] = 0x20; 1272 header.check_bytes[2] = 0xd; 1273 header.check_bytes[3] = 0xa; 1274 1275 /* write all the data */ 1276 ret = qemu_write_full(fd, &magic, sizeof(magic)); 1277 if (ret != sizeof(magic)) { 1278 ret = -errno; 1279 goto exit; 1280 } 1281 ret = qemu_write_full(fd, &header, sizeof(header)); 1282 if (ret != sizeof(header)) { 1283 ret = -errno; 1284 goto exit; 1285 } 1286 1287 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9); 1288 if (ret < 0) { 1289 ret = -errno; 1290 goto exit; 1291 } 1292 1293 /* write grain directory */ 1294 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); 1295 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size; 1296 i < gt_count; i++, tmp += gt_size) { 1297 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 1298 if (ret != sizeof(tmp)) { 1299 ret = -errno; 1300 goto exit; 1301 } 1302 } 1303 1304 /* write backup grain directory */ 1305 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); 1306 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size; 1307 i < gt_count; i++, tmp += gt_size) { 1308 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 1309 if (ret != sizeof(tmp)) { 1310 ret = -errno; 1311 goto exit; 1312 } 1313 } 1314 1315 ret = 0; 1316 exit: 1317 qemu_close(fd); 1318 return ret; 1319 } 1320 1321 static int filename_decompose(const char *filename, char *path, char *prefix, 1322 char *postfix, size_t buf_len) 1323 { 1324 const char *p, *q; 1325 1326 if (filename == NULL || !strlen(filename)) { 1327 fprintf(stderr, "Vmdk: no filename provided.\n"); 1328 return -1; 1329 } 1330 p = strrchr(filename, '/'); 1331 if (p == NULL) { 1332 p = strrchr(filename, '\\'); 1333 } 1334 if (p == NULL) { 1335 p = strrchr(filename, ':'); 1336 } 1337 if (p != NULL) { 1338 p++; 1339 if (p - filename >= buf_len) { 1340 return -1; 1341 } 1342 pstrcpy(path, p - filename + 1, filename); 1343 } else { 1344 p = filename; 1345 path[0] = '\0'; 1346 } 1347 q = strrchr(p, '.'); 1348 if (q == NULL) { 1349 pstrcpy(prefix, buf_len, p); 1350 postfix[0] = '\0'; 1351 } else { 1352 if (q - p >= buf_len) { 1353 return -1; 1354 } 1355 pstrcpy(prefix, q - p + 1, p); 1356 pstrcpy(postfix, buf_len, q); 1357 } 1358 return 0; 1359 } 1360 1361 static int relative_path(char *dest, int dest_size, 1362 const char *base, const char *target) 1363 { 1364 int i = 0; 1365 int n = 0; 1366 const char *p, *q; 1367 #ifdef _WIN32 1368 const char *sep = "\\"; 1369 #else 1370 const char *sep = "/"; 1371 #endif 1372 1373 if (!(dest && base && target)) { 1374 return -1; 1375 } 1376 if (path_is_absolute(target)) { 1377 dest[dest_size - 1] = '\0'; 1378 strncpy(dest, target, dest_size - 1); 1379 return 0; 1380 } 1381 while (base[i] == target[i]) { 1382 i++; 1383 } 1384 p = &base[i]; 1385 q = &target[i]; 1386 while (*p) { 1387 if (*p == *sep) { 1388 n++; 1389 } 1390 p++; 1391 } 1392 dest[0] = '\0'; 1393 for (; n; n--) { 1394 pstrcat(dest, dest_size, ".."); 1395 pstrcat(dest, dest_size, sep); 1396 } 1397 pstrcat(dest, dest_size, q); 1398 return 0; 1399 } 1400 1401 static int vmdk_create(const char *filename, QEMUOptionParameter *options) 1402 { 1403 int fd, idx = 0; 1404 char desc[BUF_SIZE]; 1405 int64_t total_size = 0, filesize; 1406 const char *backing_file = NULL; 1407 const char *fmt = NULL; 1408 int flags = 0; 1409 int ret = 0; 1410 bool flat, split, compress; 1411 char ext_desc_lines[BUF_SIZE] = ""; 1412 char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX]; 1413 const int64_t split_size = 0x80000000; /* VMDK has constant split size */ 1414 const char *desc_extent_line; 1415 char parent_desc_line[BUF_SIZE] = ""; 1416 uint32_t parent_cid = 0xffffffff; 1417 const char desc_template[] = 1418 "# Disk DescriptorFile\n" 1419 "version=1\n" 1420 "CID=%x\n" 1421 "parentCID=%x\n" 1422 "createType=\"%s\"\n" 1423 "%s" 1424 "\n" 1425 "# Extent description\n" 1426 "%s" 1427 "\n" 1428 "# The Disk Data Base\n" 1429 "#DDB\n" 1430 "\n" 1431 "ddb.virtualHWVersion = \"%d\"\n" 1432 "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 1433 "ddb.geometry.heads = \"16\"\n" 1434 "ddb.geometry.sectors = \"63\"\n" 1435 "ddb.adapterType = \"ide\"\n"; 1436 1437 if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) { 1438 return -EINVAL; 1439 } 1440 /* Read out options */ 1441 while (options && options->name) { 1442 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1443 total_size = options->value.n; 1444 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1445 backing_file = options->value.s; 1446 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { 1447 flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; 1448 } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { 1449 fmt = options->value.s; 1450 } 1451 options++; 1452 } 1453 if (!fmt) { 1454 /* Default format to monolithicSparse */ 1455 fmt = "monolithicSparse"; 1456 } else if (strcmp(fmt, "monolithicFlat") && 1457 strcmp(fmt, "monolithicSparse") && 1458 strcmp(fmt, "twoGbMaxExtentSparse") && 1459 strcmp(fmt, "twoGbMaxExtentFlat") && 1460 strcmp(fmt, "streamOptimized")) { 1461 fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt); 1462 return -EINVAL; 1463 } 1464 split = !(strcmp(fmt, "twoGbMaxExtentFlat") && 1465 strcmp(fmt, "twoGbMaxExtentSparse")); 1466 flat = !(strcmp(fmt, "monolithicFlat") && 1467 strcmp(fmt, "twoGbMaxExtentFlat")); 1468 compress = !strcmp(fmt, "streamOptimized"); 1469 if (flat) { 1470 desc_extent_line = "RW %lld FLAT \"%s\" 0\n"; 1471 } else { 1472 desc_extent_line = "RW %lld SPARSE \"%s\"\n"; 1473 } 1474 if (flat && backing_file) { 1475 /* not supporting backing file for flat image */ 1476 return -ENOTSUP; 1477 } 1478 if (backing_file) { 1479 char parent_filename[PATH_MAX]; 1480 BlockDriverState *bs = bdrv_new(""); 1481 ret = bdrv_open(bs, backing_file, 0, NULL); 1482 if (ret != 0) { 1483 bdrv_delete(bs); 1484 return ret; 1485 } 1486 if (strcmp(bs->drv->format_name, "vmdk")) { 1487 bdrv_delete(bs); 1488 return -EINVAL; 1489 } 1490 parent_cid = vmdk_read_cid(bs, 0); 1491 bdrv_delete(bs); 1492 relative_path(parent_filename, sizeof(parent_filename), 1493 filename, backing_file); 1494 snprintf(parent_desc_line, sizeof(parent_desc_line), 1495 "parentFileNameHint=\"%s\"", parent_filename); 1496 } 1497 1498 /* Create extents */ 1499 filesize = total_size; 1500 while (filesize > 0) { 1501 char desc_line[BUF_SIZE]; 1502 char ext_filename[PATH_MAX]; 1503 char desc_filename[PATH_MAX]; 1504 int64_t size = filesize; 1505 1506 if (split && size > split_size) { 1507 size = split_size; 1508 } 1509 if (split) { 1510 snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s", 1511 prefix, flat ? 'f' : 's', ++idx, postfix); 1512 } else if (flat) { 1513 snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s", 1514 prefix, postfix); 1515 } else { 1516 snprintf(desc_filename, sizeof(desc_filename), "%s%s", 1517 prefix, postfix); 1518 } 1519 snprintf(ext_filename, sizeof(ext_filename), "%s%s", 1520 path, desc_filename); 1521 1522 if (vmdk_create_extent(ext_filename, size, flat, compress)) { 1523 return -EINVAL; 1524 } 1525 filesize -= size; 1526 1527 /* Format description line */ 1528 snprintf(desc_line, sizeof(desc_line), 1529 desc_extent_line, size / 512, desc_filename); 1530 pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line); 1531 } 1532 /* generate descriptor file */ 1533 snprintf(desc, sizeof(desc), desc_template, 1534 (unsigned int)time(NULL), 1535 parent_cid, 1536 fmt, 1537 parent_desc_line, 1538 ext_desc_lines, 1539 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), 1540 total_size / (int64_t)(63 * 16 * 512)); 1541 if (split || flat) { 1542 fd = qemu_open(filename, 1543 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1544 0644); 1545 } else { 1546 fd = qemu_open(filename, 1547 O_WRONLY | O_BINARY | O_LARGEFILE, 1548 0644); 1549 } 1550 if (fd < 0) { 1551 return -errno; 1552 } 1553 /* the descriptor offset = 0x200 */ 1554 if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) { 1555 ret = -errno; 1556 goto exit; 1557 } 1558 ret = qemu_write_full(fd, desc, strlen(desc)); 1559 if (ret != strlen(desc)) { 1560 ret = -errno; 1561 goto exit; 1562 } 1563 ret = 0; 1564 exit: 1565 qemu_close(fd); 1566 return ret; 1567 } 1568 1569 static void vmdk_close(BlockDriverState *bs) 1570 { 1571 BDRVVmdkState *s = bs->opaque; 1572 1573 vmdk_free_extents(bs); 1574 1575 migrate_del_blocker(s->migration_blocker); 1576 error_free(s->migration_blocker); 1577 } 1578 1579 static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) 1580 { 1581 BDRVVmdkState *s = bs->opaque; 1582 int i, err; 1583 int ret = 0; 1584 1585 for (i = 0; i < s->num_extents; i++) { 1586 err = bdrv_co_flush(s->extents[i].file); 1587 if (err < 0) { 1588 ret = err; 1589 } 1590 } 1591 return ret; 1592 } 1593 1594 static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) 1595 { 1596 int i; 1597 int64_t ret = 0; 1598 int64_t r; 1599 BDRVVmdkState *s = bs->opaque; 1600 1601 ret = bdrv_get_allocated_file_size(bs->file); 1602 if (ret < 0) { 1603 return ret; 1604 } 1605 for (i = 0; i < s->num_extents; i++) { 1606 if (s->extents[i].file == bs->file) { 1607 continue; 1608 } 1609 r = bdrv_get_allocated_file_size(s->extents[i].file); 1610 if (r < 0) { 1611 return r; 1612 } 1613 ret += r; 1614 } 1615 return ret; 1616 } 1617 1618 static QEMUOptionParameter vmdk_create_options[] = { 1619 { 1620 .name = BLOCK_OPT_SIZE, 1621 .type = OPT_SIZE, 1622 .help = "Virtual disk size" 1623 }, 1624 { 1625 .name = BLOCK_OPT_BACKING_FILE, 1626 .type = OPT_STRING, 1627 .help = "File name of a base image" 1628 }, 1629 { 1630 .name = BLOCK_OPT_COMPAT6, 1631 .type = OPT_FLAG, 1632 .help = "VMDK version 6 image" 1633 }, 1634 { 1635 .name = BLOCK_OPT_SUBFMT, 1636 .type = OPT_STRING, 1637 .help = 1638 "VMDK flat extent format, can be one of " 1639 "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " 1640 }, 1641 { NULL } 1642 }; 1643 1644 static BlockDriver bdrv_vmdk = { 1645 .format_name = "vmdk", 1646 .instance_size = sizeof(BDRVVmdkState), 1647 .bdrv_probe = vmdk_probe, 1648 .bdrv_open = vmdk_open, 1649 .bdrv_read = vmdk_co_read, 1650 .bdrv_write = vmdk_co_write, 1651 .bdrv_close = vmdk_close, 1652 .bdrv_create = vmdk_create, 1653 .bdrv_co_flush_to_disk = vmdk_co_flush, 1654 .bdrv_co_is_allocated = vmdk_co_is_allocated, 1655 .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, 1656 1657 .create_options = vmdk_create_options, 1658 }; 1659 1660 static void bdrv_vmdk_init(void) 1661 { 1662 bdrv_register(&bdrv_vmdk); 1663 } 1664 1665 block_init(bdrv_vmdk_init); 1666