1 /* 2 * Block driver for the VMDK format 3 * 4 * Copyright (c) 2004 Fabrice Bellard 5 * Copyright (c) 2005 Filip Navara 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu-common.h" 27 #include "block/block_int.h" 28 #include "qemu/module.h" 29 #include "migration/migration.h" 30 #include <zlib.h> 31 32 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 33 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 34 #define VMDK4_COMPRESSION_DEFLATE 1 35 #define VMDK4_FLAG_RGD (1 << 1) 36 #define VMDK4_FLAG_COMPRESS (1 << 16) 37 #define VMDK4_FLAG_MARKER (1 << 17) 38 #define VMDK4_GD_AT_END 0xffffffffffffffffULL 39 40 typedef struct { 41 uint32_t version; 42 uint32_t flags; 43 uint32_t disk_sectors; 44 uint32_t granularity; 45 uint32_t l1dir_offset; 46 uint32_t l1dir_size; 47 uint32_t file_sectors; 48 uint32_t cylinders; 49 uint32_t heads; 50 uint32_t sectors_per_track; 51 } VMDK3Header; 52 53 typedef struct { 54 uint32_t version; 55 uint32_t flags; 56 int64_t capacity; 57 int64_t granularity; 58 int64_t desc_offset; 59 int64_t desc_size; 60 int32_t num_gtes_per_gte; 61 int64_t rgd_offset; 62 int64_t gd_offset; 63 int64_t grain_offset; 64 char filler[1]; 65 char check_bytes[4]; 66 uint16_t compressAlgorithm; 67 } QEMU_PACKED VMDK4Header; 68 69 #define L2_CACHE_SIZE 16 70 71 typedef struct VmdkExtent { 72 BlockDriverState *file; 73 bool flat; 74 bool compressed; 75 bool has_marker; 76 int64_t sectors; 77 int64_t end_sector; 78 int64_t flat_start_offset; 79 int64_t l1_table_offset; 80 int64_t l1_backup_table_offset; 81 uint32_t *l1_table; 82 uint32_t *l1_backup_table; 83 unsigned int l1_size; 84 uint32_t l1_entry_sectors; 85 86 unsigned int l2_size; 87 uint32_t *l2_cache; 88 uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 89 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 90 91 unsigned int cluster_sectors; 92 } VmdkExtent; 93 94 typedef struct BDRVVmdkState { 95 CoMutex lock; 96 int desc_offset; 97 bool cid_updated; 98 uint32_t parent_cid; 99 int num_extents; 100 /* Extent array with num_extents entries, ascend ordered by address */ 101 VmdkExtent *extents; 102 Error *migration_blocker; 103 } BDRVVmdkState; 104 105 typedef struct VmdkMetaData { 106 uint32_t offset; 107 unsigned int l1_index; 108 unsigned int l2_index; 109 unsigned int l2_offset; 110 int valid; 111 } VmdkMetaData; 112 113 typedef struct VmdkGrainMarker { 114 uint64_t lba; 115 uint32_t size; 116 uint8_t data[0]; 117 } VmdkGrainMarker; 118 119 enum { 120 MARKER_END_OF_STREAM = 0, 121 MARKER_GRAIN_TABLE = 1, 122 MARKER_GRAIN_DIRECTORY = 2, 123 MARKER_FOOTER = 3, 124 }; 125 126 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 127 { 128 uint32_t magic; 129 130 if (buf_size < 4) { 131 return 0; 132 } 133 magic = be32_to_cpu(*(uint32_t *)buf); 134 if (magic == VMDK3_MAGIC || 135 magic == VMDK4_MAGIC) { 136 return 100; 137 } else { 138 const char *p = (const char *)buf; 139 const char *end = p + buf_size; 140 while (p < end) { 141 if (*p == '#') { 142 /* skip comment line */ 143 while (p < end && *p != '\n') { 144 p++; 145 } 146 p++; 147 continue; 148 } 149 if (*p == ' ') { 150 while (p < end && *p == ' ') { 151 p++; 152 } 153 /* skip '\r' if windows line endings used. */ 154 if (p < end && *p == '\r') { 155 p++; 156 } 157 /* only accept blank lines before 'version=' line */ 158 if (p == end || *p != '\n') { 159 return 0; 160 } 161 p++; 162 continue; 163 } 164 if (end - p >= strlen("version=X\n")) { 165 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || 166 strncmp("version=2\n", p, strlen("version=2\n")) == 0) { 167 return 100; 168 } 169 } 170 if (end - p >= strlen("version=X\r\n")) { 171 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || 172 strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) { 173 return 100; 174 } 175 } 176 return 0; 177 } 178 return 0; 179 } 180 } 181 182 #define CHECK_CID 1 183 184 #define SECTOR_SIZE 512 185 #define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */ 186 #define BUF_SIZE 4096 187 #define HEADER_SIZE 512 /* first sector of 512 bytes */ 188 189 static void vmdk_free_extents(BlockDriverState *bs) 190 { 191 int i; 192 BDRVVmdkState *s = bs->opaque; 193 VmdkExtent *e; 194 195 for (i = 0; i < s->num_extents; i++) { 196 e = &s->extents[i]; 197 g_free(e->l1_table); 198 g_free(e->l2_cache); 199 g_free(e->l1_backup_table); 200 if (e->file != bs->file) { 201 bdrv_delete(e->file); 202 } 203 } 204 g_free(s->extents); 205 } 206 207 static void vmdk_free_last_extent(BlockDriverState *bs) 208 { 209 BDRVVmdkState *s = bs->opaque; 210 211 if (s->num_extents == 0) { 212 return; 213 } 214 s->num_extents--; 215 s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent)); 216 } 217 218 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) 219 { 220 char desc[DESC_SIZE]; 221 uint32_t cid = 0xffffffff; 222 const char *p_name, *cid_str; 223 size_t cid_str_size; 224 BDRVVmdkState *s = bs->opaque; 225 int ret; 226 227 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 228 if (ret < 0) { 229 return 0; 230 } 231 232 if (parent) { 233 cid_str = "parentCID"; 234 cid_str_size = sizeof("parentCID"); 235 } else { 236 cid_str = "CID"; 237 cid_str_size = sizeof("CID"); 238 } 239 240 desc[DESC_SIZE - 1] = '\0'; 241 p_name = strstr(desc, cid_str); 242 if (p_name != NULL) { 243 p_name += cid_str_size; 244 sscanf(p_name, "%x", &cid); 245 } 246 247 return cid; 248 } 249 250 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 251 { 252 char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; 253 char *p_name, *tmp_str; 254 BDRVVmdkState *s = bs->opaque; 255 int ret; 256 257 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 258 if (ret < 0) { 259 return ret; 260 } 261 262 desc[DESC_SIZE - 1] = '\0'; 263 tmp_str = strstr(desc, "parentCID"); 264 if (tmp_str == NULL) { 265 return -EINVAL; 266 } 267 268 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); 269 p_name = strstr(desc, "CID"); 270 if (p_name != NULL) { 271 p_name += sizeof("CID"); 272 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); 273 pstrcat(desc, sizeof(desc), tmp_desc); 274 } 275 276 ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE); 277 if (ret < 0) { 278 return ret; 279 } 280 281 return 0; 282 } 283 284 static int vmdk_is_cid_valid(BlockDriverState *bs) 285 { 286 #ifdef CHECK_CID 287 BDRVVmdkState *s = bs->opaque; 288 BlockDriverState *p_bs = bs->backing_hd; 289 uint32_t cur_pcid; 290 291 if (p_bs) { 292 cur_pcid = vmdk_read_cid(p_bs, 0); 293 if (s->parent_cid != cur_pcid) { 294 /* CID not valid */ 295 return 0; 296 } 297 } 298 #endif 299 /* CID valid */ 300 return 1; 301 } 302 303 /* Queue extents, if any, for reopen() */ 304 static int vmdk_reopen_prepare(BDRVReopenState *state, 305 BlockReopenQueue *queue, Error **errp) 306 { 307 BDRVVmdkState *s; 308 int ret = -1; 309 int i; 310 VmdkExtent *e; 311 312 assert(state != NULL); 313 assert(state->bs != NULL); 314 315 if (queue == NULL) { 316 error_set(errp, ERROR_CLASS_GENERIC_ERROR, 317 "No reopen queue for VMDK extents"); 318 goto exit; 319 } 320 321 s = state->bs->opaque; 322 323 assert(s != NULL); 324 325 for (i = 0; i < s->num_extents; i++) { 326 e = &s->extents[i]; 327 if (e->file != state->bs->file) { 328 bdrv_reopen_queue(queue, e->file, state->flags); 329 } 330 } 331 ret = 0; 332 333 exit: 334 return ret; 335 } 336 337 static int vmdk_parent_open(BlockDriverState *bs) 338 { 339 char *p_name; 340 char desc[DESC_SIZE + 1]; 341 BDRVVmdkState *s = bs->opaque; 342 int ret; 343 344 desc[DESC_SIZE] = '\0'; 345 ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 346 if (ret < 0) { 347 return ret; 348 } 349 350 p_name = strstr(desc, "parentFileNameHint"); 351 if (p_name != NULL) { 352 char *end_name; 353 354 p_name += sizeof("parentFileNameHint") + 1; 355 end_name = strchr(p_name, '\"'); 356 if (end_name == NULL) { 357 return -EINVAL; 358 } 359 if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { 360 return -EINVAL; 361 } 362 363 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 364 } 365 366 return 0; 367 } 368 369 /* Create and append extent to the extent array. Return the added VmdkExtent 370 * address. return NULL if allocation failed. */ 371 static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, 372 BlockDriverState *file, bool flat, int64_t sectors, 373 int64_t l1_offset, int64_t l1_backup_offset, 374 uint32_t l1_size, 375 int l2_size, unsigned int cluster_sectors) 376 { 377 VmdkExtent *extent; 378 BDRVVmdkState *s = bs->opaque; 379 380 s->extents = g_realloc(s->extents, 381 (s->num_extents + 1) * sizeof(VmdkExtent)); 382 extent = &s->extents[s->num_extents]; 383 s->num_extents++; 384 385 memset(extent, 0, sizeof(VmdkExtent)); 386 extent->file = file; 387 extent->flat = flat; 388 extent->sectors = sectors; 389 extent->l1_table_offset = l1_offset; 390 extent->l1_backup_table_offset = l1_backup_offset; 391 extent->l1_size = l1_size; 392 extent->l1_entry_sectors = l2_size * cluster_sectors; 393 extent->l2_size = l2_size; 394 extent->cluster_sectors = cluster_sectors; 395 396 if (s->num_extents > 1) { 397 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; 398 } else { 399 extent->end_sector = extent->sectors; 400 } 401 bs->total_sectors = extent->end_sector; 402 return extent; 403 } 404 405 static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) 406 { 407 int ret; 408 int l1_size, i; 409 410 /* read the L1 table */ 411 l1_size = extent->l1_size * sizeof(uint32_t); 412 extent->l1_table = g_malloc(l1_size); 413 ret = bdrv_pread(extent->file, 414 extent->l1_table_offset, 415 extent->l1_table, 416 l1_size); 417 if (ret < 0) { 418 goto fail_l1; 419 } 420 for (i = 0; i < extent->l1_size; i++) { 421 le32_to_cpus(&extent->l1_table[i]); 422 } 423 424 if (extent->l1_backup_table_offset) { 425 extent->l1_backup_table = g_malloc(l1_size); 426 ret = bdrv_pread(extent->file, 427 extent->l1_backup_table_offset, 428 extent->l1_backup_table, 429 l1_size); 430 if (ret < 0) { 431 goto fail_l1b; 432 } 433 for (i = 0; i < extent->l1_size; i++) { 434 le32_to_cpus(&extent->l1_backup_table[i]); 435 } 436 } 437 438 extent->l2_cache = 439 g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); 440 return 0; 441 fail_l1b: 442 g_free(extent->l1_backup_table); 443 fail_l1: 444 g_free(extent->l1_table); 445 return ret; 446 } 447 448 static int vmdk_open_vmdk3(BlockDriverState *bs, 449 BlockDriverState *file, 450 int flags) 451 { 452 int ret; 453 uint32_t magic; 454 VMDK3Header header; 455 VmdkExtent *extent; 456 457 ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 458 if (ret < 0) { 459 return ret; 460 } 461 extent = vmdk_add_extent(bs, 462 bs->file, false, 463 le32_to_cpu(header.disk_sectors), 464 le32_to_cpu(header.l1dir_offset) << 9, 465 0, 1 << 6, 1 << 9, 466 le32_to_cpu(header.granularity)); 467 ret = vmdk_init_tables(bs, extent); 468 if (ret) { 469 /* free extent allocated by vmdk_add_extent */ 470 vmdk_free_last_extent(bs); 471 } 472 return ret; 473 } 474 475 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 476 int64_t desc_offset); 477 478 static int vmdk_open_vmdk4(BlockDriverState *bs, 479 BlockDriverState *file, 480 int flags) 481 { 482 int ret; 483 uint32_t magic; 484 uint32_t l1_size, l1_entry_sectors; 485 VMDK4Header header; 486 VmdkExtent *extent; 487 int64_t l1_backup_offset = 0; 488 489 ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 490 if (ret < 0) { 491 return ret; 492 } 493 if (header.capacity == 0 && header.desc_offset) { 494 return vmdk_open_desc_file(bs, flags, header.desc_offset << 9); 495 } 496 497 if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { 498 /* 499 * The footer takes precedence over the header, so read it in. The 500 * footer starts at offset -1024 from the end: One sector for the 501 * footer, and another one for the end-of-stream marker. 502 */ 503 struct { 504 struct { 505 uint64_t val; 506 uint32_t size; 507 uint32_t type; 508 uint8_t pad[512 - 16]; 509 } QEMU_PACKED footer_marker; 510 511 uint32_t magic; 512 VMDK4Header header; 513 uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; 514 515 struct { 516 uint64_t val; 517 uint32_t size; 518 uint32_t type; 519 uint8_t pad[512 - 16]; 520 } QEMU_PACKED eos_marker; 521 } QEMU_PACKED footer; 522 523 ret = bdrv_pread(file, 524 bs->file->total_sectors * 512 - 1536, 525 &footer, sizeof(footer)); 526 if (ret < 0) { 527 return ret; 528 } 529 530 /* Some sanity checks for the footer */ 531 if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || 532 le32_to_cpu(footer.footer_marker.size) != 0 || 533 le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || 534 le64_to_cpu(footer.eos_marker.val) != 0 || 535 le32_to_cpu(footer.eos_marker.size) != 0 || 536 le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) 537 { 538 return -EINVAL; 539 } 540 541 header = footer.header; 542 } 543 544 l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) 545 * le64_to_cpu(header.granularity); 546 if (l1_entry_sectors == 0) { 547 return -EINVAL; 548 } 549 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1) 550 / l1_entry_sectors; 551 if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { 552 l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; 553 } 554 extent = vmdk_add_extent(bs, file, false, 555 le64_to_cpu(header.capacity), 556 le64_to_cpu(header.gd_offset) << 9, 557 l1_backup_offset, 558 l1_size, 559 le32_to_cpu(header.num_gtes_per_gte), 560 le64_to_cpu(header.granularity)); 561 extent->compressed = 562 le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; 563 extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; 564 ret = vmdk_init_tables(bs, extent); 565 if (ret) { 566 /* free extent allocated by vmdk_add_extent */ 567 vmdk_free_last_extent(bs); 568 } 569 return ret; 570 } 571 572 /* find an option value out of descriptor file */ 573 static int vmdk_parse_description(const char *desc, const char *opt_name, 574 char *buf, int buf_size) 575 { 576 char *opt_pos, *opt_end; 577 const char *end = desc + strlen(desc); 578 579 opt_pos = strstr(desc, opt_name); 580 if (!opt_pos) { 581 return -1; 582 } 583 /* Skip "=\"" following opt_name */ 584 opt_pos += strlen(opt_name) + 2; 585 if (opt_pos >= end) { 586 return -1; 587 } 588 opt_end = opt_pos; 589 while (opt_end < end && *opt_end != '"') { 590 opt_end++; 591 } 592 if (opt_end == end || buf_size < opt_end - opt_pos + 1) { 593 return -1; 594 } 595 pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); 596 return 0; 597 } 598 599 /* Open an extent file and append to bs array */ 600 static int vmdk_open_sparse(BlockDriverState *bs, 601 BlockDriverState *file, 602 int flags) 603 { 604 uint32_t magic; 605 606 if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) { 607 return -EIO; 608 } 609 610 magic = be32_to_cpu(magic); 611 switch (magic) { 612 case VMDK3_MAGIC: 613 return vmdk_open_vmdk3(bs, file, flags); 614 break; 615 case VMDK4_MAGIC: 616 return vmdk_open_vmdk4(bs, file, flags); 617 break; 618 default: 619 return -EINVAL; 620 break; 621 } 622 } 623 624 static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, 625 const char *desc_file_path) 626 { 627 int ret; 628 char access[11]; 629 char type[11]; 630 char fname[512]; 631 const char *p = desc; 632 int64_t sectors = 0; 633 int64_t flat_offset; 634 char extent_path[PATH_MAX]; 635 BlockDriverState *extent_file; 636 637 while (*p) { 638 /* parse extent line: 639 * RW [size in sectors] FLAT "file-name.vmdk" OFFSET 640 * or 641 * RW [size in sectors] SPARSE "file-name.vmdk" 642 */ 643 flat_offset = -1; 644 ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64, 645 access, §ors, type, fname, &flat_offset); 646 if (ret < 4 || strcmp(access, "RW")) { 647 goto next_line; 648 } else if (!strcmp(type, "FLAT")) { 649 if (ret != 5 || flat_offset < 0) { 650 return -EINVAL; 651 } 652 } else if (ret != 4) { 653 return -EINVAL; 654 } 655 656 /* trim the quotation marks around */ 657 if (fname[0] == '"') { 658 memmove(fname, fname + 1, strlen(fname)); 659 if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') { 660 return -EINVAL; 661 } 662 fname[strlen(fname) - 1] = '\0'; 663 } 664 if (sectors <= 0 || 665 (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) || 666 (strcmp(access, "RW"))) { 667 goto next_line; 668 } 669 670 path_combine(extent_path, sizeof(extent_path), 671 desc_file_path, fname); 672 ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags); 673 if (ret) { 674 return ret; 675 } 676 677 /* save to extents array */ 678 if (!strcmp(type, "FLAT")) { 679 /* FLAT extent */ 680 VmdkExtent *extent; 681 682 extent = vmdk_add_extent(bs, extent_file, true, sectors, 683 0, 0, 0, 0, sectors); 684 extent->flat_start_offset = flat_offset << 9; 685 } else if (!strcmp(type, "SPARSE")) { 686 /* SPARSE extent */ 687 ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); 688 if (ret) { 689 bdrv_delete(extent_file); 690 return ret; 691 } 692 } else { 693 fprintf(stderr, 694 "VMDK: Not supported extent type \"%s\""".\n", type); 695 return -ENOTSUP; 696 } 697 next_line: 698 /* move to next line */ 699 while (*p && *p != '\n') { 700 p++; 701 } 702 p++; 703 } 704 return 0; 705 } 706 707 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 708 int64_t desc_offset) 709 { 710 int ret; 711 char buf[2048]; 712 char ct[128]; 713 BDRVVmdkState *s = bs->opaque; 714 715 ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf)); 716 if (ret < 0) { 717 return ret; 718 } 719 buf[2047] = '\0'; 720 if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { 721 return -EINVAL; 722 } 723 if (strcmp(ct, "monolithicFlat") && 724 strcmp(ct, "twoGbMaxExtentSparse") && 725 strcmp(ct, "twoGbMaxExtentFlat")) { 726 fprintf(stderr, 727 "VMDK: Not supported image type \"%s\""".\n", ct); 728 return -ENOTSUP; 729 } 730 s->desc_offset = 0; 731 return vmdk_parse_extents(buf, bs, bs->file->filename); 732 } 733 734 static int vmdk_open(BlockDriverState *bs, int flags) 735 { 736 int ret; 737 BDRVVmdkState *s = bs->opaque; 738 739 if (vmdk_open_sparse(bs, bs->file, flags) == 0) { 740 s->desc_offset = 0x200; 741 } else { 742 ret = vmdk_open_desc_file(bs, flags, 0); 743 if (ret) { 744 goto fail; 745 } 746 } 747 /* try to open parent images, if exist */ 748 ret = vmdk_parent_open(bs); 749 if (ret) { 750 goto fail; 751 } 752 s->parent_cid = vmdk_read_cid(bs, 1); 753 qemu_co_mutex_init(&s->lock); 754 755 /* Disable migration when VMDK images are used */ 756 error_set(&s->migration_blocker, 757 QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, 758 "vmdk", bs->device_name, "live migration"); 759 migrate_add_blocker(s->migration_blocker); 760 761 return 0; 762 763 fail: 764 vmdk_free_extents(bs); 765 return ret; 766 } 767 768 static int get_whole_cluster(BlockDriverState *bs, 769 VmdkExtent *extent, 770 uint64_t cluster_offset, 771 uint64_t offset, 772 bool allocate) 773 { 774 /* 128 sectors * 512 bytes each = grain size 64KB */ 775 uint8_t whole_grain[extent->cluster_sectors * 512]; 776 777 /* we will be here if it's first write on non-exist grain(cluster). 778 * try to read from parent image, if exist */ 779 if (bs->backing_hd) { 780 int ret; 781 782 if (!vmdk_is_cid_valid(bs)) { 783 return -1; 784 } 785 786 /* floor offset to cluster */ 787 offset -= offset % (extent->cluster_sectors * 512); 788 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, 789 extent->cluster_sectors); 790 if (ret < 0) { 791 return -1; 792 } 793 794 /* Write grain only into the active image */ 795 ret = bdrv_write(extent->file, cluster_offset, whole_grain, 796 extent->cluster_sectors); 797 if (ret < 0) { 798 return -1; 799 } 800 } 801 return 0; 802 } 803 804 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) 805 { 806 /* update L2 table */ 807 if (bdrv_pwrite_sync( 808 extent->file, 809 ((int64_t)m_data->l2_offset * 512) 810 + (m_data->l2_index * sizeof(m_data->offset)), 811 &(m_data->offset), 812 sizeof(m_data->offset) 813 ) < 0) { 814 return -1; 815 } 816 /* update backup L2 table */ 817 if (extent->l1_backup_table_offset != 0) { 818 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; 819 if (bdrv_pwrite_sync( 820 extent->file, 821 ((int64_t)m_data->l2_offset * 512) 822 + (m_data->l2_index * sizeof(m_data->offset)), 823 &(m_data->offset), sizeof(m_data->offset) 824 ) < 0) { 825 return -1; 826 } 827 } 828 829 return 0; 830 } 831 832 static int get_cluster_offset(BlockDriverState *bs, 833 VmdkExtent *extent, 834 VmdkMetaData *m_data, 835 uint64_t offset, 836 int allocate, 837 uint64_t *cluster_offset) 838 { 839 unsigned int l1_index, l2_offset, l2_index; 840 int min_index, i, j; 841 uint32_t min_count, *l2_table, tmp = 0; 842 843 if (m_data) { 844 m_data->valid = 0; 845 } 846 if (extent->flat) { 847 *cluster_offset = extent->flat_start_offset; 848 return 0; 849 } 850 851 offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; 852 l1_index = (offset >> 9) / extent->l1_entry_sectors; 853 if (l1_index >= extent->l1_size) { 854 return -1; 855 } 856 l2_offset = extent->l1_table[l1_index]; 857 if (!l2_offset) { 858 return -1; 859 } 860 for (i = 0; i < L2_CACHE_SIZE; i++) { 861 if (l2_offset == extent->l2_cache_offsets[i]) { 862 /* increment the hit count */ 863 if (++extent->l2_cache_counts[i] == 0xffffffff) { 864 for (j = 0; j < L2_CACHE_SIZE; j++) { 865 extent->l2_cache_counts[j] >>= 1; 866 } 867 } 868 l2_table = extent->l2_cache + (i * extent->l2_size); 869 goto found; 870 } 871 } 872 /* not found: load a new entry in the least used one */ 873 min_index = 0; 874 min_count = 0xffffffff; 875 for (i = 0; i < L2_CACHE_SIZE; i++) { 876 if (extent->l2_cache_counts[i] < min_count) { 877 min_count = extent->l2_cache_counts[i]; 878 min_index = i; 879 } 880 } 881 l2_table = extent->l2_cache + (min_index * extent->l2_size); 882 if (bdrv_pread( 883 extent->file, 884 (int64_t)l2_offset * 512, 885 l2_table, 886 extent->l2_size * sizeof(uint32_t) 887 ) != extent->l2_size * sizeof(uint32_t)) { 888 return -1; 889 } 890 891 extent->l2_cache_offsets[min_index] = l2_offset; 892 extent->l2_cache_counts[min_index] = 1; 893 found: 894 l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; 895 *cluster_offset = le32_to_cpu(l2_table[l2_index]); 896 897 if (!*cluster_offset) { 898 if (!allocate) { 899 return -1; 900 } 901 902 /* Avoid the L2 tables update for the images that have snapshots. */ 903 *cluster_offset = bdrv_getlength(extent->file); 904 if (!extent->compressed) { 905 bdrv_truncate( 906 extent->file, 907 *cluster_offset + (extent->cluster_sectors << 9) 908 ); 909 } 910 911 *cluster_offset >>= 9; 912 tmp = cpu_to_le32(*cluster_offset); 913 l2_table[l2_index] = tmp; 914 915 /* First of all we write grain itself, to avoid race condition 916 * that may to corrupt the image. 917 * This problem may occur because of insufficient space on host disk 918 * or inappropriate VM shutdown. 919 */ 920 if (get_whole_cluster( 921 bs, extent, *cluster_offset, offset, allocate) == -1) { 922 return -1; 923 } 924 925 if (m_data) { 926 m_data->offset = tmp; 927 m_data->l1_index = l1_index; 928 m_data->l2_index = l2_index; 929 m_data->l2_offset = l2_offset; 930 m_data->valid = 1; 931 } 932 } 933 *cluster_offset <<= 9; 934 return 0; 935 } 936 937 static VmdkExtent *find_extent(BDRVVmdkState *s, 938 int64_t sector_num, VmdkExtent *start_hint) 939 { 940 VmdkExtent *extent = start_hint; 941 942 if (!extent) { 943 extent = &s->extents[0]; 944 } 945 while (extent < &s->extents[s->num_extents]) { 946 if (sector_num < extent->end_sector) { 947 return extent; 948 } 949 extent++; 950 } 951 return NULL; 952 } 953 954 static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, 955 int64_t sector_num, int nb_sectors, int *pnum) 956 { 957 BDRVVmdkState *s = bs->opaque; 958 int64_t index_in_cluster, n, ret; 959 uint64_t offset; 960 VmdkExtent *extent; 961 962 extent = find_extent(s, sector_num, NULL); 963 if (!extent) { 964 return 0; 965 } 966 qemu_co_mutex_lock(&s->lock); 967 ret = get_cluster_offset(bs, extent, NULL, 968 sector_num * 512, 0, &offset); 969 qemu_co_mutex_unlock(&s->lock); 970 /* get_cluster_offset returning 0 means success */ 971 ret = !ret; 972 973 index_in_cluster = sector_num % extent->cluster_sectors; 974 n = extent->cluster_sectors - index_in_cluster; 975 if (n > nb_sectors) { 976 n = nb_sectors; 977 } 978 *pnum = n; 979 return ret; 980 } 981 982 static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, 983 int64_t offset_in_cluster, const uint8_t *buf, 984 int nb_sectors, int64_t sector_num) 985 { 986 int ret; 987 VmdkGrainMarker *data = NULL; 988 uLongf buf_len; 989 const uint8_t *write_buf = buf; 990 int write_len = nb_sectors * 512; 991 992 if (extent->compressed) { 993 if (!extent->has_marker) { 994 ret = -EINVAL; 995 goto out; 996 } 997 buf_len = (extent->cluster_sectors << 9) * 2; 998 data = g_malloc(buf_len + sizeof(VmdkGrainMarker)); 999 if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK || 1000 buf_len == 0) { 1001 ret = -EINVAL; 1002 goto out; 1003 } 1004 data->lba = sector_num; 1005 data->size = buf_len; 1006 write_buf = (uint8_t *)data; 1007 write_len = buf_len + sizeof(VmdkGrainMarker); 1008 } 1009 ret = bdrv_pwrite(extent->file, 1010 cluster_offset + offset_in_cluster, 1011 write_buf, 1012 write_len); 1013 if (ret != write_len) { 1014 ret = ret < 0 ? ret : -EIO; 1015 goto out; 1016 } 1017 ret = 0; 1018 out: 1019 g_free(data); 1020 return ret; 1021 } 1022 1023 static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, 1024 int64_t offset_in_cluster, uint8_t *buf, 1025 int nb_sectors) 1026 { 1027 int ret; 1028 int cluster_bytes, buf_bytes; 1029 uint8_t *cluster_buf, *compressed_data; 1030 uint8_t *uncomp_buf; 1031 uint32_t data_len; 1032 VmdkGrainMarker *marker; 1033 uLongf buf_len; 1034 1035 1036 if (!extent->compressed) { 1037 ret = bdrv_pread(extent->file, 1038 cluster_offset + offset_in_cluster, 1039 buf, nb_sectors * 512); 1040 if (ret == nb_sectors * 512) { 1041 return 0; 1042 } else { 1043 return -EIO; 1044 } 1045 } 1046 cluster_bytes = extent->cluster_sectors * 512; 1047 /* Read two clusters in case GrainMarker + compressed data > one cluster */ 1048 buf_bytes = cluster_bytes * 2; 1049 cluster_buf = g_malloc(buf_bytes); 1050 uncomp_buf = g_malloc(cluster_bytes); 1051 ret = bdrv_pread(extent->file, 1052 cluster_offset, 1053 cluster_buf, buf_bytes); 1054 if (ret < 0) { 1055 goto out; 1056 } 1057 compressed_data = cluster_buf; 1058 buf_len = cluster_bytes; 1059 data_len = cluster_bytes; 1060 if (extent->has_marker) { 1061 marker = (VmdkGrainMarker *)cluster_buf; 1062 compressed_data = marker->data; 1063 data_len = le32_to_cpu(marker->size); 1064 } 1065 if (!data_len || data_len > buf_bytes) { 1066 ret = -EINVAL; 1067 goto out; 1068 } 1069 ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len); 1070 if (ret != Z_OK) { 1071 ret = -EINVAL; 1072 goto out; 1073 1074 } 1075 if (offset_in_cluster < 0 || 1076 offset_in_cluster + nb_sectors * 512 > buf_len) { 1077 ret = -EINVAL; 1078 goto out; 1079 } 1080 memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512); 1081 ret = 0; 1082 1083 out: 1084 g_free(uncomp_buf); 1085 g_free(cluster_buf); 1086 return ret; 1087 } 1088 1089 static int vmdk_read(BlockDriverState *bs, int64_t sector_num, 1090 uint8_t *buf, int nb_sectors) 1091 { 1092 BDRVVmdkState *s = bs->opaque; 1093 int ret; 1094 uint64_t n, index_in_cluster; 1095 uint64_t extent_begin_sector, extent_relative_sector_num; 1096 VmdkExtent *extent = NULL; 1097 uint64_t cluster_offset; 1098 1099 while (nb_sectors > 0) { 1100 extent = find_extent(s, sector_num, extent); 1101 if (!extent) { 1102 return -EIO; 1103 } 1104 ret = get_cluster_offset( 1105 bs, extent, NULL, 1106 sector_num << 9, 0, &cluster_offset); 1107 extent_begin_sector = extent->end_sector - extent->sectors; 1108 extent_relative_sector_num = sector_num - extent_begin_sector; 1109 index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; 1110 n = extent->cluster_sectors - index_in_cluster; 1111 if (n > nb_sectors) { 1112 n = nb_sectors; 1113 } 1114 if (ret) { 1115 /* if not allocated, try to read from parent image, if exist */ 1116 if (bs->backing_hd) { 1117 if (!vmdk_is_cid_valid(bs)) { 1118 return -EINVAL; 1119 } 1120 ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 1121 if (ret < 0) { 1122 return ret; 1123 } 1124 } else { 1125 memset(buf, 0, 512 * n); 1126 } 1127 } else { 1128 ret = vmdk_read_extent(extent, 1129 cluster_offset, index_in_cluster * 512, 1130 buf, n); 1131 if (ret) { 1132 return ret; 1133 } 1134 } 1135 nb_sectors -= n; 1136 sector_num += n; 1137 buf += n * 512; 1138 } 1139 return 0; 1140 } 1141 1142 static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, 1143 uint8_t *buf, int nb_sectors) 1144 { 1145 int ret; 1146 BDRVVmdkState *s = bs->opaque; 1147 qemu_co_mutex_lock(&s->lock); 1148 ret = vmdk_read(bs, sector_num, buf, nb_sectors); 1149 qemu_co_mutex_unlock(&s->lock); 1150 return ret; 1151 } 1152 1153 static int vmdk_write(BlockDriverState *bs, int64_t sector_num, 1154 const uint8_t *buf, int nb_sectors) 1155 { 1156 BDRVVmdkState *s = bs->opaque; 1157 VmdkExtent *extent = NULL; 1158 int n, ret; 1159 int64_t index_in_cluster; 1160 uint64_t extent_begin_sector, extent_relative_sector_num; 1161 uint64_t cluster_offset; 1162 VmdkMetaData m_data; 1163 1164 if (sector_num > bs->total_sectors) { 1165 fprintf(stderr, 1166 "(VMDK) Wrong offset: sector_num=0x%" PRIx64 1167 " total_sectors=0x%" PRIx64 "\n", 1168 sector_num, bs->total_sectors); 1169 return -EIO; 1170 } 1171 1172 while (nb_sectors > 0) { 1173 extent = find_extent(s, sector_num, extent); 1174 if (!extent) { 1175 return -EIO; 1176 } 1177 ret = get_cluster_offset( 1178 bs, 1179 extent, 1180 &m_data, 1181 sector_num << 9, !extent->compressed, 1182 &cluster_offset); 1183 if (extent->compressed) { 1184 if (ret == 0) { 1185 /* Refuse write to allocated cluster for streamOptimized */ 1186 fprintf(stderr, 1187 "VMDK: can't write to allocated cluster" 1188 " for streamOptimized\n"); 1189 return -EIO; 1190 } else { 1191 /* allocate */ 1192 ret = get_cluster_offset( 1193 bs, 1194 extent, 1195 &m_data, 1196 sector_num << 9, 1, 1197 &cluster_offset); 1198 } 1199 } 1200 if (ret) { 1201 return -EINVAL; 1202 } 1203 extent_begin_sector = extent->end_sector - extent->sectors; 1204 extent_relative_sector_num = sector_num - extent_begin_sector; 1205 index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; 1206 n = extent->cluster_sectors - index_in_cluster; 1207 if (n > nb_sectors) { 1208 n = nb_sectors; 1209 } 1210 1211 ret = vmdk_write_extent(extent, 1212 cluster_offset, index_in_cluster * 512, 1213 buf, n, sector_num); 1214 if (ret) { 1215 return ret; 1216 } 1217 if (m_data.valid) { 1218 /* update L2 tables */ 1219 if (vmdk_L2update(extent, &m_data) == -1) { 1220 return -EIO; 1221 } 1222 } 1223 nb_sectors -= n; 1224 sector_num += n; 1225 buf += n * 512; 1226 1227 /* update CID on the first write every time the virtual disk is 1228 * opened */ 1229 if (!s->cid_updated) { 1230 ret = vmdk_write_cid(bs, time(NULL)); 1231 if (ret < 0) { 1232 return ret; 1233 } 1234 s->cid_updated = true; 1235 } 1236 } 1237 return 0; 1238 } 1239 1240 static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, 1241 const uint8_t *buf, int nb_sectors) 1242 { 1243 int ret; 1244 BDRVVmdkState *s = bs->opaque; 1245 qemu_co_mutex_lock(&s->lock); 1246 ret = vmdk_write(bs, sector_num, buf, nb_sectors); 1247 qemu_co_mutex_unlock(&s->lock); 1248 return ret; 1249 } 1250 1251 1252 static int vmdk_create_extent(const char *filename, int64_t filesize, 1253 bool flat, bool compress) 1254 { 1255 int ret, i; 1256 int fd = 0; 1257 VMDK4Header header; 1258 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; 1259 1260 fd = qemu_open(filename, 1261 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1262 0644); 1263 if (fd < 0) { 1264 return -errno; 1265 } 1266 if (flat) { 1267 ret = ftruncate(fd, filesize); 1268 if (ret < 0) { 1269 ret = -errno; 1270 } 1271 goto exit; 1272 } 1273 magic = cpu_to_be32(VMDK4_MAGIC); 1274 memset(&header, 0, sizeof(header)); 1275 header.version = 1; 1276 header.flags = 1277 3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0); 1278 header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; 1279 header.capacity = filesize / 512; 1280 header.granularity = 128; 1281 header.num_gtes_per_gte = 512; 1282 1283 grains = (filesize / 512 + header.granularity - 1) / header.granularity; 1284 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; 1285 gt_count = 1286 (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; 1287 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; 1288 1289 header.desc_offset = 1; 1290 header.desc_size = 20; 1291 header.rgd_offset = header.desc_offset + header.desc_size; 1292 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); 1293 header.grain_offset = 1294 ((header.gd_offset + gd_size + (gt_size * gt_count) + 1295 header.granularity - 1) / header.granularity) * 1296 header.granularity; 1297 /* swap endianness for all header fields */ 1298 header.version = cpu_to_le32(header.version); 1299 header.flags = cpu_to_le32(header.flags); 1300 header.capacity = cpu_to_le64(header.capacity); 1301 header.granularity = cpu_to_le64(header.granularity); 1302 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); 1303 header.desc_offset = cpu_to_le64(header.desc_offset); 1304 header.desc_size = cpu_to_le64(header.desc_size); 1305 header.rgd_offset = cpu_to_le64(header.rgd_offset); 1306 header.gd_offset = cpu_to_le64(header.gd_offset); 1307 header.grain_offset = cpu_to_le64(header.grain_offset); 1308 header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm); 1309 1310 header.check_bytes[0] = 0xa; 1311 header.check_bytes[1] = 0x20; 1312 header.check_bytes[2] = 0xd; 1313 header.check_bytes[3] = 0xa; 1314 1315 /* write all the data */ 1316 ret = qemu_write_full(fd, &magic, sizeof(magic)); 1317 if (ret != sizeof(magic)) { 1318 ret = -errno; 1319 goto exit; 1320 } 1321 ret = qemu_write_full(fd, &header, sizeof(header)); 1322 if (ret != sizeof(header)) { 1323 ret = -errno; 1324 goto exit; 1325 } 1326 1327 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9); 1328 if (ret < 0) { 1329 ret = -errno; 1330 goto exit; 1331 } 1332 1333 /* write grain directory */ 1334 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); 1335 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size; 1336 i < gt_count; i++, tmp += gt_size) { 1337 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 1338 if (ret != sizeof(tmp)) { 1339 ret = -errno; 1340 goto exit; 1341 } 1342 } 1343 1344 /* write backup grain directory */ 1345 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); 1346 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size; 1347 i < gt_count; i++, tmp += gt_size) { 1348 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 1349 if (ret != sizeof(tmp)) { 1350 ret = -errno; 1351 goto exit; 1352 } 1353 } 1354 1355 ret = 0; 1356 exit: 1357 qemu_close(fd); 1358 return ret; 1359 } 1360 1361 static int filename_decompose(const char *filename, char *path, char *prefix, 1362 char *postfix, size_t buf_len) 1363 { 1364 const char *p, *q; 1365 1366 if (filename == NULL || !strlen(filename)) { 1367 fprintf(stderr, "Vmdk: no filename provided.\n"); 1368 return -1; 1369 } 1370 p = strrchr(filename, '/'); 1371 if (p == NULL) { 1372 p = strrchr(filename, '\\'); 1373 } 1374 if (p == NULL) { 1375 p = strrchr(filename, ':'); 1376 } 1377 if (p != NULL) { 1378 p++; 1379 if (p - filename >= buf_len) { 1380 return -1; 1381 } 1382 pstrcpy(path, p - filename + 1, filename); 1383 } else { 1384 p = filename; 1385 path[0] = '\0'; 1386 } 1387 q = strrchr(p, '.'); 1388 if (q == NULL) { 1389 pstrcpy(prefix, buf_len, p); 1390 postfix[0] = '\0'; 1391 } else { 1392 if (q - p >= buf_len) { 1393 return -1; 1394 } 1395 pstrcpy(prefix, q - p + 1, p); 1396 pstrcpy(postfix, buf_len, q); 1397 } 1398 return 0; 1399 } 1400 1401 static int relative_path(char *dest, int dest_size, 1402 const char *base, const char *target) 1403 { 1404 int i = 0; 1405 int n = 0; 1406 const char *p, *q; 1407 #ifdef _WIN32 1408 const char *sep = "\\"; 1409 #else 1410 const char *sep = "/"; 1411 #endif 1412 1413 if (!(dest && base && target)) { 1414 return -1; 1415 } 1416 if (path_is_absolute(target)) { 1417 pstrcpy(dest, dest_size, target); 1418 return 0; 1419 } 1420 while (base[i] == target[i]) { 1421 i++; 1422 } 1423 p = &base[i]; 1424 q = &target[i]; 1425 while (*p) { 1426 if (*p == *sep) { 1427 n++; 1428 } 1429 p++; 1430 } 1431 dest[0] = '\0'; 1432 for (; n; n--) { 1433 pstrcat(dest, dest_size, ".."); 1434 pstrcat(dest, dest_size, sep); 1435 } 1436 pstrcat(dest, dest_size, q); 1437 return 0; 1438 } 1439 1440 static int vmdk_create(const char *filename, QEMUOptionParameter *options) 1441 { 1442 int fd, idx = 0; 1443 char desc[BUF_SIZE]; 1444 int64_t total_size = 0, filesize; 1445 const char *backing_file = NULL; 1446 const char *fmt = NULL; 1447 int flags = 0; 1448 int ret = 0; 1449 bool flat, split, compress; 1450 char ext_desc_lines[BUF_SIZE] = ""; 1451 char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX]; 1452 const int64_t split_size = 0x80000000; /* VMDK has constant split size */ 1453 const char *desc_extent_line; 1454 char parent_desc_line[BUF_SIZE] = ""; 1455 uint32_t parent_cid = 0xffffffff; 1456 const char desc_template[] = 1457 "# Disk DescriptorFile\n" 1458 "version=1\n" 1459 "CID=%x\n" 1460 "parentCID=%x\n" 1461 "createType=\"%s\"\n" 1462 "%s" 1463 "\n" 1464 "# Extent description\n" 1465 "%s" 1466 "\n" 1467 "# The Disk Data Base\n" 1468 "#DDB\n" 1469 "\n" 1470 "ddb.virtualHWVersion = \"%d\"\n" 1471 "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 1472 "ddb.geometry.heads = \"16\"\n" 1473 "ddb.geometry.sectors = \"63\"\n" 1474 "ddb.adapterType = \"ide\"\n"; 1475 1476 if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) { 1477 return -EINVAL; 1478 } 1479 /* Read out options */ 1480 while (options && options->name) { 1481 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1482 total_size = options->value.n; 1483 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1484 backing_file = options->value.s; 1485 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { 1486 flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; 1487 } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { 1488 fmt = options->value.s; 1489 } 1490 options++; 1491 } 1492 if (!fmt) { 1493 /* Default format to monolithicSparse */ 1494 fmt = "monolithicSparse"; 1495 } else if (strcmp(fmt, "monolithicFlat") && 1496 strcmp(fmt, "monolithicSparse") && 1497 strcmp(fmt, "twoGbMaxExtentSparse") && 1498 strcmp(fmt, "twoGbMaxExtentFlat") && 1499 strcmp(fmt, "streamOptimized")) { 1500 fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt); 1501 return -EINVAL; 1502 } 1503 split = !(strcmp(fmt, "twoGbMaxExtentFlat") && 1504 strcmp(fmt, "twoGbMaxExtentSparse")); 1505 flat = !(strcmp(fmt, "monolithicFlat") && 1506 strcmp(fmt, "twoGbMaxExtentFlat")); 1507 compress = !strcmp(fmt, "streamOptimized"); 1508 if (flat) { 1509 desc_extent_line = "RW %lld FLAT \"%s\" 0\n"; 1510 } else { 1511 desc_extent_line = "RW %lld SPARSE \"%s\"\n"; 1512 } 1513 if (flat && backing_file) { 1514 /* not supporting backing file for flat image */ 1515 return -ENOTSUP; 1516 } 1517 if (backing_file) { 1518 char parent_filename[PATH_MAX]; 1519 BlockDriverState *bs = bdrv_new(""); 1520 ret = bdrv_open(bs, backing_file, 0, NULL); 1521 if (ret != 0) { 1522 bdrv_delete(bs); 1523 return ret; 1524 } 1525 if (strcmp(bs->drv->format_name, "vmdk")) { 1526 bdrv_delete(bs); 1527 return -EINVAL; 1528 } 1529 parent_cid = vmdk_read_cid(bs, 0); 1530 bdrv_delete(bs); 1531 relative_path(parent_filename, sizeof(parent_filename), 1532 filename, backing_file); 1533 snprintf(parent_desc_line, sizeof(parent_desc_line), 1534 "parentFileNameHint=\"%s\"", parent_filename); 1535 } 1536 1537 /* Create extents */ 1538 filesize = total_size; 1539 while (filesize > 0) { 1540 char desc_line[BUF_SIZE]; 1541 char ext_filename[PATH_MAX]; 1542 char desc_filename[PATH_MAX]; 1543 int64_t size = filesize; 1544 1545 if (split && size > split_size) { 1546 size = split_size; 1547 } 1548 if (split) { 1549 snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s", 1550 prefix, flat ? 'f' : 's', ++idx, postfix); 1551 } else if (flat) { 1552 snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s", 1553 prefix, postfix); 1554 } else { 1555 snprintf(desc_filename, sizeof(desc_filename), "%s%s", 1556 prefix, postfix); 1557 } 1558 snprintf(ext_filename, sizeof(ext_filename), "%s%s", 1559 path, desc_filename); 1560 1561 if (vmdk_create_extent(ext_filename, size, flat, compress)) { 1562 return -EINVAL; 1563 } 1564 filesize -= size; 1565 1566 /* Format description line */ 1567 snprintf(desc_line, sizeof(desc_line), 1568 desc_extent_line, size / 512, desc_filename); 1569 pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line); 1570 } 1571 /* generate descriptor file */ 1572 snprintf(desc, sizeof(desc), desc_template, 1573 (unsigned int)time(NULL), 1574 parent_cid, 1575 fmt, 1576 parent_desc_line, 1577 ext_desc_lines, 1578 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), 1579 total_size / (int64_t)(63 * 16 * 512)); 1580 if (split || flat) { 1581 fd = qemu_open(filename, 1582 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1583 0644); 1584 } else { 1585 fd = qemu_open(filename, 1586 O_WRONLY | O_BINARY | O_LARGEFILE, 1587 0644); 1588 } 1589 if (fd < 0) { 1590 return -errno; 1591 } 1592 /* the descriptor offset = 0x200 */ 1593 if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) { 1594 ret = -errno; 1595 goto exit; 1596 } 1597 ret = qemu_write_full(fd, desc, strlen(desc)); 1598 if (ret != strlen(desc)) { 1599 ret = -errno; 1600 goto exit; 1601 } 1602 ret = 0; 1603 exit: 1604 qemu_close(fd); 1605 return ret; 1606 } 1607 1608 static void vmdk_close(BlockDriverState *bs) 1609 { 1610 BDRVVmdkState *s = bs->opaque; 1611 1612 vmdk_free_extents(bs); 1613 1614 migrate_del_blocker(s->migration_blocker); 1615 error_free(s->migration_blocker); 1616 } 1617 1618 static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) 1619 { 1620 BDRVVmdkState *s = bs->opaque; 1621 int i, err; 1622 int ret = 0; 1623 1624 for (i = 0; i < s->num_extents; i++) { 1625 err = bdrv_co_flush(s->extents[i].file); 1626 if (err < 0) { 1627 ret = err; 1628 } 1629 } 1630 return ret; 1631 } 1632 1633 static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) 1634 { 1635 int i; 1636 int64_t ret = 0; 1637 int64_t r; 1638 BDRVVmdkState *s = bs->opaque; 1639 1640 ret = bdrv_get_allocated_file_size(bs->file); 1641 if (ret < 0) { 1642 return ret; 1643 } 1644 for (i = 0; i < s->num_extents; i++) { 1645 if (s->extents[i].file == bs->file) { 1646 continue; 1647 } 1648 r = bdrv_get_allocated_file_size(s->extents[i].file); 1649 if (r < 0) { 1650 return r; 1651 } 1652 ret += r; 1653 } 1654 return ret; 1655 } 1656 1657 static QEMUOptionParameter vmdk_create_options[] = { 1658 { 1659 .name = BLOCK_OPT_SIZE, 1660 .type = OPT_SIZE, 1661 .help = "Virtual disk size" 1662 }, 1663 { 1664 .name = BLOCK_OPT_BACKING_FILE, 1665 .type = OPT_STRING, 1666 .help = "File name of a base image" 1667 }, 1668 { 1669 .name = BLOCK_OPT_COMPAT6, 1670 .type = OPT_FLAG, 1671 .help = "VMDK version 6 image" 1672 }, 1673 { 1674 .name = BLOCK_OPT_SUBFMT, 1675 .type = OPT_STRING, 1676 .help = 1677 "VMDK flat extent format, can be one of " 1678 "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " 1679 }, 1680 { NULL } 1681 }; 1682 1683 static BlockDriver bdrv_vmdk = { 1684 .format_name = "vmdk", 1685 .instance_size = sizeof(BDRVVmdkState), 1686 .bdrv_probe = vmdk_probe, 1687 .bdrv_open = vmdk_open, 1688 .bdrv_reopen_prepare = vmdk_reopen_prepare, 1689 .bdrv_read = vmdk_co_read, 1690 .bdrv_write = vmdk_co_write, 1691 .bdrv_close = vmdk_close, 1692 .bdrv_create = vmdk_create, 1693 .bdrv_co_flush_to_disk = vmdk_co_flush, 1694 .bdrv_co_is_allocated = vmdk_co_is_allocated, 1695 .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, 1696 1697 .create_options = vmdk_create_options, 1698 }; 1699 1700 static void bdrv_vmdk_init(void) 1701 { 1702 bdrv_register(&bdrv_vmdk); 1703 } 1704 1705 block_init(bdrv_vmdk_init); 1706