1 /* 2 * Block driver for the VMDK format 3 * 4 * Copyright (c) 2004 Fabrice Bellard 5 * Copyright (c) 2005 Filip Navara 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu-common.h" 27 #include "block_int.h" 28 #include "module.h" 29 30 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 31 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 32 33 typedef struct { 34 uint32_t version; 35 uint32_t flags; 36 uint32_t disk_sectors; 37 uint32_t granularity; 38 uint32_t l1dir_offset; 39 uint32_t l1dir_size; 40 uint32_t file_sectors; 41 uint32_t cylinders; 42 uint32_t heads; 43 uint32_t sectors_per_track; 44 } VMDK3Header; 45 46 typedef struct { 47 uint32_t version; 48 uint32_t flags; 49 int64_t capacity; 50 int64_t granularity; 51 int64_t desc_offset; 52 int64_t desc_size; 53 int32_t num_gtes_per_gte; 54 int64_t rgd_offset; 55 int64_t gd_offset; 56 int64_t grain_offset; 57 char filler[1]; 58 char check_bytes[4]; 59 } __attribute__((packed)) VMDK4Header; 60 61 #define L2_CACHE_SIZE 16 62 63 typedef struct BDRVVmdkState { 64 int64_t l1_table_offset; 65 int64_t l1_backup_table_offset; 66 uint32_t *l1_table; 67 uint32_t *l1_backup_table; 68 unsigned int l1_size; 69 uint32_t l1_entry_sectors; 70 71 unsigned int l2_size; 72 uint32_t *l2_cache; 73 uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 74 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 75 76 unsigned int cluster_sectors; 77 uint32_t parent_cid; 78 } BDRVVmdkState; 79 80 typedef struct VmdkMetaData { 81 uint32_t offset; 82 unsigned int l1_index; 83 unsigned int l2_index; 84 unsigned int l2_offset; 85 int valid; 86 } VmdkMetaData; 87 88 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 89 { 90 uint32_t magic; 91 92 if (buf_size < 4) 93 return 0; 94 magic = be32_to_cpu(*(uint32_t *)buf); 95 if (magic == VMDK3_MAGIC || 96 magic == VMDK4_MAGIC) 97 return 100; 98 else 99 return 0; 100 } 101 102 #define CHECK_CID 1 103 104 #define SECTOR_SIZE 512 105 #define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each 106 #define HEADER_SIZE 512 // first sector of 512 bytes 107 108 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) 109 { 110 char desc[DESC_SIZE]; 111 uint32_t cid; 112 const char *p_name, *cid_str; 113 size_t cid_str_size; 114 115 /* the descriptor offset = 0x200 */ 116 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE) 117 return 0; 118 119 if (parent) { 120 cid_str = "parentCID"; 121 cid_str_size = sizeof("parentCID"); 122 } else { 123 cid_str = "CID"; 124 cid_str_size = sizeof("CID"); 125 } 126 127 if ((p_name = strstr(desc,cid_str)) != NULL) { 128 p_name += cid_str_size; 129 sscanf(p_name,"%x",&cid); 130 } 131 132 return cid; 133 } 134 135 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 136 { 137 char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; 138 char *p_name, *tmp_str; 139 140 /* the descriptor offset = 0x200 */ 141 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE) 142 return -1; 143 144 tmp_str = strstr(desc,"parentCID"); 145 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); 146 if ((p_name = strstr(desc,"CID")) != NULL) { 147 p_name += sizeof("CID"); 148 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); 149 pstrcat(desc, sizeof(desc), tmp_desc); 150 } 151 152 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0) 153 return -1; 154 return 0; 155 } 156 157 static int vmdk_is_cid_valid(BlockDriverState *bs) 158 { 159 #ifdef CHECK_CID 160 BDRVVmdkState *s = bs->opaque; 161 BlockDriverState *p_bs = bs->backing_hd; 162 uint32_t cur_pcid; 163 164 if (p_bs) { 165 cur_pcid = vmdk_read_cid(p_bs,0); 166 if (s->parent_cid != cur_pcid) 167 // CID not valid 168 return 0; 169 } 170 #endif 171 // CID valid 172 return 1; 173 } 174 175 static int vmdk_snapshot_create(const char *filename, const char *backing_file) 176 { 177 int snp_fd, p_fd; 178 int ret; 179 uint32_t p_cid; 180 char *p_name, *gd_buf, *rgd_buf; 181 const char *real_filename, *temp_str; 182 VMDK4Header header; 183 uint32_t gde_entries, gd_size; 184 int64_t gd_offset, rgd_offset, capacity, gt_size; 185 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE]; 186 static const char desc_template[] = 187 "# Disk DescriptorFile\n" 188 "version=1\n" 189 "CID=%x\n" 190 "parentCID=%x\n" 191 "createType=\"monolithicSparse\"\n" 192 "parentFileNameHint=\"%s\"\n" 193 "\n" 194 "# Extent description\n" 195 "RW %u SPARSE \"%s\"\n" 196 "\n" 197 "# The Disk Data Base \n" 198 "#DDB\n" 199 "\n"; 200 201 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644); 202 if (snp_fd < 0) 203 return -errno; 204 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE); 205 if (p_fd < 0) { 206 close(snp_fd); 207 return -errno; 208 } 209 210 /* read the header */ 211 if (lseek(p_fd, 0x0, SEEK_SET) == -1) { 212 ret = -errno; 213 goto fail; 214 } 215 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) { 216 ret = -errno; 217 goto fail; 218 } 219 220 /* write the header */ 221 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) { 222 ret = -errno; 223 goto fail; 224 } 225 if (write(snp_fd, hdr, HEADER_SIZE) == -1) { 226 ret = -errno; 227 goto fail; 228 } 229 230 memset(&header, 0, sizeof(header)); 231 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC 232 233 if (ftruncate(snp_fd, header.grain_offset << 9)) { 234 ret = -errno; 235 goto fail; 236 } 237 /* the descriptor offset = 0x200 */ 238 if (lseek(p_fd, 0x200, SEEK_SET) == -1) { 239 ret = -errno; 240 goto fail; 241 } 242 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) { 243 ret = -errno; 244 goto fail; 245 } 246 247 if ((p_name = strstr(p_desc,"CID")) != NULL) { 248 p_name += sizeof("CID"); 249 sscanf(p_name,"%x",&p_cid); 250 } 251 252 real_filename = filename; 253 if ((temp_str = strrchr(real_filename, '\\')) != NULL) 254 real_filename = temp_str + 1; 255 if ((temp_str = strrchr(real_filename, '/')) != NULL) 256 real_filename = temp_str + 1; 257 if ((temp_str = strrchr(real_filename, ':')) != NULL) 258 real_filename = temp_str + 1; 259 260 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file, 261 (uint32_t)header.capacity, real_filename); 262 263 /* write the descriptor */ 264 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) { 265 ret = -errno; 266 goto fail; 267 } 268 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) { 269 ret = -errno; 270 goto fail; 271 } 272 273 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table 274 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table 275 capacity = header.capacity * SECTOR_SIZE; // Extent size 276 /* 277 * Each GDE span 32M disk, means: 278 * 512 GTE per GT, each GTE points to grain 279 */ 280 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE; 281 if (!gt_size) { 282 ret = -EINVAL; 283 goto fail; 284 } 285 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde 286 gd_size = gde_entries * sizeof(uint32_t); 287 288 /* write RGD */ 289 rgd_buf = qemu_malloc(gd_size); 290 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) { 291 ret = -errno; 292 goto fail_rgd; 293 } 294 if (read(p_fd, rgd_buf, gd_size) != gd_size) { 295 ret = -errno; 296 goto fail_rgd; 297 } 298 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) { 299 ret = -errno; 300 goto fail_rgd; 301 } 302 if (write(snp_fd, rgd_buf, gd_size) == -1) { 303 ret = -errno; 304 goto fail_rgd; 305 } 306 307 /* write GD */ 308 gd_buf = qemu_malloc(gd_size); 309 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) { 310 ret = -errno; 311 goto fail_gd; 312 } 313 if (read(p_fd, gd_buf, gd_size) != gd_size) { 314 ret = -errno; 315 goto fail_gd; 316 } 317 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) { 318 ret = -errno; 319 goto fail_gd; 320 } 321 if (write(snp_fd, gd_buf, gd_size) == -1) { 322 ret = -errno; 323 goto fail_gd; 324 } 325 ret = 0; 326 327 fail_gd: 328 qemu_free(gd_buf); 329 fail_rgd: 330 qemu_free(rgd_buf); 331 fail: 332 close(p_fd); 333 close(snp_fd); 334 return ret; 335 } 336 337 static int vmdk_parent_open(BlockDriverState *bs) 338 { 339 char *p_name; 340 char desc[DESC_SIZE]; 341 342 /* the descriptor offset = 0x200 */ 343 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE) 344 return -1; 345 346 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) { 347 char *end_name; 348 349 p_name += sizeof("parentFileNameHint") + 1; 350 if ((end_name = strchr(p_name,'\"')) == NULL) 351 return -1; 352 if ((end_name - p_name) > sizeof (bs->backing_file) - 1) 353 return -1; 354 355 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 356 } 357 358 return 0; 359 } 360 361 static int vmdk_open(BlockDriverState *bs, int flags) 362 { 363 BDRVVmdkState *s = bs->opaque; 364 uint32_t magic; 365 int l1_size, i; 366 367 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic)) 368 goto fail; 369 370 magic = be32_to_cpu(magic); 371 if (magic == VMDK3_MAGIC) { 372 VMDK3Header header; 373 374 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header)) 375 goto fail; 376 s->cluster_sectors = le32_to_cpu(header.granularity); 377 s->l2_size = 1 << 9; 378 s->l1_size = 1 << 6; 379 bs->total_sectors = le32_to_cpu(header.disk_sectors); 380 s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9; 381 s->l1_backup_table_offset = 0; 382 s->l1_entry_sectors = s->l2_size * s->cluster_sectors; 383 } else if (magic == VMDK4_MAGIC) { 384 VMDK4Header header; 385 386 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header)) 387 goto fail; 388 bs->total_sectors = le64_to_cpu(header.capacity); 389 s->cluster_sectors = le64_to_cpu(header.granularity); 390 s->l2_size = le32_to_cpu(header.num_gtes_per_gte); 391 s->l1_entry_sectors = s->l2_size * s->cluster_sectors; 392 if (s->l1_entry_sectors <= 0) 393 goto fail; 394 s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1) 395 / s->l1_entry_sectors; 396 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; 397 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; 398 399 // try to open parent images, if exist 400 if (vmdk_parent_open(bs) != 0) 401 goto fail; 402 // write the CID once after the image creation 403 s->parent_cid = vmdk_read_cid(bs,1); 404 } else { 405 goto fail; 406 } 407 408 /* read the L1 table */ 409 l1_size = s->l1_size * sizeof(uint32_t); 410 s->l1_table = qemu_malloc(l1_size); 411 if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size) 412 goto fail; 413 for(i = 0; i < s->l1_size; i++) { 414 le32_to_cpus(&s->l1_table[i]); 415 } 416 417 if (s->l1_backup_table_offset) { 418 s->l1_backup_table = qemu_malloc(l1_size); 419 if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size) 420 goto fail; 421 for(i = 0; i < s->l1_size; i++) { 422 le32_to_cpus(&s->l1_backup_table[i]); 423 } 424 } 425 426 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); 427 return 0; 428 fail: 429 qemu_free(s->l1_backup_table); 430 qemu_free(s->l1_table); 431 qemu_free(s->l2_cache); 432 return -1; 433 } 434 435 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, 436 uint64_t offset, int allocate); 437 438 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, 439 uint64_t offset, int allocate) 440 { 441 BDRVVmdkState *s = bs->opaque; 442 uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB 443 444 // we will be here if it's first write on non-exist grain(cluster). 445 // try to read from parent image, if exist 446 if (bs->backing_hd) { 447 int ret; 448 449 if (!vmdk_is_cid_valid(bs)) 450 return -1; 451 452 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, 453 s->cluster_sectors); 454 if (ret < 0) { 455 return -1; 456 } 457 458 //Write grain only into the active image 459 ret = bdrv_write(bs->file, cluster_offset, whole_grain, 460 s->cluster_sectors); 461 if (ret < 0) { 462 return -1; 463 } 464 } 465 return 0; 466 } 467 468 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) 469 { 470 BDRVVmdkState *s = bs->opaque; 471 472 /* update L2 table */ 473 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), 474 &(m_data->offset), sizeof(m_data->offset)) < 0) 475 return -1; 476 /* update backup L2 table */ 477 if (s->l1_backup_table_offset != 0) { 478 m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; 479 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), 480 &(m_data->offset), sizeof(m_data->offset)) < 0) 481 return -1; 482 } 483 484 return 0; 485 } 486 487 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, 488 uint64_t offset, int allocate) 489 { 490 BDRVVmdkState *s = bs->opaque; 491 unsigned int l1_index, l2_offset, l2_index; 492 int min_index, i, j; 493 uint32_t min_count, *l2_table, tmp = 0; 494 uint64_t cluster_offset; 495 496 if (m_data) 497 m_data->valid = 0; 498 499 l1_index = (offset >> 9) / s->l1_entry_sectors; 500 if (l1_index >= s->l1_size) 501 return 0; 502 l2_offset = s->l1_table[l1_index]; 503 if (!l2_offset) 504 return 0; 505 for(i = 0; i < L2_CACHE_SIZE; i++) { 506 if (l2_offset == s->l2_cache_offsets[i]) { 507 /* increment the hit count */ 508 if (++s->l2_cache_counts[i] == 0xffffffff) { 509 for(j = 0; j < L2_CACHE_SIZE; j++) { 510 s->l2_cache_counts[j] >>= 1; 511 } 512 } 513 l2_table = s->l2_cache + (i * s->l2_size); 514 goto found; 515 } 516 } 517 /* not found: load a new entry in the least used one */ 518 min_index = 0; 519 min_count = 0xffffffff; 520 for(i = 0; i < L2_CACHE_SIZE; i++) { 521 if (s->l2_cache_counts[i] < min_count) { 522 min_count = s->l2_cache_counts[i]; 523 min_index = i; 524 } 525 } 526 l2_table = s->l2_cache + (min_index * s->l2_size); 527 if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) != 528 s->l2_size * sizeof(uint32_t)) 529 return 0; 530 531 s->l2_cache_offsets[min_index] = l2_offset; 532 s->l2_cache_counts[min_index] = 1; 533 found: 534 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; 535 cluster_offset = le32_to_cpu(l2_table[l2_index]); 536 537 if (!cluster_offset) { 538 if (!allocate) 539 return 0; 540 541 // Avoid the L2 tables update for the images that have snapshots. 542 cluster_offset = bdrv_getlength(bs->file); 543 bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9)); 544 545 cluster_offset >>= 9; 546 tmp = cpu_to_le32(cluster_offset); 547 l2_table[l2_index] = tmp; 548 549 /* First of all we write grain itself, to avoid race condition 550 * that may to corrupt the image. 551 * This problem may occur because of insufficient space on host disk 552 * or inappropriate VM shutdown. 553 */ 554 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) 555 return 0; 556 557 if (m_data) { 558 m_data->offset = tmp; 559 m_data->l1_index = l1_index; 560 m_data->l2_index = l2_index; 561 m_data->l2_offset = l2_offset; 562 m_data->valid = 1; 563 } 564 } 565 cluster_offset <<= 9; 566 return cluster_offset; 567 } 568 569 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, 570 int nb_sectors, int *pnum) 571 { 572 BDRVVmdkState *s = bs->opaque; 573 int index_in_cluster, n; 574 uint64_t cluster_offset; 575 576 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); 577 index_in_cluster = sector_num % s->cluster_sectors; 578 n = s->cluster_sectors - index_in_cluster; 579 if (n > nb_sectors) 580 n = nb_sectors; 581 *pnum = n; 582 return (cluster_offset != 0); 583 } 584 585 static int vmdk_read(BlockDriverState *bs, int64_t sector_num, 586 uint8_t *buf, int nb_sectors) 587 { 588 BDRVVmdkState *s = bs->opaque; 589 int index_in_cluster, n, ret; 590 uint64_t cluster_offset; 591 592 while (nb_sectors > 0) { 593 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); 594 index_in_cluster = sector_num % s->cluster_sectors; 595 n = s->cluster_sectors - index_in_cluster; 596 if (n > nb_sectors) 597 n = nb_sectors; 598 if (!cluster_offset) { 599 // try to read from parent image, if exist 600 if (bs->backing_hd) { 601 if (!vmdk_is_cid_valid(bs)) 602 return -1; 603 ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 604 if (ret < 0) 605 return -1; 606 } else { 607 memset(buf, 0, 512 * n); 608 } 609 } else { 610 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) 611 return -1; 612 } 613 nb_sectors -= n; 614 sector_num += n; 615 buf += n * 512; 616 } 617 return 0; 618 } 619 620 static int vmdk_write(BlockDriverState *bs, int64_t sector_num, 621 const uint8_t *buf, int nb_sectors) 622 { 623 BDRVVmdkState *s = bs->opaque; 624 VmdkMetaData m_data; 625 int index_in_cluster, n; 626 uint64_t cluster_offset; 627 static int cid_update = 0; 628 629 if (sector_num > bs->total_sectors) { 630 fprintf(stderr, 631 "(VMDK) Wrong offset: sector_num=0x%" PRIx64 632 " total_sectors=0x%" PRIx64 "\n", 633 sector_num, bs->total_sectors); 634 return -1; 635 } 636 637 while (nb_sectors > 0) { 638 index_in_cluster = sector_num & (s->cluster_sectors - 1); 639 n = s->cluster_sectors - index_in_cluster; 640 if (n > nb_sectors) 641 n = nb_sectors; 642 cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); 643 if (!cluster_offset) 644 return -1; 645 646 if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) 647 return -1; 648 if (m_data.valid) { 649 /* update L2 tables */ 650 if (vmdk_L2update(bs, &m_data) == -1) 651 return -1; 652 } 653 nb_sectors -= n; 654 sector_num += n; 655 buf += n * 512; 656 657 // update CID on the first write every time the virtual disk is opened 658 if (!cid_update) { 659 vmdk_write_cid(bs, time(NULL)); 660 cid_update++; 661 } 662 } 663 return 0; 664 } 665 666 static int vmdk_create(const char *filename, QEMUOptionParameter *options) 667 { 668 int fd, i; 669 VMDK4Header header; 670 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; 671 static const char desc_template[] = 672 "# Disk DescriptorFile\n" 673 "version=1\n" 674 "CID=%x\n" 675 "parentCID=ffffffff\n" 676 "createType=\"monolithicSparse\"\n" 677 "\n" 678 "# Extent description\n" 679 "RW %" PRId64 " SPARSE \"%s\"\n" 680 "\n" 681 "# The Disk Data Base \n" 682 "#DDB\n" 683 "\n" 684 "ddb.virtualHWVersion = \"%d\"\n" 685 "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 686 "ddb.geometry.heads = \"16\"\n" 687 "ddb.geometry.sectors = \"63\"\n" 688 "ddb.adapterType = \"ide\"\n"; 689 char desc[1024]; 690 const char *real_filename, *temp_str; 691 int64_t total_size = 0; 692 const char *backing_file = NULL; 693 int flags = 0; 694 int ret; 695 696 // Read out options 697 while (options && options->name) { 698 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 699 total_size = options->value.n / 512; 700 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 701 backing_file = options->value.s; 702 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { 703 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0; 704 } 705 options++; 706 } 707 708 /* XXX: add support for backing file */ 709 if (backing_file) { 710 return vmdk_snapshot_create(filename, backing_file); 711 } 712 713 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 714 0644); 715 if (fd < 0) 716 return -errno; 717 magic = cpu_to_be32(VMDK4_MAGIC); 718 memset(&header, 0, sizeof(header)); 719 header.version = 1; 720 header.flags = 3; /* ?? */ 721 header.capacity = total_size; 722 header.granularity = 128; 723 header.num_gtes_per_gte = 512; 724 725 grains = (total_size + header.granularity - 1) / header.granularity; 726 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; 727 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; 728 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; 729 730 header.desc_offset = 1; 731 header.desc_size = 20; 732 header.rgd_offset = header.desc_offset + header.desc_size; 733 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); 734 header.grain_offset = 735 ((header.gd_offset + gd_size + (gt_size * gt_count) + 736 header.granularity - 1) / header.granularity) * 737 header.granularity; 738 739 /* swap endianness for all header fields */ 740 header.version = cpu_to_le32(header.version); 741 header.flags = cpu_to_le32(header.flags); 742 header.capacity = cpu_to_le64(header.capacity); 743 header.granularity = cpu_to_le64(header.granularity); 744 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); 745 header.desc_offset = cpu_to_le64(header.desc_offset); 746 header.desc_size = cpu_to_le64(header.desc_size); 747 header.rgd_offset = cpu_to_le64(header.rgd_offset); 748 header.gd_offset = cpu_to_le64(header.gd_offset); 749 header.grain_offset = cpu_to_le64(header.grain_offset); 750 751 header.check_bytes[0] = 0xa; 752 header.check_bytes[1] = 0x20; 753 header.check_bytes[2] = 0xd; 754 header.check_bytes[3] = 0xa; 755 756 /* write all the data */ 757 ret = qemu_write_full(fd, &magic, sizeof(magic)); 758 if (ret != sizeof(magic)) { 759 ret = -errno; 760 goto exit; 761 } 762 ret = qemu_write_full(fd, &header, sizeof(header)); 763 if (ret != sizeof(header)) { 764 ret = -errno; 765 goto exit; 766 } 767 768 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9); 769 if (ret < 0) { 770 ret = -errno; 771 goto exit; 772 } 773 774 /* write grain directory */ 775 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); 776 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size; 777 i < gt_count; i++, tmp += gt_size) { 778 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 779 if (ret != sizeof(tmp)) { 780 ret = -errno; 781 goto exit; 782 } 783 } 784 785 /* write backup grain directory */ 786 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); 787 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size; 788 i < gt_count; i++, tmp += gt_size) { 789 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 790 if (ret != sizeof(tmp)) { 791 ret = -errno; 792 goto exit; 793 } 794 } 795 796 /* compose the descriptor */ 797 real_filename = filename; 798 if ((temp_str = strrchr(real_filename, '\\')) != NULL) 799 real_filename = temp_str + 1; 800 if ((temp_str = strrchr(real_filename, '/')) != NULL) 801 real_filename = temp_str + 1; 802 if ((temp_str = strrchr(real_filename, ':')) != NULL) 803 real_filename = temp_str + 1; 804 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL), 805 total_size, real_filename, 806 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), 807 total_size / (int64_t)(63 * 16)); 808 809 /* write the descriptor */ 810 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET); 811 ret = qemu_write_full(fd, desc, strlen(desc)); 812 if (ret != strlen(desc)) { 813 ret = -errno; 814 goto exit; 815 } 816 817 ret = 0; 818 exit: 819 close(fd); 820 return ret; 821 } 822 823 static void vmdk_close(BlockDriverState *bs) 824 { 825 BDRVVmdkState *s = bs->opaque; 826 827 qemu_free(s->l1_table); 828 qemu_free(s->l2_cache); 829 } 830 831 static int vmdk_flush(BlockDriverState *bs) 832 { 833 return bdrv_flush(bs->file); 834 } 835 836 837 static QEMUOptionParameter vmdk_create_options[] = { 838 { 839 .name = BLOCK_OPT_SIZE, 840 .type = OPT_SIZE, 841 .help = "Virtual disk size" 842 }, 843 { 844 .name = BLOCK_OPT_BACKING_FILE, 845 .type = OPT_STRING, 846 .help = "File name of a base image" 847 }, 848 { 849 .name = BLOCK_OPT_COMPAT6, 850 .type = OPT_FLAG, 851 .help = "VMDK version 6 image" 852 }, 853 { NULL } 854 }; 855 856 static BlockDriver bdrv_vmdk = { 857 .format_name = "vmdk", 858 .instance_size = sizeof(BDRVVmdkState), 859 .bdrv_probe = vmdk_probe, 860 .bdrv_open = vmdk_open, 861 .bdrv_read = vmdk_read, 862 .bdrv_write = vmdk_write, 863 .bdrv_close = vmdk_close, 864 .bdrv_create = vmdk_create, 865 .bdrv_flush = vmdk_flush, 866 .bdrv_is_allocated = vmdk_is_allocated, 867 868 .create_options = vmdk_create_options, 869 }; 870 871 static void bdrv_vmdk_init(void) 872 { 873 bdrv_register(&bdrv_vmdk); 874 } 875 876 block_init(bdrv_vmdk_init); 877