1 /* 2 * Block driver for the VMDK format 3 * 4 * Copyright (c) 2004 Fabrice Bellard 5 * Copyright (c) 2005 Filip Navara 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu-common.h" 27 #include "block_int.h" 28 #include "module.h" 29 30 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 31 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 32 33 typedef struct { 34 uint32_t version; 35 uint32_t flags; 36 uint32_t disk_sectors; 37 uint32_t granularity; 38 uint32_t l1dir_offset; 39 uint32_t l1dir_size; 40 uint32_t file_sectors; 41 uint32_t cylinders; 42 uint32_t heads; 43 uint32_t sectors_per_track; 44 } VMDK3Header; 45 46 typedef struct { 47 uint32_t version; 48 uint32_t flags; 49 int64_t capacity; 50 int64_t granularity; 51 int64_t desc_offset; 52 int64_t desc_size; 53 int32_t num_gtes_per_gte; 54 int64_t rgd_offset; 55 int64_t gd_offset; 56 int64_t grain_offset; 57 char filler[1]; 58 char check_bytes[4]; 59 } __attribute__((packed)) VMDK4Header; 60 61 #define L2_CACHE_SIZE 16 62 63 typedef struct BDRVVmdkState { 64 BlockDriverState *hd; 65 int64_t l1_table_offset; 66 int64_t l1_backup_table_offset; 67 uint32_t *l1_table; 68 uint32_t *l1_backup_table; 69 unsigned int l1_size; 70 uint32_t l1_entry_sectors; 71 72 unsigned int l2_size; 73 uint32_t *l2_cache; 74 uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 75 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 76 77 unsigned int cluster_sectors; 78 uint32_t parent_cid; 79 int is_parent; 80 } BDRVVmdkState; 81 82 typedef struct VmdkMetaData { 83 uint32_t offset; 84 unsigned int l1_index; 85 unsigned int l2_index; 86 unsigned int l2_offset; 87 int valid; 88 } VmdkMetaData; 89 90 typedef struct ActiveBDRVState{ 91 BlockDriverState *hd; // active image handler 92 uint64_t cluster_offset; // current write offset 93 }ActiveBDRVState; 94 95 static ActiveBDRVState activeBDRV; 96 97 98 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 99 { 100 uint32_t magic; 101 102 if (buf_size < 4) 103 return 0; 104 magic = be32_to_cpu(*(uint32_t *)buf); 105 if (magic == VMDK3_MAGIC || 106 magic == VMDK4_MAGIC) 107 return 100; 108 else 109 return 0; 110 } 111 112 #define CHECK_CID 1 113 114 #define SECTOR_SIZE 512 115 #define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each 116 #define HEADER_SIZE 512 // first sector of 512 bytes 117 118 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) 119 { 120 BDRVVmdkState *s = bs->opaque; 121 char desc[DESC_SIZE]; 122 uint32_t cid; 123 const char *p_name, *cid_str; 124 size_t cid_str_size; 125 126 /* the descriptor offset = 0x200 */ 127 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) 128 return 0; 129 130 if (parent) { 131 cid_str = "parentCID"; 132 cid_str_size = sizeof("parentCID"); 133 } else { 134 cid_str = "CID"; 135 cid_str_size = sizeof("CID"); 136 } 137 138 if ((p_name = strstr(desc,cid_str)) != NULL) { 139 p_name += cid_str_size; 140 sscanf(p_name,"%x",&cid); 141 } 142 143 return cid; 144 } 145 146 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 147 { 148 BDRVVmdkState *s = bs->opaque; 149 char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; 150 char *p_name, *tmp_str; 151 152 /* the descriptor offset = 0x200 */ 153 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) 154 return -1; 155 156 tmp_str = strstr(desc,"parentCID"); 157 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); 158 if ((p_name = strstr(desc,"CID")) != NULL) { 159 p_name += sizeof("CID"); 160 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); 161 pstrcat(desc, sizeof(desc), tmp_desc); 162 } 163 164 if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) 165 return -1; 166 return 0; 167 } 168 169 static int vmdk_is_cid_valid(BlockDriverState *bs) 170 { 171 #ifdef CHECK_CID 172 BDRVVmdkState *s = bs->opaque; 173 BlockDriverState *p_bs = bs->backing_hd; 174 uint32_t cur_pcid; 175 176 if (p_bs) { 177 cur_pcid = vmdk_read_cid(p_bs,0); 178 if (s->parent_cid != cur_pcid) 179 // CID not valid 180 return 0; 181 } 182 #endif 183 // CID valid 184 return 1; 185 } 186 187 static int vmdk_snapshot_create(const char *filename, const char *backing_file) 188 { 189 int snp_fd, p_fd; 190 int ret; 191 uint32_t p_cid; 192 char *p_name, *gd_buf, *rgd_buf; 193 const char *real_filename, *temp_str; 194 VMDK4Header header; 195 uint32_t gde_entries, gd_size; 196 int64_t gd_offset, rgd_offset, capacity, gt_size; 197 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE]; 198 static const char desc_template[] = 199 "# Disk DescriptorFile\n" 200 "version=1\n" 201 "CID=%x\n" 202 "parentCID=%x\n" 203 "createType=\"monolithicSparse\"\n" 204 "parentFileNameHint=\"%s\"\n" 205 "\n" 206 "# Extent description\n" 207 "RW %u SPARSE \"%s\"\n" 208 "\n" 209 "# The Disk Data Base \n" 210 "#DDB\n" 211 "\n"; 212 213 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644); 214 if (snp_fd < 0) 215 return -errno; 216 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE); 217 if (p_fd < 0) { 218 close(snp_fd); 219 return -errno; 220 } 221 222 /* read the header */ 223 if (lseek(p_fd, 0x0, SEEK_SET) == -1) { 224 ret = -errno; 225 goto fail; 226 } 227 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) { 228 ret = -errno; 229 goto fail; 230 } 231 232 /* write the header */ 233 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) { 234 ret = -errno; 235 goto fail; 236 } 237 if (write(snp_fd, hdr, HEADER_SIZE) == -1) { 238 ret = -errno; 239 goto fail; 240 } 241 242 memset(&header, 0, sizeof(header)); 243 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC 244 245 if (ftruncate(snp_fd, header.grain_offset << 9)) { 246 ret = -errno; 247 goto fail; 248 } 249 /* the descriptor offset = 0x200 */ 250 if (lseek(p_fd, 0x200, SEEK_SET) == -1) { 251 ret = -errno; 252 goto fail; 253 } 254 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) { 255 ret = -errno; 256 goto fail; 257 } 258 259 if ((p_name = strstr(p_desc,"CID")) != NULL) { 260 p_name += sizeof("CID"); 261 sscanf(p_name,"%x",&p_cid); 262 } 263 264 real_filename = filename; 265 if ((temp_str = strrchr(real_filename, '\\')) != NULL) 266 real_filename = temp_str + 1; 267 if ((temp_str = strrchr(real_filename, '/')) != NULL) 268 real_filename = temp_str + 1; 269 if ((temp_str = strrchr(real_filename, ':')) != NULL) 270 real_filename = temp_str + 1; 271 272 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file, 273 (uint32_t)header.capacity, real_filename); 274 275 /* write the descriptor */ 276 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) { 277 ret = -errno; 278 goto fail; 279 } 280 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) { 281 ret = -errno; 282 goto fail; 283 } 284 285 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table 286 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table 287 capacity = header.capacity * SECTOR_SIZE; // Extent size 288 /* 289 * Each GDE span 32M disk, means: 290 * 512 GTE per GT, each GTE points to grain 291 */ 292 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE; 293 if (!gt_size) { 294 ret = -EINVAL; 295 goto fail; 296 } 297 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde 298 gd_size = gde_entries * sizeof(uint32_t); 299 300 /* write RGD */ 301 rgd_buf = qemu_malloc(gd_size); 302 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) { 303 ret = -errno; 304 goto fail_rgd; 305 } 306 if (read(p_fd, rgd_buf, gd_size) != gd_size) { 307 ret = -errno; 308 goto fail_rgd; 309 } 310 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) { 311 ret = -errno; 312 goto fail_rgd; 313 } 314 if (write(snp_fd, rgd_buf, gd_size) == -1) { 315 ret = -errno; 316 goto fail_rgd; 317 } 318 319 /* write GD */ 320 gd_buf = qemu_malloc(gd_size); 321 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) { 322 ret = -errno; 323 goto fail_gd; 324 } 325 if (read(p_fd, gd_buf, gd_size) != gd_size) { 326 ret = -errno; 327 goto fail_gd; 328 } 329 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) { 330 ret = -errno; 331 goto fail_gd; 332 } 333 if (write(snp_fd, gd_buf, gd_size) == -1) { 334 ret = -errno; 335 goto fail_gd; 336 } 337 ret = 0; 338 339 fail_gd: 340 qemu_free(gd_buf); 341 fail_rgd: 342 qemu_free(rgd_buf); 343 fail: 344 close(p_fd); 345 close(snp_fd); 346 return ret; 347 } 348 349 static void vmdk_parent_close(BlockDriverState *bs) 350 { 351 if (bs->backing_hd) 352 bdrv_close(bs->backing_hd); 353 } 354 355 static int parent_open = 0; 356 static int vmdk_parent_open(BlockDriverState *bs, const char * filename) 357 { 358 BDRVVmdkState *s = bs->opaque; 359 char *p_name; 360 char desc[DESC_SIZE]; 361 char parent_img_name[1024]; 362 363 /* the descriptor offset = 0x200 */ 364 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) 365 return -1; 366 367 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) { 368 char *end_name; 369 struct stat file_buf; 370 371 p_name += sizeof("parentFileNameHint") + 1; 372 if ((end_name = strchr(p_name,'\"')) == NULL) 373 return -1; 374 if ((end_name - p_name) > sizeof (bs->backing_file) - 1) 375 return -1; 376 377 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 378 if (stat(bs->backing_file, &file_buf) != 0) { 379 path_combine(parent_img_name, sizeof(parent_img_name), 380 filename, bs->backing_file); 381 } else { 382 pstrcpy(parent_img_name, sizeof(parent_img_name), 383 bs->backing_file); 384 } 385 386 bs->backing_hd = bdrv_new(""); 387 if (!bs->backing_hd) { 388 failure: 389 bdrv_close(s->hd); 390 return -1; 391 } 392 parent_open = 1; 393 if (bdrv_open(bs->backing_hd, parent_img_name, 0, NULL) < 0) 394 goto failure; 395 parent_open = 0; 396 } 397 398 return 0; 399 } 400 401 static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) 402 { 403 BDRVVmdkState *s = bs->opaque; 404 uint32_t magic; 405 int l1_size, i, ret; 406 407 if (parent_open) { 408 /* Parent must be opened as RO, no RDWR. */ 409 flags = 0; 410 } 411 412 ret = bdrv_file_open(&s->hd, filename, flags); 413 if (ret < 0) 414 return ret; 415 if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic)) 416 goto fail; 417 418 magic = be32_to_cpu(magic); 419 if (magic == VMDK3_MAGIC) { 420 VMDK3Header header; 421 422 if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) 423 goto fail; 424 s->cluster_sectors = le32_to_cpu(header.granularity); 425 s->l2_size = 1 << 9; 426 s->l1_size = 1 << 6; 427 bs->total_sectors = le32_to_cpu(header.disk_sectors); 428 s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9; 429 s->l1_backup_table_offset = 0; 430 s->l1_entry_sectors = s->l2_size * s->cluster_sectors; 431 } else if (magic == VMDK4_MAGIC) { 432 VMDK4Header header; 433 434 if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) 435 goto fail; 436 bs->total_sectors = le64_to_cpu(header.capacity); 437 s->cluster_sectors = le64_to_cpu(header.granularity); 438 s->l2_size = le32_to_cpu(header.num_gtes_per_gte); 439 s->l1_entry_sectors = s->l2_size * s->cluster_sectors; 440 if (s->l1_entry_sectors <= 0) 441 goto fail; 442 s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1) 443 / s->l1_entry_sectors; 444 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; 445 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; 446 447 if (parent_open) 448 s->is_parent = 1; 449 else 450 s->is_parent = 0; 451 452 // try to open parent images, if exist 453 if (vmdk_parent_open(bs, filename) != 0) 454 goto fail; 455 // write the CID once after the image creation 456 s->parent_cid = vmdk_read_cid(bs,1); 457 } else { 458 goto fail; 459 } 460 461 /* read the L1 table */ 462 l1_size = s->l1_size * sizeof(uint32_t); 463 s->l1_table = qemu_malloc(l1_size); 464 if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size) 465 goto fail; 466 for(i = 0; i < s->l1_size; i++) { 467 le32_to_cpus(&s->l1_table[i]); 468 } 469 470 if (s->l1_backup_table_offset) { 471 s->l1_backup_table = qemu_malloc(l1_size); 472 if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size) 473 goto fail; 474 for(i = 0; i < s->l1_size; i++) { 475 le32_to_cpus(&s->l1_backup_table[i]); 476 } 477 } 478 479 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); 480 return 0; 481 fail: 482 qemu_free(s->l1_backup_table); 483 qemu_free(s->l1_table); 484 qemu_free(s->l2_cache); 485 bdrv_delete(s->hd); 486 return -1; 487 } 488 489 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, 490 uint64_t offset, int allocate); 491 492 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, 493 uint64_t offset, int allocate) 494 { 495 uint64_t parent_cluster_offset; 496 BDRVVmdkState *s = bs->opaque; 497 uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB 498 499 // we will be here if it's first write on non-exist grain(cluster). 500 // try to read from parent image, if exist 501 if (bs->backing_hd) { 502 BDRVVmdkState *ps = bs->backing_hd->opaque; 503 504 if (!vmdk_is_cid_valid(bs)) 505 return -1; 506 507 parent_cluster_offset = get_cluster_offset(bs->backing_hd, NULL, 508 offset, allocate); 509 510 if (parent_cluster_offset) { 511 BDRVVmdkState *act_s = activeBDRV.hd->opaque; 512 513 if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) 514 return -1; 515 516 //Write grain only into the active image 517 if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) 518 return -1; 519 } 520 } 521 return 0; 522 } 523 524 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) 525 { 526 BDRVVmdkState *s = bs->opaque; 527 528 /* update L2 table */ 529 if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), 530 &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) 531 return -1; 532 /* update backup L2 table */ 533 if (s->l1_backup_table_offset != 0) { 534 m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; 535 if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), 536 &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) 537 return -1; 538 } 539 540 return 0; 541 } 542 543 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, 544 uint64_t offset, int allocate) 545 { 546 BDRVVmdkState *s = bs->opaque; 547 unsigned int l1_index, l2_offset, l2_index; 548 int min_index, i, j; 549 uint32_t min_count, *l2_table, tmp = 0; 550 uint64_t cluster_offset; 551 552 if (m_data) 553 m_data->valid = 0; 554 555 l1_index = (offset >> 9) / s->l1_entry_sectors; 556 if (l1_index >= s->l1_size) 557 return 0; 558 l2_offset = s->l1_table[l1_index]; 559 if (!l2_offset) 560 return 0; 561 for(i = 0; i < L2_CACHE_SIZE; i++) { 562 if (l2_offset == s->l2_cache_offsets[i]) { 563 /* increment the hit count */ 564 if (++s->l2_cache_counts[i] == 0xffffffff) { 565 for(j = 0; j < L2_CACHE_SIZE; j++) { 566 s->l2_cache_counts[j] >>= 1; 567 } 568 } 569 l2_table = s->l2_cache + (i * s->l2_size); 570 goto found; 571 } 572 } 573 /* not found: load a new entry in the least used one */ 574 min_index = 0; 575 min_count = 0xffffffff; 576 for(i = 0; i < L2_CACHE_SIZE; i++) { 577 if (s->l2_cache_counts[i] < min_count) { 578 min_count = s->l2_cache_counts[i]; 579 min_index = i; 580 } 581 } 582 l2_table = s->l2_cache + (min_index * s->l2_size); 583 if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) != 584 s->l2_size * sizeof(uint32_t)) 585 return 0; 586 587 s->l2_cache_offsets[min_index] = l2_offset; 588 s->l2_cache_counts[min_index] = 1; 589 found: 590 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; 591 cluster_offset = le32_to_cpu(l2_table[l2_index]); 592 593 if (!cluster_offset) { 594 if (!allocate) 595 return 0; 596 // Avoid the L2 tables update for the images that have snapshots. 597 if (!s->is_parent) { 598 cluster_offset = bdrv_getlength(s->hd); 599 bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); 600 601 cluster_offset >>= 9; 602 tmp = cpu_to_le32(cluster_offset); 603 l2_table[l2_index] = tmp; 604 // Save the active image state 605 activeBDRV.cluster_offset = cluster_offset; 606 activeBDRV.hd = bs; 607 } 608 /* First of all we write grain itself, to avoid race condition 609 * that may to corrupt the image. 610 * This problem may occur because of insufficient space on host disk 611 * or inappropriate VM shutdown. 612 */ 613 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) 614 return 0; 615 616 if (m_data) { 617 m_data->offset = tmp; 618 m_data->l1_index = l1_index; 619 m_data->l2_index = l2_index; 620 m_data->l2_offset = l2_offset; 621 m_data->valid = 1; 622 } 623 } 624 cluster_offset <<= 9; 625 return cluster_offset; 626 } 627 628 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, 629 int nb_sectors, int *pnum) 630 { 631 BDRVVmdkState *s = bs->opaque; 632 int index_in_cluster, n; 633 uint64_t cluster_offset; 634 635 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); 636 index_in_cluster = sector_num % s->cluster_sectors; 637 n = s->cluster_sectors - index_in_cluster; 638 if (n > nb_sectors) 639 n = nb_sectors; 640 *pnum = n; 641 return (cluster_offset != 0); 642 } 643 644 static int vmdk_read(BlockDriverState *bs, int64_t sector_num, 645 uint8_t *buf, int nb_sectors) 646 { 647 BDRVVmdkState *s = bs->opaque; 648 int index_in_cluster, n, ret; 649 uint64_t cluster_offset; 650 651 while (nb_sectors > 0) { 652 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); 653 index_in_cluster = sector_num % s->cluster_sectors; 654 n = s->cluster_sectors - index_in_cluster; 655 if (n > nb_sectors) 656 n = nb_sectors; 657 if (!cluster_offset) { 658 // try to read from parent image, if exist 659 if (bs->backing_hd) { 660 if (!vmdk_is_cid_valid(bs)) 661 return -1; 662 ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 663 if (ret < 0) 664 return -1; 665 } else { 666 memset(buf, 0, 512 * n); 667 } 668 } else { 669 if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) 670 return -1; 671 } 672 nb_sectors -= n; 673 sector_num += n; 674 buf += n * 512; 675 } 676 return 0; 677 } 678 679 static int vmdk_write(BlockDriverState *bs, int64_t sector_num, 680 const uint8_t *buf, int nb_sectors) 681 { 682 BDRVVmdkState *s = bs->opaque; 683 VmdkMetaData m_data; 684 int index_in_cluster, n; 685 uint64_t cluster_offset; 686 static int cid_update = 0; 687 688 if (sector_num > bs->total_sectors) { 689 fprintf(stderr, 690 "(VMDK) Wrong offset: sector_num=0x%" PRIx64 691 " total_sectors=0x%" PRIx64 "\n", 692 sector_num, bs->total_sectors); 693 return -1; 694 } 695 696 while (nb_sectors > 0) { 697 index_in_cluster = sector_num & (s->cluster_sectors - 1); 698 n = s->cluster_sectors - index_in_cluster; 699 if (n > nb_sectors) 700 n = nb_sectors; 701 cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); 702 if (!cluster_offset) 703 return -1; 704 705 if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) 706 return -1; 707 if (m_data.valid) { 708 /* update L2 tables */ 709 if (vmdk_L2update(bs, &m_data) == -1) 710 return -1; 711 } 712 nb_sectors -= n; 713 sector_num += n; 714 buf += n * 512; 715 716 // update CID on the first write every time the virtual disk is opened 717 if (!cid_update) { 718 vmdk_write_cid(bs, time(NULL)); 719 cid_update++; 720 } 721 } 722 return 0; 723 } 724 725 static int vmdk_create(const char *filename, QEMUOptionParameter *options) 726 { 727 int fd, i; 728 VMDK4Header header; 729 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; 730 static const char desc_template[] = 731 "# Disk DescriptorFile\n" 732 "version=1\n" 733 "CID=%x\n" 734 "parentCID=ffffffff\n" 735 "createType=\"monolithicSparse\"\n" 736 "\n" 737 "# Extent description\n" 738 "RW %" PRId64 " SPARSE \"%s\"\n" 739 "\n" 740 "# The Disk Data Base \n" 741 "#DDB\n" 742 "\n" 743 "ddb.virtualHWVersion = \"%d\"\n" 744 "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 745 "ddb.geometry.heads = \"16\"\n" 746 "ddb.geometry.sectors = \"63\"\n" 747 "ddb.adapterType = \"ide\"\n"; 748 char desc[1024]; 749 const char *real_filename, *temp_str; 750 int64_t total_size = 0; 751 const char *backing_file = NULL; 752 int flags = 0; 753 int ret; 754 755 // Read out options 756 while (options && options->name) { 757 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 758 total_size = options->value.n / 512; 759 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 760 backing_file = options->value.s; 761 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { 762 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0; 763 } 764 options++; 765 } 766 767 /* XXX: add support for backing file */ 768 if (backing_file) { 769 return vmdk_snapshot_create(filename, backing_file); 770 } 771 772 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 773 0644); 774 if (fd < 0) 775 return -errno; 776 magic = cpu_to_be32(VMDK4_MAGIC); 777 memset(&header, 0, sizeof(header)); 778 header.version = cpu_to_le32(1); 779 header.flags = cpu_to_le32(3); /* ?? */ 780 header.capacity = cpu_to_le64(total_size); 781 header.granularity = cpu_to_le64(128); 782 header.num_gtes_per_gte = cpu_to_le32(512); 783 784 grains = (total_size + header.granularity - 1) / header.granularity; 785 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; 786 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; 787 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; 788 789 header.desc_offset = 1; 790 header.desc_size = 20; 791 header.rgd_offset = header.desc_offset + header.desc_size; 792 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); 793 header.grain_offset = 794 ((header.gd_offset + gd_size + (gt_size * gt_count) + 795 header.granularity - 1) / header.granularity) * 796 header.granularity; 797 798 header.desc_offset = cpu_to_le64(header.desc_offset); 799 header.desc_size = cpu_to_le64(header.desc_size); 800 header.rgd_offset = cpu_to_le64(header.rgd_offset); 801 header.gd_offset = cpu_to_le64(header.gd_offset); 802 header.grain_offset = cpu_to_le64(header.grain_offset); 803 804 header.check_bytes[0] = 0xa; 805 header.check_bytes[1] = 0x20; 806 header.check_bytes[2] = 0xd; 807 header.check_bytes[3] = 0xa; 808 809 /* write all the data */ 810 ret = qemu_write_full(fd, &magic, sizeof(magic)); 811 if (ret != sizeof(magic)) { 812 ret = -errno; 813 goto exit; 814 } 815 ret = qemu_write_full(fd, &header, sizeof(header)); 816 if (ret != sizeof(header)) { 817 ret = -errno; 818 goto exit; 819 } 820 821 ret = ftruncate(fd, header.grain_offset << 9); 822 if (ret < 0) { 823 ret = -errno; 824 goto exit; 825 } 826 827 /* write grain directory */ 828 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); 829 for (i = 0, tmp = header.rgd_offset + gd_size; 830 i < gt_count; i++, tmp += gt_size) { 831 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 832 if (ret != sizeof(tmp)) { 833 ret = -errno; 834 goto exit; 835 } 836 } 837 838 /* write backup grain directory */ 839 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); 840 for (i = 0, tmp = header.gd_offset + gd_size; 841 i < gt_count; i++, tmp += gt_size) { 842 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 843 if (ret != sizeof(tmp)) { 844 ret = -errno; 845 goto exit; 846 } 847 } 848 849 /* compose the descriptor */ 850 real_filename = filename; 851 if ((temp_str = strrchr(real_filename, '\\')) != NULL) 852 real_filename = temp_str + 1; 853 if ((temp_str = strrchr(real_filename, '/')) != NULL) 854 real_filename = temp_str + 1; 855 if ((temp_str = strrchr(real_filename, ':')) != NULL) 856 real_filename = temp_str + 1; 857 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL), 858 total_size, real_filename, 859 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), 860 total_size / (int64_t)(63 * 16)); 861 862 /* write the descriptor */ 863 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET); 864 ret = qemu_write_full(fd, desc, strlen(desc)); 865 if (ret != strlen(desc)) { 866 ret = -errno; 867 goto exit; 868 } 869 870 ret = 0; 871 exit: 872 close(fd); 873 return ret; 874 } 875 876 static void vmdk_close(BlockDriverState *bs) 877 { 878 BDRVVmdkState *s = bs->opaque; 879 880 qemu_free(s->l1_table); 881 qemu_free(s->l2_cache); 882 // try to close parent image, if exist 883 vmdk_parent_close(s->hd); 884 bdrv_delete(s->hd); 885 } 886 887 static void vmdk_flush(BlockDriverState *bs) 888 { 889 BDRVVmdkState *s = bs->opaque; 890 bdrv_flush(s->hd); 891 } 892 893 894 static QEMUOptionParameter vmdk_create_options[] = { 895 { 896 .name = BLOCK_OPT_SIZE, 897 .type = OPT_SIZE, 898 .help = "Virtual disk size" 899 }, 900 { 901 .name = BLOCK_OPT_BACKING_FILE, 902 .type = OPT_STRING, 903 .help = "File name of a base image" 904 }, 905 { 906 .name = BLOCK_OPT_COMPAT6, 907 .type = OPT_FLAG, 908 .help = "VMDK version 6 image" 909 }, 910 { NULL } 911 }; 912 913 static BlockDriver bdrv_vmdk = { 914 .format_name = "vmdk", 915 .instance_size = sizeof(BDRVVmdkState), 916 .bdrv_probe = vmdk_probe, 917 .bdrv_open = vmdk_open, 918 .bdrv_read = vmdk_read, 919 .bdrv_write = vmdk_write, 920 .bdrv_close = vmdk_close, 921 .bdrv_create = vmdk_create, 922 .bdrv_flush = vmdk_flush, 923 .bdrv_is_allocated = vmdk_is_allocated, 924 925 .create_options = vmdk_create_options, 926 }; 927 928 static void bdrv_vmdk_init(void) 929 { 930 bdrv_register(&bdrv_vmdk); 931 } 932 933 block_init(bdrv_vmdk_init); 934