1019d6b8fSAnthony Liguori /* 2019d6b8fSAnthony Liguori * Block driver for the VMDK format 3019d6b8fSAnthony Liguori * 4019d6b8fSAnthony Liguori * Copyright (c) 2004 Fabrice Bellard 5019d6b8fSAnthony Liguori * Copyright (c) 2005 Filip Navara 6019d6b8fSAnthony Liguori * 7019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy 8019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal 9019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights 10019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is 12019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions: 13019d6b8fSAnthony Liguori * 14019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in 15019d6b8fSAnthony Liguori * all copies or substantial portions of the Software. 16019d6b8fSAnthony Liguori * 17019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23019d6b8fSAnthony Liguori * THE SOFTWARE. 24019d6b8fSAnthony Liguori */ 25019d6b8fSAnthony Liguori 26019d6b8fSAnthony Liguori #include "qemu-common.h" 27737e150eSPaolo Bonzini #include "block/block_int.h" 281de7afc9SPaolo Bonzini #include "qemu/module.h" 29caf71f86SPaolo Bonzini #include "migration/migration.h" 302923d34fSStefan Weil #include <zlib.h> 31019d6b8fSAnthony Liguori 32019d6b8fSAnthony Liguori #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 33019d6b8fSAnthony Liguori #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 34432bb170SFam Zheng #define VMDK4_COMPRESSION_DEFLATE 1 3595b0aa42SFam Zheng #define VMDK4_FLAG_NL_DETECT (1 << 0) 36bb45ded9SFam Zheng #define VMDK4_FLAG_RGD (1 << 1) 3714ead646SFam Zheng /* Zeroed-grain enable bit */ 3814ead646SFam Zheng #define VMDK4_FLAG_ZERO_GRAIN (1 << 2) 39432bb170SFam Zheng #define VMDK4_FLAG_COMPRESS (1 << 16) 40432bb170SFam Zheng #define VMDK4_FLAG_MARKER (1 << 17) 4165bd155cSKevin Wolf #define VMDK4_GD_AT_END 0xffffffffffffffffULL 42019d6b8fSAnthony Liguori 4314ead646SFam Zheng #define VMDK_GTE_ZEROED 0x1 4465f74725SFam Zheng 4565f74725SFam Zheng /* VMDK internal error codes */ 4665f74725SFam Zheng #define VMDK_OK 0 4765f74725SFam Zheng #define VMDK_ERROR (-1) 4865f74725SFam Zheng /* Cluster not allocated */ 4965f74725SFam Zheng #define VMDK_UNALLOC (-2) 5065f74725SFam Zheng #define VMDK_ZEROED (-3) 5165f74725SFam Zheng 5269e0b6dfSFam Zheng #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" 5369e0b6dfSFam Zheng 54019d6b8fSAnthony Liguori typedef struct { 55019d6b8fSAnthony Liguori uint32_t version; 56019d6b8fSAnthony Liguori uint32_t flags; 57019d6b8fSAnthony Liguori uint32_t disk_sectors; 58019d6b8fSAnthony Liguori uint32_t granularity; 59019d6b8fSAnthony Liguori uint32_t l1dir_offset; 60019d6b8fSAnthony Liguori uint32_t l1dir_size; 61019d6b8fSAnthony Liguori uint32_t file_sectors; 62019d6b8fSAnthony Liguori uint32_t cylinders; 63019d6b8fSAnthony Liguori uint32_t heads; 64019d6b8fSAnthony Liguori uint32_t sectors_per_track; 655d8caa54SFam Zheng } QEMU_PACKED VMDK3Header; 66019d6b8fSAnthony Liguori 67019d6b8fSAnthony Liguori typedef struct { 68019d6b8fSAnthony Liguori uint32_t version; 69019d6b8fSAnthony Liguori uint32_t flags; 70e98768d4SFam Zheng uint64_t capacity; 71e98768d4SFam Zheng uint64_t granularity; 72e98768d4SFam Zheng uint64_t desc_offset; 73e98768d4SFam Zheng uint64_t desc_size; 74e98768d4SFam Zheng uint32_t num_gtes_per_gte; 75e98768d4SFam Zheng uint64_t rgd_offset; 76e98768d4SFam Zheng uint64_t gd_offset; 77e98768d4SFam Zheng uint64_t grain_offset; 78019d6b8fSAnthony Liguori char filler[1]; 79019d6b8fSAnthony Liguori char check_bytes[4]; 80432bb170SFam Zheng uint16_t compressAlgorithm; 81541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header; 82019d6b8fSAnthony Liguori 83019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16 84019d6b8fSAnthony Liguori 85b3976d3cSFam Zheng typedef struct VmdkExtent { 86b3976d3cSFam Zheng BlockDriverState *file; 87b3976d3cSFam Zheng bool flat; 88432bb170SFam Zheng bool compressed; 89432bb170SFam Zheng bool has_marker; 9014ead646SFam Zheng bool has_zero_grain; 9114ead646SFam Zheng int version; 92b3976d3cSFam Zheng int64_t sectors; 93b3976d3cSFam Zheng int64_t end_sector; 947fa60fa3SFam Zheng int64_t flat_start_offset; 95019d6b8fSAnthony Liguori int64_t l1_table_offset; 96019d6b8fSAnthony Liguori int64_t l1_backup_table_offset; 97019d6b8fSAnthony Liguori uint32_t *l1_table; 98019d6b8fSAnthony Liguori uint32_t *l1_backup_table; 99019d6b8fSAnthony Liguori unsigned int l1_size; 100019d6b8fSAnthony Liguori uint32_t l1_entry_sectors; 101019d6b8fSAnthony Liguori 102019d6b8fSAnthony Liguori unsigned int l2_size; 103019d6b8fSAnthony Liguori uint32_t *l2_cache; 104019d6b8fSAnthony Liguori uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 105019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE]; 106019d6b8fSAnthony Liguori 107019d6b8fSAnthony Liguori unsigned int cluster_sectors; 108b3976d3cSFam Zheng } VmdkExtent; 109b3976d3cSFam Zheng 110b3976d3cSFam Zheng typedef struct BDRVVmdkState { 111848c66e8SPaolo Bonzini CoMutex lock; 112e98768d4SFam Zheng uint64_t desc_offset; 11369b4d86dSFam Zheng bool cid_updated; 114019d6b8fSAnthony Liguori uint32_t parent_cid; 115b3976d3cSFam Zheng int num_extents; 116b3976d3cSFam Zheng /* Extent array with num_extents entries, ascend ordered by address */ 117b3976d3cSFam Zheng VmdkExtent *extents; 1182bc3166cSKevin Wolf Error *migration_blocker; 119019d6b8fSAnthony Liguori } BDRVVmdkState; 120019d6b8fSAnthony Liguori 121019d6b8fSAnthony Liguori typedef struct VmdkMetaData { 122019d6b8fSAnthony Liguori uint32_t offset; 123019d6b8fSAnthony Liguori unsigned int l1_index; 124019d6b8fSAnthony Liguori unsigned int l2_index; 125019d6b8fSAnthony Liguori unsigned int l2_offset; 126019d6b8fSAnthony Liguori int valid; 127cdeaf1f1SFam Zheng uint32_t *l2_cache_entry; 128019d6b8fSAnthony Liguori } VmdkMetaData; 129019d6b8fSAnthony Liguori 130432bb170SFam Zheng typedef struct VmdkGrainMarker { 131432bb170SFam Zheng uint64_t lba; 132432bb170SFam Zheng uint32_t size; 133432bb170SFam Zheng uint8_t data[0]; 1345d8caa54SFam Zheng } QEMU_PACKED VmdkGrainMarker; 135432bb170SFam Zheng 13665bd155cSKevin Wolf enum { 13765bd155cSKevin Wolf MARKER_END_OF_STREAM = 0, 13865bd155cSKevin Wolf MARKER_GRAIN_TABLE = 1, 13965bd155cSKevin Wolf MARKER_GRAIN_DIRECTORY = 2, 14065bd155cSKevin Wolf MARKER_FOOTER = 3, 14165bd155cSKevin Wolf }; 14265bd155cSKevin Wolf 143019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 144019d6b8fSAnthony Liguori { 145019d6b8fSAnthony Liguori uint32_t magic; 146019d6b8fSAnthony Liguori 147ae261c86SFam Zheng if (buf_size < 4) { 148019d6b8fSAnthony Liguori return 0; 149ae261c86SFam Zheng } 150019d6b8fSAnthony Liguori magic = be32_to_cpu(*(uint32_t *)buf); 151019d6b8fSAnthony Liguori if (magic == VMDK3_MAGIC || 15201fc99d6SFam Zheng magic == VMDK4_MAGIC) { 153019d6b8fSAnthony Liguori return 100; 15401fc99d6SFam Zheng } else { 15501fc99d6SFam Zheng const char *p = (const char *)buf; 15601fc99d6SFam Zheng const char *end = p + buf_size; 15701fc99d6SFam Zheng while (p < end) { 15801fc99d6SFam Zheng if (*p == '#') { 15901fc99d6SFam Zheng /* skip comment line */ 16001fc99d6SFam Zheng while (p < end && *p != '\n') { 16101fc99d6SFam Zheng p++; 16201fc99d6SFam Zheng } 16301fc99d6SFam Zheng p++; 16401fc99d6SFam Zheng continue; 16501fc99d6SFam Zheng } 16601fc99d6SFam Zheng if (*p == ' ') { 16701fc99d6SFam Zheng while (p < end && *p == ' ') { 16801fc99d6SFam Zheng p++; 16901fc99d6SFam Zheng } 17001fc99d6SFam Zheng /* skip '\r' if windows line endings used. */ 17101fc99d6SFam Zheng if (p < end && *p == '\r') { 17201fc99d6SFam Zheng p++; 17301fc99d6SFam Zheng } 17401fc99d6SFam Zheng /* only accept blank lines before 'version=' line */ 17501fc99d6SFam Zheng if (p == end || *p != '\n') { 176019d6b8fSAnthony Liguori return 0; 177019d6b8fSAnthony Liguori } 17801fc99d6SFam Zheng p++; 17901fc99d6SFam Zheng continue; 18001fc99d6SFam Zheng } 18101fc99d6SFam Zheng if (end - p >= strlen("version=X\n")) { 18201fc99d6SFam Zheng if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || 18301fc99d6SFam Zheng strncmp("version=2\n", p, strlen("version=2\n")) == 0) { 18401fc99d6SFam Zheng return 100; 18501fc99d6SFam Zheng } 18601fc99d6SFam Zheng } 18701fc99d6SFam Zheng if (end - p >= strlen("version=X\r\n")) { 18801fc99d6SFam Zheng if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || 18901fc99d6SFam Zheng strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) { 19001fc99d6SFam Zheng return 100; 19101fc99d6SFam Zheng } 19201fc99d6SFam Zheng } 19301fc99d6SFam Zheng return 0; 19401fc99d6SFam Zheng } 19501fc99d6SFam Zheng return 0; 19601fc99d6SFam Zheng } 19701fc99d6SFam Zheng } 198019d6b8fSAnthony Liguori 199019d6b8fSAnthony Liguori #define CHECK_CID 1 200019d6b8fSAnthony Liguori 201019d6b8fSAnthony Liguori #define SECTOR_SIZE 512 202f66fd6c3SFam Zheng #define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */ 203f66fd6c3SFam Zheng #define BUF_SIZE 4096 204f66fd6c3SFam Zheng #define HEADER_SIZE 512 /* first sector of 512 bytes */ 205019d6b8fSAnthony Liguori 206b3976d3cSFam Zheng static void vmdk_free_extents(BlockDriverState *bs) 207b3976d3cSFam Zheng { 208b3976d3cSFam Zheng int i; 209b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque; 210b3c0bfb6SFam Zheng VmdkExtent *e; 211b3976d3cSFam Zheng 212b3976d3cSFam Zheng for (i = 0; i < s->num_extents; i++) { 213b3c0bfb6SFam Zheng e = &s->extents[i]; 214b3c0bfb6SFam Zheng g_free(e->l1_table); 215b3c0bfb6SFam Zheng g_free(e->l2_cache); 216b3c0bfb6SFam Zheng g_free(e->l1_backup_table); 217b3c0bfb6SFam Zheng if (e->file != bs->file) { 218b3c0bfb6SFam Zheng bdrv_delete(e->file); 219b3c0bfb6SFam Zheng } 220b3976d3cSFam Zheng } 2217267c094SAnthony Liguori g_free(s->extents); 222b3976d3cSFam Zheng } 223b3976d3cSFam Zheng 22486c6b429SFam Zheng static void vmdk_free_last_extent(BlockDriverState *bs) 22586c6b429SFam Zheng { 22686c6b429SFam Zheng BDRVVmdkState *s = bs->opaque; 22786c6b429SFam Zheng 22886c6b429SFam Zheng if (s->num_extents == 0) { 22986c6b429SFam Zheng return; 23086c6b429SFam Zheng } 23186c6b429SFam Zheng s->num_extents--; 23286c6b429SFam Zheng s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent)); 23386c6b429SFam Zheng } 23486c6b429SFam Zheng 235019d6b8fSAnthony Liguori static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) 236019d6b8fSAnthony Liguori { 237019d6b8fSAnthony Liguori char desc[DESC_SIZE]; 2388379e46dSPavel Borzenkov uint32_t cid = 0xffffffff; 239019d6b8fSAnthony Liguori const char *p_name, *cid_str; 240019d6b8fSAnthony Liguori size_t cid_str_size; 241e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 24299f1835dSKevin Wolf int ret; 243019d6b8fSAnthony Liguori 24499f1835dSKevin Wolf ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 24599f1835dSKevin Wolf if (ret < 0) { 246019d6b8fSAnthony Liguori return 0; 247e1da9b24SFam Zheng } 248019d6b8fSAnthony Liguori 249019d6b8fSAnthony Liguori if (parent) { 250019d6b8fSAnthony Liguori cid_str = "parentCID"; 251019d6b8fSAnthony Liguori cid_str_size = sizeof("parentCID"); 252019d6b8fSAnthony Liguori } else { 253019d6b8fSAnthony Liguori cid_str = "CID"; 254019d6b8fSAnthony Liguori cid_str_size = sizeof("CID"); 255019d6b8fSAnthony Liguori } 256019d6b8fSAnthony Liguori 25793897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0'; 258ae261c86SFam Zheng p_name = strstr(desc, cid_str); 259ae261c86SFam Zheng if (p_name != NULL) { 260019d6b8fSAnthony Liguori p_name += cid_str_size; 261019d6b8fSAnthony Liguori sscanf(p_name, "%x", &cid); 262019d6b8fSAnthony Liguori } 263019d6b8fSAnthony Liguori 264019d6b8fSAnthony Liguori return cid; 265019d6b8fSAnthony Liguori } 266019d6b8fSAnthony Liguori 267019d6b8fSAnthony Liguori static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 268019d6b8fSAnthony Liguori { 269019d6b8fSAnthony Liguori char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; 270019d6b8fSAnthony Liguori char *p_name, *tmp_str; 271e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 27299f1835dSKevin Wolf int ret; 273019d6b8fSAnthony Liguori 27499f1835dSKevin Wolf ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 27599f1835dSKevin Wolf if (ret < 0) { 27699f1835dSKevin Wolf return ret; 277e1da9b24SFam Zheng } 278019d6b8fSAnthony Liguori 27993897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0'; 280019d6b8fSAnthony Liguori tmp_str = strstr(desc, "parentCID"); 28193897b9fSKevin Wolf if (tmp_str == NULL) { 28293897b9fSKevin Wolf return -EINVAL; 28393897b9fSKevin Wolf } 28493897b9fSKevin Wolf 285019d6b8fSAnthony Liguori pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); 286ae261c86SFam Zheng p_name = strstr(desc, "CID"); 287ae261c86SFam Zheng if (p_name != NULL) { 288019d6b8fSAnthony Liguori p_name += sizeof("CID"); 289019d6b8fSAnthony Liguori snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); 290019d6b8fSAnthony Liguori pstrcat(desc, sizeof(desc), tmp_desc); 291019d6b8fSAnthony Liguori } 292019d6b8fSAnthony Liguori 29399f1835dSKevin Wolf ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE); 29499f1835dSKevin Wolf if (ret < 0) { 29599f1835dSKevin Wolf return ret; 296e1da9b24SFam Zheng } 29799f1835dSKevin Wolf 298019d6b8fSAnthony Liguori return 0; 299019d6b8fSAnthony Liguori } 300019d6b8fSAnthony Liguori 301019d6b8fSAnthony Liguori static int vmdk_is_cid_valid(BlockDriverState *bs) 302019d6b8fSAnthony Liguori { 303019d6b8fSAnthony Liguori #ifdef CHECK_CID 304019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 305b171271aSKevin Wolf BlockDriverState *p_bs = bs->backing_hd; 306019d6b8fSAnthony Liguori uint32_t cur_pcid; 307019d6b8fSAnthony Liguori 308019d6b8fSAnthony Liguori if (p_bs) { 309019d6b8fSAnthony Liguori cur_pcid = vmdk_read_cid(p_bs, 0); 310ae261c86SFam Zheng if (s->parent_cid != cur_pcid) { 311ae261c86SFam Zheng /* CID not valid */ 312019d6b8fSAnthony Liguori return 0; 313019d6b8fSAnthony Liguori } 314ae261c86SFam Zheng } 315019d6b8fSAnthony Liguori #endif 316ae261c86SFam Zheng /* CID valid */ 317019d6b8fSAnthony Liguori return 1; 318019d6b8fSAnthony Liguori } 319019d6b8fSAnthony Liguori 3203897575fSJeff Cody /* Queue extents, if any, for reopen() */ 3213897575fSJeff Cody static int vmdk_reopen_prepare(BDRVReopenState *state, 3223897575fSJeff Cody BlockReopenQueue *queue, Error **errp) 3233897575fSJeff Cody { 3243897575fSJeff Cody BDRVVmdkState *s; 3253897575fSJeff Cody int ret = -1; 3263897575fSJeff Cody int i; 3273897575fSJeff Cody VmdkExtent *e; 3283897575fSJeff Cody 3293897575fSJeff Cody assert(state != NULL); 3303897575fSJeff Cody assert(state->bs != NULL); 3313897575fSJeff Cody 3323897575fSJeff Cody if (queue == NULL) { 3333897575fSJeff Cody error_set(errp, ERROR_CLASS_GENERIC_ERROR, 3343897575fSJeff Cody "No reopen queue for VMDK extents"); 3353897575fSJeff Cody goto exit; 3363897575fSJeff Cody } 3373897575fSJeff Cody 3383897575fSJeff Cody s = state->bs->opaque; 3393897575fSJeff Cody 3403897575fSJeff Cody assert(s != NULL); 3413897575fSJeff Cody 3423897575fSJeff Cody for (i = 0; i < s->num_extents; i++) { 3433897575fSJeff Cody e = &s->extents[i]; 3443897575fSJeff Cody if (e->file != state->bs->file) { 3453897575fSJeff Cody bdrv_reopen_queue(queue, e->file, state->flags); 3463897575fSJeff Cody } 3473897575fSJeff Cody } 3483897575fSJeff Cody ret = 0; 3493897575fSJeff Cody 3503897575fSJeff Cody exit: 3513897575fSJeff Cody return ret; 3523897575fSJeff Cody } 3533897575fSJeff Cody 3549949f97eSKevin Wolf static int vmdk_parent_open(BlockDriverState *bs) 355019d6b8fSAnthony Liguori { 356019d6b8fSAnthony Liguori char *p_name; 3577fa60fa3SFam Zheng char desc[DESC_SIZE + 1]; 358e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 359588b65a3SPaolo Bonzini int ret; 360019d6b8fSAnthony Liguori 3617fa60fa3SFam Zheng desc[DESC_SIZE] = '\0'; 362588b65a3SPaolo Bonzini ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 363588b65a3SPaolo Bonzini if (ret < 0) { 364588b65a3SPaolo Bonzini return ret; 365e1da9b24SFam Zheng } 366019d6b8fSAnthony Liguori 367ae261c86SFam Zheng p_name = strstr(desc, "parentFileNameHint"); 368ae261c86SFam Zheng if (p_name != NULL) { 369019d6b8fSAnthony Liguori char *end_name; 370019d6b8fSAnthony Liguori 371019d6b8fSAnthony Liguori p_name += sizeof("parentFileNameHint") + 1; 372ae261c86SFam Zheng end_name = strchr(p_name, '\"'); 373ae261c86SFam Zheng if (end_name == NULL) { 374588b65a3SPaolo Bonzini return -EINVAL; 375ae261c86SFam Zheng } 376ae261c86SFam Zheng if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { 377588b65a3SPaolo Bonzini return -EINVAL; 378ae261c86SFam Zheng } 379019d6b8fSAnthony Liguori 380b171271aSKevin Wolf pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 381019d6b8fSAnthony Liguori } 382019d6b8fSAnthony Liguori 383019d6b8fSAnthony Liguori return 0; 384019d6b8fSAnthony Liguori } 385019d6b8fSAnthony Liguori 386b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent 387b3976d3cSFam Zheng * address. return NULL if allocation failed. */ 388*8aa1331cSFam Zheng static int vmdk_add_extent(BlockDriverState *bs, 389b3976d3cSFam Zheng BlockDriverState *file, bool flat, int64_t sectors, 390b3976d3cSFam Zheng int64_t l1_offset, int64_t l1_backup_offset, 391b3976d3cSFam Zheng uint32_t l1_size, 392*8aa1331cSFam Zheng int l2_size, uint64_t cluster_sectors, 393*8aa1331cSFam Zheng VmdkExtent **new_extent) 394b3976d3cSFam Zheng { 395b3976d3cSFam Zheng VmdkExtent *extent; 396b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque; 397b3976d3cSFam Zheng 398*8aa1331cSFam Zheng if (cluster_sectors > 0x200000) { 399*8aa1331cSFam Zheng /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ 400*8aa1331cSFam Zheng error_report("invalid granularity, image may be corrupt"); 401*8aa1331cSFam Zheng return -EINVAL; 402*8aa1331cSFam Zheng } 403*8aa1331cSFam Zheng 4047267c094SAnthony Liguori s->extents = g_realloc(s->extents, 405b3976d3cSFam Zheng (s->num_extents + 1) * sizeof(VmdkExtent)); 406b3976d3cSFam Zheng extent = &s->extents[s->num_extents]; 407b3976d3cSFam Zheng s->num_extents++; 408b3976d3cSFam Zheng 409b3976d3cSFam Zheng memset(extent, 0, sizeof(VmdkExtent)); 410b3976d3cSFam Zheng extent->file = file; 411b3976d3cSFam Zheng extent->flat = flat; 412b3976d3cSFam Zheng extent->sectors = sectors; 413b3976d3cSFam Zheng extent->l1_table_offset = l1_offset; 414b3976d3cSFam Zheng extent->l1_backup_table_offset = l1_backup_offset; 415b3976d3cSFam Zheng extent->l1_size = l1_size; 416b3976d3cSFam Zheng extent->l1_entry_sectors = l2_size * cluster_sectors; 417b3976d3cSFam Zheng extent->l2_size = l2_size; 418b3976d3cSFam Zheng extent->cluster_sectors = cluster_sectors; 419b3976d3cSFam Zheng 420b3976d3cSFam Zheng if (s->num_extents > 1) { 421b3976d3cSFam Zheng extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; 422b3976d3cSFam Zheng } else { 423b3976d3cSFam Zheng extent->end_sector = extent->sectors; 424b3976d3cSFam Zheng } 425b3976d3cSFam Zheng bs->total_sectors = extent->end_sector; 426*8aa1331cSFam Zheng if (new_extent) { 427*8aa1331cSFam Zheng *new_extent = extent; 428*8aa1331cSFam Zheng } 429*8aa1331cSFam Zheng return 0; 430b3976d3cSFam Zheng } 431b3976d3cSFam Zheng 432b4b3ab14SFam Zheng static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) 433019d6b8fSAnthony Liguori { 434b4b3ab14SFam Zheng int ret; 435b4b3ab14SFam Zheng int l1_size, i; 436b4b3ab14SFam Zheng 437b4b3ab14SFam Zheng /* read the L1 table */ 438b4b3ab14SFam Zheng l1_size = extent->l1_size * sizeof(uint32_t); 4397267c094SAnthony Liguori extent->l1_table = g_malloc(l1_size); 440b4b3ab14SFam Zheng ret = bdrv_pread(extent->file, 441b4b3ab14SFam Zheng extent->l1_table_offset, 442b4b3ab14SFam Zheng extent->l1_table, 443b4b3ab14SFam Zheng l1_size); 444b4b3ab14SFam Zheng if (ret < 0) { 445b4b3ab14SFam Zheng goto fail_l1; 446b4b3ab14SFam Zheng } 447b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) { 448b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_table[i]); 449b4b3ab14SFam Zheng } 450b4b3ab14SFam Zheng 451b4b3ab14SFam Zheng if (extent->l1_backup_table_offset) { 4527267c094SAnthony Liguori extent->l1_backup_table = g_malloc(l1_size); 453b4b3ab14SFam Zheng ret = bdrv_pread(extent->file, 454b4b3ab14SFam Zheng extent->l1_backup_table_offset, 455b4b3ab14SFam Zheng extent->l1_backup_table, 456b4b3ab14SFam Zheng l1_size); 457b4b3ab14SFam Zheng if (ret < 0) { 458b4b3ab14SFam Zheng goto fail_l1b; 459b4b3ab14SFam Zheng } 460b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) { 461b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_backup_table[i]); 462b4b3ab14SFam Zheng } 463b4b3ab14SFam Zheng } 464b4b3ab14SFam Zheng 465b4b3ab14SFam Zheng extent->l2_cache = 4667267c094SAnthony Liguori g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); 467b4b3ab14SFam Zheng return 0; 468b4b3ab14SFam Zheng fail_l1b: 4697267c094SAnthony Liguori g_free(extent->l1_backup_table); 470b4b3ab14SFam Zheng fail_l1: 4717267c094SAnthony Liguori g_free(extent->l1_table); 472b4b3ab14SFam Zheng return ret; 473b4b3ab14SFam Zheng } 474b4b3ab14SFam Zheng 47586c6b429SFam Zheng static int vmdk_open_vmdk3(BlockDriverState *bs, 47686c6b429SFam Zheng BlockDriverState *file, 47786c6b429SFam Zheng int flags) 478b4b3ab14SFam Zheng { 479b4b3ab14SFam Zheng int ret; 480019d6b8fSAnthony Liguori uint32_t magic; 481019d6b8fSAnthony Liguori VMDK3Header header; 482b4b3ab14SFam Zheng VmdkExtent *extent; 483b4b3ab14SFam Zheng 48486c6b429SFam Zheng ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 485b4b3ab14SFam Zheng if (ret < 0) { 48686c6b429SFam Zheng return ret; 487b3976d3cSFam Zheng } 488*8aa1331cSFam Zheng 489*8aa1331cSFam Zheng ret = vmdk_add_extent(bs, 490b4b3ab14SFam Zheng bs->file, false, 491b3976d3cSFam Zheng le32_to_cpu(header.disk_sectors), 492b4b3ab14SFam Zheng le32_to_cpu(header.l1dir_offset) << 9, 493b4b3ab14SFam Zheng 0, 1 << 6, 1 << 9, 494*8aa1331cSFam Zheng le32_to_cpu(header.granularity), 495*8aa1331cSFam Zheng &extent); 496*8aa1331cSFam Zheng if (ret < 0) { 497*8aa1331cSFam Zheng return ret; 498*8aa1331cSFam Zheng } 499b4b3ab14SFam Zheng ret = vmdk_init_tables(bs, extent); 500b4b3ab14SFam Zheng if (ret) { 50186c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */ 50286c6b429SFam Zheng vmdk_free_last_extent(bs); 503b4b3ab14SFam Zheng } 504b4b3ab14SFam Zheng return ret; 505b4b3ab14SFam Zheng } 506b4b3ab14SFam Zheng 507f16f509dSFam Zheng static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 508e98768d4SFam Zheng uint64_t desc_offset); 509f16f509dSFam Zheng 51086c6b429SFam Zheng static int vmdk_open_vmdk4(BlockDriverState *bs, 51186c6b429SFam Zheng BlockDriverState *file, 51286c6b429SFam Zheng int flags) 513b4b3ab14SFam Zheng { 514b4b3ab14SFam Zheng int ret; 515b4b3ab14SFam Zheng uint32_t magic; 516b4b3ab14SFam Zheng uint32_t l1_size, l1_entry_sectors; 517019d6b8fSAnthony Liguori VMDK4Header header; 518b4b3ab14SFam Zheng VmdkExtent *extent; 519bb45ded9SFam Zheng int64_t l1_backup_offset = 0; 520b4b3ab14SFam Zheng 52186c6b429SFam Zheng ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 522b4b3ab14SFam Zheng if (ret < 0) { 52386c6b429SFam Zheng return ret; 524b3976d3cSFam Zheng } 5255a394b9eSStefan Hajnoczi if (header.capacity == 0) { 526e98768d4SFam Zheng uint64_t desc_offset = le64_to_cpu(header.desc_offset); 5275a394b9eSStefan Hajnoczi if (desc_offset) { 5285a394b9eSStefan Hajnoczi return vmdk_open_desc_file(bs, flags, desc_offset << 9); 5295a394b9eSStefan Hajnoczi } 530f16f509dSFam Zheng } 53165bd155cSKevin Wolf 53265bd155cSKevin Wolf if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { 53365bd155cSKevin Wolf /* 53465bd155cSKevin Wolf * The footer takes precedence over the header, so read it in. The 53565bd155cSKevin Wolf * footer starts at offset -1024 from the end: One sector for the 53665bd155cSKevin Wolf * footer, and another one for the end-of-stream marker. 53765bd155cSKevin Wolf */ 53865bd155cSKevin Wolf struct { 53965bd155cSKevin Wolf struct { 54065bd155cSKevin Wolf uint64_t val; 54165bd155cSKevin Wolf uint32_t size; 54265bd155cSKevin Wolf uint32_t type; 54365bd155cSKevin Wolf uint8_t pad[512 - 16]; 54465bd155cSKevin Wolf } QEMU_PACKED footer_marker; 54565bd155cSKevin Wolf 54665bd155cSKevin Wolf uint32_t magic; 54765bd155cSKevin Wolf VMDK4Header header; 54865bd155cSKevin Wolf uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; 54965bd155cSKevin Wolf 55065bd155cSKevin Wolf struct { 55165bd155cSKevin Wolf uint64_t val; 55265bd155cSKevin Wolf uint32_t size; 55365bd155cSKevin Wolf uint32_t type; 55465bd155cSKevin Wolf uint8_t pad[512 - 16]; 55565bd155cSKevin Wolf } QEMU_PACKED eos_marker; 55665bd155cSKevin Wolf } QEMU_PACKED footer; 55765bd155cSKevin Wolf 55865bd155cSKevin Wolf ret = bdrv_pread(file, 55965bd155cSKevin Wolf bs->file->total_sectors * 512 - 1536, 56065bd155cSKevin Wolf &footer, sizeof(footer)); 56165bd155cSKevin Wolf if (ret < 0) { 56265bd155cSKevin Wolf return ret; 56365bd155cSKevin Wolf } 56465bd155cSKevin Wolf 56565bd155cSKevin Wolf /* Some sanity checks for the footer */ 56665bd155cSKevin Wolf if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || 56765bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.size) != 0 || 56865bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || 56965bd155cSKevin Wolf le64_to_cpu(footer.eos_marker.val) != 0 || 57065bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.size) != 0 || 57165bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) 57265bd155cSKevin Wolf { 57365bd155cSKevin Wolf return -EINVAL; 57465bd155cSKevin Wolf } 57565bd155cSKevin Wolf 57665bd155cSKevin Wolf header = footer.header; 57765bd155cSKevin Wolf } 57865bd155cSKevin Wolf 57996c51eb5SFam Zheng if (le32_to_cpu(header.version) >= 3) { 58096c51eb5SFam Zheng char buf[64]; 58196c51eb5SFam Zheng snprintf(buf, sizeof(buf), "VMDK version %d", 58296c51eb5SFam Zheng le32_to_cpu(header.version)); 58396c51eb5SFam Zheng qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, 58496c51eb5SFam Zheng bs->device_name, "vmdk", buf); 58596c51eb5SFam Zheng return -ENOTSUP; 58696c51eb5SFam Zheng } 58796c51eb5SFam Zheng 588b3976d3cSFam Zheng l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) 589b3976d3cSFam Zheng * le64_to_cpu(header.granularity); 59075d12341SStefan Weil if (l1_entry_sectors == 0) { 59186c6b429SFam Zheng return -EINVAL; 59286c6b429SFam Zheng } 593b3976d3cSFam Zheng l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1) 594b3976d3cSFam Zheng / l1_entry_sectors; 595bb45ded9SFam Zheng if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { 596bb45ded9SFam Zheng l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; 597bb45ded9SFam Zheng } 598*8aa1331cSFam Zheng ret = vmdk_add_extent(bs, file, false, 599b3976d3cSFam Zheng le64_to_cpu(header.capacity), 600b3976d3cSFam Zheng le64_to_cpu(header.gd_offset) << 9, 601bb45ded9SFam Zheng l1_backup_offset, 602b3976d3cSFam Zheng l1_size, 603b3976d3cSFam Zheng le32_to_cpu(header.num_gtes_per_gte), 604*8aa1331cSFam Zheng le64_to_cpu(header.granularity), 605*8aa1331cSFam Zheng &extent); 606*8aa1331cSFam Zheng if (ret < 0) { 607*8aa1331cSFam Zheng return ret; 608*8aa1331cSFam Zheng } 609432bb170SFam Zheng extent->compressed = 610432bb170SFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; 611432bb170SFam Zheng extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; 61214ead646SFam Zheng extent->version = le32_to_cpu(header.version); 61314ead646SFam Zheng extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; 614b4b3ab14SFam Zheng ret = vmdk_init_tables(bs, extent); 615b4b3ab14SFam Zheng if (ret) { 61686c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */ 61786c6b429SFam Zheng vmdk_free_last_extent(bs); 618019d6b8fSAnthony Liguori } 619b4b3ab14SFam Zheng return ret; 620b4b3ab14SFam Zheng } 621b4b3ab14SFam Zheng 6227fa60fa3SFam Zheng /* find an option value out of descriptor file */ 6237fa60fa3SFam Zheng static int vmdk_parse_description(const char *desc, const char *opt_name, 6247fa60fa3SFam Zheng char *buf, int buf_size) 6257fa60fa3SFam Zheng { 6267fa60fa3SFam Zheng char *opt_pos, *opt_end; 6277fa60fa3SFam Zheng const char *end = desc + strlen(desc); 6287fa60fa3SFam Zheng 6297fa60fa3SFam Zheng opt_pos = strstr(desc, opt_name); 6307fa60fa3SFam Zheng if (!opt_pos) { 63165f74725SFam Zheng return VMDK_ERROR; 6327fa60fa3SFam Zheng } 6337fa60fa3SFam Zheng /* Skip "=\"" following opt_name */ 6347fa60fa3SFam Zheng opt_pos += strlen(opt_name) + 2; 6357fa60fa3SFam Zheng if (opt_pos >= end) { 63665f74725SFam Zheng return VMDK_ERROR; 6377fa60fa3SFam Zheng } 6387fa60fa3SFam Zheng opt_end = opt_pos; 6397fa60fa3SFam Zheng while (opt_end < end && *opt_end != '"') { 6407fa60fa3SFam Zheng opt_end++; 6417fa60fa3SFam Zheng } 6427fa60fa3SFam Zheng if (opt_end == end || buf_size < opt_end - opt_pos + 1) { 64365f74725SFam Zheng return VMDK_ERROR; 6447fa60fa3SFam Zheng } 6457fa60fa3SFam Zheng pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); 64665f74725SFam Zheng return VMDK_OK; 6477fa60fa3SFam Zheng } 6487fa60fa3SFam Zheng 64986c6b429SFam Zheng /* Open an extent file and append to bs array */ 65086c6b429SFam Zheng static int vmdk_open_sparse(BlockDriverState *bs, 65186c6b429SFam Zheng BlockDriverState *file, 65286c6b429SFam Zheng int flags) 65386c6b429SFam Zheng { 65486c6b429SFam Zheng uint32_t magic; 65586c6b429SFam Zheng 65686c6b429SFam Zheng if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) { 65786c6b429SFam Zheng return -EIO; 65886c6b429SFam Zheng } 65986c6b429SFam Zheng 66086c6b429SFam Zheng magic = be32_to_cpu(magic); 66186c6b429SFam Zheng switch (magic) { 66286c6b429SFam Zheng case VMDK3_MAGIC: 66386c6b429SFam Zheng return vmdk_open_vmdk3(bs, file, flags); 66486c6b429SFam Zheng break; 66586c6b429SFam Zheng case VMDK4_MAGIC: 66686c6b429SFam Zheng return vmdk_open_vmdk4(bs, file, flags); 66786c6b429SFam Zheng break; 66886c6b429SFam Zheng default: 66915bac0d5SStefan Weil return -EMEDIUMTYPE; 67086c6b429SFam Zheng break; 67186c6b429SFam Zheng } 67286c6b429SFam Zheng } 67386c6b429SFam Zheng 6747fa60fa3SFam Zheng static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, 6757fa60fa3SFam Zheng const char *desc_file_path) 6767fa60fa3SFam Zheng { 6777fa60fa3SFam Zheng int ret; 6787fa60fa3SFam Zheng char access[11]; 6797fa60fa3SFam Zheng char type[11]; 6807fa60fa3SFam Zheng char fname[512]; 6817fa60fa3SFam Zheng const char *p = desc; 6827fa60fa3SFam Zheng int64_t sectors = 0; 6837fa60fa3SFam Zheng int64_t flat_offset; 68486c6b429SFam Zheng char extent_path[PATH_MAX]; 68586c6b429SFam Zheng BlockDriverState *extent_file; 6867fa60fa3SFam Zheng 6877fa60fa3SFam Zheng while (*p) { 6887fa60fa3SFam Zheng /* parse extent line: 6897fa60fa3SFam Zheng * RW [size in sectors] FLAT "file-name.vmdk" OFFSET 6907fa60fa3SFam Zheng * or 6917fa60fa3SFam Zheng * RW [size in sectors] SPARSE "file-name.vmdk" 6927fa60fa3SFam Zheng */ 6937fa60fa3SFam Zheng flat_offset = -1; 694cd923475SPhilipp Hahn ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, 6957fa60fa3SFam Zheng access, §ors, type, fname, &flat_offset); 6967fa60fa3SFam Zheng if (ret < 4 || strcmp(access, "RW")) { 6977fa60fa3SFam Zheng goto next_line; 6987fa60fa3SFam Zheng } else if (!strcmp(type, "FLAT")) { 6997fa60fa3SFam Zheng if (ret != 5 || flat_offset < 0) { 7007fa60fa3SFam Zheng return -EINVAL; 7017fa60fa3SFam Zheng } 7027fa60fa3SFam Zheng } else if (ret != 4) { 7037fa60fa3SFam Zheng return -EINVAL; 7047fa60fa3SFam Zheng } 7057fa60fa3SFam Zheng 7067fa60fa3SFam Zheng if (sectors <= 0 || 7077fa60fa3SFam Zheng (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) || 7087fa60fa3SFam Zheng (strcmp(access, "RW"))) { 7097fa60fa3SFam Zheng goto next_line; 7107fa60fa3SFam Zheng } 7117fa60fa3SFam Zheng 7127fa60fa3SFam Zheng path_combine(extent_path, sizeof(extent_path), 7137fa60fa3SFam Zheng desc_file_path, fname); 714787e4a85SKevin Wolf ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags); 7157fa60fa3SFam Zheng if (ret) { 7167fa60fa3SFam Zheng return ret; 7177fa60fa3SFam Zheng } 71886c6b429SFam Zheng 71986c6b429SFam Zheng /* save to extents array */ 72086c6b429SFam Zheng if (!strcmp(type, "FLAT")) { 72186c6b429SFam Zheng /* FLAT extent */ 72286c6b429SFam Zheng VmdkExtent *extent; 72386c6b429SFam Zheng 724*8aa1331cSFam Zheng ret = vmdk_add_extent(bs, extent_file, true, sectors, 725*8aa1331cSFam Zheng 0, 0, 0, 0, sectors, &extent); 726*8aa1331cSFam Zheng if (ret < 0) { 727*8aa1331cSFam Zheng return ret; 728*8aa1331cSFam Zheng } 729f16f509dSFam Zheng extent->flat_start_offset = flat_offset << 9; 73086c6b429SFam Zheng } else if (!strcmp(type, "SPARSE")) { 73186c6b429SFam Zheng /* SPARSE extent */ 73286c6b429SFam Zheng ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); 73386c6b429SFam Zheng if (ret) { 73486c6b429SFam Zheng bdrv_delete(extent_file); 73586c6b429SFam Zheng return ret; 73686c6b429SFam Zheng } 7377fa60fa3SFam Zheng } else { 7387fa60fa3SFam Zheng fprintf(stderr, 7397fa60fa3SFam Zheng "VMDK: Not supported extent type \"%s\""".\n", type); 7407fa60fa3SFam Zheng return -ENOTSUP; 7417fa60fa3SFam Zheng } 7427fa60fa3SFam Zheng next_line: 7437fa60fa3SFam Zheng /* move to next line */ 7447fa60fa3SFam Zheng while (*p && *p != '\n') { 7457fa60fa3SFam Zheng p++; 7467fa60fa3SFam Zheng } 7477fa60fa3SFam Zheng p++; 7487fa60fa3SFam Zheng } 7497fa60fa3SFam Zheng return 0; 7507fa60fa3SFam Zheng } 7517fa60fa3SFam Zheng 752f16f509dSFam Zheng static int vmdk_open_desc_file(BlockDriverState *bs, int flags, 753e98768d4SFam Zheng uint64_t desc_offset) 7547fa60fa3SFam Zheng { 7557fa60fa3SFam Zheng int ret; 7560bed087dSEvgeny Budilovsky char *buf = NULL; 7577fa60fa3SFam Zheng char ct[128]; 7587fa60fa3SFam Zheng BDRVVmdkState *s = bs->opaque; 7590bed087dSEvgeny Budilovsky int64_t size; 7607fa60fa3SFam Zheng 7610bed087dSEvgeny Budilovsky size = bdrv_getlength(bs->file); 7620bed087dSEvgeny Budilovsky if (size < 0) { 7630bed087dSEvgeny Budilovsky return -EINVAL; 7647fa60fa3SFam Zheng } 7650bed087dSEvgeny Budilovsky 7660bed087dSEvgeny Budilovsky size = MIN(size, 1 << 20); /* avoid unbounded allocation */ 7670bed087dSEvgeny Budilovsky buf = g_malloc0(size + 1); 7680bed087dSEvgeny Budilovsky 7690bed087dSEvgeny Budilovsky ret = bdrv_pread(bs->file, desc_offset, buf, size); 7700bed087dSEvgeny Budilovsky if (ret < 0) { 7710bed087dSEvgeny Budilovsky goto exit; 7720bed087dSEvgeny Budilovsky } 7737fa60fa3SFam Zheng if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { 7740bed087dSEvgeny Budilovsky ret = -EMEDIUMTYPE; 7750bed087dSEvgeny Budilovsky goto exit; 7767fa60fa3SFam Zheng } 7776398de51SFam Zheng if (strcmp(ct, "monolithicFlat") && 77886c6b429SFam Zheng strcmp(ct, "twoGbMaxExtentSparse") && 7796398de51SFam Zheng strcmp(ct, "twoGbMaxExtentFlat")) { 7807fa60fa3SFam Zheng fprintf(stderr, 7817fa60fa3SFam Zheng "VMDK: Not supported image type \"%s\""".\n", ct); 7820bed087dSEvgeny Budilovsky ret = -ENOTSUP; 7830bed087dSEvgeny Budilovsky goto exit; 7847fa60fa3SFam Zheng } 7857fa60fa3SFam Zheng s->desc_offset = 0; 7860bed087dSEvgeny Budilovsky ret = vmdk_parse_extents(buf, bs, bs->file->filename); 7870bed087dSEvgeny Budilovsky exit: 7880bed087dSEvgeny Budilovsky g_free(buf); 7890bed087dSEvgeny Budilovsky return ret; 7907fa60fa3SFam Zheng } 7917fa60fa3SFam Zheng 7921a86938fSKevin Wolf static int vmdk_open(BlockDriverState *bs, QDict *options, int flags) 793b4b3ab14SFam Zheng { 79486c6b429SFam Zheng int ret; 79586c6b429SFam Zheng BDRVVmdkState *s = bs->opaque; 796b4b3ab14SFam Zheng 79786c6b429SFam Zheng if (vmdk_open_sparse(bs, bs->file, flags) == 0) { 79886c6b429SFam Zheng s->desc_offset = 0x200; 799bae0a0ccSPaolo Bonzini } else { 800bae0a0ccSPaolo Bonzini ret = vmdk_open_desc_file(bs, flags, 0); 801bae0a0ccSPaolo Bonzini if (ret) { 802bae0a0ccSPaolo Bonzini goto fail; 803bae0a0ccSPaolo Bonzini } 804bae0a0ccSPaolo Bonzini } 80586c6b429SFam Zheng /* try to open parent images, if exist */ 80686c6b429SFam Zheng ret = vmdk_parent_open(bs); 80786c6b429SFam Zheng if (ret) { 808bae0a0ccSPaolo Bonzini goto fail; 809b4b3ab14SFam Zheng } 81086c6b429SFam Zheng s->parent_cid = vmdk_read_cid(bs, 1); 811848c66e8SPaolo Bonzini qemu_co_mutex_init(&s->lock); 8122bc3166cSKevin Wolf 8132bc3166cSKevin Wolf /* Disable migration when VMDK images are used */ 8142bc3166cSKevin Wolf error_set(&s->migration_blocker, 8152bc3166cSKevin Wolf QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, 8162bc3166cSKevin Wolf "vmdk", bs->device_name, "live migration"); 8172bc3166cSKevin Wolf migrate_add_blocker(s->migration_blocker); 8182bc3166cSKevin Wolf 8192bc3166cSKevin Wolf return 0; 820bae0a0ccSPaolo Bonzini 821bae0a0ccSPaolo Bonzini fail: 822bae0a0ccSPaolo Bonzini vmdk_free_extents(bs); 823bae0a0ccSPaolo Bonzini return ret; 824019d6b8fSAnthony Liguori } 825019d6b8fSAnthony Liguori 826b3976d3cSFam Zheng static int get_whole_cluster(BlockDriverState *bs, 827b3976d3cSFam Zheng VmdkExtent *extent, 828b3976d3cSFam Zheng uint64_t cluster_offset, 829b3976d3cSFam Zheng uint64_t offset, 830b3976d3cSFam Zheng bool allocate) 831019d6b8fSAnthony Liguori { 832b3976d3cSFam Zheng /* 128 sectors * 512 bytes each = grain size 64KB */ 833b3976d3cSFam Zheng uint8_t whole_grain[extent->cluster_sectors * 512]; 834019d6b8fSAnthony Liguori 8350e69c543SFam Zheng /* we will be here if it's first write on non-exist grain(cluster). 8360e69c543SFam Zheng * try to read from parent image, if exist */ 837b171271aSKevin Wolf if (bs->backing_hd) { 838c336500dSKevin Wolf int ret; 839019d6b8fSAnthony Liguori 840ae261c86SFam Zheng if (!vmdk_is_cid_valid(bs)) { 84165f74725SFam Zheng return VMDK_ERROR; 842ae261c86SFam Zheng } 843019d6b8fSAnthony Liguori 8440e69c543SFam Zheng /* floor offset to cluster */ 8450e69c543SFam Zheng offset -= offset % (extent->cluster_sectors * 512); 846c336500dSKevin Wolf ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, 847b3976d3cSFam Zheng extent->cluster_sectors); 848c336500dSKevin Wolf if (ret < 0) { 84965f74725SFam Zheng return VMDK_ERROR; 850c336500dSKevin Wolf } 851019d6b8fSAnthony Liguori 8520e69c543SFam Zheng /* Write grain only into the active image */ 853b3976d3cSFam Zheng ret = bdrv_write(extent->file, cluster_offset, whole_grain, 854b3976d3cSFam Zheng extent->cluster_sectors); 855c336500dSKevin Wolf if (ret < 0) { 85665f74725SFam Zheng return VMDK_ERROR; 857019d6b8fSAnthony Liguori } 858019d6b8fSAnthony Liguori } 85965f74725SFam Zheng return VMDK_OK; 860019d6b8fSAnthony Liguori } 861019d6b8fSAnthony Liguori 862b3976d3cSFam Zheng static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) 863019d6b8fSAnthony Liguori { 864e304e8e5SFam Zheng uint32_t offset; 865e304e8e5SFam Zheng QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); 866e304e8e5SFam Zheng offset = cpu_to_le32(m_data->offset); 867019d6b8fSAnthony Liguori /* update L2 table */ 868b3976d3cSFam Zheng if (bdrv_pwrite_sync( 869b3976d3cSFam Zheng extent->file, 870b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512) 871b3976d3cSFam Zheng + (m_data->l2_index * sizeof(m_data->offset)), 872e304e8e5SFam Zheng &offset, sizeof(offset)) < 0) { 87365f74725SFam Zheng return VMDK_ERROR; 874b3976d3cSFam Zheng } 875019d6b8fSAnthony Liguori /* update backup L2 table */ 876b3976d3cSFam Zheng if (extent->l1_backup_table_offset != 0) { 877b3976d3cSFam Zheng m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; 878b3976d3cSFam Zheng if (bdrv_pwrite_sync( 879b3976d3cSFam Zheng extent->file, 880b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512) 881b3976d3cSFam Zheng + (m_data->l2_index * sizeof(m_data->offset)), 882e304e8e5SFam Zheng &offset, sizeof(offset)) < 0) { 88365f74725SFam Zheng return VMDK_ERROR; 884019d6b8fSAnthony Liguori } 885b3976d3cSFam Zheng } 886cdeaf1f1SFam Zheng if (m_data->l2_cache_entry) { 887cdeaf1f1SFam Zheng *m_data->l2_cache_entry = offset; 888cdeaf1f1SFam Zheng } 889019d6b8fSAnthony Liguori 89065f74725SFam Zheng return VMDK_OK; 891019d6b8fSAnthony Liguori } 892019d6b8fSAnthony Liguori 89391b85bd3SFam Zheng static int get_cluster_offset(BlockDriverState *bs, 894b3976d3cSFam Zheng VmdkExtent *extent, 895b3976d3cSFam Zheng VmdkMetaData *m_data, 89691b85bd3SFam Zheng uint64_t offset, 89791b85bd3SFam Zheng int allocate, 89891b85bd3SFam Zheng uint64_t *cluster_offset) 899019d6b8fSAnthony Liguori { 900019d6b8fSAnthony Liguori unsigned int l1_index, l2_offset, l2_index; 901019d6b8fSAnthony Liguori int min_index, i, j; 902e304e8e5SFam Zheng uint32_t min_count, *l2_table; 90314ead646SFam Zheng bool zeroed = false; 904019d6b8fSAnthony Liguori 905ae261c86SFam Zheng if (m_data) { 906019d6b8fSAnthony Liguori m_data->valid = 0; 907ae261c86SFam Zheng } 90891b85bd3SFam Zheng if (extent->flat) { 9097fa60fa3SFam Zheng *cluster_offset = extent->flat_start_offset; 91065f74725SFam Zheng return VMDK_OK; 91191b85bd3SFam Zheng } 912019d6b8fSAnthony Liguori 9136398de51SFam Zheng offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; 914b3976d3cSFam Zheng l1_index = (offset >> 9) / extent->l1_entry_sectors; 915b3976d3cSFam Zheng if (l1_index >= extent->l1_size) { 91665f74725SFam Zheng return VMDK_ERROR; 917b3976d3cSFam Zheng } 918b3976d3cSFam Zheng l2_offset = extent->l1_table[l1_index]; 919b3976d3cSFam Zheng if (!l2_offset) { 92065f74725SFam Zheng return VMDK_UNALLOC; 921b3976d3cSFam Zheng } 922019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) { 923b3976d3cSFam Zheng if (l2_offset == extent->l2_cache_offsets[i]) { 924019d6b8fSAnthony Liguori /* increment the hit count */ 925b3976d3cSFam Zheng if (++extent->l2_cache_counts[i] == 0xffffffff) { 926019d6b8fSAnthony Liguori for (j = 0; j < L2_CACHE_SIZE; j++) { 927b3976d3cSFam Zheng extent->l2_cache_counts[j] >>= 1; 928019d6b8fSAnthony Liguori } 929019d6b8fSAnthony Liguori } 930b3976d3cSFam Zheng l2_table = extent->l2_cache + (i * extent->l2_size); 931019d6b8fSAnthony Liguori goto found; 932019d6b8fSAnthony Liguori } 933019d6b8fSAnthony Liguori } 934019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */ 935019d6b8fSAnthony Liguori min_index = 0; 936019d6b8fSAnthony Liguori min_count = 0xffffffff; 937019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) { 938b3976d3cSFam Zheng if (extent->l2_cache_counts[i] < min_count) { 939b3976d3cSFam Zheng min_count = extent->l2_cache_counts[i]; 940019d6b8fSAnthony Liguori min_index = i; 941019d6b8fSAnthony Liguori } 942019d6b8fSAnthony Liguori } 943b3976d3cSFam Zheng l2_table = extent->l2_cache + (min_index * extent->l2_size); 944b3976d3cSFam Zheng if (bdrv_pread( 945b3976d3cSFam Zheng extent->file, 946b3976d3cSFam Zheng (int64_t)l2_offset * 512, 947b3976d3cSFam Zheng l2_table, 948b3976d3cSFam Zheng extent->l2_size * sizeof(uint32_t) 949b3976d3cSFam Zheng ) != extent->l2_size * sizeof(uint32_t)) { 95065f74725SFam Zheng return VMDK_ERROR; 951b3976d3cSFam Zheng } 952019d6b8fSAnthony Liguori 953b3976d3cSFam Zheng extent->l2_cache_offsets[min_index] = l2_offset; 954b3976d3cSFam Zheng extent->l2_cache_counts[min_index] = 1; 955019d6b8fSAnthony Liguori found: 956b3976d3cSFam Zheng l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; 95791b85bd3SFam Zheng *cluster_offset = le32_to_cpu(l2_table[l2_index]); 958019d6b8fSAnthony Liguori 959cdeaf1f1SFam Zheng if (m_data) { 960cdeaf1f1SFam Zheng m_data->valid = 1; 961cdeaf1f1SFam Zheng m_data->l1_index = l1_index; 962cdeaf1f1SFam Zheng m_data->l2_index = l2_index; 963cdeaf1f1SFam Zheng m_data->offset = *cluster_offset; 964cdeaf1f1SFam Zheng m_data->l2_offset = l2_offset; 965cdeaf1f1SFam Zheng m_data->l2_cache_entry = &l2_table[l2_index]; 966cdeaf1f1SFam Zheng } 96714ead646SFam Zheng if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { 96814ead646SFam Zheng zeroed = true; 96914ead646SFam Zheng } 97014ead646SFam Zheng 97114ead646SFam Zheng if (!*cluster_offset || zeroed) { 97291b85bd3SFam Zheng if (!allocate) { 97314ead646SFam Zheng return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; 97491b85bd3SFam Zheng } 9759949f97eSKevin Wolf 976ae261c86SFam Zheng /* Avoid the L2 tables update for the images that have snapshots. */ 97791b85bd3SFam Zheng *cluster_offset = bdrv_getlength(extent->file); 9782b2c8c5dSFam Zheng if (!extent->compressed) { 979b3976d3cSFam Zheng bdrv_truncate( 980b3976d3cSFam Zheng extent->file, 98191b85bd3SFam Zheng *cluster_offset + (extent->cluster_sectors << 9) 982b3976d3cSFam Zheng ); 9832b2c8c5dSFam Zheng } 984019d6b8fSAnthony Liguori 98591b85bd3SFam Zheng *cluster_offset >>= 9; 986e304e8e5SFam Zheng l2_table[l2_index] = cpu_to_le32(*cluster_offset); 9879949f97eSKevin Wolf 988019d6b8fSAnthony Liguori /* First of all we write grain itself, to avoid race condition 989019d6b8fSAnthony Liguori * that may to corrupt the image. 990019d6b8fSAnthony Liguori * This problem may occur because of insufficient space on host disk 991019d6b8fSAnthony Liguori * or inappropriate VM shutdown. 992019d6b8fSAnthony Liguori */ 993b3976d3cSFam Zheng if (get_whole_cluster( 994ae261c86SFam Zheng bs, extent, *cluster_offset, offset, allocate) == -1) { 99565f74725SFam Zheng return VMDK_ERROR; 996ae261c86SFam Zheng } 997019d6b8fSAnthony Liguori 998019d6b8fSAnthony Liguori if (m_data) { 999e304e8e5SFam Zheng m_data->offset = *cluster_offset; 1000019d6b8fSAnthony Liguori } 1001019d6b8fSAnthony Liguori } 100291b85bd3SFam Zheng *cluster_offset <<= 9; 100365f74725SFam Zheng return VMDK_OK; 1004019d6b8fSAnthony Liguori } 1005019d6b8fSAnthony Liguori 1006b3976d3cSFam Zheng static VmdkExtent *find_extent(BDRVVmdkState *s, 1007b3976d3cSFam Zheng int64_t sector_num, VmdkExtent *start_hint) 1008b3976d3cSFam Zheng { 1009b3976d3cSFam Zheng VmdkExtent *extent = start_hint; 1010b3976d3cSFam Zheng 1011b3976d3cSFam Zheng if (!extent) { 1012b3976d3cSFam Zheng extent = &s->extents[0]; 1013b3976d3cSFam Zheng } 1014b3976d3cSFam Zheng while (extent < &s->extents[s->num_extents]) { 1015b3976d3cSFam Zheng if (sector_num < extent->end_sector) { 1016b3976d3cSFam Zheng return extent; 1017b3976d3cSFam Zheng } 1018b3976d3cSFam Zheng extent++; 1019b3976d3cSFam Zheng } 1020b3976d3cSFam Zheng return NULL; 1021b3976d3cSFam Zheng } 1022b3976d3cSFam Zheng 1023f8a2e5e3SStefan Hajnoczi static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, 1024f8a2e5e3SStefan Hajnoczi int64_t sector_num, int nb_sectors, int *pnum) 1025019d6b8fSAnthony Liguori { 1026019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1027b3976d3cSFam Zheng int64_t index_in_cluster, n, ret; 1028b3976d3cSFam Zheng uint64_t offset; 1029b3976d3cSFam Zheng VmdkExtent *extent; 1030b3976d3cSFam Zheng 1031b3976d3cSFam Zheng extent = find_extent(s, sector_num, NULL); 1032b3976d3cSFam Zheng if (!extent) { 1033b3976d3cSFam Zheng return 0; 1034b3976d3cSFam Zheng } 1035f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock); 103691b85bd3SFam Zheng ret = get_cluster_offset(bs, extent, NULL, 103791b85bd3SFam Zheng sector_num * 512, 0, &offset); 1038f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock); 103914ead646SFam Zheng 104014ead646SFam Zheng ret = (ret == VMDK_OK || ret == VMDK_ZEROED); 104191b85bd3SFam Zheng 1042b3976d3cSFam Zheng index_in_cluster = sector_num % extent->cluster_sectors; 1043b3976d3cSFam Zheng n = extent->cluster_sectors - index_in_cluster; 1044ae261c86SFam Zheng if (n > nb_sectors) { 1045019d6b8fSAnthony Liguori n = nb_sectors; 1046ae261c86SFam Zheng } 1047019d6b8fSAnthony Liguori *pnum = n; 1048b3976d3cSFam Zheng return ret; 1049019d6b8fSAnthony Liguori } 1050019d6b8fSAnthony Liguori 1051dd3f6ee2SFam Zheng static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, 1052dd3f6ee2SFam Zheng int64_t offset_in_cluster, const uint8_t *buf, 1053dd3f6ee2SFam Zheng int nb_sectors, int64_t sector_num) 1054dd3f6ee2SFam Zheng { 1055dd3f6ee2SFam Zheng int ret; 10562b2c8c5dSFam Zheng VmdkGrainMarker *data = NULL; 10572b2c8c5dSFam Zheng uLongf buf_len; 1058dd3f6ee2SFam Zheng const uint8_t *write_buf = buf; 1059dd3f6ee2SFam Zheng int write_len = nb_sectors * 512; 1060dd3f6ee2SFam Zheng 10612b2c8c5dSFam Zheng if (extent->compressed) { 10622b2c8c5dSFam Zheng if (!extent->has_marker) { 10632b2c8c5dSFam Zheng ret = -EINVAL; 10642b2c8c5dSFam Zheng goto out; 10652b2c8c5dSFam Zheng } 10662b2c8c5dSFam Zheng buf_len = (extent->cluster_sectors << 9) * 2; 10672b2c8c5dSFam Zheng data = g_malloc(buf_len + sizeof(VmdkGrainMarker)); 10682b2c8c5dSFam Zheng if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK || 10692b2c8c5dSFam Zheng buf_len == 0) { 10702b2c8c5dSFam Zheng ret = -EINVAL; 10712b2c8c5dSFam Zheng goto out; 10722b2c8c5dSFam Zheng } 10732b2c8c5dSFam Zheng data->lba = sector_num; 10742b2c8c5dSFam Zheng data->size = buf_len; 10752b2c8c5dSFam Zheng write_buf = (uint8_t *)data; 10762b2c8c5dSFam Zheng write_len = buf_len + sizeof(VmdkGrainMarker); 10772b2c8c5dSFam Zheng } 1078dd3f6ee2SFam Zheng ret = bdrv_pwrite(extent->file, 1079dd3f6ee2SFam Zheng cluster_offset + offset_in_cluster, 1080dd3f6ee2SFam Zheng write_buf, 1081dd3f6ee2SFam Zheng write_len); 1082dd3f6ee2SFam Zheng if (ret != write_len) { 1083dd3f6ee2SFam Zheng ret = ret < 0 ? ret : -EIO; 1084dd3f6ee2SFam Zheng goto out; 1085dd3f6ee2SFam Zheng } 1086dd3f6ee2SFam Zheng ret = 0; 1087dd3f6ee2SFam Zheng out: 10882b2c8c5dSFam Zheng g_free(data); 1089dd3f6ee2SFam Zheng return ret; 1090dd3f6ee2SFam Zheng } 1091dd3f6ee2SFam Zheng 1092dd3f6ee2SFam Zheng static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, 1093dd3f6ee2SFam Zheng int64_t offset_in_cluster, uint8_t *buf, 1094dd3f6ee2SFam Zheng int nb_sectors) 1095dd3f6ee2SFam Zheng { 1096dd3f6ee2SFam Zheng int ret; 10972b2c8c5dSFam Zheng int cluster_bytes, buf_bytes; 10982b2c8c5dSFam Zheng uint8_t *cluster_buf, *compressed_data; 10992b2c8c5dSFam Zheng uint8_t *uncomp_buf; 11002b2c8c5dSFam Zheng uint32_t data_len; 11012b2c8c5dSFam Zheng VmdkGrainMarker *marker; 11022b2c8c5dSFam Zheng uLongf buf_len; 1103dd3f6ee2SFam Zheng 11042b2c8c5dSFam Zheng 11052b2c8c5dSFam Zheng if (!extent->compressed) { 1106dd3f6ee2SFam Zheng ret = bdrv_pread(extent->file, 1107dd3f6ee2SFam Zheng cluster_offset + offset_in_cluster, 1108dd3f6ee2SFam Zheng buf, nb_sectors * 512); 1109dd3f6ee2SFam Zheng if (ret == nb_sectors * 512) { 1110dd3f6ee2SFam Zheng return 0; 1111dd3f6ee2SFam Zheng } else { 1112dd3f6ee2SFam Zheng return -EIO; 1113dd3f6ee2SFam Zheng } 1114dd3f6ee2SFam Zheng } 11152b2c8c5dSFam Zheng cluster_bytes = extent->cluster_sectors * 512; 11162b2c8c5dSFam Zheng /* Read two clusters in case GrainMarker + compressed data > one cluster */ 11172b2c8c5dSFam Zheng buf_bytes = cluster_bytes * 2; 11182b2c8c5dSFam Zheng cluster_buf = g_malloc(buf_bytes); 11192b2c8c5dSFam Zheng uncomp_buf = g_malloc(cluster_bytes); 11202b2c8c5dSFam Zheng ret = bdrv_pread(extent->file, 11212b2c8c5dSFam Zheng cluster_offset, 11222b2c8c5dSFam Zheng cluster_buf, buf_bytes); 11232b2c8c5dSFam Zheng if (ret < 0) { 11242b2c8c5dSFam Zheng goto out; 11252b2c8c5dSFam Zheng } 11262b2c8c5dSFam Zheng compressed_data = cluster_buf; 11272b2c8c5dSFam Zheng buf_len = cluster_bytes; 11282b2c8c5dSFam Zheng data_len = cluster_bytes; 11292b2c8c5dSFam Zheng if (extent->has_marker) { 11302b2c8c5dSFam Zheng marker = (VmdkGrainMarker *)cluster_buf; 11312b2c8c5dSFam Zheng compressed_data = marker->data; 11322b2c8c5dSFam Zheng data_len = le32_to_cpu(marker->size); 11332b2c8c5dSFam Zheng } 11342b2c8c5dSFam Zheng if (!data_len || data_len > buf_bytes) { 11352b2c8c5dSFam Zheng ret = -EINVAL; 11362b2c8c5dSFam Zheng goto out; 11372b2c8c5dSFam Zheng } 11382b2c8c5dSFam Zheng ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len); 11392b2c8c5dSFam Zheng if (ret != Z_OK) { 11402b2c8c5dSFam Zheng ret = -EINVAL; 11412b2c8c5dSFam Zheng goto out; 11422b2c8c5dSFam Zheng 11432b2c8c5dSFam Zheng } 11442b2c8c5dSFam Zheng if (offset_in_cluster < 0 || 11452b2c8c5dSFam Zheng offset_in_cluster + nb_sectors * 512 > buf_len) { 11462b2c8c5dSFam Zheng ret = -EINVAL; 11472b2c8c5dSFam Zheng goto out; 11482b2c8c5dSFam Zheng } 11492b2c8c5dSFam Zheng memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512); 11502b2c8c5dSFam Zheng ret = 0; 11512b2c8c5dSFam Zheng 11522b2c8c5dSFam Zheng out: 11532b2c8c5dSFam Zheng g_free(uncomp_buf); 11542b2c8c5dSFam Zheng g_free(cluster_buf); 11552b2c8c5dSFam Zheng return ret; 11562b2c8c5dSFam Zheng } 1157dd3f6ee2SFam Zheng 1158019d6b8fSAnthony Liguori static int vmdk_read(BlockDriverState *bs, int64_t sector_num, 1159019d6b8fSAnthony Liguori uint8_t *buf, int nb_sectors) 1160019d6b8fSAnthony Liguori { 1161019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1162b3976d3cSFam Zheng int ret; 1163b3976d3cSFam Zheng uint64_t n, index_in_cluster; 1164b1649faeSGerhard Wiesinger uint64_t extent_begin_sector, extent_relative_sector_num; 1165b3976d3cSFam Zheng VmdkExtent *extent = NULL; 1166019d6b8fSAnthony Liguori uint64_t cluster_offset; 1167019d6b8fSAnthony Liguori 1168019d6b8fSAnthony Liguori while (nb_sectors > 0) { 1169b3976d3cSFam Zheng extent = find_extent(s, sector_num, extent); 1170b3976d3cSFam Zheng if (!extent) { 1171b3976d3cSFam Zheng return -EIO; 1172b3976d3cSFam Zheng } 117391b85bd3SFam Zheng ret = get_cluster_offset( 117491b85bd3SFam Zheng bs, extent, NULL, 117591b85bd3SFam Zheng sector_num << 9, 0, &cluster_offset); 1176b1649faeSGerhard Wiesinger extent_begin_sector = extent->end_sector - extent->sectors; 1177b1649faeSGerhard Wiesinger extent_relative_sector_num = sector_num - extent_begin_sector; 1178b1649faeSGerhard Wiesinger index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; 1179b3976d3cSFam Zheng n = extent->cluster_sectors - index_in_cluster; 1180ae261c86SFam Zheng if (n > nb_sectors) { 1181019d6b8fSAnthony Liguori n = nb_sectors; 1182ae261c86SFam Zheng } 118314ead646SFam Zheng if (ret != VMDK_OK) { 118491b85bd3SFam Zheng /* if not allocated, try to read from parent image, if exist */ 118514ead646SFam Zheng if (bs->backing_hd && ret != VMDK_ZEROED) { 1186ae261c86SFam Zheng if (!vmdk_is_cid_valid(bs)) { 11877fa60fa3SFam Zheng return -EINVAL; 1188ae261c86SFam Zheng } 1189b171271aSKevin Wolf ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 1190ae261c86SFam Zheng if (ret < 0) { 11917fa60fa3SFam Zheng return ret; 1192ae261c86SFam Zheng } 1193019d6b8fSAnthony Liguori } else { 1194019d6b8fSAnthony Liguori memset(buf, 0, 512 * n); 1195019d6b8fSAnthony Liguori } 1196019d6b8fSAnthony Liguori } else { 1197dd3f6ee2SFam Zheng ret = vmdk_read_extent(extent, 1198dd3f6ee2SFam Zheng cluster_offset, index_in_cluster * 512, 1199dd3f6ee2SFam Zheng buf, n); 1200dd3f6ee2SFam Zheng if (ret) { 12017fa60fa3SFam Zheng return ret; 12027fa60fa3SFam Zheng } 1203019d6b8fSAnthony Liguori } 1204019d6b8fSAnthony Liguori nb_sectors -= n; 1205019d6b8fSAnthony Liguori sector_num += n; 1206019d6b8fSAnthony Liguori buf += n * 512; 1207019d6b8fSAnthony Liguori } 1208019d6b8fSAnthony Liguori return 0; 1209019d6b8fSAnthony Liguori } 1210019d6b8fSAnthony Liguori 12112914caa0SPaolo Bonzini static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, 12122914caa0SPaolo Bonzini uint8_t *buf, int nb_sectors) 12132914caa0SPaolo Bonzini { 12142914caa0SPaolo Bonzini int ret; 12152914caa0SPaolo Bonzini BDRVVmdkState *s = bs->opaque; 12162914caa0SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 12172914caa0SPaolo Bonzini ret = vmdk_read(bs, sector_num, buf, nb_sectors); 12182914caa0SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 12192914caa0SPaolo Bonzini return ret; 12202914caa0SPaolo Bonzini } 12212914caa0SPaolo Bonzini 1222cdeaf1f1SFam Zheng /** 1223cdeaf1f1SFam Zheng * vmdk_write: 1224cdeaf1f1SFam Zheng * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature 1225cdeaf1f1SFam Zheng * if possible, otherwise return -ENOTSUP. 12268e507243SFam Zheng * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try 12278e507243SFam Zheng * with each cluster. By dry run we can find if the zero write 12288e507243SFam Zheng * is possible without modifying image data. 1229cdeaf1f1SFam Zheng * 1230cdeaf1f1SFam Zheng * Returns: error code with 0 for success. 1231cdeaf1f1SFam Zheng */ 1232019d6b8fSAnthony Liguori static int vmdk_write(BlockDriverState *bs, int64_t sector_num, 1233cdeaf1f1SFam Zheng const uint8_t *buf, int nb_sectors, 1234cdeaf1f1SFam Zheng bool zeroed, bool zero_dry_run) 1235019d6b8fSAnthony Liguori { 1236019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1237b3976d3cSFam Zheng VmdkExtent *extent = NULL; 123891b85bd3SFam Zheng int n, ret; 1239b3976d3cSFam Zheng int64_t index_in_cluster; 1240b1649faeSGerhard Wiesinger uint64_t extent_begin_sector, extent_relative_sector_num; 1241019d6b8fSAnthony Liguori uint64_t cluster_offset; 1242b3976d3cSFam Zheng VmdkMetaData m_data; 1243019d6b8fSAnthony Liguori 1244019d6b8fSAnthony Liguori if (sector_num > bs->total_sectors) { 1245019d6b8fSAnthony Liguori fprintf(stderr, 1246019d6b8fSAnthony Liguori "(VMDK) Wrong offset: sector_num=0x%" PRIx64 1247019d6b8fSAnthony Liguori " total_sectors=0x%" PRIx64 "\n", 1248019d6b8fSAnthony Liguori sector_num, bs->total_sectors); 12497fa60fa3SFam Zheng return -EIO; 1250019d6b8fSAnthony Liguori } 1251019d6b8fSAnthony Liguori 1252019d6b8fSAnthony Liguori while (nb_sectors > 0) { 1253b3976d3cSFam Zheng extent = find_extent(s, sector_num, extent); 1254b3976d3cSFam Zheng if (!extent) { 1255b3976d3cSFam Zheng return -EIO; 1256b3976d3cSFam Zheng } 125791b85bd3SFam Zheng ret = get_cluster_offset( 1258b3976d3cSFam Zheng bs, 1259b3976d3cSFam Zheng extent, 1260b3976d3cSFam Zheng &m_data, 12612b2c8c5dSFam Zheng sector_num << 9, !extent->compressed, 12622b2c8c5dSFam Zheng &cluster_offset); 12632b2c8c5dSFam Zheng if (extent->compressed) { 126465f74725SFam Zheng if (ret == VMDK_OK) { 12652b2c8c5dSFam Zheng /* Refuse write to allocated cluster for streamOptimized */ 12662b2c8c5dSFam Zheng fprintf(stderr, 12672b2c8c5dSFam Zheng "VMDK: can't write to allocated cluster" 12682b2c8c5dSFam Zheng " for streamOptimized\n"); 12692b2c8c5dSFam Zheng return -EIO; 12702b2c8c5dSFam Zheng } else { 12712b2c8c5dSFam Zheng /* allocate */ 12722b2c8c5dSFam Zheng ret = get_cluster_offset( 12732b2c8c5dSFam Zheng bs, 12742b2c8c5dSFam Zheng extent, 12752b2c8c5dSFam Zheng &m_data, 127691b85bd3SFam Zheng sector_num << 9, 1, 127791b85bd3SFam Zheng &cluster_offset); 12782b2c8c5dSFam Zheng } 12792b2c8c5dSFam Zheng } 1280cdeaf1f1SFam Zheng if (ret == VMDK_ERROR) { 128191b85bd3SFam Zheng return -EINVAL; 1282b3976d3cSFam Zheng } 1283b1649faeSGerhard Wiesinger extent_begin_sector = extent->end_sector - extent->sectors; 1284b1649faeSGerhard Wiesinger extent_relative_sector_num = sector_num - extent_begin_sector; 1285b1649faeSGerhard Wiesinger index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; 1286b3976d3cSFam Zheng n = extent->cluster_sectors - index_in_cluster; 1287b3976d3cSFam Zheng if (n > nb_sectors) { 1288019d6b8fSAnthony Liguori n = nb_sectors; 1289b3976d3cSFam Zheng } 1290cdeaf1f1SFam Zheng if (zeroed) { 1291cdeaf1f1SFam Zheng /* Do zeroed write, buf is ignored */ 1292cdeaf1f1SFam Zheng if (extent->has_zero_grain && 1293cdeaf1f1SFam Zheng index_in_cluster == 0 && 1294cdeaf1f1SFam Zheng n >= extent->cluster_sectors) { 1295cdeaf1f1SFam Zheng n = extent->cluster_sectors; 1296cdeaf1f1SFam Zheng if (!zero_dry_run) { 1297cdeaf1f1SFam Zheng m_data.offset = VMDK_GTE_ZEROED; 1298cdeaf1f1SFam Zheng /* update L2 tables */ 1299cdeaf1f1SFam Zheng if (vmdk_L2update(extent, &m_data) != VMDK_OK) { 1300cdeaf1f1SFam Zheng return -EIO; 1301cdeaf1f1SFam Zheng } 1302cdeaf1f1SFam Zheng } 1303cdeaf1f1SFam Zheng } else { 1304cdeaf1f1SFam Zheng return -ENOTSUP; 1305cdeaf1f1SFam Zheng } 1306cdeaf1f1SFam Zheng } else { 1307dd3f6ee2SFam Zheng ret = vmdk_write_extent(extent, 1308dd3f6ee2SFam Zheng cluster_offset, index_in_cluster * 512, 1309dd3f6ee2SFam Zheng buf, n, sector_num); 1310dd3f6ee2SFam Zheng if (ret) { 13117fa60fa3SFam Zheng return ret; 1312b3976d3cSFam Zheng } 1313019d6b8fSAnthony Liguori if (m_data.valid) { 1314019d6b8fSAnthony Liguori /* update L2 tables */ 1315cdeaf1f1SFam Zheng if (vmdk_L2update(extent, &m_data) != VMDK_OK) { 13167fa60fa3SFam Zheng return -EIO; 1317019d6b8fSAnthony Liguori } 1318b3976d3cSFam Zheng } 1319cdeaf1f1SFam Zheng } 1320019d6b8fSAnthony Liguori nb_sectors -= n; 1321019d6b8fSAnthony Liguori sector_num += n; 1322019d6b8fSAnthony Liguori buf += n * 512; 1323019d6b8fSAnthony Liguori 1324ae261c86SFam Zheng /* update CID on the first write every time the virtual disk is 1325ae261c86SFam Zheng * opened */ 132669b4d86dSFam Zheng if (!s->cid_updated) { 132799f1835dSKevin Wolf ret = vmdk_write_cid(bs, time(NULL)); 132899f1835dSKevin Wolf if (ret < 0) { 132999f1835dSKevin Wolf return ret; 133099f1835dSKevin Wolf } 133169b4d86dSFam Zheng s->cid_updated = true; 1332019d6b8fSAnthony Liguori } 1333019d6b8fSAnthony Liguori } 1334019d6b8fSAnthony Liguori return 0; 1335019d6b8fSAnthony Liguori } 1336019d6b8fSAnthony Liguori 1337e183ef75SPaolo Bonzini static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, 1338e183ef75SPaolo Bonzini const uint8_t *buf, int nb_sectors) 1339e183ef75SPaolo Bonzini { 1340e183ef75SPaolo Bonzini int ret; 1341e183ef75SPaolo Bonzini BDRVVmdkState *s = bs->opaque; 1342e183ef75SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 1343cdeaf1f1SFam Zheng ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false); 1344cdeaf1f1SFam Zheng qemu_co_mutex_unlock(&s->lock); 1345cdeaf1f1SFam Zheng return ret; 1346cdeaf1f1SFam Zheng } 1347cdeaf1f1SFam Zheng 1348cdeaf1f1SFam Zheng static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, 1349cdeaf1f1SFam Zheng int64_t sector_num, 1350cdeaf1f1SFam Zheng int nb_sectors) 1351cdeaf1f1SFam Zheng { 1352cdeaf1f1SFam Zheng int ret; 1353cdeaf1f1SFam Zheng BDRVVmdkState *s = bs->opaque; 1354cdeaf1f1SFam Zheng qemu_co_mutex_lock(&s->lock); 13558e507243SFam Zheng /* write zeroes could fail if sectors not aligned to cluster, test it with 13568e507243SFam Zheng * dry_run == true before really updating image */ 1357cdeaf1f1SFam Zheng ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); 1358cdeaf1f1SFam Zheng if (!ret) { 1359cdeaf1f1SFam Zheng ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); 1360cdeaf1f1SFam Zheng } 1361e183ef75SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 1362e183ef75SPaolo Bonzini return ret; 1363e183ef75SPaolo Bonzini } 1364e183ef75SPaolo Bonzini 1365f66fd6c3SFam Zheng 13666c031aacSFam Zheng static int vmdk_create_extent(const char *filename, int64_t filesize, 136769e0b6dfSFam Zheng bool flat, bool compress, bool zeroed_grain) 1368019d6b8fSAnthony Liguori { 1369f66fd6c3SFam Zheng int ret, i; 1370f66fd6c3SFam Zheng int fd = 0; 1371019d6b8fSAnthony Liguori VMDK4Header header; 1372019d6b8fSAnthony Liguori uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; 13730e7e1989SKevin Wolf 13746165f4d8SCorey Bryant fd = qemu_open(filename, 1375f66fd6c3SFam Zheng O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1376019d6b8fSAnthony Liguori 0644); 1377f66fd6c3SFam Zheng if (fd < 0) { 1378b781cce5SJuan Quintela return -errno; 1379f66fd6c3SFam Zheng } 1380f66fd6c3SFam Zheng if (flat) { 1381f66fd6c3SFam Zheng ret = ftruncate(fd, filesize); 1382f66fd6c3SFam Zheng if (ret < 0) { 1383f66fd6c3SFam Zheng ret = -errno; 1384f66fd6c3SFam Zheng } 1385f66fd6c3SFam Zheng goto exit; 1386f66fd6c3SFam Zheng } 1387019d6b8fSAnthony Liguori magic = cpu_to_be32(VMDK4_MAGIC); 1388019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header)); 138969e0b6dfSFam Zheng header.version = zeroed_grain ? 2 : 1; 139095b0aa42SFam Zheng header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT 139169e0b6dfSFam Zheng | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) 139269e0b6dfSFam Zheng | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); 13936c031aacSFam Zheng header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; 1394f66fd6c3SFam Zheng header.capacity = filesize / 512; 139516372ff0SAlexander Graf header.granularity = 128; 139616372ff0SAlexander Graf header.num_gtes_per_gte = 512; 1397019d6b8fSAnthony Liguori 1398f66fd6c3SFam Zheng grains = (filesize / 512 + header.granularity - 1) / header.granularity; 1399019d6b8fSAnthony Liguori gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; 1400f66fd6c3SFam Zheng gt_count = 1401f66fd6c3SFam Zheng (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; 1402019d6b8fSAnthony Liguori gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; 1403019d6b8fSAnthony Liguori 1404019d6b8fSAnthony Liguori header.desc_offset = 1; 1405019d6b8fSAnthony Liguori header.desc_size = 20; 1406019d6b8fSAnthony Liguori header.rgd_offset = header.desc_offset + header.desc_size; 1407019d6b8fSAnthony Liguori header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); 1408019d6b8fSAnthony Liguori header.grain_offset = 1409019d6b8fSAnthony Liguori ((header.gd_offset + gd_size + (gt_size * gt_count) + 1410019d6b8fSAnthony Liguori header.granularity - 1) / header.granularity) * 1411019d6b8fSAnthony Liguori header.granularity; 141216372ff0SAlexander Graf /* swap endianness for all header fields */ 141316372ff0SAlexander Graf header.version = cpu_to_le32(header.version); 141416372ff0SAlexander Graf header.flags = cpu_to_le32(header.flags); 141516372ff0SAlexander Graf header.capacity = cpu_to_le64(header.capacity); 141616372ff0SAlexander Graf header.granularity = cpu_to_le64(header.granularity); 141716372ff0SAlexander Graf header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); 1418019d6b8fSAnthony Liguori header.desc_offset = cpu_to_le64(header.desc_offset); 1419019d6b8fSAnthony Liguori header.desc_size = cpu_to_le64(header.desc_size); 1420019d6b8fSAnthony Liguori header.rgd_offset = cpu_to_le64(header.rgd_offset); 1421019d6b8fSAnthony Liguori header.gd_offset = cpu_to_le64(header.gd_offset); 1422019d6b8fSAnthony Liguori header.grain_offset = cpu_to_le64(header.grain_offset); 14236c031aacSFam Zheng header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm); 1424019d6b8fSAnthony Liguori 1425019d6b8fSAnthony Liguori header.check_bytes[0] = 0xa; 1426019d6b8fSAnthony Liguori header.check_bytes[1] = 0x20; 1427019d6b8fSAnthony Liguori header.check_bytes[2] = 0xd; 1428019d6b8fSAnthony Liguori header.check_bytes[3] = 0xa; 1429019d6b8fSAnthony Liguori 1430019d6b8fSAnthony Liguori /* write all the data */ 14311640366cSKirill A. Shutemov ret = qemu_write_full(fd, &magic, sizeof(magic)); 14321640366cSKirill A. Shutemov if (ret != sizeof(magic)) { 1433b781cce5SJuan Quintela ret = -errno; 14341640366cSKirill A. Shutemov goto exit; 14351640366cSKirill A. Shutemov } 14361640366cSKirill A. Shutemov ret = qemu_write_full(fd, &header, sizeof(header)); 14371640366cSKirill A. Shutemov if (ret != sizeof(header)) { 1438b781cce5SJuan Quintela ret = -errno; 14391640366cSKirill A. Shutemov goto exit; 14401640366cSKirill A. Shutemov } 1441019d6b8fSAnthony Liguori 144216372ff0SAlexander Graf ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9); 14431640366cSKirill A. Shutemov if (ret < 0) { 1444b781cce5SJuan Quintela ret = -errno; 14451640366cSKirill A. Shutemov goto exit; 14461640366cSKirill A. Shutemov } 1447019d6b8fSAnthony Liguori 1448019d6b8fSAnthony Liguori /* write grain directory */ 1449019d6b8fSAnthony Liguori lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); 145016372ff0SAlexander Graf for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size; 14511640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) { 14521640366cSKirill A. Shutemov ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 14531640366cSKirill A. Shutemov if (ret != sizeof(tmp)) { 1454b781cce5SJuan Quintela ret = -errno; 14551640366cSKirill A. Shutemov goto exit; 14561640366cSKirill A. Shutemov } 14571640366cSKirill A. Shutemov } 1458019d6b8fSAnthony Liguori 1459019d6b8fSAnthony Liguori /* write backup grain directory */ 1460019d6b8fSAnthony Liguori lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); 146116372ff0SAlexander Graf for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size; 14621640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) { 14631640366cSKirill A. Shutemov ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 14641640366cSKirill A. Shutemov if (ret != sizeof(tmp)) { 1465b781cce5SJuan Quintela ret = -errno; 14661640366cSKirill A. Shutemov goto exit; 14671640366cSKirill A. Shutemov } 14681640366cSKirill A. Shutemov } 1469019d6b8fSAnthony Liguori 1470f66fd6c3SFam Zheng ret = 0; 1471f66fd6c3SFam Zheng exit: 14722e1e79daSCorey Bryant qemu_close(fd); 1473f66fd6c3SFam Zheng return ret; 1474f66fd6c3SFam Zheng } 1475019d6b8fSAnthony Liguori 1476f66fd6c3SFam Zheng static int filename_decompose(const char *filename, char *path, char *prefix, 1477f66fd6c3SFam Zheng char *postfix, size_t buf_len) 1478f66fd6c3SFam Zheng { 1479f66fd6c3SFam Zheng const char *p, *q; 1480f66fd6c3SFam Zheng 1481f66fd6c3SFam Zheng if (filename == NULL || !strlen(filename)) { 1482f66fd6c3SFam Zheng fprintf(stderr, "Vmdk: no filename provided.\n"); 148365f74725SFam Zheng return VMDK_ERROR; 1484f66fd6c3SFam Zheng } 1485f66fd6c3SFam Zheng p = strrchr(filename, '/'); 1486f66fd6c3SFam Zheng if (p == NULL) { 1487f66fd6c3SFam Zheng p = strrchr(filename, '\\'); 1488f66fd6c3SFam Zheng } 1489f66fd6c3SFam Zheng if (p == NULL) { 1490f66fd6c3SFam Zheng p = strrchr(filename, ':'); 1491f66fd6c3SFam Zheng } 1492f66fd6c3SFam Zheng if (p != NULL) { 1493f66fd6c3SFam Zheng p++; 1494f66fd6c3SFam Zheng if (p - filename >= buf_len) { 149565f74725SFam Zheng return VMDK_ERROR; 1496f66fd6c3SFam Zheng } 1497f66fd6c3SFam Zheng pstrcpy(path, p - filename + 1, filename); 1498f66fd6c3SFam Zheng } else { 1499f66fd6c3SFam Zheng p = filename; 1500f66fd6c3SFam Zheng path[0] = '\0'; 1501f66fd6c3SFam Zheng } 1502f66fd6c3SFam Zheng q = strrchr(p, '.'); 1503f66fd6c3SFam Zheng if (q == NULL) { 1504f66fd6c3SFam Zheng pstrcpy(prefix, buf_len, p); 1505f66fd6c3SFam Zheng postfix[0] = '\0'; 1506f66fd6c3SFam Zheng } else { 1507f66fd6c3SFam Zheng if (q - p >= buf_len) { 150865f74725SFam Zheng return VMDK_ERROR; 1509f66fd6c3SFam Zheng } 1510f66fd6c3SFam Zheng pstrcpy(prefix, q - p + 1, p); 1511f66fd6c3SFam Zheng pstrcpy(postfix, buf_len, q); 1512f66fd6c3SFam Zheng } 151365f74725SFam Zheng return VMDK_OK; 1514f66fd6c3SFam Zheng } 1515f66fd6c3SFam Zheng 1516f66fd6c3SFam Zheng static int vmdk_create(const char *filename, QEMUOptionParameter *options) 1517f66fd6c3SFam Zheng { 1518f66fd6c3SFam Zheng int fd, idx = 0; 1519f66fd6c3SFam Zheng char desc[BUF_SIZE]; 1520f66fd6c3SFam Zheng int64_t total_size = 0, filesize; 15217f2039f6SOthmar Pasteka const char *adapter_type = NULL; 1522f66fd6c3SFam Zheng const char *backing_file = NULL; 1523f66fd6c3SFam Zheng const char *fmt = NULL; 1524f66fd6c3SFam Zheng int flags = 0; 1525f66fd6c3SFam Zheng int ret = 0; 15266c031aacSFam Zheng bool flat, split, compress; 1527f66fd6c3SFam Zheng char ext_desc_lines[BUF_SIZE] = ""; 1528f66fd6c3SFam Zheng char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX]; 1529f66fd6c3SFam Zheng const int64_t split_size = 0x80000000; /* VMDK has constant split size */ 1530f66fd6c3SFam Zheng const char *desc_extent_line; 1531f66fd6c3SFam Zheng char parent_desc_line[BUF_SIZE] = ""; 1532f66fd6c3SFam Zheng uint32_t parent_cid = 0xffffffff; 15337f2039f6SOthmar Pasteka uint32_t number_heads = 16; 153469e0b6dfSFam Zheng bool zeroed_grain = false; 1535f66fd6c3SFam Zheng const char desc_template[] = 1536f66fd6c3SFam Zheng "# Disk DescriptorFile\n" 1537f66fd6c3SFam Zheng "version=1\n" 1538f66fd6c3SFam Zheng "CID=%x\n" 1539f66fd6c3SFam Zheng "parentCID=%x\n" 1540f66fd6c3SFam Zheng "createType=\"%s\"\n" 1541f66fd6c3SFam Zheng "%s" 1542f66fd6c3SFam Zheng "\n" 1543f66fd6c3SFam Zheng "# Extent description\n" 1544f66fd6c3SFam Zheng "%s" 1545f66fd6c3SFam Zheng "\n" 1546f66fd6c3SFam Zheng "# The Disk Data Base\n" 1547f66fd6c3SFam Zheng "#DDB\n" 1548f66fd6c3SFam Zheng "\n" 1549f66fd6c3SFam Zheng "ddb.virtualHWVersion = \"%d\"\n" 1550f66fd6c3SFam Zheng "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 15517f2039f6SOthmar Pasteka "ddb.geometry.heads = \"%d\"\n" 1552f66fd6c3SFam Zheng "ddb.geometry.sectors = \"63\"\n" 15537f2039f6SOthmar Pasteka "ddb.adapterType = \"%s\"\n"; 1554f66fd6c3SFam Zheng 1555f66fd6c3SFam Zheng if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) { 1556f66fd6c3SFam Zheng return -EINVAL; 1557f66fd6c3SFam Zheng } 1558f66fd6c3SFam Zheng /* Read out options */ 1559f66fd6c3SFam Zheng while (options && options->name) { 1560f66fd6c3SFam Zheng if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1561f66fd6c3SFam Zheng total_size = options->value.n; 15627f2039f6SOthmar Pasteka } else if (!strcmp(options->name, BLOCK_OPT_ADAPTER_TYPE)) { 15637f2039f6SOthmar Pasteka adapter_type = options->value.s; 1564f66fd6c3SFam Zheng } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1565f66fd6c3SFam Zheng backing_file = options->value.s; 1566f66fd6c3SFam Zheng } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { 1567f66fd6c3SFam Zheng flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; 1568f66fd6c3SFam Zheng } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { 1569f66fd6c3SFam Zheng fmt = options->value.s; 157069e0b6dfSFam Zheng } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) { 157169e0b6dfSFam Zheng zeroed_grain |= options->value.n; 1572f66fd6c3SFam Zheng } 1573f66fd6c3SFam Zheng options++; 1574f66fd6c3SFam Zheng } 15757f2039f6SOthmar Pasteka if (!adapter_type) { 15767f2039f6SOthmar Pasteka adapter_type = "ide"; 15777f2039f6SOthmar Pasteka } else if (strcmp(adapter_type, "ide") && 15787f2039f6SOthmar Pasteka strcmp(adapter_type, "buslogic") && 15797f2039f6SOthmar Pasteka strcmp(adapter_type, "lsilogic") && 15807f2039f6SOthmar Pasteka strcmp(adapter_type, "legacyESX")) { 15817f2039f6SOthmar Pasteka fprintf(stderr, "VMDK: Unknown adapter type: '%s'.\n", adapter_type); 15827f2039f6SOthmar Pasteka return -EINVAL; 15837f2039f6SOthmar Pasteka } 15847f2039f6SOthmar Pasteka if (strcmp(adapter_type, "ide") != 0) { 15857f2039f6SOthmar Pasteka /* that's the number of heads with which vmware operates when 15867f2039f6SOthmar Pasteka creating, exporting, etc. vmdk files with a non-ide adapter type */ 15877f2039f6SOthmar Pasteka number_heads = 255; 15887f2039f6SOthmar Pasteka } 1589f66fd6c3SFam Zheng if (!fmt) { 1590f66fd6c3SFam Zheng /* Default format to monolithicSparse */ 1591f66fd6c3SFam Zheng fmt = "monolithicSparse"; 1592f66fd6c3SFam Zheng } else if (strcmp(fmt, "monolithicFlat") && 1593f66fd6c3SFam Zheng strcmp(fmt, "monolithicSparse") && 1594f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentSparse") && 15956c031aacSFam Zheng strcmp(fmt, "twoGbMaxExtentFlat") && 15966c031aacSFam Zheng strcmp(fmt, "streamOptimized")) { 1597f66fd6c3SFam Zheng fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt); 1598f66fd6c3SFam Zheng return -EINVAL; 1599f66fd6c3SFam Zheng } 1600f66fd6c3SFam Zheng split = !(strcmp(fmt, "twoGbMaxExtentFlat") && 1601f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentSparse")); 1602f66fd6c3SFam Zheng flat = !(strcmp(fmt, "monolithicFlat") && 1603f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentFlat")); 16046c031aacSFam Zheng compress = !strcmp(fmt, "streamOptimized"); 1605f66fd6c3SFam Zheng if (flat) { 1606f66fd6c3SFam Zheng desc_extent_line = "RW %lld FLAT \"%s\" 0\n"; 1607f66fd6c3SFam Zheng } else { 1608f66fd6c3SFam Zheng desc_extent_line = "RW %lld SPARSE \"%s\"\n"; 1609f66fd6c3SFam Zheng } 1610f66fd6c3SFam Zheng if (flat && backing_file) { 1611f66fd6c3SFam Zheng /* not supporting backing file for flat image */ 1612f66fd6c3SFam Zheng return -ENOTSUP; 1613f66fd6c3SFam Zheng } 1614f66fd6c3SFam Zheng if (backing_file) { 1615f66fd6c3SFam Zheng BlockDriverState *bs = bdrv_new(""); 1616de9c0cecSKevin Wolf ret = bdrv_open(bs, backing_file, NULL, 0, NULL); 1617f66fd6c3SFam Zheng if (ret != 0) { 1618f66fd6c3SFam Zheng bdrv_delete(bs); 1619f66fd6c3SFam Zheng return ret; 1620f66fd6c3SFam Zheng } 1621f66fd6c3SFam Zheng if (strcmp(bs->drv->format_name, "vmdk")) { 1622f66fd6c3SFam Zheng bdrv_delete(bs); 1623f66fd6c3SFam Zheng return -EINVAL; 1624f66fd6c3SFam Zheng } 1625f66fd6c3SFam Zheng parent_cid = vmdk_read_cid(bs, 0); 1626f66fd6c3SFam Zheng bdrv_delete(bs); 1627f66fd6c3SFam Zheng snprintf(parent_desc_line, sizeof(parent_desc_line), 16288ed610a1SFam Zheng "parentFileNameHint=\"%s\"", backing_file); 1629f66fd6c3SFam Zheng } 1630f66fd6c3SFam Zheng 1631f66fd6c3SFam Zheng /* Create extents */ 1632f66fd6c3SFam Zheng filesize = total_size; 1633f66fd6c3SFam Zheng while (filesize > 0) { 1634f66fd6c3SFam Zheng char desc_line[BUF_SIZE]; 1635f66fd6c3SFam Zheng char ext_filename[PATH_MAX]; 1636f66fd6c3SFam Zheng char desc_filename[PATH_MAX]; 1637f66fd6c3SFam Zheng int64_t size = filesize; 1638f66fd6c3SFam Zheng 1639f66fd6c3SFam Zheng if (split && size > split_size) { 1640f66fd6c3SFam Zheng size = split_size; 1641f66fd6c3SFam Zheng } 1642f66fd6c3SFam Zheng if (split) { 1643f66fd6c3SFam Zheng snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s", 1644f66fd6c3SFam Zheng prefix, flat ? 'f' : 's', ++idx, postfix); 1645f66fd6c3SFam Zheng } else if (flat) { 1646f66fd6c3SFam Zheng snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s", 1647f66fd6c3SFam Zheng prefix, postfix); 1648f66fd6c3SFam Zheng } else { 1649f66fd6c3SFam Zheng snprintf(desc_filename, sizeof(desc_filename), "%s%s", 1650f66fd6c3SFam Zheng prefix, postfix); 1651f66fd6c3SFam Zheng } 1652f66fd6c3SFam Zheng snprintf(ext_filename, sizeof(ext_filename), "%s%s", 1653f66fd6c3SFam Zheng path, desc_filename); 1654f66fd6c3SFam Zheng 165569e0b6dfSFam Zheng if (vmdk_create_extent(ext_filename, size, 165669e0b6dfSFam Zheng flat, compress, zeroed_grain)) { 1657f66fd6c3SFam Zheng return -EINVAL; 1658f66fd6c3SFam Zheng } 1659f66fd6c3SFam Zheng filesize -= size; 1660f66fd6c3SFam Zheng 1661f66fd6c3SFam Zheng /* Format description line */ 1662f66fd6c3SFam Zheng snprintf(desc_line, sizeof(desc_line), 1663f66fd6c3SFam Zheng desc_extent_line, size / 512, desc_filename); 1664f66fd6c3SFam Zheng pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line); 1665f66fd6c3SFam Zheng } 1666f66fd6c3SFam Zheng /* generate descriptor file */ 1667f66fd6c3SFam Zheng snprintf(desc, sizeof(desc), desc_template, 1668f66fd6c3SFam Zheng (unsigned int)time(NULL), 1669f66fd6c3SFam Zheng parent_cid, 1670f66fd6c3SFam Zheng fmt, 1671f66fd6c3SFam Zheng parent_desc_line, 1672f66fd6c3SFam Zheng ext_desc_lines, 1673f66fd6c3SFam Zheng (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), 16747f2039f6SOthmar Pasteka total_size / (int64_t)(63 * number_heads * 512), number_heads, 16757f2039f6SOthmar Pasteka adapter_type); 1676f66fd6c3SFam Zheng if (split || flat) { 16776165f4d8SCorey Bryant fd = qemu_open(filename, 1678f66fd6c3SFam Zheng O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 1679f66fd6c3SFam Zheng 0644); 1680f66fd6c3SFam Zheng } else { 16816165f4d8SCorey Bryant fd = qemu_open(filename, 1682f66fd6c3SFam Zheng O_WRONLY | O_BINARY | O_LARGEFILE, 1683f66fd6c3SFam Zheng 0644); 1684f66fd6c3SFam Zheng } 1685f66fd6c3SFam Zheng if (fd < 0) { 1686f66fd6c3SFam Zheng return -errno; 1687f66fd6c3SFam Zheng } 1688f66fd6c3SFam Zheng /* the descriptor offset = 0x200 */ 1689f66fd6c3SFam Zheng if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) { 1690f66fd6c3SFam Zheng ret = -errno; 1691f66fd6c3SFam Zheng goto exit; 1692f66fd6c3SFam Zheng } 16931640366cSKirill A. Shutemov ret = qemu_write_full(fd, desc, strlen(desc)); 16941640366cSKirill A. Shutemov if (ret != strlen(desc)) { 1695b781cce5SJuan Quintela ret = -errno; 16961640366cSKirill A. Shutemov goto exit; 16971640366cSKirill A. Shutemov } 16981640366cSKirill A. Shutemov ret = 0; 16991640366cSKirill A. Shutemov exit: 17002e1e79daSCorey Bryant qemu_close(fd); 17011640366cSKirill A. Shutemov return ret; 1702019d6b8fSAnthony Liguori } 1703019d6b8fSAnthony Liguori 1704019d6b8fSAnthony Liguori static void vmdk_close(BlockDriverState *bs) 1705019d6b8fSAnthony Liguori { 17062bc3166cSKevin Wolf BDRVVmdkState *s = bs->opaque; 17072bc3166cSKevin Wolf 1708b3976d3cSFam Zheng vmdk_free_extents(bs); 17092bc3166cSKevin Wolf 17102bc3166cSKevin Wolf migrate_del_blocker(s->migration_blocker); 17112bc3166cSKevin Wolf error_free(s->migration_blocker); 1712019d6b8fSAnthony Liguori } 1713019d6b8fSAnthony Liguori 17148b94ff85SPaolo Bonzini static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) 1715019d6b8fSAnthony Liguori { 1716333c574dSFam Zheng BDRVVmdkState *s = bs->opaque; 171729cdb251SPaolo Bonzini int i, err; 171829cdb251SPaolo Bonzini int ret = 0; 1719333c574dSFam Zheng 1720333c574dSFam Zheng for (i = 0; i < s->num_extents; i++) { 17218b94ff85SPaolo Bonzini err = bdrv_co_flush(s->extents[i].file); 1722333c574dSFam Zheng if (err < 0) { 1723333c574dSFam Zheng ret = err; 1724333c574dSFam Zheng } 1725333c574dSFam Zheng } 1726333c574dSFam Zheng return ret; 1727019d6b8fSAnthony Liguori } 1728019d6b8fSAnthony Liguori 17294a1d5e1fSFam Zheng static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) 17304a1d5e1fSFam Zheng { 17314a1d5e1fSFam Zheng int i; 17324a1d5e1fSFam Zheng int64_t ret = 0; 17334a1d5e1fSFam Zheng int64_t r; 17344a1d5e1fSFam Zheng BDRVVmdkState *s = bs->opaque; 17354a1d5e1fSFam Zheng 17364a1d5e1fSFam Zheng ret = bdrv_get_allocated_file_size(bs->file); 17374a1d5e1fSFam Zheng if (ret < 0) { 17384a1d5e1fSFam Zheng return ret; 17394a1d5e1fSFam Zheng } 17404a1d5e1fSFam Zheng for (i = 0; i < s->num_extents; i++) { 17414a1d5e1fSFam Zheng if (s->extents[i].file == bs->file) { 17424a1d5e1fSFam Zheng continue; 17434a1d5e1fSFam Zheng } 17444a1d5e1fSFam Zheng r = bdrv_get_allocated_file_size(s->extents[i].file); 17454a1d5e1fSFam Zheng if (r < 0) { 17464a1d5e1fSFam Zheng return r; 17474a1d5e1fSFam Zheng } 17484a1d5e1fSFam Zheng ret += r; 17494a1d5e1fSFam Zheng } 17504a1d5e1fSFam Zheng return ret; 17514a1d5e1fSFam Zheng } 17520e7e1989SKevin Wolf 1753da7a50f9SFam Zheng static int vmdk_has_zero_init(BlockDriverState *bs) 1754da7a50f9SFam Zheng { 1755da7a50f9SFam Zheng int i; 1756da7a50f9SFam Zheng BDRVVmdkState *s = bs->opaque; 1757da7a50f9SFam Zheng 1758da7a50f9SFam Zheng /* If has a flat extent and its underlying storage doesn't have zero init, 1759da7a50f9SFam Zheng * return 0. */ 1760da7a50f9SFam Zheng for (i = 0; i < s->num_extents; i++) { 1761da7a50f9SFam Zheng if (s->extents[i].flat) { 1762da7a50f9SFam Zheng if (!bdrv_has_zero_init(s->extents[i].file)) { 1763da7a50f9SFam Zheng return 0; 1764da7a50f9SFam Zheng } 1765da7a50f9SFam Zheng } 1766da7a50f9SFam Zheng } 1767da7a50f9SFam Zheng return 1; 1768da7a50f9SFam Zheng } 1769da7a50f9SFam Zheng 17700e7e1989SKevin Wolf static QEMUOptionParameter vmdk_create_options[] = { 1771db08adf5SKevin Wolf { 1772db08adf5SKevin Wolf .name = BLOCK_OPT_SIZE, 1773db08adf5SKevin Wolf .type = OPT_SIZE, 1774db08adf5SKevin Wolf .help = "Virtual disk size" 1775db08adf5SKevin Wolf }, 1776db08adf5SKevin Wolf { 17777f2039f6SOthmar Pasteka .name = BLOCK_OPT_ADAPTER_TYPE, 17787f2039f6SOthmar Pasteka .type = OPT_STRING, 17797f2039f6SOthmar Pasteka .help = "Virtual adapter type, can be one of " 17807f2039f6SOthmar Pasteka "ide (default), lsilogic, buslogic or legacyESX" 17817f2039f6SOthmar Pasteka }, 17827f2039f6SOthmar Pasteka { 1783db08adf5SKevin Wolf .name = BLOCK_OPT_BACKING_FILE, 1784db08adf5SKevin Wolf .type = OPT_STRING, 1785db08adf5SKevin Wolf .help = "File name of a base image" 1786db08adf5SKevin Wolf }, 1787db08adf5SKevin Wolf { 1788db08adf5SKevin Wolf .name = BLOCK_OPT_COMPAT6, 1789db08adf5SKevin Wolf .type = OPT_FLAG, 1790db08adf5SKevin Wolf .help = "VMDK version 6 image" 1791db08adf5SKevin Wolf }, 1792f66fd6c3SFam Zheng { 1793f66fd6c3SFam Zheng .name = BLOCK_OPT_SUBFMT, 1794f66fd6c3SFam Zheng .type = OPT_STRING, 1795f66fd6c3SFam Zheng .help = 1796f66fd6c3SFam Zheng "VMDK flat extent format, can be one of " 17976c031aacSFam Zheng "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " 1798f66fd6c3SFam Zheng }, 179969e0b6dfSFam Zheng { 180069e0b6dfSFam Zheng .name = BLOCK_OPT_ZEROED_GRAIN, 180169e0b6dfSFam Zheng .type = OPT_FLAG, 180269e0b6dfSFam Zheng .help = "Enable efficient zero writes using the zeroed-grain GTE feature" 180369e0b6dfSFam Zheng }, 18040e7e1989SKevin Wolf { NULL } 18050e7e1989SKevin Wolf }; 18060e7e1989SKevin Wolf 1807019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = { 1808019d6b8fSAnthony Liguori .format_name = "vmdk", 1809019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVVmdkState), 1810019d6b8fSAnthony Liguori .bdrv_probe = vmdk_probe, 18116511ef77SKevin Wolf .bdrv_open = vmdk_open, 18123897575fSJeff Cody .bdrv_reopen_prepare = vmdk_reopen_prepare, 18132914caa0SPaolo Bonzini .bdrv_read = vmdk_co_read, 1814e183ef75SPaolo Bonzini .bdrv_write = vmdk_co_write, 1815cdeaf1f1SFam Zheng .bdrv_co_write_zeroes = vmdk_co_write_zeroes, 1816019d6b8fSAnthony Liguori .bdrv_close = vmdk_close, 1817019d6b8fSAnthony Liguori .bdrv_create = vmdk_create, 1818c68b89acSKevin Wolf .bdrv_co_flush_to_disk = vmdk_co_flush, 1819f8a2e5e3SStefan Hajnoczi .bdrv_co_is_allocated = vmdk_co_is_allocated, 18204a1d5e1fSFam Zheng .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, 1821da7a50f9SFam Zheng .bdrv_has_zero_init = vmdk_has_zero_init, 18220e7e1989SKevin Wolf 18230e7e1989SKevin Wolf .create_options = vmdk_create_options, 1824019d6b8fSAnthony Liguori }; 1825019d6b8fSAnthony Liguori 1826019d6b8fSAnthony Liguori static void bdrv_vmdk_init(void) 1827019d6b8fSAnthony Liguori { 1828019d6b8fSAnthony Liguori bdrv_register(&bdrv_vmdk); 1829019d6b8fSAnthony Liguori } 1830019d6b8fSAnthony Liguori 1831019d6b8fSAnthony Liguori block_init(bdrv_vmdk_init); 1832