1019d6b8fSAnthony Liguori /* 2019d6b8fSAnthony Liguori * Block driver for the VMDK format 3019d6b8fSAnthony Liguori * 4019d6b8fSAnthony Liguori * Copyright (c) 2004 Fabrice Bellard 5019d6b8fSAnthony Liguori * Copyright (c) 2005 Filip Navara 6019d6b8fSAnthony Liguori * 7019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy 8019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal 9019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights 10019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is 12019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions: 13019d6b8fSAnthony Liguori * 14019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in 15019d6b8fSAnthony Liguori * all copies or substantial portions of the Software. 16019d6b8fSAnthony Liguori * 17019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23019d6b8fSAnthony Liguori * THE SOFTWARE. 
24019d6b8fSAnthony Liguori */ 25019d6b8fSAnthony Liguori 2680c71a24SPeter Maydell #include "qemu/osdep.h" 27da34e65cSMarkus Armbruster #include "qapi/error.h" 28737e150eSPaolo Bonzini #include "block/block_int.h" 29c4bea169SKevin Wolf #include "sysemu/block-backend.h" 30cc7a8ea7SMarkus Armbruster #include "qapi/qmp/qerror.h" 31d49b6836SMarkus Armbruster #include "qemu/error-report.h" 321de7afc9SPaolo Bonzini #include "qemu/module.h" 33922a01a0SMarkus Armbruster #include "qemu/option.h" 3458369e22SPaolo Bonzini #include "qemu/bswap.h" 35795c40b8SJuan Quintela #include "migration/blocker.h" 36f348b6d1SVeronia Bahaa #include "qemu/cutils.h" 372923d34fSStefan Weil #include <zlib.h> 38019d6b8fSAnthony Liguori 39019d6b8fSAnthony Liguori #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') 40019d6b8fSAnthony Liguori #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') 41432bb170SFam Zheng #define VMDK4_COMPRESSION_DEFLATE 1 4295b0aa42SFam Zheng #define VMDK4_FLAG_NL_DETECT (1 << 0) 43bb45ded9SFam Zheng #define VMDK4_FLAG_RGD (1 << 1) 4414ead646SFam Zheng /* Zeroed-grain enable bit */ 4514ead646SFam Zheng #define VMDK4_FLAG_ZERO_GRAIN (1 << 2) 46432bb170SFam Zheng #define VMDK4_FLAG_COMPRESS (1 << 16) 47432bb170SFam Zheng #define VMDK4_FLAG_MARKER (1 << 17) 4865bd155cSKevin Wolf #define VMDK4_GD_AT_END 0xffffffffffffffffULL 49019d6b8fSAnthony Liguori 5014ead646SFam Zheng #define VMDK_GTE_ZEROED 0x1 5165f74725SFam Zheng 5265f74725SFam Zheng /* VMDK internal error codes */ 5365f74725SFam Zheng #define VMDK_OK 0 5465f74725SFam Zheng #define VMDK_ERROR (-1) 5565f74725SFam Zheng /* Cluster not allocated */ 5665f74725SFam Zheng #define VMDK_UNALLOC (-2) 5765f74725SFam Zheng #define VMDK_ZEROED (-3) 5865f74725SFam Zheng 5969e0b6dfSFam Zheng #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" 6069e0b6dfSFam Zheng 61019d6b8fSAnthony Liguori typedef struct { 62019d6b8fSAnthony Liguori uint32_t version; 63019d6b8fSAnthony Liguori uint32_t flags; 
64019d6b8fSAnthony Liguori uint32_t disk_sectors; 65019d6b8fSAnthony Liguori uint32_t granularity; 66019d6b8fSAnthony Liguori uint32_t l1dir_offset; 67019d6b8fSAnthony Liguori uint32_t l1dir_size; 68019d6b8fSAnthony Liguori uint32_t file_sectors; 69019d6b8fSAnthony Liguori uint32_t cylinders; 70019d6b8fSAnthony Liguori uint32_t heads; 71019d6b8fSAnthony Liguori uint32_t sectors_per_track; 725d8caa54SFam Zheng } QEMU_PACKED VMDK3Header; 73019d6b8fSAnthony Liguori 74019d6b8fSAnthony Liguori typedef struct { 75019d6b8fSAnthony Liguori uint32_t version; 76019d6b8fSAnthony Liguori uint32_t flags; 77e98768d4SFam Zheng uint64_t capacity; 78e98768d4SFam Zheng uint64_t granularity; 79e98768d4SFam Zheng uint64_t desc_offset; 80e98768d4SFam Zheng uint64_t desc_size; 81ca8804ceSFam Zheng /* Number of GrainTableEntries per GrainTable */ 82ca8804ceSFam Zheng uint32_t num_gtes_per_gt; 83e98768d4SFam Zheng uint64_t rgd_offset; 84e98768d4SFam Zheng uint64_t gd_offset; 85e98768d4SFam Zheng uint64_t grain_offset; 86019d6b8fSAnthony Liguori char filler[1]; 87019d6b8fSAnthony Liguori char check_bytes[4]; 88432bb170SFam Zheng uint16_t compressAlgorithm; 89541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header; 90019d6b8fSAnthony Liguori 91019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16 92019d6b8fSAnthony Liguori 93b3976d3cSFam Zheng typedef struct VmdkExtent { 9424bc15d1SKevin Wolf BdrvChild *file; 95b3976d3cSFam Zheng bool flat; 96432bb170SFam Zheng bool compressed; 97432bb170SFam Zheng bool has_marker; 9814ead646SFam Zheng bool has_zero_grain; 9914ead646SFam Zheng int version; 100b3976d3cSFam Zheng int64_t sectors; 101b3976d3cSFam Zheng int64_t end_sector; 1027fa60fa3SFam Zheng int64_t flat_start_offset; 103019d6b8fSAnthony Liguori int64_t l1_table_offset; 104019d6b8fSAnthony Liguori int64_t l1_backup_table_offset; 105019d6b8fSAnthony Liguori uint32_t *l1_table; 106019d6b8fSAnthony Liguori uint32_t *l1_backup_table; 107019d6b8fSAnthony Liguori unsigned int l1_size; 108019d6b8fSAnthony 
Liguori uint32_t l1_entry_sectors; 109019d6b8fSAnthony Liguori 110019d6b8fSAnthony Liguori unsigned int l2_size; 111019d6b8fSAnthony Liguori uint32_t *l2_cache; 112019d6b8fSAnthony Liguori uint32_t l2_cache_offsets[L2_CACHE_SIZE]; 113019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE]; 114019d6b8fSAnthony Liguori 115301c7d38SFam Zheng int64_t cluster_sectors; 116c6ac36e1SFam Zheng int64_t next_cluster_sector; 117f4c129a3SFam Zheng char *type; 118b3976d3cSFam Zheng } VmdkExtent; 119b3976d3cSFam Zheng 120b3976d3cSFam Zheng typedef struct BDRVVmdkState { 121848c66e8SPaolo Bonzini CoMutex lock; 122e98768d4SFam Zheng uint64_t desc_offset; 12369b4d86dSFam Zheng bool cid_updated; 124c338b6adSFam Zheng bool cid_checked; 125f4c129a3SFam Zheng uint32_t cid; 126019d6b8fSAnthony Liguori uint32_t parent_cid; 127b3976d3cSFam Zheng int num_extents; 128b3976d3cSFam Zheng /* Extent array with num_extents entries, ascend ordered by address */ 129b3976d3cSFam Zheng VmdkExtent *extents; 1302bc3166cSKevin Wolf Error *migration_blocker; 131f4c129a3SFam Zheng char *create_type; 132019d6b8fSAnthony Liguori } BDRVVmdkState; 133019d6b8fSAnthony Liguori 134019d6b8fSAnthony Liguori typedef struct VmdkMetaData { 135019d6b8fSAnthony Liguori unsigned int l1_index; 136019d6b8fSAnthony Liguori unsigned int l2_index; 137019d6b8fSAnthony Liguori unsigned int l2_offset; 138019d6b8fSAnthony Liguori int valid; 139cdeaf1f1SFam Zheng uint32_t *l2_cache_entry; 140019d6b8fSAnthony Liguori } VmdkMetaData; 141019d6b8fSAnthony Liguori 142432bb170SFam Zheng typedef struct VmdkGrainMarker { 143432bb170SFam Zheng uint64_t lba; 144432bb170SFam Zheng uint32_t size; 145432bb170SFam Zheng uint8_t data[0]; 1465d8caa54SFam Zheng } QEMU_PACKED VmdkGrainMarker; 147432bb170SFam Zheng 14865bd155cSKevin Wolf enum { 14965bd155cSKevin Wolf MARKER_END_OF_STREAM = 0, 15065bd155cSKevin Wolf MARKER_GRAIN_TABLE = 1, 15165bd155cSKevin Wolf MARKER_GRAIN_DIRECTORY = 2, 15265bd155cSKevin Wolf MARKER_FOOTER = 3, 
15365bd155cSKevin Wolf }; 15465bd155cSKevin Wolf 155019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) 156019d6b8fSAnthony Liguori { 157019d6b8fSAnthony Liguori uint32_t magic; 158019d6b8fSAnthony Liguori 159ae261c86SFam Zheng if (buf_size < 4) { 160019d6b8fSAnthony Liguori return 0; 161ae261c86SFam Zheng } 162019d6b8fSAnthony Liguori magic = be32_to_cpu(*(uint32_t *)buf); 163019d6b8fSAnthony Liguori if (magic == VMDK3_MAGIC || 16401fc99d6SFam Zheng magic == VMDK4_MAGIC) { 165019d6b8fSAnthony Liguori return 100; 16601fc99d6SFam Zheng } else { 16701fc99d6SFam Zheng const char *p = (const char *)buf; 16801fc99d6SFam Zheng const char *end = p + buf_size; 16901fc99d6SFam Zheng while (p < end) { 17001fc99d6SFam Zheng if (*p == '#') { 17101fc99d6SFam Zheng /* skip comment line */ 17201fc99d6SFam Zheng while (p < end && *p != '\n') { 17301fc99d6SFam Zheng p++; 17401fc99d6SFam Zheng } 17501fc99d6SFam Zheng p++; 17601fc99d6SFam Zheng continue; 17701fc99d6SFam Zheng } 17801fc99d6SFam Zheng if (*p == ' ') { 17901fc99d6SFam Zheng while (p < end && *p == ' ') { 18001fc99d6SFam Zheng p++; 18101fc99d6SFam Zheng } 18201fc99d6SFam Zheng /* skip '\r' if windows line endings used. 
*/ 18301fc99d6SFam Zheng if (p < end && *p == '\r') { 18401fc99d6SFam Zheng p++; 18501fc99d6SFam Zheng } 18601fc99d6SFam Zheng /* only accept blank lines before 'version=' line */ 18701fc99d6SFam Zheng if (p == end || *p != '\n') { 188019d6b8fSAnthony Liguori return 0; 189019d6b8fSAnthony Liguori } 19001fc99d6SFam Zheng p++; 19101fc99d6SFam Zheng continue; 19201fc99d6SFam Zheng } 19301fc99d6SFam Zheng if (end - p >= strlen("version=X\n")) { 19401fc99d6SFam Zheng if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || 19501fc99d6SFam Zheng strncmp("version=2\n", p, strlen("version=2\n")) == 0) { 19601fc99d6SFam Zheng return 100; 19701fc99d6SFam Zheng } 19801fc99d6SFam Zheng } 19901fc99d6SFam Zheng if (end - p >= strlen("version=X\r\n")) { 20001fc99d6SFam Zheng if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || 20101fc99d6SFam Zheng strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) { 20201fc99d6SFam Zheng return 100; 20301fc99d6SFam Zheng } 20401fc99d6SFam Zheng } 20501fc99d6SFam Zheng return 0; 20601fc99d6SFam Zheng } 20701fc99d6SFam Zheng return 0; 20801fc99d6SFam Zheng } 20901fc99d6SFam Zheng } 210019d6b8fSAnthony Liguori 211019d6b8fSAnthony Liguori #define SECTOR_SIZE 512 212f66fd6c3SFam Zheng #define DESC_SIZE (20 * SECTOR_SIZE) /* 20 sectors of 512 bytes each */ 213f66fd6c3SFam Zheng #define BUF_SIZE 4096 214f66fd6c3SFam Zheng #define HEADER_SIZE 512 /* first sector of 512 bytes */ 215019d6b8fSAnthony Liguori 216b3976d3cSFam Zheng static void vmdk_free_extents(BlockDriverState *bs) 217b3976d3cSFam Zheng { 218b3976d3cSFam Zheng int i; 219b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque; 220b3c0bfb6SFam Zheng VmdkExtent *e; 221b3976d3cSFam Zheng 222b3976d3cSFam Zheng for (i = 0; i < s->num_extents; i++) { 223b3c0bfb6SFam Zheng e = &s->extents[i]; 224b3c0bfb6SFam Zheng g_free(e->l1_table); 225b3c0bfb6SFam Zheng g_free(e->l2_cache); 226b3c0bfb6SFam Zheng g_free(e->l1_backup_table); 227f4c129a3SFam Zheng g_free(e->type); 2289a4f4c31SKevin 
Wolf if (e->file != bs->file) { 22924bc15d1SKevin Wolf bdrv_unref_child(bs, e->file); 230b3c0bfb6SFam Zheng } 231b3976d3cSFam Zheng } 2327267c094SAnthony Liguori g_free(s->extents); 233b3976d3cSFam Zheng } 234b3976d3cSFam Zheng 23586c6b429SFam Zheng static void vmdk_free_last_extent(BlockDriverState *bs) 23686c6b429SFam Zheng { 23786c6b429SFam Zheng BDRVVmdkState *s = bs->opaque; 23886c6b429SFam Zheng 23986c6b429SFam Zheng if (s->num_extents == 0) { 24086c6b429SFam Zheng return; 24186c6b429SFam Zheng } 24286c6b429SFam Zheng s->num_extents--; 2435839e53bSMarkus Armbruster s->extents = g_renew(VmdkExtent, s->extents, s->num_extents); 24486c6b429SFam Zheng } 24586c6b429SFam Zheng 2469877860eSPeter Maydell /* Return -ve errno, or 0 on success and write CID into *pcid. */ 2479877860eSPeter Maydell static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid) 248019d6b8fSAnthony Liguori { 2495997c210SFam Zheng char *desc; 2509877860eSPeter Maydell uint32_t cid; 251019d6b8fSAnthony Liguori const char *p_name, *cid_str; 252019d6b8fSAnthony Liguori size_t cid_str_size; 253e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 25499f1835dSKevin Wolf int ret; 255019d6b8fSAnthony Liguori 2565997c210SFam Zheng desc = g_malloc0(DESC_SIZE); 257cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 25899f1835dSKevin Wolf if (ret < 0) { 2599877860eSPeter Maydell goto out; 260e1da9b24SFam Zheng } 261019d6b8fSAnthony Liguori 262019d6b8fSAnthony Liguori if (parent) { 263019d6b8fSAnthony Liguori cid_str = "parentCID"; 264019d6b8fSAnthony Liguori cid_str_size = sizeof("parentCID"); 265019d6b8fSAnthony Liguori } else { 266019d6b8fSAnthony Liguori cid_str = "CID"; 267019d6b8fSAnthony Liguori cid_str_size = sizeof("CID"); 268019d6b8fSAnthony Liguori } 269019d6b8fSAnthony Liguori 27093897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0'; 271ae261c86SFam Zheng p_name = strstr(desc, cid_str); 2729877860eSPeter Maydell if (p_name == NULL) { 2739877860eSPeter Maydell 
ret = -EINVAL; 2749877860eSPeter Maydell goto out; 275019d6b8fSAnthony Liguori } 2769877860eSPeter Maydell p_name += cid_str_size; 2779877860eSPeter Maydell if (sscanf(p_name, "%" SCNx32, &cid) != 1) { 2789877860eSPeter Maydell ret = -EINVAL; 2799877860eSPeter Maydell goto out; 2809877860eSPeter Maydell } 2819877860eSPeter Maydell *pcid = cid; 2829877860eSPeter Maydell ret = 0; 283019d6b8fSAnthony Liguori 2849877860eSPeter Maydell out: 2855997c210SFam Zheng g_free(desc); 2869877860eSPeter Maydell return ret; 287019d6b8fSAnthony Liguori } 288019d6b8fSAnthony Liguori 289019d6b8fSAnthony Liguori static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) 290019d6b8fSAnthony Liguori { 291965415ebSFam Zheng char *desc, *tmp_desc; 292019d6b8fSAnthony Liguori char *p_name, *tmp_str; 293e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 294965415ebSFam Zheng int ret = 0; 295019d6b8fSAnthony Liguori 296965415ebSFam Zheng desc = g_malloc0(DESC_SIZE); 297965415ebSFam Zheng tmp_desc = g_malloc0(DESC_SIZE); 298cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 29999f1835dSKevin Wolf if (ret < 0) { 300965415ebSFam Zheng goto out; 301e1da9b24SFam Zheng } 302019d6b8fSAnthony Liguori 30393897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0'; 304019d6b8fSAnthony Liguori tmp_str = strstr(desc, "parentCID"); 30593897b9fSKevin Wolf if (tmp_str == NULL) { 306965415ebSFam Zheng ret = -EINVAL; 307965415ebSFam Zheng goto out; 30893897b9fSKevin Wolf } 30993897b9fSKevin Wolf 310965415ebSFam Zheng pstrcpy(tmp_desc, DESC_SIZE, tmp_str); 311ae261c86SFam Zheng p_name = strstr(desc, "CID"); 312ae261c86SFam Zheng if (p_name != NULL) { 313019d6b8fSAnthony Liguori p_name += sizeof("CID"); 314965415ebSFam Zheng snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid); 315965415ebSFam Zheng pstrcat(desc, DESC_SIZE, tmp_desc); 316019d6b8fSAnthony Liguori } 317019d6b8fSAnthony Liguori 318d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, 
DESC_SIZE); 31999f1835dSKevin Wolf 320965415ebSFam Zheng out: 321965415ebSFam Zheng g_free(desc); 322965415ebSFam Zheng g_free(tmp_desc); 323965415ebSFam Zheng return ret; 324019d6b8fSAnthony Liguori } 325019d6b8fSAnthony Liguori 326019d6b8fSAnthony Liguori static int vmdk_is_cid_valid(BlockDriverState *bs) 327019d6b8fSAnthony Liguori { 328019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 329019d6b8fSAnthony Liguori uint32_t cur_pcid; 330019d6b8fSAnthony Liguori 331760e0063SKevin Wolf if (!s->cid_checked && bs->backing) { 332760e0063SKevin Wolf BlockDriverState *p_bs = bs->backing->bs; 333760e0063SKevin Wolf 3349877860eSPeter Maydell if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) { 3359877860eSPeter Maydell /* read failure: report as not valid */ 3369877860eSPeter Maydell return 0; 3379877860eSPeter Maydell } 338ae261c86SFam Zheng if (s->parent_cid != cur_pcid) { 339ae261c86SFam Zheng /* CID not valid */ 340019d6b8fSAnthony Liguori return 0; 341019d6b8fSAnthony Liguori } 342ae261c86SFam Zheng } 343c338b6adSFam Zheng s->cid_checked = true; 344ae261c86SFam Zheng /* CID valid */ 345019d6b8fSAnthony Liguori return 1; 346019d6b8fSAnthony Liguori } 347019d6b8fSAnthony Liguori 34867251a31SKevin Wolf /* We have nothing to do for VMDK reopen, stubs just return success */ 3493897575fSJeff Cody static int vmdk_reopen_prepare(BDRVReopenState *state, 3503897575fSJeff Cody BlockReopenQueue *queue, Error **errp) 3513897575fSJeff Cody { 3523897575fSJeff Cody assert(state != NULL); 3533897575fSJeff Cody assert(state->bs != NULL); 35467251a31SKevin Wolf return 0; 3553897575fSJeff Cody } 3563897575fSJeff Cody 3579949f97eSKevin Wolf static int vmdk_parent_open(BlockDriverState *bs) 358019d6b8fSAnthony Liguori { 359019d6b8fSAnthony Liguori char *p_name; 36071968dbfSFam Zheng char *desc; 361e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque; 362588b65a3SPaolo Bonzini int ret; 363019d6b8fSAnthony Liguori 36471968dbfSFam Zheng desc = g_malloc0(DESC_SIZE + 1); 365cf2ab8fcSKevin Wolf ret = 
bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); 366588b65a3SPaolo Bonzini if (ret < 0) { 36771968dbfSFam Zheng goto out; 368e1da9b24SFam Zheng } 36971968dbfSFam Zheng ret = 0; 370019d6b8fSAnthony Liguori 371ae261c86SFam Zheng p_name = strstr(desc, "parentFileNameHint"); 372ae261c86SFam Zheng if (p_name != NULL) { 373019d6b8fSAnthony Liguori char *end_name; 374019d6b8fSAnthony Liguori 375019d6b8fSAnthony Liguori p_name += sizeof("parentFileNameHint") + 1; 376ae261c86SFam Zheng end_name = strchr(p_name, '\"'); 377ae261c86SFam Zheng if (end_name == NULL) { 37871968dbfSFam Zheng ret = -EINVAL; 37971968dbfSFam Zheng goto out; 380ae261c86SFam Zheng } 381ae261c86SFam Zheng if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { 38271968dbfSFam Zheng ret = -EINVAL; 38371968dbfSFam Zheng goto out; 384ae261c86SFam Zheng } 385019d6b8fSAnthony Liguori 386b171271aSKevin Wolf pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); 387019d6b8fSAnthony Liguori } 388019d6b8fSAnthony Liguori 38971968dbfSFam Zheng out: 39071968dbfSFam Zheng g_free(desc); 39171968dbfSFam Zheng return ret; 392019d6b8fSAnthony Liguori } 393019d6b8fSAnthony Liguori 394b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent 395b3976d3cSFam Zheng * address. return NULL if allocation failed. 
*/ 3968aa1331cSFam Zheng static int vmdk_add_extent(BlockDriverState *bs, 39724bc15d1SKevin Wolf BdrvChild *file, bool flat, int64_t sectors, 398b3976d3cSFam Zheng int64_t l1_offset, int64_t l1_backup_offset, 399b3976d3cSFam Zheng uint32_t l1_size, 4008aa1331cSFam Zheng int l2_size, uint64_t cluster_sectors, 4014823970bSFam Zheng VmdkExtent **new_extent, 4024823970bSFam Zheng Error **errp) 403b3976d3cSFam Zheng { 404b3976d3cSFam Zheng VmdkExtent *extent; 405b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque; 4060a156f7cSMarkus Armbruster int64_t nb_sectors; 407b3976d3cSFam Zheng 4088aa1331cSFam Zheng if (cluster_sectors > 0x200000) { 4098aa1331cSFam Zheng /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ 4104823970bSFam Zheng error_setg(errp, "Invalid granularity, image may be corrupt"); 4114823970bSFam Zheng return -EFBIG; 4128aa1331cSFam Zheng } 413b0651b8cSFam Zheng if (l1_size > 512 * 1024 * 1024) { 414b0651b8cSFam Zheng /* Although with big capacity and small l1_entry_sectors, we can get a 415b0651b8cSFam Zheng * big l1_size, we don't want unbounded value to allocate the table. 
416b0651b8cSFam Zheng * Limit it to 512M, which is 16PB for default cluster and L2 table 417b0651b8cSFam Zheng * size */ 4184823970bSFam Zheng error_setg(errp, "L1 size too big"); 419b0651b8cSFam Zheng return -EFBIG; 420b0651b8cSFam Zheng } 4218aa1331cSFam Zheng 42224bc15d1SKevin Wolf nb_sectors = bdrv_nb_sectors(file->bs); 4230a156f7cSMarkus Armbruster if (nb_sectors < 0) { 4240a156f7cSMarkus Armbruster return nb_sectors; 425c6ac36e1SFam Zheng } 426c6ac36e1SFam Zheng 4275839e53bSMarkus Armbruster s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1); 428b3976d3cSFam Zheng extent = &s->extents[s->num_extents]; 429b3976d3cSFam Zheng s->num_extents++; 430b3976d3cSFam Zheng 431b3976d3cSFam Zheng memset(extent, 0, sizeof(VmdkExtent)); 432b3976d3cSFam Zheng extent->file = file; 433b3976d3cSFam Zheng extent->flat = flat; 434b3976d3cSFam Zheng extent->sectors = sectors; 435b3976d3cSFam Zheng extent->l1_table_offset = l1_offset; 436b3976d3cSFam Zheng extent->l1_backup_table_offset = l1_backup_offset; 437b3976d3cSFam Zheng extent->l1_size = l1_size; 438b3976d3cSFam Zheng extent->l1_entry_sectors = l2_size * cluster_sectors; 439b3976d3cSFam Zheng extent->l2_size = l2_size; 440301c7d38SFam Zheng extent->cluster_sectors = flat ? 
sectors : cluster_sectors; 4410a156f7cSMarkus Armbruster extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors); 442b3976d3cSFam Zheng 443b3976d3cSFam Zheng if (s->num_extents > 1) { 444b3976d3cSFam Zheng extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; 445b3976d3cSFam Zheng } else { 446b3976d3cSFam Zheng extent->end_sector = extent->sectors; 447b3976d3cSFam Zheng } 448b3976d3cSFam Zheng bs->total_sectors = extent->end_sector; 4498aa1331cSFam Zheng if (new_extent) { 4508aa1331cSFam Zheng *new_extent = extent; 4518aa1331cSFam Zheng } 4528aa1331cSFam Zheng return 0; 453b3976d3cSFam Zheng } 454b3976d3cSFam Zheng 4554823970bSFam Zheng static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, 4564823970bSFam Zheng Error **errp) 457019d6b8fSAnthony Liguori { 458b4b3ab14SFam Zheng int ret; 45913c4941cSFam Zheng size_t l1_size; 46013c4941cSFam Zheng int i; 461b4b3ab14SFam Zheng 462b4b3ab14SFam Zheng /* read the L1 table */ 463b4b3ab14SFam Zheng l1_size = extent->l1_size * sizeof(uint32_t); 464d6e59931SKevin Wolf extent->l1_table = g_try_malloc(l1_size); 465d6e59931SKevin Wolf if (l1_size && extent->l1_table == NULL) { 466d6e59931SKevin Wolf return -ENOMEM; 467d6e59931SKevin Wolf } 468d6e59931SKevin Wolf 469cf2ab8fcSKevin Wolf ret = bdrv_pread(extent->file, 470b4b3ab14SFam Zheng extent->l1_table_offset, 471b4b3ab14SFam Zheng extent->l1_table, 472b4b3ab14SFam Zheng l1_size); 473b4b3ab14SFam Zheng if (ret < 0) { 4744823970bSFam Zheng error_setg_errno(errp, -ret, 4754823970bSFam Zheng "Could not read l1 table from extent '%s'", 47624bc15d1SKevin Wolf extent->file->bs->filename); 477b4b3ab14SFam Zheng goto fail_l1; 478b4b3ab14SFam Zheng } 479b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) { 480b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_table[i]); 481b4b3ab14SFam Zheng } 482b4b3ab14SFam Zheng 483b4b3ab14SFam Zheng if (extent->l1_backup_table_offset) { 484d6e59931SKevin Wolf extent->l1_backup_table = g_try_malloc(l1_size); 
485d6e59931SKevin Wolf if (l1_size && extent->l1_backup_table == NULL) { 486d6e59931SKevin Wolf ret = -ENOMEM; 487d6e59931SKevin Wolf goto fail_l1; 488d6e59931SKevin Wolf } 489cf2ab8fcSKevin Wolf ret = bdrv_pread(extent->file, 490b4b3ab14SFam Zheng extent->l1_backup_table_offset, 491b4b3ab14SFam Zheng extent->l1_backup_table, 492b4b3ab14SFam Zheng l1_size); 493b4b3ab14SFam Zheng if (ret < 0) { 4944823970bSFam Zheng error_setg_errno(errp, -ret, 4954823970bSFam Zheng "Could not read l1 backup table from extent '%s'", 49624bc15d1SKevin Wolf extent->file->bs->filename); 497b4b3ab14SFam Zheng goto fail_l1b; 498b4b3ab14SFam Zheng } 499b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) { 500b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_backup_table[i]); 501b4b3ab14SFam Zheng } 502b4b3ab14SFam Zheng } 503b4b3ab14SFam Zheng 504b4b3ab14SFam Zheng extent->l2_cache = 5055839e53bSMarkus Armbruster g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE); 506b4b3ab14SFam Zheng return 0; 507b4b3ab14SFam Zheng fail_l1b: 5087267c094SAnthony Liguori g_free(extent->l1_backup_table); 509b4b3ab14SFam Zheng fail_l1: 5107267c094SAnthony Liguori g_free(extent->l1_table); 511b4b3ab14SFam Zheng return ret; 512b4b3ab14SFam Zheng } 513b4b3ab14SFam Zheng 514daac8fdcSFam Zheng static int vmdk_open_vmfs_sparse(BlockDriverState *bs, 51524bc15d1SKevin Wolf BdrvChild *file, 5164823970bSFam Zheng int flags, Error **errp) 517b4b3ab14SFam Zheng { 518b4b3ab14SFam Zheng int ret; 519019d6b8fSAnthony Liguori uint32_t magic; 520019d6b8fSAnthony Liguori VMDK3Header header; 521b4b3ab14SFam Zheng VmdkExtent *extent; 522b4b3ab14SFam Zheng 523cf2ab8fcSKevin Wolf ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 524b4b3ab14SFam Zheng if (ret < 0) { 5254823970bSFam Zheng error_setg_errno(errp, -ret, 5264823970bSFam Zheng "Could not read header from file '%s'", 52724bc15d1SKevin Wolf file->bs->filename); 52886c6b429SFam Zheng return ret; 529b3976d3cSFam Zheng } 530f6b61e54SFam Zheng ret = 
vmdk_add_extent(bs, file, false, 531b3976d3cSFam Zheng le32_to_cpu(header.disk_sectors), 5327237aecdSFam Zheng (int64_t)le32_to_cpu(header.l1dir_offset) << 9, 533f6b61e54SFam Zheng 0, 534f6b61e54SFam Zheng le32_to_cpu(header.l1dir_size), 535f6b61e54SFam Zheng 4096, 5368aa1331cSFam Zheng le32_to_cpu(header.granularity), 5374823970bSFam Zheng &extent, 5384823970bSFam Zheng errp); 5398aa1331cSFam Zheng if (ret < 0) { 5408aa1331cSFam Zheng return ret; 5418aa1331cSFam Zheng } 5424823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp); 543b4b3ab14SFam Zheng if (ret) { 54486c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */ 54586c6b429SFam Zheng vmdk_free_last_extent(bs); 546b4b3ab14SFam Zheng } 547b4b3ab14SFam Zheng return ret; 548b4b3ab14SFam Zheng } 549b4b3ab14SFam Zheng 550d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, 551a6468367SKevin Wolf QDict *options, Error **errp); 552f16f509dSFam Zheng 553cf2ab8fcSKevin Wolf static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp) 554a8842e6dSPaolo Bonzini { 555a8842e6dSPaolo Bonzini int64_t size; 556a8842e6dSPaolo Bonzini char *buf; 557a8842e6dSPaolo Bonzini int ret; 558a8842e6dSPaolo Bonzini 559cf2ab8fcSKevin Wolf size = bdrv_getlength(file->bs); 560a8842e6dSPaolo Bonzini if (size < 0) { 561a8842e6dSPaolo Bonzini error_setg_errno(errp, -size, "Could not access file"); 562a8842e6dSPaolo Bonzini return NULL; 563a8842e6dSPaolo Bonzini } 564a8842e6dSPaolo Bonzini 56503c3359dSFam Zheng if (size < 4) { 56603c3359dSFam Zheng /* Both descriptor file and sparse image must be much larger than 4 56703c3359dSFam Zheng * bytes, also callers of vmdk_read_desc want to compare the first 4 56803c3359dSFam Zheng * bytes with VMDK4_MAGIC, let's error out if less is read. 
*/ 56903c3359dSFam Zheng error_setg(errp, "File is too small, not a valid image"); 57003c3359dSFam Zheng return NULL; 57103c3359dSFam Zheng } 57203c3359dSFam Zheng 57373b7bcadSFam Zheng size = MIN(size, (1 << 20) - 1); /* avoid unbounded allocation */ 57473b7bcadSFam Zheng buf = g_malloc(size + 1); 575a8842e6dSPaolo Bonzini 576a8842e6dSPaolo Bonzini ret = bdrv_pread(file, desc_offset, buf, size); 577a8842e6dSPaolo Bonzini if (ret < 0) { 578a8842e6dSPaolo Bonzini error_setg_errno(errp, -ret, "Could not read from file"); 579a8842e6dSPaolo Bonzini g_free(buf); 580a8842e6dSPaolo Bonzini return NULL; 581a8842e6dSPaolo Bonzini } 58273b7bcadSFam Zheng buf[ret] = 0; 583a8842e6dSPaolo Bonzini 584a8842e6dSPaolo Bonzini return buf; 585a8842e6dSPaolo Bonzini } 586a8842e6dSPaolo Bonzini 58786c6b429SFam Zheng static int vmdk_open_vmdk4(BlockDriverState *bs, 58824bc15d1SKevin Wolf BdrvChild *file, 589a6468367SKevin Wolf int flags, QDict *options, Error **errp) 590b4b3ab14SFam Zheng { 591b4b3ab14SFam Zheng int ret; 592b4b3ab14SFam Zheng uint32_t magic; 593b4b3ab14SFam Zheng uint32_t l1_size, l1_entry_sectors; 594019d6b8fSAnthony Liguori VMDK4Header header; 595b4b3ab14SFam Zheng VmdkExtent *extent; 596f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque; 597bb45ded9SFam Zheng int64_t l1_backup_offset = 0; 5983db1d98aSFam Zheng bool compressed; 599b4b3ab14SFam Zheng 600cf2ab8fcSKevin Wolf ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); 601b4b3ab14SFam Zheng if (ret < 0) { 6024823970bSFam Zheng error_setg_errno(errp, -ret, 6034823970bSFam Zheng "Could not read header from file '%s'", 60424bc15d1SKevin Wolf file->bs->filename); 60589ac8480SPaolo Bonzini return -EINVAL; 606b3976d3cSFam Zheng } 6075a394b9eSStefan Hajnoczi if (header.capacity == 0) { 608e98768d4SFam Zheng uint64_t desc_offset = le64_to_cpu(header.desc_offset); 6095a394b9eSStefan Hajnoczi if (desc_offset) { 610cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(file, desc_offset << 9, errp); 611d1833ef5SPaolo 
Bonzini if (!buf) { 612d1833ef5SPaolo Bonzini return -EINVAL; 613d1833ef5SPaolo Bonzini } 614a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp); 615d1833ef5SPaolo Bonzini g_free(buf); 616d1833ef5SPaolo Bonzini return ret; 6175a394b9eSStefan Hajnoczi } 618f16f509dSFam Zheng } 61965bd155cSKevin Wolf 620f4c129a3SFam Zheng if (!s->create_type) { 621f4c129a3SFam Zheng s->create_type = g_strdup("monolithicSparse"); 622f4c129a3SFam Zheng } 623f4c129a3SFam Zheng 62465bd155cSKevin Wolf if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { 62565bd155cSKevin Wolf /* 62665bd155cSKevin Wolf * The footer takes precedence over the header, so read it in. The 62765bd155cSKevin Wolf * footer starts at offset -1024 from the end: One sector for the 62865bd155cSKevin Wolf * footer, and another one for the end-of-stream marker. 62965bd155cSKevin Wolf */ 63065bd155cSKevin Wolf struct { 63165bd155cSKevin Wolf struct { 63265bd155cSKevin Wolf uint64_t val; 63365bd155cSKevin Wolf uint32_t size; 63465bd155cSKevin Wolf uint32_t type; 63565bd155cSKevin Wolf uint8_t pad[512 - 16]; 63665bd155cSKevin Wolf } QEMU_PACKED footer_marker; 63765bd155cSKevin Wolf 63865bd155cSKevin Wolf uint32_t magic; 63965bd155cSKevin Wolf VMDK4Header header; 64065bd155cSKevin Wolf uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; 64165bd155cSKevin Wolf 64265bd155cSKevin Wolf struct { 64365bd155cSKevin Wolf uint64_t val; 64465bd155cSKevin Wolf uint32_t size; 64565bd155cSKevin Wolf uint32_t type; 64665bd155cSKevin Wolf uint8_t pad[512 - 16]; 64765bd155cSKevin Wolf } QEMU_PACKED eos_marker; 64865bd155cSKevin Wolf } QEMU_PACKED footer; 64965bd155cSKevin Wolf 650cf2ab8fcSKevin Wolf ret = bdrv_pread(file, 6519a4f4c31SKevin Wolf bs->file->bs->total_sectors * 512 - 1536, 65265bd155cSKevin Wolf &footer, sizeof(footer)); 65365bd155cSKevin Wolf if (ret < 0) { 654d899d2e2SFam Zheng error_setg_errno(errp, -ret, "Failed to read footer"); 65565bd155cSKevin Wolf return ret; 65665bd155cSKevin Wolf } 
65765bd155cSKevin Wolf 65865bd155cSKevin Wolf /* Some sanity checks for the footer */ 65965bd155cSKevin Wolf if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || 66065bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.size) != 0 || 66165bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || 66265bd155cSKevin Wolf le64_to_cpu(footer.eos_marker.val) != 0 || 66365bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.size) != 0 || 66465bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) 66565bd155cSKevin Wolf { 666d899d2e2SFam Zheng error_setg(errp, "Invalid footer"); 66765bd155cSKevin Wolf return -EINVAL; 66865bd155cSKevin Wolf } 66965bd155cSKevin Wolf 67065bd155cSKevin Wolf header = footer.header; 67165bd155cSKevin Wolf } 67265bd155cSKevin Wolf 6733db1d98aSFam Zheng compressed = 6743db1d98aSFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; 675509d39aaSFam Zheng if (le32_to_cpu(header.version) > 3) { 676a55448b3SMax Reitz error_setg(errp, "Unsupported VMDK version %" PRIu32, 67796c51eb5SFam Zheng le32_to_cpu(header.version)); 67896c51eb5SFam Zheng return -ENOTSUP; 6793db1d98aSFam Zheng } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) && 6803db1d98aSFam Zheng !compressed) { 681509d39aaSFam Zheng /* VMware KB 2064959 explains that version 3 added support for 682509d39aaSFam Zheng * persistent changed block tracking (CBT), and backup software can 683509d39aaSFam Zheng * read it as version=1 if it doesn't care about the changed area 684509d39aaSFam Zheng * information. So we are safe to enable read only. 
*/ 685509d39aaSFam Zheng error_setg(errp, "VMDK version 3 must be read only"); 686509d39aaSFam Zheng return -EINVAL; 68796c51eb5SFam Zheng } 68896c51eb5SFam Zheng 689ca8804ceSFam Zheng if (le32_to_cpu(header.num_gtes_per_gt) > 512) { 69089ac8480SPaolo Bonzini error_setg(errp, "L2 table size too big"); 691f8ce0403SFam Zheng return -EINVAL; 692f8ce0403SFam Zheng } 693f8ce0403SFam Zheng 694ca8804ceSFam Zheng l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt) 695b3976d3cSFam Zheng * le64_to_cpu(header.granularity); 69675d12341SStefan Weil if (l1_entry_sectors == 0) { 697d899d2e2SFam Zheng error_setg(errp, "L1 entry size is invalid"); 69886c6b429SFam Zheng return -EINVAL; 69986c6b429SFam Zheng } 700b3976d3cSFam Zheng l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1) 701b3976d3cSFam Zheng / l1_entry_sectors; 702bb45ded9SFam Zheng if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { 703bb45ded9SFam Zheng l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; 704bb45ded9SFam Zheng } 70524bc15d1SKevin Wolf if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) { 7064ab9dab5SFam Zheng error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes", 7074ab9dab5SFam Zheng (int64_t)(le64_to_cpu(header.grain_offset) 7084ab9dab5SFam Zheng * BDRV_SECTOR_SIZE)); 70934ceed81SFam Zheng return -EINVAL; 71034ceed81SFam Zheng } 71134ceed81SFam Zheng 7128aa1331cSFam Zheng ret = vmdk_add_extent(bs, file, false, 713b3976d3cSFam Zheng le64_to_cpu(header.capacity), 714b3976d3cSFam Zheng le64_to_cpu(header.gd_offset) << 9, 715bb45ded9SFam Zheng l1_backup_offset, 716b3976d3cSFam Zheng l1_size, 717ca8804ceSFam Zheng le32_to_cpu(header.num_gtes_per_gt), 7188aa1331cSFam Zheng le64_to_cpu(header.granularity), 7194823970bSFam Zheng &extent, 7204823970bSFam Zheng errp); 7218aa1331cSFam Zheng if (ret < 0) { 7228aa1331cSFam Zheng return ret; 7238aa1331cSFam Zheng } 724432bb170SFam Zheng extent->compressed = 725432bb170SFam Zheng le16_to_cpu(header.compressAlgorithm) 
== VMDK4_COMPRESSION_DEFLATE; 726d8a7b061SFam Zheng if (extent->compressed) { 727d8a7b061SFam Zheng g_free(s->create_type); 728d8a7b061SFam Zheng s->create_type = g_strdup("streamOptimized"); 729d8a7b061SFam Zheng } 730432bb170SFam Zheng extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; 73114ead646SFam Zheng extent->version = le32_to_cpu(header.version); 73214ead646SFam Zheng extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; 7334823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp); 734b4b3ab14SFam Zheng if (ret) { 73586c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */ 73686c6b429SFam Zheng vmdk_free_last_extent(bs); 737019d6b8fSAnthony Liguori } 738b4b3ab14SFam Zheng return ret; 739b4b3ab14SFam Zheng } 740b4b3ab14SFam Zheng 7417fa60fa3SFam Zheng /* Find the value of option opt_name in the descriptor text desc and copy it into buf. The first occurrence of opt_name is located with strstr(); the two characters that follow it are skipped without being checked (they are expected to be the equals sign and the opening double quote), and everything up to the next double quote is taken as the value. Returns VMDK_OK on success. Returns VMDK_ERROR when opt_name does not occur, when the text ends before the value or before its closing quote, or when the value plus one terminator byte does not fit in buf_size. NOTE(review): strstr() also matches opt_name inside a longer key or inside a value - confirm callers only pass unambiguous names. */ 7427fa60fa3SFam Zheng static int vmdk_parse_description(const char *desc, const char *opt_name, 7437fa60fa3SFam Zheng char *buf, int buf_size) 7447fa60fa3SFam Zheng { 7457fa60fa3SFam Zheng char *opt_pos, *opt_end; 7467fa60fa3SFam Zheng const char *end = desc + strlen(desc); 7477fa60fa3SFam Zheng 7487fa60fa3SFam Zheng opt_pos = strstr(desc, opt_name); 7497fa60fa3SFam Zheng if (!opt_pos) { 75065f74725SFam Zheng return VMDK_ERROR; 7517fa60fa3SFam Zheng } 7527fa60fa3SFam Zheng /* Skip "=\"" following opt_name */ 7537fa60fa3SFam Zheng opt_pos += strlen(opt_name) + 2; 7547fa60fa3SFam Zheng if (opt_pos >= end) { 75565f74725SFam Zheng return VMDK_ERROR; 7567fa60fa3SFam Zheng } 7577fa60fa3SFam Zheng /* scan forward to the closing quote of the value */ opt_end = opt_pos; 7587fa60fa3SFam Zheng while (opt_end < end && *opt_end != '"') { 7597fa60fa3SFam Zheng opt_end++; 7607fa60fa3SFam Zheng } 7617fa60fa3SFam Zheng if (opt_end == end || buf_size < opt_end - opt_pos + 1) { 76265f74725SFam Zheng return VMDK_ERROR; 7637fa60fa3SFam Zheng } 7647fa60fa3SFam Zheng /* the size passed is value length + 1, not buf_size, so only the value is copied; presumably pstrcpy copies at most size - 1 bytes and NUL-terminates - TODO confirm */ pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); 76565f74725SFam Zheng return VMDK_OK; 7667fa60fa3SFam Zheng }
7677fa60fa3SFam Zheng 76886c6b429SFam Zheng /* Open an extent file and append to bs array */ 76924bc15d1SKevin Wolf static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, 770a6468367SKevin Wolf char *buf, QDict *options, Error **errp) 77186c6b429SFam Zheng { 77286c6b429SFam Zheng uint32_t magic; 77386c6b429SFam Zheng 774d1833ef5SPaolo Bonzini magic = ldl_be_p(buf); 77586c6b429SFam Zheng switch (magic) { 77686c6b429SFam Zheng case VMDK3_MAGIC: 7774823970bSFam Zheng return vmdk_open_vmfs_sparse(bs, file, flags, errp); 77886c6b429SFam Zheng break; 77986c6b429SFam Zheng case VMDK4_MAGIC: 780a6468367SKevin Wolf return vmdk_open_vmdk4(bs, file, flags, options, errp); 78186c6b429SFam Zheng break; 78286c6b429SFam Zheng default: 78376abe407SPaolo Bonzini error_setg(errp, "Image not in VMDK format"); 78476abe407SPaolo Bonzini return -EINVAL; 78586c6b429SFam Zheng break; 78686c6b429SFam Zheng } 78786c6b429SFam Zheng } 78886c6b429SFam Zheng 789e4937694SMarkus Armbruster static const char *next_line(const char *s) 790e4937694SMarkus Armbruster { 791e4937694SMarkus Armbruster while (*s) { 792e4937694SMarkus Armbruster if (*s == '\n') { 793e4937694SMarkus Armbruster return s + 1; 794e4937694SMarkus Armbruster } 795e4937694SMarkus Armbruster s++; 796e4937694SMarkus Armbruster } 797e4937694SMarkus Armbruster return s; 798e4937694SMarkus Armbruster } 799e4937694SMarkus Armbruster 8007fa60fa3SFam Zheng static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, 801a6468367SKevin Wolf const char *desc_file_path, QDict *options, 802a6468367SKevin Wolf Error **errp) 8037fa60fa3SFam Zheng { 8047fa60fa3SFam Zheng int ret; 805395a22faSJeff Cody int matches; 8067fa60fa3SFam Zheng char access[11]; 8077fa60fa3SFam Zheng char type[11]; 8087fa60fa3SFam Zheng char fname[512]; 809d28d737fSMarkus Armbruster const char *p, *np; 8107fa60fa3SFam Zheng int64_t sectors = 0; 8117fa60fa3SFam Zheng int64_t flat_offset; 812fe206562SJeff Cody char *extent_path; 
81324bc15d1SKevin Wolf BdrvChild *extent_file; 814f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque; 815f4c129a3SFam Zheng VmdkExtent *extent; 816a6468367SKevin Wolf char extent_opt_prefix[32]; 81724bc15d1SKevin Wolf Error *local_err = NULL; 8187fa60fa3SFam Zheng 819e4937694SMarkus Armbruster for (p = desc; *p; p = next_line(p)) { 8208a3e0bc3SFam Zheng /* parse extent line in one of below formats: 8218a3e0bc3SFam Zheng * 8227fa60fa3SFam Zheng * RW [size in sectors] FLAT "file-name.vmdk" OFFSET 8237fa60fa3SFam Zheng * RW [size in sectors] SPARSE "file-name.vmdk" 8248a3e0bc3SFam Zheng * RW [size in sectors] VMFS "file-name.vmdk" 8258a3e0bc3SFam Zheng * RW [size in sectors] VMFSSPARSE "file-name.vmdk" 8267fa60fa3SFam Zheng */ 8277fa60fa3SFam Zheng flat_offset = -1; 828395a22faSJeff Cody matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, 8297fa60fa3SFam Zheng access, §ors, type, fname, &flat_offset); 830395a22faSJeff Cody if (matches < 4 || strcmp(access, "RW")) { 831e4937694SMarkus Armbruster continue; 8327fa60fa3SFam Zheng } else if (!strcmp(type, "FLAT")) { 833395a22faSJeff Cody if (matches != 5 || flat_offset < 0) { 834d28d737fSMarkus Armbruster goto invalid; 8357fa60fa3SFam Zheng } 836dbbcaa8dSFam Zheng } else if (!strcmp(type, "VMFS")) { 837395a22faSJeff Cody if (matches == 4) { 838dbbcaa8dSFam Zheng flat_offset = 0; 839b47053bdSFam Zheng } else { 840d28d737fSMarkus Armbruster goto invalid; 841b47053bdSFam Zheng } 842395a22faSJeff Cody } else if (matches != 4) { 843d28d737fSMarkus Armbruster goto invalid; 8447fa60fa3SFam Zheng } 8457fa60fa3SFam Zheng 8467fa60fa3SFam Zheng if (sectors <= 0 || 847daac8fdcSFam Zheng (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && 84804d542c8SPaolo Bonzini strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || 8497fa60fa3SFam Zheng (strcmp(access, "RW"))) { 850e4937694SMarkus Armbruster continue; 8517fa60fa3SFam Zheng } 8527fa60fa3SFam Zheng 8535c98415bSMax Reitz if (!path_is_absolute(fname) && 
!path_has_protocol(fname) && 8545c98415bSMax Reitz !desc_file_path[0]) 8555c98415bSMax Reitz { 8565c98415bSMax Reitz error_setg(errp, "Cannot use relative extent paths with VMDK " 8579a4f4c31SKevin Wolf "descriptor file '%s'", bs->file->bs->filename); 8585c98415bSMax Reitz return -EINVAL; 8595c98415bSMax Reitz } 8605c98415bSMax Reitz 861fe206562SJeff Cody extent_path = g_malloc0(PATH_MAX); 862a7be17beSJeff Cody path_combine(extent_path, PATH_MAX, desc_file_path, fname); 863a6468367SKevin Wolf 864a6468367SKevin Wolf ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); 865a6468367SKevin Wolf assert(ret < 32); 866a6468367SKevin Wolf 86724bc15d1SKevin Wolf extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix, 86824bc15d1SKevin Wolf bs, &child_file, false, &local_err); 869fe206562SJeff Cody g_free(extent_path); 87024bc15d1SKevin Wolf if (local_err) { 87124bc15d1SKevin Wolf error_propagate(errp, local_err); 87224bc15d1SKevin Wolf return -EINVAL; 8737fa60fa3SFam Zheng } 87486c6b429SFam Zheng 87586c6b429SFam Zheng /* save to extents array */ 87604d542c8SPaolo Bonzini if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { 87786c6b429SFam Zheng /* FLAT extent */ 87886c6b429SFam Zheng 8798aa1331cSFam Zheng ret = vmdk_add_extent(bs, extent_file, true, sectors, 8804823970bSFam Zheng 0, 0, 0, 0, 0, &extent, errp); 8818aa1331cSFam Zheng if (ret < 0) { 88224bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file); 8838aa1331cSFam Zheng return ret; 8848aa1331cSFam Zheng } 885f16f509dSFam Zheng extent->flat_start_offset = flat_offset << 9; 886daac8fdcSFam Zheng } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) { 887daac8fdcSFam Zheng /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ 888cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(extent_file, 0, errp); 889d1833ef5SPaolo Bonzini if (!buf) { 890d1833ef5SPaolo Bonzini ret = -EINVAL; 891d1833ef5SPaolo Bonzini } else { 892a6468367SKevin Wolf ret = vmdk_open_sparse(bs, 
extent_file, bs->open_flags, buf, 893a6468367SKevin Wolf options, errp); 894d1833ef5SPaolo Bonzini } 895d1833ef5SPaolo Bonzini g_free(buf); 896b6b1d31fSStefan Hajnoczi if (ret) { 89724bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file); 89886c6b429SFam Zheng return ret; 89986c6b429SFam Zheng } 900f4c129a3SFam Zheng extent = &s->extents[s->num_extents - 1]; 9017fa60fa3SFam Zheng } else { 9024823970bSFam Zheng error_setg(errp, "Unsupported extent type '%s'", type); 90324bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file); 9047fa60fa3SFam Zheng return -ENOTSUP; 9057fa60fa3SFam Zheng } 906f4c129a3SFam Zheng extent->type = g_strdup(type); 907899f1ae2SFam Zheng } 9087fa60fa3SFam Zheng return 0; 909d28d737fSMarkus Armbruster 910d28d737fSMarkus Armbruster invalid: 911d28d737fSMarkus Armbruster np = next_line(p); 912d28d737fSMarkus Armbruster assert(np != p); 913d28d737fSMarkus Armbruster if (np[-1] == '\n') { 914d28d737fSMarkus Armbruster np--; 915d28d737fSMarkus Armbruster } 916d28d737fSMarkus Armbruster error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p); 917d28d737fSMarkus Armbruster return -EINVAL; 9187fa60fa3SFam Zheng } 9197fa60fa3SFam Zheng 920d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, 921a6468367SKevin Wolf QDict *options, Error **errp) 9227fa60fa3SFam Zheng { 9237fa60fa3SFam Zheng int ret; 9247fa60fa3SFam Zheng char ct[128]; 9257fa60fa3SFam Zheng BDRVVmdkState *s = bs->opaque; 9267fa60fa3SFam Zheng 9277fa60fa3SFam Zheng if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { 92876abe407SPaolo Bonzini error_setg(errp, "invalid VMDK image descriptor"); 92976abe407SPaolo Bonzini ret = -EINVAL; 9300bed087dSEvgeny Budilovsky goto exit; 9317fa60fa3SFam Zheng } 9326398de51SFam Zheng if (strcmp(ct, "monolithicFlat") && 93304d542c8SPaolo Bonzini strcmp(ct, "vmfs") && 934daac8fdcSFam Zheng strcmp(ct, "vmfsSparse") && 93586c6b429SFam Zheng strcmp(ct, "twoGbMaxExtentSparse") && 9366398de51SFam Zheng 
strcmp(ct, "twoGbMaxExtentFlat")) { 9374823970bSFam Zheng error_setg(errp, "Unsupported image type '%s'", ct); 9380bed087dSEvgeny Budilovsky ret = -ENOTSUP; 9390bed087dSEvgeny Budilovsky goto exit; 9407fa60fa3SFam Zheng } 941f4c129a3SFam Zheng s->create_type = g_strdup(ct); 9427fa60fa3SFam Zheng s->desc_offset = 0; 9439a4f4c31SKevin Wolf ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options, 9449a4f4c31SKevin Wolf errp); 9450bed087dSEvgeny Budilovsky exit: 9460bed087dSEvgeny Budilovsky return ret; 9477fa60fa3SFam Zheng } 9487fa60fa3SFam Zheng 949015a1036SMax Reitz static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, 950015a1036SMax Reitz Error **errp) 951b4b3ab14SFam Zheng { 9529aeecbbcSFam Zheng char *buf; 95386c6b429SFam Zheng int ret; 95486c6b429SFam Zheng BDRVVmdkState *s = bs->opaque; 95537f09e5eSPaolo Bonzini uint32_t magic; 956fe44dc91SAshijeet Acharya Error *local_err = NULL; 957b4b3ab14SFam Zheng 9584e4bf5c4SKevin Wolf bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 9594e4bf5c4SKevin Wolf false, errp); 9604e4bf5c4SKevin Wolf if (!bs->file) { 9614e4bf5c4SKevin Wolf return -EINVAL; 9624e4bf5c4SKevin Wolf } 9634e4bf5c4SKevin Wolf 964cf2ab8fcSKevin Wolf buf = vmdk_read_desc(bs->file, 0, errp); 965d1833ef5SPaolo Bonzini if (!buf) { 966d1833ef5SPaolo Bonzini return -EINVAL; 967d1833ef5SPaolo Bonzini } 968d1833ef5SPaolo Bonzini 96937f09e5eSPaolo Bonzini magic = ldl_be_p(buf); 97037f09e5eSPaolo Bonzini switch (magic) { 97137f09e5eSPaolo Bonzini case VMDK3_MAGIC: 97237f09e5eSPaolo Bonzini case VMDK4_MAGIC: 9739a4f4c31SKevin Wolf ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, 97424bc15d1SKevin Wolf errp); 97586c6b429SFam Zheng s->desc_offset = 0x200; 97637f09e5eSPaolo Bonzini break; 97737f09e5eSPaolo Bonzini default: 978a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp); 97937f09e5eSPaolo Bonzini break; 98037f09e5eSPaolo Bonzini } 981bae0a0ccSPaolo Bonzini if (ret) { 
982bae0a0ccSPaolo Bonzini goto fail; 983bae0a0ccSPaolo Bonzini } 98437f09e5eSPaolo Bonzini 98586c6b429SFam Zheng /* try to open parent images, if exist */ 98686c6b429SFam Zheng ret = vmdk_parent_open(bs); 98786c6b429SFam Zheng if (ret) { 988bae0a0ccSPaolo Bonzini goto fail; 989b4b3ab14SFam Zheng } 9909877860eSPeter Maydell ret = vmdk_read_cid(bs, 0, &s->cid); 9919877860eSPeter Maydell if (ret) { 9929877860eSPeter Maydell goto fail; 9939877860eSPeter Maydell } 9949877860eSPeter Maydell ret = vmdk_read_cid(bs, 1, &s->parent_cid); 9959877860eSPeter Maydell if (ret) { 9969877860eSPeter Maydell goto fail; 9979877860eSPeter Maydell } 998848c66e8SPaolo Bonzini qemu_co_mutex_init(&s->lock); 9992bc3166cSKevin Wolf 10002bc3166cSKevin Wolf /* Disable migration when VMDK images are used */ 100181e5f78aSAlberto Garcia error_setg(&s->migration_blocker, "The vmdk format used by node '%s' " 100281e5f78aSAlberto Garcia "does not support live migration", 100381e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 1004fe44dc91SAshijeet Acharya ret = migrate_add_blocker(s->migration_blocker, &local_err); 1005fe44dc91SAshijeet Acharya if (local_err) { 1006fe44dc91SAshijeet Acharya error_propagate(errp, local_err); 1007fe44dc91SAshijeet Acharya error_free(s->migration_blocker); 1008fe44dc91SAshijeet Acharya goto fail; 1009fe44dc91SAshijeet Acharya } 1010fe44dc91SAshijeet Acharya 1011d1833ef5SPaolo Bonzini g_free(buf); 10122bc3166cSKevin Wolf return 0; 1013bae0a0ccSPaolo Bonzini 1014bae0a0ccSPaolo Bonzini fail: 1015d1833ef5SPaolo Bonzini g_free(buf); 1016f4c129a3SFam Zheng g_free(s->create_type); 1017f4c129a3SFam Zheng s->create_type = NULL; 1018bae0a0ccSPaolo Bonzini vmdk_free_extents(bs); 1019bae0a0ccSPaolo Bonzini return ret; 1020019d6b8fSAnthony Liguori } 1021019d6b8fSAnthony Liguori 1022d34682cdSKevin Wolf 10233baca891SKevin Wolf static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) 1024d34682cdSKevin Wolf { 1025d34682cdSKevin Wolf BDRVVmdkState *s = bs->opaque; 
1026d34682cdSKevin Wolf int i; 1027d34682cdSKevin Wolf 1028d34682cdSKevin Wolf for (i = 0; i < s->num_extents; i++) { 1029d34682cdSKevin Wolf if (!s->extents[i].flat) { 1030cf081fcaSEric Blake bs->bl.pwrite_zeroes_alignment = 1031cf081fcaSEric Blake MAX(bs->bl.pwrite_zeroes_alignment, 1032cf081fcaSEric Blake s->extents[i].cluster_sectors << BDRV_SECTOR_BITS); 1033d34682cdSKevin Wolf } 1034d34682cdSKevin Wolf } 1035d34682cdSKevin Wolf } 1036d34682cdSKevin Wolf 1037c6ac36e1SFam Zheng /** 1038c6ac36e1SFam Zheng * get_whole_cluster 1039c6ac36e1SFam Zheng * 1040c6ac36e1SFam Zheng * Copy backing file's cluster that covers @sector_num, otherwise write zero, 1041c6ac36e1SFam Zheng * to the cluster at @cluster_sector_num. 1042c6ac36e1SFam Zheng * 1043c6ac36e1SFam Zheng * If @skip_start_sector < @skip_end_sector, the relative range 1044c6ac36e1SFam Zheng * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave 1045c6ac36e1SFam Zheng * it for call to write user data in the request. 
1046c6ac36e1SFam Zheng */ 1047b3976d3cSFam Zheng static int get_whole_cluster(BlockDriverState *bs, 1048b3976d3cSFam Zheng VmdkExtent *extent, 104937b1d7d8SKevin Wolf uint64_t cluster_offset, 105037b1d7d8SKevin Wolf uint64_t offset, 105137b1d7d8SKevin Wolf uint64_t skip_start_bytes, 105237b1d7d8SKevin Wolf uint64_t skip_end_bytes) 1053019d6b8fSAnthony Liguori { 1054bf81507dSFam Zheng int ret = VMDK_OK; 1055c6ac36e1SFam Zheng int64_t cluster_bytes; 1056c6ac36e1SFam Zheng uint8_t *whole_grain; 1057019d6b8fSAnthony Liguori 1058c6ac36e1SFam Zheng /* For COW, align request sector_num to cluster start */ 1059c6ac36e1SFam Zheng cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; 106037b1d7d8SKevin Wolf offset = QEMU_ALIGN_DOWN(offset, cluster_bytes); 1061c6ac36e1SFam Zheng whole_grain = qemu_blockalign(bs, cluster_bytes); 1062c6ac36e1SFam Zheng 1063760e0063SKevin Wolf if (!bs->backing) { 106437b1d7d8SKevin Wolf memset(whole_grain, 0, skip_start_bytes); 106537b1d7d8SKevin Wolf memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes); 1066c6ac36e1SFam Zheng } 1067c6ac36e1SFam Zheng 106837b1d7d8SKevin Wolf assert(skip_end_bytes <= cluster_bytes); 10690e69c543SFam Zheng /* we will be here if it's first write on non-exist grain(cluster). 
10700e69c543SFam Zheng * try to read from parent image, if exist */ 1071760e0063SKevin Wolf if (bs->backing && !vmdk_is_cid_valid(bs)) { 1072c6ac36e1SFam Zheng ret = VMDK_ERROR; 1073c6ac36e1SFam Zheng goto exit; 1074c6ac36e1SFam Zheng } 1075c6ac36e1SFam Zheng 1076c6ac36e1SFam Zheng /* Read backing data before skip range */ 107737b1d7d8SKevin Wolf if (skip_start_bytes > 0) { 1078760e0063SKevin Wolf if (bs->backing) { 107923c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */ 108023c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); 1081cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->backing, offset, whole_grain, 108237b1d7d8SKevin Wolf skip_start_bytes); 1083c336500dSKevin Wolf if (ret < 0) { 1084bf81507dSFam Zheng ret = VMDK_ERROR; 1085bf81507dSFam Zheng goto exit; 1086019d6b8fSAnthony Liguori } 1087019d6b8fSAnthony Liguori } 108823c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); 1089d9ca2ea2SKevin Wolf ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain, 109037b1d7d8SKevin Wolf skip_start_bytes); 1091c6ac36e1SFam Zheng if (ret < 0) { 1092c6ac36e1SFam Zheng ret = VMDK_ERROR; 1093c6ac36e1SFam Zheng goto exit; 1094c6ac36e1SFam Zheng } 1095c6ac36e1SFam Zheng } 1096c6ac36e1SFam Zheng /* Read backing data after skip range */ 109737b1d7d8SKevin Wolf if (skip_end_bytes < cluster_bytes) { 1098760e0063SKevin Wolf if (bs->backing) { 109923c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */ 110023c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); 1101cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->backing, offset + skip_end_bytes, 110237b1d7d8SKevin Wolf whole_grain + skip_end_bytes, 110337b1d7d8SKevin Wolf cluster_bytes - skip_end_bytes); 1104c6ac36e1SFam Zheng if (ret < 0) { 1105c6ac36e1SFam Zheng ret = VMDK_ERROR; 1106c6ac36e1SFam Zheng goto exit; 1107c6ac36e1SFam Zheng } 1108c6ac36e1SFam Zheng } 110923c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); 1110d9ca2ea2SKevin Wolf ret = 
bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes, 111137b1d7d8SKevin Wolf whole_grain + skip_end_bytes, 111237b1d7d8SKevin Wolf cluster_bytes - skip_end_bytes); 1113c6ac36e1SFam Zheng if (ret < 0) { 1114c6ac36e1SFam Zheng ret = VMDK_ERROR; 1115c6ac36e1SFam Zheng goto exit; 1116c6ac36e1SFam Zheng } 1117c6ac36e1SFam Zheng } 1118c6ac36e1SFam Zheng 111937b1d7d8SKevin Wolf ret = VMDK_OK; 1120bf81507dSFam Zheng exit: 1121bf81507dSFam Zheng qemu_vfree(whole_grain); 1122bf81507dSFam Zheng return ret; 1123019d6b8fSAnthony Liguori } 1124019d6b8fSAnthony Liguori 1125c6ac36e1SFam Zheng static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, 1126c6ac36e1SFam Zheng uint32_t offset) 1127019d6b8fSAnthony Liguori { 1128c6ac36e1SFam Zheng offset = cpu_to_le32(offset); 1129019d6b8fSAnthony Liguori /* update L2 table */ 113023c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE); 1131d9ca2ea2SKevin Wolf if (bdrv_pwrite_sync(extent->file, 1132b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512) 1133c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)), 1134e304e8e5SFam Zheng &offset, sizeof(offset)) < 0) { 113565f74725SFam Zheng return VMDK_ERROR; 1136b3976d3cSFam Zheng } 1137019d6b8fSAnthony Liguori /* update backup L2 table */ 1138b3976d3cSFam Zheng if (extent->l1_backup_table_offset != 0) { 1139b3976d3cSFam Zheng m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; 1140d9ca2ea2SKevin Wolf if (bdrv_pwrite_sync(extent->file, 1141b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512) 1142c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)), 1143e304e8e5SFam Zheng &offset, sizeof(offset)) < 0) { 114465f74725SFam Zheng return VMDK_ERROR; 1145019d6b8fSAnthony Liguori } 1146b3976d3cSFam Zheng } 1147cdeaf1f1SFam Zheng if (m_data->l2_cache_entry) { 1148cdeaf1f1SFam Zheng *m_data->l2_cache_entry = offset; 1149cdeaf1f1SFam Zheng } 1150019d6b8fSAnthony Liguori 115165f74725SFam Zheng return VMDK_OK; 1152019d6b8fSAnthony Liguori } 1153019d6b8fSAnthony 
Liguori 1154c6ac36e1SFam Zheng /** 1155c6ac36e1SFam Zheng * get_cluster_offset 1156c6ac36e1SFam Zheng * 1157c6ac36e1SFam Zheng * Look up the cluster that holds guest byte @offset in the extent file, and store 1158c6ac36e1SFam Zheng * its byte offset in @cluster_offset. 1159c6ac36e1SFam Zheng * 1160c6ac36e1SFam Zheng * For flat extents, the start offset as parsed from the description file is 1161c6ac36e1SFam Zheng * returned. 1162c6ac36e1SFam Zheng * 1163c6ac36e1SFam Zheng * For sparse extents, look up in L1, L2 table; L2 tables are read through a small cache whose least-used slot is replaced on a miss. If @allocate is true and the grain is unmapped or zeroed, return an 1164c6ac36e1SFam Zheng * offset for a new cluster taken from extent->next_cluster_sector and, when @m_data is not NULL, record the L1/L2 position in it; this function does not write the L2 entry itself. If there is a backing file, 1165c6ac36e1SFam Zheng * COW is done before returning; otherwise, zeroes are written to the allocated 1166c6ac36e1SFam Zheng * cluster. Both COW and zero writing skip the cluster-relative byte range 1167c6ac36e1SFam Zheng * [@skip_start_bytes, @skip_end_bytes) passed in by caller, because caller 1168c6ac36e1SFam Zheng * has new data to write there. 1169c6ac36e1SFam Zheng * 1170c6ac36e1SFam Zheng * Returns: VMDK_OK if cluster exists and mapped in the image. 1171c6ac36e1SFam Zheng * VMDK_UNALLOC if cluster is not mapped and @allocate is false, or if the L1 entry is zero. VMDK_ZEROED if the grain is marked zeroed and @allocate is false. 1172c6ac36e1SFam Zheng * VMDK_ERROR if failed. 
1173c6ac36e1SFam Zheng */ 117491b85bd3SFam Zheng static int get_cluster_offset(BlockDriverState *bs, 1175b3976d3cSFam Zheng VmdkExtent *extent, 1176b3976d3cSFam Zheng VmdkMetaData *m_data, 117791b85bd3SFam Zheng uint64_t offset, 1178c6ac36e1SFam Zheng bool allocate, 1179c6ac36e1SFam Zheng uint64_t *cluster_offset, 118037b1d7d8SKevin Wolf uint64_t skip_start_bytes, 118137b1d7d8SKevin Wolf uint64_t skip_end_bytes) 1182019d6b8fSAnthony Liguori { 1183019d6b8fSAnthony Liguori unsigned int l1_index, l2_offset, l2_index; 1184019d6b8fSAnthony Liguori int min_index, i, j; 1185e304e8e5SFam Zheng uint32_t min_count, *l2_table; 118614ead646SFam Zheng bool zeroed = false; 1187c6ac36e1SFam Zheng int64_t ret; 1188d1319b07SFam Zheng int64_t cluster_sector; 1189019d6b8fSAnthony Liguori 1190ae261c86SFam Zheng if (m_data) { 1191019d6b8fSAnthony Liguori m_data->valid = 0; 1192ae261c86SFam Zheng } 119391b85bd3SFam Zheng if (extent->flat) { 11947fa60fa3SFam Zheng *cluster_offset = extent->flat_start_offset; 119565f74725SFam Zheng return VMDK_OK; 119691b85bd3SFam Zheng } 1197019d6b8fSAnthony Liguori 11986398de51SFam Zheng /* make offset relative to the first sector of this extent */ offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; 1199b3976d3cSFam Zheng l1_index = (offset >> 9) / extent->l1_entry_sectors; 1200b3976d3cSFam Zheng if (l1_index >= extent->l1_size) { 120165f74725SFam Zheng return VMDK_ERROR; 1202b3976d3cSFam Zheng } 1203b3976d3cSFam Zheng l2_offset = extent->l1_table[l1_index]; 1204b3976d3cSFam Zheng if (!l2_offset) { 120565f74725SFam Zheng return VMDK_UNALLOC; 1206b3976d3cSFam Zheng } 1207019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) { 1208b3976d3cSFam Zheng if (l2_offset == extent->l2_cache_offsets[i]) { 1209019d6b8fSAnthony Liguori /* increment the hit count */ 1210b3976d3cSFam Zheng if (++extent->l2_cache_counts[i] == 0xffffffff) { 1211019d6b8fSAnthony Liguori /* counter reached its cap: halve every hit count */ for (j = 0; j < L2_CACHE_SIZE; j++) { 1212b3976d3cSFam Zheng extent->l2_cache_counts[j] >>= 1; 1213019d6b8fSAnthony Liguori } 
1214019d6b8fSAnthony Liguori } 1215b3976d3cSFam Zheng l2_table = extent->l2_cache + (i * extent->l2_size); 1216019d6b8fSAnthony Liguori goto found; 1217019d6b8fSAnthony Liguori } 1218019d6b8fSAnthony Liguori } 1219019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */ 1220019d6b8fSAnthony Liguori min_index = 0; 1221019d6b8fSAnthony Liguori min_count = 0xffffffff; 1222019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) { 1223b3976d3cSFam Zheng if (extent->l2_cache_counts[i] < min_count) { 1224b3976d3cSFam Zheng min_count = extent->l2_cache_counts[i]; 1225019d6b8fSAnthony Liguori min_index = i; 1226019d6b8fSAnthony Liguori } 1227019d6b8fSAnthony Liguori } 1228b3976d3cSFam Zheng l2_table = extent->l2_cache + (min_index * extent->l2_size); 122923c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD); 1230cf2ab8fcSKevin Wolf if (bdrv_pread(extent->file, 1231b3976d3cSFam Zheng (int64_t)l2_offset * 512, 1232b3976d3cSFam Zheng l2_table, 1233b3976d3cSFam Zheng extent->l2_size * sizeof(uint32_t) 1234b3976d3cSFam Zheng ) != extent->l2_size * sizeof(uint32_t)) { 123565f74725SFam Zheng return VMDK_ERROR; 1236b3976d3cSFam Zheng } 1237019d6b8fSAnthony Liguori 1238b3976d3cSFam Zheng extent->l2_cache_offsets[min_index] = l2_offset; 1239b3976d3cSFam Zheng extent->l2_cache_counts[min_index] = 1; 1240019d6b8fSAnthony Liguori found: 1241b3976d3cSFam Zheng l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; 1242c6ac36e1SFam Zheng /* cached L2 entries are kept little-endian */ cluster_sector = le32_to_cpu(l2_table[l2_index]); 1243019d6b8fSAnthony Liguori 1244c6ac36e1SFam Zheng if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { 124514ead646SFam Zheng zeroed = true; 124614ead646SFam Zheng } 124714ead646SFam Zheng 1248c6ac36e1SFam Zheng if (!cluster_sector || zeroed) { 124991b85bd3SFam Zheng if (!allocate) { 125014ead646SFam Zheng return zeroed ? 
VMDK_ZEROED : VMDK_UNALLOC; 125191b85bd3SFam Zheng } 12529949f97eSKevin Wolf 1253c6ac36e1SFam Zheng cluster_sector = extent->next_cluster_sector; 1254c6ac36e1SFam Zheng extent->next_cluster_sector += extent->cluster_sectors; 12559949f97eSKevin Wolf 1256019d6b8fSAnthony Liguori /* First of all we write grain itself, to avoid race condition 1257019d6b8fSAnthony Liguori * that may corrupt the image. 1258019d6b8fSAnthony Liguori * This problem may occur because of insufficient space on host disk 1259019d6b8fSAnthony Liguori * or inappropriate VM shutdown. 1260019d6b8fSAnthony Liguori */ 126137b1d7d8SKevin Wolf ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE, 126237b1d7d8SKevin Wolf offset, skip_start_bytes, skip_end_bytes); 1263c6ac36e1SFam Zheng if (ret) { 1264c6ac36e1SFam Zheng return ret; 1265019d6b8fSAnthony Liguori } 1266524089bcSReda Sallahi /* remember where the L2 entry lives; presumably consumed by vmdk_L2update - verify against caller */ if (m_data) { 1267524089bcSReda Sallahi m_data->valid = 1; 1268524089bcSReda Sallahi m_data->l1_index = l1_index; 1269524089bcSReda Sallahi m_data->l2_index = l2_index; 1270524089bcSReda Sallahi m_data->l2_offset = l2_offset; 1271524089bcSReda Sallahi m_data->l2_cache_entry = &l2_table[l2_index]; 1272524089bcSReda Sallahi } 1273019d6b8fSAnthony Liguori } 1274c6ac36e1SFam Zheng *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; 127565f74725SFam Zheng return VMDK_OK; 1276019d6b8fSAnthony Liguori } 1277019d6b8fSAnthony Liguori /* Return the first extent, scanning forward from start_hint (or from the first extent when start_hint is NULL), whose end_sector is greater than sector_num. Returns NULL when no such extent exists. */ 1278b3976d3cSFam Zheng static VmdkExtent *find_extent(BDRVVmdkState *s, 1279b3976d3cSFam Zheng int64_t sector_num, VmdkExtent *start_hint) 1280b3976d3cSFam Zheng { 1281b3976d3cSFam Zheng VmdkExtent *extent = start_hint; 1282b3976d3cSFam Zheng 1283b3976d3cSFam Zheng if (!extent) { 1284b3976d3cSFam Zheng extent = &s->extents[0]; 1285b3976d3cSFam Zheng } 1286b3976d3cSFam Zheng while (extent < &s->extents[s->num_extents]) { 1287b3976d3cSFam Zheng if (sector_num < extent->end_sector) { 1288b3976d3cSFam Zheng return extent; 1289b3976d3cSFam Zheng } 1290b3976d3cSFam Zheng extent++; 
1291b3976d3cSFam Zheng } 1292b3976d3cSFam Zheng return NULL; 1293b3976d3cSFam Zheng } 1294b3976d3cSFam Zheng /* Return the byte position of guest byte offset inside its cluster. The offset is first made relative to the start of the extent (end_sector - sectors, in bytes) and then reduced modulo the cluster size in bytes. */ 1295a844a2b0SKevin Wolf static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, 1296a844a2b0SKevin Wolf int64_t offset) 1297a844a2b0SKevin Wolf { 12989be38598SEduardo Habkost uint64_t extent_begin_offset, extent_relative_offset; 1299a844a2b0SKevin Wolf uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE; 1300a844a2b0SKevin Wolf 1301a844a2b0SKevin Wolf extent_begin_offset = 1302a844a2b0SKevin Wolf (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE; 1303a844a2b0SKevin Wolf extent_relative_offset = offset - extent_begin_offset; 13049be38598SEduardo Habkost return extent_relative_offset % cluster_size; 1305a844a2b0SKevin Wolf } 1306a844a2b0SKevin Wolf /* Report the allocation status of the cluster containing guest byte offset. The cluster is looked up with get_cluster_offset() (allocate = false) while holding s->lock. VMDK_ERROR maps to -EIO, VMDK_UNALLOC to 0, VMDK_ZEROED to BDRV_BLOCK_ZERO and VMDK_OK to BDRV_BLOCK_DATA; for VMDK_OK *file is set to the extent file, and for uncompressed extents BDRV_BLOCK_OFFSET_VALID is added and *map receives the host offset. *pnum is set to the number of bytes from offset to the end of that cluster, capped at bytes. Returns -EIO without touching the outputs when no extent covers offset. want_zero is not used by this function. */ 1307c72080b9SEric Blake static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, 1308c72080b9SEric Blake bool want_zero, 1309c72080b9SEric Blake int64_t offset, int64_t bytes, 1310c72080b9SEric Blake int64_t *pnum, int64_t *map, 1311c72080b9SEric Blake BlockDriverState **file) 1312019d6b8fSAnthony Liguori { 1313019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1314b3976d3cSFam Zheng int64_t index_in_cluster, n, ret; 1315c72080b9SEric Blake uint64_t cluster_offset; 1316b3976d3cSFam Zheng VmdkExtent *extent; 1317b3976d3cSFam Zheng 1318c72080b9SEric Blake extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); 1319b3976d3cSFam Zheng if (!extent) { 1320c72080b9SEric Blake return -EIO; 1321b3976d3cSFam Zheng } 1322f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock); 1323c72080b9SEric Blake ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset, 1324c6ac36e1SFam Zheng 0, 0); 1325f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock); 132614ead646SFam Zheng 1327c72080b9SEric Blake index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); 13284bc74be9SPaolo Bonzini switch (ret) { 13294bc74be9SPaolo Bonzini case VMDK_ERROR: 
13304bc74be9SPaolo Bonzini ret = -EIO; 13314bc74be9SPaolo Bonzini break; 13324bc74be9SPaolo Bonzini case VMDK_UNALLOC: 13334bc74be9SPaolo Bonzini ret = 0; 13344bc74be9SPaolo Bonzini break; 13354bc74be9SPaolo Bonzini case VMDK_ZEROED: 13364bc74be9SPaolo Bonzini ret = BDRV_BLOCK_ZERO; 13374bc74be9SPaolo Bonzini break; 13384bc74be9SPaolo Bonzini case VMDK_OK: 13394bc74be9SPaolo Bonzini ret = BDRV_BLOCK_DATA; 1340e0f100f5SFam Zheng if (!extent->compressed) { 1341d0a18f10SFam Zheng ret |= BDRV_BLOCK_OFFSET_VALID; 1342c72080b9SEric Blake *map = cluster_offset + index_in_cluster; 13434bc74be9SPaolo Bonzini } 1344e0f100f5SFam Zheng *file = extent->file->bs; 13454bc74be9SPaolo Bonzini break; 13464bc74be9SPaolo Bonzini } 134791b85bd3SFam Zheng 1348c72080b9SEric Blake n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; 1349c72080b9SEric Blake *pnum = MIN(n, bytes); 1350b3976d3cSFam Zheng return ret; 1351019d6b8fSAnthony Liguori } 1352019d6b8fSAnthony Liguori 1353dd3f6ee2SFam Zheng static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, 135437b1d7d8SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov, 135537b1d7d8SKevin Wolf uint64_t qiov_offset, uint64_t n_bytes, 135637b1d7d8SKevin Wolf uint64_t offset) 1357dd3f6ee2SFam Zheng { 1358dd3f6ee2SFam Zheng int ret; 13592b2c8c5dSFam Zheng VmdkGrainMarker *data = NULL; 13602b2c8c5dSFam Zheng uLongf buf_len; 136137b1d7d8SKevin Wolf QEMUIOVector local_qiov; 136237b1d7d8SKevin Wolf struct iovec iov; 13635e82a31eSFam Zheng int64_t write_offset; 13645e82a31eSFam Zheng int64_t write_end_sector; 1365dd3f6ee2SFam Zheng 13662b2c8c5dSFam Zheng if (extent->compressed) { 136737b1d7d8SKevin Wolf void *compressed_data; 136837b1d7d8SKevin Wolf 13692b2c8c5dSFam Zheng if (!extent->has_marker) { 13702b2c8c5dSFam Zheng ret = -EINVAL; 13712b2c8c5dSFam Zheng goto out; 13722b2c8c5dSFam Zheng } 13732b2c8c5dSFam Zheng buf_len = (extent->cluster_sectors << 9) * 2; 13742b2c8c5dSFam Zheng data = g_malloc(buf_len + 
sizeof(VmdkGrainMarker)); 137537b1d7d8SKevin Wolf 137637b1d7d8SKevin Wolf compressed_data = g_malloc(n_bytes); 137737b1d7d8SKevin Wolf qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes); 137837b1d7d8SKevin Wolf ret = compress(data->data, &buf_len, compressed_data, n_bytes); 137937b1d7d8SKevin Wolf g_free(compressed_data); 138037b1d7d8SKevin Wolf 138137b1d7d8SKevin Wolf if (ret != Z_OK || buf_len == 0) { 13822b2c8c5dSFam Zheng ret = -EINVAL; 13832b2c8c5dSFam Zheng goto out; 13842b2c8c5dSFam Zheng } 13855e82a31eSFam Zheng 13864545d4f4SQingFeng Hao data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS); 13874545d4f4SQingFeng Hao data->size = cpu_to_le32(buf_len); 138837b1d7d8SKevin Wolf 138937b1d7d8SKevin Wolf n_bytes = buf_len + sizeof(VmdkGrainMarker); 139037b1d7d8SKevin Wolf iov = (struct iovec) { 139137b1d7d8SKevin Wolf .iov_base = data, 139237b1d7d8SKevin Wolf .iov_len = n_bytes, 139337b1d7d8SKevin Wolf }; 139437b1d7d8SKevin Wolf qemu_iovec_init_external(&local_qiov, &iov, 1); 139523c4b2a8SMax Reitz 139623c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED); 139737b1d7d8SKevin Wolf } else { 139837b1d7d8SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov); 139937b1d7d8SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes); 140023c4b2a8SMax Reitz 140123c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO); 140237b1d7d8SKevin Wolf } 140337b1d7d8SKevin Wolf 14043c363575SMax Reitz write_offset = cluster_offset + offset_in_cluster; 1405a03ef88fSKevin Wolf ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes, 140637b1d7d8SKevin Wolf &local_qiov, 0); 140737b1d7d8SKevin Wolf 140837b1d7d8SKevin Wolf write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE); 14095e82a31eSFam Zheng 14103efffc32SRadoslav Gerganov if (extent->compressed) { 14113efffc32SRadoslav Gerganov extent->next_cluster_sector = write_end_sector; 14123efffc32SRadoslav Gerganov } else { 14135e82a31eSFam Zheng extent->next_cluster_sector = 
MAX(extent->next_cluster_sector, 14145e82a31eSFam Zheng write_end_sector); 14153efffc32SRadoslav Gerganov } 14165e82a31eSFam Zheng 141737b1d7d8SKevin Wolf if (ret < 0) { 1418dd3f6ee2SFam Zheng goto out; 1419dd3f6ee2SFam Zheng } 1420dd3f6ee2SFam Zheng ret = 0; 1421dd3f6ee2SFam Zheng out: 14222b2c8c5dSFam Zheng g_free(data); 142337b1d7d8SKevin Wolf if (!extent->compressed) { 142437b1d7d8SKevin Wolf qemu_iovec_destroy(&local_qiov); 142537b1d7d8SKevin Wolf } 1426dd3f6ee2SFam Zheng return ret; 1427dd3f6ee2SFam Zheng } 1428dd3f6ee2SFam Zheng 1429dd3f6ee2SFam Zheng static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, 1430f10cc243SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov, 1431f10cc243SKevin Wolf int bytes) 1432dd3f6ee2SFam Zheng { 1433dd3f6ee2SFam Zheng int ret; 14342b2c8c5dSFam Zheng int cluster_bytes, buf_bytes; 14352b2c8c5dSFam Zheng uint8_t *cluster_buf, *compressed_data; 14362b2c8c5dSFam Zheng uint8_t *uncomp_buf; 14372b2c8c5dSFam Zheng uint32_t data_len; 14382b2c8c5dSFam Zheng VmdkGrainMarker *marker; 14392b2c8c5dSFam Zheng uLongf buf_len; 1440dd3f6ee2SFam Zheng 14412b2c8c5dSFam Zheng 14422b2c8c5dSFam Zheng if (!extent->compressed) { 144323c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO); 1444a03ef88fSKevin Wolf ret = bdrv_co_preadv(extent->file, 1445f10cc243SKevin Wolf cluster_offset + offset_in_cluster, bytes, 1446f10cc243SKevin Wolf qiov, 0); 1447f10cc243SKevin Wolf if (ret < 0) { 1448f10cc243SKevin Wolf return ret; 1449dd3f6ee2SFam Zheng } 1450f10cc243SKevin Wolf return 0; 1451dd3f6ee2SFam Zheng } 14522b2c8c5dSFam Zheng cluster_bytes = extent->cluster_sectors * 512; 14532b2c8c5dSFam Zheng /* Read two clusters in case GrainMarker + compressed data > one cluster */ 14542b2c8c5dSFam Zheng buf_bytes = cluster_bytes * 2; 14552b2c8c5dSFam Zheng cluster_buf = g_malloc(buf_bytes); 14562b2c8c5dSFam Zheng uncomp_buf = g_malloc(cluster_bytes); 145723c4b2a8SMax Reitz BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED); 
1458cf2ab8fcSKevin Wolf ret = bdrv_pread(extent->file, 14592b2c8c5dSFam Zheng cluster_offset, 14602b2c8c5dSFam Zheng cluster_buf, buf_bytes); 14612b2c8c5dSFam Zheng if (ret < 0) { 14622b2c8c5dSFam Zheng goto out; 14632b2c8c5dSFam Zheng } 14642b2c8c5dSFam Zheng compressed_data = cluster_buf; 14652b2c8c5dSFam Zheng buf_len = cluster_bytes; 14662b2c8c5dSFam Zheng data_len = cluster_bytes; 14672b2c8c5dSFam Zheng if (extent->has_marker) { 14682b2c8c5dSFam Zheng marker = (VmdkGrainMarker *)cluster_buf; 14692b2c8c5dSFam Zheng compressed_data = marker->data; 14702b2c8c5dSFam Zheng data_len = le32_to_cpu(marker->size); 14712b2c8c5dSFam Zheng } 14722b2c8c5dSFam Zheng if (!data_len || data_len > buf_bytes) { 14732b2c8c5dSFam Zheng ret = -EINVAL; 14742b2c8c5dSFam Zheng goto out; 14752b2c8c5dSFam Zheng } 14762b2c8c5dSFam Zheng ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len); 14772b2c8c5dSFam Zheng if (ret != Z_OK) { 14782b2c8c5dSFam Zheng ret = -EINVAL; 14792b2c8c5dSFam Zheng goto out; 14802b2c8c5dSFam Zheng 14812b2c8c5dSFam Zheng } 14822b2c8c5dSFam Zheng if (offset_in_cluster < 0 || 1483f10cc243SKevin Wolf offset_in_cluster + bytes > buf_len) { 14842b2c8c5dSFam Zheng ret = -EINVAL; 14852b2c8c5dSFam Zheng goto out; 14862b2c8c5dSFam Zheng } 1487f10cc243SKevin Wolf qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes); 14882b2c8c5dSFam Zheng ret = 0; 14892b2c8c5dSFam Zheng 14902b2c8c5dSFam Zheng out: 14912b2c8c5dSFam Zheng g_free(uncomp_buf); 14922b2c8c5dSFam Zheng g_free(cluster_buf); 14932b2c8c5dSFam Zheng return ret; 14942b2c8c5dSFam Zheng } 1495dd3f6ee2SFam Zheng 1496f10cc243SKevin Wolf static int coroutine_fn 1497f10cc243SKevin Wolf vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 1498f10cc243SKevin Wolf QEMUIOVector *qiov, int flags) 1499019d6b8fSAnthony Liguori { 1500019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1501b3976d3cSFam Zheng int ret; 1502f10cc243SKevin Wolf uint64_t n_bytes, offset_in_cluster; 
1503b3976d3cSFam Zheng VmdkExtent *extent = NULL; 1504f10cc243SKevin Wolf QEMUIOVector local_qiov; 1505019d6b8fSAnthony Liguori uint64_t cluster_offset; 1506f10cc243SKevin Wolf uint64_t bytes_done = 0; 1507019d6b8fSAnthony Liguori 1508f10cc243SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov); 1509f10cc243SKevin Wolf qemu_co_mutex_lock(&s->lock); 1510f10cc243SKevin Wolf 1511f10cc243SKevin Wolf while (bytes > 0) { 1512f10cc243SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); 1513b3976d3cSFam Zheng if (!extent) { 1514f10cc243SKevin Wolf ret = -EIO; 1515f10cc243SKevin Wolf goto fail; 1516b3976d3cSFam Zheng } 1517c6ac36e1SFam Zheng ret = get_cluster_offset(bs, extent, NULL, 1518f10cc243SKevin Wolf offset, false, &cluster_offset, 0, 0); 1519f10cc243SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); 1520f10cc243SKevin Wolf 1521f10cc243SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE 1522f10cc243SKevin Wolf - offset_in_cluster); 1523f10cc243SKevin Wolf 152414ead646SFam Zheng if (ret != VMDK_OK) { 152591b85bd3SFam Zheng /* if not allocated, try to read from parent image, if exist */ 1526760e0063SKevin Wolf if (bs->backing && ret != VMDK_ZEROED) { 1527ae261c86SFam Zheng if (!vmdk_is_cid_valid(bs)) { 1528f10cc243SKevin Wolf ret = -EINVAL; 1529f10cc243SKevin Wolf goto fail; 1530019d6b8fSAnthony Liguori } 1531019d6b8fSAnthony Liguori 1532f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov); 1533f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 1534f10cc243SKevin Wolf 153523c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */ 153623c4b2a8SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1537a03ef88fSKevin Wolf ret = bdrv_co_preadv(bs->backing, offset, n_bytes, 1538f10cc243SKevin Wolf &local_qiov, 0); 1539f10cc243SKevin Wolf if (ret < 0) { 1540f10cc243SKevin Wolf goto fail; 1541f10cc243SKevin Wolf } 1542f10cc243SKevin Wolf } else { 
1543f10cc243SKevin Wolf qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); 1544f10cc243SKevin Wolf } 1545f10cc243SKevin Wolf } else { 1546f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov); 1547f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 1548f10cc243SKevin Wolf 1549f10cc243SKevin Wolf ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster, 1550f10cc243SKevin Wolf &local_qiov, n_bytes); 1551f10cc243SKevin Wolf if (ret) { 1552f10cc243SKevin Wolf goto fail; 1553f10cc243SKevin Wolf } 1554f10cc243SKevin Wolf } 1555f10cc243SKevin Wolf bytes -= n_bytes; 1556f10cc243SKevin Wolf offset += n_bytes; 1557f10cc243SKevin Wolf bytes_done += n_bytes; 1558f10cc243SKevin Wolf } 1559f10cc243SKevin Wolf 1560f10cc243SKevin Wolf ret = 0; 1561f10cc243SKevin Wolf fail: 15622914caa0SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 1563f10cc243SKevin Wolf qemu_iovec_destroy(&local_qiov); 1564f10cc243SKevin Wolf 15652914caa0SPaolo Bonzini return ret; 15662914caa0SPaolo Bonzini } 15672914caa0SPaolo Bonzini 1568cdeaf1f1SFam Zheng /** 1569cdeaf1f1SFam Zheng * vmdk_write: 1570cdeaf1f1SFam Zheng * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature 1571cdeaf1f1SFam Zheng * if possible, otherwise return -ENOTSUP. 15728e507243SFam Zheng * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try 15738e507243SFam Zheng * with each cluster. By dry run we can find if the zero write 15748e507243SFam Zheng * is possible without modifying image data. 1575cdeaf1f1SFam Zheng * 1576cdeaf1f1SFam Zheng * Returns: error code with 0 for success. 
1577cdeaf1f1SFam Zheng */ 157837b1d7d8SKevin Wolf static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, 157937b1d7d8SKevin Wolf uint64_t bytes, QEMUIOVector *qiov, 1580cdeaf1f1SFam Zheng bool zeroed, bool zero_dry_run) 1581019d6b8fSAnthony Liguori { 1582019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque; 1583b3976d3cSFam Zheng VmdkExtent *extent = NULL; 1584585ea0c8SFam Zheng int ret; 158537b1d7d8SKevin Wolf int64_t offset_in_cluster, n_bytes; 1586019d6b8fSAnthony Liguori uint64_t cluster_offset; 158737b1d7d8SKevin Wolf uint64_t bytes_done = 0; 1588b3976d3cSFam Zheng VmdkMetaData m_data; 1589019d6b8fSAnthony Liguori 159037b1d7d8SKevin Wolf if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { 159137b1d7d8SKevin Wolf error_report("Wrong offset: offset=0x%" PRIx64 15929af9e0feSMarkus Armbruster " total_sectors=0x%" PRIx64, 159337b1d7d8SKevin Wolf offset, bs->total_sectors); 15947fa60fa3SFam Zheng return -EIO; 1595019d6b8fSAnthony Liguori } 1596019d6b8fSAnthony Liguori 159737b1d7d8SKevin Wolf while (bytes > 0) { 159837b1d7d8SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); 1599b3976d3cSFam Zheng if (!extent) { 1600b3976d3cSFam Zheng return -EIO; 1601b3976d3cSFam Zheng } 160237b1d7d8SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); 160337b1d7d8SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE 160437b1d7d8SKevin Wolf - offset_in_cluster); 160537b1d7d8SKevin Wolf 160637b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset, 1607c6ac36e1SFam Zheng !(extent->compressed || zeroed), 160837b1d7d8SKevin Wolf &cluster_offset, offset_in_cluster, 160937b1d7d8SKevin Wolf offset_in_cluster + n_bytes); 16102b2c8c5dSFam Zheng if (extent->compressed) { 161165f74725SFam Zheng if (ret == VMDK_OK) { 16122b2c8c5dSFam Zheng /* Refuse write to allocated cluster for streamOptimized */ 16134823970bSFam Zheng error_report("Could not write to allocated cluster" 16144823970bSFam 
Zheng " for streamOptimized"); 16152b2c8c5dSFam Zheng return -EIO; 16162b2c8c5dSFam Zheng } else { 16172b2c8c5dSFam Zheng /* allocate */ 161837b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset, 1619c6ac36e1SFam Zheng true, &cluster_offset, 0, 0); 16202b2c8c5dSFam Zheng } 16212b2c8c5dSFam Zheng } 1622cdeaf1f1SFam Zheng if (ret == VMDK_ERROR) { 162391b85bd3SFam Zheng return -EINVAL; 1624b3976d3cSFam Zheng } 1625cdeaf1f1SFam Zheng if (zeroed) { 1626cdeaf1f1SFam Zheng /* Do zeroed write, buf is ignored */ 1627cdeaf1f1SFam Zheng if (extent->has_zero_grain && 162837b1d7d8SKevin Wolf offset_in_cluster == 0 && 162937b1d7d8SKevin Wolf n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) { 163037b1d7d8SKevin Wolf n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE; 1631cdeaf1f1SFam Zheng if (!zero_dry_run) { 1632cdeaf1f1SFam Zheng /* update L2 tables */ 1633c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) 1634c6ac36e1SFam Zheng != VMDK_OK) { 1635cdeaf1f1SFam Zheng return -EIO; 1636cdeaf1f1SFam Zheng } 1637cdeaf1f1SFam Zheng } 1638cdeaf1f1SFam Zheng } else { 1639cdeaf1f1SFam Zheng return -ENOTSUP; 1640cdeaf1f1SFam Zheng } 1641cdeaf1f1SFam Zheng } else { 164237b1d7d8SKevin Wolf ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster, 164337b1d7d8SKevin Wolf qiov, bytes_done, n_bytes, offset); 1644dd3f6ee2SFam Zheng if (ret) { 16457fa60fa3SFam Zheng return ret; 1646b3976d3cSFam Zheng } 1647019d6b8fSAnthony Liguori if (m_data.valid) { 1648019d6b8fSAnthony Liguori /* update L2 tables */ 1649c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data, 1650c6ac36e1SFam Zheng cluster_offset >> BDRV_SECTOR_BITS) 1651c6ac36e1SFam Zheng != VMDK_OK) { 16527fa60fa3SFam Zheng return -EIO; 1653019d6b8fSAnthony Liguori } 1654b3976d3cSFam Zheng } 1655cdeaf1f1SFam Zheng } 165637b1d7d8SKevin Wolf bytes -= n_bytes; 165737b1d7d8SKevin Wolf offset += n_bytes; 165837b1d7d8SKevin Wolf bytes_done += n_bytes; 1659019d6b8fSAnthony Liguori 
1660ae261c86SFam Zheng /* update CID on the first write every time the virtual disk is 1661ae261c86SFam Zheng * opened */ 166269b4d86dSFam Zheng if (!s->cid_updated) { 1663e5dc64b8SFam Zheng ret = vmdk_write_cid(bs, g_random_int()); 166499f1835dSKevin Wolf if (ret < 0) { 166599f1835dSKevin Wolf return ret; 166699f1835dSKevin Wolf } 166769b4d86dSFam Zheng s->cid_updated = true; 1668019d6b8fSAnthony Liguori } 1669019d6b8fSAnthony Liguori } 1670019d6b8fSAnthony Liguori return 0; 1671019d6b8fSAnthony Liguori } 1672019d6b8fSAnthony Liguori 167337b1d7d8SKevin Wolf static int coroutine_fn 167437b1d7d8SKevin Wolf vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 167537b1d7d8SKevin Wolf QEMUIOVector *qiov, int flags) 1676e183ef75SPaolo Bonzini { 1677e183ef75SPaolo Bonzini int ret; 1678e183ef75SPaolo Bonzini BDRVVmdkState *s = bs->opaque; 1679e183ef75SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 168037b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false); 1681cdeaf1f1SFam Zheng qemu_co_mutex_unlock(&s->lock); 1682cdeaf1f1SFam Zheng return ret; 1683cdeaf1f1SFam Zheng } 1684cdeaf1f1SFam Zheng 1685b2c622d3SPavel Butsykin static int coroutine_fn 1686b2c622d3SPavel Butsykin vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 1687b2c622d3SPavel Butsykin uint64_t bytes, QEMUIOVector *qiov) 168837b1d7d8SKevin Wolf { 1689b2c622d3SPavel Butsykin return vmdk_co_pwritev(bs, offset, bytes, qiov, 0); 1690ba0ad89eSFam Zheng } 1691ba0ad89eSFam Zheng 1692a620f2aeSEric Blake static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, 1693a620f2aeSEric Blake int64_t offset, 1694a620f2aeSEric Blake int bytes, 1695aa7bfbffSPeter Lieven BdrvRequestFlags flags) 1696cdeaf1f1SFam Zheng { 1697cdeaf1f1SFam Zheng int ret; 1698cdeaf1f1SFam Zheng BDRVVmdkState *s = bs->opaque; 169937b1d7d8SKevin Wolf 1700cdeaf1f1SFam Zheng qemu_co_mutex_lock(&s->lock); 17018e507243SFam Zheng /* write zeroes could fail if sectors not aligned to cluster, test 
it with 17028e507243SFam Zheng * dry_run == true before really updating image */ 170337b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true); 1704cdeaf1f1SFam Zheng if (!ret) { 170537b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false); 1706cdeaf1f1SFam Zheng } 1707e183ef75SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 1708e183ef75SPaolo Bonzini return ret; 1709e183ef75SPaolo Bonzini } 1710e183ef75SPaolo Bonzini 17116c031aacSFam Zheng static int vmdk_create_extent(const char *filename, int64_t filesize, 1712917703c1SFam Zheng bool flat, bool compress, bool zeroed_grain, 17134ab15590SChunyan Liu QemuOpts *opts, Error **errp) 1714019d6b8fSAnthony Liguori { 1715f66fd6c3SFam Zheng int ret, i; 1716c4bea169SKevin Wolf BlockBackend *blk = NULL; 1717019d6b8fSAnthony Liguori VMDK4Header header; 1718c13959c7SFam Zheng Error *local_err = NULL; 1719917703c1SFam Zheng uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; 1720917703c1SFam Zheng uint32_t *gd_buf = NULL; 1721917703c1SFam Zheng int gd_buf_size; 17220e7e1989SKevin Wolf 17234ab15590SChunyan Liu ret = bdrv_create_file(filename, opts, &local_err); 1724f66fd6c3SFam Zheng if (ret < 0) { 1725917703c1SFam Zheng error_propagate(errp, local_err); 1726917703c1SFam Zheng goto exit; 1727917703c1SFam Zheng } 1728917703c1SFam Zheng 1729efaa7c4eSMax Reitz blk = blk_new_open(filename, NULL, NULL, 173055880601SKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, 173155880601SKevin Wolf &local_err); 1732c4bea169SKevin Wolf if (blk == NULL) { 1733917703c1SFam Zheng error_propagate(errp, local_err); 1734c4bea169SKevin Wolf ret = -EIO; 1735917703c1SFam Zheng goto exit; 1736917703c1SFam Zheng } 1737917703c1SFam Zheng 1738c4bea169SKevin Wolf blk_set_allow_write_beyond_eof(blk, true); 1739c4bea169SKevin Wolf 1740917703c1SFam Zheng if (flat) { 17413a691c50SMax Reitz ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp); 1742f66fd6c3SFam Zheng goto exit; 1743f66fd6c3SFam Zheng } 
1744019d6b8fSAnthony Liguori magic = cpu_to_be32(VMDK4_MAGIC); 1745019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header)); 1746d62d9dc4SFam Zheng if (compress) { 1747d62d9dc4SFam Zheng header.version = 3; 1748d62d9dc4SFam Zheng } else if (zeroed_grain) { 1749d62d9dc4SFam Zheng header.version = 2; 1750d62d9dc4SFam Zheng } else { 1751d62d9dc4SFam Zheng header.version = 1; 1752d62d9dc4SFam Zheng } 175395b0aa42SFam Zheng header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT 175469e0b6dfSFam Zheng | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) 175569e0b6dfSFam Zheng | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); 17566c031aacSFam Zheng header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; 1757917703c1SFam Zheng header.capacity = filesize / BDRV_SECTOR_SIZE; 175816372ff0SAlexander Graf header.granularity = 128; 1759917703c1SFam Zheng header.num_gtes_per_gt = BDRV_SECTOR_SIZE; 1760019d6b8fSAnthony Liguori 1761917703c1SFam Zheng grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity); 1762917703c1SFam Zheng gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t), 1763917703c1SFam Zheng BDRV_SECTOR_SIZE); 1764917703c1SFam Zheng gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt); 1765917703c1SFam Zheng gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE); 1766019d6b8fSAnthony Liguori 1767019d6b8fSAnthony Liguori header.desc_offset = 1; 1768019d6b8fSAnthony Liguori header.desc_size = 20; 1769019d6b8fSAnthony Liguori header.rgd_offset = header.desc_offset + header.desc_size; 1770917703c1SFam Zheng header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count); 1771019d6b8fSAnthony Liguori header.grain_offset = 1772917703c1SFam Zheng ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count), 1773917703c1SFam Zheng header.granularity); 177416372ff0SAlexander Graf /* swap endianness for all header fields */ 177516372ff0SAlexander Graf header.version = cpu_to_le32(header.version); 
177616372ff0SAlexander Graf header.flags = cpu_to_le32(header.flags); 177716372ff0SAlexander Graf header.capacity = cpu_to_le64(header.capacity); 177816372ff0SAlexander Graf header.granularity = cpu_to_le64(header.granularity); 1779ca8804ceSFam Zheng header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt); 1780019d6b8fSAnthony Liguori header.desc_offset = cpu_to_le64(header.desc_offset); 1781019d6b8fSAnthony Liguori header.desc_size = cpu_to_le64(header.desc_size); 1782019d6b8fSAnthony Liguori header.rgd_offset = cpu_to_le64(header.rgd_offset); 1783019d6b8fSAnthony Liguori header.gd_offset = cpu_to_le64(header.gd_offset); 1784019d6b8fSAnthony Liguori header.grain_offset = cpu_to_le64(header.grain_offset); 17856c031aacSFam Zheng header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm); 1786019d6b8fSAnthony Liguori 1787019d6b8fSAnthony Liguori header.check_bytes[0] = 0xa; 1788019d6b8fSAnthony Liguori header.check_bytes[1] = 0x20; 1789019d6b8fSAnthony Liguori header.check_bytes[2] = 0xd; 1790019d6b8fSAnthony Liguori header.check_bytes[3] = 0xa; 1791019d6b8fSAnthony Liguori 1792019d6b8fSAnthony Liguori /* write all the data */ 17938341f00dSEric Blake ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0); 1794917703c1SFam Zheng if (ret < 0) { 1795c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR); 17961640366cSKirill A. Shutemov goto exit; 17971640366cSKirill A. Shutemov } 17988341f00dSEric Blake ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0); 1799917703c1SFam Zheng if (ret < 0) { 1800c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR); 18011640366cSKirill A. Shutemov goto exit; 18021640366cSKirill A. Shutemov } 1803019d6b8fSAnthony Liguori 18043a691c50SMax Reitz ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, 18053a691c50SMax Reitz PREALLOC_MODE_OFF, errp); 18061640366cSKirill A. Shutemov if (ret < 0) { 18071640366cSKirill A. Shutemov goto exit; 18081640366cSKirill A. 
Shutemov } 1809019d6b8fSAnthony Liguori 1810019d6b8fSAnthony Liguori /* write grain directory */ 1811917703c1SFam Zheng gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE; 1812917703c1SFam Zheng gd_buf = g_malloc0(gd_buf_size); 1813917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors; 18141640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) { 1815917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp); 18161640366cSKirill A. Shutemov } 1817c4bea169SKevin Wolf ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, 18188341f00dSEric Blake gd_buf, gd_buf_size, 0); 1819917703c1SFam Zheng if (ret < 0) { 1820c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR); 1821917703c1SFam Zheng goto exit; 18221640366cSKirill A. Shutemov } 1823019d6b8fSAnthony Liguori 1824019d6b8fSAnthony Liguori /* write backup grain directory */ 1825917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors; 18261640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) { 1827917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp); 18281640366cSKirill A. Shutemov } 1829c4bea169SKevin Wolf ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, 18308341f00dSEric Blake gd_buf, gd_buf_size, 0); 1831917703c1SFam Zheng if (ret < 0) { 1832c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR); 1833917703c1SFam Zheng goto exit; 18341640366cSKirill A. 
Shutemov } 1835019d6b8fSAnthony Liguori 1836f66fd6c3SFam Zheng ret = 0; 1837f66fd6c3SFam Zheng exit: 1838c4bea169SKevin Wolf if (blk) { 1839c4bea169SKevin Wolf blk_unref(blk); 1840917703c1SFam Zheng } 1841917703c1SFam Zheng g_free(gd_buf); 1842f66fd6c3SFam Zheng return ret; 1843f66fd6c3SFam Zheng } 1844019d6b8fSAnthony Liguori 1845f66fd6c3SFam Zheng static int filename_decompose(const char *filename, char *path, char *prefix, 18464823970bSFam Zheng char *postfix, size_t buf_len, Error **errp) 1847f66fd6c3SFam Zheng { 1848f66fd6c3SFam Zheng const char *p, *q; 1849f66fd6c3SFam Zheng 1850f66fd6c3SFam Zheng if (filename == NULL || !strlen(filename)) { 18514823970bSFam Zheng error_setg(errp, "No filename provided"); 185265f74725SFam Zheng return VMDK_ERROR; 1853f66fd6c3SFam Zheng } 1854f66fd6c3SFam Zheng p = strrchr(filename, '/'); 1855f66fd6c3SFam Zheng if (p == NULL) { 1856f66fd6c3SFam Zheng p = strrchr(filename, '\\'); 1857f66fd6c3SFam Zheng } 1858f66fd6c3SFam Zheng if (p == NULL) { 1859f66fd6c3SFam Zheng p = strrchr(filename, ':'); 1860f66fd6c3SFam Zheng } 1861f66fd6c3SFam Zheng if (p != NULL) { 1862f66fd6c3SFam Zheng p++; 1863f66fd6c3SFam Zheng if (p - filename >= buf_len) { 186465f74725SFam Zheng return VMDK_ERROR; 1865f66fd6c3SFam Zheng } 1866f66fd6c3SFam Zheng pstrcpy(path, p - filename + 1, filename); 1867f66fd6c3SFam Zheng } else { 1868f66fd6c3SFam Zheng p = filename; 1869f66fd6c3SFam Zheng path[0] = '\0'; 1870f66fd6c3SFam Zheng } 1871f66fd6c3SFam Zheng q = strrchr(p, '.'); 1872f66fd6c3SFam Zheng if (q == NULL) { 1873f66fd6c3SFam Zheng pstrcpy(prefix, buf_len, p); 1874f66fd6c3SFam Zheng postfix[0] = '\0'; 1875f66fd6c3SFam Zheng } else { 1876f66fd6c3SFam Zheng if (q - p >= buf_len) { 187765f74725SFam Zheng return VMDK_ERROR; 1878f66fd6c3SFam Zheng } 1879f66fd6c3SFam Zheng pstrcpy(prefix, q - p + 1, p); 1880f66fd6c3SFam Zheng pstrcpy(postfix, buf_len, q); 1881f66fd6c3SFam Zheng } 188265f74725SFam Zheng return VMDK_OK; 1883f66fd6c3SFam Zheng } 1884f66fd6c3SFam 
Zheng 1885*efc75e2aSStefan Hajnoczi static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, 1886*efc75e2aSStefan Hajnoczi Error **errp) 1887f66fd6c3SFam Zheng { 1888917703c1SFam Zheng int idx = 0; 1889c4bea169SKevin Wolf BlockBackend *new_blk = NULL; 1890c13959c7SFam Zheng Error *local_err = NULL; 1891af057fe7SFam Zheng char *desc = NULL; 1892f66fd6c3SFam Zheng int64_t total_size = 0, filesize; 18935820f1daSChunyan Liu char *adapter_type = NULL; 18945820f1daSChunyan Liu char *backing_file = NULL; 1895f249924eSJanne Karhunen char *hw_version = NULL; 18965820f1daSChunyan Liu char *fmt = NULL; 1897f66fd6c3SFam Zheng int ret = 0; 18986c031aacSFam Zheng bool flat, split, compress; 1899af057fe7SFam Zheng GString *ext_desc_lines; 1900fe206562SJeff Cody char *path = g_malloc0(PATH_MAX); 1901fe206562SJeff Cody char *prefix = g_malloc0(PATH_MAX); 1902fe206562SJeff Cody char *postfix = g_malloc0(PATH_MAX); 1903fe206562SJeff Cody char *desc_line = g_malloc0(BUF_SIZE); 1904fe206562SJeff Cody char *ext_filename = g_malloc0(PATH_MAX); 1905fe206562SJeff Cody char *desc_filename = g_malloc0(PATH_MAX); 1906f66fd6c3SFam Zheng const int64_t split_size = 0x80000000; /* VMDK has constant split size */ 1907f66fd6c3SFam Zheng const char *desc_extent_line; 1908fe206562SJeff Cody char *parent_desc_line = g_malloc0(BUF_SIZE); 1909f66fd6c3SFam Zheng uint32_t parent_cid = 0xffffffff; 19107f2039f6SOthmar Pasteka uint32_t number_heads = 16; 191169e0b6dfSFam Zheng bool zeroed_grain = false; 1912917703c1SFam Zheng uint32_t desc_offset = 0, desc_len; 1913f66fd6c3SFam Zheng const char desc_template[] = 1914f66fd6c3SFam Zheng "# Disk DescriptorFile\n" 1915f66fd6c3SFam Zheng "version=1\n" 19169b17031aSFam Zheng "CID=%" PRIx32 "\n" 19179b17031aSFam Zheng "parentCID=%" PRIx32 "\n" 1918f66fd6c3SFam Zheng "createType=\"%s\"\n" 1919f66fd6c3SFam Zheng "%s" 1920f66fd6c3SFam Zheng "\n" 1921f66fd6c3SFam Zheng "# Extent description\n" 1922f66fd6c3SFam Zheng "%s" 1923f66fd6c3SFam Zheng 
"\n" 1924f66fd6c3SFam Zheng "# The Disk Data Base\n" 1925f66fd6c3SFam Zheng "#DDB\n" 1926f66fd6c3SFam Zheng "\n" 1927f249924eSJanne Karhunen "ddb.virtualHWVersion = \"%s\"\n" 1928f66fd6c3SFam Zheng "ddb.geometry.cylinders = \"%" PRId64 "\"\n" 19294ab9dab5SFam Zheng "ddb.geometry.heads = \"%" PRIu32 "\"\n" 1930f66fd6c3SFam Zheng "ddb.geometry.sectors = \"63\"\n" 19317f2039f6SOthmar Pasteka "ddb.adapterType = \"%s\"\n"; 1932f66fd6c3SFam Zheng 1933af057fe7SFam Zheng ext_desc_lines = g_string_new(NULL); 1934af057fe7SFam Zheng 19354823970bSFam Zheng if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) { 1936af057fe7SFam Zheng ret = -EINVAL; 1937af057fe7SFam Zheng goto exit; 1938f66fd6c3SFam Zheng } 1939f66fd6c3SFam Zheng /* Read out options */ 1940c2eb918eSHu Tao total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), 1941c2eb918eSHu Tao BDRV_SECTOR_SIZE); 19425820f1daSChunyan Liu adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); 19435820f1daSChunyan Liu backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 1944f249924eSJanne Karhunen hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); 19455820f1daSChunyan Liu if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) { 1946f249924eSJanne Karhunen if (strcmp(hw_version, "undefined")) { 1947f249924eSJanne Karhunen error_setg(errp, 1948f249924eSJanne Karhunen "compat6 cannot be enabled with hwversion set"); 1949f249924eSJanne Karhunen ret = -EINVAL; 1950f249924eSJanne Karhunen goto exit; 1951f249924eSJanne Karhunen } 1952f249924eSJanne Karhunen g_free(hw_version); 1953f249924eSJanne Karhunen hw_version = g_strdup("6"); 1954f249924eSJanne Karhunen } 1955f249924eSJanne Karhunen if (strcmp(hw_version, "undefined") == 0) { 1956f249924eSJanne Karhunen g_free(hw_version); 1957f249924eSJanne Karhunen hw_version = g_strdup("4"); 1958f66fd6c3SFam Zheng } 19595820f1daSChunyan Liu fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); 19605820f1daSChunyan Liu if 
(qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) { 19615820f1daSChunyan Liu zeroed_grain = true; 1962f66fd6c3SFam Zheng } 19635820f1daSChunyan Liu 19647f2039f6SOthmar Pasteka if (!adapter_type) { 19655820f1daSChunyan Liu adapter_type = g_strdup("ide"); 19667f2039f6SOthmar Pasteka } else if (strcmp(adapter_type, "ide") && 19677f2039f6SOthmar Pasteka strcmp(adapter_type, "buslogic") && 19687f2039f6SOthmar Pasteka strcmp(adapter_type, "lsilogic") && 19697f2039f6SOthmar Pasteka strcmp(adapter_type, "legacyESX")) { 19704823970bSFam Zheng error_setg(errp, "Unknown adapter type: '%s'", adapter_type); 1971af057fe7SFam Zheng ret = -EINVAL; 1972af057fe7SFam Zheng goto exit; 19737f2039f6SOthmar Pasteka } 19747f2039f6SOthmar Pasteka if (strcmp(adapter_type, "ide") != 0) { 19757f2039f6SOthmar Pasteka /* that's the number of heads with which vmware operates when 19767f2039f6SOthmar Pasteka creating, exporting, etc. vmdk files with a non-ide adapter type */ 19777f2039f6SOthmar Pasteka number_heads = 255; 19787f2039f6SOthmar Pasteka } 1979f66fd6c3SFam Zheng if (!fmt) { 1980f66fd6c3SFam Zheng /* Default format to monolithicSparse */ 19815820f1daSChunyan Liu fmt = g_strdup("monolithicSparse"); 1982f66fd6c3SFam Zheng } else if (strcmp(fmt, "monolithicFlat") && 1983f66fd6c3SFam Zheng strcmp(fmt, "monolithicSparse") && 1984f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentSparse") && 19856c031aacSFam Zheng strcmp(fmt, "twoGbMaxExtentFlat") && 19866c031aacSFam Zheng strcmp(fmt, "streamOptimized")) { 19874823970bSFam Zheng error_setg(errp, "Unknown subformat: '%s'", fmt); 1988af057fe7SFam Zheng ret = -EINVAL; 1989af057fe7SFam Zheng goto exit; 1990f66fd6c3SFam Zheng } 1991f66fd6c3SFam Zheng split = !(strcmp(fmt, "twoGbMaxExtentFlat") && 1992f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentSparse")); 1993f66fd6c3SFam Zheng flat = !(strcmp(fmt, "monolithicFlat") && 1994f66fd6c3SFam Zheng strcmp(fmt, "twoGbMaxExtentFlat")); 19956c031aacSFam Zheng compress = !strcmp(fmt, 
"streamOptimized"); 1996f66fd6c3SFam Zheng if (flat) { 19974ab9dab5SFam Zheng desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n"; 1998f66fd6c3SFam Zheng } else { 19994ab9dab5SFam Zheng desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n"; 2000f66fd6c3SFam Zheng } 2001f66fd6c3SFam Zheng if (flat && backing_file) { 20024823970bSFam Zheng error_setg(errp, "Flat image can't have backing file"); 2003af057fe7SFam Zheng ret = -ENOTSUP; 2004af057fe7SFam Zheng goto exit; 2005f66fd6c3SFam Zheng } 200652c8d629SFam Zheng if (flat && zeroed_grain) { 200752c8d629SFam Zheng error_setg(errp, "Flat image can't enable zeroed grain"); 2008af057fe7SFam Zheng ret = -ENOTSUP; 2009af057fe7SFam Zheng goto exit; 201052c8d629SFam Zheng } 2011f66fd6c3SFam Zheng if (backing_file) { 2012c4bea169SKevin Wolf BlockBackend *blk; 20131085daf9SMax Reitz char *full_backing = g_new0(char, PATH_MAX); 20141085daf9SMax Reitz bdrv_get_full_backing_filename_from_filename(filename, backing_file, 20151085daf9SMax Reitz full_backing, PATH_MAX, 20161085daf9SMax Reitz &local_err); 20171085daf9SMax Reitz if (local_err) { 20181085daf9SMax Reitz g_free(full_backing); 20191085daf9SMax Reitz error_propagate(errp, local_err); 20201085daf9SMax Reitz ret = -ENOENT; 20211085daf9SMax Reitz goto exit; 20221085daf9SMax Reitz } 2023c4bea169SKevin Wolf 2024efaa7c4eSMax Reitz blk = blk_new_open(full_backing, NULL, NULL, 202572e775c7SKevin Wolf BDRV_O_NO_BACKING, errp); 20261085daf9SMax Reitz g_free(full_backing); 2027c4bea169SKevin Wolf if (blk == NULL) { 2028c4bea169SKevin Wolf ret = -EIO; 2029af057fe7SFam Zheng goto exit; 2030f66fd6c3SFam Zheng } 2031c4bea169SKevin Wolf if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) { 2032c4bea169SKevin Wolf blk_unref(blk); 2033af057fe7SFam Zheng ret = -EINVAL; 2034af057fe7SFam Zheng goto exit; 2035f66fd6c3SFam Zheng } 20369877860eSPeter Maydell ret = vmdk_read_cid(blk_bs(blk), 0, &parent_cid); 2037c4bea169SKevin Wolf blk_unref(blk); 20389877860eSPeter Maydell if (ret) { 
20399877860eSPeter Maydell goto exit; 20409877860eSPeter Maydell } 2041fe206562SJeff Cody snprintf(parent_desc_line, BUF_SIZE, 20428ed610a1SFam Zheng "parentFileNameHint=\"%s\"", backing_file); 2043f66fd6c3SFam Zheng } 2044f66fd6c3SFam Zheng 2045f66fd6c3SFam Zheng /* Create extents */ 2046f66fd6c3SFam Zheng filesize = total_size; 2047f66fd6c3SFam Zheng while (filesize > 0) { 2048f66fd6c3SFam Zheng int64_t size = filesize; 2049f66fd6c3SFam Zheng 2050f66fd6c3SFam Zheng if (split && size > split_size) { 2051f66fd6c3SFam Zheng size = split_size; 2052f66fd6c3SFam Zheng } 2053f66fd6c3SFam Zheng if (split) { 2054fe206562SJeff Cody snprintf(desc_filename, PATH_MAX, "%s-%c%03d%s", 2055f66fd6c3SFam Zheng prefix, flat ? 'f' : 's', ++idx, postfix); 2056f66fd6c3SFam Zheng } else if (flat) { 2057fe206562SJeff Cody snprintf(desc_filename, PATH_MAX, "%s-flat%s", prefix, postfix); 2058f66fd6c3SFam Zheng } else { 2059fe206562SJeff Cody snprintf(desc_filename, PATH_MAX, "%s%s", prefix, postfix); 2060f66fd6c3SFam Zheng } 2061fe206562SJeff Cody snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename); 2062f66fd6c3SFam Zheng 206369e0b6dfSFam Zheng if (vmdk_create_extent(ext_filename, size, 20644ab15590SChunyan Liu flat, compress, zeroed_grain, opts, errp)) { 2065af057fe7SFam Zheng ret = -EINVAL; 2066af057fe7SFam Zheng goto exit; 2067f66fd6c3SFam Zheng } 2068f66fd6c3SFam Zheng filesize -= size; 2069f66fd6c3SFam Zheng 2070f66fd6c3SFam Zheng /* Format description line */ 2071fe206562SJeff Cody snprintf(desc_line, BUF_SIZE, 2072917703c1SFam Zheng desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename); 2073af057fe7SFam Zheng g_string_append(ext_desc_lines, desc_line); 2074f66fd6c3SFam Zheng } 2075f66fd6c3SFam Zheng /* generate descriptor file */ 2076af057fe7SFam Zheng desc = g_strdup_printf(desc_template, 2077e5dc64b8SFam Zheng g_random_int(), 2078f66fd6c3SFam Zheng parent_cid, 2079f66fd6c3SFam Zheng fmt, 2080f66fd6c3SFam Zheng parent_desc_line, 2081af057fe7SFam Zheng 
ext_desc_lines->str, 2082f249924eSJanne Karhunen hw_version, 2083917703c1SFam Zheng total_size / 2084917703c1SFam Zheng (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE), 2085af057fe7SFam Zheng number_heads, 20867f2039f6SOthmar Pasteka adapter_type); 2087917703c1SFam Zheng desc_len = strlen(desc); 2088917703c1SFam Zheng /* the descriptor offset = 0x200 */ 2089917703c1SFam Zheng if (!split && !flat) { 2090917703c1SFam Zheng desc_offset = 0x200; 2091f66fd6c3SFam Zheng } else { 2092c282e1fdSChunyan Liu ret = bdrv_create_file(filename, opts, &local_err); 2093917703c1SFam Zheng if (ret < 0) { 2094c13959c7SFam Zheng error_propagate(errp, local_err); 2095af057fe7SFam Zheng goto exit; 2096f66fd6c3SFam Zheng } 2097f66fd6c3SFam Zheng } 2098c4bea169SKevin Wolf 2099efaa7c4eSMax Reitz new_blk = blk_new_open(filename, NULL, NULL, 210055880601SKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, 210155880601SKevin Wolf &local_err); 2102c4bea169SKevin Wolf if (new_blk == NULL) { 2103c13959c7SFam Zheng error_propagate(errp, local_err); 2104c4bea169SKevin Wolf ret = -EIO; 2105917703c1SFam Zheng goto exit; 21061640366cSKirill A. 
Shutemov } 2107c4bea169SKevin Wolf 2108c4bea169SKevin Wolf blk_set_allow_write_beyond_eof(new_blk, true); 2109c4bea169SKevin Wolf 21108341f00dSEric Blake ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0); 2111917703c1SFam Zheng if (ret < 0) { 2112917703c1SFam Zheng error_setg_errno(errp, -ret, "Could not write description"); 2113917703c1SFam Zheng goto exit; 2114917703c1SFam Zheng } 2115917703c1SFam Zheng /* bdrv_pwrite write padding zeros to align to sector, we don't need that 2116917703c1SFam Zheng * for description file */ 2117917703c1SFam Zheng if (desc_offset == 0) { 21183a691c50SMax Reitz ret = blk_truncate(new_blk, desc_len, PREALLOC_MODE_OFF, errp); 2119917703c1SFam Zheng } 2120af057fe7SFam Zheng exit: 2121c4bea169SKevin Wolf if (new_blk) { 2122c4bea169SKevin Wolf blk_unref(new_blk); 2123917703c1SFam Zheng } 21245820f1daSChunyan Liu g_free(adapter_type); 21255820f1daSChunyan Liu g_free(backing_file); 2126f249924eSJanne Karhunen g_free(hw_version); 21275820f1daSChunyan Liu g_free(fmt); 2128af057fe7SFam Zheng g_free(desc); 2129fe206562SJeff Cody g_free(path); 2130fe206562SJeff Cody g_free(prefix); 2131fe206562SJeff Cody g_free(postfix); 2132fe206562SJeff Cody g_free(desc_line); 2133fe206562SJeff Cody g_free(ext_filename); 2134fe206562SJeff Cody g_free(desc_filename); 2135fe206562SJeff Cody g_free(parent_desc_line); 2136af057fe7SFam Zheng g_string_free(ext_desc_lines, true); 21371640366cSKirill A. 
Shutemov return ret; 2138019d6b8fSAnthony Liguori } 2139019d6b8fSAnthony Liguori 2140019d6b8fSAnthony Liguori static void vmdk_close(BlockDriverState *bs) 2141019d6b8fSAnthony Liguori { 21422bc3166cSKevin Wolf BDRVVmdkState *s = bs->opaque; 21432bc3166cSKevin Wolf 2144b3976d3cSFam Zheng vmdk_free_extents(bs); 2145f4c129a3SFam Zheng g_free(s->create_type); 21462bc3166cSKevin Wolf 21472bc3166cSKevin Wolf migrate_del_blocker(s->migration_blocker); 21482bc3166cSKevin Wolf error_free(s->migration_blocker); 2149019d6b8fSAnthony Liguori } 2150019d6b8fSAnthony Liguori 21518b94ff85SPaolo Bonzini static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) 2152019d6b8fSAnthony Liguori { 2153333c574dSFam Zheng BDRVVmdkState *s = bs->opaque; 215429cdb251SPaolo Bonzini int i, err; 215529cdb251SPaolo Bonzini int ret = 0; 2156333c574dSFam Zheng 2157333c574dSFam Zheng for (i = 0; i < s->num_extents; i++) { 215824bc15d1SKevin Wolf err = bdrv_co_flush(s->extents[i].file->bs); 2159333c574dSFam Zheng if (err < 0) { 2160333c574dSFam Zheng ret = err; 2161333c574dSFam Zheng } 2162333c574dSFam Zheng } 2163333c574dSFam Zheng return ret; 2164019d6b8fSAnthony Liguori } 2165019d6b8fSAnthony Liguori 21664a1d5e1fSFam Zheng static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) 21674a1d5e1fSFam Zheng { 21684a1d5e1fSFam Zheng int i; 21694a1d5e1fSFam Zheng int64_t ret = 0; 21704a1d5e1fSFam Zheng int64_t r; 21714a1d5e1fSFam Zheng BDRVVmdkState *s = bs->opaque; 21724a1d5e1fSFam Zheng 21739a4f4c31SKevin Wolf ret = bdrv_get_allocated_file_size(bs->file->bs); 21744a1d5e1fSFam Zheng if (ret < 0) { 21754a1d5e1fSFam Zheng return ret; 21764a1d5e1fSFam Zheng } 21774a1d5e1fSFam Zheng for (i = 0; i < s->num_extents; i++) { 21789a4f4c31SKevin Wolf if (s->extents[i].file == bs->file) { 21794a1d5e1fSFam Zheng continue; 21804a1d5e1fSFam Zheng } 218124bc15d1SKevin Wolf r = bdrv_get_allocated_file_size(s->extents[i].file->bs); 21824a1d5e1fSFam Zheng if (r < 0) { 21834a1d5e1fSFam Zheng return r; 
21844a1d5e1fSFam Zheng } 21854a1d5e1fSFam Zheng ret += r; 21864a1d5e1fSFam Zheng } 21874a1d5e1fSFam Zheng return ret; 21884a1d5e1fSFam Zheng } 21890e7e1989SKevin Wolf 2190da7a50f9SFam Zheng static int vmdk_has_zero_init(BlockDriverState *bs) 2191da7a50f9SFam Zheng { 2192da7a50f9SFam Zheng int i; 2193da7a50f9SFam Zheng BDRVVmdkState *s = bs->opaque; 2194da7a50f9SFam Zheng 2195da7a50f9SFam Zheng /* If has a flat extent and its underlying storage doesn't have zero init, 2196da7a50f9SFam Zheng * return 0. */ 2197da7a50f9SFam Zheng for (i = 0; i < s->num_extents; i++) { 2198da7a50f9SFam Zheng if (s->extents[i].flat) { 219924bc15d1SKevin Wolf if (!bdrv_has_zero_init(s->extents[i].file->bs)) { 2200da7a50f9SFam Zheng return 0; 2201da7a50f9SFam Zheng } 2202da7a50f9SFam Zheng } 2203da7a50f9SFam Zheng } 2204da7a50f9SFam Zheng return 1; 2205da7a50f9SFam Zheng } 2206da7a50f9SFam Zheng 2207f4c129a3SFam Zheng static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) 2208f4c129a3SFam Zheng { 2209f4c129a3SFam Zheng ImageInfo *info = g_new0(ImageInfo, 1); 2210f4c129a3SFam Zheng 2211f4c129a3SFam Zheng *info = (ImageInfo){ 221224bc15d1SKevin Wolf .filename = g_strdup(extent->file->bs->filename), 2213f4c129a3SFam Zheng .format = g_strdup(extent->type), 2214f4c129a3SFam Zheng .virtual_size = extent->sectors * BDRV_SECTOR_SIZE, 2215f4c129a3SFam Zheng .compressed = extent->compressed, 2216f4c129a3SFam Zheng .has_compressed = extent->compressed, 2217f4c129a3SFam Zheng .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE, 2218f4c129a3SFam Zheng .has_cluster_size = !extent->flat, 2219f4c129a3SFam Zheng }; 2220f4c129a3SFam Zheng 2221f4c129a3SFam Zheng return info; 2222f4c129a3SFam Zheng } 2223f4c129a3SFam Zheng 2224f43aa8e1SPeter Lieven static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, 2225f43aa8e1SPeter Lieven BdrvCheckMode fix) 2226f43aa8e1SPeter Lieven { 2227f43aa8e1SPeter Lieven BDRVVmdkState *s = bs->opaque; 2228f43aa8e1SPeter Lieven VmdkExtent *extent = NULL; 
2229f43aa8e1SPeter Lieven int64_t sector_num = 0; 223057322b78SMarkus Armbruster int64_t total_sectors = bdrv_nb_sectors(bs); 2231f43aa8e1SPeter Lieven int ret; 2232f43aa8e1SPeter Lieven uint64_t cluster_offset; 2233f43aa8e1SPeter Lieven 2234f43aa8e1SPeter Lieven if (fix) { 2235f43aa8e1SPeter Lieven return -ENOTSUP; 2236f43aa8e1SPeter Lieven } 2237f43aa8e1SPeter Lieven 2238f43aa8e1SPeter Lieven for (;;) { 2239f43aa8e1SPeter Lieven if (sector_num >= total_sectors) { 2240f43aa8e1SPeter Lieven return 0; 2241f43aa8e1SPeter Lieven } 2242f43aa8e1SPeter Lieven extent = find_extent(s, sector_num, extent); 2243f43aa8e1SPeter Lieven if (!extent) { 2244f43aa8e1SPeter Lieven fprintf(stderr, 2245f43aa8e1SPeter Lieven "ERROR: could not find extent for sector %" PRId64 "\n", 2246f43aa8e1SPeter Lieven sector_num); 22470e51b9b7SFam Zheng ret = -EINVAL; 2248f43aa8e1SPeter Lieven break; 2249f43aa8e1SPeter Lieven } 2250f43aa8e1SPeter Lieven ret = get_cluster_offset(bs, extent, NULL, 2251f43aa8e1SPeter Lieven sector_num << BDRV_SECTOR_BITS, 2252c6ac36e1SFam Zheng false, &cluster_offset, 0, 0); 2253f43aa8e1SPeter Lieven if (ret == VMDK_ERROR) { 2254f43aa8e1SPeter Lieven fprintf(stderr, 2255f43aa8e1SPeter Lieven "ERROR: could not get cluster_offset for sector %" 2256f43aa8e1SPeter Lieven PRId64 "\n", sector_num); 2257f43aa8e1SPeter Lieven break; 2258f43aa8e1SPeter Lieven } 22590e51b9b7SFam Zheng if (ret == VMDK_OK) { 22600e51b9b7SFam Zheng int64_t extent_len = bdrv_getlength(extent->file->bs); 22610e51b9b7SFam Zheng if (extent_len < 0) { 22620e51b9b7SFam Zheng fprintf(stderr, 22630e51b9b7SFam Zheng "ERROR: could not get extent file length for sector %" 22640e51b9b7SFam Zheng PRId64 "\n", sector_num); 22650e51b9b7SFam Zheng ret = extent_len; 22660e51b9b7SFam Zheng break; 22670e51b9b7SFam Zheng } 22680e51b9b7SFam Zheng if (cluster_offset >= extent_len) { 2269f43aa8e1SPeter Lieven fprintf(stderr, 2270f43aa8e1SPeter Lieven "ERROR: cluster offset for sector %" 2271f43aa8e1SPeter Lieven PRId64 
" points after EOF\n", sector_num); 22720e51b9b7SFam Zheng ret = -EINVAL; 2273f43aa8e1SPeter Lieven break; 2274f43aa8e1SPeter Lieven } 22750e51b9b7SFam Zheng } 2276f43aa8e1SPeter Lieven sector_num += extent->cluster_sectors; 2277f43aa8e1SPeter Lieven } 2278f43aa8e1SPeter Lieven 2279f43aa8e1SPeter Lieven result->corruptions++; 22800e51b9b7SFam Zheng return ret; 2281f43aa8e1SPeter Lieven } 2282f43aa8e1SPeter Lieven 2283f4c129a3SFam Zheng static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) 2284f4c129a3SFam Zheng { 2285f4c129a3SFam Zheng int i; 2286f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque; 2287f4c129a3SFam Zheng ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); 2288f4c129a3SFam Zheng ImageInfoList **next; 2289f4c129a3SFam Zheng 2290f4c129a3SFam Zheng *spec_info = (ImageInfoSpecific){ 22916a8f9661SEric Blake .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, 229232bafa8fSEric Blake .u = { 229332bafa8fSEric Blake .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1), 2294f4c129a3SFam Zheng }, 2295f4c129a3SFam Zheng }; 2296f4c129a3SFam Zheng 229732bafa8fSEric Blake *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) { 2298f4c129a3SFam Zheng .create_type = g_strdup(s->create_type), 2299f4c129a3SFam Zheng .cid = s->cid, 2300f4c129a3SFam Zheng .parent_cid = s->parent_cid, 2301f4c129a3SFam Zheng }; 2302f4c129a3SFam Zheng 230332bafa8fSEric Blake next = &spec_info->u.vmdk.data->extents; 2304f4c129a3SFam Zheng for (i = 0; i < s->num_extents; i++) { 2305f4c129a3SFam Zheng *next = g_new0(ImageInfoList, 1); 2306f4c129a3SFam Zheng (*next)->value = vmdk_get_extent_info(&s->extents[i]); 2307f4c129a3SFam Zheng (*next)->next = NULL; 2308f4c129a3SFam Zheng next = &(*next)->next; 2309f4c129a3SFam Zheng } 2310f4c129a3SFam Zheng 2311f4c129a3SFam Zheng return spec_info; 2312f4c129a3SFam Zheng } 2313f4c129a3SFam Zheng 23145f583307SFam Zheng static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b) 23155f583307SFam Zheng { 23165f583307SFam Zheng return a->flat 
== b->flat && 23175f583307SFam Zheng a->compressed == b->compressed && 23185f583307SFam Zheng (a->flat || a->cluster_sectors == b->cluster_sectors); 23195f583307SFam Zheng } 23205f583307SFam Zheng 232174fe188cSFam Zheng static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 232274fe188cSFam Zheng { 232374fe188cSFam Zheng int i; 232474fe188cSFam Zheng BDRVVmdkState *s = bs->opaque; 232574fe188cSFam Zheng assert(s->num_extents); 23265f583307SFam Zheng 23275f583307SFam Zheng /* See if we have multiple extents but they have different cases */ 23285f583307SFam Zheng for (i = 1; i < s->num_extents; i++) { 23295f583307SFam Zheng if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) { 23305f583307SFam Zheng return -ENOTSUP; 23315f583307SFam Zheng } 23325f583307SFam Zheng } 233374fe188cSFam Zheng bdi->needs_compressed_writes = s->extents[0].compressed; 233474fe188cSFam Zheng if (!s->extents[0].flat) { 233574fe188cSFam Zheng bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS; 233674fe188cSFam Zheng } 233774fe188cSFam Zheng return 0; 233874fe188cSFam Zheng } 233974fe188cSFam Zheng 23405820f1daSChunyan Liu static QemuOptsList vmdk_create_opts = { 23415820f1daSChunyan Liu .name = "vmdk-create-opts", 23425820f1daSChunyan Liu .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head), 23435820f1daSChunyan Liu .desc = { 2344db08adf5SKevin Wolf { 2345db08adf5SKevin Wolf .name = BLOCK_OPT_SIZE, 23465820f1daSChunyan Liu .type = QEMU_OPT_SIZE, 2347db08adf5SKevin Wolf .help = "Virtual disk size" 2348db08adf5SKevin Wolf }, 2349db08adf5SKevin Wolf { 23507f2039f6SOthmar Pasteka .name = BLOCK_OPT_ADAPTER_TYPE, 23515820f1daSChunyan Liu .type = QEMU_OPT_STRING, 23527f2039f6SOthmar Pasteka .help = "Virtual adapter type, can be one of " 23537f2039f6SOthmar Pasteka "ide (default), lsilogic, buslogic or legacyESX" 23547f2039f6SOthmar Pasteka }, 23557f2039f6SOthmar Pasteka { 2356db08adf5SKevin Wolf .name = BLOCK_OPT_BACKING_FILE, 23575820f1daSChunyan Liu .type = 
QEMU_OPT_STRING, 2358db08adf5SKevin Wolf .help = "File name of a base image" 2359db08adf5SKevin Wolf }, 2360db08adf5SKevin Wolf { 2361db08adf5SKevin Wolf .name = BLOCK_OPT_COMPAT6, 23625820f1daSChunyan Liu .type = QEMU_OPT_BOOL, 23635820f1daSChunyan Liu .help = "VMDK version 6 image", 23645820f1daSChunyan Liu .def_value_str = "off" 2365db08adf5SKevin Wolf }, 2366f66fd6c3SFam Zheng { 2367f249924eSJanne Karhunen .name = BLOCK_OPT_HWVERSION, 2368f249924eSJanne Karhunen .type = QEMU_OPT_STRING, 2369f249924eSJanne Karhunen .help = "VMDK hardware version", 2370f249924eSJanne Karhunen .def_value_str = "undefined" 2371f249924eSJanne Karhunen }, 2372f249924eSJanne Karhunen { 2373f66fd6c3SFam Zheng .name = BLOCK_OPT_SUBFMT, 23745820f1daSChunyan Liu .type = QEMU_OPT_STRING, 2375f66fd6c3SFam Zheng .help = 2376f66fd6c3SFam Zheng "VMDK flat extent format, can be one of " 23776c031aacSFam Zheng "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " 2378f66fd6c3SFam Zheng }, 237969e0b6dfSFam Zheng { 238069e0b6dfSFam Zheng .name = BLOCK_OPT_ZEROED_GRAIN, 23815820f1daSChunyan Liu .type = QEMU_OPT_BOOL, 23825820f1daSChunyan Liu .help = "Enable efficient zero writes " 23835820f1daSChunyan Liu "using the zeroed-grain GTE feature" 238469e0b6dfSFam Zheng }, 23855820f1daSChunyan Liu { /* end of list */ } 23865820f1daSChunyan Liu } 23870e7e1989SKevin Wolf }; 23880e7e1989SKevin Wolf 2389019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = { 2390019d6b8fSAnthony Liguori .format_name = "vmdk", 2391019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVVmdkState), 2392019d6b8fSAnthony Liguori .bdrv_probe = vmdk_probe, 23936511ef77SKevin Wolf .bdrv_open = vmdk_open, 2394f43aa8e1SPeter Lieven .bdrv_check = vmdk_check, 23953897575fSJeff Cody .bdrv_reopen_prepare = vmdk_reopen_prepare, 2396862f215fSKevin Wolf .bdrv_child_perm = bdrv_format_default_perms, 2397f10cc243SKevin Wolf .bdrv_co_preadv = vmdk_co_preadv, 239837b1d7d8SKevin Wolf 
.bdrv_co_pwritev = vmdk_co_pwritev, 2399b2c622d3SPavel Butsykin .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, 2400a620f2aeSEric Blake .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, 2401019d6b8fSAnthony Liguori .bdrv_close = vmdk_close, 2402*efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = vmdk_co_create_opts, 2403c68b89acSKevin Wolf .bdrv_co_flush_to_disk = vmdk_co_flush, 2404c72080b9SEric Blake .bdrv_co_block_status = vmdk_co_block_status, 24054a1d5e1fSFam Zheng .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, 2406da7a50f9SFam Zheng .bdrv_has_zero_init = vmdk_has_zero_init, 2407f4c129a3SFam Zheng .bdrv_get_specific_info = vmdk_get_specific_info, 2408d34682cdSKevin Wolf .bdrv_refresh_limits = vmdk_refresh_limits, 240974fe188cSFam Zheng .bdrv_get_info = vmdk_get_info, 24100e7e1989SKevin Wolf 24118ee79e70SKevin Wolf .supports_backing = true, 24125820f1daSChunyan Liu .create_opts = &vmdk_create_opts, 2413019d6b8fSAnthony Liguori }; 2414019d6b8fSAnthony Liguori 2415019d6b8fSAnthony Liguori static void bdrv_vmdk_init(void) 2416019d6b8fSAnthony Liguori { 2417019d6b8fSAnthony Liguori bdrv_register(&bdrv_vmdk); 2418019d6b8fSAnthony Liguori } 2419019d6b8fSAnthony Liguori 2420019d6b8fSAnthony Liguori block_init(bdrv_vmdk_init); 2421