1019d6b8fSAnthony Liguori /*
2019d6b8fSAnthony Liguori * Block driver for the VMDK format
3019d6b8fSAnthony Liguori *
4019d6b8fSAnthony Liguori * Copyright (c) 2004 Fabrice Bellard
5019d6b8fSAnthony Liguori * Copyright (c) 2005 Filip Navara
6019d6b8fSAnthony Liguori *
7019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy
8019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal
9019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights
10019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is
12019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions:
13019d6b8fSAnthony Liguori *
14019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in
15019d6b8fSAnthony Liguori * all copies or substantial portions of the Software.
16019d6b8fSAnthony Liguori *
17019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23019d6b8fSAnthony Liguori * THE SOFTWARE.
24019d6b8fSAnthony Liguori */
25019d6b8fSAnthony Liguori
2680c71a24SPeter Maydell #include "qemu/osdep.h"
27da34e65cSMarkus Armbruster #include "qapi/error.h"
28737e150eSPaolo Bonzini #include "block/block_int.h"
29c4bea169SKevin Wolf #include "sysemu/block-backend.h"
30abc521a9SMax Reitz #include "qapi/qmp/qdict.h"
31d49b6836SMarkus Armbruster #include "qemu/error-report.h"
321de7afc9SPaolo Bonzini #include "qemu/module.h"
33922a01a0SMarkus Armbruster #include "qemu/option.h"
3458369e22SPaolo Bonzini #include "qemu/bswap.h"
355df022cfSPeter Maydell #include "qemu/memalign.h"
36795c40b8SJuan Quintela #include "migration/blocker.h"
37f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
382923d34fSStefan Weil #include <zlib.h>
39019d6b8fSAnthony Liguori
/*
 * Image magic numbers. vmdk_probe() compares them against the first four
 * bytes of the image interpreted as a big-endian 32-bit value.
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

#define VMDK4_COMPRESSION_DEFLATE 1

/* VMDK4_FLAG_* bit masks */
#define VMDK4_FLAG_NL_DETECT (1 << 0)
#define VMDK4_FLAG_RGD (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN (1 << 2)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)

/* All-ones 64-bit sentinel */
#define VMDK4_GD_AT_END 0xffffffffffffffffULL

/* 2^32 sectors */
#define VMDK_EXTENT_MAX_SECTORS (1ULL << 32)

#define VMDK_GTE_ZEROED 0x1

/* VMDK internal error codes */
#define VMDK_OK 0
#define VMDK_ERROR (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
#define VMDK_ZEROED (-3)

/* Names of driver-specific creation options */
#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
#define BLOCK_OPT_TOOLSVERSION "toolsversion"
6469e0b6dfSFam Zheng
65019d6b8fSAnthony Liguori typedef struct {
66019d6b8fSAnthony Liguori uint32_t version;
67019d6b8fSAnthony Liguori uint32_t flags;
68019d6b8fSAnthony Liguori uint32_t disk_sectors;
69019d6b8fSAnthony Liguori uint32_t granularity;
70019d6b8fSAnthony Liguori uint32_t l1dir_offset;
71019d6b8fSAnthony Liguori uint32_t l1dir_size;
72019d6b8fSAnthony Liguori uint32_t file_sectors;
73019d6b8fSAnthony Liguori uint32_t cylinders;
74019d6b8fSAnthony Liguori uint32_t heads;
75019d6b8fSAnthony Liguori uint32_t sectors_per_track;
765d8caa54SFam Zheng } QEMU_PACKED VMDK3Header;
77019d6b8fSAnthony Liguori
78019d6b8fSAnthony Liguori typedef struct {
79019d6b8fSAnthony Liguori uint32_t version;
80019d6b8fSAnthony Liguori uint32_t flags;
81e98768d4SFam Zheng uint64_t capacity;
82e98768d4SFam Zheng uint64_t granularity;
83e98768d4SFam Zheng uint64_t desc_offset;
84e98768d4SFam Zheng uint64_t desc_size;
85ca8804ceSFam Zheng /* Number of GrainTableEntries per GrainTable */
86ca8804ceSFam Zheng uint32_t num_gtes_per_gt;
87e98768d4SFam Zheng uint64_t rgd_offset;
88e98768d4SFam Zheng uint64_t gd_offset;
89e98768d4SFam Zheng uint64_t grain_offset;
90019d6b8fSAnthony Liguori char filler[1];
91019d6b8fSAnthony Liguori char check_bytes[4];
92432bb170SFam Zheng uint16_t compressAlgorithm;
93541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header;
94019d6b8fSAnthony Liguori
9598eb9733SSam Eiderman typedef struct VMDKSESparseConstHeader {
9698eb9733SSam Eiderman uint64_t magic;
9798eb9733SSam Eiderman uint64_t version;
9898eb9733SSam Eiderman uint64_t capacity;
9998eb9733SSam Eiderman uint64_t grain_size;
10098eb9733SSam Eiderman uint64_t grain_table_size;
10198eb9733SSam Eiderman uint64_t flags;
10298eb9733SSam Eiderman uint64_t reserved1;
10398eb9733SSam Eiderman uint64_t reserved2;
10498eb9733SSam Eiderman uint64_t reserved3;
10598eb9733SSam Eiderman uint64_t reserved4;
10698eb9733SSam Eiderman uint64_t volatile_header_offset;
10798eb9733SSam Eiderman uint64_t volatile_header_size;
10898eb9733SSam Eiderman uint64_t journal_header_offset;
10998eb9733SSam Eiderman uint64_t journal_header_size;
11098eb9733SSam Eiderman uint64_t journal_offset;
11198eb9733SSam Eiderman uint64_t journal_size;
11298eb9733SSam Eiderman uint64_t grain_dir_offset;
11398eb9733SSam Eiderman uint64_t grain_dir_size;
11498eb9733SSam Eiderman uint64_t grain_tables_offset;
11598eb9733SSam Eiderman uint64_t grain_tables_size;
11698eb9733SSam Eiderman uint64_t free_bitmap_offset;
11798eb9733SSam Eiderman uint64_t free_bitmap_size;
11898eb9733SSam Eiderman uint64_t backmap_offset;
11998eb9733SSam Eiderman uint64_t backmap_size;
12098eb9733SSam Eiderman uint64_t grains_offset;
12198eb9733SSam Eiderman uint64_t grains_size;
12298eb9733SSam Eiderman uint8_t pad[304];
12398eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseConstHeader;
12498eb9733SSam Eiderman
12598eb9733SSam Eiderman typedef struct VMDKSESparseVolatileHeader {
12698eb9733SSam Eiderman uint64_t magic;
12798eb9733SSam Eiderman uint64_t free_gt_number;
12898eb9733SSam Eiderman uint64_t next_txn_seq_number;
12998eb9733SSam Eiderman uint64_t replay_journal;
13098eb9733SSam Eiderman uint8_t pad[480];
13198eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseVolatileHeader;
13298eb9733SSam Eiderman
133019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16
134019d6b8fSAnthony Liguori
135b3976d3cSFam Zheng typedef struct VmdkExtent {
13624bc15d1SKevin Wolf BdrvChild *file;
137b3976d3cSFam Zheng bool flat;
138432bb170SFam Zheng bool compressed;
139432bb170SFam Zheng bool has_marker;
14014ead646SFam Zheng bool has_zero_grain;
14198eb9733SSam Eiderman bool sesparse;
14298eb9733SSam Eiderman uint64_t sesparse_l2_tables_offset;
14398eb9733SSam Eiderman uint64_t sesparse_clusters_offset;
14498eb9733SSam Eiderman int32_t entry_size;
14514ead646SFam Zheng int version;
146b3976d3cSFam Zheng int64_t sectors;
147b3976d3cSFam Zheng int64_t end_sector;
1487fa60fa3SFam Zheng int64_t flat_start_offset;
149019d6b8fSAnthony Liguori int64_t l1_table_offset;
150019d6b8fSAnthony Liguori int64_t l1_backup_table_offset;
15198eb9733SSam Eiderman void *l1_table;
152019d6b8fSAnthony Liguori uint32_t *l1_backup_table;
153019d6b8fSAnthony Liguori unsigned int l1_size;
154019d6b8fSAnthony Liguori uint32_t l1_entry_sectors;
155019d6b8fSAnthony Liguori
156019d6b8fSAnthony Liguori unsigned int l2_size;
15798eb9733SSam Eiderman void *l2_cache;
158019d6b8fSAnthony Liguori uint32_t l2_cache_offsets[L2_CACHE_SIZE];
159019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE];
160019d6b8fSAnthony Liguori
161301c7d38SFam Zheng int64_t cluster_sectors;
162c6ac36e1SFam Zheng int64_t next_cluster_sector;
163f4c129a3SFam Zheng char *type;
164b3976d3cSFam Zheng } VmdkExtent;
165b3976d3cSFam Zheng
166b3976d3cSFam Zheng typedef struct BDRVVmdkState {
167848c66e8SPaolo Bonzini CoMutex lock;
168e98768d4SFam Zheng uint64_t desc_offset;
16969b4d86dSFam Zheng bool cid_updated;
170c338b6adSFam Zheng bool cid_checked;
171f4c129a3SFam Zheng uint32_t cid;
172019d6b8fSAnthony Liguori uint32_t parent_cid;
173b3976d3cSFam Zheng int num_extents;
174b3976d3cSFam Zheng /* Extent array with num_extents entries, ascend ordered by address */
175b3976d3cSFam Zheng VmdkExtent *extents;
1762bc3166cSKevin Wolf Error *migration_blocker;
177f4c129a3SFam Zheng char *create_type;
178019d6b8fSAnthony Liguori } BDRVVmdkState;
179019d6b8fSAnthony Liguori
/*
 * Reopen transaction state, allocated by vmdk_reopen_prepare() and released
 * by vmdk_reopen_clean().
 */
typedef struct BDRVVmdkReopenState {
    /* One flag per extent: true if the extent's file was bs->file */
    bool *extents_using_bs_file;
} BDRVVmdkReopenState;
1836d17e287SHanna Reitz
/*
 * Bookkeeping for a cluster lookup/allocation; the code that fills it in is
 * outside this part of the file.
 */
typedef struct VmdkMetaData {
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    bool new_allocation;
    uint32_t *l2_cache_entry;
} VmdkMetaData;
191019d6b8fSAnthony Liguori
192432bb170SFam Zheng typedef struct VmdkGrainMarker {
193432bb170SFam Zheng uint64_t lba;
194432bb170SFam Zheng uint32_t size;
195880a7817SPhilippe Mathieu-Daudé uint8_t data[];
1965d8caa54SFam Zheng } QEMU_PACKED VmdkGrainMarker;
197432bb170SFam Zheng
/* Marker type codes */
enum {
    MARKER_END_OF_STREAM    = 0,
    MARKER_GRAIN_TABLE      = 1,
    MARKER_GRAIN_DIRECTORY  = 2,
    MARKER_FOOTER           = 3,
};
20465bd155cSKevin Wolf
vmdk_probe(const uint8_t * buf,int buf_size,const char * filename)205019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
206019d6b8fSAnthony Liguori {
207019d6b8fSAnthony Liguori uint32_t magic;
208019d6b8fSAnthony Liguori
209ae261c86SFam Zheng if (buf_size < 4) {
210019d6b8fSAnthony Liguori return 0;
211ae261c86SFam Zheng }
212019d6b8fSAnthony Liguori magic = be32_to_cpu(*(uint32_t *)buf);
213019d6b8fSAnthony Liguori if (magic == VMDK3_MAGIC ||
21401fc99d6SFam Zheng magic == VMDK4_MAGIC) {
215019d6b8fSAnthony Liguori return 100;
21601fc99d6SFam Zheng } else {
21701fc99d6SFam Zheng const char *p = (const char *)buf;
21801fc99d6SFam Zheng const char *end = p + buf_size;
21901fc99d6SFam Zheng while (p < end) {
22001fc99d6SFam Zheng if (*p == '#') {
22101fc99d6SFam Zheng /* skip comment line */
22201fc99d6SFam Zheng while (p < end && *p != '\n') {
22301fc99d6SFam Zheng p++;
22401fc99d6SFam Zheng }
22501fc99d6SFam Zheng p++;
22601fc99d6SFam Zheng continue;
22701fc99d6SFam Zheng }
22801fc99d6SFam Zheng if (*p == ' ') {
22901fc99d6SFam Zheng while (p < end && *p == ' ') {
23001fc99d6SFam Zheng p++;
23101fc99d6SFam Zheng }
23201fc99d6SFam Zheng /* skip '\r' if windows line endings used. */
23301fc99d6SFam Zheng if (p < end && *p == '\r') {
23401fc99d6SFam Zheng p++;
23501fc99d6SFam Zheng }
23601fc99d6SFam Zheng /* only accept blank lines before 'version=' line */
23701fc99d6SFam Zheng if (p == end || *p != '\n') {
238019d6b8fSAnthony Liguori return 0;
239019d6b8fSAnthony Liguori }
24001fc99d6SFam Zheng p++;
24101fc99d6SFam Zheng continue;
24201fc99d6SFam Zheng }
24301fc99d6SFam Zheng if (end - p >= strlen("version=X\n")) {
24401fc99d6SFam Zheng if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
245b69864e5SSam Eiderman strncmp("version=2\n", p, strlen("version=2\n")) == 0 ||
246b69864e5SSam Eiderman strncmp("version=3\n", p, strlen("version=3\n")) == 0) {
24701fc99d6SFam Zheng return 100;
24801fc99d6SFam Zheng }
24901fc99d6SFam Zheng }
25001fc99d6SFam Zheng if (end - p >= strlen("version=X\r\n")) {
25101fc99d6SFam Zheng if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
252b69864e5SSam Eiderman strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 ||
253b69864e5SSam Eiderman strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) {
25401fc99d6SFam Zheng return 100;
25501fc99d6SFam Zheng }
25601fc99d6SFam Zheng }
25701fc99d6SFam Zheng return 0;
25801fc99d6SFam Zheng }
25901fc99d6SFam Zheng return 0;
26001fc99d6SFam Zheng }
26101fc99d6SFam Zheng }
262019d6b8fSAnthony Liguori
/* Sizes in bytes */
#define SECTOR_SIZE 512
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define BUF_SIZE 4096
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
267019d6b8fSAnthony Liguori
/*
 * Release every extent of @bs: free the per-extent tables and type string,
 * drop the child reference of each extent whose file is not bs->file, then
 * free the extent array itself. The child references are dropped with the
 * graph write lock held.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx;

    bdrv_graph_wrlock();
    for (idx = 0; idx < s->num_extents; idx++) {
        VmdkExtent *extent = &s->extents[idx];

        g_free(extent->l1_table);
        g_free(extent->l2_cache);
        g_free(extent->l1_backup_table);
        g_free(extent->type);
        if (extent->file == bs->file) {
            /* bs->file is not an extra reference taken for the extent */
            continue;
        }
        bdrv_unref_child(bs, extent->file);
    }
    bdrv_graph_wrunlock();

    g_free(s->extents);
}
289b3976d3cSFam Zheng
/*
 * Drop the last entry of the extent array by shrinking the array by one.
 * Does nothing if there are no extents. Buffers owned by the dropped entry
 * are not freed here.
 */
static void vmdk_free_last_extent(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    if (s->num_extents != 0) {
        s->num_extents -= 1;
        s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
    }
}
30086c6b429SFam Zheng
3019877860eSPeter Maydell /* Return -ve errno, or 0 on success and write CID into *pcid. */
3021f051dcbSKevin Wolf static int GRAPH_RDLOCK
vmdk_read_cid(BlockDriverState * bs,int parent,uint32_t * pcid)3031f051dcbSKevin Wolf vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
304019d6b8fSAnthony Liguori {
3055997c210SFam Zheng char *desc;
3069877860eSPeter Maydell uint32_t cid;
307019d6b8fSAnthony Liguori const char *p_name, *cid_str;
308019d6b8fSAnthony Liguori size_t cid_str_size;
309e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
31099f1835dSKevin Wolf int ret;
311019d6b8fSAnthony Liguori
3125997c210SFam Zheng desc = g_malloc0(DESC_SIZE);
31332cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
31499f1835dSKevin Wolf if (ret < 0) {
3159877860eSPeter Maydell goto out;
316e1da9b24SFam Zheng }
317019d6b8fSAnthony Liguori
318019d6b8fSAnthony Liguori if (parent) {
319019d6b8fSAnthony Liguori cid_str = "parentCID";
320019d6b8fSAnthony Liguori cid_str_size = sizeof("parentCID");
321019d6b8fSAnthony Liguori } else {
322019d6b8fSAnthony Liguori cid_str = "CID";
323019d6b8fSAnthony Liguori cid_str_size = sizeof("CID");
324019d6b8fSAnthony Liguori }
325019d6b8fSAnthony Liguori
32693897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0';
327ae261c86SFam Zheng p_name = strstr(desc, cid_str);
3289877860eSPeter Maydell if (p_name == NULL) {
3299877860eSPeter Maydell ret = -EINVAL;
3309877860eSPeter Maydell goto out;
331019d6b8fSAnthony Liguori }
3329877860eSPeter Maydell p_name += cid_str_size;
3339877860eSPeter Maydell if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
3349877860eSPeter Maydell ret = -EINVAL;
3359877860eSPeter Maydell goto out;
3369877860eSPeter Maydell }
3379877860eSPeter Maydell *pcid = cid;
3389877860eSPeter Maydell ret = 0;
339019d6b8fSAnthony Liguori
3409877860eSPeter Maydell out:
3415997c210SFam Zheng g_free(desc);
3429877860eSPeter Maydell return ret;
343019d6b8fSAnthony Liguori }
344019d6b8fSAnthony Liguori
34528944f99SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vmdk_write_cid(BlockDriverState * bs,uint32_t cid)34628944f99SPaolo Bonzini vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
347019d6b8fSAnthony Liguori {
348965415ebSFam Zheng char *desc, *tmp_desc;
349019d6b8fSAnthony Liguori char *p_name, *tmp_str;
350e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
351965415ebSFam Zheng int ret = 0;
352019d6b8fSAnthony Liguori
3539fb7b350SFam Zheng size_t desc_buf_size;
3549fb7b350SFam Zheng
3559fb7b350SFam Zheng if (s->desc_offset == 0) {
3569fb7b350SFam Zheng desc_buf_size = bdrv_getlength(bs->file->bs);
3579fb7b350SFam Zheng if (desc_buf_size > 16ULL << 20) {
3589fb7b350SFam Zheng error_report("VMDK description file too big");
3599fb7b350SFam Zheng return -EFBIG;
3609fb7b350SFam Zheng }
3619fb7b350SFam Zheng } else {
3629fb7b350SFam Zheng desc_buf_size = DESC_SIZE;
3639fb7b350SFam Zheng }
3649fb7b350SFam Zheng
3659fb7b350SFam Zheng desc = g_malloc0(desc_buf_size);
3669fb7b350SFam Zheng tmp_desc = g_malloc0(desc_buf_size);
3679fb7b350SFam Zheng ret = bdrv_co_pread(bs->file, s->desc_offset, desc_buf_size, desc, 0);
36899f1835dSKevin Wolf if (ret < 0) {
369965415ebSFam Zheng goto out;
370e1da9b24SFam Zheng }
371019d6b8fSAnthony Liguori
3729fb7b350SFam Zheng desc[desc_buf_size - 1] = '\0';
373019d6b8fSAnthony Liguori tmp_str = strstr(desc, "parentCID");
37493897b9fSKevin Wolf if (tmp_str == NULL) {
375965415ebSFam Zheng ret = -EINVAL;
376965415ebSFam Zheng goto out;
37793897b9fSKevin Wolf }
37893897b9fSKevin Wolf
3799fb7b350SFam Zheng pstrcpy(tmp_desc, desc_buf_size, tmp_str);
380ae261c86SFam Zheng p_name = strstr(desc, "CID");
381ae261c86SFam Zheng if (p_name != NULL) {
382019d6b8fSAnthony Liguori p_name += sizeof("CID");
3839fb7b350SFam Zheng snprintf(p_name, desc_buf_size - (p_name - desc), "%" PRIx32 "\n", cid);
3849fb7b350SFam Zheng pstrcat(desc, desc_buf_size, tmp_desc);
385019d6b8fSAnthony Liguori }
386019d6b8fSAnthony Liguori
3879fb7b350SFam Zheng ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, desc_buf_size, desc, 0);
38899f1835dSKevin Wolf
389965415ebSFam Zheng out:
390965415ebSFam Zheng g_free(desc);
391965415ebSFam Zheng g_free(tmp_desc);
392965415ebSFam Zheng return ret;
393019d6b8fSAnthony Liguori }
394019d6b8fSAnthony Liguori
vmdk_is_cid_valid(BlockDriverState * bs)395004915a9SKevin Wolf static int coroutine_fn GRAPH_RDLOCK vmdk_is_cid_valid(BlockDriverState *bs)
396019d6b8fSAnthony Liguori {
397019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
398019d6b8fSAnthony Liguori uint32_t cur_pcid;
399019d6b8fSAnthony Liguori
400760e0063SKevin Wolf if (!s->cid_checked && bs->backing) {
401760e0063SKevin Wolf BlockDriverState *p_bs = bs->backing->bs;
402760e0063SKevin Wolf
403439e89fcSMax Reitz if (strcmp(p_bs->drv->format_name, "vmdk")) {
404439e89fcSMax Reitz /* Backing file is not in vmdk format, so it does not have
405439e89fcSMax Reitz * a CID, which makes the overlay's parent CID invalid */
406439e89fcSMax Reitz return 0;
407439e89fcSMax Reitz }
408439e89fcSMax Reitz
4099877860eSPeter Maydell if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
4109877860eSPeter Maydell /* read failure: report as not valid */
4119877860eSPeter Maydell return 0;
4129877860eSPeter Maydell }
413ae261c86SFam Zheng if (s->parent_cid != cur_pcid) {
414ae261c86SFam Zheng /* CID not valid */
415019d6b8fSAnthony Liguori return 0;
416019d6b8fSAnthony Liguori }
417ae261c86SFam Zheng }
418c338b6adSFam Zheng s->cid_checked = true;
419ae261c86SFam Zheng /* CID valid */
420019d6b8fSAnthony Liguori return 1;
421019d6b8fSAnthony Liguori }
422019d6b8fSAnthony Liguori
vmdk_reopen_prepare(BDRVReopenState * state,BlockReopenQueue * queue,Error ** errp)4233897575fSJeff Cody static int vmdk_reopen_prepare(BDRVReopenState *state,
4243897575fSJeff Cody BlockReopenQueue *queue, Error **errp)
4253897575fSJeff Cody {
4266d17e287SHanna Reitz BDRVVmdkState *s;
4276d17e287SHanna Reitz BDRVVmdkReopenState *rs;
4286d17e287SHanna Reitz int i;
4296d17e287SHanna Reitz
4301f051dcbSKevin Wolf GLOBAL_STATE_CODE();
4311f051dcbSKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
4321f051dcbSKevin Wolf
4333897575fSJeff Cody assert(state != NULL);
4343897575fSJeff Cody assert(state->bs != NULL);
4356d17e287SHanna Reitz assert(state->opaque == NULL);
4366d17e287SHanna Reitz
4376d17e287SHanna Reitz s = state->bs->opaque;
4386d17e287SHanna Reitz
4396d17e287SHanna Reitz rs = g_new0(BDRVVmdkReopenState, 1);
4406d17e287SHanna Reitz state->opaque = rs;
4416d17e287SHanna Reitz
4426d17e287SHanna Reitz /*
4436d17e287SHanna Reitz * Check whether there are any extents stored in bs->file; if bs->file
4446d17e287SHanna Reitz * changes, we will need to update their .file pointers to follow suit
4456d17e287SHanna Reitz */
4466d17e287SHanna Reitz rs->extents_using_bs_file = g_new(bool, s->num_extents);
4476d17e287SHanna Reitz for (i = 0; i < s->num_extents; i++) {
4486d17e287SHanna Reitz rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file;
4496d17e287SHanna Reitz }
4506d17e287SHanna Reitz
45167251a31SKevin Wolf return 0;
4523897575fSJeff Cody }
4533897575fSJeff Cody
/* Free the reopen state attached to @state and reset state->opaque. */
static void vmdk_reopen_clean(BDRVReopenState *state)
{
    BDRVVmdkReopenState *reopen_state = state->opaque;

    state->opaque = NULL;
    g_free(reopen_state->extents_using_bs_file);
    g_free(reopen_state);
}
4626d17e287SHanna Reitz
/*
 * Commit a reopen: every extent that used bs->file when the reopen was
 * prepared is pointed at the current bs->file, then the reopen state is
 * released.
 */
static void vmdk_reopen_commit(BDRVReopenState *state)
{
    BDRVVmdkState *s = state->bs->opaque;
    BDRVVmdkReopenState *rs = state->opaque;
    int idx;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    for (idx = 0; idx < s->num_extents; idx++) {
        if (!rs->extents_using_bs_file[idx]) {
            continue;
        }
        s->extents[idx].file = state->bs->file;
    }

    vmdk_reopen_clean(state);
}
4806d17e287SHanna Reitz
/* Abort a reopen: nothing was changed yet, only release the reopen state. */
static void vmdk_reopen_abort(BDRVReopenState *state)
{
    vmdk_reopen_clean(state);
}
4856d17e287SHanna Reitz
vmdk_parent_open(BlockDriverState * bs)4861f051dcbSKevin Wolf static int GRAPH_RDLOCK vmdk_parent_open(BlockDriverState *bs)
487019d6b8fSAnthony Liguori {
488019d6b8fSAnthony Liguori char *p_name;
48971968dbfSFam Zheng char *desc;
490e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
491588b65a3SPaolo Bonzini int ret;
492019d6b8fSAnthony Liguori
49371968dbfSFam Zheng desc = g_malloc0(DESC_SIZE + 1);
49432cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
495588b65a3SPaolo Bonzini if (ret < 0) {
49671968dbfSFam Zheng goto out;
497e1da9b24SFam Zheng }
498019d6b8fSAnthony Liguori
499ae261c86SFam Zheng p_name = strstr(desc, "parentFileNameHint");
500ae261c86SFam Zheng if (p_name != NULL) {
501019d6b8fSAnthony Liguori char *end_name;
502019d6b8fSAnthony Liguori
503019d6b8fSAnthony Liguori p_name += sizeof("parentFileNameHint") + 1;
504ae261c86SFam Zheng end_name = strchr(p_name, '\"');
505ae261c86SFam Zheng if (end_name == NULL) {
50671968dbfSFam Zheng ret = -EINVAL;
50771968dbfSFam Zheng goto out;
508ae261c86SFam Zheng }
509998c2019SMax Reitz if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) {
51071968dbfSFam Zheng ret = -EINVAL;
51171968dbfSFam Zheng goto out;
512ae261c86SFam Zheng }
513019d6b8fSAnthony Liguori
514998c2019SMax Reitz pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name);
515998c2019SMax Reitz pstrcpy(bs->backing_file, sizeof(bs->backing_file),
516998c2019SMax Reitz bs->auto_backing_file);
5177502be83SSam Eiderman pstrcpy(bs->backing_format, sizeof(bs->backing_format),
5187502be83SSam Eiderman "vmdk");
519019d6b8fSAnthony Liguori }
520019d6b8fSAnthony Liguori
52171968dbfSFam Zheng out:
52271968dbfSFam Zheng g_free(desc);
52371968dbfSFam Zheng return ret;
524019d6b8fSAnthony Liguori }
525019d6b8fSAnthony Liguori
526b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent
527b3976d3cSFam Zheng * address. return NULL if allocation failed. */
vmdk_add_extent(BlockDriverState * bs,BdrvChild * file,bool flat,int64_t sectors,int64_t l1_offset,int64_t l1_backup_offset,uint32_t l1_size,int l2_size,uint64_t cluster_sectors,VmdkExtent ** new_extent,Error ** errp)5288aa1331cSFam Zheng static int vmdk_add_extent(BlockDriverState *bs,
52924bc15d1SKevin Wolf BdrvChild *file, bool flat, int64_t sectors,
530b3976d3cSFam Zheng int64_t l1_offset, int64_t l1_backup_offset,
531b3976d3cSFam Zheng uint32_t l1_size,
5328aa1331cSFam Zheng int l2_size, uint64_t cluster_sectors,
5334823970bSFam Zheng VmdkExtent **new_extent,
5344823970bSFam Zheng Error **errp)
535b3976d3cSFam Zheng {
536b3976d3cSFam Zheng VmdkExtent *extent;
537b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque;
5380a156f7cSMarkus Armbruster int64_t nb_sectors;
539b3976d3cSFam Zheng
5408aa1331cSFam Zheng if (cluster_sectors > 0x200000) {
5418aa1331cSFam Zheng /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
5424823970bSFam Zheng error_setg(errp, "Invalid granularity, image may be corrupt");
5434823970bSFam Zheng return -EFBIG;
5448aa1331cSFam Zheng }
54559d6ee48SSam Eiderman if (l1_size > 32 * 1024 * 1024) {
546940a2cd5SSam Eiderman /*
547940a2cd5SSam Eiderman * Although with big capacity and small l1_entry_sectors, we can get a
548b0651b8cSFam Zheng * big l1_size, we don't want unbounded value to allocate the table.
54959d6ee48SSam Eiderman * Limit it to 32M, which is enough to store:
55059d6ee48SSam Eiderman * 8TB - for both VMDK3 & VMDK4 with
55159d6ee48SSam Eiderman * minimal cluster size: 512B
55259d6ee48SSam Eiderman * minimal L2 table size: 512 entries
55359d6ee48SSam Eiderman * 8 TB is still more than the maximal value supported for
55459d6ee48SSam Eiderman * VMDK3 & VMDK4 which is 2TB.
55598eb9733SSam Eiderman * 64TB - for "ESXi seSparse Extent"
55698eb9733SSam Eiderman * minimal cluster size: 512B (default is 4KB)
55798eb9733SSam Eiderman * L2 table size: 4096 entries (const).
55898eb9733SSam Eiderman * 64TB is more than the maximal value supported for
55998eb9733SSam Eiderman * seSparse VMDKs (which is slightly less than 64TB)
560940a2cd5SSam Eiderman */
5614823970bSFam Zheng error_setg(errp, "L1 size too big");
562b0651b8cSFam Zheng return -EFBIG;
563b0651b8cSFam Zheng }
5648aa1331cSFam Zheng
56524bc15d1SKevin Wolf nb_sectors = bdrv_nb_sectors(file->bs);
5660a156f7cSMarkus Armbruster if (nb_sectors < 0) {
5670a156f7cSMarkus Armbruster return nb_sectors;
568c6ac36e1SFam Zheng }
569c6ac36e1SFam Zheng
5705839e53bSMarkus Armbruster s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
571b3976d3cSFam Zheng extent = &s->extents[s->num_extents];
572b3976d3cSFam Zheng s->num_extents++;
573b3976d3cSFam Zheng
574b3976d3cSFam Zheng memset(extent, 0, sizeof(VmdkExtent));
575b3976d3cSFam Zheng extent->file = file;
576b3976d3cSFam Zheng extent->flat = flat;
577b3976d3cSFam Zheng extent->sectors = sectors;
578b3976d3cSFam Zheng extent->l1_table_offset = l1_offset;
579b3976d3cSFam Zheng extent->l1_backup_table_offset = l1_backup_offset;
580b3976d3cSFam Zheng extent->l1_size = l1_size;
581b3976d3cSFam Zheng extent->l1_entry_sectors = l2_size * cluster_sectors;
582b3976d3cSFam Zheng extent->l2_size = l2_size;
583301c7d38SFam Zheng extent->cluster_sectors = flat ? sectors : cluster_sectors;
5840a156f7cSMarkus Armbruster extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
58598eb9733SSam Eiderman extent->entry_size = sizeof(uint32_t);
586b3976d3cSFam Zheng
587b3976d3cSFam Zheng if (s->num_extents > 1) {
588b3976d3cSFam Zheng extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
589b3976d3cSFam Zheng } else {
590b3976d3cSFam Zheng extent->end_sector = extent->sectors;
591b3976d3cSFam Zheng }
592b3976d3cSFam Zheng bs->total_sectors = extent->end_sector;
5938aa1331cSFam Zheng if (new_extent) {
5948aa1331cSFam Zheng *new_extent = extent;
5958aa1331cSFam Zheng }
5968aa1331cSFam Zheng return 0;
597b3976d3cSFam Zheng }
598b3976d3cSFam Zheng
599b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_init_tables(BlockDriverState * bs,VmdkExtent * extent,Error ** errp)600b7cfc7d5SKevin Wolf vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, Error **errp)
601019d6b8fSAnthony Liguori {
602b4b3ab14SFam Zheng int ret;
60313c4941cSFam Zheng size_t l1_size;
60413c4941cSFam Zheng int i;
605b4b3ab14SFam Zheng
606b4b3ab14SFam Zheng /* read the L1 table */
60798eb9733SSam Eiderman l1_size = extent->l1_size * extent->entry_size;
608d6e59931SKevin Wolf extent->l1_table = g_try_malloc(l1_size);
609d6e59931SKevin Wolf if (l1_size && extent->l1_table == NULL) {
610d6e59931SKevin Wolf return -ENOMEM;
611d6e59931SKevin Wolf }
612d6e59931SKevin Wolf
61332cc71deSAlberto Faria ret = bdrv_pread(extent->file, extent->l1_table_offset, l1_size,
61432cc71deSAlberto Faria extent->l1_table, 0);
615b4b3ab14SFam Zheng if (ret < 0) {
616f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
6174823970bSFam Zheng error_setg_errno(errp, -ret,
6184823970bSFam Zheng "Could not read l1 table from extent '%s'",
61924bc15d1SKevin Wolf extent->file->bs->filename);
620b4b3ab14SFam Zheng goto fail_l1;
621b4b3ab14SFam Zheng }
622b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) {
62398eb9733SSam Eiderman if (extent->entry_size == sizeof(uint64_t)) {
62498eb9733SSam Eiderman le64_to_cpus((uint64_t *)extent->l1_table + i);
62598eb9733SSam Eiderman } else {
62698eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint32_t));
62798eb9733SSam Eiderman le32_to_cpus((uint32_t *)extent->l1_table + i);
62898eb9733SSam Eiderman }
629b4b3ab14SFam Zheng }
630b4b3ab14SFam Zheng
631b4b3ab14SFam Zheng if (extent->l1_backup_table_offset) {
63298eb9733SSam Eiderman assert(!extent->sesparse);
633d6e59931SKevin Wolf extent->l1_backup_table = g_try_malloc(l1_size);
634d6e59931SKevin Wolf if (l1_size && extent->l1_backup_table == NULL) {
635d6e59931SKevin Wolf ret = -ENOMEM;
636d6e59931SKevin Wolf goto fail_l1;
637d6e59931SKevin Wolf }
63853fb7844SAlberto Faria ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
63932cc71deSAlberto Faria l1_size, extent->l1_backup_table, 0);
640b4b3ab14SFam Zheng if (ret < 0) {
641f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
6424823970bSFam Zheng error_setg_errno(errp, -ret,
6434823970bSFam Zheng "Could not read l1 backup table from extent '%s'",
64424bc15d1SKevin Wolf extent->file->bs->filename);
645b4b3ab14SFam Zheng goto fail_l1b;
646b4b3ab14SFam Zheng }
647b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) {
648b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_backup_table[i]);
649b4b3ab14SFam Zheng }
650b4b3ab14SFam Zheng }
651b4b3ab14SFam Zheng
652b4b3ab14SFam Zheng extent->l2_cache =
65398eb9733SSam Eiderman g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
654b4b3ab14SFam Zheng return 0;
655b4b3ab14SFam Zheng fail_l1b:
6567267c094SAnthony Liguori g_free(extent->l1_backup_table);
657b4b3ab14SFam Zheng fail_l1:
6587267c094SAnthony Liguori g_free(extent->l1_table);
659b4b3ab14SFam Zheng return ret;
660b4b3ab14SFam Zheng }
661b4b3ab14SFam Zheng
662b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_vmfs_sparse(BlockDriverState * bs,BdrvChild * file,int flags,Error ** errp)663b7cfc7d5SKevin Wolf vmdk_open_vmfs_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
664b7cfc7d5SKevin Wolf Error **errp)
665b4b3ab14SFam Zheng {
666b4b3ab14SFam Zheng int ret;
667019d6b8fSAnthony Liguori uint32_t magic;
668019d6b8fSAnthony Liguori VMDK3Header header;
669cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
670b4b3ab14SFam Zheng
67132cc71deSAlberto Faria ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
672b4b3ab14SFam Zheng if (ret < 0) {
673f30c66baSMax Reitz bdrv_refresh_filename(file->bs);
6744823970bSFam Zheng error_setg_errno(errp, -ret,
6754823970bSFam Zheng "Could not read header from file '%s'",
67624bc15d1SKevin Wolf file->bs->filename);
67786c6b429SFam Zheng return ret;
678b3976d3cSFam Zheng }
679f6b61e54SFam Zheng ret = vmdk_add_extent(bs, file, false,
680b3976d3cSFam Zheng le32_to_cpu(header.disk_sectors),
6817237aecdSFam Zheng (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
682f6b61e54SFam Zheng 0,
683f6b61e54SFam Zheng le32_to_cpu(header.l1dir_size),
684f6b61e54SFam Zheng 4096,
6858aa1331cSFam Zheng le32_to_cpu(header.granularity),
6864823970bSFam Zheng &extent,
6874823970bSFam Zheng errp);
6888aa1331cSFam Zheng if (ret < 0) {
6898aa1331cSFam Zheng return ret;
6908aa1331cSFam Zheng }
6914823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp);
692b4b3ab14SFam Zheng if (ret) {
69386c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */
69486c6b429SFam Zheng vmdk_free_last_extent(bs);
695b4b3ab14SFam Zheng }
696b4b3ab14SFam Zheng return ret;
697b4b3ab14SFam Zheng }
698b4b3ab14SFam Zheng
/* Magic values expected in the seSparse constant and volatile headers */
#define SESPARSE_CONST_HEADER_MAGIC    UINT64_C(0x00000000cafebabe)
#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
70198eb9733SSam Eiderman
70298eb9733SSam Eiderman /* Strict checks - format not officially documented */
check_se_sparse_const_header(VMDKSESparseConstHeader * header,Error ** errp)70398eb9733SSam Eiderman static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
70498eb9733SSam Eiderman Error **errp)
70598eb9733SSam Eiderman {
70698eb9733SSam Eiderman header->magic = le64_to_cpu(header->magic);
70798eb9733SSam Eiderman header->version = le64_to_cpu(header->version);
70898eb9733SSam Eiderman header->grain_size = le64_to_cpu(header->grain_size);
70998eb9733SSam Eiderman header->grain_table_size = le64_to_cpu(header->grain_table_size);
71098eb9733SSam Eiderman header->flags = le64_to_cpu(header->flags);
71198eb9733SSam Eiderman header->reserved1 = le64_to_cpu(header->reserved1);
71298eb9733SSam Eiderman header->reserved2 = le64_to_cpu(header->reserved2);
71398eb9733SSam Eiderman header->reserved3 = le64_to_cpu(header->reserved3);
71498eb9733SSam Eiderman header->reserved4 = le64_to_cpu(header->reserved4);
71598eb9733SSam Eiderman
71698eb9733SSam Eiderman header->volatile_header_offset =
71798eb9733SSam Eiderman le64_to_cpu(header->volatile_header_offset);
71898eb9733SSam Eiderman header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
71998eb9733SSam Eiderman
72098eb9733SSam Eiderman header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
72198eb9733SSam Eiderman header->journal_header_size = le64_to_cpu(header->journal_header_size);
72298eb9733SSam Eiderman
72398eb9733SSam Eiderman header->journal_offset = le64_to_cpu(header->journal_offset);
72498eb9733SSam Eiderman header->journal_size = le64_to_cpu(header->journal_size);
72598eb9733SSam Eiderman
72698eb9733SSam Eiderman header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
72798eb9733SSam Eiderman header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
72898eb9733SSam Eiderman
72998eb9733SSam Eiderman header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
73098eb9733SSam Eiderman header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
73198eb9733SSam Eiderman
73298eb9733SSam Eiderman header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
73398eb9733SSam Eiderman header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
73498eb9733SSam Eiderman
73598eb9733SSam Eiderman header->backmap_offset = le64_to_cpu(header->backmap_offset);
73698eb9733SSam Eiderman header->backmap_size = le64_to_cpu(header->backmap_size);
73798eb9733SSam Eiderman
73898eb9733SSam Eiderman header->grains_offset = le64_to_cpu(header->grains_offset);
73998eb9733SSam Eiderman header->grains_size = le64_to_cpu(header->grains_size);
74098eb9733SSam Eiderman
74198eb9733SSam Eiderman if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
74298eb9733SSam Eiderman error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
74398eb9733SSam Eiderman header->magic);
74498eb9733SSam Eiderman return -EINVAL;
74598eb9733SSam Eiderman }
74698eb9733SSam Eiderman
74798eb9733SSam Eiderman if (header->version != 0x0000000200000001) {
74898eb9733SSam Eiderman error_setg(errp, "Unsupported version: 0x%016" PRIx64,
74998eb9733SSam Eiderman header->version);
75098eb9733SSam Eiderman return -ENOTSUP;
75198eb9733SSam Eiderman }
75298eb9733SSam Eiderman
75398eb9733SSam Eiderman if (header->grain_size != 8) {
75498eb9733SSam Eiderman error_setg(errp, "Unsupported grain size: %" PRIu64,
75598eb9733SSam Eiderman header->grain_size);
75698eb9733SSam Eiderman return -ENOTSUP;
75798eb9733SSam Eiderman }
75898eb9733SSam Eiderman
75998eb9733SSam Eiderman if (header->grain_table_size != 64) {
76098eb9733SSam Eiderman error_setg(errp, "Unsupported grain table size: %" PRIu64,
76198eb9733SSam Eiderman header->grain_table_size);
76298eb9733SSam Eiderman return -ENOTSUP;
76398eb9733SSam Eiderman }
76498eb9733SSam Eiderman
76598eb9733SSam Eiderman if (header->flags != 0) {
76698eb9733SSam Eiderman error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
76798eb9733SSam Eiderman header->flags);
76898eb9733SSam Eiderman return -ENOTSUP;
76998eb9733SSam Eiderman }
77098eb9733SSam Eiderman
77198eb9733SSam Eiderman if (header->reserved1 != 0 || header->reserved2 != 0 ||
77298eb9733SSam Eiderman header->reserved3 != 0 || header->reserved4 != 0) {
77398eb9733SSam Eiderman error_setg(errp, "Unsupported reserved bits:"
77498eb9733SSam Eiderman " 0x%016" PRIx64 " 0x%016" PRIx64
77598eb9733SSam Eiderman " 0x%016" PRIx64 " 0x%016" PRIx64,
77698eb9733SSam Eiderman header->reserved1, header->reserved2,
77798eb9733SSam Eiderman header->reserved3, header->reserved4);
77898eb9733SSam Eiderman return -ENOTSUP;
77998eb9733SSam Eiderman }
78098eb9733SSam Eiderman
78198eb9733SSam Eiderman /* check that padding is 0 */
78298eb9733SSam Eiderman if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
78398eb9733SSam Eiderman error_setg(errp, "Unsupported non-zero const header padding");
78498eb9733SSam Eiderman return -ENOTSUP;
78598eb9733SSam Eiderman }
78698eb9733SSam Eiderman
78798eb9733SSam Eiderman return 0;
78898eb9733SSam Eiderman }
78998eb9733SSam Eiderman
check_se_sparse_volatile_header(VMDKSESparseVolatileHeader * header,Error ** errp)79098eb9733SSam Eiderman static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
79198eb9733SSam Eiderman Error **errp)
79298eb9733SSam Eiderman {
79398eb9733SSam Eiderman header->magic = le64_to_cpu(header->magic);
79498eb9733SSam Eiderman header->free_gt_number = le64_to_cpu(header->free_gt_number);
79598eb9733SSam Eiderman header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
79698eb9733SSam Eiderman header->replay_journal = le64_to_cpu(header->replay_journal);
79798eb9733SSam Eiderman
79898eb9733SSam Eiderman if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
79998eb9733SSam Eiderman error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
80098eb9733SSam Eiderman header->magic);
80198eb9733SSam Eiderman return -EINVAL;
80298eb9733SSam Eiderman }
80398eb9733SSam Eiderman
80498eb9733SSam Eiderman if (header->replay_journal) {
80598eb9733SSam Eiderman error_setg(errp, "Image is dirty, Replaying journal not supported");
80698eb9733SSam Eiderman return -ENOTSUP;
80798eb9733SSam Eiderman }
80898eb9733SSam Eiderman
80998eb9733SSam Eiderman /* check that padding is 0 */
81098eb9733SSam Eiderman if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
81198eb9733SSam Eiderman error_setg(errp, "Unsupported non-zero volatile header padding");
81298eb9733SSam Eiderman return -ENOTSUP;
81398eb9733SSam Eiderman }
81498eb9733SSam Eiderman
81598eb9733SSam Eiderman return 0;
81698eb9733SSam Eiderman }
81798eb9733SSam Eiderman
818b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_se_sparse(BlockDriverState * bs,BdrvChild * file,int flags,Error ** errp)819b7cfc7d5SKevin Wolf vmdk_open_se_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
820b7cfc7d5SKevin Wolf Error **errp)
82198eb9733SSam Eiderman {
82298eb9733SSam Eiderman int ret;
82398eb9733SSam Eiderman VMDKSESparseConstHeader const_header;
82498eb9733SSam Eiderman VMDKSESparseVolatileHeader volatile_header;
825cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
82698eb9733SSam Eiderman
82798eb9733SSam Eiderman ret = bdrv_apply_auto_read_only(bs,
82898eb9733SSam Eiderman "No write support for seSparse images available", errp);
82998eb9733SSam Eiderman if (ret < 0) {
83098eb9733SSam Eiderman return ret;
83198eb9733SSam Eiderman }
83298eb9733SSam Eiderman
83398eb9733SSam Eiderman assert(sizeof(const_header) == SECTOR_SIZE);
83498eb9733SSam Eiderman
83532cc71deSAlberto Faria ret = bdrv_pread(file, 0, sizeof(const_header), &const_header, 0);
83698eb9733SSam Eiderman if (ret < 0) {
83798eb9733SSam Eiderman bdrv_refresh_filename(file->bs);
83898eb9733SSam Eiderman error_setg_errno(errp, -ret,
83998eb9733SSam Eiderman "Could not read const header from file '%s'",
84098eb9733SSam Eiderman file->bs->filename);
84198eb9733SSam Eiderman return ret;
84298eb9733SSam Eiderman }
84398eb9733SSam Eiderman
84498eb9733SSam Eiderman /* check const header */
84598eb9733SSam Eiderman ret = check_se_sparse_const_header(&const_header, errp);
84698eb9733SSam Eiderman if (ret < 0) {
84798eb9733SSam Eiderman return ret;
84898eb9733SSam Eiderman }
84998eb9733SSam Eiderman
85098eb9733SSam Eiderman assert(sizeof(volatile_header) == SECTOR_SIZE);
85198eb9733SSam Eiderman
85253fb7844SAlberto Faria ret = bdrv_pread(file, const_header.volatile_header_offset * SECTOR_SIZE,
85332cc71deSAlberto Faria sizeof(volatile_header), &volatile_header, 0);
85498eb9733SSam Eiderman if (ret < 0) {
85598eb9733SSam Eiderman bdrv_refresh_filename(file->bs);
85698eb9733SSam Eiderman error_setg_errno(errp, -ret,
85798eb9733SSam Eiderman "Could not read volatile header from file '%s'",
85898eb9733SSam Eiderman file->bs->filename);
85998eb9733SSam Eiderman return ret;
86098eb9733SSam Eiderman }
86198eb9733SSam Eiderman
86298eb9733SSam Eiderman /* check volatile header */
86398eb9733SSam Eiderman ret = check_se_sparse_volatile_header(&volatile_header, errp);
86498eb9733SSam Eiderman if (ret < 0) {
86598eb9733SSam Eiderman return ret;
86698eb9733SSam Eiderman }
86798eb9733SSam Eiderman
86898eb9733SSam Eiderman ret = vmdk_add_extent(bs, file, false,
86998eb9733SSam Eiderman const_header.capacity,
87098eb9733SSam Eiderman const_header.grain_dir_offset * SECTOR_SIZE,
87198eb9733SSam Eiderman 0,
87298eb9733SSam Eiderman const_header.grain_dir_size *
87398eb9733SSam Eiderman SECTOR_SIZE / sizeof(uint64_t),
87498eb9733SSam Eiderman const_header.grain_table_size *
87598eb9733SSam Eiderman SECTOR_SIZE / sizeof(uint64_t),
87698eb9733SSam Eiderman const_header.grain_size,
87798eb9733SSam Eiderman &extent,
87898eb9733SSam Eiderman errp);
87998eb9733SSam Eiderman if (ret < 0) {
88098eb9733SSam Eiderman return ret;
88198eb9733SSam Eiderman }
88298eb9733SSam Eiderman
88398eb9733SSam Eiderman extent->sesparse = true;
88498eb9733SSam Eiderman extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
88598eb9733SSam Eiderman extent->sesparse_clusters_offset = const_header.grains_offset;
88698eb9733SSam Eiderman extent->entry_size = sizeof(uint64_t);
88798eb9733SSam Eiderman
88898eb9733SSam Eiderman ret = vmdk_init_tables(bs, extent, errp);
88998eb9733SSam Eiderman if (ret) {
89098eb9733SSam Eiderman /* free extent allocated by vmdk_add_extent */
89198eb9733SSam Eiderman vmdk_free_last_extent(bs);
89298eb9733SSam Eiderman }
89398eb9733SSam Eiderman
89498eb9733SSam Eiderman return ret;
89598eb9733SSam Eiderman }
89698eb9733SSam Eiderman
897d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
898a6468367SKevin Wolf QDict *options, Error **errp);
899f16f509dSFam Zheng
vmdk_read_desc(BdrvChild * file,uint64_t desc_offset,Error ** errp)900cf2ab8fcSKevin Wolf static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
901a8842e6dSPaolo Bonzini {
902a8842e6dSPaolo Bonzini int64_t size;
903a8842e6dSPaolo Bonzini char *buf;
904a8842e6dSPaolo Bonzini int ret;
905a8842e6dSPaolo Bonzini
906cf2ab8fcSKevin Wolf size = bdrv_getlength(file->bs);
907a8842e6dSPaolo Bonzini if (size < 0) {
908a8842e6dSPaolo Bonzini error_setg_errno(errp, -size, "Could not access file");
909a8842e6dSPaolo Bonzini return NULL;
910a8842e6dSPaolo Bonzini }
911a8842e6dSPaolo Bonzini
91203c3359dSFam Zheng if (size < 4) {
91303c3359dSFam Zheng /* Both descriptor file and sparse image must be much larger than 4
91403c3359dSFam Zheng * bytes, also callers of vmdk_read_desc want to compare the first 4
91503c3359dSFam Zheng * bytes with VMDK4_MAGIC, let's error out if less is read. */
91603c3359dSFam Zheng error_setg(errp, "File is too small, not a valid image");
91703c3359dSFam Zheng return NULL;
91803c3359dSFam Zheng }
91903c3359dSFam Zheng
92073b7bcadSFam Zheng size = MIN(size, (1 << 20) - 1); /* avoid unbounded allocation */
92173b7bcadSFam Zheng buf = g_malloc(size + 1);
922a8842e6dSPaolo Bonzini
92332cc71deSAlberto Faria ret = bdrv_pread(file, desc_offset, size, buf, 0);
924a8842e6dSPaolo Bonzini if (ret < 0) {
925a8842e6dSPaolo Bonzini error_setg_errno(errp, -ret, "Could not read from file");
926a8842e6dSPaolo Bonzini g_free(buf);
927a8842e6dSPaolo Bonzini return NULL;
928a8842e6dSPaolo Bonzini }
929353a5d84SAlberto Faria buf[size] = 0;
930a8842e6dSPaolo Bonzini
931a8842e6dSPaolo Bonzini return buf;
932a8842e6dSPaolo Bonzini }
933a8842e6dSPaolo Bonzini
934b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_vmdk4(BlockDriverState * bs,BdrvChild * file,int flags,QDict * options,Error ** errp)935b7cfc7d5SKevin Wolf vmdk_open_vmdk4(BlockDriverState *bs, BdrvChild *file, int flags,
936b7cfc7d5SKevin Wolf QDict *options, Error **errp)
937b4b3ab14SFam Zheng {
938b4b3ab14SFam Zheng int ret;
939b4b3ab14SFam Zheng uint32_t magic;
940b4b3ab14SFam Zheng uint32_t l1_size, l1_entry_sectors;
941019d6b8fSAnthony Liguori VMDK4Header header;
942cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
943f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
944bb45ded9SFam Zheng int64_t l1_backup_offset = 0;
9453db1d98aSFam Zheng bool compressed;
946b4b3ab14SFam Zheng
94732cc71deSAlberto Faria ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
948b4b3ab14SFam Zheng if (ret < 0) {
949f30c66baSMax Reitz bdrv_refresh_filename(file->bs);
9504823970bSFam Zheng error_setg_errno(errp, -ret,
9514823970bSFam Zheng "Could not read header from file '%s'",
95224bc15d1SKevin Wolf file->bs->filename);
95389ac8480SPaolo Bonzini return -EINVAL;
954b3976d3cSFam Zheng }
9555a394b9eSStefan Hajnoczi if (header.capacity == 0) {
956e98768d4SFam Zheng uint64_t desc_offset = le64_to_cpu(header.desc_offset);
9575a394b9eSStefan Hajnoczi if (desc_offset) {
958cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
959d1833ef5SPaolo Bonzini if (!buf) {
960d1833ef5SPaolo Bonzini return -EINVAL;
961d1833ef5SPaolo Bonzini }
962a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
963d1833ef5SPaolo Bonzini g_free(buf);
964d1833ef5SPaolo Bonzini return ret;
9655a394b9eSStefan Hajnoczi }
966f16f509dSFam Zheng }
96765bd155cSKevin Wolf
968f4c129a3SFam Zheng if (!s->create_type) {
969f4c129a3SFam Zheng s->create_type = g_strdup("monolithicSparse");
970f4c129a3SFam Zheng }
971f4c129a3SFam Zheng
97265bd155cSKevin Wolf if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
97365bd155cSKevin Wolf /*
97465bd155cSKevin Wolf * The footer takes precedence over the header, so read it in. The
97565bd155cSKevin Wolf * footer starts at offset -1024 from the end: One sector for the
97665bd155cSKevin Wolf * footer, and another one for the end-of-stream marker.
97765bd155cSKevin Wolf */
97865bd155cSKevin Wolf struct {
97965bd155cSKevin Wolf struct {
98065bd155cSKevin Wolf uint64_t val;
98165bd155cSKevin Wolf uint32_t size;
98265bd155cSKevin Wolf uint32_t type;
98365bd155cSKevin Wolf uint8_t pad[512 - 16];
98465bd155cSKevin Wolf } QEMU_PACKED footer_marker;
98565bd155cSKevin Wolf
98665bd155cSKevin Wolf uint32_t magic;
98765bd155cSKevin Wolf VMDK4Header header;
98865bd155cSKevin Wolf uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
98965bd155cSKevin Wolf
99065bd155cSKevin Wolf struct {
99165bd155cSKevin Wolf uint64_t val;
99265bd155cSKevin Wolf uint32_t size;
99365bd155cSKevin Wolf uint32_t type;
99465bd155cSKevin Wolf uint8_t pad[512 - 16];
99565bd155cSKevin Wolf } QEMU_PACKED eos_marker;
99665bd155cSKevin Wolf } QEMU_PACKED footer;
99765bd155cSKevin Wolf
99853fb7844SAlberto Faria ret = bdrv_pread(file, bs->file->bs->total_sectors * 512 - 1536,
99932cc71deSAlberto Faria sizeof(footer), &footer, 0);
100065bd155cSKevin Wolf if (ret < 0) {
1001d899d2e2SFam Zheng error_setg_errno(errp, -ret, "Failed to read footer");
100265bd155cSKevin Wolf return ret;
100365bd155cSKevin Wolf }
100465bd155cSKevin Wolf
100565bd155cSKevin Wolf /* Some sanity checks for the footer */
100665bd155cSKevin Wolf if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
100765bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.size) != 0 ||
100865bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
100965bd155cSKevin Wolf le64_to_cpu(footer.eos_marker.val) != 0 ||
101065bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.size) != 0 ||
101165bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
101265bd155cSKevin Wolf {
1013d899d2e2SFam Zheng error_setg(errp, "Invalid footer");
101465bd155cSKevin Wolf return -EINVAL;
101565bd155cSKevin Wolf }
101665bd155cSKevin Wolf
101765bd155cSKevin Wolf header = footer.header;
101865bd155cSKevin Wolf }
101965bd155cSKevin Wolf
10203db1d98aSFam Zheng compressed =
10213db1d98aSFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1022509d39aaSFam Zheng if (le32_to_cpu(header.version) > 3) {
1023a55448b3SMax Reitz error_setg(errp, "Unsupported VMDK version %" PRIu32,
102496c51eb5SFam Zheng le32_to_cpu(header.version));
102596c51eb5SFam Zheng return -ENOTSUP;
10263db1d98aSFam Zheng } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
10273db1d98aSFam Zheng !compressed) {
1028509d39aaSFam Zheng /* VMware KB 2064959 explains that version 3 added support for
1029509d39aaSFam Zheng * persistent changed block tracking (CBT), and backup software can
1030509d39aaSFam Zheng * read it as version=1 if it doesn't care about the changed area
1031509d39aaSFam Zheng * information. So we are safe to enable read only. */
1032509d39aaSFam Zheng error_setg(errp, "VMDK version 3 must be read only");
1033509d39aaSFam Zheng return -EINVAL;
103496c51eb5SFam Zheng }
103596c51eb5SFam Zheng
1036ca8804ceSFam Zheng if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
103789ac8480SPaolo Bonzini error_setg(errp, "L2 table size too big");
1038f8ce0403SFam Zheng return -EINVAL;
1039f8ce0403SFam Zheng }
1040f8ce0403SFam Zheng
1041ca8804ceSFam Zheng l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
1042b3976d3cSFam Zheng * le64_to_cpu(header.granularity);
104375d12341SStefan Weil if (l1_entry_sectors == 0) {
1044d899d2e2SFam Zheng error_setg(errp, "L1 entry size is invalid");
104586c6b429SFam Zheng return -EINVAL;
104686c6b429SFam Zheng }
1047b3976d3cSFam Zheng l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
1048b3976d3cSFam Zheng / l1_entry_sectors;
1049bb45ded9SFam Zheng if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
1050bb45ded9SFam Zheng l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
1051bb45ded9SFam Zheng }
105224bc15d1SKevin Wolf if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
10534ab9dab5SFam Zheng error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
10544ab9dab5SFam Zheng (int64_t)(le64_to_cpu(header.grain_offset)
10554ab9dab5SFam Zheng * BDRV_SECTOR_SIZE));
105634ceed81SFam Zheng return -EINVAL;
105734ceed81SFam Zheng }
105834ceed81SFam Zheng
10598aa1331cSFam Zheng ret = vmdk_add_extent(bs, file, false,
1060b3976d3cSFam Zheng le64_to_cpu(header.capacity),
1061b3976d3cSFam Zheng le64_to_cpu(header.gd_offset) << 9,
1062bb45ded9SFam Zheng l1_backup_offset,
1063b3976d3cSFam Zheng l1_size,
1064ca8804ceSFam Zheng le32_to_cpu(header.num_gtes_per_gt),
10658aa1331cSFam Zheng le64_to_cpu(header.granularity),
10664823970bSFam Zheng &extent,
10674823970bSFam Zheng errp);
10688aa1331cSFam Zheng if (ret < 0) {
10698aa1331cSFam Zheng return ret;
10708aa1331cSFam Zheng }
1071432bb170SFam Zheng extent->compressed =
1072432bb170SFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1073d8a7b061SFam Zheng if (extent->compressed) {
1074d8a7b061SFam Zheng g_free(s->create_type);
1075d8a7b061SFam Zheng s->create_type = g_strdup("streamOptimized");
1076d8a7b061SFam Zheng }
1077432bb170SFam Zheng extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
107814ead646SFam Zheng extent->version = le32_to_cpu(header.version);
107914ead646SFam Zheng extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
10804823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp);
1081b4b3ab14SFam Zheng if (ret) {
108286c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */
108386c6b429SFam Zheng vmdk_free_last_extent(bs);
1084019d6b8fSAnthony Liguori }
1085b4b3ab14SFam Zheng return ret;
1086b4b3ab14SFam Zheng }
1087b4b3ab14SFam Zheng
10887fa60fa3SFam Zheng /* find an option value out of descriptor file */
vmdk_parse_description(const char * desc,const char * opt_name,char * buf,int buf_size)10897fa60fa3SFam Zheng static int vmdk_parse_description(const char *desc, const char *opt_name,
10907fa60fa3SFam Zheng char *buf, int buf_size)
10917fa60fa3SFam Zheng {
10927fa60fa3SFam Zheng char *opt_pos, *opt_end;
10937fa60fa3SFam Zheng const char *end = desc + strlen(desc);
10947fa60fa3SFam Zheng
10957fa60fa3SFam Zheng opt_pos = strstr(desc, opt_name);
10967fa60fa3SFam Zheng if (!opt_pos) {
109765f74725SFam Zheng return VMDK_ERROR;
10987fa60fa3SFam Zheng }
10997fa60fa3SFam Zheng /* Skip "=\"" following opt_name */
11007fa60fa3SFam Zheng opt_pos += strlen(opt_name) + 2;
11017fa60fa3SFam Zheng if (opt_pos >= end) {
110265f74725SFam Zheng return VMDK_ERROR;
11037fa60fa3SFam Zheng }
11047fa60fa3SFam Zheng opt_end = opt_pos;
11057fa60fa3SFam Zheng while (opt_end < end && *opt_end != '"') {
11067fa60fa3SFam Zheng opt_end++;
11077fa60fa3SFam Zheng }
11087fa60fa3SFam Zheng if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
110965f74725SFam Zheng return VMDK_ERROR;
11107fa60fa3SFam Zheng }
11117fa60fa3SFam Zheng pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
111265f74725SFam Zheng return VMDK_OK;
11137fa60fa3SFam Zheng }
11147fa60fa3SFam Zheng
111586c6b429SFam Zheng /* Open an extent file and append to bs array */
1116b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_sparse(BlockDriverState * bs,BdrvChild * file,int flags,char * buf,QDict * options,Error ** errp)1117b7cfc7d5SKevin Wolf vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
1118a6468367SKevin Wolf char *buf, QDict *options, Error **errp)
111986c6b429SFam Zheng {
112086c6b429SFam Zheng uint32_t magic;
112186c6b429SFam Zheng
1122d1833ef5SPaolo Bonzini magic = ldl_be_p(buf);
112386c6b429SFam Zheng switch (magic) {
112486c6b429SFam Zheng case VMDK3_MAGIC:
11254823970bSFam Zheng return vmdk_open_vmfs_sparse(bs, file, flags, errp);
112686c6b429SFam Zheng case VMDK4_MAGIC:
1127a6468367SKevin Wolf return vmdk_open_vmdk4(bs, file, flags, options, errp);
112886c6b429SFam Zheng default:
112976abe407SPaolo Bonzini error_setg(errp, "Image not in VMDK format");
113076abe407SPaolo Bonzini return -EINVAL;
113186c6b429SFam Zheng }
113286c6b429SFam Zheng }
113386c6b429SFam Zheng
/*
 * Return a pointer to the character after the first '\n' at or after @s, or
 * to the terminating NUL of the string if it contains no further newline.
 */
static const char *next_line(const char *s)
{
    const char *newline = strchr(s, '\n');

    if (newline != NULL) {
        return newline + 1;
    }
    return s + strlen(s);
}
1144e4937694SMarkus Armbruster
1145b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_parse_extents(const char * desc,BlockDriverState * bs,QDict * options,Error ** errp)1146b7cfc7d5SKevin Wolf vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
1147b7cfc7d5SKevin Wolf Error **errp)
11487fa60fa3SFam Zheng {
114976db0ea3SZhao Liu ERRP_GUARD();
11507fa60fa3SFam Zheng int ret;
1151395a22faSJeff Cody int matches;
11527fa60fa3SFam Zheng char access[11];
11537fa60fa3SFam Zheng char type[11];
11547fa60fa3SFam Zheng char fname[512];
1155d28d737fSMarkus Armbruster const char *p, *np;
11567fa60fa3SFam Zheng int64_t sectors = 0;
11577fa60fa3SFam Zheng int64_t flat_offset;
1158cdc0dd25SMax Reitz char *desc_file_dir = NULL;
1159fe206562SJeff Cody char *extent_path;
116024bc15d1SKevin Wolf BdrvChild *extent_file;
11618b1869daSMax Reitz BdrvChildRole extent_role;
1162f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
1163cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
1164a6468367SKevin Wolf char extent_opt_prefix[32];
116524bc15d1SKevin Wolf Error *local_err = NULL;
11667fa60fa3SFam Zheng
1167b7cfc7d5SKevin Wolf GLOBAL_STATE_CODE();
1168b7cfc7d5SKevin Wolf
1169e4937694SMarkus Armbruster for (p = desc; *p; p = next_line(p)) {
11708a3e0bc3SFam Zheng /* parse extent line in one of below formats:
11718a3e0bc3SFam Zheng *
11727fa60fa3SFam Zheng * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
11737fa60fa3SFam Zheng * RW [size in sectors] SPARSE "file-name.vmdk"
11748a3e0bc3SFam Zheng * RW [size in sectors] VMFS "file-name.vmdk"
11758a3e0bc3SFam Zheng * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
117698eb9733SSam Eiderman * RW [size in sectors] SESPARSE "file-name.vmdk"
11777fa60fa3SFam Zheng */
11787fa60fa3SFam Zheng flat_offset = -1;
1179395a22faSJeff Cody matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
11807fa60fa3SFam Zheng access, §ors, type, fname, &flat_offset);
1181395a22faSJeff Cody if (matches < 4 || strcmp(access, "RW")) {
1182e4937694SMarkus Armbruster continue;
11837fa60fa3SFam Zheng } else if (!strcmp(type, "FLAT")) {
1184395a22faSJeff Cody if (matches != 5 || flat_offset < 0) {
1185d28d737fSMarkus Armbruster goto invalid;
11867fa60fa3SFam Zheng }
1187dbbcaa8dSFam Zheng } else if (!strcmp(type, "VMFS")) {
1188395a22faSJeff Cody if (matches == 4) {
1189dbbcaa8dSFam Zheng flat_offset = 0;
1190b47053bdSFam Zheng } else {
1191d28d737fSMarkus Armbruster goto invalid;
1192b47053bdSFam Zheng }
1193395a22faSJeff Cody } else if (matches != 4) {
1194d28d737fSMarkus Armbruster goto invalid;
11957fa60fa3SFam Zheng }
11967fa60fa3SFam Zheng
11977fa60fa3SFam Zheng if (sectors <= 0 ||
1198daac8fdcSFam Zheng (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
119998eb9733SSam Eiderman strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
120098eb9733SSam Eiderman strcmp(type, "SESPARSE")) ||
12017fa60fa3SFam Zheng (strcmp(access, "RW"))) {
1202e4937694SMarkus Armbruster continue;
12037fa60fa3SFam Zheng }
12047fa60fa3SFam Zheng
1205cdc0dd25SMax Reitz if (path_is_absolute(fname)) {
1206cdc0dd25SMax Reitz extent_path = g_strdup(fname);
1207cdc0dd25SMax Reitz } else {
1208cdc0dd25SMax Reitz if (!desc_file_dir) {
1209cdc0dd25SMax Reitz desc_file_dir = bdrv_dirname(bs->file->bs, errp);
1210cdc0dd25SMax Reitz if (!desc_file_dir) {
1211f30c66baSMax Reitz bdrv_refresh_filename(bs->file->bs);
1212cdc0dd25SMax Reitz error_prepend(errp, "Cannot use relative paths with VMDK "
1213cdc0dd25SMax Reitz "descriptor file '%s': ",
1214cdc0dd25SMax Reitz bs->file->bs->filename);
1215cdc0dd25SMax Reitz ret = -EINVAL;
1216cdc0dd25SMax Reitz goto out;
1217cdc0dd25SMax Reitz }
12185c98415bSMax Reitz }
12195c98415bSMax Reitz
1220cdc0dd25SMax Reitz extent_path = g_strconcat(desc_file_dir, fname, NULL);
1221cdc0dd25SMax Reitz }
1222a6468367SKevin Wolf
1223a6468367SKevin Wolf ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
1224a6468367SKevin Wolf assert(ret < 32);
1225a6468367SKevin Wolf
12268b1869daSMax Reitz extent_role = BDRV_CHILD_DATA;
12278b1869daSMax Reitz if (strcmp(type, "FLAT") != 0 && strcmp(type, "VMFS") != 0) {
12288b1869daSMax Reitz /* non-flat extents have metadata */
12298b1869daSMax Reitz extent_role |= BDRV_CHILD_METADATA;
12308b1869daSMax Reitz }
12318b1869daSMax Reitz
123224bc15d1SKevin Wolf extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
12338b1869daSMax Reitz bs, &child_of_bds, extent_role, false,
12348b1869daSMax Reitz &local_err);
1235fe206562SJeff Cody g_free(extent_path);
1236a8d99c0eSDmitry Frolov if (!extent_file) {
123724bc15d1SKevin Wolf error_propagate(errp, local_err);
1238cdc0dd25SMax Reitz ret = -EINVAL;
1239cdc0dd25SMax Reitz goto out;
12407fa60fa3SFam Zheng }
124186c6b429SFam Zheng
124286c6b429SFam Zheng /* save to extents array */
124304d542c8SPaolo Bonzini if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
124486c6b429SFam Zheng /* FLAT extent */
124586c6b429SFam Zheng
12468aa1331cSFam Zheng ret = vmdk_add_extent(bs, extent_file, true, sectors,
12474823970bSFam Zheng 0, 0, 0, 0, 0, &extent, errp);
12488aa1331cSFam Zheng if (ret < 0) {
1249b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
12506bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
125124bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12526bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
1253b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1254cdc0dd25SMax Reitz goto out;
12558aa1331cSFam Zheng }
1256f16f509dSFam Zheng extent->flat_start_offset = flat_offset << 9;
1257daac8fdcSFam Zheng } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
1258daac8fdcSFam Zheng /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
1259cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(extent_file, 0, errp);
1260d1833ef5SPaolo Bonzini if (!buf) {
1261d1833ef5SPaolo Bonzini ret = -EINVAL;
1262d1833ef5SPaolo Bonzini } else {
1263a6468367SKevin Wolf ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
1264a6468367SKevin Wolf options, errp);
1265d1833ef5SPaolo Bonzini }
1266d1833ef5SPaolo Bonzini g_free(buf);
1267b6b1d31fSStefan Hajnoczi if (ret) {
1268b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
12696bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
127024bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12716bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
1272b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1273cdc0dd25SMax Reitz goto out;
127486c6b429SFam Zheng }
1275f4c129a3SFam Zheng extent = &s->extents[s->num_extents - 1];
127698eb9733SSam Eiderman } else if (!strcmp(type, "SESPARSE")) {
127798eb9733SSam Eiderman ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
127898eb9733SSam Eiderman if (ret) {
1279b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
12806bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
128198eb9733SSam Eiderman bdrv_unref_child(bs, extent_file);
12826bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
1283b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1284cdc0dd25SMax Reitz goto out;
128598eb9733SSam Eiderman }
128698eb9733SSam Eiderman extent = &s->extents[s->num_extents - 1];
12877fa60fa3SFam Zheng } else {
12884823970bSFam Zheng error_setg(errp, "Unsupported extent type '%s'", type);
1289b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
12906bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
129124bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12926bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
1293b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1294cdc0dd25SMax Reitz ret = -ENOTSUP;
1295cdc0dd25SMax Reitz goto out;
12967fa60fa3SFam Zheng }
1297f4c129a3SFam Zheng extent->type = g_strdup(type);
1298899f1ae2SFam Zheng }
1299cdc0dd25SMax Reitz
1300cdc0dd25SMax Reitz ret = 0;
1301cdc0dd25SMax Reitz goto out;
1302d28d737fSMarkus Armbruster
1303d28d737fSMarkus Armbruster invalid:
1304d28d737fSMarkus Armbruster np = next_line(p);
1305d28d737fSMarkus Armbruster assert(np != p);
1306d28d737fSMarkus Armbruster if (np[-1] == '\n') {
1307d28d737fSMarkus Armbruster np--;
1308d28d737fSMarkus Armbruster }
1309d28d737fSMarkus Armbruster error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
1310cdc0dd25SMax Reitz ret = -EINVAL;
1311cdc0dd25SMax Reitz
1312cdc0dd25SMax Reitz out:
1313cdc0dd25SMax Reitz g_free(desc_file_dir);
1314cdc0dd25SMax Reitz return ret;
13157fa60fa3SFam Zheng }
13167fa60fa3SFam Zheng
1317b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_desc_file(BlockDriverState * bs,int flags,char * buf,QDict * options,Error ** errp)1318b7cfc7d5SKevin Wolf vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options,
1319b7cfc7d5SKevin Wolf Error **errp)
13207fa60fa3SFam Zheng {
13217fa60fa3SFam Zheng int ret;
13227fa60fa3SFam Zheng char ct[128];
13237fa60fa3SFam Zheng BDRVVmdkState *s = bs->opaque;
13247fa60fa3SFam Zheng
13257fa60fa3SFam Zheng if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
132676abe407SPaolo Bonzini error_setg(errp, "invalid VMDK image descriptor");
132776abe407SPaolo Bonzini ret = -EINVAL;
13280bed087dSEvgeny Budilovsky goto exit;
13297fa60fa3SFam Zheng }
13306398de51SFam Zheng if (strcmp(ct, "monolithicFlat") &&
133104d542c8SPaolo Bonzini strcmp(ct, "vmfs") &&
1332daac8fdcSFam Zheng strcmp(ct, "vmfsSparse") &&
133398eb9733SSam Eiderman strcmp(ct, "seSparse") &&
133486c6b429SFam Zheng strcmp(ct, "twoGbMaxExtentSparse") &&
13356398de51SFam Zheng strcmp(ct, "twoGbMaxExtentFlat")) {
13364823970bSFam Zheng error_setg(errp, "Unsupported image type '%s'", ct);
13370bed087dSEvgeny Budilovsky ret = -ENOTSUP;
13380bed087dSEvgeny Budilovsky goto exit;
13397fa60fa3SFam Zheng }
1340f4c129a3SFam Zheng s->create_type = g_strdup(ct);
13417fa60fa3SFam Zheng s->desc_offset = 0;
1342cdc0dd25SMax Reitz ret = vmdk_parse_extents(buf, bs, options, errp);
13430bed087dSEvgeny Budilovsky exit:
13440bed087dSEvgeny Budilovsky return ret;
13457fa60fa3SFam Zheng }
13467fa60fa3SFam Zheng
vmdk_open(BlockDriverState * bs,QDict * options,int flags,Error ** errp)1347015a1036SMax Reitz static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
1348015a1036SMax Reitz Error **errp)
1349b4b3ab14SFam Zheng {
13509aeecbbcSFam Zheng char *buf;
135186c6b429SFam Zheng int ret;
135286c6b429SFam Zheng BDRVVmdkState *s = bs->opaque;
135337f09e5eSPaolo Bonzini uint32_t magic;
1354b4b3ab14SFam Zheng
13553804e3cfSKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
13563804e3cfSKevin Wolf
135783930780SVladimir Sementsov-Ogievskiy ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
135883930780SVladimir Sementsov-Ogievskiy if (ret < 0) {
135983930780SVladimir Sementsov-Ogievskiy return ret;
13604e4bf5c4SKevin Wolf }
13614e4bf5c4SKevin Wolf
1362cf2ab8fcSKevin Wolf buf = vmdk_read_desc(bs->file, 0, errp);
1363d1833ef5SPaolo Bonzini if (!buf) {
1364d1833ef5SPaolo Bonzini return -EINVAL;
1365d1833ef5SPaolo Bonzini }
1366d1833ef5SPaolo Bonzini
136737f09e5eSPaolo Bonzini magic = ldl_be_p(buf);
136837f09e5eSPaolo Bonzini switch (magic) {
136937f09e5eSPaolo Bonzini case VMDK3_MAGIC:
137037f09e5eSPaolo Bonzini case VMDK4_MAGIC:
13719a4f4c31SKevin Wolf ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
137224bc15d1SKevin Wolf errp);
137386c6b429SFam Zheng s->desc_offset = 0x200;
137437f09e5eSPaolo Bonzini break;
137537f09e5eSPaolo Bonzini default:
13768b1869daSMax Reitz /* No data in the descriptor file */
13778b1869daSMax Reitz bs->file->role &= ~BDRV_CHILD_DATA;
13788b1869daSMax Reitz
13798b1869daSMax Reitz /* Must succeed because we have given up permissions if anything */
13808b1869daSMax Reitz bdrv_child_refresh_perms(bs, bs->file, &error_abort);
13818b1869daSMax Reitz
1382a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
138337f09e5eSPaolo Bonzini break;
138437f09e5eSPaolo Bonzini }
1385bae0a0ccSPaolo Bonzini if (ret) {
1386bae0a0ccSPaolo Bonzini goto fail;
1387bae0a0ccSPaolo Bonzini }
138837f09e5eSPaolo Bonzini
138986c6b429SFam Zheng /* try to open parent images, if exist */
139086c6b429SFam Zheng ret = vmdk_parent_open(bs);
139186c6b429SFam Zheng if (ret) {
1392bae0a0ccSPaolo Bonzini goto fail;
1393b4b3ab14SFam Zheng }
13949877860eSPeter Maydell ret = vmdk_read_cid(bs, 0, &s->cid);
13959877860eSPeter Maydell if (ret) {
13969877860eSPeter Maydell goto fail;
13979877860eSPeter Maydell }
13989877860eSPeter Maydell ret = vmdk_read_cid(bs, 1, &s->parent_cid);
13999877860eSPeter Maydell if (ret) {
14009877860eSPeter Maydell goto fail;
14019877860eSPeter Maydell }
1402848c66e8SPaolo Bonzini qemu_co_mutex_init(&s->lock);
14032bc3166cSKevin Wolf
14042bc3166cSKevin Wolf /* Disable migration when VMDK images are used */
140581e5f78aSAlberto Garcia error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
140681e5f78aSAlberto Garcia "does not support live migration",
140781e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs));
1408e0ee3a8fSSteve Sistare ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
1409386f6c07SMarkus Armbruster if (ret < 0) {
1410fe44dc91SAshijeet Acharya goto fail;
1411fe44dc91SAshijeet Acharya }
1412fe44dc91SAshijeet Acharya
1413d1833ef5SPaolo Bonzini g_free(buf);
14142bc3166cSKevin Wolf return 0;
1415bae0a0ccSPaolo Bonzini
1416bae0a0ccSPaolo Bonzini fail:
1417d1833ef5SPaolo Bonzini g_free(buf);
1418f4c129a3SFam Zheng g_free(s->create_type);
1419f4c129a3SFam Zheng s->create_type = NULL;
1420bae0a0ccSPaolo Bonzini vmdk_free_extents(bs);
1421bae0a0ccSPaolo Bonzini return ret;
1422019d6b8fSAnthony Liguori }
1423019d6b8fSAnthony Liguori
1424d34682cdSKevin Wolf
vmdk_refresh_limits(BlockDriverState * bs,Error ** errp)14253baca891SKevin Wolf static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
1426d34682cdSKevin Wolf {
1427d34682cdSKevin Wolf BDRVVmdkState *s = bs->opaque;
1428d34682cdSKevin Wolf int i;
1429d34682cdSKevin Wolf
1430d34682cdSKevin Wolf for (i = 0; i < s->num_extents; i++) {
1431d34682cdSKevin Wolf if (!s->extents[i].flat) {
1432cf081fcaSEric Blake bs->bl.pwrite_zeroes_alignment =
1433cf081fcaSEric Blake MAX(bs->bl.pwrite_zeroes_alignment,
1434cf081fcaSEric Blake s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
1435d34682cdSKevin Wolf }
1436d34682cdSKevin Wolf }
1437d34682cdSKevin Wolf }
1438d34682cdSKevin Wolf
1439c6ac36e1SFam Zheng /**
1440c6ac36e1SFam Zheng * get_whole_cluster
1441c6ac36e1SFam Zheng *
1442c6ac36e1SFam Zheng * Copy backing file's cluster that covers @sector_num, otherwise write zero,
14434823cde5SKevin Wolf * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting
14444823cde5SKevin Wolf * a zeroed cluster in the current layer and must not copy data from the
14454823cde5SKevin Wolf * backing file.
1446c6ac36e1SFam Zheng *
1447c6ac36e1SFam Zheng * If @skip_start_sector < @skip_end_sector, the relative range
1448c6ac36e1SFam Zheng * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
1449c6ac36e1SFam Zheng * it for call to write user data in the request.
1450c6ac36e1SFam Zheng */
1451b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
get_whole_cluster(BlockDriverState * bs,VmdkExtent * extent,uint64_t cluster_offset,uint64_t offset,uint64_t skip_start_bytes,uint64_t skip_end_bytes,bool zeroed)1452b9b10c35SKevin Wolf get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
1453b9b10c35SKevin Wolf uint64_t cluster_offset, uint64_t offset,
1454b9b10c35SKevin Wolf uint64_t skip_start_bytes, uint64_t skip_end_bytes,
14554823cde5SKevin Wolf bool zeroed)
1456019d6b8fSAnthony Liguori {
1457bf81507dSFam Zheng int ret = VMDK_OK;
1458c6ac36e1SFam Zheng int64_t cluster_bytes;
1459c6ac36e1SFam Zheng uint8_t *whole_grain;
14604823cde5SKevin Wolf bool copy_from_backing;
1461019d6b8fSAnthony Liguori
1462c6ac36e1SFam Zheng /* For COW, align request sector_num to cluster start */
1463c6ac36e1SFam Zheng cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
146437b1d7d8SKevin Wolf offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
1465c6ac36e1SFam Zheng whole_grain = qemu_blockalign(bs, cluster_bytes);
14664823cde5SKevin Wolf copy_from_backing = bs->backing && !zeroed;
1467c6ac36e1SFam Zheng
14684823cde5SKevin Wolf if (!copy_from_backing) {
146937b1d7d8SKevin Wolf memset(whole_grain, 0, skip_start_bytes);
147037b1d7d8SKevin Wolf memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
1471c6ac36e1SFam Zheng }
1472c6ac36e1SFam Zheng
147337b1d7d8SKevin Wolf assert(skip_end_bytes <= cluster_bytes);
14740e69c543SFam Zheng /* we will be here if it's first write on non-exist grain(cluster).
14750e69c543SFam Zheng * try to read from parent image, if exist */
1476760e0063SKevin Wolf if (bs->backing && !vmdk_is_cid_valid(bs)) {
1477c6ac36e1SFam Zheng ret = VMDK_ERROR;
1478c6ac36e1SFam Zheng goto exit;
1479c6ac36e1SFam Zheng }
1480c6ac36e1SFam Zheng
1481c6ac36e1SFam Zheng /* Read backing data before skip range */
148237b1d7d8SKevin Wolf if (skip_start_bytes > 0) {
14834823cde5SKevin Wolf if (copy_from_backing) {
148423c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
148517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
1486a5c4e5beSAlberto Faria ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes,
148732cc71deSAlberto Faria whole_grain, 0);
1488c336500dSKevin Wolf if (ret < 0) {
1489bf81507dSFam Zheng ret = VMDK_ERROR;
1490bf81507dSFam Zheng goto exit;
1491019d6b8fSAnthony Liguori }
1492019d6b8fSAnthony Liguori }
149317362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
1494a5c4e5beSAlberto Faria ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes,
149532cc71deSAlberto Faria whole_grain, 0);
1496c6ac36e1SFam Zheng if (ret < 0) {
1497c6ac36e1SFam Zheng ret = VMDK_ERROR;
1498c6ac36e1SFam Zheng goto exit;
1499c6ac36e1SFam Zheng }
1500c6ac36e1SFam Zheng }
1501c6ac36e1SFam Zheng /* Read backing data after skip range */
150237b1d7d8SKevin Wolf if (skip_end_bytes < cluster_bytes) {
15034823cde5SKevin Wolf if (copy_from_backing) {
150423c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
150517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
1506a5c4e5beSAlberto Faria ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes,
150732cc71deSAlberto Faria cluster_bytes - skip_end_bytes,
150832cc71deSAlberto Faria whole_grain + skip_end_bytes, 0);
1509c6ac36e1SFam Zheng if (ret < 0) {
1510c6ac36e1SFam Zheng ret = VMDK_ERROR;
1511c6ac36e1SFam Zheng goto exit;
1512c6ac36e1SFam Zheng }
1513c6ac36e1SFam Zheng }
151417362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
1515a5c4e5beSAlberto Faria ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes,
151632cc71deSAlberto Faria cluster_bytes - skip_end_bytes,
151732cc71deSAlberto Faria whole_grain + skip_end_bytes, 0);
1518c6ac36e1SFam Zheng if (ret < 0) {
1519c6ac36e1SFam Zheng ret = VMDK_ERROR;
1520c6ac36e1SFam Zheng goto exit;
1521c6ac36e1SFam Zheng }
1522c6ac36e1SFam Zheng }
1523c6ac36e1SFam Zheng
152437b1d7d8SKevin Wolf ret = VMDK_OK;
1525bf81507dSFam Zheng exit:
1526bf81507dSFam Zheng qemu_vfree(whole_grain);
1527bf81507dSFam Zheng return ret;
1528019d6b8fSAnthony Liguori }
1529019d6b8fSAnthony Liguori
153088095349SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK
vmdk_L2update(VmdkExtent * extent,VmdkMetaData * m_data,uint32_t offset)153188095349SEmanuele Giuseppe Esposito vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
1532019d6b8fSAnthony Liguori {
1533c6ac36e1SFam Zheng offset = cpu_to_le32(offset);
1534019d6b8fSAnthony Liguori /* update L2 table */
153517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_UPDATE);
1536a5c4e5beSAlberto Faria if (bdrv_co_pwrite(extent->file,
1537b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512)
1538c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)),
153932cc71deSAlberto Faria sizeof(offset), &offset, 0) < 0) {
154065f74725SFam Zheng return VMDK_ERROR;
1541b3976d3cSFam Zheng }
1542019d6b8fSAnthony Liguori /* update backup L2 table */
1543b3976d3cSFam Zheng if (extent->l1_backup_table_offset != 0) {
1544b3976d3cSFam Zheng m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
1545a5c4e5beSAlberto Faria if (bdrv_co_pwrite(extent->file,
1546b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512)
1547c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)),
154832cc71deSAlberto Faria sizeof(offset), &offset, 0) < 0) {
154965f74725SFam Zheng return VMDK_ERROR;
1550019d6b8fSAnthony Liguori }
1551b3976d3cSFam Zheng }
1552a5c4e5beSAlberto Faria if (bdrv_co_flush(extent->file->bs) < 0) {
15532758be05SKevin Wolf return VMDK_ERROR;
15542758be05SKevin Wolf }
1555cdeaf1f1SFam Zheng if (m_data->l2_cache_entry) {
1556cdeaf1f1SFam Zheng *m_data->l2_cache_entry = offset;
1557cdeaf1f1SFam Zheng }
1558019d6b8fSAnthony Liguori
155965f74725SFam Zheng return VMDK_OK;
1560019d6b8fSAnthony Liguori }
1561019d6b8fSAnthony Liguori
1562c6ac36e1SFam Zheng /**
1563c6ac36e1SFam Zheng * get_cluster_offset
1564c6ac36e1SFam Zheng *
1565c6ac36e1SFam Zheng * Look up cluster offset in extent file by sector number, and store in
1566c6ac36e1SFam Zheng * @cluster_offset.
1567c6ac36e1SFam Zheng *
1568c6ac36e1SFam Zheng * For flat extents, the start offset as parsed from the description file is
1569c6ac36e1SFam Zheng * returned.
1570c6ac36e1SFam Zheng *
1571c6ac36e1SFam Zheng * For sparse extents, look up in L1, L2 table. If allocate is true, return an
1572c6ac36e1SFam Zheng * offset for a new cluster and update L2 cache. If there is a backing file,
1573c6ac36e1SFam Zheng * COW is done before returning; otherwise, zeroes are written to the allocated
1574c6ac36e1SFam Zheng * cluster. Both COW and zero writing skips the sector range
1575c6ac36e1SFam Zheng * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
1576c6ac36e1SFam Zheng * has new data to write there.
1577c6ac36e1SFam Zheng *
1578c6ac36e1SFam Zheng * Returns: VMDK_OK if cluster exists and mapped in the image.
1579c6ac36e1SFam Zheng * VMDK_UNALLOC if cluster is not mapped and @allocate is false.
1580c6ac36e1SFam Zheng * VMDK_ERROR if failed.
1581c6ac36e1SFam Zheng */
1582b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
get_cluster_offset(BlockDriverState * bs,VmdkExtent * extent,VmdkMetaData * m_data,uint64_t offset,bool allocate,uint64_t * cluster_offset,uint64_t skip_start_bytes,uint64_t skip_end_bytes)1583b9b10c35SKevin Wolf get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
1584b9b10c35SKevin Wolf VmdkMetaData *m_data, uint64_t offset, bool allocate,
1585b9b10c35SKevin Wolf uint64_t *cluster_offset, uint64_t skip_start_bytes,
158637b1d7d8SKevin Wolf uint64_t skip_end_bytes)
1587019d6b8fSAnthony Liguori {
1588019d6b8fSAnthony Liguori unsigned int l1_index, l2_offset, l2_index;
1589019d6b8fSAnthony Liguori int min_index, i, j;
159098eb9733SSam Eiderman uint32_t min_count;
159198eb9733SSam Eiderman void *l2_table;
159214ead646SFam Zheng bool zeroed = false;
1593c6ac36e1SFam Zheng int64_t ret;
1594d1319b07SFam Zheng int64_t cluster_sector;
159598eb9733SSam Eiderman unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
1596019d6b8fSAnthony Liguori
1597ae261c86SFam Zheng if (m_data) {
15984dc20e64SKevin Wolf m_data->new_allocation = false;
1599ae261c86SFam Zheng }
160091b85bd3SFam Zheng if (extent->flat) {
16017fa60fa3SFam Zheng *cluster_offset = extent->flat_start_offset;
160265f74725SFam Zheng return VMDK_OK;
160391b85bd3SFam Zheng }
1604019d6b8fSAnthony Liguori
16056398de51SFam Zheng offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
1606b3976d3cSFam Zheng l1_index = (offset >> 9) / extent->l1_entry_sectors;
1607b3976d3cSFam Zheng if (l1_index >= extent->l1_size) {
160865f74725SFam Zheng return VMDK_ERROR;
1609b3976d3cSFam Zheng }
161098eb9733SSam Eiderman if (extent->sesparse) {
161198eb9733SSam Eiderman uint64_t l2_offset_u64;
161298eb9733SSam Eiderman
161398eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint64_t));
161498eb9733SSam Eiderman
161598eb9733SSam Eiderman l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
161698eb9733SSam Eiderman if (l2_offset_u64 == 0) {
161798eb9733SSam Eiderman l2_offset = 0;
161898eb9733SSam Eiderman } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
161998eb9733SSam Eiderman /*
162098eb9733SSam Eiderman * Top most nibble is 0x1 if grain table is allocated.
162198eb9733SSam Eiderman * strict check - top most 4 bytes must be 0x10000000 since max
162298eb9733SSam Eiderman * supported size is 64TB for disk - so no more than 64TB / 16MB
162398eb9733SSam Eiderman * grain directories which is smaller than uint32,
162498eb9733SSam Eiderman * where 16MB is the only supported default grain table coverage.
162598eb9733SSam Eiderman */
162698eb9733SSam Eiderman return VMDK_ERROR;
162798eb9733SSam Eiderman } else {
162898eb9733SSam Eiderman l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
162998eb9733SSam Eiderman l2_offset_u64 = extent->sesparse_l2_tables_offset +
163098eb9733SSam Eiderman l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
163198eb9733SSam Eiderman if (l2_offset_u64 > 0x00000000ffffffff) {
163298eb9733SSam Eiderman return VMDK_ERROR;
163398eb9733SSam Eiderman }
163498eb9733SSam Eiderman l2_offset = (unsigned int)(l2_offset_u64);
163598eb9733SSam Eiderman }
163698eb9733SSam Eiderman } else {
163798eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint32_t));
163898eb9733SSam Eiderman l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
163998eb9733SSam Eiderman }
1640b3976d3cSFam Zheng if (!l2_offset) {
164165f74725SFam Zheng return VMDK_UNALLOC;
1642b3976d3cSFam Zheng }
1643019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) {
1644b3976d3cSFam Zheng if (l2_offset == extent->l2_cache_offsets[i]) {
1645019d6b8fSAnthony Liguori /* increment the hit count */
1646b3976d3cSFam Zheng if (++extent->l2_cache_counts[i] == 0xffffffff) {
1647019d6b8fSAnthony Liguori for (j = 0; j < L2_CACHE_SIZE; j++) {
1648b3976d3cSFam Zheng extent->l2_cache_counts[j] >>= 1;
1649019d6b8fSAnthony Liguori }
1650019d6b8fSAnthony Liguori }
165198eb9733SSam Eiderman l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
1652019d6b8fSAnthony Liguori goto found;
1653019d6b8fSAnthony Liguori }
1654019d6b8fSAnthony Liguori }
1655019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */
1656019d6b8fSAnthony Liguori min_index = 0;
1657019d6b8fSAnthony Liguori min_count = 0xffffffff;
1658019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) {
1659b3976d3cSFam Zheng if (extent->l2_cache_counts[i] < min_count) {
1660b3976d3cSFam Zheng min_count = extent->l2_cache_counts[i];
1661019d6b8fSAnthony Liguori min_index = i;
1662019d6b8fSAnthony Liguori }
1663019d6b8fSAnthony Liguori }
166498eb9733SSam Eiderman l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
166517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_LOAD);
1666a5c4e5beSAlberto Faria if (bdrv_co_pread(extent->file,
1667b3976d3cSFam Zheng (int64_t)l2_offset * 512,
166853fb7844SAlberto Faria l2_size_bytes,
1669a5c4e5beSAlberto Faria l2_table, 0
1670353a5d84SAlberto Faria ) < 0) {
167165f74725SFam Zheng return VMDK_ERROR;
1672b3976d3cSFam Zheng }
1673019d6b8fSAnthony Liguori
1674b3976d3cSFam Zheng extent->l2_cache_offsets[min_index] = l2_offset;
1675b3976d3cSFam Zheng extent->l2_cache_counts[min_index] = 1;
1676019d6b8fSAnthony Liguori found:
1677b3976d3cSFam Zheng l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
16782821c1ccSKevin Wolf if (m_data) {
16792821c1ccSKevin Wolf m_data->l1_index = l1_index;
16802821c1ccSKevin Wolf m_data->l2_index = l2_index;
16812821c1ccSKevin Wolf m_data->l2_offset = l2_offset;
16822821c1ccSKevin Wolf m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
16832821c1ccSKevin Wolf }
168498eb9733SSam Eiderman
168598eb9733SSam Eiderman if (extent->sesparse) {
168698eb9733SSam Eiderman cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
168798eb9733SSam Eiderman switch (cluster_sector & 0xf000000000000000) {
168898eb9733SSam Eiderman case 0x0000000000000000:
168998eb9733SSam Eiderman /* unallocated grain */
169098eb9733SSam Eiderman if (cluster_sector != 0) {
169198eb9733SSam Eiderman return VMDK_ERROR;
169298eb9733SSam Eiderman }
169398eb9733SSam Eiderman break;
169498eb9733SSam Eiderman case 0x1000000000000000:
169598eb9733SSam Eiderman /* scsi-unmapped grain - fallthrough */
169698eb9733SSam Eiderman case 0x2000000000000000:
169798eb9733SSam Eiderman /* zero grain */
169898eb9733SSam Eiderman zeroed = true;
169998eb9733SSam Eiderman break;
170098eb9733SSam Eiderman case 0x3000000000000000:
170198eb9733SSam Eiderman /* allocated grain */
170298eb9733SSam Eiderman cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
170398eb9733SSam Eiderman ((cluster_sector & 0x0000ffffffffffff) << 12));
170498eb9733SSam Eiderman cluster_sector = extent->sesparse_clusters_offset +
170598eb9733SSam Eiderman cluster_sector * extent->cluster_sectors;
170698eb9733SSam Eiderman break;
170798eb9733SSam Eiderman default:
170898eb9733SSam Eiderman return VMDK_ERROR;
170998eb9733SSam Eiderman }
171098eb9733SSam Eiderman } else {
171198eb9733SSam Eiderman cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
1712019d6b8fSAnthony Liguori
1713c6ac36e1SFam Zheng if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
171414ead646SFam Zheng zeroed = true;
171514ead646SFam Zheng }
171698eb9733SSam Eiderman }
171714ead646SFam Zheng
1718c6ac36e1SFam Zheng if (!cluster_sector || zeroed) {
171991b85bd3SFam Zheng if (!allocate) {
172014ead646SFam Zheng return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
172191b85bd3SFam Zheng }
172298eb9733SSam Eiderman assert(!extent->sesparse);
17239949f97eSKevin Wolf
1724a77672eaSyuchenlin if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
1725a77672eaSyuchenlin return VMDK_ERROR;
1726a77672eaSyuchenlin }
1727a77672eaSyuchenlin
1728c6ac36e1SFam Zheng cluster_sector = extent->next_cluster_sector;
1729c6ac36e1SFam Zheng extent->next_cluster_sector += extent->cluster_sectors;
17309949f97eSKevin Wolf
1731019d6b8fSAnthony Liguori /* First of all we write grain itself, to avoid race condition
1732019d6b8fSAnthony Liguori * that may to corrupt the image.
1733019d6b8fSAnthony Liguori * This problem may occur because of insufficient space on host disk
1734019d6b8fSAnthony Liguori * or inappropriate VM shutdown.
1735019d6b8fSAnthony Liguori */
173637b1d7d8SKevin Wolf ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
17374823cde5SKevin Wolf offset, skip_start_bytes, skip_end_bytes,
17384823cde5SKevin Wolf zeroed);
1739c6ac36e1SFam Zheng if (ret) {
1740c6ac36e1SFam Zheng return ret;
1741019d6b8fSAnthony Liguori }
1742524089bcSReda Sallahi if (m_data) {
17434dc20e64SKevin Wolf m_data->new_allocation = true;
1744524089bcSReda Sallahi }
1745019d6b8fSAnthony Liguori }
1746c6ac36e1SFam Zheng *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
174765f74725SFam Zheng return VMDK_OK;
1748019d6b8fSAnthony Liguori }
1749019d6b8fSAnthony Liguori
/*
 * Return the first extent, scanning forward from @start_hint (or from the
 * first extent when @start_hint is NULL), whose end_sector is greater than
 * @sector_num.  Returns NULL when no such extent is found.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                               int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *const end = &s->extents[s->num_extents];
    VmdkExtent *cur;

    for (cur = start_hint ? start_hint : &s->extents[0]; cur < end; cur++) {
        if (sector_num < cur->end_sector) {
            return cur;
        }
    }
    return NULL;
}
1766b3976d3cSFam Zheng
/*
 * Return the position of the byte offset @offset within its cluster: the
 * offset is made relative to the first byte of @extent (computed from
 * end_sector - sectors) and then reduced modulo the cluster size in bytes.
 */
static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
                                                   int64_t offset)
{
    uint64_t cluster_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
    uint64_t extent_start =
        (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;

    return (offset - extent_start) % cluster_bytes;
}
1778a844a2b0SKevin Wolf
1779b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_block_status(BlockDriverState * bs,bool want_zero,int64_t offset,int64_t bytes,int64_t * pnum,int64_t * map,BlockDriverState ** file)1780b9b10c35SKevin Wolf vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
1781b9b10c35SKevin Wolf int64_t offset, int64_t bytes, int64_t *pnum,
1782b9b10c35SKevin Wolf int64_t *map, BlockDriverState **file)
1783019d6b8fSAnthony Liguori {
1784019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
1785b3976d3cSFam Zheng int64_t index_in_cluster, n, ret;
1786c72080b9SEric Blake uint64_t cluster_offset;
1787b3976d3cSFam Zheng VmdkExtent *extent;
1788b3976d3cSFam Zheng
1789c72080b9SEric Blake extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
1790b3976d3cSFam Zheng if (!extent) {
1791c72080b9SEric Blake return -EIO;
1792b3976d3cSFam Zheng }
1793f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock);
1794c72080b9SEric Blake ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
1795c6ac36e1SFam Zheng 0, 0);
1796f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock);
179714ead646SFam Zheng
1798c72080b9SEric Blake index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
17994bc74be9SPaolo Bonzini switch (ret) {
18004bc74be9SPaolo Bonzini case VMDK_ERROR:
18014bc74be9SPaolo Bonzini ret = -EIO;
18024bc74be9SPaolo Bonzini break;
18034bc74be9SPaolo Bonzini case VMDK_UNALLOC:
18044bc74be9SPaolo Bonzini ret = 0;
18054bc74be9SPaolo Bonzini break;
18064bc74be9SPaolo Bonzini case VMDK_ZEROED:
18074bc74be9SPaolo Bonzini ret = BDRV_BLOCK_ZERO;
18084bc74be9SPaolo Bonzini break;
18094bc74be9SPaolo Bonzini case VMDK_OK:
18104bc74be9SPaolo Bonzini ret = BDRV_BLOCK_DATA;
1811e0f100f5SFam Zheng if (!extent->compressed) {
1812d0a18f10SFam Zheng ret |= BDRV_BLOCK_OFFSET_VALID;
1813c72080b9SEric Blake *map = cluster_offset + index_in_cluster;
18144dd84ac9SMax Reitz if (extent->flat) {
18154dd84ac9SMax Reitz ret |= BDRV_BLOCK_RECURSE;
18164dd84ac9SMax Reitz }
181728482891SAndrey Drobyshev via } else {
181828482891SAndrey Drobyshev via ret |= BDRV_BLOCK_COMPRESSED;
18194bc74be9SPaolo Bonzini }
1820e0f100f5SFam Zheng *file = extent->file->bs;
18214bc74be9SPaolo Bonzini break;
18224bc74be9SPaolo Bonzini }
182391b85bd3SFam Zheng
1824c72080b9SEric Blake n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
1825c72080b9SEric Blake *pnum = MIN(n, bytes);
1826b3976d3cSFam Zheng return ret;
1827019d6b8fSAnthony Liguori }
1828019d6b8fSAnthony Liguori
1829b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_write_extent(VmdkExtent * extent,int64_t cluster_offset,int64_t offset_in_cluster,QEMUIOVector * qiov,uint64_t qiov_offset,uint64_t n_bytes,uint64_t offset)1830b4df9903SPaolo Bonzini vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
183137b1d7d8SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov,
183237b1d7d8SKevin Wolf uint64_t qiov_offset, uint64_t n_bytes,
183337b1d7d8SKevin Wolf uint64_t offset)
1834dd3f6ee2SFam Zheng {
1835dd3f6ee2SFam Zheng int ret;
18362b2c8c5dSFam Zheng VmdkGrainMarker *data = NULL;
18372b2c8c5dSFam Zheng uLongf buf_len;
183837b1d7d8SKevin Wolf QEMUIOVector local_qiov;
18395e82a31eSFam Zheng int64_t write_offset;
18405e82a31eSFam Zheng int64_t write_end_sector;
1841dd3f6ee2SFam Zheng
18422b2c8c5dSFam Zheng if (extent->compressed) {
184337b1d7d8SKevin Wolf void *compressed_data;
184437b1d7d8SKevin Wolf
1845bedb8bb4SMax Reitz /* Only whole clusters */
1846bedb8bb4SMax Reitz if (offset_in_cluster ||
1847bedb8bb4SMax Reitz n_bytes > (extent->cluster_sectors * SECTOR_SIZE) ||
1848bedb8bb4SMax Reitz (n_bytes < (extent->cluster_sectors * SECTOR_SIZE) &&
1849bedb8bb4SMax Reitz offset + n_bytes != extent->end_sector * SECTOR_SIZE))
1850bedb8bb4SMax Reitz {
1851bedb8bb4SMax Reitz ret = -EINVAL;
1852bedb8bb4SMax Reitz goto out;
1853bedb8bb4SMax Reitz }
1854bedb8bb4SMax Reitz
18552b2c8c5dSFam Zheng if (!extent->has_marker) {
18562b2c8c5dSFam Zheng ret = -EINVAL;
18572b2c8c5dSFam Zheng goto out;
18582b2c8c5dSFam Zheng }
18592b2c8c5dSFam Zheng buf_len = (extent->cluster_sectors << 9) * 2;
18602b2c8c5dSFam Zheng data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
186137b1d7d8SKevin Wolf
186237b1d7d8SKevin Wolf compressed_data = g_malloc(n_bytes);
186337b1d7d8SKevin Wolf qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
186437b1d7d8SKevin Wolf ret = compress(data->data, &buf_len, compressed_data, n_bytes);
186537b1d7d8SKevin Wolf g_free(compressed_data);
186637b1d7d8SKevin Wolf
186737b1d7d8SKevin Wolf if (ret != Z_OK || buf_len == 0) {
18682b2c8c5dSFam Zheng ret = -EINVAL;
18692b2c8c5dSFam Zheng goto out;
18702b2c8c5dSFam Zheng }
18715e82a31eSFam Zheng
18724545d4f4SQingFeng Hao data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
18734545d4f4SQingFeng Hao data->size = cpu_to_le32(buf_len);
187437b1d7d8SKevin Wolf
187537b1d7d8SKevin Wolf n_bytes = buf_len + sizeof(VmdkGrainMarker);
1876199d95b0SVladimir Sementsov-Ogievskiy qemu_iovec_init_buf(&local_qiov, data, n_bytes);
187723c4b2a8SMax Reitz
187817362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
187937b1d7d8SKevin Wolf } else {
188037b1d7d8SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov);
188137b1d7d8SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
188223c4b2a8SMax Reitz
188317362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_AIO);
188437b1d7d8SKevin Wolf }
188537b1d7d8SKevin Wolf
18863c363575SMax Reitz write_offset = cluster_offset + offset_in_cluster;
1887a03ef88fSKevin Wolf ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
188837b1d7d8SKevin Wolf &local_qiov, 0);
188937b1d7d8SKevin Wolf
189037b1d7d8SKevin Wolf write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
18915e82a31eSFam Zheng
18923efffc32SRadoslav Gerganov if (extent->compressed) {
18933efffc32SRadoslav Gerganov extent->next_cluster_sector = write_end_sector;
18943efffc32SRadoslav Gerganov } else {
18955e82a31eSFam Zheng extent->next_cluster_sector = MAX(extent->next_cluster_sector,
18965e82a31eSFam Zheng write_end_sector);
18973efffc32SRadoslav Gerganov }
18985e82a31eSFam Zheng
189937b1d7d8SKevin Wolf if (ret < 0) {
1900dd3f6ee2SFam Zheng goto out;
1901dd3f6ee2SFam Zheng }
1902dd3f6ee2SFam Zheng ret = 0;
1903dd3f6ee2SFam Zheng out:
19042b2c8c5dSFam Zheng g_free(data);
190537b1d7d8SKevin Wolf if (!extent->compressed) {
190637b1d7d8SKevin Wolf qemu_iovec_destroy(&local_qiov);
190737b1d7d8SKevin Wolf }
1908dd3f6ee2SFam Zheng return ret;
1909dd3f6ee2SFam Zheng }
1910dd3f6ee2SFam Zheng
1911b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_read_extent(VmdkExtent * extent,int64_t cluster_offset,int64_t offset_in_cluster,QEMUIOVector * qiov,int bytes)1912b4df9903SPaolo Bonzini vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
1913b9b10c35SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov, int bytes)
1914dd3f6ee2SFam Zheng {
1915dd3f6ee2SFam Zheng int ret;
19162b2c8c5dSFam Zheng int cluster_bytes, buf_bytes;
19172b2c8c5dSFam Zheng uint8_t *cluster_buf, *compressed_data;
19182b2c8c5dSFam Zheng uint8_t *uncomp_buf;
19192b2c8c5dSFam Zheng uint32_t data_len;
19202b2c8c5dSFam Zheng VmdkGrainMarker *marker;
19212b2c8c5dSFam Zheng uLongf buf_len;
1922dd3f6ee2SFam Zheng
19232b2c8c5dSFam Zheng
19242b2c8c5dSFam Zheng if (!extent->compressed) {
192517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_AIO);
1926a03ef88fSKevin Wolf ret = bdrv_co_preadv(extent->file,
1927f10cc243SKevin Wolf cluster_offset + offset_in_cluster, bytes,
1928f10cc243SKevin Wolf qiov, 0);
1929f10cc243SKevin Wolf if (ret < 0) {
1930f10cc243SKevin Wolf return ret;
1931dd3f6ee2SFam Zheng }
1932f10cc243SKevin Wolf return 0;
1933dd3f6ee2SFam Zheng }
19342b2c8c5dSFam Zheng cluster_bytes = extent->cluster_sectors * 512;
19352b2c8c5dSFam Zheng /* Read two clusters in case GrainMarker + compressed data > one cluster */
19362b2c8c5dSFam Zheng buf_bytes = cluster_bytes * 2;
19372b2c8c5dSFam Zheng cluster_buf = g_malloc(buf_bytes);
19382b2c8c5dSFam Zheng uncomp_buf = g_malloc(cluster_bytes);
193917362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
1940a5c4e5beSAlberto Faria ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf,
1941a5c4e5beSAlberto Faria 0);
19422b2c8c5dSFam Zheng if (ret < 0) {
19432b2c8c5dSFam Zheng goto out;
19442b2c8c5dSFam Zheng }
19452b2c8c5dSFam Zheng compressed_data = cluster_buf;
19462b2c8c5dSFam Zheng buf_len = cluster_bytes;
19472b2c8c5dSFam Zheng data_len = cluster_bytes;
19482b2c8c5dSFam Zheng if (extent->has_marker) {
19492b2c8c5dSFam Zheng marker = (VmdkGrainMarker *)cluster_buf;
19502b2c8c5dSFam Zheng compressed_data = marker->data;
19512b2c8c5dSFam Zheng data_len = le32_to_cpu(marker->size);
19522b2c8c5dSFam Zheng }
19532b2c8c5dSFam Zheng if (!data_len || data_len > buf_bytes) {
19542b2c8c5dSFam Zheng ret = -EINVAL;
19552b2c8c5dSFam Zheng goto out;
19562b2c8c5dSFam Zheng }
19572b2c8c5dSFam Zheng ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
19582b2c8c5dSFam Zheng if (ret != Z_OK) {
19592b2c8c5dSFam Zheng ret = -EINVAL;
19602b2c8c5dSFam Zheng goto out;
19612b2c8c5dSFam Zheng
19622b2c8c5dSFam Zheng }
19632b2c8c5dSFam Zheng if (offset_in_cluster < 0 ||
1964f10cc243SKevin Wolf offset_in_cluster + bytes > buf_len) {
19652b2c8c5dSFam Zheng ret = -EINVAL;
19662b2c8c5dSFam Zheng goto out;
19672b2c8c5dSFam Zheng }
1968f10cc243SKevin Wolf qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
19692b2c8c5dSFam Zheng ret = 0;
19702b2c8c5dSFam Zheng
19712b2c8c5dSFam Zheng out:
19722b2c8c5dSFam Zheng g_free(uncomp_buf);
19732b2c8c5dSFam Zheng g_free(cluster_buf);
19742b2c8c5dSFam Zheng return ret;
19752b2c8c5dSFam Zheng }
1976dd3f6ee2SFam Zheng
1977b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)1978f7ef38ddSVladimir Sementsov-Ogievskiy vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
1979f7ef38ddSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags)
1980019d6b8fSAnthony Liguori {
1981019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
1982b3976d3cSFam Zheng int ret;
1983f10cc243SKevin Wolf uint64_t n_bytes, offset_in_cluster;
1984b3976d3cSFam Zheng VmdkExtent *extent = NULL;
1985f10cc243SKevin Wolf QEMUIOVector local_qiov;
1986019d6b8fSAnthony Liguori uint64_t cluster_offset;
1987f10cc243SKevin Wolf uint64_t bytes_done = 0;
1988019d6b8fSAnthony Liguori
1989f10cc243SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov);
1990f10cc243SKevin Wolf qemu_co_mutex_lock(&s->lock);
1991f10cc243SKevin Wolf
1992f10cc243SKevin Wolf while (bytes > 0) {
1993f10cc243SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
1994b3976d3cSFam Zheng if (!extent) {
1995f10cc243SKevin Wolf ret = -EIO;
1996f10cc243SKevin Wolf goto fail;
1997b3976d3cSFam Zheng }
1998c6ac36e1SFam Zheng ret = get_cluster_offset(bs, extent, NULL,
1999f10cc243SKevin Wolf offset, false, &cluster_offset, 0, 0);
2000f10cc243SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
2001f10cc243SKevin Wolf
2002f10cc243SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
2003f10cc243SKevin Wolf - offset_in_cluster);
2004f10cc243SKevin Wolf
200514ead646SFam Zheng if (ret != VMDK_OK) {
200691b85bd3SFam Zheng /* if not allocated, try to read from parent image, if exist */
2007760e0063SKevin Wolf if (bs->backing && ret != VMDK_ZEROED) {
2008ae261c86SFam Zheng if (!vmdk_is_cid_valid(bs)) {
2009f10cc243SKevin Wolf ret = -EINVAL;
2010f10cc243SKevin Wolf goto fail;
2011019d6b8fSAnthony Liguori }
2012019d6b8fSAnthony Liguori
2013f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov);
2014f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
2015f10cc243SKevin Wolf
201623c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
201717362398SPaolo Bonzini BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2018a03ef88fSKevin Wolf ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
2019f10cc243SKevin Wolf &local_qiov, 0);
2020f10cc243SKevin Wolf if (ret < 0) {
2021f10cc243SKevin Wolf goto fail;
2022f10cc243SKevin Wolf }
2023f10cc243SKevin Wolf } else {
2024f10cc243SKevin Wolf qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
2025f10cc243SKevin Wolf }
2026f10cc243SKevin Wolf } else {
2027f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov);
2028f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
2029f10cc243SKevin Wolf
2030f10cc243SKevin Wolf ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
2031f10cc243SKevin Wolf &local_qiov, n_bytes);
2032f10cc243SKevin Wolf if (ret) {
2033f10cc243SKevin Wolf goto fail;
2034f10cc243SKevin Wolf }
2035f10cc243SKevin Wolf }
2036f10cc243SKevin Wolf bytes -= n_bytes;
2037f10cc243SKevin Wolf offset += n_bytes;
2038f10cc243SKevin Wolf bytes_done += n_bytes;
2039f10cc243SKevin Wolf }
2040f10cc243SKevin Wolf
2041f10cc243SKevin Wolf ret = 0;
2042f10cc243SKevin Wolf fail:
20432914caa0SPaolo Bonzini qemu_co_mutex_unlock(&s->lock);
2044f10cc243SKevin Wolf qemu_iovec_destroy(&local_qiov);
2045f10cc243SKevin Wolf
20462914caa0SPaolo Bonzini return ret;
20472914caa0SPaolo Bonzini }
20482914caa0SPaolo Bonzini
2049cdeaf1f1SFam Zheng /**
2050cdeaf1f1SFam Zheng * vmdk_write:
2051cdeaf1f1SFam Zheng * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
2052cdeaf1f1SFam Zheng * if possible, otherwise return -ENOTSUP.
20538e507243SFam Zheng * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
20548e507243SFam Zheng * with each cluster. By dry run we can find if the zero write
20558e507243SFam Zheng * is possible without modifying image data.
2056cdeaf1f1SFam Zheng *
2057cdeaf1f1SFam Zheng * Returns: error code with 0 for success.
2058cdeaf1f1SFam Zheng */
2059b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_pwritev(BlockDriverState * bs,uint64_t offset,uint64_t bytes,QEMUIOVector * qiov,bool zeroed,bool zero_dry_run)2060b9b10c35SKevin Wolf vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
2061b9b10c35SKevin Wolf QEMUIOVector *qiov, bool zeroed, bool zero_dry_run)
2062019d6b8fSAnthony Liguori {
2063019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
2064b3976d3cSFam Zheng VmdkExtent *extent = NULL;
2065585ea0c8SFam Zheng int ret;
206637b1d7d8SKevin Wolf int64_t offset_in_cluster, n_bytes;
2067019d6b8fSAnthony Liguori uint64_t cluster_offset;
206837b1d7d8SKevin Wolf uint64_t bytes_done = 0;
2069b3976d3cSFam Zheng VmdkMetaData m_data;
2070019d6b8fSAnthony Liguori
207137b1d7d8SKevin Wolf if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
207237b1d7d8SKevin Wolf error_report("Wrong offset: offset=0x%" PRIx64
20739af9e0feSMarkus Armbruster " total_sectors=0x%" PRIx64,
207437b1d7d8SKevin Wolf offset, bs->total_sectors);
20757fa60fa3SFam Zheng return -EIO;
2076019d6b8fSAnthony Liguori }
2077019d6b8fSAnthony Liguori
207837b1d7d8SKevin Wolf while (bytes > 0) {
207937b1d7d8SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
2080b3976d3cSFam Zheng if (!extent) {
2081b3976d3cSFam Zheng return -EIO;
2082b3976d3cSFam Zheng }
208398eb9733SSam Eiderman if (extent->sesparse) {
208498eb9733SSam Eiderman return -ENOTSUP;
208598eb9733SSam Eiderman }
208637b1d7d8SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
208737b1d7d8SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
208837b1d7d8SKevin Wolf - offset_in_cluster);
208937b1d7d8SKevin Wolf
209037b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset,
2091c6ac36e1SFam Zheng !(extent->compressed || zeroed),
209237b1d7d8SKevin Wolf &cluster_offset, offset_in_cluster,
209337b1d7d8SKevin Wolf offset_in_cluster + n_bytes);
20942b2c8c5dSFam Zheng if (extent->compressed) {
209565f74725SFam Zheng if (ret == VMDK_OK) {
20962b2c8c5dSFam Zheng /* Refuse write to allocated cluster for streamOptimized */
20974823970bSFam Zheng error_report("Could not write to allocated cluster"
20984823970bSFam Zheng " for streamOptimized");
20992b2c8c5dSFam Zheng return -EIO;
21002821c1ccSKevin Wolf } else if (!zeroed) {
21012b2c8c5dSFam Zheng /* allocate */
210237b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset,
2103c6ac36e1SFam Zheng true, &cluster_offset, 0, 0);
21042b2c8c5dSFam Zheng }
21052b2c8c5dSFam Zheng }
2106cdeaf1f1SFam Zheng if (ret == VMDK_ERROR) {
210791b85bd3SFam Zheng return -EINVAL;
2108b3976d3cSFam Zheng }
2109cdeaf1f1SFam Zheng if (zeroed) {
2110cdeaf1f1SFam Zheng /* Do zeroed write, buf is ignored */
2111cdeaf1f1SFam Zheng if (extent->has_zero_grain &&
211237b1d7d8SKevin Wolf offset_in_cluster == 0 &&
211337b1d7d8SKevin Wolf n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
211437b1d7d8SKevin Wolf n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
211578cae78dSKevin Wolf if (!zero_dry_run && ret != VMDK_ZEROED) {
2116cdeaf1f1SFam Zheng /* update L2 tables */
2117c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
2118c6ac36e1SFam Zheng != VMDK_OK) {
2119cdeaf1f1SFam Zheng return -EIO;
2120cdeaf1f1SFam Zheng }
2121cdeaf1f1SFam Zheng }
2122cdeaf1f1SFam Zheng } else {
2123cdeaf1f1SFam Zheng return -ENOTSUP;
2124cdeaf1f1SFam Zheng }
2125cdeaf1f1SFam Zheng } else {
212637b1d7d8SKevin Wolf ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
212737b1d7d8SKevin Wolf qiov, bytes_done, n_bytes, offset);
2128dd3f6ee2SFam Zheng if (ret) {
21297fa60fa3SFam Zheng return ret;
2130b3976d3cSFam Zheng }
21314dc20e64SKevin Wolf if (m_data.new_allocation) {
2132019d6b8fSAnthony Liguori /* update L2 tables */
2133c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data,
2134c6ac36e1SFam Zheng cluster_offset >> BDRV_SECTOR_BITS)
2135c6ac36e1SFam Zheng != VMDK_OK) {
21367fa60fa3SFam Zheng return -EIO;
2137019d6b8fSAnthony Liguori }
2138b3976d3cSFam Zheng }
2139cdeaf1f1SFam Zheng }
214037b1d7d8SKevin Wolf bytes -= n_bytes;
214137b1d7d8SKevin Wolf offset += n_bytes;
214237b1d7d8SKevin Wolf bytes_done += n_bytes;
2143019d6b8fSAnthony Liguori
2144ae261c86SFam Zheng /* update CID on the first write every time the virtual disk is
2145ae261c86SFam Zheng * opened */
214669b4d86dSFam Zheng if (!s->cid_updated) {
2147e5dc64b8SFam Zheng ret = vmdk_write_cid(bs, g_random_int());
214899f1835dSKevin Wolf if (ret < 0) {
214999f1835dSKevin Wolf return ret;
215099f1835dSKevin Wolf }
215169b4d86dSFam Zheng s->cid_updated = true;
2152019d6b8fSAnthony Liguori }
2153019d6b8fSAnthony Liguori }
2154019d6b8fSAnthony Liguori return 0;
2155019d6b8fSAnthony Liguori }
2156019d6b8fSAnthony Liguori
2157b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)2158e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
2159e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags)
2160e183ef75SPaolo Bonzini {
2161e183ef75SPaolo Bonzini int ret;
2162e183ef75SPaolo Bonzini BDRVVmdkState *s = bs->opaque;
2163e183ef75SPaolo Bonzini qemu_co_mutex_lock(&s->lock);
216437b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
2165cdeaf1f1SFam Zheng qemu_co_mutex_unlock(&s->lock);
2166cdeaf1f1SFam Zheng return ret;
2167cdeaf1f1SFam Zheng }
2168cdeaf1f1SFam Zheng
21697b1fb72eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev_compressed(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov)2170e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
2171e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov)
217237b1d7d8SKevin Wolf {
217351b3c6b7Syuchenlin if (bytes == 0) {
217451b3c6b7Syuchenlin /* The caller will write bytes 0 to signal EOF.
217551b3c6b7Syuchenlin * When receive it, we align EOF to a sector boundary. */
217651b3c6b7Syuchenlin BDRVVmdkState *s = bs->opaque;
217751b3c6b7Syuchenlin int i, ret;
217851b3c6b7Syuchenlin int64_t length;
217951b3c6b7Syuchenlin
218051b3c6b7Syuchenlin for (i = 0; i < s->num_extents; i++) {
21810af02bd1SPaolo Bonzini length = bdrv_co_getlength(s->extents[i].file->bs);
218251b3c6b7Syuchenlin if (length < 0) {
218351b3c6b7Syuchenlin return length;
218451b3c6b7Syuchenlin }
218551b3c6b7Syuchenlin length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
2186a5c4e5beSAlberto Faria ret = bdrv_co_truncate(s->extents[i].file, length, false,
21877b8e4857SKevin Wolf PREALLOC_MODE_OFF, 0, NULL);
218851b3c6b7Syuchenlin if (ret < 0) {
218951b3c6b7Syuchenlin return ret;
219051b3c6b7Syuchenlin }
219151b3c6b7Syuchenlin }
219251b3c6b7Syuchenlin return 0;
219351b3c6b7Syuchenlin }
2194b2c622d3SPavel Butsykin return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
2195ba0ad89eSFam Zheng }
2196ba0ad89eSFam Zheng
2197b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)2198b9b10c35SKevin Wolf vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
2199aa7bfbffSPeter Lieven BdrvRequestFlags flags)
2200cdeaf1f1SFam Zheng {
2201cdeaf1f1SFam Zheng int ret;
2202cdeaf1f1SFam Zheng BDRVVmdkState *s = bs->opaque;
220337b1d7d8SKevin Wolf
2204cdeaf1f1SFam Zheng qemu_co_mutex_lock(&s->lock);
22058e507243SFam Zheng /* write zeroes could fail if sectors not aligned to cluster, test it with
22068e507243SFam Zheng * dry_run == true before really updating image */
220737b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
2208cdeaf1f1SFam Zheng if (!ret) {
220937b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
2210cdeaf1f1SFam Zheng }
2211e183ef75SPaolo Bonzini qemu_co_mutex_unlock(&s->lock);
2212e183ef75SPaolo Bonzini return ret;
2213e183ef75SPaolo Bonzini }
2214e183ef75SPaolo Bonzini
221528944f99SPaolo Bonzini static int coroutine_fn GRAPH_UNLOCKED
vmdk_init_extent(BlockBackend * blk,int64_t filesize,bool flat,bool compress,bool zeroed_grain,Error ** errp)22164db7ba3bSKevin Wolf vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
22174db7ba3bSKevin Wolf bool zeroed_grain, Error **errp)
2218019d6b8fSAnthony Liguori {
2219f66fd6c3SFam Zheng int ret, i;
2220019d6b8fSAnthony Liguori VMDK4Header header;
2221917703c1SFam Zheng uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
2222917703c1SFam Zheng uint32_t *gd_buf = NULL;
2223917703c1SFam Zheng int gd_buf_size;
22240e7e1989SKevin Wolf
2225917703c1SFam Zheng if (flat) {
222628944f99SPaolo Bonzini ret = blk_co_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
2227f66fd6c3SFam Zheng goto exit;
2228f66fd6c3SFam Zheng }
2229019d6b8fSAnthony Liguori magic = cpu_to_be32(VMDK4_MAGIC);
2230019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header));
2231d62d9dc4SFam Zheng if (compress) {
2232d62d9dc4SFam Zheng header.version = 3;
2233d62d9dc4SFam Zheng } else if (zeroed_grain) {
2234d62d9dc4SFam Zheng header.version = 2;
2235d62d9dc4SFam Zheng } else {
2236d62d9dc4SFam Zheng header.version = 1;
2237d62d9dc4SFam Zheng }
223895b0aa42SFam Zheng header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
223969e0b6dfSFam Zheng | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
224069e0b6dfSFam Zheng | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
22416c031aacSFam Zheng header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
2242917703c1SFam Zheng header.capacity = filesize / BDRV_SECTOR_SIZE;
224316372ff0SAlexander Graf header.granularity = 128;
2244917703c1SFam Zheng header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
2245019d6b8fSAnthony Liguori
2246917703c1SFam Zheng grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
2247917703c1SFam Zheng gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
2248917703c1SFam Zheng BDRV_SECTOR_SIZE);
2249917703c1SFam Zheng gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
2250917703c1SFam Zheng gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
2251019d6b8fSAnthony Liguori
2252019d6b8fSAnthony Liguori header.desc_offset = 1;
2253019d6b8fSAnthony Liguori header.desc_size = 20;
2254019d6b8fSAnthony Liguori header.rgd_offset = header.desc_offset + header.desc_size;
2255917703c1SFam Zheng header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
2256019d6b8fSAnthony Liguori header.grain_offset =
2257917703c1SFam Zheng ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
2258917703c1SFam Zheng header.granularity);
225916372ff0SAlexander Graf /* swap endianness for all header fields */
226016372ff0SAlexander Graf header.version = cpu_to_le32(header.version);
226116372ff0SAlexander Graf header.flags = cpu_to_le32(header.flags);
226216372ff0SAlexander Graf header.capacity = cpu_to_le64(header.capacity);
226316372ff0SAlexander Graf header.granularity = cpu_to_le64(header.granularity);
2264ca8804ceSFam Zheng header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
2265019d6b8fSAnthony Liguori header.desc_offset = cpu_to_le64(header.desc_offset);
2266019d6b8fSAnthony Liguori header.desc_size = cpu_to_le64(header.desc_size);
2267019d6b8fSAnthony Liguori header.rgd_offset = cpu_to_le64(header.rgd_offset);
2268019d6b8fSAnthony Liguori header.gd_offset = cpu_to_le64(header.gd_offset);
2269019d6b8fSAnthony Liguori header.grain_offset = cpu_to_le64(header.grain_offset);
22706c031aacSFam Zheng header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
2271019d6b8fSAnthony Liguori
2272019d6b8fSAnthony Liguori header.check_bytes[0] = 0xa;
2273019d6b8fSAnthony Liguori header.check_bytes[1] = 0x20;
2274019d6b8fSAnthony Liguori header.check_bytes[2] = 0xd;
2275019d6b8fSAnthony Liguori header.check_bytes[3] = 0xa;
2276019d6b8fSAnthony Liguori
2277019d6b8fSAnthony Liguori /* write all the data */
227828944f99SPaolo Bonzini ret = blk_co_pwrite(blk, 0, sizeof(magic), &magic, 0);
2279917703c1SFam Zheng if (ret < 0) {
2280*29ad187cSMarkus Armbruster error_setg_errno(errp, -ret, "failed to write VMDK magic");
22811640366cSKirill A. Shutemov goto exit;
22821640366cSKirill A. Shutemov }
228328944f99SPaolo Bonzini ret = blk_co_pwrite(blk, sizeof(magic), sizeof(header), &header, 0);
2284917703c1SFam Zheng if (ret < 0) {
2285*29ad187cSMarkus Armbruster error_setg_errno(errp, -ret, "failed to write VMDK header");
22861640366cSKirill A. Shutemov goto exit;
22871640366cSKirill A. Shutemov }
2288019d6b8fSAnthony Liguori
228928944f99SPaolo Bonzini ret = blk_co_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
22908c6242b6SKevin Wolf PREALLOC_MODE_OFF, 0, errp);
22911640366cSKirill A. Shutemov if (ret < 0) {
22921640366cSKirill A. Shutemov goto exit;
22931640366cSKirill A. Shutemov }
2294019d6b8fSAnthony Liguori
2295019d6b8fSAnthony Liguori /* write grain directory */
2296917703c1SFam Zheng gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
2297917703c1SFam Zheng gd_buf = g_malloc0(gd_buf_size);
2298917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
22991640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) {
2300917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp);
23011640366cSKirill A. Shutemov }
230228944f99SPaolo Bonzini ret = blk_co_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
2303a9262f55SAlberto Faria gd_buf_size, gd_buf, 0);
2304917703c1SFam Zheng if (ret < 0) {
2305*29ad187cSMarkus Armbruster error_setg_errno(errp, -ret, "failed to write VMDK grain directory");
2306917703c1SFam Zheng goto exit;
23071640366cSKirill A. Shutemov }
2308019d6b8fSAnthony Liguori
2309019d6b8fSAnthony Liguori /* write backup grain directory */
2310917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
23111640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) {
2312917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp);
23131640366cSKirill A. Shutemov }
231428944f99SPaolo Bonzini ret = blk_co_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
2315a9262f55SAlberto Faria gd_buf_size, gd_buf, 0);
2316917703c1SFam Zheng if (ret < 0) {
2317*29ad187cSMarkus Armbruster error_setg_errno(errp, -ret,
2318*29ad187cSMarkus Armbruster "failed to write VMDK backup grain directory");
23191640366cSKirill A. Shutemov }
2320019d6b8fSAnthony Liguori
2321f66fd6c3SFam Zheng ret = 0;
2322f66fd6c3SFam Zheng exit:
2323917703c1SFam Zheng g_free(gd_buf);
2324f66fd6c3SFam Zheng return ret;
2325f66fd6c3SFam Zheng }
2326019d6b8fSAnthony Liguori
23274db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_create_extent(const char * filename,int64_t filesize,bool flat,bool compress,bool zeroed_grain,BlockBackend ** pbb,QemuOpts * opts,Error ** errp)23284ec8df01SKevin Wolf vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
23294ec8df01SKevin Wolf bool compress, bool zeroed_grain, BlockBackend **pbb,
23305be28490SFam Zheng QemuOpts *opts, Error **errp)
23315be28490SFam Zheng {
23325be28490SFam Zheng int ret;
23335be28490SFam Zheng BlockBackend *blk = NULL;
23345be28490SFam Zheng
23352475a0d0SEmanuele Giuseppe Esposito ret = bdrv_co_create_file(filename, opts, errp);
23365be28490SFam Zheng if (ret < 0) {
23375be28490SFam Zheng goto exit;
23385be28490SFam Zheng }
23395be28490SFam Zheng
2340882f202eSKevin Wolf blk = blk_co_new_open(filename, NULL, NULL,
23415be28490SFam Zheng BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
2342af175e85SMarkus Armbruster errp);
23435be28490SFam Zheng if (blk == NULL) {
23445be28490SFam Zheng ret = -EIO;
23455be28490SFam Zheng goto exit;
23465be28490SFam Zheng }
23475be28490SFam Zheng
23485be28490SFam Zheng blk_set_allow_write_beyond_eof(blk, true);
23495be28490SFam Zheng
23505be28490SFam Zheng ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp);
23515be28490SFam Zheng exit:
23525be28490SFam Zheng if (blk) {
23535be28490SFam Zheng if (pbb) {
23545be28490SFam Zheng *pbb = blk;
23555be28490SFam Zheng } else {
2356b2ab5f54SKevin Wolf blk_co_unref(blk);
23575be28490SFam Zheng blk = NULL;
23585be28490SFam Zheng }
23595be28490SFam Zheng }
23605be28490SFam Zheng return ret;
23615be28490SFam Zheng }
23625be28490SFam Zheng
filename_decompose(const char * filename,char * path,char * prefix,char * postfix,size_t buf_len,Error ** errp)2363f66fd6c3SFam Zheng static int filename_decompose(const char *filename, char *path, char *prefix,
23644823970bSFam Zheng char *postfix, size_t buf_len, Error **errp)
2365f66fd6c3SFam Zheng {
2366f66fd6c3SFam Zheng const char *p, *q;
2367f66fd6c3SFam Zheng
2368f66fd6c3SFam Zheng if (filename == NULL || !strlen(filename)) {
23694823970bSFam Zheng error_setg(errp, "No filename provided");
237065f74725SFam Zheng return VMDK_ERROR;
2371f66fd6c3SFam Zheng }
2372f66fd6c3SFam Zheng p = strrchr(filename, '/');
2373f66fd6c3SFam Zheng if (p == NULL) {
2374f66fd6c3SFam Zheng p = strrchr(filename, '\\');
2375f66fd6c3SFam Zheng }
2376f66fd6c3SFam Zheng if (p == NULL) {
2377f66fd6c3SFam Zheng p = strrchr(filename, ':');
2378f66fd6c3SFam Zheng }
2379f66fd6c3SFam Zheng if (p != NULL) {
2380f66fd6c3SFam Zheng p++;
2381f66fd6c3SFam Zheng if (p - filename >= buf_len) {
238265f74725SFam Zheng return VMDK_ERROR;
2383f66fd6c3SFam Zheng }
2384f66fd6c3SFam Zheng pstrcpy(path, p - filename + 1, filename);
2385f66fd6c3SFam Zheng } else {
2386f66fd6c3SFam Zheng p = filename;
2387f66fd6c3SFam Zheng path[0] = '\0';
2388f66fd6c3SFam Zheng }
2389f66fd6c3SFam Zheng q = strrchr(p, '.');
2390f66fd6c3SFam Zheng if (q == NULL) {
2391f66fd6c3SFam Zheng pstrcpy(prefix, buf_len, p);
2392f66fd6c3SFam Zheng postfix[0] = '\0';
2393f66fd6c3SFam Zheng } else {
2394f66fd6c3SFam Zheng if (q - p >= buf_len) {
239565f74725SFam Zheng return VMDK_ERROR;
2396f66fd6c3SFam Zheng }
2397f66fd6c3SFam Zheng pstrcpy(prefix, q - p + 1, p);
2398f66fd6c3SFam Zheng pstrcpy(postfix, buf_len, q);
2399f66fd6c3SFam Zheng }
240065f74725SFam Zheng return VMDK_OK;
2401f66fd6c3SFam Zheng }
2402f66fd6c3SFam Zheng
24033015372dSFam Zheng /*
24043015372dSFam Zheng * idx == 0: get or create the descriptor file (also the image file if in a
24053015372dSFam Zheng * non-split format.
24063015372dSFam Zheng * idx >= 1: get the n-th extent if in a split subformat
24073015372dSFam Zheng */
24084db7ba3bSKevin Wolf typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
24094ec8df01SKevin Wolf (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
24104ec8df01SKevin Wolf bool compress, bool zeroed_grain, void *opaque,
24113015372dSFam Zheng Error **errp);
24123015372dSFam Zheng
/*
 * Append one extent line to the descriptor text in @desc.
 *
 * @extent_line_fmt is used as the printf-style format.  It is given the
 * extent size in sectors (@size in bytes, rounded up) followed by the base
 * name of @filename.
 */
static void vmdk_desc_add_extent(GString *desc,
                                 const char *extent_line_fmt,
                                 int64_t size, const char *filename)
{
    /* Only the last path component of @filename goes into the line */
    char *extent_name = g_path_get_basename(filename);

    g_string_append_printf(desc, extent_line_fmt,
                           DIV_ROUND_UP(size, BDRV_SECTOR_SIZE),
                           extent_name);

    g_free(extent_name);
}
24233015372dSFam Zheng
24244db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_do_create(int64_t size,BlockdevVmdkSubformat subformat,BlockdevVmdkAdapterType adapter_type,const char * backing_file,const char * hw_version,const char * toolsversion,bool compat6,bool zeroed_grain,vmdk_create_extent_fn extent_fn,void * opaque,Error ** errp)24254ec8df01SKevin Wolf vmdk_co_do_create(int64_t size,
24263015372dSFam Zheng BlockdevVmdkSubformat subformat,
24273015372dSFam Zheng BlockdevVmdkAdapterType adapter_type,
24283015372dSFam Zheng const char *backing_file,
24293015372dSFam Zheng const char *hw_version,
2430f3d43dfdSThomas Weißschuh const char *toolsversion,
24313015372dSFam Zheng bool compat6,
24323015372dSFam Zheng bool zeroed_grain,
24333015372dSFam Zheng vmdk_create_extent_fn extent_fn,
24343015372dSFam Zheng void *opaque,
2435efc75e2aSStefan Hajnoczi Error **errp)
2436f66fd6c3SFam Zheng {
24373015372dSFam Zheng int extent_idx;
24383015372dSFam Zheng BlockBackend *blk = NULL;
24394a960eceSKevin Wolf BlockBackend *extent_blk;
2440c13959c7SFam Zheng Error *local_err = NULL;
2441af057fe7SFam Zheng char *desc = NULL;
2442f66fd6c3SFam Zheng int ret = 0;
24436c031aacSFam Zheng bool flat, split, compress;
2444af057fe7SFam Zheng GString *ext_desc_lines;
2445f66fd6c3SFam Zheng const int64_t split_size = 0x80000000; /* VMDK has constant split size */
24463015372dSFam Zheng int64_t extent_size;
24473015372dSFam Zheng int64_t created_size = 0;
24483015372dSFam Zheng const char *extent_line_fmt;
2449fe206562SJeff Cody char *parent_desc_line = g_malloc0(BUF_SIZE);
2450f66fd6c3SFam Zheng uint32_t parent_cid = 0xffffffff;
24517f2039f6SOthmar Pasteka uint32_t number_heads = 16;
2452917703c1SFam Zheng uint32_t desc_offset = 0, desc_len;
2453f66fd6c3SFam Zheng const char desc_template[] =
2454f66fd6c3SFam Zheng "# Disk DescriptorFile\n"
2455f66fd6c3SFam Zheng "version=1\n"
24569b17031aSFam Zheng "CID=%" PRIx32 "\n"
24579b17031aSFam Zheng "parentCID=%" PRIx32 "\n"
2458f66fd6c3SFam Zheng "createType=\"%s\"\n"
2459f66fd6c3SFam Zheng "%s"
2460f66fd6c3SFam Zheng "\n"
2461f66fd6c3SFam Zheng "# Extent description\n"
2462f66fd6c3SFam Zheng "%s"
2463f66fd6c3SFam Zheng "\n"
2464f66fd6c3SFam Zheng "# The Disk Data Base\n"
2465f66fd6c3SFam Zheng "#DDB\n"
2466f66fd6c3SFam Zheng "\n"
2467f249924eSJanne Karhunen "ddb.virtualHWVersion = \"%s\"\n"
2468f66fd6c3SFam Zheng "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
24694ab9dab5SFam Zheng "ddb.geometry.heads = \"%" PRIu32 "\"\n"
2470f66fd6c3SFam Zheng "ddb.geometry.sectors = \"63\"\n"
2471f3d43dfdSThomas Weißschuh "ddb.adapterType = \"%s\"\n"
2472f3d43dfdSThomas Weißschuh "ddb.toolsVersion = \"%s\"\n";
2473f66fd6c3SFam Zheng
2474af057fe7SFam Zheng ext_desc_lines = g_string_new(NULL);
2475af057fe7SFam Zheng
2476f66fd6c3SFam Zheng /* Read out options */
24773015372dSFam Zheng if (compat6) {
24783015372dSFam Zheng if (hw_version) {
2479f249924eSJanne Karhunen error_setg(errp,
2480f249924eSJanne Karhunen "compat6 cannot be enabled with hwversion set");
2481f249924eSJanne Karhunen ret = -EINVAL;
2482f249924eSJanne Karhunen goto exit;
2483f249924eSJanne Karhunen }
24843015372dSFam Zheng hw_version = "6";
2485f249924eSJanne Karhunen }
24863015372dSFam Zheng if (!hw_version) {
24873015372dSFam Zheng hw_version = "4";
2488f66fd6c3SFam Zheng }
2489f3d43dfdSThomas Weißschuh if (!toolsversion) {
2490f3d43dfdSThomas Weißschuh toolsversion = "2147483647";
2491f3d43dfdSThomas Weißschuh }
24925820f1daSChunyan Liu
24933015372dSFam Zheng if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
24947f2039f6SOthmar Pasteka /* that's the number of heads with which vmware operates when
24957f2039f6SOthmar Pasteka creating, exporting, etc. vmdk files with a non-ide adapter type */
24967f2039f6SOthmar Pasteka number_heads = 255;
24977f2039f6SOthmar Pasteka }
24983015372dSFam Zheng split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) ||
24993015372dSFam Zheng (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE);
25003015372dSFam Zheng flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) ||
25013015372dSFam Zheng (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT);
25023015372dSFam Zheng compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED;
25033015372dSFam Zheng
2504f66fd6c3SFam Zheng if (flat) {
25053015372dSFam Zheng extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n";
2506f66fd6c3SFam Zheng } else {
25073015372dSFam Zheng extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n";
2508f66fd6c3SFam Zheng }
2509f66fd6c3SFam Zheng if (flat && backing_file) {
25104823970bSFam Zheng error_setg(errp, "Flat image can't have backing file");
2511af057fe7SFam Zheng ret = -ENOTSUP;
2512af057fe7SFam Zheng goto exit;
2513f66fd6c3SFam Zheng }
251452c8d629SFam Zheng if (flat && zeroed_grain) {
251552c8d629SFam Zheng error_setg(errp, "Flat image can't enable zeroed grain");
2516af057fe7SFam Zheng ret = -ENOTSUP;
2517af057fe7SFam Zheng goto exit;
251852c8d629SFam Zheng }
25193015372dSFam Zheng
25203015372dSFam Zheng /* Create extents */
25213015372dSFam Zheng if (split) {
25223015372dSFam Zheng extent_size = split_size;
25233015372dSFam Zheng } else {
25243015372dSFam Zheng extent_size = size;
25253015372dSFam Zheng }
25263015372dSFam Zheng if (!split && !flat) {
25273015372dSFam Zheng created_size = extent_size;
25283015372dSFam Zheng } else {
25293015372dSFam Zheng created_size = 0;
25303015372dSFam Zheng }
25313015372dSFam Zheng /* Get the descriptor file BDS */
25323015372dSFam Zheng blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain,
25333015372dSFam Zheng opaque, errp);
25343015372dSFam Zheng if (!blk) {
25353015372dSFam Zheng ret = -EIO;
25363015372dSFam Zheng goto exit;
25373015372dSFam Zheng }
25383015372dSFam Zheng if (!split && !flat) {
25393015372dSFam Zheng vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size,
25403015372dSFam Zheng blk_bs(blk)->filename);
25413015372dSFam Zheng }
25423015372dSFam Zheng
2543f66fd6c3SFam Zheng if (backing_file) {
25443015372dSFam Zheng BlockBackend *backing;
2545645ae7d8SMax Reitz char *full_backing =
2546645ae7d8SMax Reitz bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename,
2547645ae7d8SMax Reitz backing_file,
25481085daf9SMax Reitz &local_err);
25491085daf9SMax Reitz if (local_err) {
25501085daf9SMax Reitz error_propagate(errp, local_err);
25511085daf9SMax Reitz ret = -ENOENT;
25521085daf9SMax Reitz goto exit;
25531085daf9SMax Reitz }
2554645ae7d8SMax Reitz assert(full_backing);
2555c4bea169SKevin Wolf
2556882f202eSKevin Wolf backing = blk_co_new_open(full_backing, NULL, NULL,
255772e775c7SKevin Wolf BDRV_O_NO_BACKING, errp);
25581085daf9SMax Reitz g_free(full_backing);
25593015372dSFam Zheng if (backing == NULL) {
2560c4bea169SKevin Wolf ret = -EIO;
2561af057fe7SFam Zheng goto exit;
2562f66fd6c3SFam Zheng }
25633015372dSFam Zheng if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
25643015372dSFam Zheng error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
25653015372dSFam Zheng blk_bs(backing)->drv->format_name);
2566b2ab5f54SKevin Wolf blk_co_unref(backing);
2567af057fe7SFam Zheng ret = -EINVAL;
2568af057fe7SFam Zheng goto exit;
2569f66fd6c3SFam Zheng }
25701f051dcbSKevin Wolf
25711f051dcbSKevin Wolf bdrv_graph_co_rdlock();
25723015372dSFam Zheng ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
25731f051dcbSKevin Wolf bdrv_graph_co_rdunlock();
2574b2ab5f54SKevin Wolf blk_co_unref(backing);
25759877860eSPeter Maydell if (ret) {
25763015372dSFam Zheng error_setg(errp, "Failed to read parent CID");
25779877860eSPeter Maydell goto exit;
25789877860eSPeter Maydell }
2579fe206562SJeff Cody snprintf(parent_desc_line, BUF_SIZE,
25808ed610a1SFam Zheng "parentFileNameHint=\"%s\"", backing_file);
2581f66fd6c3SFam Zheng }
25823015372dSFam Zheng extent_idx = 1;
25833015372dSFam Zheng while (created_size < size) {
25843015372dSFam Zheng int64_t cur_size = MIN(size - created_size, extent_size);
25853015372dSFam Zheng extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress,
25863015372dSFam Zheng zeroed_grain, opaque, errp);
25873015372dSFam Zheng if (!extent_blk) {
2588af057fe7SFam Zheng ret = -EINVAL;
2589af057fe7SFam Zheng goto exit;
2590f66fd6c3SFam Zheng }
25913015372dSFam Zheng vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size,
25923015372dSFam Zheng blk_bs(extent_blk)->filename);
25933015372dSFam Zheng created_size += cur_size;
25943015372dSFam Zheng extent_idx++;
2595b2ab5f54SKevin Wolf blk_co_unref(extent_blk);
2596f66fd6c3SFam Zheng }
25974a960eceSKevin Wolf
25984a960eceSKevin Wolf /* Check whether we got excess extents */
25994a960eceSKevin Wolf extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
26004a960eceSKevin Wolf opaque, NULL);
26014a960eceSKevin Wolf if (extent_blk) {
2602b2ab5f54SKevin Wolf blk_co_unref(extent_blk);
26034a960eceSKevin Wolf error_setg(errp, "List of extents contains unused extents");
26044a960eceSKevin Wolf ret = -EINVAL;
26054a960eceSKevin Wolf goto exit;
26064a960eceSKevin Wolf }
26074a960eceSKevin Wolf
2608f66fd6c3SFam Zheng /* generate descriptor file */
2609af057fe7SFam Zheng desc = g_strdup_printf(desc_template,
2610e5dc64b8SFam Zheng g_random_int(),
2611f66fd6c3SFam Zheng parent_cid,
26123015372dSFam Zheng BlockdevVmdkSubformat_str(subformat),
2613f66fd6c3SFam Zheng parent_desc_line,
2614af057fe7SFam Zheng ext_desc_lines->str,
2615f249924eSJanne Karhunen hw_version,
26163015372dSFam Zheng size /
2617917703c1SFam Zheng (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
2618af057fe7SFam Zheng number_heads,
2619f3d43dfdSThomas Weißschuh BlockdevVmdkAdapterType_str(adapter_type),
2620f3d43dfdSThomas Weißschuh toolsversion);
2621917703c1SFam Zheng desc_len = strlen(desc);
2622917703c1SFam Zheng /* the descriptor offset = 0x200 */
2623917703c1SFam Zheng if (!split && !flat) {
2624917703c1SFam Zheng desc_offset = 0x200;
2625f66fd6c3SFam Zheng }
2626c4bea169SKevin Wolf
2627a5c4e5beSAlberto Faria ret = blk_co_pwrite(blk, desc_offset, desc_len, desc, 0);
2628917703c1SFam Zheng if (ret < 0) {
2629917703c1SFam Zheng error_setg_errno(errp, -ret, "Could not write description");
2630917703c1SFam Zheng goto exit;
2631917703c1SFam Zheng }
2632917703c1SFam Zheng /* bdrv_pwrite write padding zeros to align to sector, we don't need that
2633917703c1SFam Zheng * for description file */
2634917703c1SFam Zheng if (desc_offset == 0) {
2635a5c4e5beSAlberto Faria ret = blk_co_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
26363015372dSFam Zheng if (ret < 0) {
26373015372dSFam Zheng goto exit;
2638917703c1SFam Zheng }
26393015372dSFam Zheng }
26403015372dSFam Zheng ret = 0;
2641af057fe7SFam Zheng exit:
26423015372dSFam Zheng if (blk) {
2643b2ab5f54SKevin Wolf blk_co_unref(blk);
2644917703c1SFam Zheng }
26453015372dSFam Zheng g_free(desc);
26463015372dSFam Zheng g_free(parent_desc_line);
26473015372dSFam Zheng g_string_free(ext_desc_lines, true);
26483015372dSFam Zheng return ret;
26493015372dSFam Zheng }
26503015372dSFam Zheng
26513015372dSFam Zheng typedef struct {
26523015372dSFam Zheng char *path;
26533015372dSFam Zheng char *prefix;
26543015372dSFam Zheng char *postfix;
26553015372dSFam Zheng QemuOpts *opts;
26563015372dSFam Zheng } VMDKCreateOptsData;
26573015372dSFam Zheng
26584db7ba3bSKevin Wolf static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts_cb(int64_t size,int idx,bool flat,bool split,bool compress,bool zeroed_grain,void * opaque,Error ** errp)26594ec8df01SKevin Wolf vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
26604ec8df01SKevin Wolf bool compress, bool zeroed_grain, void *opaque,
26613015372dSFam Zheng Error **errp)
26623015372dSFam Zheng {
26633015372dSFam Zheng BlockBackend *blk = NULL;
26643015372dSFam Zheng BlockDriverState *bs = NULL;
26653015372dSFam Zheng VMDKCreateOptsData *data = opaque;
26663015372dSFam Zheng char *ext_filename = NULL;
26673015372dSFam Zheng char *rel_filename = NULL;
26683015372dSFam Zheng
26694a960eceSKevin Wolf /* We're done, don't create excess extents. */
26704a960eceSKevin Wolf if (size == -1) {
26714a960eceSKevin Wolf assert(errp == NULL);
26724a960eceSKevin Wolf return NULL;
26734a960eceSKevin Wolf }
26744a960eceSKevin Wolf
26753015372dSFam Zheng if (idx == 0) {
26763015372dSFam Zheng rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix);
26773015372dSFam Zheng } else if (split) {
26783015372dSFam Zheng rel_filename = g_strdup_printf("%s-%c%03d%s",
26793015372dSFam Zheng data->prefix,
26803015372dSFam Zheng flat ? 'f' : 's', idx, data->postfix);
26813015372dSFam Zheng } else {
26823015372dSFam Zheng assert(idx == 1);
26833015372dSFam Zheng rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix);
26843015372dSFam Zheng }
26853015372dSFam Zheng
26863015372dSFam Zheng ext_filename = g_strdup_printf("%s%s", data->path, rel_filename);
26873015372dSFam Zheng g_free(rel_filename);
26883015372dSFam Zheng
26893015372dSFam Zheng if (vmdk_create_extent(ext_filename, size,
26903015372dSFam Zheng flat, compress, zeroed_grain, &blk, data->opts,
26913015372dSFam Zheng errp)) {
26923015372dSFam Zheng goto exit;
26933015372dSFam Zheng }
2694b2ab5f54SKevin Wolf bdrv_co_unref(bs);
26953015372dSFam Zheng exit:
26963015372dSFam Zheng g_free(ext_filename);
26973015372dSFam Zheng return blk;
26983015372dSFam Zheng }
26993015372dSFam Zheng
27004db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts(BlockDriver * drv,const char * filename,QemuOpts * opts,Error ** errp)27014ec8df01SKevin Wolf vmdk_co_create_opts(BlockDriver *drv, const char *filename,
27024ec8df01SKevin Wolf QemuOpts *opts, Error **errp)
27033015372dSFam Zheng {
27043015372dSFam Zheng Error *local_err = NULL;
27053015372dSFam Zheng char *desc = NULL;
27063015372dSFam Zheng int64_t total_size = 0;
27073015372dSFam Zheng char *adapter_type = NULL;
27083015372dSFam Zheng BlockdevVmdkAdapterType adapter_type_enum;
27093015372dSFam Zheng char *backing_file = NULL;
27103015372dSFam Zheng char *hw_version = NULL;
2711f3d43dfdSThomas Weißschuh char *toolsversion = NULL;
27123015372dSFam Zheng char *fmt = NULL;
27133015372dSFam Zheng BlockdevVmdkSubformat subformat;
27143015372dSFam Zheng int ret = 0;
27153015372dSFam Zheng char *path = g_malloc0(PATH_MAX);
27163015372dSFam Zheng char *prefix = g_malloc0(PATH_MAX);
27173015372dSFam Zheng char *postfix = g_malloc0(PATH_MAX);
27183015372dSFam Zheng char *desc_line = g_malloc0(BUF_SIZE);
27193015372dSFam Zheng char *ext_filename = g_malloc0(PATH_MAX);
27203015372dSFam Zheng char *desc_filename = g_malloc0(PATH_MAX);
27213015372dSFam Zheng char *parent_desc_line = g_malloc0(BUF_SIZE);
27223015372dSFam Zheng bool zeroed_grain;
27233015372dSFam Zheng bool compat6;
27243015372dSFam Zheng VMDKCreateOptsData data;
2725d51a814cSEric Blake char *backing_fmt = NULL;
2726d51a814cSEric Blake
2727d51a814cSEric Blake backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
2728d51a814cSEric Blake if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) {
2729d51a814cSEric Blake error_setg(errp, "backing_file must be a vmdk image");
2730d51a814cSEric Blake ret = -EINVAL;
2731d51a814cSEric Blake goto exit;
2732d51a814cSEric Blake }
27333015372dSFam Zheng
27343015372dSFam Zheng if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
27353015372dSFam Zheng ret = -EINVAL;
27363015372dSFam Zheng goto exit;
27373015372dSFam Zheng }
27383015372dSFam Zheng /* Read out options */
27393015372dSFam Zheng total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
27403015372dSFam Zheng BDRV_SECTOR_SIZE);
27413015372dSFam Zheng adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
27423015372dSFam Zheng backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
27433015372dSFam Zheng hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
2744f3d43dfdSThomas Weißschuh toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
27453015372dSFam Zheng compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
27463015372dSFam Zheng if (strcmp(hw_version, "undefined") == 0) {
27473015372dSFam Zheng g_free(hw_version);
274826c9296cSyuchenlin hw_version = NULL;
27493015372dSFam Zheng }
27503015372dSFam Zheng fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
27513015372dSFam Zheng zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false);
27523015372dSFam Zheng
27533015372dSFam Zheng if (adapter_type) {
27543015372dSFam Zheng adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup,
27553015372dSFam Zheng adapter_type,
27563015372dSFam Zheng BLOCKDEV_VMDK_ADAPTER_TYPE_IDE,
27573015372dSFam Zheng &local_err);
27583015372dSFam Zheng if (local_err) {
27593015372dSFam Zheng error_propagate(errp, local_err);
27603015372dSFam Zheng ret = -EINVAL;
27613015372dSFam Zheng goto exit;
27623015372dSFam Zheng }
27633015372dSFam Zheng } else {
27643015372dSFam Zheng adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE;
27653015372dSFam Zheng }
27663015372dSFam Zheng
27673015372dSFam Zheng if (!fmt) {
27683015372dSFam Zheng /* Default format to monolithicSparse */
27693015372dSFam Zheng subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE;
27703015372dSFam Zheng } else {
27713015372dSFam Zheng subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup,
27723015372dSFam Zheng fmt,
27733015372dSFam Zheng BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE,
27743015372dSFam Zheng &local_err);
27753015372dSFam Zheng if (local_err) {
27763015372dSFam Zheng error_propagate(errp, local_err);
27773015372dSFam Zheng ret = -EINVAL;
27783015372dSFam Zheng goto exit;
27793015372dSFam Zheng }
27803015372dSFam Zheng }
27813015372dSFam Zheng data = (VMDKCreateOptsData){
27823015372dSFam Zheng .prefix = prefix,
27833015372dSFam Zheng .postfix = postfix,
27843015372dSFam Zheng .path = path,
27853015372dSFam Zheng .opts = opts,
27863015372dSFam Zheng };
27873015372dSFam Zheng ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
2788f3d43dfdSThomas Weißschuh backing_file, hw_version, toolsversion, compat6,
2789f3d43dfdSThomas Weißschuh zeroed_grain, vmdk_co_create_opts_cb, &data, errp);
27903015372dSFam Zheng
27913015372dSFam Zheng exit:
2792d51a814cSEric Blake g_free(backing_fmt);
27935820f1daSChunyan Liu g_free(adapter_type);
27945820f1daSChunyan Liu g_free(backing_file);
2795f249924eSJanne Karhunen g_free(hw_version);
2796f3d43dfdSThomas Weißschuh g_free(toolsversion);
27975820f1daSChunyan Liu g_free(fmt);
2798af057fe7SFam Zheng g_free(desc);
2799fe206562SJeff Cody g_free(path);
2800fe206562SJeff Cody g_free(prefix);
2801fe206562SJeff Cody g_free(postfix);
2802fe206562SJeff Cody g_free(desc_line);
2803fe206562SJeff Cody g_free(ext_filename);
2804fe206562SJeff Cody g_free(desc_filename);
2805fe206562SJeff Cody g_free(parent_desc_line);
28063015372dSFam Zheng return ret;
28073015372dSFam Zheng }
28083015372dSFam Zheng
28094db7ba3bSKevin Wolf static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_cb(int64_t size,int idx,bool flat,bool split,bool compress,bool zeroed_grain,void * opaque,Error ** errp)28104db7ba3bSKevin Wolf vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
28114db7ba3bSKevin Wolf bool zeroed_grain, void *opaque, Error **errp)
28123015372dSFam Zheng {
28133015372dSFam Zheng int ret;
28143015372dSFam Zheng BlockDriverState *bs;
28153015372dSFam Zheng BlockBackend *blk;
28163015372dSFam Zheng BlockdevCreateOptionsVmdk *opts = opaque;
28173015372dSFam Zheng
28183015372dSFam Zheng if (idx == 0) {
2819882f202eSKevin Wolf bs = bdrv_co_open_blockdev_ref(opts->file, errp);
28203015372dSFam Zheng } else {
28213015372dSFam Zheng int i;
28223015372dSFam Zheng BlockdevRefList *list = opts->extents;
28233015372dSFam Zheng for (i = 1; i < idx; i++) {
28243015372dSFam Zheng if (!list || !list->next) {
28253015372dSFam Zheng error_setg(errp, "Extent [%d] not specified", i);
28263015372dSFam Zheng return NULL;
28273015372dSFam Zheng }
28283015372dSFam Zheng list = list->next;
28293015372dSFam Zheng }
28303015372dSFam Zheng if (!list) {
28313015372dSFam Zheng error_setg(errp, "Extent [%d] not specified", idx - 1);
28323015372dSFam Zheng return NULL;
28333015372dSFam Zheng }
2834882f202eSKevin Wolf bs = bdrv_co_open_blockdev_ref(list->value, errp);
28353015372dSFam Zheng }
28363015372dSFam Zheng if (!bs) {
28373015372dSFam Zheng return NULL;
28383015372dSFam Zheng }
2839882f202eSKevin Wolf blk = blk_co_new_with_bs(bs,
2840882f202eSKevin Wolf BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
2841882f202eSKevin Wolf BLK_PERM_RESIZE,
2842882f202eSKevin Wolf BLK_PERM_ALL,
2843882f202eSKevin Wolf errp);
2844a3aeeab5SEric Blake if (!blk) {
28453015372dSFam Zheng return NULL;
28463015372dSFam Zheng }
28473015372dSFam Zheng blk_set_allow_write_beyond_eof(blk, true);
2848b2ab5f54SKevin Wolf bdrv_co_unref(bs);
28493015372dSFam Zheng
28504a960eceSKevin Wolf if (size != -1) {
28513015372dSFam Zheng ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
28523015372dSFam Zheng if (ret) {
2853b2ab5f54SKevin Wolf blk_co_unref(blk);
28543015372dSFam Zheng blk = NULL;
28553015372dSFam Zheng }
28564a960eceSKevin Wolf }
28573015372dSFam Zheng return blk;
28583015372dSFam Zheng }
28593015372dSFam Zheng
28604db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create(BlockdevCreateOptions * create_options,Error ** errp)28614ec8df01SKevin Wolf vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
28623015372dSFam Zheng {
28633015372dSFam Zheng BlockdevCreateOptionsVmdk *opts;
28643015372dSFam Zheng
28653015372dSFam Zheng opts = &create_options->u.vmdk;
28663015372dSFam Zheng
28673015372dSFam Zheng /* Validate options */
28683015372dSFam Zheng if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
28693015372dSFam Zheng error_setg(errp, "Image size must be a multiple of 512 bytes");
2870851fd4a0SMarkus Armbruster return -EINVAL;
28713015372dSFam Zheng }
28723015372dSFam Zheng
2873851fd4a0SMarkus Armbruster return vmdk_co_do_create(opts->size,
28743015372dSFam Zheng opts->subformat,
28753015372dSFam Zheng opts->adapter_type,
28763015372dSFam Zheng opts->backing_file,
28773015372dSFam Zheng opts->hwversion,
2878f3d43dfdSThomas Weißschuh opts->toolsversion,
28793015372dSFam Zheng false,
28803015372dSFam Zheng opts->zeroed_grain,
28813015372dSFam Zheng vmdk_co_create_cb,
28823015372dSFam Zheng opts, errp);
2883019d6b8fSAnthony Liguori }
2884019d6b8fSAnthony Liguori
/*
 * Driver close callback: releases the extents, the saved create type string
 * and the migration blocker registered for this image.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;

    vmdk_free_extents(bs);
    g_free(state->create_type);

    migrate_del_blocker(&state->migration_blocker);
}
2894019d6b8fSAnthony Liguori
2895de335638SEmanuele Giuseppe Esposito static int64_t coroutine_fn GRAPH_RDLOCK
vmdk_co_get_allocated_file_size(BlockDriverState * bs)289682618d7bSEmanuele Giuseppe Esposito vmdk_co_get_allocated_file_size(BlockDriverState *bs)
28974a1d5e1fSFam Zheng {
28984a1d5e1fSFam Zheng int i;
28994a1d5e1fSFam Zheng int64_t ret = 0;
29004a1d5e1fSFam Zheng int64_t r;
29014a1d5e1fSFam Zheng BDRVVmdkState *s = bs->opaque;
29024a1d5e1fSFam Zheng
290382618d7bSEmanuele Giuseppe Esposito ret = bdrv_co_get_allocated_file_size(bs->file->bs);
29044a1d5e1fSFam Zheng if (ret < 0) {
29054a1d5e1fSFam Zheng return ret;
29064a1d5e1fSFam Zheng }
29074a1d5e1fSFam Zheng for (i = 0; i < s->num_extents; i++) {
29089a4f4c31SKevin Wolf if (s->extents[i].file == bs->file) {
29094a1d5e1fSFam Zheng continue;
29104a1d5e1fSFam Zheng }
291182618d7bSEmanuele Giuseppe Esposito r = bdrv_co_get_allocated_file_size(s->extents[i].file->bs);
29124a1d5e1fSFam Zheng if (r < 0) {
29134a1d5e1fSFam Zheng return r;
29144a1d5e1fSFam Zheng }
29154a1d5e1fSFam Zheng ret += r;
29164a1d5e1fSFam Zheng }
29174a1d5e1fSFam Zheng return ret;
29184a1d5e1fSFam Zheng }
29190e7e1989SKevin Wolf
vmdk_has_zero_init(BlockDriverState * bs)292006717986SKevin Wolf static int GRAPH_RDLOCK vmdk_has_zero_init(BlockDriverState *bs)
2921da7a50f9SFam Zheng {
2922da7a50f9SFam Zheng int i;
2923da7a50f9SFam Zheng BDRVVmdkState *s = bs->opaque;
2924da7a50f9SFam Zheng
2925da7a50f9SFam Zheng /* If has a flat extent and its underlying storage doesn't have zero init,
2926da7a50f9SFam Zheng * return 0. */
2927da7a50f9SFam Zheng for (i = 0; i < s->num_extents; i++) {
2928da7a50f9SFam Zheng if (s->extents[i].flat) {
292924bc15d1SKevin Wolf if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
2930da7a50f9SFam Zheng return 0;
2931da7a50f9SFam Zheng }
2932da7a50f9SFam Zheng }
2933da7a50f9SFam Zheng }
2934da7a50f9SFam Zheng return 1;
2935da7a50f9SFam Zheng }
2936da7a50f9SFam Zheng
vmdk_get_extent_info(VmdkExtent * extent)2937b7cfc7d5SKevin Wolf static VmdkExtentInfo * GRAPH_RDLOCK vmdk_get_extent_info(VmdkExtent *extent)
2938f4c129a3SFam Zheng {
2939456e7517SHanna Reitz VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
2940f4c129a3SFam Zheng
2941f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
2942456e7517SHanna Reitz *info = (VmdkExtentInfo){
294324bc15d1SKevin Wolf .filename = g_strdup(extent->file->bs->filename),
2944f4c129a3SFam Zheng .format = g_strdup(extent->type),
2945f4c129a3SFam Zheng .virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
2946f4c129a3SFam Zheng .compressed = extent->compressed,
2947f4c129a3SFam Zheng .has_compressed = extent->compressed,
2948f4c129a3SFam Zheng .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE,
2949f4c129a3SFam Zheng .has_cluster_size = !extent->flat,
2950f4c129a3SFam Zheng };
2951f4c129a3SFam Zheng
2952f4c129a3SFam Zheng return info;
2953f4c129a3SFam Zheng }
2954f4c129a3SFam Zheng
2955b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_check(BlockDriverState * bs,BdrvCheckResult * result,BdrvCheckMode fix)2956b9b10c35SKevin Wolf vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
2957f43aa8e1SPeter Lieven {
2958f43aa8e1SPeter Lieven BDRVVmdkState *s = bs->opaque;
2959f43aa8e1SPeter Lieven VmdkExtent *extent = NULL;
2960f43aa8e1SPeter Lieven int64_t sector_num = 0;
296117362398SPaolo Bonzini int64_t total_sectors = bdrv_co_nb_sectors(bs);
2962f43aa8e1SPeter Lieven int ret;
2963f43aa8e1SPeter Lieven uint64_t cluster_offset;
2964f43aa8e1SPeter Lieven
2965f43aa8e1SPeter Lieven if (fix) {
2966f43aa8e1SPeter Lieven return -ENOTSUP;
2967f43aa8e1SPeter Lieven }
2968f43aa8e1SPeter Lieven
2969f43aa8e1SPeter Lieven for (;;) {
2970f43aa8e1SPeter Lieven if (sector_num >= total_sectors) {
2971f43aa8e1SPeter Lieven return 0;
2972f43aa8e1SPeter Lieven }
2973f43aa8e1SPeter Lieven extent = find_extent(s, sector_num, extent);
2974f43aa8e1SPeter Lieven if (!extent) {
2975f43aa8e1SPeter Lieven fprintf(stderr,
2976f43aa8e1SPeter Lieven "ERROR: could not find extent for sector %" PRId64 "\n",
2977f43aa8e1SPeter Lieven sector_num);
29780e51b9b7SFam Zheng ret = -EINVAL;
2979f43aa8e1SPeter Lieven break;
2980f43aa8e1SPeter Lieven }
2981f43aa8e1SPeter Lieven ret = get_cluster_offset(bs, extent, NULL,
2982f43aa8e1SPeter Lieven sector_num << BDRV_SECTOR_BITS,
2983c6ac36e1SFam Zheng false, &cluster_offset, 0, 0);
2984f43aa8e1SPeter Lieven if (ret == VMDK_ERROR) {
2985f43aa8e1SPeter Lieven fprintf(stderr,
2986f43aa8e1SPeter Lieven "ERROR: could not get cluster_offset for sector %"
2987f43aa8e1SPeter Lieven PRId64 "\n", sector_num);
2988f43aa8e1SPeter Lieven break;
2989f43aa8e1SPeter Lieven }
29900e51b9b7SFam Zheng if (ret == VMDK_OK) {
29910af02bd1SPaolo Bonzini int64_t extent_len = bdrv_co_getlength(extent->file->bs);
29920e51b9b7SFam Zheng if (extent_len < 0) {
29930e51b9b7SFam Zheng fprintf(stderr,
29940e51b9b7SFam Zheng "ERROR: could not get extent file length for sector %"
29950e51b9b7SFam Zheng PRId64 "\n", sector_num);
29960e51b9b7SFam Zheng ret = extent_len;
29970e51b9b7SFam Zheng break;
29980e51b9b7SFam Zheng }
29990e51b9b7SFam Zheng if (cluster_offset >= extent_len) {
3000f43aa8e1SPeter Lieven fprintf(stderr,
3001f43aa8e1SPeter Lieven "ERROR: cluster offset for sector %"
3002f43aa8e1SPeter Lieven PRId64 " points after EOF\n", sector_num);
30030e51b9b7SFam Zheng ret = -EINVAL;
3004f43aa8e1SPeter Lieven break;
3005f43aa8e1SPeter Lieven }
30060e51b9b7SFam Zheng }
3007f43aa8e1SPeter Lieven sector_num += extent->cluster_sectors;
3008f43aa8e1SPeter Lieven }
3009f43aa8e1SPeter Lieven
3010f43aa8e1SPeter Lieven result->corruptions++;
30110e51b9b7SFam Zheng return ret;
3012f43aa8e1SPeter Lieven }
3013f43aa8e1SPeter Lieven
30143574499aSKevin Wolf static ImageInfoSpecific * GRAPH_RDLOCK
vmdk_get_specific_info(BlockDriverState * bs,Error ** errp)30153574499aSKevin Wolf vmdk_get_specific_info(BlockDriverState *bs, Error **errp)
3016f4c129a3SFam Zheng {
3017f4c129a3SFam Zheng int i;
3018f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
3019f4c129a3SFam Zheng ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
3020456e7517SHanna Reitz VmdkExtentInfoList **tail;
3021f4c129a3SFam Zheng
3022f4c129a3SFam Zheng *spec_info = (ImageInfoSpecific){
30236a8f9661SEric Blake .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
302432bafa8fSEric Blake .u = {
302532bafa8fSEric Blake .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
3026f4c129a3SFam Zheng },
3027f4c129a3SFam Zheng };
3028f4c129a3SFam Zheng
302932bafa8fSEric Blake *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
3030f4c129a3SFam Zheng .create_type = g_strdup(s->create_type),
3031f4c129a3SFam Zheng .cid = s->cid,
3032f4c129a3SFam Zheng .parent_cid = s->parent_cid,
3033f4c129a3SFam Zheng };
3034f4c129a3SFam Zheng
3035c3033fd3SEric Blake tail = &spec_info->u.vmdk.data->extents;
3036f4c129a3SFam Zheng for (i = 0; i < s->num_extents; i++) {
3037c3033fd3SEric Blake QAPI_LIST_APPEND(tail, vmdk_get_extent_info(&s->extents[i]));
3038f4c129a3SFam Zheng }
3039f4c129a3SFam Zheng
3040f4c129a3SFam Zheng return spec_info;
3041f4c129a3SFam Zheng }
3042f4c129a3SFam Zheng
vmdk_extents_type_eq(const VmdkExtent * a,const VmdkExtent * b)30435f583307SFam Zheng static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
30445f583307SFam Zheng {
30455f583307SFam Zheng return a->flat == b->flat &&
30465f583307SFam Zheng a->compressed == b->compressed &&
30475f583307SFam Zheng (a->flat || a->cluster_sectors == b->cluster_sectors);
30485f583307SFam Zheng }
30495f583307SFam Zheng
30503d47eb0aSEmanuele Giuseppe Esposito static int coroutine_fn
vmdk_co_get_info(BlockDriverState * bs,BlockDriverInfo * bdi)30513d47eb0aSEmanuele Giuseppe Esposito vmdk_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
305274fe188cSFam Zheng {
305374fe188cSFam Zheng int i;
305474fe188cSFam Zheng BDRVVmdkState *s = bs->opaque;
305574fe188cSFam Zheng assert(s->num_extents);
30565f583307SFam Zheng
30575f583307SFam Zheng /* See if we have multiple extents but they have different cases */
30585f583307SFam Zheng for (i = 1; i < s->num_extents; i++) {
30595f583307SFam Zheng if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
30605f583307SFam Zheng return -ENOTSUP;
30615f583307SFam Zheng }
30625f583307SFam Zheng }
306374fe188cSFam Zheng bdi->needs_compressed_writes = s->extents[0].compressed;
306474fe188cSFam Zheng if (!s->extents[0].flat) {
306574fe188cSFam Zheng bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
306674fe188cSFam Zheng }
306774fe188cSFam Zheng return 0;
306874fe188cSFam Zheng }
306974fe188cSFam Zheng
3070004915a9SKevin Wolf static void GRAPH_RDLOCK
vmdk_gather_child_options(BlockDriverState * bs,QDict * target,bool backing_overridden)3071004915a9SKevin Wolf vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
3072abc521a9SMax Reitz bool backing_overridden)
3073abc521a9SMax Reitz {
3074abc521a9SMax Reitz /* No children but file and backing can be explicitly specified (TODO) */
3075abc521a9SMax Reitz qdict_put(target, "file",
3076abc521a9SMax Reitz qobject_ref(bs->file->bs->full_open_options));
3077abc521a9SMax Reitz
3078abc521a9SMax Reitz if (backing_overridden) {
3079abc521a9SMax Reitz if (bs->backing) {
3080abc521a9SMax Reitz qdict_put(target, "backing",
3081abc521a9SMax Reitz qobject_ref(bs->backing->bs->full_open_options));
3082abc521a9SMax Reitz } else {
3083abc521a9SMax Reitz qdict_put_null(target, "backing");
3084abc521a9SMax Reitz }
3085abc521a9SMax Reitz }
3086abc521a9SMax Reitz }
3087abc521a9SMax Reitz
30885820f1daSChunyan Liu static QemuOptsList vmdk_create_opts = {
30895820f1daSChunyan Liu .name = "vmdk-create-opts",
30905820f1daSChunyan Liu .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
30915820f1daSChunyan Liu .desc = {
3092db08adf5SKevin Wolf {
3093db08adf5SKevin Wolf .name = BLOCK_OPT_SIZE,
30945820f1daSChunyan Liu .type = QEMU_OPT_SIZE,
3095db08adf5SKevin Wolf .help = "Virtual disk size"
3096db08adf5SKevin Wolf },
3097db08adf5SKevin Wolf {
30987f2039f6SOthmar Pasteka .name = BLOCK_OPT_ADAPTER_TYPE,
30995820f1daSChunyan Liu .type = QEMU_OPT_STRING,
31007f2039f6SOthmar Pasteka .help = "Virtual adapter type, can be one of "
31017f2039f6SOthmar Pasteka "ide (default), lsilogic, buslogic or legacyESX"
31027f2039f6SOthmar Pasteka },
31037f2039f6SOthmar Pasteka {
3104db08adf5SKevin Wolf .name = BLOCK_OPT_BACKING_FILE,
31055820f1daSChunyan Liu .type = QEMU_OPT_STRING,
3106db08adf5SKevin Wolf .help = "File name of a base image"
3107db08adf5SKevin Wolf },
3108db08adf5SKevin Wolf {
3109d51a814cSEric Blake .name = BLOCK_OPT_BACKING_FMT,
3110d51a814cSEric Blake .type = QEMU_OPT_STRING,
3111d51a814cSEric Blake .help = "Must be 'vmdk' if present",
3112d51a814cSEric Blake },
3113d51a814cSEric Blake {
3114db08adf5SKevin Wolf .name = BLOCK_OPT_COMPAT6,
31155820f1daSChunyan Liu .type = QEMU_OPT_BOOL,
31165820f1daSChunyan Liu .help = "VMDK version 6 image",
31175820f1daSChunyan Liu .def_value_str = "off"
3118db08adf5SKevin Wolf },
3119f66fd6c3SFam Zheng {
3120f249924eSJanne Karhunen .name = BLOCK_OPT_HWVERSION,
3121f249924eSJanne Karhunen .type = QEMU_OPT_STRING,
3122f249924eSJanne Karhunen .help = "VMDK hardware version",
3123f249924eSJanne Karhunen .def_value_str = "undefined"
3124f249924eSJanne Karhunen },
3125f249924eSJanne Karhunen {
3126f3d43dfdSThomas Weißschuh .name = BLOCK_OPT_TOOLSVERSION,
3127f3d43dfdSThomas Weißschuh .type = QEMU_OPT_STRING,
3128f3d43dfdSThomas Weißschuh .help = "VMware guest tools version",
3129f3d43dfdSThomas Weißschuh },
3130f3d43dfdSThomas Weißschuh {
3131f66fd6c3SFam Zheng .name = BLOCK_OPT_SUBFMT,
31325820f1daSChunyan Liu .type = QEMU_OPT_STRING,
3133f66fd6c3SFam Zheng .help =
3134f66fd6c3SFam Zheng "VMDK flat extent format, can be one of "
31356c031aacSFam Zheng "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
3136f66fd6c3SFam Zheng },
313769e0b6dfSFam Zheng {
313869e0b6dfSFam Zheng .name = BLOCK_OPT_ZEROED_GRAIN,
31395820f1daSChunyan Liu .type = QEMU_OPT_BOOL,
31405820f1daSChunyan Liu .help = "Enable efficient zero writes "
31415820f1daSChunyan Liu "using the zeroed-grain GTE feature"
314269e0b6dfSFam Zheng },
31435820f1daSChunyan Liu { /* end of list */ }
31445820f1daSChunyan Liu }
31450e7e1989SKevin Wolf };
31460e7e1989SKevin Wolf
3147019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = {
3148019d6b8fSAnthony Liguori .format_name = "vmdk",
3149019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVVmdkState),
3150019d6b8fSAnthony Liguori .bdrv_probe = vmdk_probe,
31516511ef77SKevin Wolf .bdrv_open = vmdk_open,
31522fd61638SPaolo Bonzini .bdrv_co_check = vmdk_co_check,
31533897575fSJeff Cody .bdrv_reopen_prepare = vmdk_reopen_prepare,
31546d17e287SHanna Reitz .bdrv_reopen_commit = vmdk_reopen_commit,
31556d17e287SHanna Reitz .bdrv_reopen_abort = vmdk_reopen_abort,
315669dca43dSMax Reitz .bdrv_child_perm = bdrv_default_perms,
3157f10cc243SKevin Wolf .bdrv_co_preadv = vmdk_co_preadv,
315837b1d7d8SKevin Wolf .bdrv_co_pwritev = vmdk_co_pwritev,
3159b2c622d3SPavel Butsykin .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
3160a620f2aeSEric Blake .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
3161019d6b8fSAnthony Liguori .bdrv_close = vmdk_close,
3162efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = vmdk_co_create_opts,
31633015372dSFam Zheng .bdrv_co_create = vmdk_co_create,
3164c72080b9SEric Blake .bdrv_co_block_status = vmdk_co_block_status,
316582618d7bSEmanuele Giuseppe Esposito .bdrv_co_get_allocated_file_size = vmdk_co_get_allocated_file_size,
3166da7a50f9SFam Zheng .bdrv_has_zero_init = vmdk_has_zero_init,
3167f4c129a3SFam Zheng .bdrv_get_specific_info = vmdk_get_specific_info,
3168d34682cdSKevin Wolf .bdrv_refresh_limits = vmdk_refresh_limits,
31693d47eb0aSEmanuele Giuseppe Esposito .bdrv_co_get_info = vmdk_co_get_info,
3170abc521a9SMax Reitz .bdrv_gather_child_options = vmdk_gather_child_options,
31710e7e1989SKevin Wolf
3172d67066d8SMax Reitz .is_format = true,
31738ee79e70SKevin Wolf .supports_backing = true,
31745820f1daSChunyan Liu .create_opts = &vmdk_create_opts,
3175019d6b8fSAnthony Liguori };
3176019d6b8fSAnthony Liguori
/* Registers the VMDK driver with the block layer. */
static void bdrv_vmdk_init(void)
{
    bdrv_register(&bdrv_vmdk);
}

/* Hooks bdrv_vmdk_init() into block_init(). */
block_init(bdrv_vmdk_init);
3183