xref: /openbmc/qemu/block/vmdk.c (revision 353a5d84)
/*
 * Block driver for the VMDK format
 *
 * Copyright (c) 2004 Fabrice Bellard
 * Copyright (c) 2005 Filip Navara
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25019d6b8fSAnthony Liguori 
2680c71a24SPeter Maydell #include "qemu/osdep.h"
27da34e65cSMarkus Armbruster #include "qapi/error.h"
28737e150eSPaolo Bonzini #include "block/block_int.h"
29c4bea169SKevin Wolf #include "sysemu/block-backend.h"
30abc521a9SMax Reitz #include "qapi/qmp/qdict.h"
31cc7a8ea7SMarkus Armbruster #include "qapi/qmp/qerror.h"
32d49b6836SMarkus Armbruster #include "qemu/error-report.h"
331de7afc9SPaolo Bonzini #include "qemu/module.h"
34922a01a0SMarkus Armbruster #include "qemu/option.h"
3558369e22SPaolo Bonzini #include "qemu/bswap.h"
365df022cfSPeter Maydell #include "qemu/memalign.h"
37795c40b8SJuan Quintela #include "migration/blocker.h"
38f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
392923d34fSStefan Weil #include <zlib.h>
40019d6b8fSAnthony Liguori 
/*
 * Magic numbers compared by vmdk_probe() against the first four bytes of
 * an image, read as a big-endian 32-bit value: the ASCII sequences "COWD"
 * and "KDMV".
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

/* Value and bit flags for the VMDK4 header (see VMDK4Header) */
#define VMDK4_COMPRESSION_DEFLATE 1
#define VMDK4_FLAG_NL_DETECT (1 << 0)
#define VMDK4_FLAG_RGD (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)
/* All-ones 64-bit sentinel; NOTE(review): presumably a gd_offset value */
#define VMDK4_GD_AT_END 0xffffffffffffffffULL

/* 2^32 sectors */
#define VMDK_EXTENT_MAX_SECTORS (1ULL << 32)

#define VMDK_GTE_ZEROED 0x1

/* VMDK internal error codes */
#define VMDK_OK      0
#define VMDK_ERROR   (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
#define VMDK_ZEROED  (-3)

/* Names of VMDK-specific block options */
#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
#define BLOCK_OPT_TOOLSVERSION "toolsversion"
6569e0b6dfSFam Zheng 
66019d6b8fSAnthony Liguori typedef struct {
67019d6b8fSAnthony Liguori     uint32_t version;
68019d6b8fSAnthony Liguori     uint32_t flags;
69019d6b8fSAnthony Liguori     uint32_t disk_sectors;
70019d6b8fSAnthony Liguori     uint32_t granularity;
71019d6b8fSAnthony Liguori     uint32_t l1dir_offset;
72019d6b8fSAnthony Liguori     uint32_t l1dir_size;
73019d6b8fSAnthony Liguori     uint32_t file_sectors;
74019d6b8fSAnthony Liguori     uint32_t cylinders;
75019d6b8fSAnthony Liguori     uint32_t heads;
76019d6b8fSAnthony Liguori     uint32_t sectors_per_track;
775d8caa54SFam Zheng } QEMU_PACKED VMDK3Header;
78019d6b8fSAnthony Liguori 
79019d6b8fSAnthony Liguori typedef struct {
80019d6b8fSAnthony Liguori     uint32_t version;
81019d6b8fSAnthony Liguori     uint32_t flags;
82e98768d4SFam Zheng     uint64_t capacity;
83e98768d4SFam Zheng     uint64_t granularity;
84e98768d4SFam Zheng     uint64_t desc_offset;
85e98768d4SFam Zheng     uint64_t desc_size;
86ca8804ceSFam Zheng     /* Number of GrainTableEntries per GrainTable */
87ca8804ceSFam Zheng     uint32_t num_gtes_per_gt;
88e98768d4SFam Zheng     uint64_t rgd_offset;
89e98768d4SFam Zheng     uint64_t gd_offset;
90e98768d4SFam Zheng     uint64_t grain_offset;
91019d6b8fSAnthony Liguori     char filler[1];
92019d6b8fSAnthony Liguori     char check_bytes[4];
93432bb170SFam Zheng     uint16_t compressAlgorithm;
94541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header;
95019d6b8fSAnthony Liguori 
9698eb9733SSam Eiderman typedef struct VMDKSESparseConstHeader {
9798eb9733SSam Eiderman     uint64_t magic;
9898eb9733SSam Eiderman     uint64_t version;
9998eb9733SSam Eiderman     uint64_t capacity;
10098eb9733SSam Eiderman     uint64_t grain_size;
10198eb9733SSam Eiderman     uint64_t grain_table_size;
10298eb9733SSam Eiderman     uint64_t flags;
10398eb9733SSam Eiderman     uint64_t reserved1;
10498eb9733SSam Eiderman     uint64_t reserved2;
10598eb9733SSam Eiderman     uint64_t reserved3;
10698eb9733SSam Eiderman     uint64_t reserved4;
10798eb9733SSam Eiderman     uint64_t volatile_header_offset;
10898eb9733SSam Eiderman     uint64_t volatile_header_size;
10998eb9733SSam Eiderman     uint64_t journal_header_offset;
11098eb9733SSam Eiderman     uint64_t journal_header_size;
11198eb9733SSam Eiderman     uint64_t journal_offset;
11298eb9733SSam Eiderman     uint64_t journal_size;
11398eb9733SSam Eiderman     uint64_t grain_dir_offset;
11498eb9733SSam Eiderman     uint64_t grain_dir_size;
11598eb9733SSam Eiderman     uint64_t grain_tables_offset;
11698eb9733SSam Eiderman     uint64_t grain_tables_size;
11798eb9733SSam Eiderman     uint64_t free_bitmap_offset;
11898eb9733SSam Eiderman     uint64_t free_bitmap_size;
11998eb9733SSam Eiderman     uint64_t backmap_offset;
12098eb9733SSam Eiderman     uint64_t backmap_size;
12198eb9733SSam Eiderman     uint64_t grains_offset;
12298eb9733SSam Eiderman     uint64_t grains_size;
12398eb9733SSam Eiderman     uint8_t pad[304];
12498eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseConstHeader;
12598eb9733SSam Eiderman 
12698eb9733SSam Eiderman typedef struct VMDKSESparseVolatileHeader {
12798eb9733SSam Eiderman     uint64_t magic;
12898eb9733SSam Eiderman     uint64_t free_gt_number;
12998eb9733SSam Eiderman     uint64_t next_txn_seq_number;
13098eb9733SSam Eiderman     uint64_t replay_journal;
13198eb9733SSam Eiderman     uint8_t pad[480];
13298eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseVolatileHeader;
13398eb9733SSam Eiderman 
134019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16
135019d6b8fSAnthony Liguori 
136b3976d3cSFam Zheng typedef struct VmdkExtent {
13724bc15d1SKevin Wolf     BdrvChild *file;
138b3976d3cSFam Zheng     bool flat;
139432bb170SFam Zheng     bool compressed;
140432bb170SFam Zheng     bool has_marker;
14114ead646SFam Zheng     bool has_zero_grain;
14298eb9733SSam Eiderman     bool sesparse;
14398eb9733SSam Eiderman     uint64_t sesparse_l2_tables_offset;
14498eb9733SSam Eiderman     uint64_t sesparse_clusters_offset;
14598eb9733SSam Eiderman     int32_t entry_size;
14614ead646SFam Zheng     int version;
147b3976d3cSFam Zheng     int64_t sectors;
148b3976d3cSFam Zheng     int64_t end_sector;
1497fa60fa3SFam Zheng     int64_t flat_start_offset;
150019d6b8fSAnthony Liguori     int64_t l1_table_offset;
151019d6b8fSAnthony Liguori     int64_t l1_backup_table_offset;
15298eb9733SSam Eiderman     void *l1_table;
153019d6b8fSAnthony Liguori     uint32_t *l1_backup_table;
154019d6b8fSAnthony Liguori     unsigned int l1_size;
155019d6b8fSAnthony Liguori     uint32_t l1_entry_sectors;
156019d6b8fSAnthony Liguori 
157019d6b8fSAnthony Liguori     unsigned int l2_size;
15898eb9733SSam Eiderman     void *l2_cache;
159019d6b8fSAnthony Liguori     uint32_t l2_cache_offsets[L2_CACHE_SIZE];
160019d6b8fSAnthony Liguori     uint32_t l2_cache_counts[L2_CACHE_SIZE];
161019d6b8fSAnthony Liguori 
162301c7d38SFam Zheng     int64_t cluster_sectors;
163c6ac36e1SFam Zheng     int64_t next_cluster_sector;
164f4c129a3SFam Zheng     char *type;
165b3976d3cSFam Zheng } VmdkExtent;
166b3976d3cSFam Zheng 
167b3976d3cSFam Zheng typedef struct BDRVVmdkState {
168848c66e8SPaolo Bonzini     CoMutex lock;
169e98768d4SFam Zheng     uint64_t desc_offset;
17069b4d86dSFam Zheng     bool cid_updated;
171c338b6adSFam Zheng     bool cid_checked;
172f4c129a3SFam Zheng     uint32_t cid;
173019d6b8fSAnthony Liguori     uint32_t parent_cid;
174b3976d3cSFam Zheng     int num_extents;
175b3976d3cSFam Zheng     /* Extent array with num_extents entries, ascend ordered by address */
176b3976d3cSFam Zheng     VmdkExtent *extents;
1772bc3166cSKevin Wolf     Error *migration_blocker;
178f4c129a3SFam Zheng     char *create_type;
179019d6b8fSAnthony Liguori } BDRVVmdkState;
180019d6b8fSAnthony Liguori 
/*
 * Scratch state carried in BDRVReopenState.opaque between
 * vmdk_reopen_prepare() and vmdk_reopen_commit()/vmdk_reopen_abort().
 */
typedef struct BDRVVmdkReopenState {
    /* One flag per extent: true if the extent's file was bs->file */
    bool *extents_using_bs_file;
} BDRVVmdkReopenState;
1846d17e287SHanna Reitz 
/*
 * Location of a cluster's table entry.
 * NOTE(review): presumably filled in by the cluster lookup path and
 * consumed when L2 entries are updated -- confirm against the users.
 */
typedef struct VmdkMetaData {
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    bool new_allocation;
    uint32_t *l2_cache_entry;
} VmdkMetaData;
192019d6b8fSAnthony Liguori 
193432bb170SFam Zheng typedef struct VmdkGrainMarker {
194432bb170SFam Zheng     uint64_t lba;
195432bb170SFam Zheng     uint32_t size;
196880a7817SPhilippe Mathieu-Daudé     uint8_t  data[];
1975d8caa54SFam Zheng } QEMU_PACKED VmdkGrainMarker;
198432bb170SFam Zheng 
/*
 * Marker type codes.
 * NOTE(review): presumably the types of metadata markers in
 * VMDK4_FLAG_MARKER images -- confirm against the users.
 */
enum {
    MARKER_END_OF_STREAM    = 0,
    MARKER_GRAIN_TABLE      = 1,
    MARKER_GRAIN_DIRECTORY  = 2,
    MARKER_FOOTER           = 3,
};
20565bd155cSKevin Wolf 
206019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
207019d6b8fSAnthony Liguori {
208019d6b8fSAnthony Liguori     uint32_t magic;
209019d6b8fSAnthony Liguori 
210ae261c86SFam Zheng     if (buf_size < 4) {
211019d6b8fSAnthony Liguori         return 0;
212ae261c86SFam Zheng     }
213019d6b8fSAnthony Liguori     magic = be32_to_cpu(*(uint32_t *)buf);
214019d6b8fSAnthony Liguori     if (magic == VMDK3_MAGIC ||
21501fc99d6SFam Zheng         magic == VMDK4_MAGIC) {
216019d6b8fSAnthony Liguori         return 100;
21701fc99d6SFam Zheng     } else {
21801fc99d6SFam Zheng         const char *p = (const char *)buf;
21901fc99d6SFam Zheng         const char *end = p + buf_size;
22001fc99d6SFam Zheng         while (p < end) {
22101fc99d6SFam Zheng             if (*p == '#') {
22201fc99d6SFam Zheng                 /* skip comment line */
22301fc99d6SFam Zheng                 while (p < end && *p != '\n') {
22401fc99d6SFam Zheng                     p++;
22501fc99d6SFam Zheng                 }
22601fc99d6SFam Zheng                 p++;
22701fc99d6SFam Zheng                 continue;
22801fc99d6SFam Zheng             }
22901fc99d6SFam Zheng             if (*p == ' ') {
23001fc99d6SFam Zheng                 while (p < end && *p == ' ') {
23101fc99d6SFam Zheng                     p++;
23201fc99d6SFam Zheng                 }
23301fc99d6SFam Zheng                 /* skip '\r' if windows line endings used. */
23401fc99d6SFam Zheng                 if (p < end && *p == '\r') {
23501fc99d6SFam Zheng                     p++;
23601fc99d6SFam Zheng                 }
23701fc99d6SFam Zheng                 /* only accept blank lines before 'version=' line */
23801fc99d6SFam Zheng                 if (p == end || *p != '\n') {
239019d6b8fSAnthony Liguori                     return 0;
240019d6b8fSAnthony Liguori                 }
24101fc99d6SFam Zheng                 p++;
24201fc99d6SFam Zheng                 continue;
24301fc99d6SFam Zheng             }
24401fc99d6SFam Zheng             if (end - p >= strlen("version=X\n")) {
24501fc99d6SFam Zheng                 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
246b69864e5SSam Eiderman                     strncmp("version=2\n", p, strlen("version=2\n")) == 0 ||
247b69864e5SSam Eiderman                     strncmp("version=3\n", p, strlen("version=3\n")) == 0) {
24801fc99d6SFam Zheng                     return 100;
24901fc99d6SFam Zheng                 }
25001fc99d6SFam Zheng             }
25101fc99d6SFam Zheng             if (end - p >= strlen("version=X\r\n")) {
25201fc99d6SFam Zheng                 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
253b69864e5SSam Eiderman                     strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 ||
254b69864e5SSam Eiderman                     strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) {
25501fc99d6SFam Zheng                     return 100;
25601fc99d6SFam Zheng                 }
25701fc99d6SFam Zheng             }
25801fc99d6SFam Zheng             return 0;
25901fc99d6SFam Zheng         }
26001fc99d6SFam Zheng         return 0;
26101fc99d6SFam Zheng     }
26201fc99d6SFam Zheng }
263019d6b8fSAnthony Liguori 
/* Size constants, all in bytes */
#define SECTOR_SIZE 512
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define BUF_SIZE 4096
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
268019d6b8fSAnthony Liguori 
269b3976d3cSFam Zheng static void vmdk_free_extents(BlockDriverState *bs)
270b3976d3cSFam Zheng {
271b3976d3cSFam Zheng     int i;
272b3976d3cSFam Zheng     BDRVVmdkState *s = bs->opaque;
273b3c0bfb6SFam Zheng     VmdkExtent *e;
274b3976d3cSFam Zheng 
275b3976d3cSFam Zheng     for (i = 0; i < s->num_extents; i++) {
276b3c0bfb6SFam Zheng         e = &s->extents[i];
277b3c0bfb6SFam Zheng         g_free(e->l1_table);
278b3c0bfb6SFam Zheng         g_free(e->l2_cache);
279b3c0bfb6SFam Zheng         g_free(e->l1_backup_table);
280f4c129a3SFam Zheng         g_free(e->type);
2819a4f4c31SKevin Wolf         if (e->file != bs->file) {
28224bc15d1SKevin Wolf             bdrv_unref_child(bs, e->file);
283b3c0bfb6SFam Zheng         }
284b3976d3cSFam Zheng     }
2857267c094SAnthony Liguori     g_free(s->extents);
286b3976d3cSFam Zheng }
287b3976d3cSFam Zheng 
28886c6b429SFam Zheng static void vmdk_free_last_extent(BlockDriverState *bs)
28986c6b429SFam Zheng {
29086c6b429SFam Zheng     BDRVVmdkState *s = bs->opaque;
29186c6b429SFam Zheng 
29286c6b429SFam Zheng     if (s->num_extents == 0) {
29386c6b429SFam Zheng         return;
29486c6b429SFam Zheng     }
29586c6b429SFam Zheng     s->num_extents--;
2965839e53bSMarkus Armbruster     s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
29786c6b429SFam Zheng }
29886c6b429SFam Zheng 
2999877860eSPeter Maydell /* Return -ve errno, or 0 on success and write CID into *pcid. */
3009877860eSPeter Maydell static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
301019d6b8fSAnthony Liguori {
3025997c210SFam Zheng     char *desc;
3039877860eSPeter Maydell     uint32_t cid;
304019d6b8fSAnthony Liguori     const char *p_name, *cid_str;
305019d6b8fSAnthony Liguori     size_t cid_str_size;
306e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
30799f1835dSKevin Wolf     int ret;
308019d6b8fSAnthony Liguori 
3095997c210SFam Zheng     desc = g_malloc0(DESC_SIZE);
31032cc71deSAlberto Faria     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
31199f1835dSKevin Wolf     if (ret < 0) {
3129877860eSPeter Maydell         goto out;
313e1da9b24SFam Zheng     }
314019d6b8fSAnthony Liguori 
315019d6b8fSAnthony Liguori     if (parent) {
316019d6b8fSAnthony Liguori         cid_str = "parentCID";
317019d6b8fSAnthony Liguori         cid_str_size = sizeof("parentCID");
318019d6b8fSAnthony Liguori     } else {
319019d6b8fSAnthony Liguori         cid_str = "CID";
320019d6b8fSAnthony Liguori         cid_str_size = sizeof("CID");
321019d6b8fSAnthony Liguori     }
322019d6b8fSAnthony Liguori 
32393897b9fSKevin Wolf     desc[DESC_SIZE - 1] = '\0';
324ae261c86SFam Zheng     p_name = strstr(desc, cid_str);
3259877860eSPeter Maydell     if (p_name == NULL) {
3269877860eSPeter Maydell         ret = -EINVAL;
3279877860eSPeter Maydell         goto out;
328019d6b8fSAnthony Liguori     }
3299877860eSPeter Maydell     p_name += cid_str_size;
3309877860eSPeter Maydell     if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
3319877860eSPeter Maydell         ret = -EINVAL;
3329877860eSPeter Maydell         goto out;
3339877860eSPeter Maydell     }
3349877860eSPeter Maydell     *pcid = cid;
3359877860eSPeter Maydell     ret = 0;
336019d6b8fSAnthony Liguori 
3379877860eSPeter Maydell out:
3385997c210SFam Zheng     g_free(desc);
3399877860eSPeter Maydell     return ret;
340019d6b8fSAnthony Liguori }
341019d6b8fSAnthony Liguori 
342019d6b8fSAnthony Liguori static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
343019d6b8fSAnthony Liguori {
344965415ebSFam Zheng     char *desc, *tmp_desc;
345019d6b8fSAnthony Liguori     char *p_name, *tmp_str;
346e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
347965415ebSFam Zheng     int ret = 0;
348019d6b8fSAnthony Liguori 
349965415ebSFam Zheng     desc = g_malloc0(DESC_SIZE);
350965415ebSFam Zheng     tmp_desc = g_malloc0(DESC_SIZE);
35132cc71deSAlberto Faria     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
35299f1835dSKevin Wolf     if (ret < 0) {
353965415ebSFam Zheng         goto out;
354e1da9b24SFam Zheng     }
355019d6b8fSAnthony Liguori 
35693897b9fSKevin Wolf     desc[DESC_SIZE - 1] = '\0';
357019d6b8fSAnthony Liguori     tmp_str = strstr(desc, "parentCID");
35893897b9fSKevin Wolf     if (tmp_str == NULL) {
359965415ebSFam Zheng         ret = -EINVAL;
360965415ebSFam Zheng         goto out;
36193897b9fSKevin Wolf     }
36293897b9fSKevin Wolf 
363965415ebSFam Zheng     pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
364ae261c86SFam Zheng     p_name = strstr(desc, "CID");
365ae261c86SFam Zheng     if (p_name != NULL) {
366019d6b8fSAnthony Liguori         p_name += sizeof("CID");
367965415ebSFam Zheng         snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
368965415ebSFam Zheng         pstrcat(desc, DESC_SIZE, tmp_desc);
369019d6b8fSAnthony Liguori     }
370019d6b8fSAnthony Liguori 
37132cc71deSAlberto Faria     ret = bdrv_pwrite_sync(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
37299f1835dSKevin Wolf 
373965415ebSFam Zheng out:
374965415ebSFam Zheng     g_free(desc);
375965415ebSFam Zheng     g_free(tmp_desc);
376965415ebSFam Zheng     return ret;
377019d6b8fSAnthony Liguori }
378019d6b8fSAnthony Liguori 
379019d6b8fSAnthony Liguori static int vmdk_is_cid_valid(BlockDriverState *bs)
380019d6b8fSAnthony Liguori {
381019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
382019d6b8fSAnthony Liguori     uint32_t cur_pcid;
383019d6b8fSAnthony Liguori 
384760e0063SKevin Wolf     if (!s->cid_checked && bs->backing) {
385760e0063SKevin Wolf         BlockDriverState *p_bs = bs->backing->bs;
386760e0063SKevin Wolf 
387439e89fcSMax Reitz         if (strcmp(p_bs->drv->format_name, "vmdk")) {
388439e89fcSMax Reitz             /* Backing file is not in vmdk format, so it does not have
389439e89fcSMax Reitz              * a CID, which makes the overlay's parent CID invalid */
390439e89fcSMax Reitz             return 0;
391439e89fcSMax Reitz         }
392439e89fcSMax Reitz 
3939877860eSPeter Maydell         if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
3949877860eSPeter Maydell             /* read failure: report as not valid */
3959877860eSPeter Maydell             return 0;
3969877860eSPeter Maydell         }
397ae261c86SFam Zheng         if (s->parent_cid != cur_pcid) {
398ae261c86SFam Zheng             /* CID not valid */
399019d6b8fSAnthony Liguori             return 0;
400019d6b8fSAnthony Liguori         }
401ae261c86SFam Zheng     }
402c338b6adSFam Zheng     s->cid_checked = true;
403ae261c86SFam Zheng     /* CID valid */
404019d6b8fSAnthony Liguori     return 1;
405019d6b8fSAnthony Liguori }
406019d6b8fSAnthony Liguori 
4073897575fSJeff Cody static int vmdk_reopen_prepare(BDRVReopenState *state,
4083897575fSJeff Cody                                BlockReopenQueue *queue, Error **errp)
4093897575fSJeff Cody {
4106d17e287SHanna Reitz     BDRVVmdkState *s;
4116d17e287SHanna Reitz     BDRVVmdkReopenState *rs;
4126d17e287SHanna Reitz     int i;
4136d17e287SHanna Reitz 
4143897575fSJeff Cody     assert(state != NULL);
4153897575fSJeff Cody     assert(state->bs != NULL);
4166d17e287SHanna Reitz     assert(state->opaque == NULL);
4176d17e287SHanna Reitz 
4186d17e287SHanna Reitz     s = state->bs->opaque;
4196d17e287SHanna Reitz 
4206d17e287SHanna Reitz     rs = g_new0(BDRVVmdkReopenState, 1);
4216d17e287SHanna Reitz     state->opaque = rs;
4226d17e287SHanna Reitz 
4236d17e287SHanna Reitz     /*
4246d17e287SHanna Reitz      * Check whether there are any extents stored in bs->file; if bs->file
4256d17e287SHanna Reitz      * changes, we will need to update their .file pointers to follow suit
4266d17e287SHanna Reitz      */
4276d17e287SHanna Reitz     rs->extents_using_bs_file = g_new(bool, s->num_extents);
4286d17e287SHanna Reitz     for (i = 0; i < s->num_extents; i++) {
4296d17e287SHanna Reitz         rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file;
4306d17e287SHanna Reitz     }
4316d17e287SHanna Reitz 
43267251a31SKevin Wolf     return 0;
4333897575fSJeff Cody }
4343897575fSJeff Cody 
4356d17e287SHanna Reitz static void vmdk_reopen_clean(BDRVReopenState *state)
4366d17e287SHanna Reitz {
4376d17e287SHanna Reitz     BDRVVmdkReopenState *rs = state->opaque;
4386d17e287SHanna Reitz 
4396d17e287SHanna Reitz     g_free(rs->extents_using_bs_file);
4406d17e287SHanna Reitz     g_free(rs);
4416d17e287SHanna Reitz     state->opaque = NULL;
4426d17e287SHanna Reitz }
4436d17e287SHanna Reitz 
4446d17e287SHanna Reitz static void vmdk_reopen_commit(BDRVReopenState *state)
4456d17e287SHanna Reitz {
4466d17e287SHanna Reitz     BDRVVmdkState *s = state->bs->opaque;
4476d17e287SHanna Reitz     BDRVVmdkReopenState *rs = state->opaque;
4486d17e287SHanna Reitz     int i;
4496d17e287SHanna Reitz 
4506d17e287SHanna Reitz     for (i = 0; i < s->num_extents; i++) {
4516d17e287SHanna Reitz         if (rs->extents_using_bs_file[i]) {
4526d17e287SHanna Reitz             s->extents[i].file = state->bs->file;
4536d17e287SHanna Reitz         }
4546d17e287SHanna Reitz     }
4556d17e287SHanna Reitz 
4566d17e287SHanna Reitz     vmdk_reopen_clean(state);
4576d17e287SHanna Reitz }
4586d17e287SHanna Reitz 
4596d17e287SHanna Reitz static void vmdk_reopen_abort(BDRVReopenState *state)
4606d17e287SHanna Reitz {
4616d17e287SHanna Reitz     vmdk_reopen_clean(state);
4626d17e287SHanna Reitz }
4636d17e287SHanna Reitz 
4649949f97eSKevin Wolf static int vmdk_parent_open(BlockDriverState *bs)
465019d6b8fSAnthony Liguori {
466019d6b8fSAnthony Liguori     char *p_name;
46771968dbfSFam Zheng     char *desc;
468e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
469588b65a3SPaolo Bonzini     int ret;
470019d6b8fSAnthony Liguori 
47171968dbfSFam Zheng     desc = g_malloc0(DESC_SIZE + 1);
47232cc71deSAlberto Faria     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
473588b65a3SPaolo Bonzini     if (ret < 0) {
47471968dbfSFam Zheng         goto out;
475e1da9b24SFam Zheng     }
476019d6b8fSAnthony Liguori 
477ae261c86SFam Zheng     p_name = strstr(desc, "parentFileNameHint");
478ae261c86SFam Zheng     if (p_name != NULL) {
479019d6b8fSAnthony Liguori         char *end_name;
480019d6b8fSAnthony Liguori 
481019d6b8fSAnthony Liguori         p_name += sizeof("parentFileNameHint") + 1;
482ae261c86SFam Zheng         end_name = strchr(p_name, '\"');
483ae261c86SFam Zheng         if (end_name == NULL) {
48471968dbfSFam Zheng             ret = -EINVAL;
48571968dbfSFam Zheng             goto out;
486ae261c86SFam Zheng         }
487998c2019SMax Reitz         if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) {
48871968dbfSFam Zheng             ret = -EINVAL;
48971968dbfSFam Zheng             goto out;
490ae261c86SFam Zheng         }
491019d6b8fSAnthony Liguori 
492998c2019SMax Reitz         pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name);
493998c2019SMax Reitz         pstrcpy(bs->backing_file, sizeof(bs->backing_file),
494998c2019SMax Reitz                 bs->auto_backing_file);
4957502be83SSam Eiderman         pstrcpy(bs->backing_format, sizeof(bs->backing_format),
4967502be83SSam Eiderman                 "vmdk");
497019d6b8fSAnthony Liguori     }
498019d6b8fSAnthony Liguori 
49971968dbfSFam Zheng out:
50071968dbfSFam Zheng     g_free(desc);
50171968dbfSFam Zheng     return ret;
502019d6b8fSAnthony Liguori }
503019d6b8fSAnthony Liguori 
504b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent
505b3976d3cSFam Zheng  * address. return NULL if allocation failed. */
5068aa1331cSFam Zheng static int vmdk_add_extent(BlockDriverState *bs,
50724bc15d1SKevin Wolf                            BdrvChild *file, bool flat, int64_t sectors,
508b3976d3cSFam Zheng                            int64_t l1_offset, int64_t l1_backup_offset,
509b3976d3cSFam Zheng                            uint32_t l1_size,
5108aa1331cSFam Zheng                            int l2_size, uint64_t cluster_sectors,
5114823970bSFam Zheng                            VmdkExtent **new_extent,
5124823970bSFam Zheng                            Error **errp)
513b3976d3cSFam Zheng {
514b3976d3cSFam Zheng     VmdkExtent *extent;
515b3976d3cSFam Zheng     BDRVVmdkState *s = bs->opaque;
5160a156f7cSMarkus Armbruster     int64_t nb_sectors;
517b3976d3cSFam Zheng 
5188aa1331cSFam Zheng     if (cluster_sectors > 0x200000) {
5198aa1331cSFam Zheng         /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
5204823970bSFam Zheng         error_setg(errp, "Invalid granularity, image may be corrupt");
5214823970bSFam Zheng         return -EFBIG;
5228aa1331cSFam Zheng     }
52359d6ee48SSam Eiderman     if (l1_size > 32 * 1024 * 1024) {
524940a2cd5SSam Eiderman         /*
525940a2cd5SSam Eiderman          * Although with big capacity and small l1_entry_sectors, we can get a
526b0651b8cSFam Zheng          * big l1_size, we don't want unbounded value to allocate the table.
52759d6ee48SSam Eiderman          * Limit it to 32M, which is enough to store:
52859d6ee48SSam Eiderman          *     8TB  - for both VMDK3 & VMDK4 with
52959d6ee48SSam Eiderman          *            minimal cluster size: 512B
53059d6ee48SSam Eiderman          *            minimal L2 table size: 512 entries
53159d6ee48SSam Eiderman          *            8 TB is still more than the maximal value supported for
53259d6ee48SSam Eiderman          *            VMDK3 & VMDK4 which is 2TB.
53398eb9733SSam Eiderman          *     64TB - for "ESXi seSparse Extent"
53498eb9733SSam Eiderman          *            minimal cluster size: 512B (default is 4KB)
53598eb9733SSam Eiderman          *            L2 table size: 4096 entries (const).
53698eb9733SSam Eiderman          *            64TB is more than the maximal value supported for
53798eb9733SSam Eiderman          *            seSparse VMDKs (which is slightly less than 64TB)
538940a2cd5SSam Eiderman          */
5394823970bSFam Zheng         error_setg(errp, "L1 size too big");
540b0651b8cSFam Zheng         return -EFBIG;
541b0651b8cSFam Zheng     }
5428aa1331cSFam Zheng 
54324bc15d1SKevin Wolf     nb_sectors = bdrv_nb_sectors(file->bs);
5440a156f7cSMarkus Armbruster     if (nb_sectors < 0) {
5450a156f7cSMarkus Armbruster         return nb_sectors;
546c6ac36e1SFam Zheng     }
547c6ac36e1SFam Zheng 
5485839e53bSMarkus Armbruster     s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
549b3976d3cSFam Zheng     extent = &s->extents[s->num_extents];
550b3976d3cSFam Zheng     s->num_extents++;
551b3976d3cSFam Zheng 
552b3976d3cSFam Zheng     memset(extent, 0, sizeof(VmdkExtent));
553b3976d3cSFam Zheng     extent->file = file;
554b3976d3cSFam Zheng     extent->flat = flat;
555b3976d3cSFam Zheng     extent->sectors = sectors;
556b3976d3cSFam Zheng     extent->l1_table_offset = l1_offset;
557b3976d3cSFam Zheng     extent->l1_backup_table_offset = l1_backup_offset;
558b3976d3cSFam Zheng     extent->l1_size = l1_size;
559b3976d3cSFam Zheng     extent->l1_entry_sectors = l2_size * cluster_sectors;
560b3976d3cSFam Zheng     extent->l2_size = l2_size;
561301c7d38SFam Zheng     extent->cluster_sectors = flat ? sectors : cluster_sectors;
5620a156f7cSMarkus Armbruster     extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
56398eb9733SSam Eiderman     extent->entry_size = sizeof(uint32_t);
564b3976d3cSFam Zheng 
565b3976d3cSFam Zheng     if (s->num_extents > 1) {
566b3976d3cSFam Zheng         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
567b3976d3cSFam Zheng     } else {
568b3976d3cSFam Zheng         extent->end_sector = extent->sectors;
569b3976d3cSFam Zheng     }
570b3976d3cSFam Zheng     bs->total_sectors = extent->end_sector;
5718aa1331cSFam Zheng     if (new_extent) {
5728aa1331cSFam Zheng         *new_extent = extent;
5738aa1331cSFam Zheng     }
5748aa1331cSFam Zheng     return 0;
575b3976d3cSFam Zheng }
576b3976d3cSFam Zheng 
5774823970bSFam Zheng static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
5784823970bSFam Zheng                             Error **errp)
579019d6b8fSAnthony Liguori {
580b4b3ab14SFam Zheng     int ret;
58113c4941cSFam Zheng     size_t l1_size;
58213c4941cSFam Zheng     int i;
583b4b3ab14SFam Zheng 
584b4b3ab14SFam Zheng     /* read the L1 table */
58598eb9733SSam Eiderman     l1_size = extent->l1_size * extent->entry_size;
586d6e59931SKevin Wolf     extent->l1_table = g_try_malloc(l1_size);
587d6e59931SKevin Wolf     if (l1_size && extent->l1_table == NULL) {
588d6e59931SKevin Wolf         return -ENOMEM;
589d6e59931SKevin Wolf     }
590d6e59931SKevin Wolf 
59132cc71deSAlberto Faria     ret = bdrv_pread(extent->file, extent->l1_table_offset, l1_size,
59232cc71deSAlberto Faria                      extent->l1_table, 0);
593b4b3ab14SFam Zheng     if (ret < 0) {
594f30c66baSMax Reitz         bdrv_refresh_filename(extent->file->bs);
5954823970bSFam Zheng         error_setg_errno(errp, -ret,
5964823970bSFam Zheng                          "Could not read l1 table from extent '%s'",
59724bc15d1SKevin Wolf                          extent->file->bs->filename);
598b4b3ab14SFam Zheng         goto fail_l1;
599b4b3ab14SFam Zheng     }
600b4b3ab14SFam Zheng     for (i = 0; i < extent->l1_size; i++) {
60198eb9733SSam Eiderman         if (extent->entry_size == sizeof(uint64_t)) {
60298eb9733SSam Eiderman             le64_to_cpus((uint64_t *)extent->l1_table + i);
60398eb9733SSam Eiderman         } else {
60498eb9733SSam Eiderman             assert(extent->entry_size == sizeof(uint32_t));
60598eb9733SSam Eiderman             le32_to_cpus((uint32_t *)extent->l1_table + i);
60698eb9733SSam Eiderman         }
607b4b3ab14SFam Zheng     }
608b4b3ab14SFam Zheng 
609b4b3ab14SFam Zheng     if (extent->l1_backup_table_offset) {
61098eb9733SSam Eiderman         assert(!extent->sesparse);
611d6e59931SKevin Wolf         extent->l1_backup_table = g_try_malloc(l1_size);
612d6e59931SKevin Wolf         if (l1_size && extent->l1_backup_table == NULL) {
613d6e59931SKevin Wolf             ret = -ENOMEM;
614d6e59931SKevin Wolf             goto fail_l1;
615d6e59931SKevin Wolf         }
61653fb7844SAlberto Faria         ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
61732cc71deSAlberto Faria                          l1_size, extent->l1_backup_table, 0);
618b4b3ab14SFam Zheng         if (ret < 0) {
619f30c66baSMax Reitz             bdrv_refresh_filename(extent->file->bs);
6204823970bSFam Zheng             error_setg_errno(errp, -ret,
6214823970bSFam Zheng                              "Could not read l1 backup table from extent '%s'",
62224bc15d1SKevin Wolf                              extent->file->bs->filename);
623b4b3ab14SFam Zheng             goto fail_l1b;
624b4b3ab14SFam Zheng         }
625b4b3ab14SFam Zheng         for (i = 0; i < extent->l1_size; i++) {
626b4b3ab14SFam Zheng             le32_to_cpus(&extent->l1_backup_table[i]);
627b4b3ab14SFam Zheng         }
628b4b3ab14SFam Zheng     }
629b4b3ab14SFam Zheng 
630b4b3ab14SFam Zheng     extent->l2_cache =
63198eb9733SSam Eiderman         g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
632b4b3ab14SFam Zheng     return 0;
633b4b3ab14SFam Zheng  fail_l1b:
6347267c094SAnthony Liguori     g_free(extent->l1_backup_table);
635b4b3ab14SFam Zheng  fail_l1:
6367267c094SAnthony Liguori     g_free(extent->l1_table);
637b4b3ab14SFam Zheng     return ret;
638b4b3ab14SFam Zheng }
639b4b3ab14SFam Zheng 
640daac8fdcSFam Zheng static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
64124bc15d1SKevin Wolf                                  BdrvChild *file,
6424823970bSFam Zheng                                  int flags, Error **errp)
643b4b3ab14SFam Zheng {
644b4b3ab14SFam Zheng     int ret;
645019d6b8fSAnthony Liguori     uint32_t magic;
646019d6b8fSAnthony Liguori     VMDK3Header header;
647cd466702SChristian Borntraeger     VmdkExtent *extent = NULL;
648b4b3ab14SFam Zheng 
64932cc71deSAlberto Faria     ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
650b4b3ab14SFam Zheng     if (ret < 0) {
651f30c66baSMax Reitz         bdrv_refresh_filename(file->bs);
6524823970bSFam Zheng         error_setg_errno(errp, -ret,
6534823970bSFam Zheng                          "Could not read header from file '%s'",
65424bc15d1SKevin Wolf                          file->bs->filename);
65586c6b429SFam Zheng         return ret;
656b3976d3cSFam Zheng     }
657f6b61e54SFam Zheng     ret = vmdk_add_extent(bs, file, false,
658b3976d3cSFam Zheng                           le32_to_cpu(header.disk_sectors),
6597237aecdSFam Zheng                           (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
660f6b61e54SFam Zheng                           0,
661f6b61e54SFam Zheng                           le32_to_cpu(header.l1dir_size),
662f6b61e54SFam Zheng                           4096,
6638aa1331cSFam Zheng                           le32_to_cpu(header.granularity),
6644823970bSFam Zheng                           &extent,
6654823970bSFam Zheng                           errp);
6668aa1331cSFam Zheng     if (ret < 0) {
6678aa1331cSFam Zheng         return ret;
6688aa1331cSFam Zheng     }
6694823970bSFam Zheng     ret = vmdk_init_tables(bs, extent, errp);
670b4b3ab14SFam Zheng     if (ret) {
67186c6b429SFam Zheng         /* free extent allocated by vmdk_add_extent */
67286c6b429SFam Zheng         vmdk_free_last_extent(bs);
673b4b3ab14SFam Zheng     }
674b4b3ab14SFam Zheng     return ret;
675b4b3ab14SFam Zheng }
676b4b3ab14SFam Zheng 
/* Magic values expected in the seSparse const and volatile headers */
#define SESPARSE_CONST_HEADER_MAGIC     UINT64_C(0x00000000cafebabe)
#define SESPARSE_VOLATILE_HEADER_MAGIC  UINT64_C(0x00000000cafecafe)
67998eb9733SSam Eiderman 
68098eb9733SSam Eiderman /* Strict checks - format not officially documented */
68198eb9733SSam Eiderman static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
68298eb9733SSam Eiderman                                         Error **errp)
68398eb9733SSam Eiderman {
68498eb9733SSam Eiderman     header->magic = le64_to_cpu(header->magic);
68598eb9733SSam Eiderman     header->version = le64_to_cpu(header->version);
68698eb9733SSam Eiderman     header->grain_size = le64_to_cpu(header->grain_size);
68798eb9733SSam Eiderman     header->grain_table_size = le64_to_cpu(header->grain_table_size);
68898eb9733SSam Eiderman     header->flags = le64_to_cpu(header->flags);
68998eb9733SSam Eiderman     header->reserved1 = le64_to_cpu(header->reserved1);
69098eb9733SSam Eiderman     header->reserved2 = le64_to_cpu(header->reserved2);
69198eb9733SSam Eiderman     header->reserved3 = le64_to_cpu(header->reserved3);
69298eb9733SSam Eiderman     header->reserved4 = le64_to_cpu(header->reserved4);
69398eb9733SSam Eiderman 
69498eb9733SSam Eiderman     header->volatile_header_offset =
69598eb9733SSam Eiderman         le64_to_cpu(header->volatile_header_offset);
69698eb9733SSam Eiderman     header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
69798eb9733SSam Eiderman 
69898eb9733SSam Eiderman     header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
69998eb9733SSam Eiderman     header->journal_header_size = le64_to_cpu(header->journal_header_size);
70098eb9733SSam Eiderman 
70198eb9733SSam Eiderman     header->journal_offset = le64_to_cpu(header->journal_offset);
70298eb9733SSam Eiderman     header->journal_size = le64_to_cpu(header->journal_size);
70398eb9733SSam Eiderman 
70498eb9733SSam Eiderman     header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
70598eb9733SSam Eiderman     header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
70698eb9733SSam Eiderman 
70798eb9733SSam Eiderman     header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
70898eb9733SSam Eiderman     header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
70998eb9733SSam Eiderman 
71098eb9733SSam Eiderman     header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
71198eb9733SSam Eiderman     header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
71298eb9733SSam Eiderman 
71398eb9733SSam Eiderman     header->backmap_offset = le64_to_cpu(header->backmap_offset);
71498eb9733SSam Eiderman     header->backmap_size = le64_to_cpu(header->backmap_size);
71598eb9733SSam Eiderman 
71698eb9733SSam Eiderman     header->grains_offset = le64_to_cpu(header->grains_offset);
71798eb9733SSam Eiderman     header->grains_size = le64_to_cpu(header->grains_size);
71898eb9733SSam Eiderman 
71998eb9733SSam Eiderman     if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
72098eb9733SSam Eiderman         error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
72198eb9733SSam Eiderman                    header->magic);
72298eb9733SSam Eiderman         return -EINVAL;
72398eb9733SSam Eiderman     }
72498eb9733SSam Eiderman 
72598eb9733SSam Eiderman     if (header->version != 0x0000000200000001) {
72698eb9733SSam Eiderman         error_setg(errp, "Unsupported version: 0x%016" PRIx64,
72798eb9733SSam Eiderman                    header->version);
72898eb9733SSam Eiderman         return -ENOTSUP;
72998eb9733SSam Eiderman     }
73098eb9733SSam Eiderman 
73198eb9733SSam Eiderman     if (header->grain_size != 8) {
73298eb9733SSam Eiderman         error_setg(errp, "Unsupported grain size: %" PRIu64,
73398eb9733SSam Eiderman                    header->grain_size);
73498eb9733SSam Eiderman         return -ENOTSUP;
73598eb9733SSam Eiderman     }
73698eb9733SSam Eiderman 
73798eb9733SSam Eiderman     if (header->grain_table_size != 64) {
73898eb9733SSam Eiderman         error_setg(errp, "Unsupported grain table size: %" PRIu64,
73998eb9733SSam Eiderman                    header->grain_table_size);
74098eb9733SSam Eiderman         return -ENOTSUP;
74198eb9733SSam Eiderman     }
74298eb9733SSam Eiderman 
74398eb9733SSam Eiderman     if (header->flags != 0) {
74498eb9733SSam Eiderman         error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
74598eb9733SSam Eiderman                    header->flags);
74698eb9733SSam Eiderman         return -ENOTSUP;
74798eb9733SSam Eiderman     }
74898eb9733SSam Eiderman 
74998eb9733SSam Eiderman     if (header->reserved1 != 0 || header->reserved2 != 0 ||
75098eb9733SSam Eiderman         header->reserved3 != 0 || header->reserved4 != 0) {
75198eb9733SSam Eiderman         error_setg(errp, "Unsupported reserved bits:"
75298eb9733SSam Eiderman                    " 0x%016" PRIx64 " 0x%016" PRIx64
75398eb9733SSam Eiderman                    " 0x%016" PRIx64 " 0x%016" PRIx64,
75498eb9733SSam Eiderman                    header->reserved1, header->reserved2,
75598eb9733SSam Eiderman                    header->reserved3, header->reserved4);
75698eb9733SSam Eiderman         return -ENOTSUP;
75798eb9733SSam Eiderman     }
75898eb9733SSam Eiderman 
75998eb9733SSam Eiderman     /* check that padding is 0 */
76098eb9733SSam Eiderman     if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
76198eb9733SSam Eiderman         error_setg(errp, "Unsupported non-zero const header padding");
76298eb9733SSam Eiderman         return -ENOTSUP;
76398eb9733SSam Eiderman     }
76498eb9733SSam Eiderman 
76598eb9733SSam Eiderman     return 0;
76698eb9733SSam Eiderman }
76798eb9733SSam Eiderman 
76898eb9733SSam Eiderman static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
76998eb9733SSam Eiderman                                            Error **errp)
77098eb9733SSam Eiderman {
77198eb9733SSam Eiderman     header->magic = le64_to_cpu(header->magic);
77298eb9733SSam Eiderman     header->free_gt_number = le64_to_cpu(header->free_gt_number);
77398eb9733SSam Eiderman     header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
77498eb9733SSam Eiderman     header->replay_journal = le64_to_cpu(header->replay_journal);
77598eb9733SSam Eiderman 
77698eb9733SSam Eiderman     if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
77798eb9733SSam Eiderman         error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
77898eb9733SSam Eiderman                    header->magic);
77998eb9733SSam Eiderman         return -EINVAL;
78098eb9733SSam Eiderman     }
78198eb9733SSam Eiderman 
78298eb9733SSam Eiderman     if (header->replay_journal) {
78398eb9733SSam Eiderman         error_setg(errp, "Image is dirty, Replaying journal not supported");
78498eb9733SSam Eiderman         return -ENOTSUP;
78598eb9733SSam Eiderman     }
78698eb9733SSam Eiderman 
78798eb9733SSam Eiderman     /* check that padding is 0 */
78898eb9733SSam Eiderman     if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
78998eb9733SSam Eiderman         error_setg(errp, "Unsupported non-zero volatile header padding");
79098eb9733SSam Eiderman         return -ENOTSUP;
79198eb9733SSam Eiderman     }
79298eb9733SSam Eiderman 
79398eb9733SSam Eiderman     return 0;
79498eb9733SSam Eiderman }
79598eb9733SSam Eiderman 
79698eb9733SSam Eiderman static int vmdk_open_se_sparse(BlockDriverState *bs,
79798eb9733SSam Eiderman                                BdrvChild *file,
79898eb9733SSam Eiderman                                int flags, Error **errp)
79998eb9733SSam Eiderman {
80098eb9733SSam Eiderman     int ret;
80198eb9733SSam Eiderman     VMDKSESparseConstHeader const_header;
80298eb9733SSam Eiderman     VMDKSESparseVolatileHeader volatile_header;
803cd466702SChristian Borntraeger     VmdkExtent *extent = NULL;
80498eb9733SSam Eiderman 
80598eb9733SSam Eiderman     ret = bdrv_apply_auto_read_only(bs,
80698eb9733SSam Eiderman             "No write support for seSparse images available", errp);
80798eb9733SSam Eiderman     if (ret < 0) {
80898eb9733SSam Eiderman         return ret;
80998eb9733SSam Eiderman     }
81098eb9733SSam Eiderman 
81198eb9733SSam Eiderman     assert(sizeof(const_header) == SECTOR_SIZE);
81298eb9733SSam Eiderman 
81332cc71deSAlberto Faria     ret = bdrv_pread(file, 0, sizeof(const_header), &const_header, 0);
81498eb9733SSam Eiderman     if (ret < 0) {
81598eb9733SSam Eiderman         bdrv_refresh_filename(file->bs);
81698eb9733SSam Eiderman         error_setg_errno(errp, -ret,
81798eb9733SSam Eiderman                          "Could not read const header from file '%s'",
81898eb9733SSam Eiderman                          file->bs->filename);
81998eb9733SSam Eiderman         return ret;
82098eb9733SSam Eiderman     }
82198eb9733SSam Eiderman 
82298eb9733SSam Eiderman     /* check const header */
82398eb9733SSam Eiderman     ret = check_se_sparse_const_header(&const_header, errp);
82498eb9733SSam Eiderman     if (ret < 0) {
82598eb9733SSam Eiderman         return ret;
82698eb9733SSam Eiderman     }
82798eb9733SSam Eiderman 
82898eb9733SSam Eiderman     assert(sizeof(volatile_header) == SECTOR_SIZE);
82998eb9733SSam Eiderman 
83053fb7844SAlberto Faria     ret = bdrv_pread(file, const_header.volatile_header_offset * SECTOR_SIZE,
83132cc71deSAlberto Faria                      sizeof(volatile_header), &volatile_header, 0);
83298eb9733SSam Eiderman     if (ret < 0) {
83398eb9733SSam Eiderman         bdrv_refresh_filename(file->bs);
83498eb9733SSam Eiderman         error_setg_errno(errp, -ret,
83598eb9733SSam Eiderman                          "Could not read volatile header from file '%s'",
83698eb9733SSam Eiderman                          file->bs->filename);
83798eb9733SSam Eiderman         return ret;
83898eb9733SSam Eiderman     }
83998eb9733SSam Eiderman 
84098eb9733SSam Eiderman     /* check volatile header */
84198eb9733SSam Eiderman     ret = check_se_sparse_volatile_header(&volatile_header, errp);
84298eb9733SSam Eiderman     if (ret < 0) {
84398eb9733SSam Eiderman         return ret;
84498eb9733SSam Eiderman     }
84598eb9733SSam Eiderman 
84698eb9733SSam Eiderman     ret = vmdk_add_extent(bs, file, false,
84798eb9733SSam Eiderman                           const_header.capacity,
84898eb9733SSam Eiderman                           const_header.grain_dir_offset * SECTOR_SIZE,
84998eb9733SSam Eiderman                           0,
85098eb9733SSam Eiderman                           const_header.grain_dir_size *
85198eb9733SSam Eiderman                           SECTOR_SIZE / sizeof(uint64_t),
85298eb9733SSam Eiderman                           const_header.grain_table_size *
85398eb9733SSam Eiderman                           SECTOR_SIZE / sizeof(uint64_t),
85498eb9733SSam Eiderman                           const_header.grain_size,
85598eb9733SSam Eiderman                           &extent,
85698eb9733SSam Eiderman                           errp);
85798eb9733SSam Eiderman     if (ret < 0) {
85898eb9733SSam Eiderman         return ret;
85998eb9733SSam Eiderman     }
86098eb9733SSam Eiderman 
86198eb9733SSam Eiderman     extent->sesparse = true;
86298eb9733SSam Eiderman     extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
86398eb9733SSam Eiderman     extent->sesparse_clusters_offset = const_header.grains_offset;
86498eb9733SSam Eiderman     extent->entry_size = sizeof(uint64_t);
86598eb9733SSam Eiderman 
86698eb9733SSam Eiderman     ret = vmdk_init_tables(bs, extent, errp);
86798eb9733SSam Eiderman     if (ret) {
86898eb9733SSam Eiderman         /* free extent allocated by vmdk_add_extent */
86998eb9733SSam Eiderman         vmdk_free_last_extent(bs);
87098eb9733SSam Eiderman     }
87198eb9733SSam Eiderman 
87298eb9733SSam Eiderman     return ret;
87398eb9733SSam Eiderman }
87498eb9733SSam Eiderman 
875d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
876a6468367SKevin Wolf                                QDict *options, Error **errp);
877f16f509dSFam Zheng 
878cf2ab8fcSKevin Wolf static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
879a8842e6dSPaolo Bonzini {
880a8842e6dSPaolo Bonzini     int64_t size;
881a8842e6dSPaolo Bonzini     char *buf;
882a8842e6dSPaolo Bonzini     int ret;
883a8842e6dSPaolo Bonzini 
884cf2ab8fcSKevin Wolf     size = bdrv_getlength(file->bs);
885a8842e6dSPaolo Bonzini     if (size < 0) {
886a8842e6dSPaolo Bonzini         error_setg_errno(errp, -size, "Could not access file");
887a8842e6dSPaolo Bonzini         return NULL;
888a8842e6dSPaolo Bonzini     }
889a8842e6dSPaolo Bonzini 
89003c3359dSFam Zheng     if (size < 4) {
89103c3359dSFam Zheng         /* Both descriptor file and sparse image must be much larger than 4
89203c3359dSFam Zheng          * bytes, also callers of vmdk_read_desc want to compare the first 4
89303c3359dSFam Zheng          * bytes with VMDK4_MAGIC, let's error out if less is read. */
89403c3359dSFam Zheng         error_setg(errp, "File is too small, not a valid image");
89503c3359dSFam Zheng         return NULL;
89603c3359dSFam Zheng     }
89703c3359dSFam Zheng 
89873b7bcadSFam Zheng     size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
89973b7bcadSFam Zheng     buf = g_malloc(size + 1);
900a8842e6dSPaolo Bonzini 
90132cc71deSAlberto Faria     ret = bdrv_pread(file, desc_offset, size, buf, 0);
902a8842e6dSPaolo Bonzini     if (ret < 0) {
903a8842e6dSPaolo Bonzini         error_setg_errno(errp, -ret, "Could not read from file");
904a8842e6dSPaolo Bonzini         g_free(buf);
905a8842e6dSPaolo Bonzini         return NULL;
906a8842e6dSPaolo Bonzini     }
907*353a5d84SAlberto Faria     buf[size] = 0;
908a8842e6dSPaolo Bonzini 
909a8842e6dSPaolo Bonzini     return buf;
910a8842e6dSPaolo Bonzini }
911a8842e6dSPaolo Bonzini 
91286c6b429SFam Zheng static int vmdk_open_vmdk4(BlockDriverState *bs,
91324bc15d1SKevin Wolf                            BdrvChild *file,
914a6468367SKevin Wolf                            int flags, QDict *options, Error **errp)
915b4b3ab14SFam Zheng {
916b4b3ab14SFam Zheng     int ret;
917b4b3ab14SFam Zheng     uint32_t magic;
918b4b3ab14SFam Zheng     uint32_t l1_size, l1_entry_sectors;
919019d6b8fSAnthony Liguori     VMDK4Header header;
920cd466702SChristian Borntraeger     VmdkExtent *extent = NULL;
921f4c129a3SFam Zheng     BDRVVmdkState *s = bs->opaque;
922bb45ded9SFam Zheng     int64_t l1_backup_offset = 0;
9233db1d98aSFam Zheng     bool compressed;
924b4b3ab14SFam Zheng 
92532cc71deSAlberto Faria     ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
926b4b3ab14SFam Zheng     if (ret < 0) {
927f30c66baSMax Reitz         bdrv_refresh_filename(file->bs);
9284823970bSFam Zheng         error_setg_errno(errp, -ret,
9294823970bSFam Zheng                          "Could not read header from file '%s'",
93024bc15d1SKevin Wolf                          file->bs->filename);
93189ac8480SPaolo Bonzini         return -EINVAL;
932b3976d3cSFam Zheng     }
9335a394b9eSStefan Hajnoczi     if (header.capacity == 0) {
934e98768d4SFam Zheng         uint64_t desc_offset = le64_to_cpu(header.desc_offset);
9355a394b9eSStefan Hajnoczi         if (desc_offset) {
936cf2ab8fcSKevin Wolf             char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
937d1833ef5SPaolo Bonzini             if (!buf) {
938d1833ef5SPaolo Bonzini                 return -EINVAL;
939d1833ef5SPaolo Bonzini             }
940a6468367SKevin Wolf             ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
941d1833ef5SPaolo Bonzini             g_free(buf);
942d1833ef5SPaolo Bonzini             return ret;
9435a394b9eSStefan Hajnoczi         }
944f16f509dSFam Zheng     }
94565bd155cSKevin Wolf 
946f4c129a3SFam Zheng     if (!s->create_type) {
947f4c129a3SFam Zheng         s->create_type = g_strdup("monolithicSparse");
948f4c129a3SFam Zheng     }
949f4c129a3SFam Zheng 
95065bd155cSKevin Wolf     if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
95165bd155cSKevin Wolf         /*
95265bd155cSKevin Wolf          * The footer takes precedence over the header, so read it in. The
95365bd155cSKevin Wolf          * footer starts at offset -1024 from the end: One sector for the
95465bd155cSKevin Wolf          * footer, and another one for the end-of-stream marker.
95565bd155cSKevin Wolf          */
95665bd155cSKevin Wolf         struct {
95765bd155cSKevin Wolf             struct {
95865bd155cSKevin Wolf                 uint64_t val;
95965bd155cSKevin Wolf                 uint32_t size;
96065bd155cSKevin Wolf                 uint32_t type;
96165bd155cSKevin Wolf                 uint8_t pad[512 - 16];
96265bd155cSKevin Wolf             } QEMU_PACKED footer_marker;
96365bd155cSKevin Wolf 
96465bd155cSKevin Wolf             uint32_t magic;
96565bd155cSKevin Wolf             VMDK4Header header;
96665bd155cSKevin Wolf             uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
96765bd155cSKevin Wolf 
96865bd155cSKevin Wolf             struct {
96965bd155cSKevin Wolf                 uint64_t val;
97065bd155cSKevin Wolf                 uint32_t size;
97165bd155cSKevin Wolf                 uint32_t type;
97265bd155cSKevin Wolf                 uint8_t pad[512 - 16];
97365bd155cSKevin Wolf             } QEMU_PACKED eos_marker;
97465bd155cSKevin Wolf         } QEMU_PACKED footer;
97565bd155cSKevin Wolf 
97653fb7844SAlberto Faria         ret = bdrv_pread(file, bs->file->bs->total_sectors * 512 - 1536,
97732cc71deSAlberto Faria                          sizeof(footer), &footer, 0);
97865bd155cSKevin Wolf         if (ret < 0) {
979d899d2e2SFam Zheng             error_setg_errno(errp, -ret, "Failed to read footer");
98065bd155cSKevin Wolf             return ret;
98165bd155cSKevin Wolf         }
98265bd155cSKevin Wolf 
98365bd155cSKevin Wolf         /* Some sanity checks for the footer */
98465bd155cSKevin Wolf         if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
98565bd155cSKevin Wolf             le32_to_cpu(footer.footer_marker.size) != 0  ||
98665bd155cSKevin Wolf             le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
98765bd155cSKevin Wolf             le64_to_cpu(footer.eos_marker.val) != 0  ||
98865bd155cSKevin Wolf             le32_to_cpu(footer.eos_marker.size) != 0  ||
98965bd155cSKevin Wolf             le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
99065bd155cSKevin Wolf         {
991d899d2e2SFam Zheng             error_setg(errp, "Invalid footer");
99265bd155cSKevin Wolf             return -EINVAL;
99365bd155cSKevin Wolf         }
99465bd155cSKevin Wolf 
99565bd155cSKevin Wolf         header = footer.header;
99665bd155cSKevin Wolf     }
99765bd155cSKevin Wolf 
9983db1d98aSFam Zheng     compressed =
9993db1d98aSFam Zheng         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1000509d39aaSFam Zheng     if (le32_to_cpu(header.version) > 3) {
1001a55448b3SMax Reitz         error_setg(errp, "Unsupported VMDK version %" PRIu32,
100296c51eb5SFam Zheng                    le32_to_cpu(header.version));
100396c51eb5SFam Zheng         return -ENOTSUP;
10043db1d98aSFam Zheng     } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
10053db1d98aSFam Zheng                !compressed) {
1006509d39aaSFam Zheng         /* VMware KB 2064959 explains that version 3 added support for
1007509d39aaSFam Zheng          * persistent changed block tracking (CBT), and backup software can
1008509d39aaSFam Zheng          * read it as version=1 if it doesn't care about the changed area
1009509d39aaSFam Zheng          * information. So we are safe to enable read only. */
1010509d39aaSFam Zheng         error_setg(errp, "VMDK version 3 must be read only");
1011509d39aaSFam Zheng         return -EINVAL;
101296c51eb5SFam Zheng     }
101396c51eb5SFam Zheng 
1014ca8804ceSFam Zheng     if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
101589ac8480SPaolo Bonzini         error_setg(errp, "L2 table size too big");
1016f8ce0403SFam Zheng         return -EINVAL;
1017f8ce0403SFam Zheng     }
1018f8ce0403SFam Zheng 
1019ca8804ceSFam Zheng     l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
1020b3976d3cSFam Zheng                         * le64_to_cpu(header.granularity);
102175d12341SStefan Weil     if (l1_entry_sectors == 0) {
1022d899d2e2SFam Zheng         error_setg(errp, "L1 entry size is invalid");
102386c6b429SFam Zheng         return -EINVAL;
102486c6b429SFam Zheng     }
1025b3976d3cSFam Zheng     l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
1026b3976d3cSFam Zheng                 / l1_entry_sectors;
1027bb45ded9SFam Zheng     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
1028bb45ded9SFam Zheng         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
1029bb45ded9SFam Zheng     }
103024bc15d1SKevin Wolf     if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
10314ab9dab5SFam Zheng         error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
10324ab9dab5SFam Zheng                    (int64_t)(le64_to_cpu(header.grain_offset)
10334ab9dab5SFam Zheng                              * BDRV_SECTOR_SIZE));
103434ceed81SFam Zheng         return -EINVAL;
103534ceed81SFam Zheng     }
103634ceed81SFam Zheng 
10378aa1331cSFam Zheng     ret = vmdk_add_extent(bs, file, false,
1038b3976d3cSFam Zheng                           le64_to_cpu(header.capacity),
1039b3976d3cSFam Zheng                           le64_to_cpu(header.gd_offset) << 9,
1040bb45ded9SFam Zheng                           l1_backup_offset,
1041b3976d3cSFam Zheng                           l1_size,
1042ca8804ceSFam Zheng                           le32_to_cpu(header.num_gtes_per_gt),
10438aa1331cSFam Zheng                           le64_to_cpu(header.granularity),
10444823970bSFam Zheng                           &extent,
10454823970bSFam Zheng                           errp);
10468aa1331cSFam Zheng     if (ret < 0) {
10478aa1331cSFam Zheng         return ret;
10488aa1331cSFam Zheng     }
1049432bb170SFam Zheng     extent->compressed =
1050432bb170SFam Zheng         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1051d8a7b061SFam Zheng     if (extent->compressed) {
1052d8a7b061SFam Zheng         g_free(s->create_type);
1053d8a7b061SFam Zheng         s->create_type = g_strdup("streamOptimized");
1054d8a7b061SFam Zheng     }
1055432bb170SFam Zheng     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
105614ead646SFam Zheng     extent->version = le32_to_cpu(header.version);
105714ead646SFam Zheng     extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
10584823970bSFam Zheng     ret = vmdk_init_tables(bs, extent, errp);
1059b4b3ab14SFam Zheng     if (ret) {
106086c6b429SFam Zheng         /* free extent allocated by vmdk_add_extent */
106186c6b429SFam Zheng         vmdk_free_last_extent(bs);
1062019d6b8fSAnthony Liguori     }
1063b4b3ab14SFam Zheng     return ret;
1064b4b3ab14SFam Zheng }
1065b4b3ab14SFam Zheng 
10667fa60fa3SFam Zheng /* find an option value out of descriptor file */
10677fa60fa3SFam Zheng static int vmdk_parse_description(const char *desc, const char *opt_name,
10687fa60fa3SFam Zheng         char *buf, int buf_size)
10697fa60fa3SFam Zheng {
10707fa60fa3SFam Zheng     char *opt_pos, *opt_end;
10717fa60fa3SFam Zheng     const char *end = desc + strlen(desc);
10727fa60fa3SFam Zheng 
10737fa60fa3SFam Zheng     opt_pos = strstr(desc, opt_name);
10747fa60fa3SFam Zheng     if (!opt_pos) {
107565f74725SFam Zheng         return VMDK_ERROR;
10767fa60fa3SFam Zheng     }
10777fa60fa3SFam Zheng     /* Skip "=\"" following opt_name */
10787fa60fa3SFam Zheng     opt_pos += strlen(opt_name) + 2;
10797fa60fa3SFam Zheng     if (opt_pos >= end) {
108065f74725SFam Zheng         return VMDK_ERROR;
10817fa60fa3SFam Zheng     }
10827fa60fa3SFam Zheng     opt_end = opt_pos;
10837fa60fa3SFam Zheng     while (opt_end < end && *opt_end != '"') {
10847fa60fa3SFam Zheng         opt_end++;
10857fa60fa3SFam Zheng     }
10867fa60fa3SFam Zheng     if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
108765f74725SFam Zheng         return VMDK_ERROR;
10887fa60fa3SFam Zheng     }
10897fa60fa3SFam Zheng     pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
109065f74725SFam Zheng     return VMDK_OK;
10917fa60fa3SFam Zheng }
10927fa60fa3SFam Zheng 
109386c6b429SFam Zheng /* Open an extent file and append to bs array */
109424bc15d1SKevin Wolf static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
1095a6468367SKevin Wolf                             char *buf, QDict *options, Error **errp)
109686c6b429SFam Zheng {
109786c6b429SFam Zheng     uint32_t magic;
109886c6b429SFam Zheng 
1099d1833ef5SPaolo Bonzini     magic = ldl_be_p(buf);
110086c6b429SFam Zheng     switch (magic) {
110186c6b429SFam Zheng         case VMDK3_MAGIC:
11024823970bSFam Zheng             return vmdk_open_vmfs_sparse(bs, file, flags, errp);
110386c6b429SFam Zheng         case VMDK4_MAGIC:
1104a6468367SKevin Wolf             return vmdk_open_vmdk4(bs, file, flags, options, errp);
110586c6b429SFam Zheng         default:
110676abe407SPaolo Bonzini             error_setg(errp, "Image not in VMDK format");
110776abe407SPaolo Bonzini             return -EINVAL;
110886c6b429SFam Zheng     }
110986c6b429SFam Zheng }
111086c6b429SFam Zheng 
/*
 * Return a pointer to the character following the first '\n' in @s, or to
 * the terminating NUL when @s contains no newline.
 */
static const char *next_line(const char *s)
{
    const char *newline = strchr(s, '\n');

    return newline ? newline + 1 : s + strlen(s);
}
1121e4937694SMarkus Armbruster 
11227fa60fa3SFam Zheng static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
1123cdc0dd25SMax Reitz                               QDict *options, Error **errp)
11247fa60fa3SFam Zheng {
11257fa60fa3SFam Zheng     int ret;
1126395a22faSJeff Cody     int matches;
11277fa60fa3SFam Zheng     char access[11];
11287fa60fa3SFam Zheng     char type[11];
11297fa60fa3SFam Zheng     char fname[512];
1130d28d737fSMarkus Armbruster     const char *p, *np;
11317fa60fa3SFam Zheng     int64_t sectors = 0;
11327fa60fa3SFam Zheng     int64_t flat_offset;
1133cdc0dd25SMax Reitz     char *desc_file_dir = NULL;
1134fe206562SJeff Cody     char *extent_path;
113524bc15d1SKevin Wolf     BdrvChild *extent_file;
11368b1869daSMax Reitz     BdrvChildRole extent_role;
1137f4c129a3SFam Zheng     BDRVVmdkState *s = bs->opaque;
1138cd466702SChristian Borntraeger     VmdkExtent *extent = NULL;
1139a6468367SKevin Wolf     char extent_opt_prefix[32];
114024bc15d1SKevin Wolf     Error *local_err = NULL;
11417fa60fa3SFam Zheng 
1142e4937694SMarkus Armbruster     for (p = desc; *p; p = next_line(p)) {
11438a3e0bc3SFam Zheng         /* parse extent line in one of below formats:
11448a3e0bc3SFam Zheng          *
11457fa60fa3SFam Zheng          * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
11467fa60fa3SFam Zheng          * RW [size in sectors] SPARSE "file-name.vmdk"
11478a3e0bc3SFam Zheng          * RW [size in sectors] VMFS "file-name.vmdk"
11488a3e0bc3SFam Zheng          * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
114998eb9733SSam Eiderman          * RW [size in sectors] SESPARSE "file-name.vmdk"
11507fa60fa3SFam Zheng          */
11517fa60fa3SFam Zheng         flat_offset = -1;
1152395a22faSJeff Cody         matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
11537fa60fa3SFam Zheng                          access, &sectors, type, fname, &flat_offset);
1154395a22faSJeff Cody         if (matches < 4 || strcmp(access, "RW")) {
1155e4937694SMarkus Armbruster             continue;
11567fa60fa3SFam Zheng         } else if (!strcmp(type, "FLAT")) {
1157395a22faSJeff Cody             if (matches != 5 || flat_offset < 0) {
1158d28d737fSMarkus Armbruster                 goto invalid;
11597fa60fa3SFam Zheng             }
1160dbbcaa8dSFam Zheng         } else if (!strcmp(type, "VMFS")) {
1161395a22faSJeff Cody             if (matches == 4) {
1162dbbcaa8dSFam Zheng                 flat_offset = 0;
1163b47053bdSFam Zheng             } else {
1164d28d737fSMarkus Armbruster                 goto invalid;
1165b47053bdSFam Zheng             }
1166395a22faSJeff Cody         } else if (matches != 4) {
1167d28d737fSMarkus Armbruster             goto invalid;
11687fa60fa3SFam Zheng         }
11697fa60fa3SFam Zheng 
11707fa60fa3SFam Zheng         if (sectors <= 0 ||
1171daac8fdcSFam Zheng             (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
117298eb9733SSam Eiderman              strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
117398eb9733SSam Eiderman              strcmp(type, "SESPARSE")) ||
11747fa60fa3SFam Zheng             (strcmp(access, "RW"))) {
1175e4937694SMarkus Armbruster             continue;
11767fa60fa3SFam Zheng         }
11777fa60fa3SFam Zheng 
1178cdc0dd25SMax Reitz         if (path_is_absolute(fname)) {
1179cdc0dd25SMax Reitz             extent_path = g_strdup(fname);
1180cdc0dd25SMax Reitz         } else {
1181cdc0dd25SMax Reitz             if (!desc_file_dir) {
1182cdc0dd25SMax Reitz                 desc_file_dir = bdrv_dirname(bs->file->bs, errp);
1183cdc0dd25SMax Reitz                 if (!desc_file_dir) {
1184f30c66baSMax Reitz                     bdrv_refresh_filename(bs->file->bs);
1185cdc0dd25SMax Reitz                     error_prepend(errp, "Cannot use relative paths with VMDK "
1186cdc0dd25SMax Reitz                                   "descriptor file '%s': ",
1187cdc0dd25SMax Reitz                                   bs->file->bs->filename);
1188cdc0dd25SMax Reitz                     ret = -EINVAL;
1189cdc0dd25SMax Reitz                     goto out;
1190cdc0dd25SMax Reitz                 }
11915c98415bSMax Reitz             }
11925c98415bSMax Reitz 
1193cdc0dd25SMax Reitz             extent_path = g_strconcat(desc_file_dir, fname, NULL);
1194cdc0dd25SMax Reitz         }
1195a6468367SKevin Wolf 
1196a6468367SKevin Wolf         ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
1197a6468367SKevin Wolf         assert(ret < 32);
1198a6468367SKevin Wolf 
11998b1869daSMax Reitz         extent_role = BDRV_CHILD_DATA;
12008b1869daSMax Reitz         if (strcmp(type, "FLAT") != 0 && strcmp(type, "VMFS") != 0) {
12018b1869daSMax Reitz             /* non-flat extents have metadata */
12028b1869daSMax Reitz             extent_role |= BDRV_CHILD_METADATA;
12038b1869daSMax Reitz         }
12048b1869daSMax Reitz 
120524bc15d1SKevin Wolf         extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
12068b1869daSMax Reitz                                       bs, &child_of_bds, extent_role, false,
12078b1869daSMax Reitz                                       &local_err);
1208fe206562SJeff Cody         g_free(extent_path);
120924bc15d1SKevin Wolf         if (local_err) {
121024bc15d1SKevin Wolf             error_propagate(errp, local_err);
1211cdc0dd25SMax Reitz             ret = -EINVAL;
1212cdc0dd25SMax Reitz             goto out;
12137fa60fa3SFam Zheng         }
121486c6b429SFam Zheng 
121586c6b429SFam Zheng         /* save to extents array */
121604d542c8SPaolo Bonzini         if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
121786c6b429SFam Zheng             /* FLAT extent */
121886c6b429SFam Zheng 
12198aa1331cSFam Zheng             ret = vmdk_add_extent(bs, extent_file, true, sectors,
12204823970bSFam Zheng                             0, 0, 0, 0, 0, &extent, errp);
12218aa1331cSFam Zheng             if (ret < 0) {
122224bc15d1SKevin Wolf                 bdrv_unref_child(bs, extent_file);
1223cdc0dd25SMax Reitz                 goto out;
12248aa1331cSFam Zheng             }
1225f16f509dSFam Zheng             extent->flat_start_offset = flat_offset << 9;
1226daac8fdcSFam Zheng         } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
1227daac8fdcSFam Zheng             /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
1228cf2ab8fcSKevin Wolf             char *buf = vmdk_read_desc(extent_file, 0, errp);
1229d1833ef5SPaolo Bonzini             if (!buf) {
1230d1833ef5SPaolo Bonzini                 ret = -EINVAL;
1231d1833ef5SPaolo Bonzini             } else {
1232a6468367SKevin Wolf                 ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
1233a6468367SKevin Wolf                                        options, errp);
1234d1833ef5SPaolo Bonzini             }
1235d1833ef5SPaolo Bonzini             g_free(buf);
1236b6b1d31fSStefan Hajnoczi             if (ret) {
123724bc15d1SKevin Wolf                 bdrv_unref_child(bs, extent_file);
1238cdc0dd25SMax Reitz                 goto out;
123986c6b429SFam Zheng             }
1240f4c129a3SFam Zheng             extent = &s->extents[s->num_extents - 1];
124198eb9733SSam Eiderman         } else if (!strcmp(type, "SESPARSE")) {
124298eb9733SSam Eiderman             ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
124398eb9733SSam Eiderman             if (ret) {
124498eb9733SSam Eiderman                 bdrv_unref_child(bs, extent_file);
1245cdc0dd25SMax Reitz                 goto out;
124698eb9733SSam Eiderman             }
124798eb9733SSam Eiderman             extent = &s->extents[s->num_extents - 1];
12487fa60fa3SFam Zheng         } else {
12494823970bSFam Zheng             error_setg(errp, "Unsupported extent type '%s'", type);
125024bc15d1SKevin Wolf             bdrv_unref_child(bs, extent_file);
1251cdc0dd25SMax Reitz             ret = -ENOTSUP;
1252cdc0dd25SMax Reitz             goto out;
12537fa60fa3SFam Zheng         }
1254f4c129a3SFam Zheng         extent->type = g_strdup(type);
1255899f1ae2SFam Zheng     }
1256cdc0dd25SMax Reitz 
1257cdc0dd25SMax Reitz     ret = 0;
1258cdc0dd25SMax Reitz     goto out;
1259d28d737fSMarkus Armbruster 
1260d28d737fSMarkus Armbruster invalid:
1261d28d737fSMarkus Armbruster     np = next_line(p);
1262d28d737fSMarkus Armbruster     assert(np != p);
1263d28d737fSMarkus Armbruster     if (np[-1] == '\n') {
1264d28d737fSMarkus Armbruster         np--;
1265d28d737fSMarkus Armbruster     }
1266d28d737fSMarkus Armbruster     error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
1267cdc0dd25SMax Reitz     ret = -EINVAL;
1268cdc0dd25SMax Reitz 
1269cdc0dd25SMax Reitz out:
1270cdc0dd25SMax Reitz     g_free(desc_file_dir);
1271cdc0dd25SMax Reitz     return ret;
12727fa60fa3SFam Zheng }
12737fa60fa3SFam Zheng 
1274d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
1275a6468367SKevin Wolf                                QDict *options, Error **errp)
12767fa60fa3SFam Zheng {
12777fa60fa3SFam Zheng     int ret;
12787fa60fa3SFam Zheng     char ct[128];
12797fa60fa3SFam Zheng     BDRVVmdkState *s = bs->opaque;
12807fa60fa3SFam Zheng 
12817fa60fa3SFam Zheng     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
128276abe407SPaolo Bonzini         error_setg(errp, "invalid VMDK image descriptor");
128376abe407SPaolo Bonzini         ret = -EINVAL;
12840bed087dSEvgeny Budilovsky         goto exit;
12857fa60fa3SFam Zheng     }
12866398de51SFam Zheng     if (strcmp(ct, "monolithicFlat") &&
128704d542c8SPaolo Bonzini         strcmp(ct, "vmfs") &&
1288daac8fdcSFam Zheng         strcmp(ct, "vmfsSparse") &&
128998eb9733SSam Eiderman         strcmp(ct, "seSparse") &&
129086c6b429SFam Zheng         strcmp(ct, "twoGbMaxExtentSparse") &&
12916398de51SFam Zheng         strcmp(ct, "twoGbMaxExtentFlat")) {
12924823970bSFam Zheng         error_setg(errp, "Unsupported image type '%s'", ct);
12930bed087dSEvgeny Budilovsky         ret = -ENOTSUP;
12940bed087dSEvgeny Budilovsky         goto exit;
12957fa60fa3SFam Zheng     }
1296f4c129a3SFam Zheng     s->create_type = g_strdup(ct);
12977fa60fa3SFam Zheng     s->desc_offset = 0;
1298cdc0dd25SMax Reitz     ret = vmdk_parse_extents(buf, bs, options, errp);
12990bed087dSEvgeny Budilovsky exit:
13000bed087dSEvgeny Budilovsky     return ret;
13017fa60fa3SFam Zheng }
13027fa60fa3SFam Zheng 
1303015a1036SMax Reitz static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
1304015a1036SMax Reitz                      Error **errp)
1305b4b3ab14SFam Zheng {
13069aeecbbcSFam Zheng     char *buf;
130786c6b429SFam Zheng     int ret;
130886c6b429SFam Zheng     BDRVVmdkState *s = bs->opaque;
130937f09e5eSPaolo Bonzini     uint32_t magic;
1310b4b3ab14SFam Zheng 
13118b1869daSMax Reitz     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
13128b1869daSMax Reitz                                BDRV_CHILD_IMAGE, false, errp);
13134e4bf5c4SKevin Wolf     if (!bs->file) {
13144e4bf5c4SKevin Wolf         return -EINVAL;
13154e4bf5c4SKevin Wolf     }
13164e4bf5c4SKevin Wolf 
1317cf2ab8fcSKevin Wolf     buf = vmdk_read_desc(bs->file, 0, errp);
1318d1833ef5SPaolo Bonzini     if (!buf) {
1319d1833ef5SPaolo Bonzini         return -EINVAL;
1320d1833ef5SPaolo Bonzini     }
1321d1833ef5SPaolo Bonzini 
132237f09e5eSPaolo Bonzini     magic = ldl_be_p(buf);
132337f09e5eSPaolo Bonzini     switch (magic) {
132437f09e5eSPaolo Bonzini         case VMDK3_MAGIC:
132537f09e5eSPaolo Bonzini         case VMDK4_MAGIC:
13269a4f4c31SKevin Wolf             ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
132724bc15d1SKevin Wolf                                    errp);
132886c6b429SFam Zheng             s->desc_offset = 0x200;
132937f09e5eSPaolo Bonzini             break;
133037f09e5eSPaolo Bonzini         default:
13318b1869daSMax Reitz             /* No data in the descriptor file */
13328b1869daSMax Reitz             bs->file->role &= ~BDRV_CHILD_DATA;
13338b1869daSMax Reitz 
13348b1869daSMax Reitz             /* Must succeed because we have given up permissions if anything */
13358b1869daSMax Reitz             bdrv_child_refresh_perms(bs, bs->file, &error_abort);
13368b1869daSMax Reitz 
1337a6468367SKevin Wolf             ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
133837f09e5eSPaolo Bonzini             break;
133937f09e5eSPaolo Bonzini     }
1340bae0a0ccSPaolo Bonzini     if (ret) {
1341bae0a0ccSPaolo Bonzini         goto fail;
1342bae0a0ccSPaolo Bonzini     }
134337f09e5eSPaolo Bonzini 
134486c6b429SFam Zheng     /* try to open parent images, if exist */
134586c6b429SFam Zheng     ret = vmdk_parent_open(bs);
134686c6b429SFam Zheng     if (ret) {
1347bae0a0ccSPaolo Bonzini         goto fail;
1348b4b3ab14SFam Zheng     }
13499877860eSPeter Maydell     ret = vmdk_read_cid(bs, 0, &s->cid);
13509877860eSPeter Maydell     if (ret) {
13519877860eSPeter Maydell         goto fail;
13529877860eSPeter Maydell     }
13539877860eSPeter Maydell     ret = vmdk_read_cid(bs, 1, &s->parent_cid);
13549877860eSPeter Maydell     if (ret) {
13559877860eSPeter Maydell         goto fail;
13569877860eSPeter Maydell     }
1357848c66e8SPaolo Bonzini     qemu_co_mutex_init(&s->lock);
13582bc3166cSKevin Wolf 
13592bc3166cSKevin Wolf     /* Disable migration when VMDK images are used */
136081e5f78aSAlberto Garcia     error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
136181e5f78aSAlberto Garcia                "does not support live migration",
136281e5f78aSAlberto Garcia                bdrv_get_device_or_node_name(bs));
1363386f6c07SMarkus Armbruster     ret = migrate_add_blocker(s->migration_blocker, errp);
1364386f6c07SMarkus Armbruster     if (ret < 0) {
1365fe44dc91SAshijeet Acharya         error_free(s->migration_blocker);
1366fe44dc91SAshijeet Acharya         goto fail;
1367fe44dc91SAshijeet Acharya     }
1368fe44dc91SAshijeet Acharya 
1369d1833ef5SPaolo Bonzini     g_free(buf);
13702bc3166cSKevin Wolf     return 0;
1371bae0a0ccSPaolo Bonzini 
1372bae0a0ccSPaolo Bonzini fail:
1373d1833ef5SPaolo Bonzini     g_free(buf);
1374f4c129a3SFam Zheng     g_free(s->create_type);
1375f4c129a3SFam Zheng     s->create_type = NULL;
1376bae0a0ccSPaolo Bonzini     vmdk_free_extents(bs);
1377bae0a0ccSPaolo Bonzini     return ret;
1378019d6b8fSAnthony Liguori }
1379019d6b8fSAnthony Liguori 
1380d34682cdSKevin Wolf 
13813baca891SKevin Wolf static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
1382d34682cdSKevin Wolf {
1383d34682cdSKevin Wolf     BDRVVmdkState *s = bs->opaque;
1384d34682cdSKevin Wolf     int i;
1385d34682cdSKevin Wolf 
1386d34682cdSKevin Wolf     for (i = 0; i < s->num_extents; i++) {
1387d34682cdSKevin Wolf         if (!s->extents[i].flat) {
1388cf081fcaSEric Blake             bs->bl.pwrite_zeroes_alignment =
1389cf081fcaSEric Blake                 MAX(bs->bl.pwrite_zeroes_alignment,
1390cf081fcaSEric Blake                     s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
1391d34682cdSKevin Wolf         }
1392d34682cdSKevin Wolf     }
1393d34682cdSKevin Wolf }
1394d34682cdSKevin Wolf 
1395c6ac36e1SFam Zheng /**
1396c6ac36e1SFam Zheng  * get_whole_cluster
1397c6ac36e1SFam Zheng  *
1398c6ac36e1SFam Zheng  * Copy backing file's cluster that covers @sector_num, otherwise write zero,
13994823cde5SKevin Wolf  * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting
14004823cde5SKevin Wolf  * a zeroed cluster in the current layer and must not copy data from the
14014823cde5SKevin Wolf  * backing file.
1402c6ac36e1SFam Zheng  *
1403c6ac36e1SFam Zheng  * If @skip_start_sector < @skip_end_sector, the relative range
1404c6ac36e1SFam Zheng  * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
1405c6ac36e1SFam Zheng  * it for call to write user data in the request.
1406c6ac36e1SFam Zheng  */
1407b3976d3cSFam Zheng static int get_whole_cluster(BlockDriverState *bs,
1408b3976d3cSFam Zheng                              VmdkExtent *extent,
140937b1d7d8SKevin Wolf                              uint64_t cluster_offset,
141037b1d7d8SKevin Wolf                              uint64_t offset,
141137b1d7d8SKevin Wolf                              uint64_t skip_start_bytes,
14124823cde5SKevin Wolf                              uint64_t skip_end_bytes,
14134823cde5SKevin Wolf                              bool zeroed)
1414019d6b8fSAnthony Liguori {
1415bf81507dSFam Zheng     int ret = VMDK_OK;
1416c6ac36e1SFam Zheng     int64_t cluster_bytes;
1417c6ac36e1SFam Zheng     uint8_t *whole_grain;
14184823cde5SKevin Wolf     bool copy_from_backing;
1419019d6b8fSAnthony Liguori 
1420c6ac36e1SFam Zheng     /* For COW, align request sector_num to cluster start */
1421c6ac36e1SFam Zheng     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
142237b1d7d8SKevin Wolf     offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
1423c6ac36e1SFam Zheng     whole_grain = qemu_blockalign(bs, cluster_bytes);
14244823cde5SKevin Wolf     copy_from_backing = bs->backing && !zeroed;
1425c6ac36e1SFam Zheng 
14264823cde5SKevin Wolf     if (!copy_from_backing) {
142737b1d7d8SKevin Wolf         memset(whole_grain, 0, skip_start_bytes);
142837b1d7d8SKevin Wolf         memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
1429c6ac36e1SFam Zheng     }
1430c6ac36e1SFam Zheng 
143137b1d7d8SKevin Wolf     assert(skip_end_bytes <= cluster_bytes);
14320e69c543SFam Zheng     /* we will be here if it's first write on non-exist grain(cluster).
14330e69c543SFam Zheng      * try to read from parent image, if exist */
1434760e0063SKevin Wolf     if (bs->backing && !vmdk_is_cid_valid(bs)) {
1435c6ac36e1SFam Zheng         ret = VMDK_ERROR;
1436c6ac36e1SFam Zheng         goto exit;
1437c6ac36e1SFam Zheng     }
1438c6ac36e1SFam Zheng 
1439c6ac36e1SFam Zheng     /* Read backing data before skip range */
144037b1d7d8SKevin Wolf     if (skip_start_bytes > 0) {
14414823cde5SKevin Wolf         if (copy_from_backing) {
144223c4b2a8SMax Reitz             /* qcow2 emits this on bs->file instead of bs->backing */
144323c4b2a8SMax Reitz             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
144432cc71deSAlberto Faria             ret = bdrv_pread(bs->backing, offset, skip_start_bytes,
144532cc71deSAlberto Faria                              whole_grain, 0);
1446c336500dSKevin Wolf             if (ret < 0) {
1447bf81507dSFam Zheng                 ret = VMDK_ERROR;
1448bf81507dSFam Zheng                 goto exit;
1449019d6b8fSAnthony Liguori             }
1450019d6b8fSAnthony Liguori         }
145123c4b2a8SMax Reitz         BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
145232cc71deSAlberto Faria         ret = bdrv_pwrite(extent->file, cluster_offset, skip_start_bytes,
145332cc71deSAlberto Faria                           whole_grain, 0);
1454c6ac36e1SFam Zheng         if (ret < 0) {
1455c6ac36e1SFam Zheng             ret = VMDK_ERROR;
1456c6ac36e1SFam Zheng             goto exit;
1457c6ac36e1SFam Zheng         }
1458c6ac36e1SFam Zheng     }
1459c6ac36e1SFam Zheng     /* Read backing data after skip range */
146037b1d7d8SKevin Wolf     if (skip_end_bytes < cluster_bytes) {
14614823cde5SKevin Wolf         if (copy_from_backing) {
146223c4b2a8SMax Reitz             /* qcow2 emits this on bs->file instead of bs->backing */
146323c4b2a8SMax Reitz             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
1464cf2ab8fcSKevin Wolf             ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
146532cc71deSAlberto Faria                              cluster_bytes - skip_end_bytes,
146632cc71deSAlberto Faria                              whole_grain + skip_end_bytes, 0);
1467c6ac36e1SFam Zheng             if (ret < 0) {
1468c6ac36e1SFam Zheng                 ret = VMDK_ERROR;
1469c6ac36e1SFam Zheng                 goto exit;
1470c6ac36e1SFam Zheng             }
1471c6ac36e1SFam Zheng         }
147223c4b2a8SMax Reitz         BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
1473d9ca2ea2SKevin Wolf         ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
147432cc71deSAlberto Faria                           cluster_bytes - skip_end_bytes,
147532cc71deSAlberto Faria                           whole_grain + skip_end_bytes, 0);
1476c6ac36e1SFam Zheng         if (ret < 0) {
1477c6ac36e1SFam Zheng             ret = VMDK_ERROR;
1478c6ac36e1SFam Zheng             goto exit;
1479c6ac36e1SFam Zheng         }
1480c6ac36e1SFam Zheng     }
1481c6ac36e1SFam Zheng 
148237b1d7d8SKevin Wolf     ret = VMDK_OK;
1483bf81507dSFam Zheng exit:
1484bf81507dSFam Zheng     qemu_vfree(whole_grain);
1485bf81507dSFam Zheng     return ret;
1486019d6b8fSAnthony Liguori }
1487019d6b8fSAnthony Liguori 
1488c6ac36e1SFam Zheng static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
1489c6ac36e1SFam Zheng                          uint32_t offset)
1490019d6b8fSAnthony Liguori {
1491c6ac36e1SFam Zheng     offset = cpu_to_le32(offset);
1492019d6b8fSAnthony Liguori     /* update L2 table */
149323c4b2a8SMax Reitz     BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE);
14942758be05SKevin Wolf     if (bdrv_pwrite(extent->file,
1495b3976d3cSFam Zheng                 ((int64_t)m_data->l2_offset * 512)
1496c6ac36e1SFam Zheng                     + (m_data->l2_index * sizeof(offset)),
149732cc71deSAlberto Faria                 sizeof(offset), &offset, 0) < 0) {
149865f74725SFam Zheng         return VMDK_ERROR;
1499b3976d3cSFam Zheng     }
1500019d6b8fSAnthony Liguori     /* update backup L2 table */
1501b3976d3cSFam Zheng     if (extent->l1_backup_table_offset != 0) {
1502b3976d3cSFam Zheng         m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
15032758be05SKevin Wolf         if (bdrv_pwrite(extent->file,
1504b3976d3cSFam Zheng                     ((int64_t)m_data->l2_offset * 512)
1505c6ac36e1SFam Zheng                         + (m_data->l2_index * sizeof(offset)),
150632cc71deSAlberto Faria                     sizeof(offset), &offset, 0) < 0) {
150765f74725SFam Zheng             return VMDK_ERROR;
1508019d6b8fSAnthony Liguori         }
1509b3976d3cSFam Zheng     }
15102758be05SKevin Wolf     if (bdrv_flush(extent->file->bs) < 0) {
15112758be05SKevin Wolf         return VMDK_ERROR;
15122758be05SKevin Wolf     }
1513cdeaf1f1SFam Zheng     if (m_data->l2_cache_entry) {
1514cdeaf1f1SFam Zheng         *m_data->l2_cache_entry = offset;
1515cdeaf1f1SFam Zheng     }
1516019d6b8fSAnthony Liguori 
151765f74725SFam Zheng     return VMDK_OK;
1518019d6b8fSAnthony Liguori }
1519019d6b8fSAnthony Liguori 
1520c6ac36e1SFam Zheng /**
1521c6ac36e1SFam Zheng  * get_cluster_offset
1522c6ac36e1SFam Zheng  *
1523c6ac36e1SFam Zheng  * Look up cluster offset in extent file by sector number, and store in
1524c6ac36e1SFam Zheng  * @cluster_offset.
1525c6ac36e1SFam Zheng  *
1526c6ac36e1SFam Zheng  * For flat extents, the start offset as parsed from the description file is
1527c6ac36e1SFam Zheng  * returned.
1528c6ac36e1SFam Zheng  *
1529c6ac36e1SFam Zheng  * For sparse extents, look up in L1, L2 table. If allocate is true, return an
1530c6ac36e1SFam Zheng  * offset for a new cluster and update L2 cache. If there is a backing file,
1531c6ac36e1SFam Zheng  * COW is done before returning; otherwise, zeroes are written to the allocated
1532c6ac36e1SFam Zheng  * cluster. Both COW and zero writing skips the sector range
1533c6ac36e1SFam Zheng  * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
1534c6ac36e1SFam Zheng  * has new data to write there.
1535c6ac36e1SFam Zheng  *
1536c6ac36e1SFam Zheng  * Returns: VMDK_OK if cluster exists and mapped in the image.
1537c6ac36e1SFam Zheng  *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
1538c6ac36e1SFam Zheng  *          VMDK_ERROR if failed.
1539c6ac36e1SFam Zheng  */
154091b85bd3SFam Zheng static int get_cluster_offset(BlockDriverState *bs,
1541b3976d3cSFam Zheng                               VmdkExtent *extent,
1542b3976d3cSFam Zheng                               VmdkMetaData *m_data,
154391b85bd3SFam Zheng                               uint64_t offset,
1544c6ac36e1SFam Zheng                               bool allocate,
1545c6ac36e1SFam Zheng                               uint64_t *cluster_offset,
154637b1d7d8SKevin Wolf                               uint64_t skip_start_bytes,
154737b1d7d8SKevin Wolf                               uint64_t skip_end_bytes)
1548019d6b8fSAnthony Liguori {
1549019d6b8fSAnthony Liguori     unsigned int l1_index, l2_offset, l2_index;
1550019d6b8fSAnthony Liguori     int min_index, i, j;
155198eb9733SSam Eiderman     uint32_t min_count;
155298eb9733SSam Eiderman     void *l2_table;
155314ead646SFam Zheng     bool zeroed = false;
1554c6ac36e1SFam Zheng     int64_t ret;
1555d1319b07SFam Zheng     int64_t cluster_sector;
155698eb9733SSam Eiderman     unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
1557019d6b8fSAnthony Liguori 
1558ae261c86SFam Zheng     if (m_data) {
15594dc20e64SKevin Wolf         m_data->new_allocation = false;
1560ae261c86SFam Zheng     }
156191b85bd3SFam Zheng     if (extent->flat) {
15627fa60fa3SFam Zheng         *cluster_offset = extent->flat_start_offset;
156365f74725SFam Zheng         return VMDK_OK;
156491b85bd3SFam Zheng     }
1565019d6b8fSAnthony Liguori 
15666398de51SFam Zheng     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
1567b3976d3cSFam Zheng     l1_index = (offset >> 9) / extent->l1_entry_sectors;
1568b3976d3cSFam Zheng     if (l1_index >= extent->l1_size) {
156965f74725SFam Zheng         return VMDK_ERROR;
1570b3976d3cSFam Zheng     }
157198eb9733SSam Eiderman     if (extent->sesparse) {
157298eb9733SSam Eiderman         uint64_t l2_offset_u64;
157398eb9733SSam Eiderman 
157498eb9733SSam Eiderman         assert(extent->entry_size == sizeof(uint64_t));
157598eb9733SSam Eiderman 
157698eb9733SSam Eiderman         l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
157798eb9733SSam Eiderman         if (l2_offset_u64 == 0) {
157898eb9733SSam Eiderman             l2_offset = 0;
157998eb9733SSam Eiderman         } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
158098eb9733SSam Eiderman             /*
158198eb9733SSam Eiderman              * Top most nibble is 0x1 if grain table is allocated.
158298eb9733SSam Eiderman              * strict check - top most 4 bytes must be 0x10000000 since max
158398eb9733SSam Eiderman              * supported size is 64TB for disk - so no more than 64TB / 16MB
158498eb9733SSam Eiderman              * grain directories which is smaller than uint32,
158598eb9733SSam Eiderman              * where 16MB is the only supported default grain table coverage.
158698eb9733SSam Eiderman              */
158798eb9733SSam Eiderman             return VMDK_ERROR;
158898eb9733SSam Eiderman         } else {
158998eb9733SSam Eiderman             l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
159098eb9733SSam Eiderman             l2_offset_u64 = extent->sesparse_l2_tables_offset +
159198eb9733SSam Eiderman                 l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
159298eb9733SSam Eiderman             if (l2_offset_u64 > 0x00000000ffffffff) {
159398eb9733SSam Eiderman                 return VMDK_ERROR;
159498eb9733SSam Eiderman             }
159598eb9733SSam Eiderman             l2_offset = (unsigned int)(l2_offset_u64);
159698eb9733SSam Eiderman         }
159798eb9733SSam Eiderman     } else {
159898eb9733SSam Eiderman         assert(extent->entry_size == sizeof(uint32_t));
159998eb9733SSam Eiderman         l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
160098eb9733SSam Eiderman     }
1601b3976d3cSFam Zheng     if (!l2_offset) {
160265f74725SFam Zheng         return VMDK_UNALLOC;
1603b3976d3cSFam Zheng     }
1604019d6b8fSAnthony Liguori     for (i = 0; i < L2_CACHE_SIZE; i++) {
1605b3976d3cSFam Zheng         if (l2_offset == extent->l2_cache_offsets[i]) {
1606019d6b8fSAnthony Liguori             /* increment the hit count */
1607b3976d3cSFam Zheng             if (++extent->l2_cache_counts[i] == 0xffffffff) {
1608019d6b8fSAnthony Liguori                 for (j = 0; j < L2_CACHE_SIZE; j++) {
1609b3976d3cSFam Zheng                     extent->l2_cache_counts[j] >>= 1;
1610019d6b8fSAnthony Liguori                 }
1611019d6b8fSAnthony Liguori             }
161298eb9733SSam Eiderman             l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
1613019d6b8fSAnthony Liguori             goto found;
1614019d6b8fSAnthony Liguori         }
1615019d6b8fSAnthony Liguori     }
1616019d6b8fSAnthony Liguori     /* not found: load a new entry in the least used one */
1617019d6b8fSAnthony Liguori     min_index = 0;
1618019d6b8fSAnthony Liguori     min_count = 0xffffffff;
1619019d6b8fSAnthony Liguori     for (i = 0; i < L2_CACHE_SIZE; i++) {
1620b3976d3cSFam Zheng         if (extent->l2_cache_counts[i] < min_count) {
1621b3976d3cSFam Zheng             min_count = extent->l2_cache_counts[i];
1622019d6b8fSAnthony Liguori             min_index = i;
1623019d6b8fSAnthony Liguori         }
1624019d6b8fSAnthony Liguori     }
162598eb9733SSam Eiderman     l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
162623c4b2a8SMax Reitz     BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
1627cf2ab8fcSKevin Wolf     if (bdrv_pread(extent->file,
1628b3976d3cSFam Zheng                 (int64_t)l2_offset * 512,
162953fb7844SAlberto Faria                 l2_size_bytes,
163032cc71deSAlberto Faria                 l2_table,
163153fb7844SAlberto Faria                 0
1632*353a5d84SAlberto Faria             ) < 0) {
163365f74725SFam Zheng         return VMDK_ERROR;
1634b3976d3cSFam Zheng     }
1635019d6b8fSAnthony Liguori 
1636b3976d3cSFam Zheng     extent->l2_cache_offsets[min_index] = l2_offset;
1637b3976d3cSFam Zheng     extent->l2_cache_counts[min_index] = 1;
1638019d6b8fSAnthony Liguori  found:
1639b3976d3cSFam Zheng     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
16402821c1ccSKevin Wolf     if (m_data) {
16412821c1ccSKevin Wolf         m_data->l1_index = l1_index;
16422821c1ccSKevin Wolf         m_data->l2_index = l2_index;
16432821c1ccSKevin Wolf         m_data->l2_offset = l2_offset;
16442821c1ccSKevin Wolf         m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
16452821c1ccSKevin Wolf     }
164698eb9733SSam Eiderman 
164798eb9733SSam Eiderman     if (extent->sesparse) {
164898eb9733SSam Eiderman         cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
164998eb9733SSam Eiderman         switch (cluster_sector & 0xf000000000000000) {
165098eb9733SSam Eiderman         case 0x0000000000000000:
165198eb9733SSam Eiderman             /* unallocated grain */
165298eb9733SSam Eiderman             if (cluster_sector != 0) {
165398eb9733SSam Eiderman                 return VMDK_ERROR;
165498eb9733SSam Eiderman             }
165598eb9733SSam Eiderman             break;
165698eb9733SSam Eiderman         case 0x1000000000000000:
165798eb9733SSam Eiderman             /* scsi-unmapped grain - fallthrough */
165898eb9733SSam Eiderman         case 0x2000000000000000:
165998eb9733SSam Eiderman             /* zero grain */
166098eb9733SSam Eiderman             zeroed = true;
166198eb9733SSam Eiderman             break;
166298eb9733SSam Eiderman         case 0x3000000000000000:
166398eb9733SSam Eiderman             /* allocated grain */
166498eb9733SSam Eiderman             cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
166598eb9733SSam Eiderman                               ((cluster_sector & 0x0000ffffffffffff) << 12));
166698eb9733SSam Eiderman             cluster_sector = extent->sesparse_clusters_offset +
166798eb9733SSam Eiderman                 cluster_sector * extent->cluster_sectors;
166898eb9733SSam Eiderman             break;
166998eb9733SSam Eiderman         default:
167098eb9733SSam Eiderman             return VMDK_ERROR;
167198eb9733SSam Eiderman         }
167298eb9733SSam Eiderman     } else {
167398eb9733SSam Eiderman         cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
1674019d6b8fSAnthony Liguori 
1675c6ac36e1SFam Zheng         if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
167614ead646SFam Zheng             zeroed = true;
167714ead646SFam Zheng         }
167898eb9733SSam Eiderman     }
167914ead646SFam Zheng 
1680c6ac36e1SFam Zheng     if (!cluster_sector || zeroed) {
168191b85bd3SFam Zheng         if (!allocate) {
168214ead646SFam Zheng             return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
168391b85bd3SFam Zheng         }
168498eb9733SSam Eiderman         assert(!extent->sesparse);
16859949f97eSKevin Wolf 
1686a77672eaSyuchenlin         if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
1687a77672eaSyuchenlin             return VMDK_ERROR;
1688a77672eaSyuchenlin         }
1689a77672eaSyuchenlin 
1690c6ac36e1SFam Zheng         cluster_sector = extent->next_cluster_sector;
1691c6ac36e1SFam Zheng         extent->next_cluster_sector += extent->cluster_sectors;
16929949f97eSKevin Wolf 
1693019d6b8fSAnthony Liguori         /* First of all we write grain itself, to avoid race condition
1694019d6b8fSAnthony Liguori          * that may to corrupt the image.
1695019d6b8fSAnthony Liguori          * This problem may occur because of insufficient space on host disk
1696019d6b8fSAnthony Liguori          * or inappropriate VM shutdown.
1697019d6b8fSAnthony Liguori          */
169837b1d7d8SKevin Wolf         ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
16994823cde5SKevin Wolf                                 offset, skip_start_bytes, skip_end_bytes,
17004823cde5SKevin Wolf                                 zeroed);
1701c6ac36e1SFam Zheng         if (ret) {
1702c6ac36e1SFam Zheng             return ret;
1703019d6b8fSAnthony Liguori         }
1704524089bcSReda Sallahi         if (m_data) {
17054dc20e64SKevin Wolf             m_data->new_allocation = true;
1706524089bcSReda Sallahi         }
1707019d6b8fSAnthony Liguori     }
1708c6ac36e1SFam Zheng     *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
170965f74725SFam Zheng     return VMDK_OK;
1710019d6b8fSAnthony Liguori }
1711019d6b8fSAnthony Liguori 
1712b3976d3cSFam Zheng static VmdkExtent *find_extent(BDRVVmdkState *s,
1713b3976d3cSFam Zheng                                 int64_t sector_num, VmdkExtent *start_hint)
1714b3976d3cSFam Zheng {
1715b3976d3cSFam Zheng     VmdkExtent *extent = start_hint;
1716b3976d3cSFam Zheng 
1717b3976d3cSFam Zheng     if (!extent) {
1718b3976d3cSFam Zheng         extent = &s->extents[0];
1719b3976d3cSFam Zheng     }
1720b3976d3cSFam Zheng     while (extent < &s->extents[s->num_extents]) {
1721b3976d3cSFam Zheng         if (sector_num < extent->end_sector) {
1722b3976d3cSFam Zheng             return extent;
1723b3976d3cSFam Zheng         }
1724b3976d3cSFam Zheng         extent++;
1725b3976d3cSFam Zheng     }
1726b3976d3cSFam Zheng     return NULL;
1727b3976d3cSFam Zheng }
1728b3976d3cSFam Zheng 
1729a844a2b0SKevin Wolf static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
1730a844a2b0SKevin Wolf                                                    int64_t offset)
1731a844a2b0SKevin Wolf {
17329be38598SEduardo Habkost     uint64_t extent_begin_offset, extent_relative_offset;
1733a844a2b0SKevin Wolf     uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
1734a844a2b0SKevin Wolf 
1735a844a2b0SKevin Wolf     extent_begin_offset =
1736a844a2b0SKevin Wolf         (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;
1737a844a2b0SKevin Wolf     extent_relative_offset = offset - extent_begin_offset;
17389be38598SEduardo Habkost     return extent_relative_offset % cluster_size;
1739a844a2b0SKevin Wolf }
1740a844a2b0SKevin Wolf 
1741c72080b9SEric Blake static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
1742c72080b9SEric Blake                                              bool want_zero,
1743c72080b9SEric Blake                                              int64_t offset, int64_t bytes,
1744c72080b9SEric Blake                                              int64_t *pnum, int64_t *map,
1745c72080b9SEric Blake                                              BlockDriverState **file)
1746019d6b8fSAnthony Liguori {
1747019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
1748b3976d3cSFam Zheng     int64_t index_in_cluster, n, ret;
1749c72080b9SEric Blake     uint64_t cluster_offset;
1750b3976d3cSFam Zheng     VmdkExtent *extent;
1751b3976d3cSFam Zheng 
1752c72080b9SEric Blake     extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
1753b3976d3cSFam Zheng     if (!extent) {
1754c72080b9SEric Blake         return -EIO;
1755b3976d3cSFam Zheng     }
1756f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_lock(&s->lock);
1757c72080b9SEric Blake     ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
1758c6ac36e1SFam Zheng                              0, 0);
1759f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_unlock(&s->lock);
176014ead646SFam Zheng 
1761c72080b9SEric Blake     index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
17624bc74be9SPaolo Bonzini     switch (ret) {
17634bc74be9SPaolo Bonzini     case VMDK_ERROR:
17644bc74be9SPaolo Bonzini         ret = -EIO;
17654bc74be9SPaolo Bonzini         break;
17664bc74be9SPaolo Bonzini     case VMDK_UNALLOC:
17674bc74be9SPaolo Bonzini         ret = 0;
17684bc74be9SPaolo Bonzini         break;
17694bc74be9SPaolo Bonzini     case VMDK_ZEROED:
17704bc74be9SPaolo Bonzini         ret = BDRV_BLOCK_ZERO;
17714bc74be9SPaolo Bonzini         break;
17724bc74be9SPaolo Bonzini     case VMDK_OK:
17734bc74be9SPaolo Bonzini         ret = BDRV_BLOCK_DATA;
1774e0f100f5SFam Zheng         if (!extent->compressed) {
1775d0a18f10SFam Zheng             ret |= BDRV_BLOCK_OFFSET_VALID;
1776c72080b9SEric Blake             *map = cluster_offset + index_in_cluster;
17774dd84ac9SMax Reitz             if (extent->flat) {
17784dd84ac9SMax Reitz                 ret |= BDRV_BLOCK_RECURSE;
17794dd84ac9SMax Reitz             }
17804bc74be9SPaolo Bonzini         }
1781e0f100f5SFam Zheng         *file = extent->file->bs;
17824bc74be9SPaolo Bonzini         break;
17834bc74be9SPaolo Bonzini     }
178491b85bd3SFam Zheng 
1785c72080b9SEric Blake     n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
1786c72080b9SEric Blake     *pnum = MIN(n, bytes);
1787b3976d3cSFam Zheng     return ret;
1788019d6b8fSAnthony Liguori }
1789019d6b8fSAnthony Liguori 
1790dd3f6ee2SFam Zheng static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
179137b1d7d8SKevin Wolf                             int64_t offset_in_cluster, QEMUIOVector *qiov,
179237b1d7d8SKevin Wolf                             uint64_t qiov_offset, uint64_t n_bytes,
179337b1d7d8SKevin Wolf                             uint64_t offset)
1794dd3f6ee2SFam Zheng {
1795dd3f6ee2SFam Zheng     int ret;
17962b2c8c5dSFam Zheng     VmdkGrainMarker *data = NULL;
17972b2c8c5dSFam Zheng     uLongf buf_len;
179837b1d7d8SKevin Wolf     QEMUIOVector local_qiov;
17995e82a31eSFam Zheng     int64_t write_offset;
18005e82a31eSFam Zheng     int64_t write_end_sector;
1801dd3f6ee2SFam Zheng 
18022b2c8c5dSFam Zheng     if (extent->compressed) {
180337b1d7d8SKevin Wolf         void *compressed_data;
180437b1d7d8SKevin Wolf 
1805bedb8bb4SMax Reitz         /* Only whole clusters */
1806bedb8bb4SMax Reitz         if (offset_in_cluster ||
1807bedb8bb4SMax Reitz             n_bytes > (extent->cluster_sectors * SECTOR_SIZE) ||
1808bedb8bb4SMax Reitz             (n_bytes < (extent->cluster_sectors * SECTOR_SIZE) &&
1809bedb8bb4SMax Reitz              offset + n_bytes != extent->end_sector * SECTOR_SIZE))
1810bedb8bb4SMax Reitz         {
1811bedb8bb4SMax Reitz             ret = -EINVAL;
1812bedb8bb4SMax Reitz             goto out;
1813bedb8bb4SMax Reitz         }
1814bedb8bb4SMax Reitz 
18152b2c8c5dSFam Zheng         if (!extent->has_marker) {
18162b2c8c5dSFam Zheng             ret = -EINVAL;
18172b2c8c5dSFam Zheng             goto out;
18182b2c8c5dSFam Zheng         }
18192b2c8c5dSFam Zheng         buf_len = (extent->cluster_sectors << 9) * 2;
18202b2c8c5dSFam Zheng         data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
182137b1d7d8SKevin Wolf 
182237b1d7d8SKevin Wolf         compressed_data = g_malloc(n_bytes);
182337b1d7d8SKevin Wolf         qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
182437b1d7d8SKevin Wolf         ret = compress(data->data, &buf_len, compressed_data, n_bytes);
182537b1d7d8SKevin Wolf         g_free(compressed_data);
182637b1d7d8SKevin Wolf 
182737b1d7d8SKevin Wolf         if (ret != Z_OK || buf_len == 0) {
18282b2c8c5dSFam Zheng             ret = -EINVAL;
18292b2c8c5dSFam Zheng             goto out;
18302b2c8c5dSFam Zheng         }
18315e82a31eSFam Zheng 
18324545d4f4SQingFeng Hao         data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
18334545d4f4SQingFeng Hao         data->size = cpu_to_le32(buf_len);
183437b1d7d8SKevin Wolf 
183537b1d7d8SKevin Wolf         n_bytes = buf_len + sizeof(VmdkGrainMarker);
1836199d95b0SVladimir Sementsov-Ogievskiy         qemu_iovec_init_buf(&local_qiov, data, n_bytes);
183723c4b2a8SMax Reitz 
183823c4b2a8SMax Reitz         BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
183937b1d7d8SKevin Wolf     } else {
184037b1d7d8SKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov);
184137b1d7d8SKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
184223c4b2a8SMax Reitz 
184323c4b2a8SMax Reitz         BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO);
184437b1d7d8SKevin Wolf     }
184537b1d7d8SKevin Wolf 
18463c363575SMax Reitz     write_offset = cluster_offset + offset_in_cluster;
1847a03ef88fSKevin Wolf     ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
184837b1d7d8SKevin Wolf                           &local_qiov, 0);
184937b1d7d8SKevin Wolf 
185037b1d7d8SKevin Wolf     write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
18515e82a31eSFam Zheng 
18523efffc32SRadoslav Gerganov     if (extent->compressed) {
18533efffc32SRadoslav Gerganov         extent->next_cluster_sector = write_end_sector;
18543efffc32SRadoslav Gerganov     } else {
18555e82a31eSFam Zheng         extent->next_cluster_sector = MAX(extent->next_cluster_sector,
18565e82a31eSFam Zheng                                           write_end_sector);
18573efffc32SRadoslav Gerganov     }
18585e82a31eSFam Zheng 
185937b1d7d8SKevin Wolf     if (ret < 0) {
1860dd3f6ee2SFam Zheng         goto out;
1861dd3f6ee2SFam Zheng     }
1862dd3f6ee2SFam Zheng     ret = 0;
1863dd3f6ee2SFam Zheng  out:
18642b2c8c5dSFam Zheng     g_free(data);
186537b1d7d8SKevin Wolf     if (!extent->compressed) {
186637b1d7d8SKevin Wolf         qemu_iovec_destroy(&local_qiov);
186737b1d7d8SKevin Wolf     }
1868dd3f6ee2SFam Zheng     return ret;
1869dd3f6ee2SFam Zheng }
1870dd3f6ee2SFam Zheng 
1871dd3f6ee2SFam Zheng static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
1872f10cc243SKevin Wolf                             int64_t offset_in_cluster, QEMUIOVector *qiov,
1873f10cc243SKevin Wolf                             int bytes)
1874dd3f6ee2SFam Zheng {
1875dd3f6ee2SFam Zheng     int ret;
18762b2c8c5dSFam Zheng     int cluster_bytes, buf_bytes;
18772b2c8c5dSFam Zheng     uint8_t *cluster_buf, *compressed_data;
18782b2c8c5dSFam Zheng     uint8_t *uncomp_buf;
18792b2c8c5dSFam Zheng     uint32_t data_len;
18802b2c8c5dSFam Zheng     VmdkGrainMarker *marker;
18812b2c8c5dSFam Zheng     uLongf buf_len;
1882dd3f6ee2SFam Zheng 
18832b2c8c5dSFam Zheng 
18842b2c8c5dSFam Zheng     if (!extent->compressed) {
188523c4b2a8SMax Reitz         BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO);
1886a03ef88fSKevin Wolf         ret = bdrv_co_preadv(extent->file,
1887f10cc243SKevin Wolf                              cluster_offset + offset_in_cluster, bytes,
1888f10cc243SKevin Wolf                              qiov, 0);
1889f10cc243SKevin Wolf         if (ret < 0) {
1890f10cc243SKevin Wolf             return ret;
1891dd3f6ee2SFam Zheng         }
1892f10cc243SKevin Wolf         return 0;
1893dd3f6ee2SFam Zheng     }
18942b2c8c5dSFam Zheng     cluster_bytes = extent->cluster_sectors * 512;
18952b2c8c5dSFam Zheng     /* Read two clusters in case GrainMarker + compressed data > one cluster */
18962b2c8c5dSFam Zheng     buf_bytes = cluster_bytes * 2;
18972b2c8c5dSFam Zheng     cluster_buf = g_malloc(buf_bytes);
18982b2c8c5dSFam Zheng     uncomp_buf = g_malloc(cluster_bytes);
189923c4b2a8SMax Reitz     BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
190032cc71deSAlberto Faria     ret = bdrv_pread(extent->file, cluster_offset, buf_bytes, cluster_buf, 0);
19012b2c8c5dSFam Zheng     if (ret < 0) {
19022b2c8c5dSFam Zheng         goto out;
19032b2c8c5dSFam Zheng     }
19042b2c8c5dSFam Zheng     compressed_data = cluster_buf;
19052b2c8c5dSFam Zheng     buf_len = cluster_bytes;
19062b2c8c5dSFam Zheng     data_len = cluster_bytes;
19072b2c8c5dSFam Zheng     if (extent->has_marker) {
19082b2c8c5dSFam Zheng         marker = (VmdkGrainMarker *)cluster_buf;
19092b2c8c5dSFam Zheng         compressed_data = marker->data;
19102b2c8c5dSFam Zheng         data_len = le32_to_cpu(marker->size);
19112b2c8c5dSFam Zheng     }
19122b2c8c5dSFam Zheng     if (!data_len || data_len > buf_bytes) {
19132b2c8c5dSFam Zheng         ret = -EINVAL;
19142b2c8c5dSFam Zheng         goto out;
19152b2c8c5dSFam Zheng     }
19162b2c8c5dSFam Zheng     ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
19172b2c8c5dSFam Zheng     if (ret != Z_OK) {
19182b2c8c5dSFam Zheng         ret = -EINVAL;
19192b2c8c5dSFam Zheng         goto out;
19202b2c8c5dSFam Zheng 
19212b2c8c5dSFam Zheng     }
19222b2c8c5dSFam Zheng     if (offset_in_cluster < 0 ||
1923f10cc243SKevin Wolf             offset_in_cluster + bytes > buf_len) {
19242b2c8c5dSFam Zheng         ret = -EINVAL;
19252b2c8c5dSFam Zheng         goto out;
19262b2c8c5dSFam Zheng     }
1927f10cc243SKevin Wolf     qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
19282b2c8c5dSFam Zheng     ret = 0;
19292b2c8c5dSFam Zheng 
19302b2c8c5dSFam Zheng  out:
19312b2c8c5dSFam Zheng     g_free(uncomp_buf);
19322b2c8c5dSFam Zheng     g_free(cluster_buf);
19332b2c8c5dSFam Zheng     return ret;
19342b2c8c5dSFam Zheng }
1935dd3f6ee2SFam Zheng 
1936f10cc243SKevin Wolf static int coroutine_fn
1937f7ef38ddSVladimir Sementsov-Ogievskiy vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
1938f7ef38ddSVladimir Sementsov-Ogievskiy                QEMUIOVector *qiov, BdrvRequestFlags flags)
1939019d6b8fSAnthony Liguori {
1940019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
1941b3976d3cSFam Zheng     int ret;
1942f10cc243SKevin Wolf     uint64_t n_bytes, offset_in_cluster;
1943b3976d3cSFam Zheng     VmdkExtent *extent = NULL;
1944f10cc243SKevin Wolf     QEMUIOVector local_qiov;
1945019d6b8fSAnthony Liguori     uint64_t cluster_offset;
1946f10cc243SKevin Wolf     uint64_t bytes_done = 0;
1947019d6b8fSAnthony Liguori 
1948f10cc243SKevin Wolf     qemu_iovec_init(&local_qiov, qiov->niov);
1949f10cc243SKevin Wolf     qemu_co_mutex_lock(&s->lock);
1950f10cc243SKevin Wolf 
1951f10cc243SKevin Wolf     while (bytes > 0) {
1952f10cc243SKevin Wolf         extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
1953b3976d3cSFam Zheng         if (!extent) {
1954f10cc243SKevin Wolf             ret = -EIO;
1955f10cc243SKevin Wolf             goto fail;
1956b3976d3cSFam Zheng         }
1957c6ac36e1SFam Zheng         ret = get_cluster_offset(bs, extent, NULL,
1958f10cc243SKevin Wolf                                  offset, false, &cluster_offset, 0, 0);
1959f10cc243SKevin Wolf         offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
1960f10cc243SKevin Wolf 
1961f10cc243SKevin Wolf         n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
1962f10cc243SKevin Wolf                              - offset_in_cluster);
1963f10cc243SKevin Wolf 
196414ead646SFam Zheng         if (ret != VMDK_OK) {
196591b85bd3SFam Zheng             /* if not allocated, try to read from parent image, if exist */
1966760e0063SKevin Wolf             if (bs->backing && ret != VMDK_ZEROED) {
1967ae261c86SFam Zheng                 if (!vmdk_is_cid_valid(bs)) {
1968f10cc243SKevin Wolf                     ret = -EINVAL;
1969f10cc243SKevin Wolf                     goto fail;
1970019d6b8fSAnthony Liguori                 }
1971019d6b8fSAnthony Liguori 
1972f10cc243SKevin Wolf                 qemu_iovec_reset(&local_qiov);
1973f10cc243SKevin Wolf                 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
1974f10cc243SKevin Wolf 
197523c4b2a8SMax Reitz                 /* qcow2 emits this on bs->file instead of bs->backing */
197623c4b2a8SMax Reitz                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
1977a03ef88fSKevin Wolf                 ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
1978f10cc243SKevin Wolf                                      &local_qiov, 0);
1979f10cc243SKevin Wolf                 if (ret < 0) {
1980f10cc243SKevin Wolf                     goto fail;
1981f10cc243SKevin Wolf                 }
1982f10cc243SKevin Wolf             } else {
1983f10cc243SKevin Wolf                 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
1984f10cc243SKevin Wolf             }
1985f10cc243SKevin Wolf         } else {
1986f10cc243SKevin Wolf             qemu_iovec_reset(&local_qiov);
1987f10cc243SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
1988f10cc243SKevin Wolf 
1989f10cc243SKevin Wolf             ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
1990f10cc243SKevin Wolf                                    &local_qiov, n_bytes);
1991f10cc243SKevin Wolf             if (ret) {
1992f10cc243SKevin Wolf                 goto fail;
1993f10cc243SKevin Wolf             }
1994f10cc243SKevin Wolf         }
1995f10cc243SKevin Wolf         bytes -= n_bytes;
1996f10cc243SKevin Wolf         offset += n_bytes;
1997f10cc243SKevin Wolf         bytes_done += n_bytes;
1998f10cc243SKevin Wolf     }
1999f10cc243SKevin Wolf 
2000f10cc243SKevin Wolf     ret = 0;
2001f10cc243SKevin Wolf fail:
20022914caa0SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
2003f10cc243SKevin Wolf     qemu_iovec_destroy(&local_qiov);
2004f10cc243SKevin Wolf 
20052914caa0SPaolo Bonzini     return ret;
20062914caa0SPaolo Bonzini }
20072914caa0SPaolo Bonzini 
2008cdeaf1f1SFam Zheng /**
2009cdeaf1f1SFam Zheng  * vmdk_write:
2010cdeaf1f1SFam Zheng  * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
2011cdeaf1f1SFam Zheng  *                if possible, otherwise return -ENOTSUP.
20128e507243SFam Zheng  * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
20138e507243SFam Zheng  *                with each cluster. By dry run we can find if the zero write
20148e507243SFam Zheng  *                is possible without modifying image data.
2015cdeaf1f1SFam Zheng  *
2016cdeaf1f1SFam Zheng  * Returns: error code with 0 for success.
2017cdeaf1f1SFam Zheng  */
201837b1d7d8SKevin Wolf static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
201937b1d7d8SKevin Wolf                        uint64_t bytes, QEMUIOVector *qiov,
2020cdeaf1f1SFam Zheng                        bool zeroed, bool zero_dry_run)
2021019d6b8fSAnthony Liguori {
2022019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
2023b3976d3cSFam Zheng     VmdkExtent *extent = NULL;
2024585ea0c8SFam Zheng     int ret;
202537b1d7d8SKevin Wolf     int64_t offset_in_cluster, n_bytes;
2026019d6b8fSAnthony Liguori     uint64_t cluster_offset;
202737b1d7d8SKevin Wolf     uint64_t bytes_done = 0;
2028b3976d3cSFam Zheng     VmdkMetaData m_data;
2029019d6b8fSAnthony Liguori 
203037b1d7d8SKevin Wolf     if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
203137b1d7d8SKevin Wolf         error_report("Wrong offset: offset=0x%" PRIx64
20329af9e0feSMarkus Armbruster                      " total_sectors=0x%" PRIx64,
203337b1d7d8SKevin Wolf                      offset, bs->total_sectors);
20347fa60fa3SFam Zheng         return -EIO;
2035019d6b8fSAnthony Liguori     }
2036019d6b8fSAnthony Liguori 
203737b1d7d8SKevin Wolf     while (bytes > 0) {
203837b1d7d8SKevin Wolf         extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
2039b3976d3cSFam Zheng         if (!extent) {
2040b3976d3cSFam Zheng             return -EIO;
2041b3976d3cSFam Zheng         }
204298eb9733SSam Eiderman         if (extent->sesparse) {
204398eb9733SSam Eiderman             return -ENOTSUP;
204498eb9733SSam Eiderman         }
204537b1d7d8SKevin Wolf         offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
204637b1d7d8SKevin Wolf         n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
204737b1d7d8SKevin Wolf                              - offset_in_cluster);
204837b1d7d8SKevin Wolf 
204937b1d7d8SKevin Wolf         ret = get_cluster_offset(bs, extent, &m_data, offset,
2050c6ac36e1SFam Zheng                                  !(extent->compressed || zeroed),
205137b1d7d8SKevin Wolf                                  &cluster_offset, offset_in_cluster,
205237b1d7d8SKevin Wolf                                  offset_in_cluster + n_bytes);
20532b2c8c5dSFam Zheng         if (extent->compressed) {
205465f74725SFam Zheng             if (ret == VMDK_OK) {
20552b2c8c5dSFam Zheng                 /* Refuse write to allocated cluster for streamOptimized */
20564823970bSFam Zheng                 error_report("Could not write to allocated cluster"
20574823970bSFam Zheng                               " for streamOptimized");
20582b2c8c5dSFam Zheng                 return -EIO;
20592821c1ccSKevin Wolf             } else if (!zeroed) {
20602b2c8c5dSFam Zheng                 /* allocate */
206137b1d7d8SKevin Wolf                 ret = get_cluster_offset(bs, extent, &m_data, offset,
2062c6ac36e1SFam Zheng                                          true, &cluster_offset, 0, 0);
20632b2c8c5dSFam Zheng             }
20642b2c8c5dSFam Zheng         }
2065cdeaf1f1SFam Zheng         if (ret == VMDK_ERROR) {
206691b85bd3SFam Zheng             return -EINVAL;
2067b3976d3cSFam Zheng         }
2068cdeaf1f1SFam Zheng         if (zeroed) {
2069cdeaf1f1SFam Zheng             /* Do zeroed write, buf is ignored */
2070cdeaf1f1SFam Zheng             if (extent->has_zero_grain &&
207137b1d7d8SKevin Wolf                     offset_in_cluster == 0 &&
207237b1d7d8SKevin Wolf                     n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
207337b1d7d8SKevin Wolf                 n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
207478cae78dSKevin Wolf                 if (!zero_dry_run && ret != VMDK_ZEROED) {
2075cdeaf1f1SFam Zheng                     /* update L2 tables */
2076c6ac36e1SFam Zheng                     if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
2077c6ac36e1SFam Zheng                             != VMDK_OK) {
2078cdeaf1f1SFam Zheng                         return -EIO;
2079cdeaf1f1SFam Zheng                     }
2080cdeaf1f1SFam Zheng                 }
2081cdeaf1f1SFam Zheng             } else {
2082cdeaf1f1SFam Zheng                 return -ENOTSUP;
2083cdeaf1f1SFam Zheng             }
2084cdeaf1f1SFam Zheng         } else {
208537b1d7d8SKevin Wolf             ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
208637b1d7d8SKevin Wolf                                     qiov, bytes_done, n_bytes, offset);
2087dd3f6ee2SFam Zheng             if (ret) {
20887fa60fa3SFam Zheng                 return ret;
2089b3976d3cSFam Zheng             }
20904dc20e64SKevin Wolf             if (m_data.new_allocation) {
2091019d6b8fSAnthony Liguori                 /* update L2 tables */
2092c6ac36e1SFam Zheng                 if (vmdk_L2update(extent, &m_data,
2093c6ac36e1SFam Zheng                                   cluster_offset >> BDRV_SECTOR_BITS)
2094c6ac36e1SFam Zheng                         != VMDK_OK) {
20957fa60fa3SFam Zheng                     return -EIO;
2096019d6b8fSAnthony Liguori                 }
2097b3976d3cSFam Zheng             }
2098cdeaf1f1SFam Zheng         }
209937b1d7d8SKevin Wolf         bytes -= n_bytes;
210037b1d7d8SKevin Wolf         offset += n_bytes;
210137b1d7d8SKevin Wolf         bytes_done += n_bytes;
2102019d6b8fSAnthony Liguori 
2103ae261c86SFam Zheng         /* update CID on the first write every time the virtual disk is
2104ae261c86SFam Zheng          * opened */
210569b4d86dSFam Zheng         if (!s->cid_updated) {
2106e5dc64b8SFam Zheng             ret = vmdk_write_cid(bs, g_random_int());
210799f1835dSKevin Wolf             if (ret < 0) {
210899f1835dSKevin Wolf                 return ret;
210999f1835dSKevin Wolf             }
211069b4d86dSFam Zheng             s->cid_updated = true;
2111019d6b8fSAnthony Liguori         }
2112019d6b8fSAnthony Liguori     }
2113019d6b8fSAnthony Liguori     return 0;
2114019d6b8fSAnthony Liguori }
2115019d6b8fSAnthony Liguori 
211637b1d7d8SKevin Wolf static int coroutine_fn
2117e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
2118e75abedaSVladimir Sementsov-Ogievskiy                 QEMUIOVector *qiov, BdrvRequestFlags flags)
2119e183ef75SPaolo Bonzini {
2120e183ef75SPaolo Bonzini     int ret;
2121e183ef75SPaolo Bonzini     BDRVVmdkState *s = bs->opaque;
2122e183ef75SPaolo Bonzini     qemu_co_mutex_lock(&s->lock);
212337b1d7d8SKevin Wolf     ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
2124cdeaf1f1SFam Zheng     qemu_co_mutex_unlock(&s->lock);
2125cdeaf1f1SFam Zheng     return ret;
2126cdeaf1f1SFam Zheng }
2127cdeaf1f1SFam Zheng 
2128b2c622d3SPavel Butsykin static int coroutine_fn
2129e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
2130e75abedaSVladimir Sementsov-Ogievskiy                            QEMUIOVector *qiov)
213137b1d7d8SKevin Wolf {
213251b3c6b7Syuchenlin     if (bytes == 0) {
213351b3c6b7Syuchenlin         /* The caller will write bytes 0 to signal EOF.
213451b3c6b7Syuchenlin          * When receive it, we align EOF to a sector boundary. */
213551b3c6b7Syuchenlin         BDRVVmdkState *s = bs->opaque;
213651b3c6b7Syuchenlin         int i, ret;
213751b3c6b7Syuchenlin         int64_t length;
213851b3c6b7Syuchenlin 
213951b3c6b7Syuchenlin         for (i = 0; i < s->num_extents; i++) {
214051b3c6b7Syuchenlin             length = bdrv_getlength(s->extents[i].file->bs);
214151b3c6b7Syuchenlin             if (length < 0) {
214251b3c6b7Syuchenlin                 return length;
214351b3c6b7Syuchenlin             }
214451b3c6b7Syuchenlin             length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
2145c80d8b06SMax Reitz             ret = bdrv_truncate(s->extents[i].file, length, false,
21467b8e4857SKevin Wolf                                 PREALLOC_MODE_OFF, 0, NULL);
214751b3c6b7Syuchenlin             if (ret < 0) {
214851b3c6b7Syuchenlin                 return ret;
214951b3c6b7Syuchenlin             }
215051b3c6b7Syuchenlin         }
215151b3c6b7Syuchenlin         return 0;
215251b3c6b7Syuchenlin     }
2153b2c622d3SPavel Butsykin     return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
2154ba0ad89eSFam Zheng }
2155ba0ad89eSFam Zheng 
2156a620f2aeSEric Blake static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
2157a620f2aeSEric Blake                                               int64_t offset,
2158f34b2bcfSVladimir Sementsov-Ogievskiy                                               int64_t bytes,
2159aa7bfbffSPeter Lieven                                               BdrvRequestFlags flags)
2160cdeaf1f1SFam Zheng {
2161cdeaf1f1SFam Zheng     int ret;
2162cdeaf1f1SFam Zheng     BDRVVmdkState *s = bs->opaque;
216337b1d7d8SKevin Wolf 
2164cdeaf1f1SFam Zheng     qemu_co_mutex_lock(&s->lock);
21658e507243SFam Zheng     /* write zeroes could fail if sectors not aligned to cluster, test it with
21668e507243SFam Zheng      * dry_run == true before really updating image */
216737b1d7d8SKevin Wolf     ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
2168cdeaf1f1SFam Zheng     if (!ret) {
216937b1d7d8SKevin Wolf         ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
2170cdeaf1f1SFam Zheng     }
2171e183ef75SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
2172e183ef75SPaolo Bonzini     return ret;
2173e183ef75SPaolo Bonzini }
2174e183ef75SPaolo Bonzini 
21755be28490SFam Zheng static int vmdk_init_extent(BlockBackend *blk,
21765be28490SFam Zheng                             int64_t filesize, bool flat,
21775be28490SFam Zheng                             bool compress, bool zeroed_grain,
21785be28490SFam Zheng                             Error **errp)
2179019d6b8fSAnthony Liguori {
2180f66fd6c3SFam Zheng     int ret, i;
2181019d6b8fSAnthony Liguori     VMDK4Header header;
2182917703c1SFam Zheng     uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
2183917703c1SFam Zheng     uint32_t *gd_buf = NULL;
2184917703c1SFam Zheng     int gd_buf_size;
21850e7e1989SKevin Wolf 
2186917703c1SFam Zheng     if (flat) {
21878c6242b6SKevin Wolf         ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
2188f66fd6c3SFam Zheng         goto exit;
2189f66fd6c3SFam Zheng     }
2190019d6b8fSAnthony Liguori     magic = cpu_to_be32(VMDK4_MAGIC);
2191019d6b8fSAnthony Liguori     memset(&header, 0, sizeof(header));
2192d62d9dc4SFam Zheng     if (compress) {
2193d62d9dc4SFam Zheng         header.version = 3;
2194d62d9dc4SFam Zheng     } else if (zeroed_grain) {
2195d62d9dc4SFam Zheng         header.version = 2;
2196d62d9dc4SFam Zheng     } else {
2197d62d9dc4SFam Zheng         header.version = 1;
2198d62d9dc4SFam Zheng     }
219995b0aa42SFam Zheng     header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
220069e0b6dfSFam Zheng                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
220169e0b6dfSFam Zheng                    | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
22026c031aacSFam Zheng     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
2203917703c1SFam Zheng     header.capacity = filesize / BDRV_SECTOR_SIZE;
220416372ff0SAlexander Graf     header.granularity = 128;
2205917703c1SFam Zheng     header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
2206019d6b8fSAnthony Liguori 
2207917703c1SFam Zheng     grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
2208917703c1SFam Zheng     gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
2209917703c1SFam Zheng                            BDRV_SECTOR_SIZE);
2210917703c1SFam Zheng     gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
2211917703c1SFam Zheng     gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
2212019d6b8fSAnthony Liguori 
2213019d6b8fSAnthony Liguori     header.desc_offset = 1;
2214019d6b8fSAnthony Liguori     header.desc_size = 20;
2215019d6b8fSAnthony Liguori     header.rgd_offset = header.desc_offset + header.desc_size;
2216917703c1SFam Zheng     header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
2217019d6b8fSAnthony Liguori     header.grain_offset =
2218917703c1SFam Zheng         ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
2219917703c1SFam Zheng                  header.granularity);
222016372ff0SAlexander Graf     /* swap endianness for all header fields */
222116372ff0SAlexander Graf     header.version = cpu_to_le32(header.version);
222216372ff0SAlexander Graf     header.flags = cpu_to_le32(header.flags);
222316372ff0SAlexander Graf     header.capacity = cpu_to_le64(header.capacity);
222416372ff0SAlexander Graf     header.granularity = cpu_to_le64(header.granularity);
2225ca8804ceSFam Zheng     header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
2226019d6b8fSAnthony Liguori     header.desc_offset = cpu_to_le64(header.desc_offset);
2227019d6b8fSAnthony Liguori     header.desc_size = cpu_to_le64(header.desc_size);
2228019d6b8fSAnthony Liguori     header.rgd_offset = cpu_to_le64(header.rgd_offset);
2229019d6b8fSAnthony Liguori     header.gd_offset = cpu_to_le64(header.gd_offset);
2230019d6b8fSAnthony Liguori     header.grain_offset = cpu_to_le64(header.grain_offset);
22316c031aacSFam Zheng     header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
2232019d6b8fSAnthony Liguori 
2233019d6b8fSAnthony Liguori     header.check_bytes[0] = 0xa;
2234019d6b8fSAnthony Liguori     header.check_bytes[1] = 0x20;
2235019d6b8fSAnthony Liguori     header.check_bytes[2] = 0xd;
2236019d6b8fSAnthony Liguori     header.check_bytes[3] = 0xa;
2237019d6b8fSAnthony Liguori 
2238019d6b8fSAnthony Liguori     /* write all the data */
22398341f00dSEric Blake     ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
2240917703c1SFam Zheng     if (ret < 0) {
2241c6bd8c70SMarkus Armbruster         error_setg(errp, QERR_IO_ERROR);
22421640366cSKirill A. Shutemov         goto exit;
22431640366cSKirill A. Shutemov     }
22448341f00dSEric Blake     ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
2245917703c1SFam Zheng     if (ret < 0) {
2246c6bd8c70SMarkus Armbruster         error_setg(errp, QERR_IO_ERROR);
22471640366cSKirill A. Shutemov         goto exit;
22481640366cSKirill A. Shutemov     }
2249019d6b8fSAnthony Liguori 
2250c80d8b06SMax Reitz     ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
22518c6242b6SKevin Wolf                        PREALLOC_MODE_OFF, 0, errp);
22521640366cSKirill A. Shutemov     if (ret < 0) {
22531640366cSKirill A. Shutemov         goto exit;
22541640366cSKirill A. Shutemov     }
2255019d6b8fSAnthony Liguori 
2256019d6b8fSAnthony Liguori     /* write grain directory */
2257917703c1SFam Zheng     gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
2258917703c1SFam Zheng     gd_buf = g_malloc0(gd_buf_size);
2259917703c1SFam Zheng     for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
22601640366cSKirill A. Shutemov          i < gt_count; i++, tmp += gt_size) {
2261917703c1SFam Zheng         gd_buf[i] = cpu_to_le32(tmp);
22621640366cSKirill A. Shutemov     }
2263c4bea169SKevin Wolf     ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
22648341f00dSEric Blake                      gd_buf, gd_buf_size, 0);
2265917703c1SFam Zheng     if (ret < 0) {
2266c6bd8c70SMarkus Armbruster         error_setg(errp, QERR_IO_ERROR);
2267917703c1SFam Zheng         goto exit;
22681640366cSKirill A. Shutemov     }
2269019d6b8fSAnthony Liguori 
2270019d6b8fSAnthony Liguori     /* write backup grain directory */
2271917703c1SFam Zheng     for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
22721640366cSKirill A. Shutemov          i < gt_count; i++, tmp += gt_size) {
2273917703c1SFam Zheng         gd_buf[i] = cpu_to_le32(tmp);
22741640366cSKirill A. Shutemov     }
2275c4bea169SKevin Wolf     ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
22768341f00dSEric Blake                      gd_buf, gd_buf_size, 0);
2277917703c1SFam Zheng     if (ret < 0) {
2278c6bd8c70SMarkus Armbruster         error_setg(errp, QERR_IO_ERROR);
22791640366cSKirill A. Shutemov     }
2280019d6b8fSAnthony Liguori 
2281f66fd6c3SFam Zheng     ret = 0;
2282f66fd6c3SFam Zheng exit:
2283917703c1SFam Zheng     g_free(gd_buf);
2284f66fd6c3SFam Zheng     return ret;
2285f66fd6c3SFam Zheng }
2286019d6b8fSAnthony Liguori 
22875be28490SFam Zheng static int vmdk_create_extent(const char *filename, int64_t filesize,
22885be28490SFam Zheng                               bool flat, bool compress, bool zeroed_grain,
22895be28490SFam Zheng                               BlockBackend **pbb,
22905be28490SFam Zheng                               QemuOpts *opts, Error **errp)
22915be28490SFam Zheng {
22925be28490SFam Zheng     int ret;
22935be28490SFam Zheng     BlockBackend *blk = NULL;
22945be28490SFam Zheng 
2295668f62ecSMarkus Armbruster     ret = bdrv_create_file(filename, opts, errp);
22965be28490SFam Zheng     if (ret < 0) {
22975be28490SFam Zheng         goto exit;
22985be28490SFam Zheng     }
22995be28490SFam Zheng 
23005be28490SFam Zheng     blk = blk_new_open(filename, NULL, NULL,
23015be28490SFam Zheng                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
2302af175e85SMarkus Armbruster                        errp);
23035be28490SFam Zheng     if (blk == NULL) {
23045be28490SFam Zheng         ret = -EIO;
23055be28490SFam Zheng         goto exit;
23065be28490SFam Zheng     }
23075be28490SFam Zheng 
23085be28490SFam Zheng     blk_set_allow_write_beyond_eof(blk, true);
23095be28490SFam Zheng 
23105be28490SFam Zheng     ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp);
23115be28490SFam Zheng exit:
23125be28490SFam Zheng     if (blk) {
23135be28490SFam Zheng         if (pbb) {
23145be28490SFam Zheng             *pbb = blk;
23155be28490SFam Zheng         } else {
23165be28490SFam Zheng             blk_unref(blk);
23175be28490SFam Zheng             blk = NULL;
23185be28490SFam Zheng         }
23195be28490SFam Zheng     }
23205be28490SFam Zheng     return ret;
23215be28490SFam Zheng }
23225be28490SFam Zheng 
2323f66fd6c3SFam Zheng static int filename_decompose(const char *filename, char *path, char *prefix,
23244823970bSFam Zheng                               char *postfix, size_t buf_len, Error **errp)
2325f66fd6c3SFam Zheng {
2326f66fd6c3SFam Zheng     const char *p, *q;
2327f66fd6c3SFam Zheng 
2328f66fd6c3SFam Zheng     if (filename == NULL || !strlen(filename)) {
23294823970bSFam Zheng         error_setg(errp, "No filename provided");
233065f74725SFam Zheng         return VMDK_ERROR;
2331f66fd6c3SFam Zheng     }
2332f66fd6c3SFam Zheng     p = strrchr(filename, '/');
2333f66fd6c3SFam Zheng     if (p == NULL) {
2334f66fd6c3SFam Zheng         p = strrchr(filename, '\\');
2335f66fd6c3SFam Zheng     }
2336f66fd6c3SFam Zheng     if (p == NULL) {
2337f66fd6c3SFam Zheng         p = strrchr(filename, ':');
2338f66fd6c3SFam Zheng     }
2339f66fd6c3SFam Zheng     if (p != NULL) {
2340f66fd6c3SFam Zheng         p++;
2341f66fd6c3SFam Zheng         if (p - filename >= buf_len) {
234265f74725SFam Zheng             return VMDK_ERROR;
2343f66fd6c3SFam Zheng         }
2344f66fd6c3SFam Zheng         pstrcpy(path, p - filename + 1, filename);
2345f66fd6c3SFam Zheng     } else {
2346f66fd6c3SFam Zheng         p = filename;
2347f66fd6c3SFam Zheng         path[0] = '\0';
2348f66fd6c3SFam Zheng     }
2349f66fd6c3SFam Zheng     q = strrchr(p, '.');
2350f66fd6c3SFam Zheng     if (q == NULL) {
2351f66fd6c3SFam Zheng         pstrcpy(prefix, buf_len, p);
2352f66fd6c3SFam Zheng         postfix[0] = '\0';
2353f66fd6c3SFam Zheng     } else {
2354f66fd6c3SFam Zheng         if (q - p >= buf_len) {
235565f74725SFam Zheng             return VMDK_ERROR;
2356f66fd6c3SFam Zheng         }
2357f66fd6c3SFam Zheng         pstrcpy(prefix, q - p + 1, p);
2358f66fd6c3SFam Zheng         pstrcpy(postfix, buf_len, q);
2359f66fd6c3SFam Zheng     }
236065f74725SFam Zheng     return VMDK_OK;
2361f66fd6c3SFam Zheng }
2362f66fd6c3SFam Zheng 
23633015372dSFam Zheng /*
23643015372dSFam Zheng  * idx == 0: get or create the descriptor file (also the image file if in a
23653015372dSFam Zheng  *           non-split format.
23663015372dSFam Zheng  * idx >= 1: get the n-th extent if in a split subformat
23673015372dSFam Zheng  */
23683015372dSFam Zheng typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
23693015372dSFam Zheng                                                int idx,
23703015372dSFam Zheng                                                bool flat,
23713015372dSFam Zheng                                                bool split,
23723015372dSFam Zheng                                                bool compress,
23733015372dSFam Zheng                                                bool zeroed_grain,
23743015372dSFam Zheng                                                void *opaque,
23753015372dSFam Zheng                                                Error **errp);
23763015372dSFam Zheng 
23773015372dSFam Zheng static void vmdk_desc_add_extent(GString *desc,
23783015372dSFam Zheng                                  const char *extent_line_fmt,
23793015372dSFam Zheng                                  int64_t size, const char *filename)
23803015372dSFam Zheng {
23813015372dSFam Zheng     char *basename = g_path_get_basename(filename);
23823015372dSFam Zheng 
23833015372dSFam Zheng     g_string_append_printf(desc, extent_line_fmt,
23843015372dSFam Zheng                            DIV_ROUND_UP(size, BDRV_SECTOR_SIZE), basename);
23853015372dSFam Zheng     g_free(basename);
23863015372dSFam Zheng }
23873015372dSFam Zheng 
23883015372dSFam Zheng static int coroutine_fn vmdk_co_do_create(int64_t size,
23893015372dSFam Zheng                                           BlockdevVmdkSubformat subformat,
23903015372dSFam Zheng                                           BlockdevVmdkAdapterType adapter_type,
23913015372dSFam Zheng                                           const char *backing_file,
23923015372dSFam Zheng                                           const char *hw_version,
2393f3d43dfdSThomas Weißschuh                                           const char *toolsversion,
23943015372dSFam Zheng                                           bool compat6,
23953015372dSFam Zheng                                           bool zeroed_grain,
23963015372dSFam Zheng                                           vmdk_create_extent_fn extent_fn,
23973015372dSFam Zheng                                           void *opaque,
2398efc75e2aSStefan Hajnoczi                                           Error **errp)
2399f66fd6c3SFam Zheng {
24003015372dSFam Zheng     int extent_idx;
24013015372dSFam Zheng     BlockBackend *blk = NULL;
24024a960eceSKevin Wolf     BlockBackend *extent_blk;
2403c13959c7SFam Zheng     Error *local_err = NULL;
2404af057fe7SFam Zheng     char *desc = NULL;
2405f66fd6c3SFam Zheng     int ret = 0;
24066c031aacSFam Zheng     bool flat, split, compress;
2407af057fe7SFam Zheng     GString *ext_desc_lines;
2408f66fd6c3SFam Zheng     const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
24093015372dSFam Zheng     int64_t extent_size;
24103015372dSFam Zheng     int64_t created_size = 0;
24113015372dSFam Zheng     const char *extent_line_fmt;
2412fe206562SJeff Cody     char *parent_desc_line = g_malloc0(BUF_SIZE);
2413f66fd6c3SFam Zheng     uint32_t parent_cid = 0xffffffff;
24147f2039f6SOthmar Pasteka     uint32_t number_heads = 16;
2415917703c1SFam Zheng     uint32_t desc_offset = 0, desc_len;
2416f66fd6c3SFam Zheng     const char desc_template[] =
2417f66fd6c3SFam Zheng         "# Disk DescriptorFile\n"
2418f66fd6c3SFam Zheng         "version=1\n"
24199b17031aSFam Zheng         "CID=%" PRIx32 "\n"
24209b17031aSFam Zheng         "parentCID=%" PRIx32 "\n"
2421f66fd6c3SFam Zheng         "createType=\"%s\"\n"
2422f66fd6c3SFam Zheng         "%s"
2423f66fd6c3SFam Zheng         "\n"
2424f66fd6c3SFam Zheng         "# Extent description\n"
2425f66fd6c3SFam Zheng         "%s"
2426f66fd6c3SFam Zheng         "\n"
2427f66fd6c3SFam Zheng         "# The Disk Data Base\n"
2428f66fd6c3SFam Zheng         "#DDB\n"
2429f66fd6c3SFam Zheng         "\n"
2430f249924eSJanne Karhunen         "ddb.virtualHWVersion = \"%s\"\n"
2431f66fd6c3SFam Zheng         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
24324ab9dab5SFam Zheng         "ddb.geometry.heads = \"%" PRIu32 "\"\n"
2433f66fd6c3SFam Zheng         "ddb.geometry.sectors = \"63\"\n"
2434f3d43dfdSThomas Weißschuh         "ddb.adapterType = \"%s\"\n"
2435f3d43dfdSThomas Weißschuh         "ddb.toolsVersion = \"%s\"\n";
2436f66fd6c3SFam Zheng 
2437af057fe7SFam Zheng     ext_desc_lines = g_string_new(NULL);
2438af057fe7SFam Zheng 
2439f66fd6c3SFam Zheng     /* Read out options */
24403015372dSFam Zheng     if (compat6) {
24413015372dSFam Zheng         if (hw_version) {
2442f249924eSJanne Karhunen             error_setg(errp,
2443f249924eSJanne Karhunen                        "compat6 cannot be enabled with hwversion set");
2444f249924eSJanne Karhunen             ret = -EINVAL;
2445f249924eSJanne Karhunen             goto exit;
2446f249924eSJanne Karhunen         }
24473015372dSFam Zheng         hw_version = "6";
2448f249924eSJanne Karhunen     }
24493015372dSFam Zheng     if (!hw_version) {
24503015372dSFam Zheng         hw_version = "4";
2451f66fd6c3SFam Zheng     }
2452f3d43dfdSThomas Weißschuh     if (!toolsversion) {
2453f3d43dfdSThomas Weißschuh         toolsversion = "2147483647";
2454f3d43dfdSThomas Weißschuh     }
24555820f1daSChunyan Liu 
24563015372dSFam Zheng     if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
24577f2039f6SOthmar Pasteka         /* that's the number of heads with which vmware operates when
24587f2039f6SOthmar Pasteka            creating, exporting, etc. vmdk files with a non-ide adapter type */
24597f2039f6SOthmar Pasteka         number_heads = 255;
24607f2039f6SOthmar Pasteka     }
24613015372dSFam Zheng     split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) ||
24623015372dSFam Zheng             (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE);
24633015372dSFam Zheng     flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) ||
24643015372dSFam Zheng            (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT);
24653015372dSFam Zheng     compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED;
24663015372dSFam Zheng 
2467f66fd6c3SFam Zheng     if (flat) {
24683015372dSFam Zheng         extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n";
2469f66fd6c3SFam Zheng     } else {
24703015372dSFam Zheng         extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n";
2471f66fd6c3SFam Zheng     }
2472f66fd6c3SFam Zheng     if (flat && backing_file) {
24734823970bSFam Zheng         error_setg(errp, "Flat image can't have backing file");
2474af057fe7SFam Zheng         ret = -ENOTSUP;
2475af057fe7SFam Zheng         goto exit;
2476f66fd6c3SFam Zheng     }
247752c8d629SFam Zheng     if (flat && zeroed_grain) {
247852c8d629SFam Zheng         error_setg(errp, "Flat image can't enable zeroed grain");
2479af057fe7SFam Zheng         ret = -ENOTSUP;
2480af057fe7SFam Zheng         goto exit;
248152c8d629SFam Zheng     }
24823015372dSFam Zheng 
24833015372dSFam Zheng     /* Create extents */
24843015372dSFam Zheng     if (split) {
24853015372dSFam Zheng         extent_size = split_size;
24863015372dSFam Zheng     } else {
24873015372dSFam Zheng         extent_size = size;
24883015372dSFam Zheng     }
24893015372dSFam Zheng     if (!split && !flat) {
24903015372dSFam Zheng         created_size = extent_size;
24913015372dSFam Zheng     } else {
24923015372dSFam Zheng         created_size = 0;
24933015372dSFam Zheng     }
24943015372dSFam Zheng     /* Get the descriptor file BDS */
24953015372dSFam Zheng     blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain,
24963015372dSFam Zheng                     opaque, errp);
24973015372dSFam Zheng     if (!blk) {
24983015372dSFam Zheng         ret = -EIO;
24993015372dSFam Zheng         goto exit;
25003015372dSFam Zheng     }
25013015372dSFam Zheng     if (!split && !flat) {
25023015372dSFam Zheng         vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size,
25033015372dSFam Zheng                              blk_bs(blk)->filename);
25043015372dSFam Zheng     }
25053015372dSFam Zheng 
2506f66fd6c3SFam Zheng     if (backing_file) {
25073015372dSFam Zheng         BlockBackend *backing;
2508645ae7d8SMax Reitz         char *full_backing =
2509645ae7d8SMax Reitz             bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename,
2510645ae7d8SMax Reitz                                                          backing_file,
25111085daf9SMax Reitz                                                          &local_err);
25121085daf9SMax Reitz         if (local_err) {
25131085daf9SMax Reitz             error_propagate(errp, local_err);
25141085daf9SMax Reitz             ret = -ENOENT;
25151085daf9SMax Reitz             goto exit;
25161085daf9SMax Reitz         }
2517645ae7d8SMax Reitz         assert(full_backing);
2518c4bea169SKevin Wolf 
25193015372dSFam Zheng         backing = blk_new_open(full_backing, NULL, NULL,
252072e775c7SKevin Wolf                                BDRV_O_NO_BACKING, errp);
25211085daf9SMax Reitz         g_free(full_backing);
25223015372dSFam Zheng         if (backing == NULL) {
2523c4bea169SKevin Wolf             ret = -EIO;
2524af057fe7SFam Zheng             goto exit;
2525f66fd6c3SFam Zheng         }
25263015372dSFam Zheng         if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
25273015372dSFam Zheng             error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
25283015372dSFam Zheng                        blk_bs(backing)->drv->format_name);
25293015372dSFam Zheng             blk_unref(backing);
2530af057fe7SFam Zheng             ret = -EINVAL;
2531af057fe7SFam Zheng             goto exit;
2532f66fd6c3SFam Zheng         }
25333015372dSFam Zheng         ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
25343015372dSFam Zheng         blk_unref(backing);
25359877860eSPeter Maydell         if (ret) {
25363015372dSFam Zheng             error_setg(errp, "Failed to read parent CID");
25379877860eSPeter Maydell             goto exit;
25389877860eSPeter Maydell         }
2539fe206562SJeff Cody         snprintf(parent_desc_line, BUF_SIZE,
25408ed610a1SFam Zheng                 "parentFileNameHint=\"%s\"", backing_file);
2541f66fd6c3SFam Zheng     }
25423015372dSFam Zheng     extent_idx = 1;
25433015372dSFam Zheng     while (created_size < size) {
25443015372dSFam Zheng         int64_t cur_size = MIN(size - created_size, extent_size);
25453015372dSFam Zheng         extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress,
25463015372dSFam Zheng                                zeroed_grain, opaque, errp);
25473015372dSFam Zheng         if (!extent_blk) {
2548af057fe7SFam Zheng             ret = -EINVAL;
2549af057fe7SFam Zheng             goto exit;
2550f66fd6c3SFam Zheng         }
25513015372dSFam Zheng         vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size,
25523015372dSFam Zheng                              blk_bs(extent_blk)->filename);
25533015372dSFam Zheng         created_size += cur_size;
25543015372dSFam Zheng         extent_idx++;
25553015372dSFam Zheng         blk_unref(extent_blk);
2556f66fd6c3SFam Zheng     }
25574a960eceSKevin Wolf 
25584a960eceSKevin Wolf     /* Check whether we got excess extents */
25594a960eceSKevin Wolf     extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
25604a960eceSKevin Wolf                            opaque, NULL);
25614a960eceSKevin Wolf     if (extent_blk) {
25624a960eceSKevin Wolf         blk_unref(extent_blk);
25634a960eceSKevin Wolf         error_setg(errp, "List of extents contains unused extents");
25644a960eceSKevin Wolf         ret = -EINVAL;
25654a960eceSKevin Wolf         goto exit;
25664a960eceSKevin Wolf     }
25674a960eceSKevin Wolf 
2568f66fd6c3SFam Zheng     /* generate descriptor file */
2569af057fe7SFam Zheng     desc = g_strdup_printf(desc_template,
2570e5dc64b8SFam Zheng                            g_random_int(),
2571f66fd6c3SFam Zheng                            parent_cid,
25723015372dSFam Zheng                            BlockdevVmdkSubformat_str(subformat),
2573f66fd6c3SFam Zheng                            parent_desc_line,
2574af057fe7SFam Zheng                            ext_desc_lines->str,
2575f249924eSJanne Karhunen                            hw_version,
25763015372dSFam Zheng                            size /
2577917703c1SFam Zheng                                (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
2578af057fe7SFam Zheng                            number_heads,
2579f3d43dfdSThomas Weißschuh                            BlockdevVmdkAdapterType_str(adapter_type),
2580f3d43dfdSThomas Weißschuh                            toolsversion);
2581917703c1SFam Zheng     desc_len = strlen(desc);
2582917703c1SFam Zheng     /* the descriptor offset = 0x200 */
2583917703c1SFam Zheng     if (!split && !flat) {
2584917703c1SFam Zheng         desc_offset = 0x200;
2585f66fd6c3SFam Zheng     }
2586c4bea169SKevin Wolf 
25873015372dSFam Zheng     ret = blk_pwrite(blk, desc_offset, desc, desc_len, 0);
2588917703c1SFam Zheng     if (ret < 0) {
2589917703c1SFam Zheng         error_setg_errno(errp, -ret, "Could not write description");
2590917703c1SFam Zheng         goto exit;
2591917703c1SFam Zheng     }
2592917703c1SFam Zheng     /* bdrv_pwrite write padding zeros to align to sector, we don't need that
2593917703c1SFam Zheng      * for description file */
2594917703c1SFam Zheng     if (desc_offset == 0) {
25958c6242b6SKevin Wolf         ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
25963015372dSFam Zheng         if (ret < 0) {
25973015372dSFam Zheng             goto exit;
2598917703c1SFam Zheng         }
25993015372dSFam Zheng     }
26003015372dSFam Zheng     ret = 0;
2601af057fe7SFam Zheng exit:
26023015372dSFam Zheng     if (blk) {
26033015372dSFam Zheng         blk_unref(blk);
2604917703c1SFam Zheng     }
26053015372dSFam Zheng     g_free(desc);
26063015372dSFam Zheng     g_free(parent_desc_line);
26073015372dSFam Zheng     g_string_free(ext_desc_lines, true);
26083015372dSFam Zheng     return ret;
26093015372dSFam Zheng }
26103015372dSFam Zheng 
26113015372dSFam Zheng typedef struct {
26123015372dSFam Zheng     char *path;
26133015372dSFam Zheng     char *prefix;
26143015372dSFam Zheng     char *postfix;
26153015372dSFam Zheng     QemuOpts *opts;
26163015372dSFam Zheng } VMDKCreateOptsData;
26173015372dSFam Zheng 
26183015372dSFam Zheng static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
26193015372dSFam Zheng                                             bool flat, bool split, bool compress,
26203015372dSFam Zheng                                             bool zeroed_grain, void *opaque,
26213015372dSFam Zheng                                             Error **errp)
26223015372dSFam Zheng {
26233015372dSFam Zheng     BlockBackend *blk = NULL;
26243015372dSFam Zheng     BlockDriverState *bs = NULL;
26253015372dSFam Zheng     VMDKCreateOptsData *data = opaque;
26263015372dSFam Zheng     char *ext_filename = NULL;
26273015372dSFam Zheng     char *rel_filename = NULL;
26283015372dSFam Zheng 
26294a960eceSKevin Wolf     /* We're done, don't create excess extents. */
26304a960eceSKevin Wolf     if (size == -1) {
26314a960eceSKevin Wolf         assert(errp == NULL);
26324a960eceSKevin Wolf         return NULL;
26334a960eceSKevin Wolf     }
26344a960eceSKevin Wolf 
26353015372dSFam Zheng     if (idx == 0) {
26363015372dSFam Zheng         rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix);
26373015372dSFam Zheng     } else if (split) {
26383015372dSFam Zheng         rel_filename = g_strdup_printf("%s-%c%03d%s",
26393015372dSFam Zheng                                        data->prefix,
26403015372dSFam Zheng                                        flat ? 'f' : 's', idx, data->postfix);
26413015372dSFam Zheng     } else {
26423015372dSFam Zheng         assert(idx == 1);
26433015372dSFam Zheng         rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix);
26443015372dSFam Zheng     }
26453015372dSFam Zheng 
26463015372dSFam Zheng     ext_filename = g_strdup_printf("%s%s", data->path, rel_filename);
26473015372dSFam Zheng     g_free(rel_filename);
26483015372dSFam Zheng 
26493015372dSFam Zheng     if (vmdk_create_extent(ext_filename, size,
26503015372dSFam Zheng                            flat, compress, zeroed_grain, &blk, data->opts,
26513015372dSFam Zheng                            errp)) {
26523015372dSFam Zheng         goto exit;
26533015372dSFam Zheng     }
26543015372dSFam Zheng     bdrv_unref(bs);
26553015372dSFam Zheng exit:
26563015372dSFam Zheng     g_free(ext_filename);
26573015372dSFam Zheng     return blk;
26583015372dSFam Zheng }
26593015372dSFam Zheng 
2660b92902dfSMaxim Levitsky static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
2661b92902dfSMaxim Levitsky                                             const char *filename,
2662b92902dfSMaxim Levitsky                                             QemuOpts *opts,
26633015372dSFam Zheng                                             Error **errp)
26643015372dSFam Zheng {
26653015372dSFam Zheng     Error *local_err = NULL;
26663015372dSFam Zheng     char *desc = NULL;
26673015372dSFam Zheng     int64_t total_size = 0;
26683015372dSFam Zheng     char *adapter_type = NULL;
26693015372dSFam Zheng     BlockdevVmdkAdapterType adapter_type_enum;
26703015372dSFam Zheng     char *backing_file = NULL;
26713015372dSFam Zheng     char *hw_version = NULL;
2672f3d43dfdSThomas Weißschuh     char *toolsversion = NULL;
26733015372dSFam Zheng     char *fmt = NULL;
26743015372dSFam Zheng     BlockdevVmdkSubformat subformat;
26753015372dSFam Zheng     int ret = 0;
26763015372dSFam Zheng     char *path = g_malloc0(PATH_MAX);
26773015372dSFam Zheng     char *prefix = g_malloc0(PATH_MAX);
26783015372dSFam Zheng     char *postfix = g_malloc0(PATH_MAX);
26793015372dSFam Zheng     char *desc_line = g_malloc0(BUF_SIZE);
26803015372dSFam Zheng     char *ext_filename = g_malloc0(PATH_MAX);
26813015372dSFam Zheng     char *desc_filename = g_malloc0(PATH_MAX);
26823015372dSFam Zheng     char *parent_desc_line = g_malloc0(BUF_SIZE);
26833015372dSFam Zheng     bool zeroed_grain;
26843015372dSFam Zheng     bool compat6;
26853015372dSFam Zheng     VMDKCreateOptsData data;
2686d51a814cSEric Blake     char *backing_fmt = NULL;
2687d51a814cSEric Blake 
2688d51a814cSEric Blake     backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
2689d51a814cSEric Blake     if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) {
2690d51a814cSEric Blake         error_setg(errp, "backing_file must be a vmdk image");
2691d51a814cSEric Blake         ret = -EINVAL;
2692d51a814cSEric Blake         goto exit;
2693d51a814cSEric Blake     }
26943015372dSFam Zheng 
26953015372dSFam Zheng     if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
26963015372dSFam Zheng         ret = -EINVAL;
26973015372dSFam Zheng         goto exit;
26983015372dSFam Zheng     }
26993015372dSFam Zheng     /* Read out options */
27003015372dSFam Zheng     total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
27013015372dSFam Zheng                           BDRV_SECTOR_SIZE);
27023015372dSFam Zheng     adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
27033015372dSFam Zheng     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
27043015372dSFam Zheng     hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
2705f3d43dfdSThomas Weißschuh     toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
27063015372dSFam Zheng     compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
27073015372dSFam Zheng     if (strcmp(hw_version, "undefined") == 0) {
27083015372dSFam Zheng         g_free(hw_version);
270926c9296cSyuchenlin         hw_version = NULL;
27103015372dSFam Zheng     }
27113015372dSFam Zheng     fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
27123015372dSFam Zheng     zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false);
27133015372dSFam Zheng 
27143015372dSFam Zheng     if (adapter_type) {
27153015372dSFam Zheng         adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup,
27163015372dSFam Zheng                                             adapter_type,
27173015372dSFam Zheng                                             BLOCKDEV_VMDK_ADAPTER_TYPE_IDE,
27183015372dSFam Zheng                                             &local_err);
27193015372dSFam Zheng         if (local_err) {
27203015372dSFam Zheng             error_propagate(errp, local_err);
27213015372dSFam Zheng             ret = -EINVAL;
27223015372dSFam Zheng             goto exit;
27233015372dSFam Zheng         }
27243015372dSFam Zheng     } else {
27253015372dSFam Zheng         adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE;
27263015372dSFam Zheng     }
27273015372dSFam Zheng 
27283015372dSFam Zheng     if (!fmt) {
27293015372dSFam Zheng         /* Default format to monolithicSparse */
27303015372dSFam Zheng         subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE;
27313015372dSFam Zheng     } else {
27323015372dSFam Zheng         subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup,
27333015372dSFam Zheng                                     fmt,
27343015372dSFam Zheng                                     BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE,
27353015372dSFam Zheng                                     &local_err);
27363015372dSFam Zheng         if (local_err) {
27373015372dSFam Zheng             error_propagate(errp, local_err);
27383015372dSFam Zheng             ret = -EINVAL;
27393015372dSFam Zheng             goto exit;
27403015372dSFam Zheng         }
27413015372dSFam Zheng     }
27423015372dSFam Zheng     data = (VMDKCreateOptsData){
27433015372dSFam Zheng         .prefix = prefix,
27443015372dSFam Zheng         .postfix = postfix,
27453015372dSFam Zheng         .path = path,
27463015372dSFam Zheng         .opts = opts,
27473015372dSFam Zheng     };
27483015372dSFam Zheng     ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
2749f3d43dfdSThomas Weißschuh                             backing_file, hw_version, toolsversion, compat6,
2750f3d43dfdSThomas Weißschuh                             zeroed_grain, vmdk_co_create_opts_cb, &data, errp);
27513015372dSFam Zheng 
27523015372dSFam Zheng exit:
2753d51a814cSEric Blake     g_free(backing_fmt);
27545820f1daSChunyan Liu     g_free(adapter_type);
27555820f1daSChunyan Liu     g_free(backing_file);
2756f249924eSJanne Karhunen     g_free(hw_version);
2757f3d43dfdSThomas Weißschuh     g_free(toolsversion);
27585820f1daSChunyan Liu     g_free(fmt);
2759af057fe7SFam Zheng     g_free(desc);
2760fe206562SJeff Cody     g_free(path);
2761fe206562SJeff Cody     g_free(prefix);
2762fe206562SJeff Cody     g_free(postfix);
2763fe206562SJeff Cody     g_free(desc_line);
2764fe206562SJeff Cody     g_free(ext_filename);
2765fe206562SJeff Cody     g_free(desc_filename);
2766fe206562SJeff Cody     g_free(parent_desc_line);
27673015372dSFam Zheng     return ret;
27683015372dSFam Zheng }
27693015372dSFam Zheng 
27703015372dSFam Zheng static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
27713015372dSFam Zheng                                        bool flat, bool split, bool compress,
27723015372dSFam Zheng                                        bool zeroed_grain, void *opaque,
27733015372dSFam Zheng                                        Error **errp)
27743015372dSFam Zheng {
27753015372dSFam Zheng     int ret;
27763015372dSFam Zheng     BlockDriverState *bs;
27773015372dSFam Zheng     BlockBackend *blk;
27783015372dSFam Zheng     BlockdevCreateOptionsVmdk *opts = opaque;
27793015372dSFam Zheng 
27803015372dSFam Zheng     if (idx == 0) {
27813015372dSFam Zheng         bs = bdrv_open_blockdev_ref(opts->file, errp);
27823015372dSFam Zheng     } else {
27833015372dSFam Zheng         int i;
27843015372dSFam Zheng         BlockdevRefList *list = opts->extents;
27853015372dSFam Zheng         for (i = 1; i < idx; i++) {
27863015372dSFam Zheng             if (!list || !list->next) {
27873015372dSFam Zheng                 error_setg(errp, "Extent [%d] not specified", i);
27883015372dSFam Zheng                 return NULL;
27893015372dSFam Zheng             }
27903015372dSFam Zheng             list = list->next;
27913015372dSFam Zheng         }
27923015372dSFam Zheng         if (!list) {
27933015372dSFam Zheng             error_setg(errp, "Extent [%d] not specified", idx - 1);
27943015372dSFam Zheng             return NULL;
27953015372dSFam Zheng         }
27963015372dSFam Zheng         bs = bdrv_open_blockdev_ref(list->value, errp);
27973015372dSFam Zheng     }
27983015372dSFam Zheng     if (!bs) {
27993015372dSFam Zheng         return NULL;
28003015372dSFam Zheng     }
2801a3aeeab5SEric Blake     blk = blk_new_with_bs(bs,
2802d861ab3aSKevin Wolf                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
2803a3aeeab5SEric Blake                           BLK_PERM_ALL, errp);
2804a3aeeab5SEric Blake     if (!blk) {
28053015372dSFam Zheng         return NULL;
28063015372dSFam Zheng     }
28073015372dSFam Zheng     blk_set_allow_write_beyond_eof(blk, true);
28083015372dSFam Zheng     bdrv_unref(bs);
28093015372dSFam Zheng 
28104a960eceSKevin Wolf     if (size != -1) {
28113015372dSFam Zheng         ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
28123015372dSFam Zheng         if (ret) {
28133015372dSFam Zheng             blk_unref(blk);
28143015372dSFam Zheng             blk = NULL;
28153015372dSFam Zheng         }
28164a960eceSKevin Wolf     }
28173015372dSFam Zheng     return blk;
28183015372dSFam Zheng }
28193015372dSFam Zheng 
28203015372dSFam Zheng static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
28213015372dSFam Zheng                                        Error **errp)
28223015372dSFam Zheng {
28233015372dSFam Zheng     int ret;
28243015372dSFam Zheng     BlockdevCreateOptionsVmdk *opts;
28253015372dSFam Zheng 
28263015372dSFam Zheng     opts = &create_options->u.vmdk;
28273015372dSFam Zheng 
28283015372dSFam Zheng     /* Validate options */
28293015372dSFam Zheng     if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
28303015372dSFam Zheng         error_setg(errp, "Image size must be a multiple of 512 bytes");
28313015372dSFam Zheng         ret = -EINVAL;
28323015372dSFam Zheng         goto out;
28333015372dSFam Zheng     }
28343015372dSFam Zheng 
28353015372dSFam Zheng     ret = vmdk_co_do_create(opts->size,
28363015372dSFam Zheng                             opts->subformat,
28373015372dSFam Zheng                             opts->adapter_type,
28383015372dSFam Zheng                             opts->backing_file,
28393015372dSFam Zheng                             opts->hwversion,
2840f3d43dfdSThomas Weißschuh                             opts->toolsversion,
28413015372dSFam Zheng                             false,
28423015372dSFam Zheng                             opts->zeroed_grain,
28433015372dSFam Zheng                             vmdk_co_create_cb,
28443015372dSFam Zheng                             opts, errp);
28453015372dSFam Zheng     return ret;
28463015372dSFam Zheng 
28473015372dSFam Zheng out:
28481640366cSKirill A. Shutemov     return ret;
2849019d6b8fSAnthony Liguori }
2850019d6b8fSAnthony Liguori 
2851019d6b8fSAnthony Liguori static void vmdk_close(BlockDriverState *bs)
2852019d6b8fSAnthony Liguori {
28532bc3166cSKevin Wolf     BDRVVmdkState *s = bs->opaque;
28542bc3166cSKevin Wolf 
2855b3976d3cSFam Zheng     vmdk_free_extents(bs);
2856f4c129a3SFam Zheng     g_free(s->create_type);
28572bc3166cSKevin Wolf 
28582bc3166cSKevin Wolf     migrate_del_blocker(s->migration_blocker);
28592bc3166cSKevin Wolf     error_free(s->migration_blocker);
2860019d6b8fSAnthony Liguori }
2861019d6b8fSAnthony Liguori 
28624a1d5e1fSFam Zheng static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
28634a1d5e1fSFam Zheng {
28644a1d5e1fSFam Zheng     int i;
28654a1d5e1fSFam Zheng     int64_t ret = 0;
28664a1d5e1fSFam Zheng     int64_t r;
28674a1d5e1fSFam Zheng     BDRVVmdkState *s = bs->opaque;
28684a1d5e1fSFam Zheng 
28699a4f4c31SKevin Wolf     ret = bdrv_get_allocated_file_size(bs->file->bs);
28704a1d5e1fSFam Zheng     if (ret < 0) {
28714a1d5e1fSFam Zheng         return ret;
28724a1d5e1fSFam Zheng     }
28734a1d5e1fSFam Zheng     for (i = 0; i < s->num_extents; i++) {
28749a4f4c31SKevin Wolf         if (s->extents[i].file == bs->file) {
28754a1d5e1fSFam Zheng             continue;
28764a1d5e1fSFam Zheng         }
287724bc15d1SKevin Wolf         r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
28784a1d5e1fSFam Zheng         if (r < 0) {
28794a1d5e1fSFam Zheng             return r;
28804a1d5e1fSFam Zheng         }
28814a1d5e1fSFam Zheng         ret += r;
28824a1d5e1fSFam Zheng     }
28834a1d5e1fSFam Zheng     return ret;
28844a1d5e1fSFam Zheng }
28850e7e1989SKevin Wolf 
2886da7a50f9SFam Zheng static int vmdk_has_zero_init(BlockDriverState *bs)
2887da7a50f9SFam Zheng {
2888da7a50f9SFam Zheng     int i;
2889da7a50f9SFam Zheng     BDRVVmdkState *s = bs->opaque;
2890da7a50f9SFam Zheng 
2891da7a50f9SFam Zheng     /* If has a flat extent and its underlying storage doesn't have zero init,
2892da7a50f9SFam Zheng      * return 0. */
2893da7a50f9SFam Zheng     for (i = 0; i < s->num_extents; i++) {
2894da7a50f9SFam Zheng         if (s->extents[i].flat) {
289524bc15d1SKevin Wolf             if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
2896da7a50f9SFam Zheng                 return 0;
2897da7a50f9SFam Zheng             }
2898da7a50f9SFam Zheng         }
2899da7a50f9SFam Zheng     }
2900da7a50f9SFam Zheng     return 1;
2901da7a50f9SFam Zheng }
2902da7a50f9SFam Zheng 
2903f4c129a3SFam Zheng static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
2904f4c129a3SFam Zheng {
2905f4c129a3SFam Zheng     ImageInfo *info = g_new0(ImageInfo, 1);
2906f4c129a3SFam Zheng 
2907f30c66baSMax Reitz     bdrv_refresh_filename(extent->file->bs);
2908f4c129a3SFam Zheng     *info = (ImageInfo){
290924bc15d1SKevin Wolf         .filename         = g_strdup(extent->file->bs->filename),
2910f4c129a3SFam Zheng         .format           = g_strdup(extent->type),
2911f4c129a3SFam Zheng         .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
2912f4c129a3SFam Zheng         .compressed       = extent->compressed,
2913f4c129a3SFam Zheng         .has_compressed   = extent->compressed,
2914f4c129a3SFam Zheng         .cluster_size     = extent->cluster_sectors * BDRV_SECTOR_SIZE,
2915f4c129a3SFam Zheng         .has_cluster_size = !extent->flat,
2916f4c129a3SFam Zheng     };
2917f4c129a3SFam Zheng 
2918f4c129a3SFam Zheng     return info;
2919f4c129a3SFam Zheng }
2920f4c129a3SFam Zheng 
29212fd61638SPaolo Bonzini static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
29222fd61638SPaolo Bonzini                                       BdrvCheckResult *result,
2923f43aa8e1SPeter Lieven                                       BdrvCheckMode fix)
2924f43aa8e1SPeter Lieven {
2925f43aa8e1SPeter Lieven     BDRVVmdkState *s = bs->opaque;
2926f43aa8e1SPeter Lieven     VmdkExtent *extent = NULL;
2927f43aa8e1SPeter Lieven     int64_t sector_num = 0;
292857322b78SMarkus Armbruster     int64_t total_sectors = bdrv_nb_sectors(bs);
2929f43aa8e1SPeter Lieven     int ret;
2930f43aa8e1SPeter Lieven     uint64_t cluster_offset;
2931f43aa8e1SPeter Lieven 
2932f43aa8e1SPeter Lieven     if (fix) {
2933f43aa8e1SPeter Lieven         return -ENOTSUP;
2934f43aa8e1SPeter Lieven     }
2935f43aa8e1SPeter Lieven 
2936f43aa8e1SPeter Lieven     for (;;) {
2937f43aa8e1SPeter Lieven         if (sector_num >= total_sectors) {
2938f43aa8e1SPeter Lieven             return 0;
2939f43aa8e1SPeter Lieven         }
2940f43aa8e1SPeter Lieven         extent = find_extent(s, sector_num, extent);
2941f43aa8e1SPeter Lieven         if (!extent) {
2942f43aa8e1SPeter Lieven             fprintf(stderr,
2943f43aa8e1SPeter Lieven                     "ERROR: could not find extent for sector %" PRId64 "\n",
2944f43aa8e1SPeter Lieven                     sector_num);
29450e51b9b7SFam Zheng             ret = -EINVAL;
2946f43aa8e1SPeter Lieven             break;
2947f43aa8e1SPeter Lieven         }
2948f43aa8e1SPeter Lieven         ret = get_cluster_offset(bs, extent, NULL,
2949f43aa8e1SPeter Lieven                                  sector_num << BDRV_SECTOR_BITS,
2950c6ac36e1SFam Zheng                                  false, &cluster_offset, 0, 0);
2951f43aa8e1SPeter Lieven         if (ret == VMDK_ERROR) {
2952f43aa8e1SPeter Lieven             fprintf(stderr,
2953f43aa8e1SPeter Lieven                     "ERROR: could not get cluster_offset for sector %"
2954f43aa8e1SPeter Lieven                     PRId64 "\n", sector_num);
2955f43aa8e1SPeter Lieven             break;
2956f43aa8e1SPeter Lieven         }
29570e51b9b7SFam Zheng         if (ret == VMDK_OK) {
29580e51b9b7SFam Zheng             int64_t extent_len = bdrv_getlength(extent->file->bs);
29590e51b9b7SFam Zheng             if (extent_len < 0) {
29600e51b9b7SFam Zheng                 fprintf(stderr,
29610e51b9b7SFam Zheng                         "ERROR: could not get extent file length for sector %"
29620e51b9b7SFam Zheng                         PRId64 "\n", sector_num);
29630e51b9b7SFam Zheng                 ret = extent_len;
29640e51b9b7SFam Zheng                 break;
29650e51b9b7SFam Zheng             }
29660e51b9b7SFam Zheng             if (cluster_offset >= extent_len) {
2967f43aa8e1SPeter Lieven                 fprintf(stderr,
2968f43aa8e1SPeter Lieven                         "ERROR: cluster offset for sector %"
2969f43aa8e1SPeter Lieven                         PRId64 " points after EOF\n", sector_num);
29700e51b9b7SFam Zheng                 ret = -EINVAL;
2971f43aa8e1SPeter Lieven                 break;
2972f43aa8e1SPeter Lieven             }
29730e51b9b7SFam Zheng         }
2974f43aa8e1SPeter Lieven         sector_num += extent->cluster_sectors;
2975f43aa8e1SPeter Lieven     }
2976f43aa8e1SPeter Lieven 
2977f43aa8e1SPeter Lieven     result->corruptions++;
29780e51b9b7SFam Zheng     return ret;
2979f43aa8e1SPeter Lieven }
2980f43aa8e1SPeter Lieven 
29811bf6e9caSAndrey Shinkevich static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
29821bf6e9caSAndrey Shinkevich                                                  Error **errp)
2983f4c129a3SFam Zheng {
2984f4c129a3SFam Zheng     int i;
2985f4c129a3SFam Zheng     BDRVVmdkState *s = bs->opaque;
2986f4c129a3SFam Zheng     ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
2987c3033fd3SEric Blake     ImageInfoList **tail;
2988f4c129a3SFam Zheng 
2989f4c129a3SFam Zheng     *spec_info = (ImageInfoSpecific){
29906a8f9661SEric Blake         .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
299132bafa8fSEric Blake         .u = {
299232bafa8fSEric Blake             .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
2993f4c129a3SFam Zheng         },
2994f4c129a3SFam Zheng     };
2995f4c129a3SFam Zheng 
299632bafa8fSEric Blake     *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
2997f4c129a3SFam Zheng         .create_type = g_strdup(s->create_type),
2998f4c129a3SFam Zheng         .cid = s->cid,
2999f4c129a3SFam Zheng         .parent_cid = s->parent_cid,
3000f4c129a3SFam Zheng     };
3001f4c129a3SFam Zheng 
3002c3033fd3SEric Blake     tail = &spec_info->u.vmdk.data->extents;
3003f4c129a3SFam Zheng     for (i = 0; i < s->num_extents; i++) {
3004c3033fd3SEric Blake         QAPI_LIST_APPEND(tail, vmdk_get_extent_info(&s->extents[i]));
3005f4c129a3SFam Zheng     }
3006f4c129a3SFam Zheng 
3007f4c129a3SFam Zheng     return spec_info;
3008f4c129a3SFam Zheng }
3009f4c129a3SFam Zheng 
30105f583307SFam Zheng static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
30115f583307SFam Zheng {
30125f583307SFam Zheng     return a->flat == b->flat &&
30135f583307SFam Zheng            a->compressed == b->compressed &&
30145f583307SFam Zheng            (a->flat || a->cluster_sectors == b->cluster_sectors);
30155f583307SFam Zheng }
30165f583307SFam Zheng 
301774fe188cSFam Zheng static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
301874fe188cSFam Zheng {
301974fe188cSFam Zheng     int i;
302074fe188cSFam Zheng     BDRVVmdkState *s = bs->opaque;
302174fe188cSFam Zheng     assert(s->num_extents);
30225f583307SFam Zheng 
30235f583307SFam Zheng     /* See if we have multiple extents but they have different cases */
30245f583307SFam Zheng     for (i = 1; i < s->num_extents; i++) {
30255f583307SFam Zheng         if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
30265f583307SFam Zheng             return -ENOTSUP;
30275f583307SFam Zheng         }
30285f583307SFam Zheng     }
302974fe188cSFam Zheng     bdi->needs_compressed_writes = s->extents[0].compressed;
303074fe188cSFam Zheng     if (!s->extents[0].flat) {
303174fe188cSFam Zheng         bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
303274fe188cSFam Zheng     }
303374fe188cSFam Zheng     return 0;
303474fe188cSFam Zheng }
303574fe188cSFam Zheng 
3036abc521a9SMax Reitz static void vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
3037abc521a9SMax Reitz                                       bool backing_overridden)
3038abc521a9SMax Reitz {
3039abc521a9SMax Reitz     /* No children but file and backing can be explicitly specified (TODO) */
3040abc521a9SMax Reitz     qdict_put(target, "file",
3041abc521a9SMax Reitz               qobject_ref(bs->file->bs->full_open_options));
3042abc521a9SMax Reitz 
3043abc521a9SMax Reitz     if (backing_overridden) {
3044abc521a9SMax Reitz         if (bs->backing) {
3045abc521a9SMax Reitz             qdict_put(target, "backing",
3046abc521a9SMax Reitz                       qobject_ref(bs->backing->bs->full_open_options));
3047abc521a9SMax Reitz         } else {
3048abc521a9SMax Reitz             qdict_put_null(target, "backing");
3049abc521a9SMax Reitz         }
3050abc521a9SMax Reitz     }
3051abc521a9SMax Reitz }
3052abc521a9SMax Reitz 
30535820f1daSChunyan Liu static QemuOptsList vmdk_create_opts = {
30545820f1daSChunyan Liu     .name = "vmdk-create-opts",
30555820f1daSChunyan Liu     .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
30565820f1daSChunyan Liu     .desc = {
3057db08adf5SKevin Wolf         {
3058db08adf5SKevin Wolf             .name = BLOCK_OPT_SIZE,
30595820f1daSChunyan Liu             .type = QEMU_OPT_SIZE,
3060db08adf5SKevin Wolf             .help = "Virtual disk size"
3061db08adf5SKevin Wolf         },
3062db08adf5SKevin Wolf         {
30637f2039f6SOthmar Pasteka             .name = BLOCK_OPT_ADAPTER_TYPE,
30645820f1daSChunyan Liu             .type = QEMU_OPT_STRING,
30657f2039f6SOthmar Pasteka             .help = "Virtual adapter type, can be one of "
30667f2039f6SOthmar Pasteka                     "ide (default), lsilogic, buslogic or legacyESX"
30677f2039f6SOthmar Pasteka         },
30687f2039f6SOthmar Pasteka         {
3069db08adf5SKevin Wolf             .name = BLOCK_OPT_BACKING_FILE,
30705820f1daSChunyan Liu             .type = QEMU_OPT_STRING,
3071db08adf5SKevin Wolf             .help = "File name of a base image"
3072db08adf5SKevin Wolf         },
3073db08adf5SKevin Wolf         {
3074d51a814cSEric Blake             .name = BLOCK_OPT_BACKING_FMT,
3075d51a814cSEric Blake             .type = QEMU_OPT_STRING,
3076d51a814cSEric Blake             .help = "Must be 'vmdk' if present",
3077d51a814cSEric Blake         },
3078d51a814cSEric Blake         {
3079db08adf5SKevin Wolf             .name = BLOCK_OPT_COMPAT6,
30805820f1daSChunyan Liu             .type = QEMU_OPT_BOOL,
30815820f1daSChunyan Liu             .help = "VMDK version 6 image",
30825820f1daSChunyan Liu             .def_value_str = "off"
3083db08adf5SKevin Wolf         },
3084f66fd6c3SFam Zheng         {
3085f249924eSJanne Karhunen             .name = BLOCK_OPT_HWVERSION,
3086f249924eSJanne Karhunen             .type = QEMU_OPT_STRING,
3087f249924eSJanne Karhunen             .help = "VMDK hardware version",
3088f249924eSJanne Karhunen             .def_value_str = "undefined"
3089f249924eSJanne Karhunen         },
3090f249924eSJanne Karhunen         {
3091f3d43dfdSThomas Weißschuh             .name = BLOCK_OPT_TOOLSVERSION,
3092f3d43dfdSThomas Weißschuh             .type = QEMU_OPT_STRING,
3093f3d43dfdSThomas Weißschuh             .help = "VMware guest tools version",
3094f3d43dfdSThomas Weißschuh         },
3095f3d43dfdSThomas Weißschuh         {
3096f66fd6c3SFam Zheng             .name = BLOCK_OPT_SUBFMT,
30975820f1daSChunyan Liu             .type = QEMU_OPT_STRING,
3098f66fd6c3SFam Zheng             .help =
3099f66fd6c3SFam Zheng                 "VMDK flat extent format, can be one of "
31006c031aacSFam Zheng                 "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
3101f66fd6c3SFam Zheng         },
310269e0b6dfSFam Zheng         {
310369e0b6dfSFam Zheng             .name = BLOCK_OPT_ZEROED_GRAIN,
31045820f1daSChunyan Liu             .type = QEMU_OPT_BOOL,
31055820f1daSChunyan Liu             .help = "Enable efficient zero writes "
31065820f1daSChunyan Liu                     "using the zeroed-grain GTE feature"
310769e0b6dfSFam Zheng         },
31085820f1daSChunyan Liu         { /* end of list */ }
31095820f1daSChunyan Liu     }
31100e7e1989SKevin Wolf };
31110e7e1989SKevin Wolf 
3112019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = {
3113019d6b8fSAnthony Liguori     .format_name                  = "vmdk",
3114019d6b8fSAnthony Liguori     .instance_size                = sizeof(BDRVVmdkState),
3115019d6b8fSAnthony Liguori     .bdrv_probe                   = vmdk_probe,
31166511ef77SKevin Wolf     .bdrv_open                    = vmdk_open,
31172fd61638SPaolo Bonzini     .bdrv_co_check                = vmdk_co_check,
31183897575fSJeff Cody     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
31196d17e287SHanna Reitz     .bdrv_reopen_commit           = vmdk_reopen_commit,
31206d17e287SHanna Reitz     .bdrv_reopen_abort            = vmdk_reopen_abort,
312169dca43dSMax Reitz     .bdrv_child_perm              = bdrv_default_perms,
3122f10cc243SKevin Wolf     .bdrv_co_preadv               = vmdk_co_preadv,
312337b1d7d8SKevin Wolf     .bdrv_co_pwritev              = vmdk_co_pwritev,
3124b2c622d3SPavel Butsykin     .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
3125a620f2aeSEric Blake     .bdrv_co_pwrite_zeroes        = vmdk_co_pwrite_zeroes,
3126019d6b8fSAnthony Liguori     .bdrv_close                   = vmdk_close,
3127efc75e2aSStefan Hajnoczi     .bdrv_co_create_opts          = vmdk_co_create_opts,
31283015372dSFam Zheng     .bdrv_co_create               = vmdk_co_create,
3129c72080b9SEric Blake     .bdrv_co_block_status         = vmdk_co_block_status,
31304a1d5e1fSFam Zheng     .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
3131da7a50f9SFam Zheng     .bdrv_has_zero_init           = vmdk_has_zero_init,
3132f4c129a3SFam Zheng     .bdrv_get_specific_info       = vmdk_get_specific_info,
3133d34682cdSKevin Wolf     .bdrv_refresh_limits          = vmdk_refresh_limits,
313474fe188cSFam Zheng     .bdrv_get_info                = vmdk_get_info,
3135abc521a9SMax Reitz     .bdrv_gather_child_options    = vmdk_gather_child_options,
31360e7e1989SKevin Wolf 
3137d67066d8SMax Reitz     .is_format                    = true,
31388ee79e70SKevin Wolf     .supports_backing             = true,
31395820f1daSChunyan Liu     .create_opts                  = &vmdk_create_opts,
3140019d6b8fSAnthony Liguori };
3141019d6b8fSAnthony Liguori 
3142019d6b8fSAnthony Liguori static void bdrv_vmdk_init(void)
3143019d6b8fSAnthony Liguori {
3144019d6b8fSAnthony Liguori     bdrv_register(&bdrv_vmdk);
3145019d6b8fSAnthony Liguori }
3146019d6b8fSAnthony Liguori 
3147019d6b8fSAnthony Liguori block_init(bdrv_vmdk_init);
3148