1019d6b8fSAnthony Liguori /*
2019d6b8fSAnthony Liguori * Block driver for the VMDK format
3019d6b8fSAnthony Liguori *
4019d6b8fSAnthony Liguori * Copyright (c) 2004 Fabrice Bellard
5019d6b8fSAnthony Liguori * Copyright (c) 2005 Filip Navara
6019d6b8fSAnthony Liguori *
7019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy
8019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal
9019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights
10019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is
12019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions:
13019d6b8fSAnthony Liguori *
14019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in
15019d6b8fSAnthony Liguori * all copies or substantial portions of the Software.
16019d6b8fSAnthony Liguori *
17019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23019d6b8fSAnthony Liguori * THE SOFTWARE.
24019d6b8fSAnthony Liguori */
25019d6b8fSAnthony Liguori
2680c71a24SPeter Maydell #include "qemu/osdep.h"
27da34e65cSMarkus Armbruster #include "qapi/error.h"
28737e150eSPaolo Bonzini #include "block/block_int.h"
29c4bea169SKevin Wolf #include "sysemu/block-backend.h"
30abc521a9SMax Reitz #include "qapi/qmp/qdict.h"
31cc7a8ea7SMarkus Armbruster #include "qapi/qmp/qerror.h"
32d49b6836SMarkus Armbruster #include "qemu/error-report.h"
331de7afc9SPaolo Bonzini #include "qemu/module.h"
34922a01a0SMarkus Armbruster #include "qemu/option.h"
3558369e22SPaolo Bonzini #include "qemu/bswap.h"
365df022cfSPeter Maydell #include "qemu/memalign.h"
37795c40b8SJuan Quintela #include "migration/blocker.h"
38f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
392923d34fSStefan Weil #include <zlib.h>
40019d6b8fSAnthony Liguori
/*
 * Magic values that vmdk_probe() compares against the first four bytes of
 * an image, read as a big-endian 32-bit integer.
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

#define VMDK4_COMPRESSION_DEFLATE 1

/* VMDK4_FLAG_* bit masks */
#define VMDK4_FLAG_NL_DETECT (1 << 0)
#define VMDK4_FLAG_RGD (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN (1 << 2)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)

/* All-ones 64-bit sentinel */
#define VMDK4_GD_AT_END 0xffffffffffffffffULL

/* 2^32 sectors */
#define VMDK_EXTENT_MAX_SECTORS (1ULL << 32)

#define VMDK_GTE_ZEROED 0x1

/* VMDK internal error codes */
#define VMDK_OK 0
#define VMDK_ERROR (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
#define VMDK_ZEROED (-3)

/* Option name strings */
#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
#define BLOCK_OPT_TOOLSVERSION "toolsversion"
6569e0b6dfSFam Zheng
66019d6b8fSAnthony Liguori typedef struct {
67019d6b8fSAnthony Liguori uint32_t version;
68019d6b8fSAnthony Liguori uint32_t flags;
69019d6b8fSAnthony Liguori uint32_t disk_sectors;
70019d6b8fSAnthony Liguori uint32_t granularity;
71019d6b8fSAnthony Liguori uint32_t l1dir_offset;
72019d6b8fSAnthony Liguori uint32_t l1dir_size;
73019d6b8fSAnthony Liguori uint32_t file_sectors;
74019d6b8fSAnthony Liguori uint32_t cylinders;
75019d6b8fSAnthony Liguori uint32_t heads;
76019d6b8fSAnthony Liguori uint32_t sectors_per_track;
775d8caa54SFam Zheng } QEMU_PACKED VMDK3Header;
78019d6b8fSAnthony Liguori
79019d6b8fSAnthony Liguori typedef struct {
80019d6b8fSAnthony Liguori uint32_t version;
81019d6b8fSAnthony Liguori uint32_t flags;
82e98768d4SFam Zheng uint64_t capacity;
83e98768d4SFam Zheng uint64_t granularity;
84e98768d4SFam Zheng uint64_t desc_offset;
85e98768d4SFam Zheng uint64_t desc_size;
86ca8804ceSFam Zheng /* Number of GrainTableEntries per GrainTable */
87ca8804ceSFam Zheng uint32_t num_gtes_per_gt;
88e98768d4SFam Zheng uint64_t rgd_offset;
89e98768d4SFam Zheng uint64_t gd_offset;
90e98768d4SFam Zheng uint64_t grain_offset;
91019d6b8fSAnthony Liguori char filler[1];
92019d6b8fSAnthony Liguori char check_bytes[4];
93432bb170SFam Zheng uint16_t compressAlgorithm;
94541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header;
95019d6b8fSAnthony Liguori
9698eb9733SSam Eiderman typedef struct VMDKSESparseConstHeader {
9798eb9733SSam Eiderman uint64_t magic;
9898eb9733SSam Eiderman uint64_t version;
9998eb9733SSam Eiderman uint64_t capacity;
10098eb9733SSam Eiderman uint64_t grain_size;
10198eb9733SSam Eiderman uint64_t grain_table_size;
10298eb9733SSam Eiderman uint64_t flags;
10398eb9733SSam Eiderman uint64_t reserved1;
10498eb9733SSam Eiderman uint64_t reserved2;
10598eb9733SSam Eiderman uint64_t reserved3;
10698eb9733SSam Eiderman uint64_t reserved4;
10798eb9733SSam Eiderman uint64_t volatile_header_offset;
10898eb9733SSam Eiderman uint64_t volatile_header_size;
10998eb9733SSam Eiderman uint64_t journal_header_offset;
11098eb9733SSam Eiderman uint64_t journal_header_size;
11198eb9733SSam Eiderman uint64_t journal_offset;
11298eb9733SSam Eiderman uint64_t journal_size;
11398eb9733SSam Eiderman uint64_t grain_dir_offset;
11498eb9733SSam Eiderman uint64_t grain_dir_size;
11598eb9733SSam Eiderman uint64_t grain_tables_offset;
11698eb9733SSam Eiderman uint64_t grain_tables_size;
11798eb9733SSam Eiderman uint64_t free_bitmap_offset;
11898eb9733SSam Eiderman uint64_t free_bitmap_size;
11998eb9733SSam Eiderman uint64_t backmap_offset;
12098eb9733SSam Eiderman uint64_t backmap_size;
12198eb9733SSam Eiderman uint64_t grains_offset;
12298eb9733SSam Eiderman uint64_t grains_size;
12398eb9733SSam Eiderman uint8_t pad[304];
12498eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseConstHeader;
12598eb9733SSam Eiderman
12698eb9733SSam Eiderman typedef struct VMDKSESparseVolatileHeader {
12798eb9733SSam Eiderman uint64_t magic;
12898eb9733SSam Eiderman uint64_t free_gt_number;
12998eb9733SSam Eiderman uint64_t next_txn_seq_number;
13098eb9733SSam Eiderman uint64_t replay_journal;
13198eb9733SSam Eiderman uint8_t pad[480];
13298eb9733SSam Eiderman } QEMU_PACKED VMDKSESparseVolatileHeader;
13398eb9733SSam Eiderman
134019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16
135019d6b8fSAnthony Liguori
136b3976d3cSFam Zheng typedef struct VmdkExtent {
13724bc15d1SKevin Wolf BdrvChild *file;
138b3976d3cSFam Zheng bool flat;
139432bb170SFam Zheng bool compressed;
140432bb170SFam Zheng bool has_marker;
14114ead646SFam Zheng bool has_zero_grain;
14298eb9733SSam Eiderman bool sesparse;
14398eb9733SSam Eiderman uint64_t sesparse_l2_tables_offset;
14498eb9733SSam Eiderman uint64_t sesparse_clusters_offset;
14598eb9733SSam Eiderman int32_t entry_size;
14614ead646SFam Zheng int version;
147b3976d3cSFam Zheng int64_t sectors;
148b3976d3cSFam Zheng int64_t end_sector;
1497fa60fa3SFam Zheng int64_t flat_start_offset;
150019d6b8fSAnthony Liguori int64_t l1_table_offset;
151019d6b8fSAnthony Liguori int64_t l1_backup_table_offset;
15298eb9733SSam Eiderman void *l1_table;
153019d6b8fSAnthony Liguori uint32_t *l1_backup_table;
154019d6b8fSAnthony Liguori unsigned int l1_size;
155019d6b8fSAnthony Liguori uint32_t l1_entry_sectors;
156019d6b8fSAnthony Liguori
157019d6b8fSAnthony Liguori unsigned int l2_size;
15898eb9733SSam Eiderman void *l2_cache;
159019d6b8fSAnthony Liguori uint32_t l2_cache_offsets[L2_CACHE_SIZE];
160019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE];
161019d6b8fSAnthony Liguori
162301c7d38SFam Zheng int64_t cluster_sectors;
163c6ac36e1SFam Zheng int64_t next_cluster_sector;
164f4c129a3SFam Zheng char *type;
165b3976d3cSFam Zheng } VmdkExtent;
166b3976d3cSFam Zheng
167b3976d3cSFam Zheng typedef struct BDRVVmdkState {
168848c66e8SPaolo Bonzini CoMutex lock;
169e98768d4SFam Zheng uint64_t desc_offset;
17069b4d86dSFam Zheng bool cid_updated;
171c338b6adSFam Zheng bool cid_checked;
172f4c129a3SFam Zheng uint32_t cid;
173019d6b8fSAnthony Liguori uint32_t parent_cid;
174b3976d3cSFam Zheng int num_extents;
175b3976d3cSFam Zheng /* Extent array with num_extents entries, ascend ordered by address */
176b3976d3cSFam Zheng VmdkExtent *extents;
1772bc3166cSKevin Wolf Error *migration_blocker;
178f4c129a3SFam Zheng char *create_type;
179019d6b8fSAnthony Liguori } BDRVVmdkState;
180019d6b8fSAnthony Liguori
/* Scratch state kept in BDRVReopenState.opaque for the duration of a reopen. */
typedef struct BDRVVmdkReopenState {
    /*
     * One flag per extent, recorded by vmdk_reopen_prepare(): true when the
     * extent's file was bs->file at that time.
     */
    bool *extents_using_bs_file;
} BDRVVmdkReopenState;
1846d17e287SHanna Reitz
/*
 * Metadata describing one cluster lookup/allocation.
 * NOTE(review): the users of this structure are outside this part of the
 * file; field meanings below are taken from the names only.
 */
typedef struct VmdkMetaData {
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    bool new_allocation;
    uint32_t *l2_cache_entry;
} VmdkMetaData;
192019d6b8fSAnthony Liguori
193432bb170SFam Zheng typedef struct VmdkGrainMarker {
194432bb170SFam Zheng uint64_t lba;
195432bb170SFam Zheng uint32_t size;
196880a7817SPhilippe Mathieu-Daudé uint8_t data[];
1975d8caa54SFam Zheng } QEMU_PACKED VmdkGrainMarker;
198432bb170SFam Zheng
/* Marker type codes */
enum {
    MARKER_END_OF_STREAM    = 0,
    MARKER_GRAIN_TABLE      = 1,
    MARKER_GRAIN_DIRECTORY  = 2,
    MARKER_FOOTER           = 3,
};
20565bd155cSKevin Wolf
vmdk_probe(const uint8_t * buf,int buf_size,const char * filename)206019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
207019d6b8fSAnthony Liguori {
208019d6b8fSAnthony Liguori uint32_t magic;
209019d6b8fSAnthony Liguori
210ae261c86SFam Zheng if (buf_size < 4) {
211019d6b8fSAnthony Liguori return 0;
212ae261c86SFam Zheng }
213019d6b8fSAnthony Liguori magic = be32_to_cpu(*(uint32_t *)buf);
214019d6b8fSAnthony Liguori if (magic == VMDK3_MAGIC ||
21501fc99d6SFam Zheng magic == VMDK4_MAGIC) {
216019d6b8fSAnthony Liguori return 100;
21701fc99d6SFam Zheng } else {
21801fc99d6SFam Zheng const char *p = (const char *)buf;
21901fc99d6SFam Zheng const char *end = p + buf_size;
22001fc99d6SFam Zheng while (p < end) {
22101fc99d6SFam Zheng if (*p == '#') {
22201fc99d6SFam Zheng /* skip comment line */
22301fc99d6SFam Zheng while (p < end && *p != '\n') {
22401fc99d6SFam Zheng p++;
22501fc99d6SFam Zheng }
22601fc99d6SFam Zheng p++;
22701fc99d6SFam Zheng continue;
22801fc99d6SFam Zheng }
22901fc99d6SFam Zheng if (*p == ' ') {
23001fc99d6SFam Zheng while (p < end && *p == ' ') {
23101fc99d6SFam Zheng p++;
23201fc99d6SFam Zheng }
23301fc99d6SFam Zheng /* skip '\r' if windows line endings used. */
23401fc99d6SFam Zheng if (p < end && *p == '\r') {
23501fc99d6SFam Zheng p++;
23601fc99d6SFam Zheng }
23701fc99d6SFam Zheng /* only accept blank lines before 'version=' line */
23801fc99d6SFam Zheng if (p == end || *p != '\n') {
239019d6b8fSAnthony Liguori return 0;
240019d6b8fSAnthony Liguori }
24101fc99d6SFam Zheng p++;
24201fc99d6SFam Zheng continue;
24301fc99d6SFam Zheng }
24401fc99d6SFam Zheng if (end - p >= strlen("version=X\n")) {
24501fc99d6SFam Zheng if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
246b69864e5SSam Eiderman strncmp("version=2\n", p, strlen("version=2\n")) == 0 ||
247b69864e5SSam Eiderman strncmp("version=3\n", p, strlen("version=3\n")) == 0) {
24801fc99d6SFam Zheng return 100;
24901fc99d6SFam Zheng }
25001fc99d6SFam Zheng }
25101fc99d6SFam Zheng if (end - p >= strlen("version=X\r\n")) {
25201fc99d6SFam Zheng if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
253b69864e5SSam Eiderman strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 ||
254b69864e5SSam Eiderman strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) {
25501fc99d6SFam Zheng return 100;
25601fc99d6SFam Zheng }
25701fc99d6SFam Zheng }
25801fc99d6SFam Zheng return 0;
25901fc99d6SFam Zheng }
26001fc99d6SFam Zheng return 0;
26101fc99d6SFam Zheng }
26201fc99d6SFam Zheng }
263019d6b8fSAnthony Liguori
/* Size constants, in bytes */
#define SECTOR_SIZE 512
/* 20 sectors of 512 bytes each */
#define DESC_SIZE (20 * SECTOR_SIZE)
#define BUF_SIZE 4096
/* first sector of 512 bytes */
#define HEADER_SIZE 512
268019d6b8fSAnthony Liguori
/*
 * Release all extents of @bs: the per-extent tables, cache and type string,
 * the child reference of every extent whose file is not bs->file, and
 * finally the extent array itself.  The child references are dropped while
 * holding the graph write lock.
 */
static void vmdk_free_extents(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int idx;

    bdrv_graph_wrlock(NULL);
    for (idx = 0; idx < s->num_extents; idx++) {
        VmdkExtent *extent = &s->extents[idx];

        g_free(extent->l1_table);
        g_free(extent->l2_cache);
        g_free(extent->l1_backup_table);
        g_free(extent->type);
        /* bs->file is not unreferenced here */
        if (extent->file != bs->file) {
            bdrv_unref_child(bs, extent->file);
        }
    }
    bdrv_graph_wrunlock(NULL);

    g_free(s->extents);
}
290b3976d3cSFam Zheng
/*
 * Drop the most recently added entry from the extent array by shrinking the
 * array by one element.  Nothing owned by the dropped extent is released
 * here.  Does nothing when there are no extents.
 */
static void vmdk_free_last_extent(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;

    if (s->num_extents != 0) {
        s->num_extents--;
        s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
    }
}
30186c6b429SFam Zheng
3029877860eSPeter Maydell /* Return -ve errno, or 0 on success and write CID into *pcid. */
3031f051dcbSKevin Wolf static int GRAPH_RDLOCK
vmdk_read_cid(BlockDriverState * bs,int parent,uint32_t * pcid)3041f051dcbSKevin Wolf vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
305019d6b8fSAnthony Liguori {
3065997c210SFam Zheng char *desc;
3079877860eSPeter Maydell uint32_t cid;
308019d6b8fSAnthony Liguori const char *p_name, *cid_str;
309019d6b8fSAnthony Liguori size_t cid_str_size;
310e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
31199f1835dSKevin Wolf int ret;
312019d6b8fSAnthony Liguori
3135997c210SFam Zheng desc = g_malloc0(DESC_SIZE);
31432cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
31599f1835dSKevin Wolf if (ret < 0) {
3169877860eSPeter Maydell goto out;
317e1da9b24SFam Zheng }
318019d6b8fSAnthony Liguori
319019d6b8fSAnthony Liguori if (parent) {
320019d6b8fSAnthony Liguori cid_str = "parentCID";
321019d6b8fSAnthony Liguori cid_str_size = sizeof("parentCID");
322019d6b8fSAnthony Liguori } else {
323019d6b8fSAnthony Liguori cid_str = "CID";
324019d6b8fSAnthony Liguori cid_str_size = sizeof("CID");
325019d6b8fSAnthony Liguori }
326019d6b8fSAnthony Liguori
32793897b9fSKevin Wolf desc[DESC_SIZE - 1] = '\0';
328ae261c86SFam Zheng p_name = strstr(desc, cid_str);
3299877860eSPeter Maydell if (p_name == NULL) {
3309877860eSPeter Maydell ret = -EINVAL;
3319877860eSPeter Maydell goto out;
332019d6b8fSAnthony Liguori }
3339877860eSPeter Maydell p_name += cid_str_size;
3349877860eSPeter Maydell if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
3359877860eSPeter Maydell ret = -EINVAL;
3369877860eSPeter Maydell goto out;
3379877860eSPeter Maydell }
3389877860eSPeter Maydell *pcid = cid;
3399877860eSPeter Maydell ret = 0;
340019d6b8fSAnthony Liguori
3419877860eSPeter Maydell out:
3425997c210SFam Zheng g_free(desc);
3439877860eSPeter Maydell return ret;
344019d6b8fSAnthony Liguori }
345019d6b8fSAnthony Liguori
34628944f99SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vmdk_write_cid(BlockDriverState * bs,uint32_t cid)34728944f99SPaolo Bonzini vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
348019d6b8fSAnthony Liguori {
349965415ebSFam Zheng char *desc, *tmp_desc;
350019d6b8fSAnthony Liguori char *p_name, *tmp_str;
351e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
352965415ebSFam Zheng int ret = 0;
353019d6b8fSAnthony Liguori
354*9fb7b350SFam Zheng size_t desc_buf_size;
355*9fb7b350SFam Zheng
356*9fb7b350SFam Zheng if (s->desc_offset == 0) {
357*9fb7b350SFam Zheng desc_buf_size = bdrv_getlength(bs->file->bs);
358*9fb7b350SFam Zheng if (desc_buf_size > 16ULL << 20) {
359*9fb7b350SFam Zheng error_report("VMDK description file too big");
360*9fb7b350SFam Zheng return -EFBIG;
361*9fb7b350SFam Zheng }
362*9fb7b350SFam Zheng } else {
363*9fb7b350SFam Zheng desc_buf_size = DESC_SIZE;
364*9fb7b350SFam Zheng }
365*9fb7b350SFam Zheng
366*9fb7b350SFam Zheng desc = g_malloc0(desc_buf_size);
367*9fb7b350SFam Zheng tmp_desc = g_malloc0(desc_buf_size);
368*9fb7b350SFam Zheng ret = bdrv_co_pread(bs->file, s->desc_offset, desc_buf_size, desc, 0);
36999f1835dSKevin Wolf if (ret < 0) {
370965415ebSFam Zheng goto out;
371e1da9b24SFam Zheng }
372019d6b8fSAnthony Liguori
373*9fb7b350SFam Zheng desc[desc_buf_size - 1] = '\0';
374019d6b8fSAnthony Liguori tmp_str = strstr(desc, "parentCID");
37593897b9fSKevin Wolf if (tmp_str == NULL) {
376965415ebSFam Zheng ret = -EINVAL;
377965415ebSFam Zheng goto out;
37893897b9fSKevin Wolf }
37993897b9fSKevin Wolf
380*9fb7b350SFam Zheng pstrcpy(tmp_desc, desc_buf_size, tmp_str);
381ae261c86SFam Zheng p_name = strstr(desc, "CID");
382ae261c86SFam Zheng if (p_name != NULL) {
383019d6b8fSAnthony Liguori p_name += sizeof("CID");
384*9fb7b350SFam Zheng snprintf(p_name, desc_buf_size - (p_name - desc), "%" PRIx32 "\n", cid);
385*9fb7b350SFam Zheng pstrcat(desc, desc_buf_size, tmp_desc);
386019d6b8fSAnthony Liguori }
387019d6b8fSAnthony Liguori
388*9fb7b350SFam Zheng ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, desc_buf_size, desc, 0);
38999f1835dSKevin Wolf
390965415ebSFam Zheng out:
391965415ebSFam Zheng g_free(desc);
392965415ebSFam Zheng g_free(tmp_desc);
393965415ebSFam Zheng return ret;
394019d6b8fSAnthony Liguori }
395019d6b8fSAnthony Liguori
vmdk_is_cid_valid(BlockDriverState * bs)396004915a9SKevin Wolf static int coroutine_fn GRAPH_RDLOCK vmdk_is_cid_valid(BlockDriverState *bs)
397019d6b8fSAnthony Liguori {
398019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
399019d6b8fSAnthony Liguori uint32_t cur_pcid;
400019d6b8fSAnthony Liguori
401760e0063SKevin Wolf if (!s->cid_checked && bs->backing) {
402760e0063SKevin Wolf BlockDriverState *p_bs = bs->backing->bs;
403760e0063SKevin Wolf
404439e89fcSMax Reitz if (strcmp(p_bs->drv->format_name, "vmdk")) {
405439e89fcSMax Reitz /* Backing file is not in vmdk format, so it does not have
406439e89fcSMax Reitz * a CID, which makes the overlay's parent CID invalid */
407439e89fcSMax Reitz return 0;
408439e89fcSMax Reitz }
409439e89fcSMax Reitz
4109877860eSPeter Maydell if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
4119877860eSPeter Maydell /* read failure: report as not valid */
4129877860eSPeter Maydell return 0;
4139877860eSPeter Maydell }
414ae261c86SFam Zheng if (s->parent_cid != cur_pcid) {
415ae261c86SFam Zheng /* CID not valid */
416019d6b8fSAnthony Liguori return 0;
417019d6b8fSAnthony Liguori }
418ae261c86SFam Zheng }
419c338b6adSFam Zheng s->cid_checked = true;
420ae261c86SFam Zheng /* CID valid */
421019d6b8fSAnthony Liguori return 1;
422019d6b8fSAnthony Liguori }
423019d6b8fSAnthony Liguori
vmdk_reopen_prepare(BDRVReopenState * state,BlockReopenQueue * queue,Error ** errp)4243897575fSJeff Cody static int vmdk_reopen_prepare(BDRVReopenState *state,
4253897575fSJeff Cody BlockReopenQueue *queue, Error **errp)
4263897575fSJeff Cody {
4276d17e287SHanna Reitz BDRVVmdkState *s;
4286d17e287SHanna Reitz BDRVVmdkReopenState *rs;
4296d17e287SHanna Reitz int i;
4306d17e287SHanna Reitz
4311f051dcbSKevin Wolf GLOBAL_STATE_CODE();
4321f051dcbSKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
4331f051dcbSKevin Wolf
4343897575fSJeff Cody assert(state != NULL);
4353897575fSJeff Cody assert(state->bs != NULL);
4366d17e287SHanna Reitz assert(state->opaque == NULL);
4376d17e287SHanna Reitz
4386d17e287SHanna Reitz s = state->bs->opaque;
4396d17e287SHanna Reitz
4406d17e287SHanna Reitz rs = g_new0(BDRVVmdkReopenState, 1);
4416d17e287SHanna Reitz state->opaque = rs;
4426d17e287SHanna Reitz
4436d17e287SHanna Reitz /*
4446d17e287SHanna Reitz * Check whether there are any extents stored in bs->file; if bs->file
4456d17e287SHanna Reitz * changes, we will need to update their .file pointers to follow suit
4466d17e287SHanna Reitz */
4476d17e287SHanna Reitz rs->extents_using_bs_file = g_new(bool, s->num_extents);
4486d17e287SHanna Reitz for (i = 0; i < s->num_extents; i++) {
4496d17e287SHanna Reitz rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file;
4506d17e287SHanna Reitz }
4516d17e287SHanna Reitz
45267251a31SKevin Wolf return 0;
4533897575fSJeff Cody }
4543897575fSJeff Cody
/* Free the reopen scratch state stored in state->opaque and reset it to NULL. */
static void vmdk_reopen_clean(BDRVReopenState *state)
{
    BDRVVmdkReopenState *reopen = state->opaque;

    state->opaque = NULL;
    g_free(reopen->extents_using_bs_file);
    g_free(reopen);
}
4636d17e287SHanna Reitz
/*
 * Reopen "commit" step: point every extent that was flagged by
 * vmdk_reopen_prepare() at the current bs->file, then free the scratch state.
 */
static void vmdk_reopen_commit(BDRVReopenState *state)
{
    BDRVVmdkReopenState *reopen = state->opaque;
    BDRVVmdkState *vmdk = state->bs->opaque;
    int idx;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    for (idx = 0; idx < vmdk->num_extents; idx++) {
        if (!reopen->extents_using_bs_file[idx]) {
            continue;
        }
        vmdk->extents[idx].file = state->bs->file;
    }

    vmdk_reopen_clean(state);
}
4816d17e287SHanna Reitz
/* Reopen "abort" step: only the scratch state has to be released. */
static void vmdk_reopen_abort(BDRVReopenState *state)
{
    vmdk_reopen_clean(state);
}
4866d17e287SHanna Reitz
vmdk_parent_open(BlockDriverState * bs)4871f051dcbSKevin Wolf static int GRAPH_RDLOCK vmdk_parent_open(BlockDriverState *bs)
488019d6b8fSAnthony Liguori {
489019d6b8fSAnthony Liguori char *p_name;
49071968dbfSFam Zheng char *desc;
491e1da9b24SFam Zheng BDRVVmdkState *s = bs->opaque;
492588b65a3SPaolo Bonzini int ret;
493019d6b8fSAnthony Liguori
49471968dbfSFam Zheng desc = g_malloc0(DESC_SIZE + 1);
49532cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
496588b65a3SPaolo Bonzini if (ret < 0) {
49771968dbfSFam Zheng goto out;
498e1da9b24SFam Zheng }
499019d6b8fSAnthony Liguori
500ae261c86SFam Zheng p_name = strstr(desc, "parentFileNameHint");
501ae261c86SFam Zheng if (p_name != NULL) {
502019d6b8fSAnthony Liguori char *end_name;
503019d6b8fSAnthony Liguori
504019d6b8fSAnthony Liguori p_name += sizeof("parentFileNameHint") + 1;
505ae261c86SFam Zheng end_name = strchr(p_name, '\"');
506ae261c86SFam Zheng if (end_name == NULL) {
50771968dbfSFam Zheng ret = -EINVAL;
50871968dbfSFam Zheng goto out;
509ae261c86SFam Zheng }
510998c2019SMax Reitz if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) {
51171968dbfSFam Zheng ret = -EINVAL;
51271968dbfSFam Zheng goto out;
513ae261c86SFam Zheng }
514019d6b8fSAnthony Liguori
515998c2019SMax Reitz pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name);
516998c2019SMax Reitz pstrcpy(bs->backing_file, sizeof(bs->backing_file),
517998c2019SMax Reitz bs->auto_backing_file);
5187502be83SSam Eiderman pstrcpy(bs->backing_format, sizeof(bs->backing_format),
5197502be83SSam Eiderman "vmdk");
520019d6b8fSAnthony Liguori }
521019d6b8fSAnthony Liguori
52271968dbfSFam Zheng out:
52371968dbfSFam Zheng g_free(desc);
52471968dbfSFam Zheng return ret;
525019d6b8fSAnthony Liguori }
526019d6b8fSAnthony Liguori
527b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent
528b3976d3cSFam Zheng * address. return NULL if allocation failed. */
vmdk_add_extent(BlockDriverState * bs,BdrvChild * file,bool flat,int64_t sectors,int64_t l1_offset,int64_t l1_backup_offset,uint32_t l1_size,int l2_size,uint64_t cluster_sectors,VmdkExtent ** new_extent,Error ** errp)5298aa1331cSFam Zheng static int vmdk_add_extent(BlockDriverState *bs,
53024bc15d1SKevin Wolf BdrvChild *file, bool flat, int64_t sectors,
531b3976d3cSFam Zheng int64_t l1_offset, int64_t l1_backup_offset,
532b3976d3cSFam Zheng uint32_t l1_size,
5338aa1331cSFam Zheng int l2_size, uint64_t cluster_sectors,
5344823970bSFam Zheng VmdkExtent **new_extent,
5354823970bSFam Zheng Error **errp)
536b3976d3cSFam Zheng {
537b3976d3cSFam Zheng VmdkExtent *extent;
538b3976d3cSFam Zheng BDRVVmdkState *s = bs->opaque;
5390a156f7cSMarkus Armbruster int64_t nb_sectors;
540b3976d3cSFam Zheng
5418aa1331cSFam Zheng if (cluster_sectors > 0x200000) {
5428aa1331cSFam Zheng /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
5434823970bSFam Zheng error_setg(errp, "Invalid granularity, image may be corrupt");
5444823970bSFam Zheng return -EFBIG;
5458aa1331cSFam Zheng }
54659d6ee48SSam Eiderman if (l1_size > 32 * 1024 * 1024) {
547940a2cd5SSam Eiderman /*
548940a2cd5SSam Eiderman * Although with big capacity and small l1_entry_sectors, we can get a
549b0651b8cSFam Zheng * big l1_size, we don't want unbounded value to allocate the table.
55059d6ee48SSam Eiderman * Limit it to 32M, which is enough to store:
55159d6ee48SSam Eiderman * 8TB - for both VMDK3 & VMDK4 with
55259d6ee48SSam Eiderman * minimal cluster size: 512B
55359d6ee48SSam Eiderman * minimal L2 table size: 512 entries
55459d6ee48SSam Eiderman * 8 TB is still more than the maximal value supported for
55559d6ee48SSam Eiderman * VMDK3 & VMDK4 which is 2TB.
55698eb9733SSam Eiderman * 64TB - for "ESXi seSparse Extent"
55798eb9733SSam Eiderman * minimal cluster size: 512B (default is 4KB)
55898eb9733SSam Eiderman * L2 table size: 4096 entries (const).
55998eb9733SSam Eiderman * 64TB is more than the maximal value supported for
56098eb9733SSam Eiderman * seSparse VMDKs (which is slightly less than 64TB)
561940a2cd5SSam Eiderman */
5624823970bSFam Zheng error_setg(errp, "L1 size too big");
563b0651b8cSFam Zheng return -EFBIG;
564b0651b8cSFam Zheng }
5658aa1331cSFam Zheng
56624bc15d1SKevin Wolf nb_sectors = bdrv_nb_sectors(file->bs);
5670a156f7cSMarkus Armbruster if (nb_sectors < 0) {
5680a156f7cSMarkus Armbruster return nb_sectors;
569c6ac36e1SFam Zheng }
570c6ac36e1SFam Zheng
5715839e53bSMarkus Armbruster s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
572b3976d3cSFam Zheng extent = &s->extents[s->num_extents];
573b3976d3cSFam Zheng s->num_extents++;
574b3976d3cSFam Zheng
575b3976d3cSFam Zheng memset(extent, 0, sizeof(VmdkExtent));
576b3976d3cSFam Zheng extent->file = file;
577b3976d3cSFam Zheng extent->flat = flat;
578b3976d3cSFam Zheng extent->sectors = sectors;
579b3976d3cSFam Zheng extent->l1_table_offset = l1_offset;
580b3976d3cSFam Zheng extent->l1_backup_table_offset = l1_backup_offset;
581b3976d3cSFam Zheng extent->l1_size = l1_size;
582b3976d3cSFam Zheng extent->l1_entry_sectors = l2_size * cluster_sectors;
583b3976d3cSFam Zheng extent->l2_size = l2_size;
584301c7d38SFam Zheng extent->cluster_sectors = flat ? sectors : cluster_sectors;
5850a156f7cSMarkus Armbruster extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
58698eb9733SSam Eiderman extent->entry_size = sizeof(uint32_t);
587b3976d3cSFam Zheng
588b3976d3cSFam Zheng if (s->num_extents > 1) {
589b3976d3cSFam Zheng extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
590b3976d3cSFam Zheng } else {
591b3976d3cSFam Zheng extent->end_sector = extent->sectors;
592b3976d3cSFam Zheng }
593b3976d3cSFam Zheng bs->total_sectors = extent->end_sector;
5948aa1331cSFam Zheng if (new_extent) {
5958aa1331cSFam Zheng *new_extent = extent;
5968aa1331cSFam Zheng }
5978aa1331cSFam Zheng return 0;
598b3976d3cSFam Zheng }
599b3976d3cSFam Zheng
600b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_init_tables(BlockDriverState * bs,VmdkExtent * extent,Error ** errp)601b7cfc7d5SKevin Wolf vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, Error **errp)
602019d6b8fSAnthony Liguori {
603b4b3ab14SFam Zheng int ret;
60413c4941cSFam Zheng size_t l1_size;
60513c4941cSFam Zheng int i;
606b4b3ab14SFam Zheng
607b4b3ab14SFam Zheng /* read the L1 table */
60898eb9733SSam Eiderman l1_size = extent->l1_size * extent->entry_size;
609d6e59931SKevin Wolf extent->l1_table = g_try_malloc(l1_size);
610d6e59931SKevin Wolf if (l1_size && extent->l1_table == NULL) {
611d6e59931SKevin Wolf return -ENOMEM;
612d6e59931SKevin Wolf }
613d6e59931SKevin Wolf
61432cc71deSAlberto Faria ret = bdrv_pread(extent->file, extent->l1_table_offset, l1_size,
61532cc71deSAlberto Faria extent->l1_table, 0);
616b4b3ab14SFam Zheng if (ret < 0) {
617f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
6184823970bSFam Zheng error_setg_errno(errp, -ret,
6194823970bSFam Zheng "Could not read l1 table from extent '%s'",
62024bc15d1SKevin Wolf extent->file->bs->filename);
621b4b3ab14SFam Zheng goto fail_l1;
622b4b3ab14SFam Zheng }
623b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) {
62498eb9733SSam Eiderman if (extent->entry_size == sizeof(uint64_t)) {
62598eb9733SSam Eiderman le64_to_cpus((uint64_t *)extent->l1_table + i);
62698eb9733SSam Eiderman } else {
62798eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint32_t));
62898eb9733SSam Eiderman le32_to_cpus((uint32_t *)extent->l1_table + i);
62998eb9733SSam Eiderman }
630b4b3ab14SFam Zheng }
631b4b3ab14SFam Zheng
632b4b3ab14SFam Zheng if (extent->l1_backup_table_offset) {
63398eb9733SSam Eiderman assert(!extent->sesparse);
634d6e59931SKevin Wolf extent->l1_backup_table = g_try_malloc(l1_size);
635d6e59931SKevin Wolf if (l1_size && extent->l1_backup_table == NULL) {
636d6e59931SKevin Wolf ret = -ENOMEM;
637d6e59931SKevin Wolf goto fail_l1;
638d6e59931SKevin Wolf }
63953fb7844SAlberto Faria ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
64032cc71deSAlberto Faria l1_size, extent->l1_backup_table, 0);
641b4b3ab14SFam Zheng if (ret < 0) {
642f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
6434823970bSFam Zheng error_setg_errno(errp, -ret,
6444823970bSFam Zheng "Could not read l1 backup table from extent '%s'",
64524bc15d1SKevin Wolf extent->file->bs->filename);
646b4b3ab14SFam Zheng goto fail_l1b;
647b4b3ab14SFam Zheng }
648b4b3ab14SFam Zheng for (i = 0; i < extent->l1_size; i++) {
649b4b3ab14SFam Zheng le32_to_cpus(&extent->l1_backup_table[i]);
650b4b3ab14SFam Zheng }
651b4b3ab14SFam Zheng }
652b4b3ab14SFam Zheng
653b4b3ab14SFam Zheng extent->l2_cache =
65498eb9733SSam Eiderman g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
655b4b3ab14SFam Zheng return 0;
656b4b3ab14SFam Zheng fail_l1b:
6577267c094SAnthony Liguori g_free(extent->l1_backup_table);
658b4b3ab14SFam Zheng fail_l1:
6597267c094SAnthony Liguori g_free(extent->l1_table);
660b4b3ab14SFam Zheng return ret;
661b4b3ab14SFam Zheng }
662b4b3ab14SFam Zheng
663b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_vmfs_sparse(BlockDriverState * bs,BdrvChild * file,int flags,Error ** errp)664b7cfc7d5SKevin Wolf vmdk_open_vmfs_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
665b7cfc7d5SKevin Wolf Error **errp)
666b4b3ab14SFam Zheng {
667b4b3ab14SFam Zheng int ret;
668019d6b8fSAnthony Liguori uint32_t magic;
669019d6b8fSAnthony Liguori VMDK3Header header;
670cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
671b4b3ab14SFam Zheng
67232cc71deSAlberto Faria ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
673b4b3ab14SFam Zheng if (ret < 0) {
674f30c66baSMax Reitz bdrv_refresh_filename(file->bs);
6754823970bSFam Zheng error_setg_errno(errp, -ret,
6764823970bSFam Zheng "Could not read header from file '%s'",
67724bc15d1SKevin Wolf file->bs->filename);
67886c6b429SFam Zheng return ret;
679b3976d3cSFam Zheng }
680f6b61e54SFam Zheng ret = vmdk_add_extent(bs, file, false,
681b3976d3cSFam Zheng le32_to_cpu(header.disk_sectors),
6827237aecdSFam Zheng (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
683f6b61e54SFam Zheng 0,
684f6b61e54SFam Zheng le32_to_cpu(header.l1dir_size),
685f6b61e54SFam Zheng 4096,
6868aa1331cSFam Zheng le32_to_cpu(header.granularity),
6874823970bSFam Zheng &extent,
6884823970bSFam Zheng errp);
6898aa1331cSFam Zheng if (ret < 0) {
6908aa1331cSFam Zheng return ret;
6918aa1331cSFam Zheng }
6924823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp);
693b4b3ab14SFam Zheng if (ret) {
69486c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */
69586c6b429SFam Zheng vmdk_free_last_extent(bs);
696b4b3ab14SFam Zheng }
697b4b3ab14SFam Zheng return ret;
698b4b3ab14SFam Zheng }
699b4b3ab14SFam Zheng
70098eb9733SSam Eiderman #define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
70198eb9733SSam Eiderman #define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
70298eb9733SSam Eiderman
70398eb9733SSam Eiderman /* Strict checks - format not officially documented */
check_se_sparse_const_header(VMDKSESparseConstHeader * header,Error ** errp)70498eb9733SSam Eiderman static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
70598eb9733SSam Eiderman Error **errp)
70698eb9733SSam Eiderman {
70798eb9733SSam Eiderman header->magic = le64_to_cpu(header->magic);
70898eb9733SSam Eiderman header->version = le64_to_cpu(header->version);
70998eb9733SSam Eiderman header->grain_size = le64_to_cpu(header->grain_size);
71098eb9733SSam Eiderman header->grain_table_size = le64_to_cpu(header->grain_table_size);
71198eb9733SSam Eiderman header->flags = le64_to_cpu(header->flags);
71298eb9733SSam Eiderman header->reserved1 = le64_to_cpu(header->reserved1);
71398eb9733SSam Eiderman header->reserved2 = le64_to_cpu(header->reserved2);
71498eb9733SSam Eiderman header->reserved3 = le64_to_cpu(header->reserved3);
71598eb9733SSam Eiderman header->reserved4 = le64_to_cpu(header->reserved4);
71698eb9733SSam Eiderman
71798eb9733SSam Eiderman header->volatile_header_offset =
71898eb9733SSam Eiderman le64_to_cpu(header->volatile_header_offset);
71998eb9733SSam Eiderman header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
72098eb9733SSam Eiderman
72198eb9733SSam Eiderman header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
72298eb9733SSam Eiderman header->journal_header_size = le64_to_cpu(header->journal_header_size);
72398eb9733SSam Eiderman
72498eb9733SSam Eiderman header->journal_offset = le64_to_cpu(header->journal_offset);
72598eb9733SSam Eiderman header->journal_size = le64_to_cpu(header->journal_size);
72698eb9733SSam Eiderman
72798eb9733SSam Eiderman header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
72898eb9733SSam Eiderman header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
72998eb9733SSam Eiderman
73098eb9733SSam Eiderman header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
73198eb9733SSam Eiderman header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
73298eb9733SSam Eiderman
73398eb9733SSam Eiderman header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
73498eb9733SSam Eiderman header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
73598eb9733SSam Eiderman
73698eb9733SSam Eiderman header->backmap_offset = le64_to_cpu(header->backmap_offset);
73798eb9733SSam Eiderman header->backmap_size = le64_to_cpu(header->backmap_size);
73898eb9733SSam Eiderman
73998eb9733SSam Eiderman header->grains_offset = le64_to_cpu(header->grains_offset);
74098eb9733SSam Eiderman header->grains_size = le64_to_cpu(header->grains_size);
74198eb9733SSam Eiderman
74298eb9733SSam Eiderman if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
74398eb9733SSam Eiderman error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
74498eb9733SSam Eiderman header->magic);
74598eb9733SSam Eiderman return -EINVAL;
74698eb9733SSam Eiderman }
74798eb9733SSam Eiderman
74898eb9733SSam Eiderman if (header->version != 0x0000000200000001) {
74998eb9733SSam Eiderman error_setg(errp, "Unsupported version: 0x%016" PRIx64,
75098eb9733SSam Eiderman header->version);
75198eb9733SSam Eiderman return -ENOTSUP;
75298eb9733SSam Eiderman }
75398eb9733SSam Eiderman
75498eb9733SSam Eiderman if (header->grain_size != 8) {
75598eb9733SSam Eiderman error_setg(errp, "Unsupported grain size: %" PRIu64,
75698eb9733SSam Eiderman header->grain_size);
75798eb9733SSam Eiderman return -ENOTSUP;
75898eb9733SSam Eiderman }
75998eb9733SSam Eiderman
76098eb9733SSam Eiderman if (header->grain_table_size != 64) {
76198eb9733SSam Eiderman error_setg(errp, "Unsupported grain table size: %" PRIu64,
76298eb9733SSam Eiderman header->grain_table_size);
76398eb9733SSam Eiderman return -ENOTSUP;
76498eb9733SSam Eiderman }
76598eb9733SSam Eiderman
76698eb9733SSam Eiderman if (header->flags != 0) {
76798eb9733SSam Eiderman error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
76898eb9733SSam Eiderman header->flags);
76998eb9733SSam Eiderman return -ENOTSUP;
77098eb9733SSam Eiderman }
77198eb9733SSam Eiderman
77298eb9733SSam Eiderman if (header->reserved1 != 0 || header->reserved2 != 0 ||
77398eb9733SSam Eiderman header->reserved3 != 0 || header->reserved4 != 0) {
77498eb9733SSam Eiderman error_setg(errp, "Unsupported reserved bits:"
77598eb9733SSam Eiderman " 0x%016" PRIx64 " 0x%016" PRIx64
77698eb9733SSam Eiderman " 0x%016" PRIx64 " 0x%016" PRIx64,
77798eb9733SSam Eiderman header->reserved1, header->reserved2,
77898eb9733SSam Eiderman header->reserved3, header->reserved4);
77998eb9733SSam Eiderman return -ENOTSUP;
78098eb9733SSam Eiderman }
78198eb9733SSam Eiderman
78298eb9733SSam Eiderman /* check that padding is 0 */
78398eb9733SSam Eiderman if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
78498eb9733SSam Eiderman error_setg(errp, "Unsupported non-zero const header padding");
78598eb9733SSam Eiderman return -ENOTSUP;
78698eb9733SSam Eiderman }
78798eb9733SSam Eiderman
78898eb9733SSam Eiderman return 0;
78998eb9733SSam Eiderman }
79098eb9733SSam Eiderman
check_se_sparse_volatile_header(VMDKSESparseVolatileHeader * header,Error ** errp)79198eb9733SSam Eiderman static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
79298eb9733SSam Eiderman Error **errp)
79398eb9733SSam Eiderman {
79498eb9733SSam Eiderman header->magic = le64_to_cpu(header->magic);
79598eb9733SSam Eiderman header->free_gt_number = le64_to_cpu(header->free_gt_number);
79698eb9733SSam Eiderman header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
79798eb9733SSam Eiderman header->replay_journal = le64_to_cpu(header->replay_journal);
79898eb9733SSam Eiderman
79998eb9733SSam Eiderman if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
80098eb9733SSam Eiderman error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
80198eb9733SSam Eiderman header->magic);
80298eb9733SSam Eiderman return -EINVAL;
80398eb9733SSam Eiderman }
80498eb9733SSam Eiderman
80598eb9733SSam Eiderman if (header->replay_journal) {
80698eb9733SSam Eiderman error_setg(errp, "Image is dirty, Replaying journal not supported");
80798eb9733SSam Eiderman return -ENOTSUP;
80898eb9733SSam Eiderman }
80998eb9733SSam Eiderman
81098eb9733SSam Eiderman /* check that padding is 0 */
81198eb9733SSam Eiderman if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
81298eb9733SSam Eiderman error_setg(errp, "Unsupported non-zero volatile header padding");
81398eb9733SSam Eiderman return -ENOTSUP;
81498eb9733SSam Eiderman }
81598eb9733SSam Eiderman
81698eb9733SSam Eiderman return 0;
81798eb9733SSam Eiderman }
81898eb9733SSam Eiderman
819b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_se_sparse(BlockDriverState * bs,BdrvChild * file,int flags,Error ** errp)820b7cfc7d5SKevin Wolf vmdk_open_se_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
821b7cfc7d5SKevin Wolf Error **errp)
82298eb9733SSam Eiderman {
82398eb9733SSam Eiderman int ret;
82498eb9733SSam Eiderman VMDKSESparseConstHeader const_header;
82598eb9733SSam Eiderman VMDKSESparseVolatileHeader volatile_header;
826cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
82798eb9733SSam Eiderman
82898eb9733SSam Eiderman ret = bdrv_apply_auto_read_only(bs,
82998eb9733SSam Eiderman "No write support for seSparse images available", errp);
83098eb9733SSam Eiderman if (ret < 0) {
83198eb9733SSam Eiderman return ret;
83298eb9733SSam Eiderman }
83398eb9733SSam Eiderman
83498eb9733SSam Eiderman assert(sizeof(const_header) == SECTOR_SIZE);
83598eb9733SSam Eiderman
83632cc71deSAlberto Faria ret = bdrv_pread(file, 0, sizeof(const_header), &const_header, 0);
83798eb9733SSam Eiderman if (ret < 0) {
83898eb9733SSam Eiderman bdrv_refresh_filename(file->bs);
83998eb9733SSam Eiderman error_setg_errno(errp, -ret,
84098eb9733SSam Eiderman "Could not read const header from file '%s'",
84198eb9733SSam Eiderman file->bs->filename);
84298eb9733SSam Eiderman return ret;
84398eb9733SSam Eiderman }
84498eb9733SSam Eiderman
84598eb9733SSam Eiderman /* check const header */
84698eb9733SSam Eiderman ret = check_se_sparse_const_header(&const_header, errp);
84798eb9733SSam Eiderman if (ret < 0) {
84898eb9733SSam Eiderman return ret;
84998eb9733SSam Eiderman }
85098eb9733SSam Eiderman
85198eb9733SSam Eiderman assert(sizeof(volatile_header) == SECTOR_SIZE);
85298eb9733SSam Eiderman
85353fb7844SAlberto Faria ret = bdrv_pread(file, const_header.volatile_header_offset * SECTOR_SIZE,
85432cc71deSAlberto Faria sizeof(volatile_header), &volatile_header, 0);
85598eb9733SSam Eiderman if (ret < 0) {
85698eb9733SSam Eiderman bdrv_refresh_filename(file->bs);
85798eb9733SSam Eiderman error_setg_errno(errp, -ret,
85898eb9733SSam Eiderman "Could not read volatile header from file '%s'",
85998eb9733SSam Eiderman file->bs->filename);
86098eb9733SSam Eiderman return ret;
86198eb9733SSam Eiderman }
86298eb9733SSam Eiderman
86398eb9733SSam Eiderman /* check volatile header */
86498eb9733SSam Eiderman ret = check_se_sparse_volatile_header(&volatile_header, errp);
86598eb9733SSam Eiderman if (ret < 0) {
86698eb9733SSam Eiderman return ret;
86798eb9733SSam Eiderman }
86898eb9733SSam Eiderman
86998eb9733SSam Eiderman ret = vmdk_add_extent(bs, file, false,
87098eb9733SSam Eiderman const_header.capacity,
87198eb9733SSam Eiderman const_header.grain_dir_offset * SECTOR_SIZE,
87298eb9733SSam Eiderman 0,
87398eb9733SSam Eiderman const_header.grain_dir_size *
87498eb9733SSam Eiderman SECTOR_SIZE / sizeof(uint64_t),
87598eb9733SSam Eiderman const_header.grain_table_size *
87698eb9733SSam Eiderman SECTOR_SIZE / sizeof(uint64_t),
87798eb9733SSam Eiderman const_header.grain_size,
87898eb9733SSam Eiderman &extent,
87998eb9733SSam Eiderman errp);
88098eb9733SSam Eiderman if (ret < 0) {
88198eb9733SSam Eiderman return ret;
88298eb9733SSam Eiderman }
88398eb9733SSam Eiderman
88498eb9733SSam Eiderman extent->sesparse = true;
88598eb9733SSam Eiderman extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
88698eb9733SSam Eiderman extent->sesparse_clusters_offset = const_header.grains_offset;
88798eb9733SSam Eiderman extent->entry_size = sizeof(uint64_t);
88898eb9733SSam Eiderman
88998eb9733SSam Eiderman ret = vmdk_init_tables(bs, extent, errp);
89098eb9733SSam Eiderman if (ret) {
89198eb9733SSam Eiderman /* free extent allocated by vmdk_add_extent */
89298eb9733SSam Eiderman vmdk_free_last_extent(bs);
89398eb9733SSam Eiderman }
89498eb9733SSam Eiderman
89598eb9733SSam Eiderman return ret;
89698eb9733SSam Eiderman }
89798eb9733SSam Eiderman
898d1833ef5SPaolo Bonzini static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
899a6468367SKevin Wolf QDict *options, Error **errp);
900f16f509dSFam Zheng
vmdk_read_desc(BdrvChild * file,uint64_t desc_offset,Error ** errp)901cf2ab8fcSKevin Wolf static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
902a8842e6dSPaolo Bonzini {
903a8842e6dSPaolo Bonzini int64_t size;
904a8842e6dSPaolo Bonzini char *buf;
905a8842e6dSPaolo Bonzini int ret;
906a8842e6dSPaolo Bonzini
907cf2ab8fcSKevin Wolf size = bdrv_getlength(file->bs);
908a8842e6dSPaolo Bonzini if (size < 0) {
909a8842e6dSPaolo Bonzini error_setg_errno(errp, -size, "Could not access file");
910a8842e6dSPaolo Bonzini return NULL;
911a8842e6dSPaolo Bonzini }
912a8842e6dSPaolo Bonzini
91303c3359dSFam Zheng if (size < 4) {
91403c3359dSFam Zheng /* Both descriptor file and sparse image must be much larger than 4
91503c3359dSFam Zheng * bytes, also callers of vmdk_read_desc want to compare the first 4
91603c3359dSFam Zheng * bytes with VMDK4_MAGIC, let's error out if less is read. */
91703c3359dSFam Zheng error_setg(errp, "File is too small, not a valid image");
91803c3359dSFam Zheng return NULL;
91903c3359dSFam Zheng }
92003c3359dSFam Zheng
92173b7bcadSFam Zheng size = MIN(size, (1 << 20) - 1); /* avoid unbounded allocation */
92273b7bcadSFam Zheng buf = g_malloc(size + 1);
923a8842e6dSPaolo Bonzini
92432cc71deSAlberto Faria ret = bdrv_pread(file, desc_offset, size, buf, 0);
925a8842e6dSPaolo Bonzini if (ret < 0) {
926a8842e6dSPaolo Bonzini error_setg_errno(errp, -ret, "Could not read from file");
927a8842e6dSPaolo Bonzini g_free(buf);
928a8842e6dSPaolo Bonzini return NULL;
929a8842e6dSPaolo Bonzini }
930353a5d84SAlberto Faria buf[size] = 0;
931a8842e6dSPaolo Bonzini
932a8842e6dSPaolo Bonzini return buf;
933a8842e6dSPaolo Bonzini }
934a8842e6dSPaolo Bonzini
935b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_vmdk4(BlockDriverState * bs,BdrvChild * file,int flags,QDict * options,Error ** errp)936b7cfc7d5SKevin Wolf vmdk_open_vmdk4(BlockDriverState *bs, BdrvChild *file, int flags,
937b7cfc7d5SKevin Wolf QDict *options, Error **errp)
938b4b3ab14SFam Zheng {
939b4b3ab14SFam Zheng int ret;
940b4b3ab14SFam Zheng uint32_t magic;
941b4b3ab14SFam Zheng uint32_t l1_size, l1_entry_sectors;
942019d6b8fSAnthony Liguori VMDK4Header header;
943cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
944f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
945bb45ded9SFam Zheng int64_t l1_backup_offset = 0;
9463db1d98aSFam Zheng bool compressed;
947b4b3ab14SFam Zheng
94832cc71deSAlberto Faria ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
949b4b3ab14SFam Zheng if (ret < 0) {
950f30c66baSMax Reitz bdrv_refresh_filename(file->bs);
9514823970bSFam Zheng error_setg_errno(errp, -ret,
9524823970bSFam Zheng "Could not read header from file '%s'",
95324bc15d1SKevin Wolf file->bs->filename);
95489ac8480SPaolo Bonzini return -EINVAL;
955b3976d3cSFam Zheng }
9565a394b9eSStefan Hajnoczi if (header.capacity == 0) {
957e98768d4SFam Zheng uint64_t desc_offset = le64_to_cpu(header.desc_offset);
9585a394b9eSStefan Hajnoczi if (desc_offset) {
959cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
960d1833ef5SPaolo Bonzini if (!buf) {
961d1833ef5SPaolo Bonzini return -EINVAL;
962d1833ef5SPaolo Bonzini }
963a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
964d1833ef5SPaolo Bonzini g_free(buf);
965d1833ef5SPaolo Bonzini return ret;
9665a394b9eSStefan Hajnoczi }
967f16f509dSFam Zheng }
96865bd155cSKevin Wolf
969f4c129a3SFam Zheng if (!s->create_type) {
970f4c129a3SFam Zheng s->create_type = g_strdup("monolithicSparse");
971f4c129a3SFam Zheng }
972f4c129a3SFam Zheng
97365bd155cSKevin Wolf if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
97465bd155cSKevin Wolf /*
97565bd155cSKevin Wolf * The footer takes precedence over the header, so read it in. The
97665bd155cSKevin Wolf * footer starts at offset -1024 from the end: One sector for the
97765bd155cSKevin Wolf * footer, and another one for the end-of-stream marker.
97865bd155cSKevin Wolf */
97965bd155cSKevin Wolf struct {
98065bd155cSKevin Wolf struct {
98165bd155cSKevin Wolf uint64_t val;
98265bd155cSKevin Wolf uint32_t size;
98365bd155cSKevin Wolf uint32_t type;
98465bd155cSKevin Wolf uint8_t pad[512 - 16];
98565bd155cSKevin Wolf } QEMU_PACKED footer_marker;
98665bd155cSKevin Wolf
98765bd155cSKevin Wolf uint32_t magic;
98865bd155cSKevin Wolf VMDK4Header header;
98965bd155cSKevin Wolf uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
99065bd155cSKevin Wolf
99165bd155cSKevin Wolf struct {
99265bd155cSKevin Wolf uint64_t val;
99365bd155cSKevin Wolf uint32_t size;
99465bd155cSKevin Wolf uint32_t type;
99565bd155cSKevin Wolf uint8_t pad[512 - 16];
99665bd155cSKevin Wolf } QEMU_PACKED eos_marker;
99765bd155cSKevin Wolf } QEMU_PACKED footer;
99865bd155cSKevin Wolf
99953fb7844SAlberto Faria ret = bdrv_pread(file, bs->file->bs->total_sectors * 512 - 1536,
100032cc71deSAlberto Faria sizeof(footer), &footer, 0);
100165bd155cSKevin Wolf if (ret < 0) {
1002d899d2e2SFam Zheng error_setg_errno(errp, -ret, "Failed to read footer");
100365bd155cSKevin Wolf return ret;
100465bd155cSKevin Wolf }
100565bd155cSKevin Wolf
100665bd155cSKevin Wolf /* Some sanity checks for the footer */
100765bd155cSKevin Wolf if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
100865bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.size) != 0 ||
100965bd155cSKevin Wolf le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
101065bd155cSKevin Wolf le64_to_cpu(footer.eos_marker.val) != 0 ||
101165bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.size) != 0 ||
101265bd155cSKevin Wolf le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
101365bd155cSKevin Wolf {
1014d899d2e2SFam Zheng error_setg(errp, "Invalid footer");
101565bd155cSKevin Wolf return -EINVAL;
101665bd155cSKevin Wolf }
101765bd155cSKevin Wolf
101865bd155cSKevin Wolf header = footer.header;
101965bd155cSKevin Wolf }
102065bd155cSKevin Wolf
10213db1d98aSFam Zheng compressed =
10223db1d98aSFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1023509d39aaSFam Zheng if (le32_to_cpu(header.version) > 3) {
1024a55448b3SMax Reitz error_setg(errp, "Unsupported VMDK version %" PRIu32,
102596c51eb5SFam Zheng le32_to_cpu(header.version));
102696c51eb5SFam Zheng return -ENOTSUP;
10273db1d98aSFam Zheng } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
10283db1d98aSFam Zheng !compressed) {
1029509d39aaSFam Zheng /* VMware KB 2064959 explains that version 3 added support for
1030509d39aaSFam Zheng * persistent changed block tracking (CBT), and backup software can
1031509d39aaSFam Zheng * read it as version=1 if it doesn't care about the changed area
1032509d39aaSFam Zheng * information. So we are safe to enable read only. */
1033509d39aaSFam Zheng error_setg(errp, "VMDK version 3 must be read only");
1034509d39aaSFam Zheng return -EINVAL;
103596c51eb5SFam Zheng }
103696c51eb5SFam Zheng
1037ca8804ceSFam Zheng if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
103889ac8480SPaolo Bonzini error_setg(errp, "L2 table size too big");
1039f8ce0403SFam Zheng return -EINVAL;
1040f8ce0403SFam Zheng }
1041f8ce0403SFam Zheng
1042ca8804ceSFam Zheng l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
1043b3976d3cSFam Zheng * le64_to_cpu(header.granularity);
104475d12341SStefan Weil if (l1_entry_sectors == 0) {
1045d899d2e2SFam Zheng error_setg(errp, "L1 entry size is invalid");
104686c6b429SFam Zheng return -EINVAL;
104786c6b429SFam Zheng }
1048b3976d3cSFam Zheng l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
1049b3976d3cSFam Zheng / l1_entry_sectors;
1050bb45ded9SFam Zheng if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
1051bb45ded9SFam Zheng l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
1052bb45ded9SFam Zheng }
105324bc15d1SKevin Wolf if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
10544ab9dab5SFam Zheng error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
10554ab9dab5SFam Zheng (int64_t)(le64_to_cpu(header.grain_offset)
10564ab9dab5SFam Zheng * BDRV_SECTOR_SIZE));
105734ceed81SFam Zheng return -EINVAL;
105834ceed81SFam Zheng }
105934ceed81SFam Zheng
10608aa1331cSFam Zheng ret = vmdk_add_extent(bs, file, false,
1061b3976d3cSFam Zheng le64_to_cpu(header.capacity),
1062b3976d3cSFam Zheng le64_to_cpu(header.gd_offset) << 9,
1063bb45ded9SFam Zheng l1_backup_offset,
1064b3976d3cSFam Zheng l1_size,
1065ca8804ceSFam Zheng le32_to_cpu(header.num_gtes_per_gt),
10668aa1331cSFam Zheng le64_to_cpu(header.granularity),
10674823970bSFam Zheng &extent,
10684823970bSFam Zheng errp);
10698aa1331cSFam Zheng if (ret < 0) {
10708aa1331cSFam Zheng return ret;
10718aa1331cSFam Zheng }
1072432bb170SFam Zheng extent->compressed =
1073432bb170SFam Zheng le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
1074d8a7b061SFam Zheng if (extent->compressed) {
1075d8a7b061SFam Zheng g_free(s->create_type);
1076d8a7b061SFam Zheng s->create_type = g_strdup("streamOptimized");
1077d8a7b061SFam Zheng }
1078432bb170SFam Zheng extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
107914ead646SFam Zheng extent->version = le32_to_cpu(header.version);
108014ead646SFam Zheng extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
10814823970bSFam Zheng ret = vmdk_init_tables(bs, extent, errp);
1082b4b3ab14SFam Zheng if (ret) {
108386c6b429SFam Zheng /* free extent allocated by vmdk_add_extent */
108486c6b429SFam Zheng vmdk_free_last_extent(bs);
1085019d6b8fSAnthony Liguori }
1086b4b3ab14SFam Zheng return ret;
1087b4b3ab14SFam Zheng }
1088b4b3ab14SFam Zheng
10897fa60fa3SFam Zheng /* find an option value out of descriptor file */
vmdk_parse_description(const char * desc,const char * opt_name,char * buf,int buf_size)10907fa60fa3SFam Zheng static int vmdk_parse_description(const char *desc, const char *opt_name,
10917fa60fa3SFam Zheng char *buf, int buf_size)
10927fa60fa3SFam Zheng {
10937fa60fa3SFam Zheng char *opt_pos, *opt_end;
10947fa60fa3SFam Zheng const char *end = desc + strlen(desc);
10957fa60fa3SFam Zheng
10967fa60fa3SFam Zheng opt_pos = strstr(desc, opt_name);
10977fa60fa3SFam Zheng if (!opt_pos) {
109865f74725SFam Zheng return VMDK_ERROR;
10997fa60fa3SFam Zheng }
11007fa60fa3SFam Zheng /* Skip "=\"" following opt_name */
11017fa60fa3SFam Zheng opt_pos += strlen(opt_name) + 2;
11027fa60fa3SFam Zheng if (opt_pos >= end) {
110365f74725SFam Zheng return VMDK_ERROR;
11047fa60fa3SFam Zheng }
11057fa60fa3SFam Zheng opt_end = opt_pos;
11067fa60fa3SFam Zheng while (opt_end < end && *opt_end != '"') {
11077fa60fa3SFam Zheng opt_end++;
11087fa60fa3SFam Zheng }
11097fa60fa3SFam Zheng if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
111065f74725SFam Zheng return VMDK_ERROR;
11117fa60fa3SFam Zheng }
11127fa60fa3SFam Zheng pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
111365f74725SFam Zheng return VMDK_OK;
11147fa60fa3SFam Zheng }
11157fa60fa3SFam Zheng
111686c6b429SFam Zheng /* Open an extent file and append to bs array */
1117b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_sparse(BlockDriverState * bs,BdrvChild * file,int flags,char * buf,QDict * options,Error ** errp)1118b7cfc7d5SKevin Wolf vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
1119a6468367SKevin Wolf char *buf, QDict *options, Error **errp)
112086c6b429SFam Zheng {
112186c6b429SFam Zheng uint32_t magic;
112286c6b429SFam Zheng
1123d1833ef5SPaolo Bonzini magic = ldl_be_p(buf);
112486c6b429SFam Zheng switch (magic) {
112586c6b429SFam Zheng case VMDK3_MAGIC:
11264823970bSFam Zheng return vmdk_open_vmfs_sparse(bs, file, flags, errp);
112786c6b429SFam Zheng case VMDK4_MAGIC:
1128a6468367SKevin Wolf return vmdk_open_vmdk4(bs, file, flags, options, errp);
112986c6b429SFam Zheng default:
113076abe407SPaolo Bonzini error_setg(errp, "Image not in VMDK format");
113176abe407SPaolo Bonzini return -EINVAL;
113286c6b429SFam Zheng }
113386c6b429SFam Zheng }
113486c6b429SFam Zheng
/*
 * Return a pointer to the character following the first '\n' in @s, or to
 * the terminating NUL if @s contains no newline.
 */
static const char *next_line(const char *s)
{
    const char *newline = strchr(s, '\n');

    if (newline) {
        return newline + 1;
    }
    return s + strlen(s);
}

1146b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_parse_extents(const char * desc,BlockDriverState * bs,QDict * options,Error ** errp)1147b7cfc7d5SKevin Wolf vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
1148b7cfc7d5SKevin Wolf Error **errp)
11497fa60fa3SFam Zheng {
11507fa60fa3SFam Zheng int ret;
1151395a22faSJeff Cody int matches;
11527fa60fa3SFam Zheng char access[11];
11537fa60fa3SFam Zheng char type[11];
11547fa60fa3SFam Zheng char fname[512];
1155d28d737fSMarkus Armbruster const char *p, *np;
11567fa60fa3SFam Zheng int64_t sectors = 0;
11577fa60fa3SFam Zheng int64_t flat_offset;
1158cdc0dd25SMax Reitz char *desc_file_dir = NULL;
1159fe206562SJeff Cody char *extent_path;
116024bc15d1SKevin Wolf BdrvChild *extent_file;
11618b1869daSMax Reitz BdrvChildRole extent_role;
1162f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
1163cd466702SChristian Borntraeger VmdkExtent *extent = NULL;
1164a6468367SKevin Wolf char extent_opt_prefix[32];
116524bc15d1SKevin Wolf Error *local_err = NULL;
11667fa60fa3SFam Zheng
1167b7cfc7d5SKevin Wolf GLOBAL_STATE_CODE();
1168b7cfc7d5SKevin Wolf
1169e4937694SMarkus Armbruster for (p = desc; *p; p = next_line(p)) {
11708a3e0bc3SFam Zheng /* parse extent line in one of below formats:
11718a3e0bc3SFam Zheng *
11727fa60fa3SFam Zheng * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
11737fa60fa3SFam Zheng * RW [size in sectors] SPARSE "file-name.vmdk"
11748a3e0bc3SFam Zheng * RW [size in sectors] VMFS "file-name.vmdk"
11758a3e0bc3SFam Zheng * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
117698eb9733SSam Eiderman * RW [size in sectors] SESPARSE "file-name.vmdk"
11777fa60fa3SFam Zheng */
11787fa60fa3SFam Zheng flat_offset = -1;
1179395a22faSJeff Cody matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
11807fa60fa3SFam Zheng access, §ors, type, fname, &flat_offset);
1181395a22faSJeff Cody if (matches < 4 || strcmp(access, "RW")) {
1182e4937694SMarkus Armbruster continue;
11837fa60fa3SFam Zheng } else if (!strcmp(type, "FLAT")) {
1184395a22faSJeff Cody if (matches != 5 || flat_offset < 0) {
1185d28d737fSMarkus Armbruster goto invalid;
11867fa60fa3SFam Zheng }
1187dbbcaa8dSFam Zheng } else if (!strcmp(type, "VMFS")) {
1188395a22faSJeff Cody if (matches == 4) {
1189dbbcaa8dSFam Zheng flat_offset = 0;
1190b47053bdSFam Zheng } else {
1191d28d737fSMarkus Armbruster goto invalid;
1192b47053bdSFam Zheng }
1193395a22faSJeff Cody } else if (matches != 4) {
1194d28d737fSMarkus Armbruster goto invalid;
11957fa60fa3SFam Zheng }
11967fa60fa3SFam Zheng
11977fa60fa3SFam Zheng if (sectors <= 0 ||
1198daac8fdcSFam Zheng (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
119998eb9733SSam Eiderman strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
120098eb9733SSam Eiderman strcmp(type, "SESPARSE")) ||
12017fa60fa3SFam Zheng (strcmp(access, "RW"))) {
1202e4937694SMarkus Armbruster continue;
12037fa60fa3SFam Zheng }
12047fa60fa3SFam Zheng
1205cdc0dd25SMax Reitz if (path_is_absolute(fname)) {
1206cdc0dd25SMax Reitz extent_path = g_strdup(fname);
1207cdc0dd25SMax Reitz } else {
1208cdc0dd25SMax Reitz if (!desc_file_dir) {
1209cdc0dd25SMax Reitz desc_file_dir = bdrv_dirname(bs->file->bs, errp);
1210cdc0dd25SMax Reitz if (!desc_file_dir) {
1211f30c66baSMax Reitz bdrv_refresh_filename(bs->file->bs);
1212cdc0dd25SMax Reitz error_prepend(errp, "Cannot use relative paths with VMDK "
1213cdc0dd25SMax Reitz "descriptor file '%s': ",
1214cdc0dd25SMax Reitz bs->file->bs->filename);
1215cdc0dd25SMax Reitz ret = -EINVAL;
1216cdc0dd25SMax Reitz goto out;
1217cdc0dd25SMax Reitz }
12185c98415bSMax Reitz }
12195c98415bSMax Reitz
1220cdc0dd25SMax Reitz extent_path = g_strconcat(desc_file_dir, fname, NULL);
1221cdc0dd25SMax Reitz }
1222a6468367SKevin Wolf
1223a6468367SKevin Wolf ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
1224a6468367SKevin Wolf assert(ret < 32);
1225a6468367SKevin Wolf
12268b1869daSMax Reitz extent_role = BDRV_CHILD_DATA;
12278b1869daSMax Reitz if (strcmp(type, "FLAT") != 0 && strcmp(type, "VMFS") != 0) {
12288b1869daSMax Reitz /* non-flat extents have metadata */
12298b1869daSMax Reitz extent_role |= BDRV_CHILD_METADATA;
12308b1869daSMax Reitz }
12318b1869daSMax Reitz
123224bc15d1SKevin Wolf extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
12338b1869daSMax Reitz bs, &child_of_bds, extent_role, false,
12348b1869daSMax Reitz &local_err);
1235fe206562SJeff Cody g_free(extent_path);
1236a8d99c0eSDmitry Frolov if (!extent_file) {
123724bc15d1SKevin Wolf error_propagate(errp, local_err);
1238cdc0dd25SMax Reitz ret = -EINVAL;
1239cdc0dd25SMax Reitz goto out;
12407fa60fa3SFam Zheng }
124186c6b429SFam Zheng
124286c6b429SFam Zheng /* save to extents array */
124304d542c8SPaolo Bonzini if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
124486c6b429SFam Zheng /* FLAT extent */
124586c6b429SFam Zheng
12468aa1331cSFam Zheng ret = vmdk_add_extent(bs, extent_file, true, sectors,
12474823970bSFam Zheng 0, 0, 0, 0, 0, &extent, errp);
12488aa1331cSFam Zheng if (ret < 0) {
1249b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
125032a8aba3SKevin Wolf bdrv_graph_wrlock(NULL);
125124bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12526bc0bcc8SKevin Wolf bdrv_graph_wrunlock(NULL);
1253b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1254cdc0dd25SMax Reitz goto out;
12558aa1331cSFam Zheng }
1256f16f509dSFam Zheng extent->flat_start_offset = flat_offset << 9;
1257daac8fdcSFam Zheng } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
1258daac8fdcSFam Zheng /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
1259cf2ab8fcSKevin Wolf char *buf = vmdk_read_desc(extent_file, 0, errp);
1260d1833ef5SPaolo Bonzini if (!buf) {
1261d1833ef5SPaolo Bonzini ret = -EINVAL;
1262d1833ef5SPaolo Bonzini } else {
1263a6468367SKevin Wolf ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
1264a6468367SKevin Wolf options, errp);
1265d1833ef5SPaolo Bonzini }
1266d1833ef5SPaolo Bonzini g_free(buf);
1267b6b1d31fSStefan Hajnoczi if (ret) {
1268b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
126932a8aba3SKevin Wolf bdrv_graph_wrlock(NULL);
127024bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12716bc0bcc8SKevin Wolf bdrv_graph_wrunlock(NULL);
1272b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1273cdc0dd25SMax Reitz goto out;
127486c6b429SFam Zheng }
1275f4c129a3SFam Zheng extent = &s->extents[s->num_extents - 1];
127698eb9733SSam Eiderman } else if (!strcmp(type, "SESPARSE")) {
127798eb9733SSam Eiderman ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
127898eb9733SSam Eiderman if (ret) {
1279b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
128032a8aba3SKevin Wolf bdrv_graph_wrlock(NULL);
128198eb9733SSam Eiderman bdrv_unref_child(bs, extent_file);
12826bc0bcc8SKevin Wolf bdrv_graph_wrunlock(NULL);
1283b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1284cdc0dd25SMax Reitz goto out;
128598eb9733SSam Eiderman }
128698eb9733SSam Eiderman extent = &s->extents[s->num_extents - 1];
12877fa60fa3SFam Zheng } else {
12884823970bSFam Zheng error_setg(errp, "Unsupported extent type '%s'", type);
1289b7cfc7d5SKevin Wolf bdrv_graph_rdunlock_main_loop();
129032a8aba3SKevin Wolf bdrv_graph_wrlock(NULL);
129124bc15d1SKevin Wolf bdrv_unref_child(bs, extent_file);
12926bc0bcc8SKevin Wolf bdrv_graph_wrunlock(NULL);
1293b7cfc7d5SKevin Wolf bdrv_graph_rdlock_main_loop();
1294cdc0dd25SMax Reitz ret = -ENOTSUP;
1295cdc0dd25SMax Reitz goto out;
12967fa60fa3SFam Zheng }
1297f4c129a3SFam Zheng extent->type = g_strdup(type);
1298899f1ae2SFam Zheng }
1299cdc0dd25SMax Reitz
1300cdc0dd25SMax Reitz ret = 0;
1301cdc0dd25SMax Reitz goto out;
1302d28d737fSMarkus Armbruster
1303d28d737fSMarkus Armbruster invalid:
1304d28d737fSMarkus Armbruster np = next_line(p);
1305d28d737fSMarkus Armbruster assert(np != p);
1306d28d737fSMarkus Armbruster if (np[-1] == '\n') {
1307d28d737fSMarkus Armbruster np--;
1308d28d737fSMarkus Armbruster }
1309d28d737fSMarkus Armbruster error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
1310cdc0dd25SMax Reitz ret = -EINVAL;
1311cdc0dd25SMax Reitz
1312cdc0dd25SMax Reitz out:
1313cdc0dd25SMax Reitz g_free(desc_file_dir);
1314cdc0dd25SMax Reitz return ret;
13157fa60fa3SFam Zheng }
13167fa60fa3SFam Zheng
1317b7cfc7d5SKevin Wolf static int GRAPH_RDLOCK
vmdk_open_desc_file(BlockDriverState * bs,int flags,char * buf,QDict * options,Error ** errp)1318b7cfc7d5SKevin Wolf vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options,
1319b7cfc7d5SKevin Wolf Error **errp)
13207fa60fa3SFam Zheng {
13217fa60fa3SFam Zheng int ret;
13227fa60fa3SFam Zheng char ct[128];
13237fa60fa3SFam Zheng BDRVVmdkState *s = bs->opaque;
13247fa60fa3SFam Zheng
13257fa60fa3SFam Zheng if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
132676abe407SPaolo Bonzini error_setg(errp, "invalid VMDK image descriptor");
132776abe407SPaolo Bonzini ret = -EINVAL;
13280bed087dSEvgeny Budilovsky goto exit;
13297fa60fa3SFam Zheng }
13306398de51SFam Zheng if (strcmp(ct, "monolithicFlat") &&
133104d542c8SPaolo Bonzini strcmp(ct, "vmfs") &&
1332daac8fdcSFam Zheng strcmp(ct, "vmfsSparse") &&
133398eb9733SSam Eiderman strcmp(ct, "seSparse") &&
133486c6b429SFam Zheng strcmp(ct, "twoGbMaxExtentSparse") &&
13356398de51SFam Zheng strcmp(ct, "twoGbMaxExtentFlat")) {
13364823970bSFam Zheng error_setg(errp, "Unsupported image type '%s'", ct);
13370bed087dSEvgeny Budilovsky ret = -ENOTSUP;
13380bed087dSEvgeny Budilovsky goto exit;
13397fa60fa3SFam Zheng }
1340f4c129a3SFam Zheng s->create_type = g_strdup(ct);
13417fa60fa3SFam Zheng s->desc_offset = 0;
1342cdc0dd25SMax Reitz ret = vmdk_parse_extents(buf, bs, options, errp);
13430bed087dSEvgeny Budilovsky exit:
13440bed087dSEvgeny Budilovsky return ret;
13457fa60fa3SFam Zheng }
13467fa60fa3SFam Zheng
vmdk_open(BlockDriverState * bs,QDict * options,int flags,Error ** errp)1347015a1036SMax Reitz static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
1348015a1036SMax Reitz Error **errp)
1349b4b3ab14SFam Zheng {
13509aeecbbcSFam Zheng char *buf;
135186c6b429SFam Zheng int ret;
135286c6b429SFam Zheng BDRVVmdkState *s = bs->opaque;
135337f09e5eSPaolo Bonzini uint32_t magic;
1354b4b3ab14SFam Zheng
13553804e3cfSKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
13563804e3cfSKevin Wolf
135783930780SVladimir Sementsov-Ogievskiy ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
135883930780SVladimir Sementsov-Ogievskiy if (ret < 0) {
135983930780SVladimir Sementsov-Ogievskiy return ret;
13604e4bf5c4SKevin Wolf }
13614e4bf5c4SKevin Wolf
1362cf2ab8fcSKevin Wolf buf = vmdk_read_desc(bs->file, 0, errp);
1363d1833ef5SPaolo Bonzini if (!buf) {
1364d1833ef5SPaolo Bonzini return -EINVAL;
1365d1833ef5SPaolo Bonzini }
1366d1833ef5SPaolo Bonzini
136737f09e5eSPaolo Bonzini magic = ldl_be_p(buf);
136837f09e5eSPaolo Bonzini switch (magic) {
136937f09e5eSPaolo Bonzini case VMDK3_MAGIC:
137037f09e5eSPaolo Bonzini case VMDK4_MAGIC:
13719a4f4c31SKevin Wolf ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
137224bc15d1SKevin Wolf errp);
137386c6b429SFam Zheng s->desc_offset = 0x200;
137437f09e5eSPaolo Bonzini break;
137537f09e5eSPaolo Bonzini default:
13768b1869daSMax Reitz /* No data in the descriptor file */
13778b1869daSMax Reitz bs->file->role &= ~BDRV_CHILD_DATA;
13788b1869daSMax Reitz
13798b1869daSMax Reitz /* Must succeed because we have given up permissions if anything */
13808b1869daSMax Reitz bdrv_child_refresh_perms(bs, bs->file, &error_abort);
13818b1869daSMax Reitz
1382a6468367SKevin Wolf ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
138337f09e5eSPaolo Bonzini break;
138437f09e5eSPaolo Bonzini }
1385bae0a0ccSPaolo Bonzini if (ret) {
1386bae0a0ccSPaolo Bonzini goto fail;
1387bae0a0ccSPaolo Bonzini }
138837f09e5eSPaolo Bonzini
138986c6b429SFam Zheng /* try to open parent images, if exist */
139086c6b429SFam Zheng ret = vmdk_parent_open(bs);
139186c6b429SFam Zheng if (ret) {
1392bae0a0ccSPaolo Bonzini goto fail;
1393b4b3ab14SFam Zheng }
13949877860eSPeter Maydell ret = vmdk_read_cid(bs, 0, &s->cid);
13959877860eSPeter Maydell if (ret) {
13969877860eSPeter Maydell goto fail;
13979877860eSPeter Maydell }
13989877860eSPeter Maydell ret = vmdk_read_cid(bs, 1, &s->parent_cid);
13999877860eSPeter Maydell if (ret) {
14009877860eSPeter Maydell goto fail;
14019877860eSPeter Maydell }
1402848c66e8SPaolo Bonzini qemu_co_mutex_init(&s->lock);
14032bc3166cSKevin Wolf
14042bc3166cSKevin Wolf /* Disable migration when VMDK images are used */
140581e5f78aSAlberto Garcia error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
140681e5f78aSAlberto Garcia "does not support live migration",
140781e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs));
1408e0ee3a8fSSteve Sistare ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
1409386f6c07SMarkus Armbruster if (ret < 0) {
1410fe44dc91SAshijeet Acharya goto fail;
1411fe44dc91SAshijeet Acharya }
1412fe44dc91SAshijeet Acharya
1413d1833ef5SPaolo Bonzini g_free(buf);
14142bc3166cSKevin Wolf return 0;
1415bae0a0ccSPaolo Bonzini
1416bae0a0ccSPaolo Bonzini fail:
1417d1833ef5SPaolo Bonzini g_free(buf);
1418f4c129a3SFam Zheng g_free(s->create_type);
1419f4c129a3SFam Zheng s->create_type = NULL;
1420bae0a0ccSPaolo Bonzini vmdk_free_extents(bs);
1421bae0a0ccSPaolo Bonzini return ret;
1422019d6b8fSAnthony Liguori }
1423019d6b8fSAnthony Liguori
1424d34682cdSKevin Wolf
vmdk_refresh_limits(BlockDriverState * bs,Error ** errp)14253baca891SKevin Wolf static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
1426d34682cdSKevin Wolf {
1427d34682cdSKevin Wolf BDRVVmdkState *s = bs->opaque;
1428d34682cdSKevin Wolf int i;
1429d34682cdSKevin Wolf
1430d34682cdSKevin Wolf for (i = 0; i < s->num_extents; i++) {
1431d34682cdSKevin Wolf if (!s->extents[i].flat) {
1432cf081fcaSEric Blake bs->bl.pwrite_zeroes_alignment =
1433cf081fcaSEric Blake MAX(bs->bl.pwrite_zeroes_alignment,
1434cf081fcaSEric Blake s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
1435d34682cdSKevin Wolf }
1436d34682cdSKevin Wolf }
1437d34682cdSKevin Wolf }
1438d34682cdSKevin Wolf
1439c6ac36e1SFam Zheng /**
1440c6ac36e1SFam Zheng * get_whole_cluster
1441c6ac36e1SFam Zheng *
1442c6ac36e1SFam Zheng * Copy backing file's cluster that covers @sector_num, otherwise write zero,
14434823cde5SKevin Wolf * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting
14444823cde5SKevin Wolf * a zeroed cluster in the current layer and must not copy data from the
14454823cde5SKevin Wolf * backing file.
1446c6ac36e1SFam Zheng *
1447c6ac36e1SFam Zheng * If @skip_start_sector < @skip_end_sector, the relative range
1448c6ac36e1SFam Zheng * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
1449c6ac36e1SFam Zheng * it for call to write user data in the request.
1450c6ac36e1SFam Zheng */
1451b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
get_whole_cluster(BlockDriverState * bs,VmdkExtent * extent,uint64_t cluster_offset,uint64_t offset,uint64_t skip_start_bytes,uint64_t skip_end_bytes,bool zeroed)1452b9b10c35SKevin Wolf get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
1453b9b10c35SKevin Wolf uint64_t cluster_offset, uint64_t offset,
1454b9b10c35SKevin Wolf uint64_t skip_start_bytes, uint64_t skip_end_bytes,
14554823cde5SKevin Wolf bool zeroed)
1456019d6b8fSAnthony Liguori {
1457bf81507dSFam Zheng int ret = VMDK_OK;
1458c6ac36e1SFam Zheng int64_t cluster_bytes;
1459c6ac36e1SFam Zheng uint8_t *whole_grain;
14604823cde5SKevin Wolf bool copy_from_backing;
1461019d6b8fSAnthony Liguori
1462c6ac36e1SFam Zheng /* For COW, align request sector_num to cluster start */
1463c6ac36e1SFam Zheng cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
146437b1d7d8SKevin Wolf offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
1465c6ac36e1SFam Zheng whole_grain = qemu_blockalign(bs, cluster_bytes);
14664823cde5SKevin Wolf copy_from_backing = bs->backing && !zeroed;
1467c6ac36e1SFam Zheng
14684823cde5SKevin Wolf if (!copy_from_backing) {
146937b1d7d8SKevin Wolf memset(whole_grain, 0, skip_start_bytes);
147037b1d7d8SKevin Wolf memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
1471c6ac36e1SFam Zheng }
1472c6ac36e1SFam Zheng
147337b1d7d8SKevin Wolf assert(skip_end_bytes <= cluster_bytes);
14740e69c543SFam Zheng /* we will be here if it's first write on non-exist grain(cluster).
14750e69c543SFam Zheng * try to read from parent image, if exist */
1476760e0063SKevin Wolf if (bs->backing && !vmdk_is_cid_valid(bs)) {
1477c6ac36e1SFam Zheng ret = VMDK_ERROR;
1478c6ac36e1SFam Zheng goto exit;
1479c6ac36e1SFam Zheng }
1480c6ac36e1SFam Zheng
1481c6ac36e1SFam Zheng /* Read backing data before skip range */
148237b1d7d8SKevin Wolf if (skip_start_bytes > 0) {
14834823cde5SKevin Wolf if (copy_from_backing) {
148423c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
148517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
1486a5c4e5beSAlberto Faria ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes,
148732cc71deSAlberto Faria whole_grain, 0);
1488c336500dSKevin Wolf if (ret < 0) {
1489bf81507dSFam Zheng ret = VMDK_ERROR;
1490bf81507dSFam Zheng goto exit;
1491019d6b8fSAnthony Liguori }
1492019d6b8fSAnthony Liguori }
149317362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
1494a5c4e5beSAlberto Faria ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes,
149532cc71deSAlberto Faria whole_grain, 0);
1496c6ac36e1SFam Zheng if (ret < 0) {
1497c6ac36e1SFam Zheng ret = VMDK_ERROR;
1498c6ac36e1SFam Zheng goto exit;
1499c6ac36e1SFam Zheng }
1500c6ac36e1SFam Zheng }
1501c6ac36e1SFam Zheng /* Read backing data after skip range */
150237b1d7d8SKevin Wolf if (skip_end_bytes < cluster_bytes) {
15034823cde5SKevin Wolf if (copy_from_backing) {
150423c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
150517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
1506a5c4e5beSAlberto Faria ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes,
150732cc71deSAlberto Faria cluster_bytes - skip_end_bytes,
150832cc71deSAlberto Faria whole_grain + skip_end_bytes, 0);
1509c6ac36e1SFam Zheng if (ret < 0) {
1510c6ac36e1SFam Zheng ret = VMDK_ERROR;
1511c6ac36e1SFam Zheng goto exit;
1512c6ac36e1SFam Zheng }
1513c6ac36e1SFam Zheng }
151417362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
1515a5c4e5beSAlberto Faria ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes,
151632cc71deSAlberto Faria cluster_bytes - skip_end_bytes,
151732cc71deSAlberto Faria whole_grain + skip_end_bytes, 0);
1518c6ac36e1SFam Zheng if (ret < 0) {
1519c6ac36e1SFam Zheng ret = VMDK_ERROR;
1520c6ac36e1SFam Zheng goto exit;
1521c6ac36e1SFam Zheng }
1522c6ac36e1SFam Zheng }
1523c6ac36e1SFam Zheng
152437b1d7d8SKevin Wolf ret = VMDK_OK;
1525bf81507dSFam Zheng exit:
1526bf81507dSFam Zheng qemu_vfree(whole_grain);
1527bf81507dSFam Zheng return ret;
1528019d6b8fSAnthony Liguori }
1529019d6b8fSAnthony Liguori
153088095349SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK
vmdk_L2update(VmdkExtent * extent,VmdkMetaData * m_data,uint32_t offset)153188095349SEmanuele Giuseppe Esposito vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
1532019d6b8fSAnthony Liguori {
1533c6ac36e1SFam Zheng offset = cpu_to_le32(offset);
1534019d6b8fSAnthony Liguori /* update L2 table */
153517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_UPDATE);
1536a5c4e5beSAlberto Faria if (bdrv_co_pwrite(extent->file,
1537b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512)
1538c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)),
153932cc71deSAlberto Faria sizeof(offset), &offset, 0) < 0) {
154065f74725SFam Zheng return VMDK_ERROR;
1541b3976d3cSFam Zheng }
1542019d6b8fSAnthony Liguori /* update backup L2 table */
1543b3976d3cSFam Zheng if (extent->l1_backup_table_offset != 0) {
1544b3976d3cSFam Zheng m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
1545a5c4e5beSAlberto Faria if (bdrv_co_pwrite(extent->file,
1546b3976d3cSFam Zheng ((int64_t)m_data->l2_offset * 512)
1547c6ac36e1SFam Zheng + (m_data->l2_index * sizeof(offset)),
154832cc71deSAlberto Faria sizeof(offset), &offset, 0) < 0) {
154965f74725SFam Zheng return VMDK_ERROR;
1550019d6b8fSAnthony Liguori }
1551b3976d3cSFam Zheng }
1552a5c4e5beSAlberto Faria if (bdrv_co_flush(extent->file->bs) < 0) {
15532758be05SKevin Wolf return VMDK_ERROR;
15542758be05SKevin Wolf }
1555cdeaf1f1SFam Zheng if (m_data->l2_cache_entry) {
1556cdeaf1f1SFam Zheng *m_data->l2_cache_entry = offset;
1557cdeaf1f1SFam Zheng }
1558019d6b8fSAnthony Liguori
155965f74725SFam Zheng return VMDK_OK;
1560019d6b8fSAnthony Liguori }
1561019d6b8fSAnthony Liguori
1562c6ac36e1SFam Zheng /**
1563c6ac36e1SFam Zheng * get_cluster_offset
1564c6ac36e1SFam Zheng *
1565c6ac36e1SFam Zheng * Look up cluster offset in extent file by sector number, and store in
1566c6ac36e1SFam Zheng * @cluster_offset.
1567c6ac36e1SFam Zheng *
1568c6ac36e1SFam Zheng * For flat extents, the start offset as parsed from the description file is
1569c6ac36e1SFam Zheng * returned.
1570c6ac36e1SFam Zheng *
1571c6ac36e1SFam Zheng * For sparse extents, look up in L1, L2 table. If allocate is true, return an
1572c6ac36e1SFam Zheng * offset for a new cluster and update L2 cache. If there is a backing file,
1573c6ac36e1SFam Zheng * COW is done before returning; otherwise, zeroes are written to the allocated
1574c6ac36e1SFam Zheng * cluster. Both COW and zero writing skips the sector range
1575c6ac36e1SFam Zheng * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
1576c6ac36e1SFam Zheng * has new data to write there.
1577c6ac36e1SFam Zheng *
1578c6ac36e1SFam Zheng * Returns: VMDK_OK if cluster exists and mapped in the image.
1579c6ac36e1SFam Zheng * VMDK_UNALLOC if cluster is not mapped and @allocate is false.
1580c6ac36e1SFam Zheng * VMDK_ERROR if failed.
1581c6ac36e1SFam Zheng */
1582b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
get_cluster_offset(BlockDriverState * bs,VmdkExtent * extent,VmdkMetaData * m_data,uint64_t offset,bool allocate,uint64_t * cluster_offset,uint64_t skip_start_bytes,uint64_t skip_end_bytes)1583b9b10c35SKevin Wolf get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
1584b9b10c35SKevin Wolf VmdkMetaData *m_data, uint64_t offset, bool allocate,
1585b9b10c35SKevin Wolf uint64_t *cluster_offset, uint64_t skip_start_bytes,
158637b1d7d8SKevin Wolf uint64_t skip_end_bytes)
1587019d6b8fSAnthony Liguori {
1588019d6b8fSAnthony Liguori unsigned int l1_index, l2_offset, l2_index;
1589019d6b8fSAnthony Liguori int min_index, i, j;
159098eb9733SSam Eiderman uint32_t min_count;
159198eb9733SSam Eiderman void *l2_table;
159214ead646SFam Zheng bool zeroed = false;
1593c6ac36e1SFam Zheng int64_t ret;
1594d1319b07SFam Zheng int64_t cluster_sector;
159598eb9733SSam Eiderman unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
1596019d6b8fSAnthony Liguori
1597ae261c86SFam Zheng if (m_data) {
15984dc20e64SKevin Wolf m_data->new_allocation = false;
1599ae261c86SFam Zheng }
160091b85bd3SFam Zheng if (extent->flat) {
16017fa60fa3SFam Zheng *cluster_offset = extent->flat_start_offset;
160265f74725SFam Zheng return VMDK_OK;
160391b85bd3SFam Zheng }
1604019d6b8fSAnthony Liguori
16056398de51SFam Zheng offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
1606b3976d3cSFam Zheng l1_index = (offset >> 9) / extent->l1_entry_sectors;
1607b3976d3cSFam Zheng if (l1_index >= extent->l1_size) {
160865f74725SFam Zheng return VMDK_ERROR;
1609b3976d3cSFam Zheng }
161098eb9733SSam Eiderman if (extent->sesparse) {
161198eb9733SSam Eiderman uint64_t l2_offset_u64;
161298eb9733SSam Eiderman
161398eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint64_t));
161498eb9733SSam Eiderman
161598eb9733SSam Eiderman l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
161698eb9733SSam Eiderman if (l2_offset_u64 == 0) {
161798eb9733SSam Eiderman l2_offset = 0;
161898eb9733SSam Eiderman } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
161998eb9733SSam Eiderman /*
162098eb9733SSam Eiderman * Top most nibble is 0x1 if grain table is allocated.
162198eb9733SSam Eiderman * strict check - top most 4 bytes must be 0x10000000 since max
162298eb9733SSam Eiderman * supported size is 64TB for disk - so no more than 64TB / 16MB
162398eb9733SSam Eiderman * grain directories which is smaller than uint32,
162498eb9733SSam Eiderman * where 16MB is the only supported default grain table coverage.
162598eb9733SSam Eiderman */
162698eb9733SSam Eiderman return VMDK_ERROR;
162798eb9733SSam Eiderman } else {
162898eb9733SSam Eiderman l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
162998eb9733SSam Eiderman l2_offset_u64 = extent->sesparse_l2_tables_offset +
163098eb9733SSam Eiderman l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
163198eb9733SSam Eiderman if (l2_offset_u64 > 0x00000000ffffffff) {
163298eb9733SSam Eiderman return VMDK_ERROR;
163398eb9733SSam Eiderman }
163498eb9733SSam Eiderman l2_offset = (unsigned int)(l2_offset_u64);
163598eb9733SSam Eiderman }
163698eb9733SSam Eiderman } else {
163798eb9733SSam Eiderman assert(extent->entry_size == sizeof(uint32_t));
163898eb9733SSam Eiderman l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
163998eb9733SSam Eiderman }
1640b3976d3cSFam Zheng if (!l2_offset) {
164165f74725SFam Zheng return VMDK_UNALLOC;
1642b3976d3cSFam Zheng }
1643019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) {
1644b3976d3cSFam Zheng if (l2_offset == extent->l2_cache_offsets[i]) {
1645019d6b8fSAnthony Liguori /* increment the hit count */
1646b3976d3cSFam Zheng if (++extent->l2_cache_counts[i] == 0xffffffff) {
1647019d6b8fSAnthony Liguori for (j = 0; j < L2_CACHE_SIZE; j++) {
1648b3976d3cSFam Zheng extent->l2_cache_counts[j] >>= 1;
1649019d6b8fSAnthony Liguori }
1650019d6b8fSAnthony Liguori }
165198eb9733SSam Eiderman l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
1652019d6b8fSAnthony Liguori goto found;
1653019d6b8fSAnthony Liguori }
1654019d6b8fSAnthony Liguori }
1655019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */
1656019d6b8fSAnthony Liguori min_index = 0;
1657019d6b8fSAnthony Liguori min_count = 0xffffffff;
1658019d6b8fSAnthony Liguori for (i = 0; i < L2_CACHE_SIZE; i++) {
1659b3976d3cSFam Zheng if (extent->l2_cache_counts[i] < min_count) {
1660b3976d3cSFam Zheng min_count = extent->l2_cache_counts[i];
1661019d6b8fSAnthony Liguori min_index = i;
1662019d6b8fSAnthony Liguori }
1663019d6b8fSAnthony Liguori }
166498eb9733SSam Eiderman l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
166517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_LOAD);
1666a5c4e5beSAlberto Faria if (bdrv_co_pread(extent->file,
1667b3976d3cSFam Zheng (int64_t)l2_offset * 512,
166853fb7844SAlberto Faria l2_size_bytes,
1669a5c4e5beSAlberto Faria l2_table, 0
1670353a5d84SAlberto Faria ) < 0) {
167165f74725SFam Zheng return VMDK_ERROR;
1672b3976d3cSFam Zheng }
1673019d6b8fSAnthony Liguori
1674b3976d3cSFam Zheng extent->l2_cache_offsets[min_index] = l2_offset;
1675b3976d3cSFam Zheng extent->l2_cache_counts[min_index] = 1;
1676019d6b8fSAnthony Liguori found:
1677b3976d3cSFam Zheng l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
16782821c1ccSKevin Wolf if (m_data) {
16792821c1ccSKevin Wolf m_data->l1_index = l1_index;
16802821c1ccSKevin Wolf m_data->l2_index = l2_index;
16812821c1ccSKevin Wolf m_data->l2_offset = l2_offset;
16822821c1ccSKevin Wolf m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
16832821c1ccSKevin Wolf }
168498eb9733SSam Eiderman
168598eb9733SSam Eiderman if (extent->sesparse) {
168698eb9733SSam Eiderman cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
168798eb9733SSam Eiderman switch (cluster_sector & 0xf000000000000000) {
168898eb9733SSam Eiderman case 0x0000000000000000:
168998eb9733SSam Eiderman /* unallocated grain */
169098eb9733SSam Eiderman if (cluster_sector != 0) {
169198eb9733SSam Eiderman return VMDK_ERROR;
169298eb9733SSam Eiderman }
169398eb9733SSam Eiderman break;
169498eb9733SSam Eiderman case 0x1000000000000000:
169598eb9733SSam Eiderman /* scsi-unmapped grain - fallthrough */
169698eb9733SSam Eiderman case 0x2000000000000000:
169798eb9733SSam Eiderman /* zero grain */
169898eb9733SSam Eiderman zeroed = true;
169998eb9733SSam Eiderman break;
170098eb9733SSam Eiderman case 0x3000000000000000:
170198eb9733SSam Eiderman /* allocated grain */
170298eb9733SSam Eiderman cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
170398eb9733SSam Eiderman ((cluster_sector & 0x0000ffffffffffff) << 12));
170498eb9733SSam Eiderman cluster_sector = extent->sesparse_clusters_offset +
170598eb9733SSam Eiderman cluster_sector * extent->cluster_sectors;
170698eb9733SSam Eiderman break;
170798eb9733SSam Eiderman default:
170898eb9733SSam Eiderman return VMDK_ERROR;
170998eb9733SSam Eiderman }
171098eb9733SSam Eiderman } else {
171198eb9733SSam Eiderman cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
1712019d6b8fSAnthony Liguori
1713c6ac36e1SFam Zheng if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
171414ead646SFam Zheng zeroed = true;
171514ead646SFam Zheng }
171698eb9733SSam Eiderman }
171714ead646SFam Zheng
1718c6ac36e1SFam Zheng if (!cluster_sector || zeroed) {
171991b85bd3SFam Zheng if (!allocate) {
172014ead646SFam Zheng return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
172191b85bd3SFam Zheng }
172298eb9733SSam Eiderman assert(!extent->sesparse);
17239949f97eSKevin Wolf
1724a77672eaSyuchenlin if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
1725a77672eaSyuchenlin return VMDK_ERROR;
1726a77672eaSyuchenlin }
1727a77672eaSyuchenlin
1728c6ac36e1SFam Zheng cluster_sector = extent->next_cluster_sector;
1729c6ac36e1SFam Zheng extent->next_cluster_sector += extent->cluster_sectors;
17309949f97eSKevin Wolf
1731019d6b8fSAnthony Liguori /* First of all we write grain itself, to avoid race condition
1732019d6b8fSAnthony Liguori * that may to corrupt the image.
1733019d6b8fSAnthony Liguori * This problem may occur because of insufficient space on host disk
1734019d6b8fSAnthony Liguori * or inappropriate VM shutdown.
1735019d6b8fSAnthony Liguori */
173637b1d7d8SKevin Wolf ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
17374823cde5SKevin Wolf offset, skip_start_bytes, skip_end_bytes,
17384823cde5SKevin Wolf zeroed);
1739c6ac36e1SFam Zheng if (ret) {
1740c6ac36e1SFam Zheng return ret;
1741019d6b8fSAnthony Liguori }
1742524089bcSReda Sallahi if (m_data) {
17434dc20e64SKevin Wolf m_data->new_allocation = true;
1744524089bcSReda Sallahi }
1745019d6b8fSAnthony Liguori }
1746c6ac36e1SFam Zheng *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
174765f74725SFam Zheng return VMDK_OK;
1748019d6b8fSAnthony Liguori }
1749019d6b8fSAnthony Liguori
/*
 * Return the first extent, searching from @start_hint (or from the first
 * extent if @start_hint is NULL), whose end_sector lies beyond @sector_num.
 * Returns NULL if no such extent exists.
 */
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *const end = &s->extents[s->num_extents];
    VmdkExtent *cur;

    for (cur = start_hint ? start_hint : &s->extents[0]; cur < end; cur++) {
        if (cur->end_sector > sector_num) {
            return cur;
        }
    }
    return NULL;
}
1766b3976d3cSFam Zheng
/*
 * Return the position of the guest byte @offset within its cluster: @offset
 * is made relative to the first byte covered by @extent and then reduced
 * modulo the extent's cluster size in bytes.
 */
static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
                                                   int64_t offset)
{
    const uint64_t cluster_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
    const uint64_t extent_start =
        (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;

    return (offset - extent_start) % cluster_bytes;
}
1778a844a2b0SKevin Wolf
1779b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_block_status(BlockDriverState * bs,bool want_zero,int64_t offset,int64_t bytes,int64_t * pnum,int64_t * map,BlockDriverState ** file)1780b9b10c35SKevin Wolf vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
1781b9b10c35SKevin Wolf int64_t offset, int64_t bytes, int64_t *pnum,
1782b9b10c35SKevin Wolf int64_t *map, BlockDriverState **file)
1783019d6b8fSAnthony Liguori {
1784019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
1785b3976d3cSFam Zheng int64_t index_in_cluster, n, ret;
1786c72080b9SEric Blake uint64_t cluster_offset;
1787b3976d3cSFam Zheng VmdkExtent *extent;
1788b3976d3cSFam Zheng
1789c72080b9SEric Blake extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
1790b3976d3cSFam Zheng if (!extent) {
1791c72080b9SEric Blake return -EIO;
1792b3976d3cSFam Zheng }
1793f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock);
1794c72080b9SEric Blake ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
1795c6ac36e1SFam Zheng 0, 0);
1796f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock);
179714ead646SFam Zheng
1798c72080b9SEric Blake index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
17994bc74be9SPaolo Bonzini switch (ret) {
18004bc74be9SPaolo Bonzini case VMDK_ERROR:
18014bc74be9SPaolo Bonzini ret = -EIO;
18024bc74be9SPaolo Bonzini break;
18034bc74be9SPaolo Bonzini case VMDK_UNALLOC:
18044bc74be9SPaolo Bonzini ret = 0;
18054bc74be9SPaolo Bonzini break;
18064bc74be9SPaolo Bonzini case VMDK_ZEROED:
18074bc74be9SPaolo Bonzini ret = BDRV_BLOCK_ZERO;
18084bc74be9SPaolo Bonzini break;
18094bc74be9SPaolo Bonzini case VMDK_OK:
18104bc74be9SPaolo Bonzini ret = BDRV_BLOCK_DATA;
1811e0f100f5SFam Zheng if (!extent->compressed) {
1812d0a18f10SFam Zheng ret |= BDRV_BLOCK_OFFSET_VALID;
1813c72080b9SEric Blake *map = cluster_offset + index_in_cluster;
18144dd84ac9SMax Reitz if (extent->flat) {
18154dd84ac9SMax Reitz ret |= BDRV_BLOCK_RECURSE;
18164dd84ac9SMax Reitz }
181728482891SAndrey Drobyshev via } else {
181828482891SAndrey Drobyshev via ret |= BDRV_BLOCK_COMPRESSED;
18194bc74be9SPaolo Bonzini }
1820e0f100f5SFam Zheng *file = extent->file->bs;
18214bc74be9SPaolo Bonzini break;
18224bc74be9SPaolo Bonzini }
182391b85bd3SFam Zheng
1824c72080b9SEric Blake n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
1825c72080b9SEric Blake *pnum = MIN(n, bytes);
1826b3976d3cSFam Zheng return ret;
1827019d6b8fSAnthony Liguori }
1828019d6b8fSAnthony Liguori
1829b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_write_extent(VmdkExtent * extent,int64_t cluster_offset,int64_t offset_in_cluster,QEMUIOVector * qiov,uint64_t qiov_offset,uint64_t n_bytes,uint64_t offset)1830b4df9903SPaolo Bonzini vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
183137b1d7d8SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov,
183237b1d7d8SKevin Wolf uint64_t qiov_offset, uint64_t n_bytes,
183337b1d7d8SKevin Wolf uint64_t offset)
1834dd3f6ee2SFam Zheng {
1835dd3f6ee2SFam Zheng int ret;
18362b2c8c5dSFam Zheng VmdkGrainMarker *data = NULL;
18372b2c8c5dSFam Zheng uLongf buf_len;
183837b1d7d8SKevin Wolf QEMUIOVector local_qiov;
18395e82a31eSFam Zheng int64_t write_offset;
18405e82a31eSFam Zheng int64_t write_end_sector;
1841dd3f6ee2SFam Zheng
18422b2c8c5dSFam Zheng if (extent->compressed) {
184337b1d7d8SKevin Wolf void *compressed_data;
184437b1d7d8SKevin Wolf
1845bedb8bb4SMax Reitz /* Only whole clusters */
1846bedb8bb4SMax Reitz if (offset_in_cluster ||
1847bedb8bb4SMax Reitz n_bytes > (extent->cluster_sectors * SECTOR_SIZE) ||
1848bedb8bb4SMax Reitz (n_bytes < (extent->cluster_sectors * SECTOR_SIZE) &&
1849bedb8bb4SMax Reitz offset + n_bytes != extent->end_sector * SECTOR_SIZE))
1850bedb8bb4SMax Reitz {
1851bedb8bb4SMax Reitz ret = -EINVAL;
1852bedb8bb4SMax Reitz goto out;
1853bedb8bb4SMax Reitz }
1854bedb8bb4SMax Reitz
18552b2c8c5dSFam Zheng if (!extent->has_marker) {
18562b2c8c5dSFam Zheng ret = -EINVAL;
18572b2c8c5dSFam Zheng goto out;
18582b2c8c5dSFam Zheng }
18592b2c8c5dSFam Zheng buf_len = (extent->cluster_sectors << 9) * 2;
18602b2c8c5dSFam Zheng data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
186137b1d7d8SKevin Wolf
186237b1d7d8SKevin Wolf compressed_data = g_malloc(n_bytes);
186337b1d7d8SKevin Wolf qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
186437b1d7d8SKevin Wolf ret = compress(data->data, &buf_len, compressed_data, n_bytes);
186537b1d7d8SKevin Wolf g_free(compressed_data);
186637b1d7d8SKevin Wolf
186737b1d7d8SKevin Wolf if (ret != Z_OK || buf_len == 0) {
18682b2c8c5dSFam Zheng ret = -EINVAL;
18692b2c8c5dSFam Zheng goto out;
18702b2c8c5dSFam Zheng }
18715e82a31eSFam Zheng
18724545d4f4SQingFeng Hao data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
18734545d4f4SQingFeng Hao data->size = cpu_to_le32(buf_len);
187437b1d7d8SKevin Wolf
187537b1d7d8SKevin Wolf n_bytes = buf_len + sizeof(VmdkGrainMarker);
1876199d95b0SVladimir Sementsov-Ogievskiy qemu_iovec_init_buf(&local_qiov, data, n_bytes);
187723c4b2a8SMax Reitz
187817362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
187937b1d7d8SKevin Wolf } else {
188037b1d7d8SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov);
188137b1d7d8SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
188223c4b2a8SMax Reitz
188317362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_AIO);
188437b1d7d8SKevin Wolf }
188537b1d7d8SKevin Wolf
18863c363575SMax Reitz write_offset = cluster_offset + offset_in_cluster;
1887a03ef88fSKevin Wolf ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
188837b1d7d8SKevin Wolf &local_qiov, 0);
188937b1d7d8SKevin Wolf
189037b1d7d8SKevin Wolf write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
18915e82a31eSFam Zheng
18923efffc32SRadoslav Gerganov if (extent->compressed) {
18933efffc32SRadoslav Gerganov extent->next_cluster_sector = write_end_sector;
18943efffc32SRadoslav Gerganov } else {
18955e82a31eSFam Zheng extent->next_cluster_sector = MAX(extent->next_cluster_sector,
18965e82a31eSFam Zheng write_end_sector);
18973efffc32SRadoslav Gerganov }
18985e82a31eSFam Zheng
189937b1d7d8SKevin Wolf if (ret < 0) {
1900dd3f6ee2SFam Zheng goto out;
1901dd3f6ee2SFam Zheng }
1902dd3f6ee2SFam Zheng ret = 0;
1903dd3f6ee2SFam Zheng out:
19042b2c8c5dSFam Zheng g_free(data);
190537b1d7d8SKevin Wolf if (!extent->compressed) {
190637b1d7d8SKevin Wolf qemu_iovec_destroy(&local_qiov);
190737b1d7d8SKevin Wolf }
1908dd3f6ee2SFam Zheng return ret;
1909dd3f6ee2SFam Zheng }
1910dd3f6ee2SFam Zheng
1911b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_read_extent(VmdkExtent * extent,int64_t cluster_offset,int64_t offset_in_cluster,QEMUIOVector * qiov,int bytes)1912b4df9903SPaolo Bonzini vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
1913b9b10c35SKevin Wolf int64_t offset_in_cluster, QEMUIOVector *qiov, int bytes)
1914dd3f6ee2SFam Zheng {
1915dd3f6ee2SFam Zheng int ret;
19162b2c8c5dSFam Zheng int cluster_bytes, buf_bytes;
19172b2c8c5dSFam Zheng uint8_t *cluster_buf, *compressed_data;
19182b2c8c5dSFam Zheng uint8_t *uncomp_buf;
19192b2c8c5dSFam Zheng uint32_t data_len;
19202b2c8c5dSFam Zheng VmdkGrainMarker *marker;
19212b2c8c5dSFam Zheng uLongf buf_len;
1922dd3f6ee2SFam Zheng
19232b2c8c5dSFam Zheng
19242b2c8c5dSFam Zheng if (!extent->compressed) {
192517362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_AIO);
1926a03ef88fSKevin Wolf ret = bdrv_co_preadv(extent->file,
1927f10cc243SKevin Wolf cluster_offset + offset_in_cluster, bytes,
1928f10cc243SKevin Wolf qiov, 0);
1929f10cc243SKevin Wolf if (ret < 0) {
1930f10cc243SKevin Wolf return ret;
1931dd3f6ee2SFam Zheng }
1932f10cc243SKevin Wolf return 0;
1933dd3f6ee2SFam Zheng }
19342b2c8c5dSFam Zheng cluster_bytes = extent->cluster_sectors * 512;
19352b2c8c5dSFam Zheng /* Read two clusters in case GrainMarker + compressed data > one cluster */
19362b2c8c5dSFam Zheng buf_bytes = cluster_bytes * 2;
19372b2c8c5dSFam Zheng cluster_buf = g_malloc(buf_bytes);
19382b2c8c5dSFam Zheng uncomp_buf = g_malloc(cluster_bytes);
193917362398SPaolo Bonzini BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
1940a5c4e5beSAlberto Faria ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf,
1941a5c4e5beSAlberto Faria 0);
19422b2c8c5dSFam Zheng if (ret < 0) {
19432b2c8c5dSFam Zheng goto out;
19442b2c8c5dSFam Zheng }
19452b2c8c5dSFam Zheng compressed_data = cluster_buf;
19462b2c8c5dSFam Zheng buf_len = cluster_bytes;
19472b2c8c5dSFam Zheng data_len = cluster_bytes;
19482b2c8c5dSFam Zheng if (extent->has_marker) {
19492b2c8c5dSFam Zheng marker = (VmdkGrainMarker *)cluster_buf;
19502b2c8c5dSFam Zheng compressed_data = marker->data;
19512b2c8c5dSFam Zheng data_len = le32_to_cpu(marker->size);
19522b2c8c5dSFam Zheng }
19532b2c8c5dSFam Zheng if (!data_len || data_len > buf_bytes) {
19542b2c8c5dSFam Zheng ret = -EINVAL;
19552b2c8c5dSFam Zheng goto out;
19562b2c8c5dSFam Zheng }
19572b2c8c5dSFam Zheng ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
19582b2c8c5dSFam Zheng if (ret != Z_OK) {
19592b2c8c5dSFam Zheng ret = -EINVAL;
19602b2c8c5dSFam Zheng goto out;
19612b2c8c5dSFam Zheng
19622b2c8c5dSFam Zheng }
19632b2c8c5dSFam Zheng if (offset_in_cluster < 0 ||
1964f10cc243SKevin Wolf offset_in_cluster + bytes > buf_len) {
19652b2c8c5dSFam Zheng ret = -EINVAL;
19662b2c8c5dSFam Zheng goto out;
19672b2c8c5dSFam Zheng }
1968f10cc243SKevin Wolf qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
19692b2c8c5dSFam Zheng ret = 0;
19702b2c8c5dSFam Zheng
19712b2c8c5dSFam Zheng out:
19722b2c8c5dSFam Zheng g_free(uncomp_buf);
19732b2c8c5dSFam Zheng g_free(cluster_buf);
19742b2c8c5dSFam Zheng return ret;
19752b2c8c5dSFam Zheng }
1976dd3f6ee2SFam Zheng
1977b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)1978f7ef38ddSVladimir Sementsov-Ogievskiy vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
1979f7ef38ddSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags)
1980019d6b8fSAnthony Liguori {
1981019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
1982b3976d3cSFam Zheng int ret;
1983f10cc243SKevin Wolf uint64_t n_bytes, offset_in_cluster;
1984b3976d3cSFam Zheng VmdkExtent *extent = NULL;
1985f10cc243SKevin Wolf QEMUIOVector local_qiov;
1986019d6b8fSAnthony Liguori uint64_t cluster_offset;
1987f10cc243SKevin Wolf uint64_t bytes_done = 0;
1988019d6b8fSAnthony Liguori
1989f10cc243SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov);
1990f10cc243SKevin Wolf qemu_co_mutex_lock(&s->lock);
1991f10cc243SKevin Wolf
1992f10cc243SKevin Wolf while (bytes > 0) {
1993f10cc243SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
1994b3976d3cSFam Zheng if (!extent) {
1995f10cc243SKevin Wolf ret = -EIO;
1996f10cc243SKevin Wolf goto fail;
1997b3976d3cSFam Zheng }
1998c6ac36e1SFam Zheng ret = get_cluster_offset(bs, extent, NULL,
1999f10cc243SKevin Wolf offset, false, &cluster_offset, 0, 0);
2000f10cc243SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
2001f10cc243SKevin Wolf
2002f10cc243SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
2003f10cc243SKevin Wolf - offset_in_cluster);
2004f10cc243SKevin Wolf
200514ead646SFam Zheng if (ret != VMDK_OK) {
200691b85bd3SFam Zheng /* if not allocated, try to read from parent image, if exist */
2007760e0063SKevin Wolf if (bs->backing && ret != VMDK_ZEROED) {
2008ae261c86SFam Zheng if (!vmdk_is_cid_valid(bs)) {
2009f10cc243SKevin Wolf ret = -EINVAL;
2010f10cc243SKevin Wolf goto fail;
2011019d6b8fSAnthony Liguori }
2012019d6b8fSAnthony Liguori
2013f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov);
2014f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
2015f10cc243SKevin Wolf
201623c4b2a8SMax Reitz /* qcow2 emits this on bs->file instead of bs->backing */
201717362398SPaolo Bonzini BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2018a03ef88fSKevin Wolf ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
2019f10cc243SKevin Wolf &local_qiov, 0);
2020f10cc243SKevin Wolf if (ret < 0) {
2021f10cc243SKevin Wolf goto fail;
2022f10cc243SKevin Wolf }
2023f10cc243SKevin Wolf } else {
2024f10cc243SKevin Wolf qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
2025f10cc243SKevin Wolf }
2026f10cc243SKevin Wolf } else {
2027f10cc243SKevin Wolf qemu_iovec_reset(&local_qiov);
2028f10cc243SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
2029f10cc243SKevin Wolf
2030f10cc243SKevin Wolf ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
2031f10cc243SKevin Wolf &local_qiov, n_bytes);
2032f10cc243SKevin Wolf if (ret) {
2033f10cc243SKevin Wolf goto fail;
2034f10cc243SKevin Wolf }
2035f10cc243SKevin Wolf }
2036f10cc243SKevin Wolf bytes -= n_bytes;
2037f10cc243SKevin Wolf offset += n_bytes;
2038f10cc243SKevin Wolf bytes_done += n_bytes;
2039f10cc243SKevin Wolf }
2040f10cc243SKevin Wolf
2041f10cc243SKevin Wolf ret = 0;
2042f10cc243SKevin Wolf fail:
20432914caa0SPaolo Bonzini qemu_co_mutex_unlock(&s->lock);
2044f10cc243SKevin Wolf qemu_iovec_destroy(&local_qiov);
2045f10cc243SKevin Wolf
20462914caa0SPaolo Bonzini return ret;
20472914caa0SPaolo Bonzini }
20482914caa0SPaolo Bonzini
2049cdeaf1f1SFam Zheng /**
2050cdeaf1f1SFam Zheng * vmdk_write:
2051cdeaf1f1SFam Zheng * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
2052cdeaf1f1SFam Zheng * if possible, otherwise return -ENOTSUP.
20538e507243SFam Zheng * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
20548e507243SFam Zheng * with each cluster. By dry run we can find if the zero write
20558e507243SFam Zheng * is possible without modifying image data.
2056cdeaf1f1SFam Zheng *
2057cdeaf1f1SFam Zheng * Returns: error code with 0 for success.
2058cdeaf1f1SFam Zheng */
2059b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_pwritev(BlockDriverState * bs,uint64_t offset,uint64_t bytes,QEMUIOVector * qiov,bool zeroed,bool zero_dry_run)2060b9b10c35SKevin Wolf vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
2061b9b10c35SKevin Wolf QEMUIOVector *qiov, bool zeroed, bool zero_dry_run)
2062019d6b8fSAnthony Liguori {
2063019d6b8fSAnthony Liguori BDRVVmdkState *s = bs->opaque;
2064b3976d3cSFam Zheng VmdkExtent *extent = NULL;
2065585ea0c8SFam Zheng int ret;
206637b1d7d8SKevin Wolf int64_t offset_in_cluster, n_bytes;
2067019d6b8fSAnthony Liguori uint64_t cluster_offset;
206837b1d7d8SKevin Wolf uint64_t bytes_done = 0;
2069b3976d3cSFam Zheng VmdkMetaData m_data;
2070019d6b8fSAnthony Liguori
207137b1d7d8SKevin Wolf if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
207237b1d7d8SKevin Wolf error_report("Wrong offset: offset=0x%" PRIx64
20739af9e0feSMarkus Armbruster " total_sectors=0x%" PRIx64,
207437b1d7d8SKevin Wolf offset, bs->total_sectors);
20757fa60fa3SFam Zheng return -EIO;
2076019d6b8fSAnthony Liguori }
2077019d6b8fSAnthony Liguori
207837b1d7d8SKevin Wolf while (bytes > 0) {
207937b1d7d8SKevin Wolf extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
2080b3976d3cSFam Zheng if (!extent) {
2081b3976d3cSFam Zheng return -EIO;
2082b3976d3cSFam Zheng }
208398eb9733SSam Eiderman if (extent->sesparse) {
208498eb9733SSam Eiderman return -ENOTSUP;
208598eb9733SSam Eiderman }
208637b1d7d8SKevin Wolf offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
208737b1d7d8SKevin Wolf n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
208837b1d7d8SKevin Wolf - offset_in_cluster);
208937b1d7d8SKevin Wolf
209037b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset,
2091c6ac36e1SFam Zheng !(extent->compressed || zeroed),
209237b1d7d8SKevin Wolf &cluster_offset, offset_in_cluster,
209337b1d7d8SKevin Wolf offset_in_cluster + n_bytes);
20942b2c8c5dSFam Zheng if (extent->compressed) {
209565f74725SFam Zheng if (ret == VMDK_OK) {
20962b2c8c5dSFam Zheng /* Refuse write to allocated cluster for streamOptimized */
20974823970bSFam Zheng error_report("Could not write to allocated cluster"
20984823970bSFam Zheng " for streamOptimized");
20992b2c8c5dSFam Zheng return -EIO;
21002821c1ccSKevin Wolf } else if (!zeroed) {
21012b2c8c5dSFam Zheng /* allocate */
210237b1d7d8SKevin Wolf ret = get_cluster_offset(bs, extent, &m_data, offset,
2103c6ac36e1SFam Zheng true, &cluster_offset, 0, 0);
21042b2c8c5dSFam Zheng }
21052b2c8c5dSFam Zheng }
2106cdeaf1f1SFam Zheng if (ret == VMDK_ERROR) {
210791b85bd3SFam Zheng return -EINVAL;
2108b3976d3cSFam Zheng }
2109cdeaf1f1SFam Zheng if (zeroed) {
2110cdeaf1f1SFam Zheng /* Do zeroed write, buf is ignored */
2111cdeaf1f1SFam Zheng if (extent->has_zero_grain &&
211237b1d7d8SKevin Wolf offset_in_cluster == 0 &&
211337b1d7d8SKevin Wolf n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
211437b1d7d8SKevin Wolf n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
211578cae78dSKevin Wolf if (!zero_dry_run && ret != VMDK_ZEROED) {
2116cdeaf1f1SFam Zheng /* update L2 tables */
2117c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
2118c6ac36e1SFam Zheng != VMDK_OK) {
2119cdeaf1f1SFam Zheng return -EIO;
2120cdeaf1f1SFam Zheng }
2121cdeaf1f1SFam Zheng }
2122cdeaf1f1SFam Zheng } else {
2123cdeaf1f1SFam Zheng return -ENOTSUP;
2124cdeaf1f1SFam Zheng }
2125cdeaf1f1SFam Zheng } else {
212637b1d7d8SKevin Wolf ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
212737b1d7d8SKevin Wolf qiov, bytes_done, n_bytes, offset);
2128dd3f6ee2SFam Zheng if (ret) {
21297fa60fa3SFam Zheng return ret;
2130b3976d3cSFam Zheng }
21314dc20e64SKevin Wolf if (m_data.new_allocation) {
2132019d6b8fSAnthony Liguori /* update L2 tables */
2133c6ac36e1SFam Zheng if (vmdk_L2update(extent, &m_data,
2134c6ac36e1SFam Zheng cluster_offset >> BDRV_SECTOR_BITS)
2135c6ac36e1SFam Zheng != VMDK_OK) {
21367fa60fa3SFam Zheng return -EIO;
2137019d6b8fSAnthony Liguori }
2138b3976d3cSFam Zheng }
2139cdeaf1f1SFam Zheng }
214037b1d7d8SKevin Wolf bytes -= n_bytes;
214137b1d7d8SKevin Wolf offset += n_bytes;
214237b1d7d8SKevin Wolf bytes_done += n_bytes;
2143019d6b8fSAnthony Liguori
2144ae261c86SFam Zheng /* update CID on the first write every time the virtual disk is
2145ae261c86SFam Zheng * opened */
214669b4d86dSFam Zheng if (!s->cid_updated) {
2147e5dc64b8SFam Zheng ret = vmdk_write_cid(bs, g_random_int());
214899f1835dSKevin Wolf if (ret < 0) {
214999f1835dSKevin Wolf return ret;
215099f1835dSKevin Wolf }
215169b4d86dSFam Zheng s->cid_updated = true;
2152019d6b8fSAnthony Liguori }
2153019d6b8fSAnthony Liguori }
2154019d6b8fSAnthony Liguori return 0;
2155019d6b8fSAnthony Liguori }
2156019d6b8fSAnthony Liguori
2157b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)2158e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
2159e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags)
2160e183ef75SPaolo Bonzini {
2161e183ef75SPaolo Bonzini int ret;
2162e183ef75SPaolo Bonzini BDRVVmdkState *s = bs->opaque;
2163e183ef75SPaolo Bonzini qemu_co_mutex_lock(&s->lock);
216437b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
2165cdeaf1f1SFam Zheng qemu_co_mutex_unlock(&s->lock);
2166cdeaf1f1SFam Zheng return ret;
2167cdeaf1f1SFam Zheng }
2168cdeaf1f1SFam Zheng
21697b1fb72eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev_compressed(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov)2170e75abedaSVladimir Sementsov-Ogievskiy vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
2171e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov)
217237b1d7d8SKevin Wolf {
217351b3c6b7Syuchenlin if (bytes == 0) {
217451b3c6b7Syuchenlin /* The caller will write bytes 0 to signal EOF.
217551b3c6b7Syuchenlin * When receive it, we align EOF to a sector boundary. */
217651b3c6b7Syuchenlin BDRVVmdkState *s = bs->opaque;
217751b3c6b7Syuchenlin int i, ret;
217851b3c6b7Syuchenlin int64_t length;
217951b3c6b7Syuchenlin
218051b3c6b7Syuchenlin for (i = 0; i < s->num_extents; i++) {
21810af02bd1SPaolo Bonzini length = bdrv_co_getlength(s->extents[i].file->bs);
218251b3c6b7Syuchenlin if (length < 0) {
218351b3c6b7Syuchenlin return length;
218451b3c6b7Syuchenlin }
218551b3c6b7Syuchenlin length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
2186a5c4e5beSAlberto Faria ret = bdrv_co_truncate(s->extents[i].file, length, false,
21877b8e4857SKevin Wolf PREALLOC_MODE_OFF, 0, NULL);
218851b3c6b7Syuchenlin if (ret < 0) {
218951b3c6b7Syuchenlin return ret;
219051b3c6b7Syuchenlin }
219151b3c6b7Syuchenlin }
219251b3c6b7Syuchenlin return 0;
219351b3c6b7Syuchenlin }
2194b2c622d3SPavel Butsykin return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
2195ba0ad89eSFam Zheng }
2196ba0ad89eSFam Zheng
2197b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)2198b9b10c35SKevin Wolf vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
2199aa7bfbffSPeter Lieven BdrvRequestFlags flags)
2200cdeaf1f1SFam Zheng {
2201cdeaf1f1SFam Zheng int ret;
2202cdeaf1f1SFam Zheng BDRVVmdkState *s = bs->opaque;
220337b1d7d8SKevin Wolf
2204cdeaf1f1SFam Zheng qemu_co_mutex_lock(&s->lock);
22058e507243SFam Zheng /* write zeroes could fail if sectors not aligned to cluster, test it with
22068e507243SFam Zheng * dry_run == true before really updating image */
220737b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
2208cdeaf1f1SFam Zheng if (!ret) {
220937b1d7d8SKevin Wolf ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
2210cdeaf1f1SFam Zheng }
2211e183ef75SPaolo Bonzini qemu_co_mutex_unlock(&s->lock);
2212e183ef75SPaolo Bonzini return ret;
2213e183ef75SPaolo Bonzini }
2214e183ef75SPaolo Bonzini
221528944f99SPaolo Bonzini static int coroutine_fn GRAPH_UNLOCKED
vmdk_init_extent(BlockBackend * blk,int64_t filesize,bool flat,bool compress,bool zeroed_grain,Error ** errp)22164db7ba3bSKevin Wolf vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
22174db7ba3bSKevin Wolf bool zeroed_grain, Error **errp)
2218019d6b8fSAnthony Liguori {
2219f66fd6c3SFam Zheng int ret, i;
2220019d6b8fSAnthony Liguori VMDK4Header header;
2221917703c1SFam Zheng uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
2222917703c1SFam Zheng uint32_t *gd_buf = NULL;
2223917703c1SFam Zheng int gd_buf_size;
22240e7e1989SKevin Wolf
2225917703c1SFam Zheng if (flat) {
222628944f99SPaolo Bonzini ret = blk_co_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
2227f66fd6c3SFam Zheng goto exit;
2228f66fd6c3SFam Zheng }
2229019d6b8fSAnthony Liguori magic = cpu_to_be32(VMDK4_MAGIC);
2230019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header));
2231d62d9dc4SFam Zheng if (compress) {
2232d62d9dc4SFam Zheng header.version = 3;
2233d62d9dc4SFam Zheng } else if (zeroed_grain) {
2234d62d9dc4SFam Zheng header.version = 2;
2235d62d9dc4SFam Zheng } else {
2236d62d9dc4SFam Zheng header.version = 1;
2237d62d9dc4SFam Zheng }
223895b0aa42SFam Zheng header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
223969e0b6dfSFam Zheng | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
224069e0b6dfSFam Zheng | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
22416c031aacSFam Zheng header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
2242917703c1SFam Zheng header.capacity = filesize / BDRV_SECTOR_SIZE;
224316372ff0SAlexander Graf header.granularity = 128;
2244917703c1SFam Zheng header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
2245019d6b8fSAnthony Liguori
2246917703c1SFam Zheng grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
2247917703c1SFam Zheng gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
2248917703c1SFam Zheng BDRV_SECTOR_SIZE);
2249917703c1SFam Zheng gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
2250917703c1SFam Zheng gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
2251019d6b8fSAnthony Liguori
2252019d6b8fSAnthony Liguori header.desc_offset = 1;
2253019d6b8fSAnthony Liguori header.desc_size = 20;
2254019d6b8fSAnthony Liguori header.rgd_offset = header.desc_offset + header.desc_size;
2255917703c1SFam Zheng header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
2256019d6b8fSAnthony Liguori header.grain_offset =
2257917703c1SFam Zheng ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
2258917703c1SFam Zheng header.granularity);
225916372ff0SAlexander Graf /* swap endianness for all header fields */
226016372ff0SAlexander Graf header.version = cpu_to_le32(header.version);
226116372ff0SAlexander Graf header.flags = cpu_to_le32(header.flags);
226216372ff0SAlexander Graf header.capacity = cpu_to_le64(header.capacity);
226316372ff0SAlexander Graf header.granularity = cpu_to_le64(header.granularity);
2264ca8804ceSFam Zheng header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
2265019d6b8fSAnthony Liguori header.desc_offset = cpu_to_le64(header.desc_offset);
2266019d6b8fSAnthony Liguori header.desc_size = cpu_to_le64(header.desc_size);
2267019d6b8fSAnthony Liguori header.rgd_offset = cpu_to_le64(header.rgd_offset);
2268019d6b8fSAnthony Liguori header.gd_offset = cpu_to_le64(header.gd_offset);
2269019d6b8fSAnthony Liguori header.grain_offset = cpu_to_le64(header.grain_offset);
22706c031aacSFam Zheng header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
2271019d6b8fSAnthony Liguori
2272019d6b8fSAnthony Liguori header.check_bytes[0] = 0xa;
2273019d6b8fSAnthony Liguori header.check_bytes[1] = 0x20;
2274019d6b8fSAnthony Liguori header.check_bytes[2] = 0xd;
2275019d6b8fSAnthony Liguori header.check_bytes[3] = 0xa;
2276019d6b8fSAnthony Liguori
2277019d6b8fSAnthony Liguori /* write all the data */
227828944f99SPaolo Bonzini ret = blk_co_pwrite(blk, 0, sizeof(magic), &magic, 0);
2279917703c1SFam Zheng if (ret < 0) {
2280c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR);
22811640366cSKirill A. Shutemov goto exit;
22821640366cSKirill A. Shutemov }
228328944f99SPaolo Bonzini ret = blk_co_pwrite(blk, sizeof(magic), sizeof(header), &header, 0);
2284917703c1SFam Zheng if (ret < 0) {
2285c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR);
22861640366cSKirill A. Shutemov goto exit;
22871640366cSKirill A. Shutemov }
2288019d6b8fSAnthony Liguori
228928944f99SPaolo Bonzini ret = blk_co_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
22908c6242b6SKevin Wolf PREALLOC_MODE_OFF, 0, errp);
22911640366cSKirill A. Shutemov if (ret < 0) {
22921640366cSKirill A. Shutemov goto exit;
22931640366cSKirill A. Shutemov }
2294019d6b8fSAnthony Liguori
2295019d6b8fSAnthony Liguori /* write grain directory */
2296917703c1SFam Zheng gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
2297917703c1SFam Zheng gd_buf = g_malloc0(gd_buf_size);
2298917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
22991640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) {
2300917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp);
23011640366cSKirill A. Shutemov }
230228944f99SPaolo Bonzini ret = blk_co_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
2303a9262f55SAlberto Faria gd_buf_size, gd_buf, 0);
2304917703c1SFam Zheng if (ret < 0) {
2305c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR);
2306917703c1SFam Zheng goto exit;
23071640366cSKirill A. Shutemov }
2308019d6b8fSAnthony Liguori
2309019d6b8fSAnthony Liguori /* write backup grain directory */
2310917703c1SFam Zheng for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
23111640366cSKirill A. Shutemov i < gt_count; i++, tmp += gt_size) {
2312917703c1SFam Zheng gd_buf[i] = cpu_to_le32(tmp);
23131640366cSKirill A. Shutemov }
231428944f99SPaolo Bonzini ret = blk_co_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
2315a9262f55SAlberto Faria gd_buf_size, gd_buf, 0);
2316917703c1SFam Zheng if (ret < 0) {
2317c6bd8c70SMarkus Armbruster error_setg(errp, QERR_IO_ERROR);
23181640366cSKirill A. Shutemov }
2319019d6b8fSAnthony Liguori
2320f66fd6c3SFam Zheng ret = 0;
2321f66fd6c3SFam Zheng exit:
2322917703c1SFam Zheng g_free(gd_buf);
2323f66fd6c3SFam Zheng return ret;
2324f66fd6c3SFam Zheng }
2325019d6b8fSAnthony Liguori
23264db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_create_extent(const char * filename,int64_t filesize,bool flat,bool compress,bool zeroed_grain,BlockBackend ** pbb,QemuOpts * opts,Error ** errp)23274ec8df01SKevin Wolf vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
23284ec8df01SKevin Wolf bool compress, bool zeroed_grain, BlockBackend **pbb,
23295be28490SFam Zheng QemuOpts *opts, Error **errp)
23305be28490SFam Zheng {
23315be28490SFam Zheng int ret;
23325be28490SFam Zheng BlockBackend *blk = NULL;
23335be28490SFam Zheng
23342475a0d0SEmanuele Giuseppe Esposito ret = bdrv_co_create_file(filename, opts, errp);
23355be28490SFam Zheng if (ret < 0) {
23365be28490SFam Zheng goto exit;
23375be28490SFam Zheng }
23385be28490SFam Zheng
2339882f202eSKevin Wolf blk = blk_co_new_open(filename, NULL, NULL,
23405be28490SFam Zheng BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
2341af175e85SMarkus Armbruster errp);
23425be28490SFam Zheng if (blk == NULL) {
23435be28490SFam Zheng ret = -EIO;
23445be28490SFam Zheng goto exit;
23455be28490SFam Zheng }
23465be28490SFam Zheng
23475be28490SFam Zheng blk_set_allow_write_beyond_eof(blk, true);
23485be28490SFam Zheng
23495be28490SFam Zheng ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp);
23505be28490SFam Zheng exit:
23515be28490SFam Zheng if (blk) {
23525be28490SFam Zheng if (pbb) {
23535be28490SFam Zheng *pbb = blk;
23545be28490SFam Zheng } else {
2355b2ab5f54SKevin Wolf blk_co_unref(blk);
23565be28490SFam Zheng blk = NULL;
23575be28490SFam Zheng }
23585be28490SFam Zheng }
23595be28490SFam Zheng return ret;
23605be28490SFam Zheng }
23615be28490SFam Zheng
filename_decompose(const char * filename,char * path,char * prefix,char * postfix,size_t buf_len,Error ** errp)2362f66fd6c3SFam Zheng static int filename_decompose(const char *filename, char *path, char *prefix,
23634823970bSFam Zheng char *postfix, size_t buf_len, Error **errp)
2364f66fd6c3SFam Zheng {
2365f66fd6c3SFam Zheng const char *p, *q;
2366f66fd6c3SFam Zheng
2367f66fd6c3SFam Zheng if (filename == NULL || !strlen(filename)) {
23684823970bSFam Zheng error_setg(errp, "No filename provided");
236965f74725SFam Zheng return VMDK_ERROR;
2370f66fd6c3SFam Zheng }
2371f66fd6c3SFam Zheng p = strrchr(filename, '/');
2372f66fd6c3SFam Zheng if (p == NULL) {
2373f66fd6c3SFam Zheng p = strrchr(filename, '\\');
2374f66fd6c3SFam Zheng }
2375f66fd6c3SFam Zheng if (p == NULL) {
2376f66fd6c3SFam Zheng p = strrchr(filename, ':');
2377f66fd6c3SFam Zheng }
2378f66fd6c3SFam Zheng if (p != NULL) {
2379f66fd6c3SFam Zheng p++;
2380f66fd6c3SFam Zheng if (p - filename >= buf_len) {
238165f74725SFam Zheng return VMDK_ERROR;
2382f66fd6c3SFam Zheng }
2383f66fd6c3SFam Zheng pstrcpy(path, p - filename + 1, filename);
2384f66fd6c3SFam Zheng } else {
2385f66fd6c3SFam Zheng p = filename;
2386f66fd6c3SFam Zheng path[0] = '\0';
2387f66fd6c3SFam Zheng }
2388f66fd6c3SFam Zheng q = strrchr(p, '.');
2389f66fd6c3SFam Zheng if (q == NULL) {
2390f66fd6c3SFam Zheng pstrcpy(prefix, buf_len, p);
2391f66fd6c3SFam Zheng postfix[0] = '\0';
2392f66fd6c3SFam Zheng } else {
2393f66fd6c3SFam Zheng if (q - p >= buf_len) {
239465f74725SFam Zheng return VMDK_ERROR;
2395f66fd6c3SFam Zheng }
2396f66fd6c3SFam Zheng pstrcpy(prefix, q - p + 1, p);
2397f66fd6c3SFam Zheng pstrcpy(postfix, buf_len, q);
2398f66fd6c3SFam Zheng }
239965f74725SFam Zheng return VMDK_OK;
2400f66fd6c3SFam Zheng }
2401f66fd6c3SFam Zheng
/*
 * idx == 0: get or create the descriptor file (also the image file if in a
 *           non-split format).
 * idx >= 1: get the n-th extent if in a split subformat
 */
24074db7ba3bSKevin Wolf typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
24084ec8df01SKevin Wolf (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
24094ec8df01SKevin Wolf bool compress, bool zeroed_grain, void *opaque,
24103015372dSFam Zheng Error **errp);
24113015372dSFam Zheng
/*
 * Append one extent line to the descriptor text in @desc.
 *
 * @extent_line_fmt is used as the printf-style format; it is given @size
 * converted to sectors (rounded up) and the base name of @filename, in that
 * order.
 */
static void vmdk_desc_add_extent(GString *desc,
                                 const char *extent_line_fmt,
                                 int64_t size, const char *filename)
{
    char *base;

    /* Only the last path component of @filename goes into the descriptor */
    base = g_path_get_basename(filename);
    g_string_append_printf(desc, extent_line_fmt,
                           DIV_ROUND_UP(size, BDRV_SECTOR_SIZE), base);
    g_free(base);
}
24223015372dSFam Zheng
24234db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_do_create(int64_t size,BlockdevVmdkSubformat subformat,BlockdevVmdkAdapterType adapter_type,const char * backing_file,const char * hw_version,const char * toolsversion,bool compat6,bool zeroed_grain,vmdk_create_extent_fn extent_fn,void * opaque,Error ** errp)24244ec8df01SKevin Wolf vmdk_co_do_create(int64_t size,
24253015372dSFam Zheng BlockdevVmdkSubformat subformat,
24263015372dSFam Zheng BlockdevVmdkAdapterType adapter_type,
24273015372dSFam Zheng const char *backing_file,
24283015372dSFam Zheng const char *hw_version,
2429f3d43dfdSThomas Weißschuh const char *toolsversion,
24303015372dSFam Zheng bool compat6,
24313015372dSFam Zheng bool zeroed_grain,
24323015372dSFam Zheng vmdk_create_extent_fn extent_fn,
24333015372dSFam Zheng void *opaque,
2434efc75e2aSStefan Hajnoczi Error **errp)
2435f66fd6c3SFam Zheng {
24363015372dSFam Zheng int extent_idx;
24373015372dSFam Zheng BlockBackend *blk = NULL;
24384a960eceSKevin Wolf BlockBackend *extent_blk;
2439c13959c7SFam Zheng Error *local_err = NULL;
2440af057fe7SFam Zheng char *desc = NULL;
2441f66fd6c3SFam Zheng int ret = 0;
24426c031aacSFam Zheng bool flat, split, compress;
2443af057fe7SFam Zheng GString *ext_desc_lines;
2444f66fd6c3SFam Zheng const int64_t split_size = 0x80000000; /* VMDK has constant split size */
24453015372dSFam Zheng int64_t extent_size;
24463015372dSFam Zheng int64_t created_size = 0;
24473015372dSFam Zheng const char *extent_line_fmt;
2448fe206562SJeff Cody char *parent_desc_line = g_malloc0(BUF_SIZE);
2449f66fd6c3SFam Zheng uint32_t parent_cid = 0xffffffff;
24507f2039f6SOthmar Pasteka uint32_t number_heads = 16;
2451917703c1SFam Zheng uint32_t desc_offset = 0, desc_len;
2452f66fd6c3SFam Zheng const char desc_template[] =
2453f66fd6c3SFam Zheng "# Disk DescriptorFile\n"
2454f66fd6c3SFam Zheng "version=1\n"
24559b17031aSFam Zheng "CID=%" PRIx32 "\n"
24569b17031aSFam Zheng "parentCID=%" PRIx32 "\n"
2457f66fd6c3SFam Zheng "createType=\"%s\"\n"
2458f66fd6c3SFam Zheng "%s"
2459f66fd6c3SFam Zheng "\n"
2460f66fd6c3SFam Zheng "# Extent description\n"
2461f66fd6c3SFam Zheng "%s"
2462f66fd6c3SFam Zheng "\n"
2463f66fd6c3SFam Zheng "# The Disk Data Base\n"
2464f66fd6c3SFam Zheng "#DDB\n"
2465f66fd6c3SFam Zheng "\n"
2466f249924eSJanne Karhunen "ddb.virtualHWVersion = \"%s\"\n"
2467f66fd6c3SFam Zheng "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
24684ab9dab5SFam Zheng "ddb.geometry.heads = \"%" PRIu32 "\"\n"
2469f66fd6c3SFam Zheng "ddb.geometry.sectors = \"63\"\n"
2470f3d43dfdSThomas Weißschuh "ddb.adapterType = \"%s\"\n"
2471f3d43dfdSThomas Weißschuh "ddb.toolsVersion = \"%s\"\n";
2472f66fd6c3SFam Zheng
2473af057fe7SFam Zheng ext_desc_lines = g_string_new(NULL);
2474af057fe7SFam Zheng
2475f66fd6c3SFam Zheng /* Read out options */
24763015372dSFam Zheng if (compat6) {
24773015372dSFam Zheng if (hw_version) {
2478f249924eSJanne Karhunen error_setg(errp,
2479f249924eSJanne Karhunen "compat6 cannot be enabled with hwversion set");
2480f249924eSJanne Karhunen ret = -EINVAL;
2481f249924eSJanne Karhunen goto exit;
2482f249924eSJanne Karhunen }
24833015372dSFam Zheng hw_version = "6";
2484f249924eSJanne Karhunen }
24853015372dSFam Zheng if (!hw_version) {
24863015372dSFam Zheng hw_version = "4";
2487f66fd6c3SFam Zheng }
2488f3d43dfdSThomas Weißschuh if (!toolsversion) {
2489f3d43dfdSThomas Weißschuh toolsversion = "2147483647";
2490f3d43dfdSThomas Weißschuh }
24915820f1daSChunyan Liu
24923015372dSFam Zheng if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
24937f2039f6SOthmar Pasteka /* that's the number of heads with which vmware operates when
24947f2039f6SOthmar Pasteka creating, exporting, etc. vmdk files with a non-ide adapter type */
24957f2039f6SOthmar Pasteka number_heads = 255;
24967f2039f6SOthmar Pasteka }
24973015372dSFam Zheng split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) ||
24983015372dSFam Zheng (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE);
24993015372dSFam Zheng flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) ||
25003015372dSFam Zheng (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT);
25013015372dSFam Zheng compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED;
25023015372dSFam Zheng
2503f66fd6c3SFam Zheng if (flat) {
25043015372dSFam Zheng extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n";
2505f66fd6c3SFam Zheng } else {
25063015372dSFam Zheng extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n";
2507f66fd6c3SFam Zheng }
2508f66fd6c3SFam Zheng if (flat && backing_file) {
25094823970bSFam Zheng error_setg(errp, "Flat image can't have backing file");
2510af057fe7SFam Zheng ret = -ENOTSUP;
2511af057fe7SFam Zheng goto exit;
2512f66fd6c3SFam Zheng }
251352c8d629SFam Zheng if (flat && zeroed_grain) {
251452c8d629SFam Zheng error_setg(errp, "Flat image can't enable zeroed grain");
2515af057fe7SFam Zheng ret = -ENOTSUP;
2516af057fe7SFam Zheng goto exit;
251752c8d629SFam Zheng }
25183015372dSFam Zheng
25193015372dSFam Zheng /* Create extents */
25203015372dSFam Zheng if (split) {
25213015372dSFam Zheng extent_size = split_size;
25223015372dSFam Zheng } else {
25233015372dSFam Zheng extent_size = size;
25243015372dSFam Zheng }
25253015372dSFam Zheng if (!split && !flat) {
25263015372dSFam Zheng created_size = extent_size;
25273015372dSFam Zheng } else {
25283015372dSFam Zheng created_size = 0;
25293015372dSFam Zheng }
25303015372dSFam Zheng /* Get the descriptor file BDS */
25313015372dSFam Zheng blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain,
25323015372dSFam Zheng opaque, errp);
25333015372dSFam Zheng if (!blk) {
25343015372dSFam Zheng ret = -EIO;
25353015372dSFam Zheng goto exit;
25363015372dSFam Zheng }
25373015372dSFam Zheng if (!split && !flat) {
25383015372dSFam Zheng vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size,
25393015372dSFam Zheng blk_bs(blk)->filename);
25403015372dSFam Zheng }
25413015372dSFam Zheng
2542f66fd6c3SFam Zheng if (backing_file) {
25433015372dSFam Zheng BlockBackend *backing;
2544645ae7d8SMax Reitz char *full_backing =
2545645ae7d8SMax Reitz bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename,
2546645ae7d8SMax Reitz backing_file,
25471085daf9SMax Reitz &local_err);
25481085daf9SMax Reitz if (local_err) {
25491085daf9SMax Reitz error_propagate(errp, local_err);
25501085daf9SMax Reitz ret = -ENOENT;
25511085daf9SMax Reitz goto exit;
25521085daf9SMax Reitz }
2553645ae7d8SMax Reitz assert(full_backing);
2554c4bea169SKevin Wolf
2555882f202eSKevin Wolf backing = blk_co_new_open(full_backing, NULL, NULL,
255672e775c7SKevin Wolf BDRV_O_NO_BACKING, errp);
25571085daf9SMax Reitz g_free(full_backing);
25583015372dSFam Zheng if (backing == NULL) {
2559c4bea169SKevin Wolf ret = -EIO;
2560af057fe7SFam Zheng goto exit;
2561f66fd6c3SFam Zheng }
25623015372dSFam Zheng if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
25633015372dSFam Zheng error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
25643015372dSFam Zheng blk_bs(backing)->drv->format_name);
2565b2ab5f54SKevin Wolf blk_co_unref(backing);
2566af057fe7SFam Zheng ret = -EINVAL;
2567af057fe7SFam Zheng goto exit;
2568f66fd6c3SFam Zheng }
25691f051dcbSKevin Wolf
25701f051dcbSKevin Wolf bdrv_graph_co_rdlock();
25713015372dSFam Zheng ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
25721f051dcbSKevin Wolf bdrv_graph_co_rdunlock();
2573b2ab5f54SKevin Wolf blk_co_unref(backing);
25749877860eSPeter Maydell if (ret) {
25753015372dSFam Zheng error_setg(errp, "Failed to read parent CID");
25769877860eSPeter Maydell goto exit;
25779877860eSPeter Maydell }
2578fe206562SJeff Cody snprintf(parent_desc_line, BUF_SIZE,
25798ed610a1SFam Zheng "parentFileNameHint=\"%s\"", backing_file);
2580f66fd6c3SFam Zheng }
25813015372dSFam Zheng extent_idx = 1;
25823015372dSFam Zheng while (created_size < size) {
25833015372dSFam Zheng int64_t cur_size = MIN(size - created_size, extent_size);
25843015372dSFam Zheng extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress,
25853015372dSFam Zheng zeroed_grain, opaque, errp);
25863015372dSFam Zheng if (!extent_blk) {
2587af057fe7SFam Zheng ret = -EINVAL;
2588af057fe7SFam Zheng goto exit;
2589f66fd6c3SFam Zheng }
25903015372dSFam Zheng vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size,
25913015372dSFam Zheng blk_bs(extent_blk)->filename);
25923015372dSFam Zheng created_size += cur_size;
25933015372dSFam Zheng extent_idx++;
2594b2ab5f54SKevin Wolf blk_co_unref(extent_blk);
2595f66fd6c3SFam Zheng }
25964a960eceSKevin Wolf
25974a960eceSKevin Wolf /* Check whether we got excess extents */
25984a960eceSKevin Wolf extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
25994a960eceSKevin Wolf opaque, NULL);
26004a960eceSKevin Wolf if (extent_blk) {
2601b2ab5f54SKevin Wolf blk_co_unref(extent_blk);
26024a960eceSKevin Wolf error_setg(errp, "List of extents contains unused extents");
26034a960eceSKevin Wolf ret = -EINVAL;
26044a960eceSKevin Wolf goto exit;
26054a960eceSKevin Wolf }
26064a960eceSKevin Wolf
2607f66fd6c3SFam Zheng /* generate descriptor file */
2608af057fe7SFam Zheng desc = g_strdup_printf(desc_template,
2609e5dc64b8SFam Zheng g_random_int(),
2610f66fd6c3SFam Zheng parent_cid,
26113015372dSFam Zheng BlockdevVmdkSubformat_str(subformat),
2612f66fd6c3SFam Zheng parent_desc_line,
2613af057fe7SFam Zheng ext_desc_lines->str,
2614f249924eSJanne Karhunen hw_version,
26153015372dSFam Zheng size /
2616917703c1SFam Zheng (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
2617af057fe7SFam Zheng number_heads,
2618f3d43dfdSThomas Weißschuh BlockdevVmdkAdapterType_str(adapter_type),
2619f3d43dfdSThomas Weißschuh toolsversion);
2620917703c1SFam Zheng desc_len = strlen(desc);
2621917703c1SFam Zheng /* the descriptor offset = 0x200 */
2622917703c1SFam Zheng if (!split && !flat) {
2623917703c1SFam Zheng desc_offset = 0x200;
2624f66fd6c3SFam Zheng }
2625c4bea169SKevin Wolf
2626a5c4e5beSAlberto Faria ret = blk_co_pwrite(blk, desc_offset, desc_len, desc, 0);
2627917703c1SFam Zheng if (ret < 0) {
2628917703c1SFam Zheng error_setg_errno(errp, -ret, "Could not write description");
2629917703c1SFam Zheng goto exit;
2630917703c1SFam Zheng }
2631917703c1SFam Zheng /* bdrv_pwrite write padding zeros to align to sector, we don't need that
2632917703c1SFam Zheng * for description file */
2633917703c1SFam Zheng if (desc_offset == 0) {
2634a5c4e5beSAlberto Faria ret = blk_co_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
26353015372dSFam Zheng if (ret < 0) {
26363015372dSFam Zheng goto exit;
2637917703c1SFam Zheng }
26383015372dSFam Zheng }
26393015372dSFam Zheng ret = 0;
2640af057fe7SFam Zheng exit:
26413015372dSFam Zheng if (blk) {
2642b2ab5f54SKevin Wolf blk_co_unref(blk);
2643917703c1SFam Zheng }
26443015372dSFam Zheng g_free(desc);
26453015372dSFam Zheng g_free(parent_desc_line);
26463015372dSFam Zheng g_string_free(ext_desc_lines, true);
26473015372dSFam Zheng return ret;
26483015372dSFam Zheng }
26493015372dSFam Zheng
26503015372dSFam Zheng typedef struct {
26513015372dSFam Zheng char *path;
26523015372dSFam Zheng char *prefix;
26533015372dSFam Zheng char *postfix;
26543015372dSFam Zheng QemuOpts *opts;
26553015372dSFam Zheng } VMDKCreateOptsData;
26563015372dSFam Zheng
26574db7ba3bSKevin Wolf static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts_cb(int64_t size,int idx,bool flat,bool split,bool compress,bool zeroed_grain,void * opaque,Error ** errp)26584ec8df01SKevin Wolf vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
26594ec8df01SKevin Wolf bool compress, bool zeroed_grain, void *opaque,
26603015372dSFam Zheng Error **errp)
26613015372dSFam Zheng {
26623015372dSFam Zheng BlockBackend *blk = NULL;
26633015372dSFam Zheng BlockDriverState *bs = NULL;
26643015372dSFam Zheng VMDKCreateOptsData *data = opaque;
26653015372dSFam Zheng char *ext_filename = NULL;
26663015372dSFam Zheng char *rel_filename = NULL;
26673015372dSFam Zheng
26684a960eceSKevin Wolf /* We're done, don't create excess extents. */
26694a960eceSKevin Wolf if (size == -1) {
26704a960eceSKevin Wolf assert(errp == NULL);
26714a960eceSKevin Wolf return NULL;
26724a960eceSKevin Wolf }
26734a960eceSKevin Wolf
26743015372dSFam Zheng if (idx == 0) {
26753015372dSFam Zheng rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix);
26763015372dSFam Zheng } else if (split) {
26773015372dSFam Zheng rel_filename = g_strdup_printf("%s-%c%03d%s",
26783015372dSFam Zheng data->prefix,
26793015372dSFam Zheng flat ? 'f' : 's', idx, data->postfix);
26803015372dSFam Zheng } else {
26813015372dSFam Zheng assert(idx == 1);
26823015372dSFam Zheng rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix);
26833015372dSFam Zheng }
26843015372dSFam Zheng
26853015372dSFam Zheng ext_filename = g_strdup_printf("%s%s", data->path, rel_filename);
26863015372dSFam Zheng g_free(rel_filename);
26873015372dSFam Zheng
26883015372dSFam Zheng if (vmdk_create_extent(ext_filename, size,
26893015372dSFam Zheng flat, compress, zeroed_grain, &blk, data->opts,
26903015372dSFam Zheng errp)) {
26913015372dSFam Zheng goto exit;
26923015372dSFam Zheng }
2693b2ab5f54SKevin Wolf bdrv_co_unref(bs);
26943015372dSFam Zheng exit:
26953015372dSFam Zheng g_free(ext_filename);
26963015372dSFam Zheng return blk;
26973015372dSFam Zheng }
26983015372dSFam Zheng
26994db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts(BlockDriver * drv,const char * filename,QemuOpts * opts,Error ** errp)27004ec8df01SKevin Wolf vmdk_co_create_opts(BlockDriver *drv, const char *filename,
27014ec8df01SKevin Wolf QemuOpts *opts, Error **errp)
27023015372dSFam Zheng {
27033015372dSFam Zheng Error *local_err = NULL;
27043015372dSFam Zheng char *desc = NULL;
27053015372dSFam Zheng int64_t total_size = 0;
27063015372dSFam Zheng char *adapter_type = NULL;
27073015372dSFam Zheng BlockdevVmdkAdapterType adapter_type_enum;
27083015372dSFam Zheng char *backing_file = NULL;
27093015372dSFam Zheng char *hw_version = NULL;
2710f3d43dfdSThomas Weißschuh char *toolsversion = NULL;
27113015372dSFam Zheng char *fmt = NULL;
27123015372dSFam Zheng BlockdevVmdkSubformat subformat;
27133015372dSFam Zheng int ret = 0;
27143015372dSFam Zheng char *path = g_malloc0(PATH_MAX);
27153015372dSFam Zheng char *prefix = g_malloc0(PATH_MAX);
27163015372dSFam Zheng char *postfix = g_malloc0(PATH_MAX);
27173015372dSFam Zheng char *desc_line = g_malloc0(BUF_SIZE);
27183015372dSFam Zheng char *ext_filename = g_malloc0(PATH_MAX);
27193015372dSFam Zheng char *desc_filename = g_malloc0(PATH_MAX);
27203015372dSFam Zheng char *parent_desc_line = g_malloc0(BUF_SIZE);
27213015372dSFam Zheng bool zeroed_grain;
27223015372dSFam Zheng bool compat6;
27233015372dSFam Zheng VMDKCreateOptsData data;
2724d51a814cSEric Blake char *backing_fmt = NULL;
2725d51a814cSEric Blake
2726d51a814cSEric Blake backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
2727d51a814cSEric Blake if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) {
2728d51a814cSEric Blake error_setg(errp, "backing_file must be a vmdk image");
2729d51a814cSEric Blake ret = -EINVAL;
2730d51a814cSEric Blake goto exit;
2731d51a814cSEric Blake }
27323015372dSFam Zheng
27333015372dSFam Zheng if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
27343015372dSFam Zheng ret = -EINVAL;
27353015372dSFam Zheng goto exit;
27363015372dSFam Zheng }
27373015372dSFam Zheng /* Read out options */
27383015372dSFam Zheng total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
27393015372dSFam Zheng BDRV_SECTOR_SIZE);
27403015372dSFam Zheng adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
27413015372dSFam Zheng backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
27423015372dSFam Zheng hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
2743f3d43dfdSThomas Weißschuh toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
27443015372dSFam Zheng compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
27453015372dSFam Zheng if (strcmp(hw_version, "undefined") == 0) {
27463015372dSFam Zheng g_free(hw_version);
274726c9296cSyuchenlin hw_version = NULL;
27483015372dSFam Zheng }
27493015372dSFam Zheng fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
27503015372dSFam Zheng zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false);
27513015372dSFam Zheng
27523015372dSFam Zheng if (adapter_type) {
27533015372dSFam Zheng adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup,
27543015372dSFam Zheng adapter_type,
27553015372dSFam Zheng BLOCKDEV_VMDK_ADAPTER_TYPE_IDE,
27563015372dSFam Zheng &local_err);
27573015372dSFam Zheng if (local_err) {
27583015372dSFam Zheng error_propagate(errp, local_err);
27593015372dSFam Zheng ret = -EINVAL;
27603015372dSFam Zheng goto exit;
27613015372dSFam Zheng }
27623015372dSFam Zheng } else {
27633015372dSFam Zheng adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE;
27643015372dSFam Zheng }
27653015372dSFam Zheng
27663015372dSFam Zheng if (!fmt) {
27673015372dSFam Zheng /* Default format to monolithicSparse */
27683015372dSFam Zheng subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE;
27693015372dSFam Zheng } else {
27703015372dSFam Zheng subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup,
27713015372dSFam Zheng fmt,
27723015372dSFam Zheng BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE,
27733015372dSFam Zheng &local_err);
27743015372dSFam Zheng if (local_err) {
27753015372dSFam Zheng error_propagate(errp, local_err);
27763015372dSFam Zheng ret = -EINVAL;
27773015372dSFam Zheng goto exit;
27783015372dSFam Zheng }
27793015372dSFam Zheng }
27803015372dSFam Zheng data = (VMDKCreateOptsData){
27813015372dSFam Zheng .prefix = prefix,
27823015372dSFam Zheng .postfix = postfix,
27833015372dSFam Zheng .path = path,
27843015372dSFam Zheng .opts = opts,
27853015372dSFam Zheng };
27863015372dSFam Zheng ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
2787f3d43dfdSThomas Weißschuh backing_file, hw_version, toolsversion, compat6,
2788f3d43dfdSThomas Weißschuh zeroed_grain, vmdk_co_create_opts_cb, &data, errp);
27893015372dSFam Zheng
27903015372dSFam Zheng exit:
2791d51a814cSEric Blake g_free(backing_fmt);
27925820f1daSChunyan Liu g_free(adapter_type);
27935820f1daSChunyan Liu g_free(backing_file);
2794f249924eSJanne Karhunen g_free(hw_version);
2795f3d43dfdSThomas Weißschuh g_free(toolsversion);
27965820f1daSChunyan Liu g_free(fmt);
2797af057fe7SFam Zheng g_free(desc);
2798fe206562SJeff Cody g_free(path);
2799fe206562SJeff Cody g_free(prefix);
2800fe206562SJeff Cody g_free(postfix);
2801fe206562SJeff Cody g_free(desc_line);
2802fe206562SJeff Cody g_free(ext_filename);
2803fe206562SJeff Cody g_free(desc_filename);
2804fe206562SJeff Cody g_free(parent_desc_line);
28053015372dSFam Zheng return ret;
28063015372dSFam Zheng }
28073015372dSFam Zheng
28084db7ba3bSKevin Wolf static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_cb(int64_t size,int idx,bool flat,bool split,bool compress,bool zeroed_grain,void * opaque,Error ** errp)28094db7ba3bSKevin Wolf vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
28104db7ba3bSKevin Wolf bool zeroed_grain, void *opaque, Error **errp)
28113015372dSFam Zheng {
28123015372dSFam Zheng int ret;
28133015372dSFam Zheng BlockDriverState *bs;
28143015372dSFam Zheng BlockBackend *blk;
28153015372dSFam Zheng BlockdevCreateOptionsVmdk *opts = opaque;
28163015372dSFam Zheng
28173015372dSFam Zheng if (idx == 0) {
2818882f202eSKevin Wolf bs = bdrv_co_open_blockdev_ref(opts->file, errp);
28193015372dSFam Zheng } else {
28203015372dSFam Zheng int i;
28213015372dSFam Zheng BlockdevRefList *list = opts->extents;
28223015372dSFam Zheng for (i = 1; i < idx; i++) {
28233015372dSFam Zheng if (!list || !list->next) {
28243015372dSFam Zheng error_setg(errp, "Extent [%d] not specified", i);
28253015372dSFam Zheng return NULL;
28263015372dSFam Zheng }
28273015372dSFam Zheng list = list->next;
28283015372dSFam Zheng }
28293015372dSFam Zheng if (!list) {
28303015372dSFam Zheng error_setg(errp, "Extent [%d] not specified", idx - 1);
28313015372dSFam Zheng return NULL;
28323015372dSFam Zheng }
2833882f202eSKevin Wolf bs = bdrv_co_open_blockdev_ref(list->value, errp);
28343015372dSFam Zheng }
28353015372dSFam Zheng if (!bs) {
28363015372dSFam Zheng return NULL;
28373015372dSFam Zheng }
2838882f202eSKevin Wolf blk = blk_co_new_with_bs(bs,
2839882f202eSKevin Wolf BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
2840882f202eSKevin Wolf BLK_PERM_RESIZE,
2841882f202eSKevin Wolf BLK_PERM_ALL,
2842882f202eSKevin Wolf errp);
2843a3aeeab5SEric Blake if (!blk) {
28443015372dSFam Zheng return NULL;
28453015372dSFam Zheng }
28463015372dSFam Zheng blk_set_allow_write_beyond_eof(blk, true);
2847b2ab5f54SKevin Wolf bdrv_co_unref(bs);
28483015372dSFam Zheng
28494a960eceSKevin Wolf if (size != -1) {
28503015372dSFam Zheng ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
28513015372dSFam Zheng if (ret) {
2852b2ab5f54SKevin Wolf blk_co_unref(blk);
28533015372dSFam Zheng blk = NULL;
28543015372dSFam Zheng }
28554a960eceSKevin Wolf }
28563015372dSFam Zheng return blk;
28573015372dSFam Zheng }
28583015372dSFam Zheng
28594db7ba3bSKevin Wolf static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create(BlockdevCreateOptions * create_options,Error ** errp)28604ec8df01SKevin Wolf vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
28613015372dSFam Zheng {
28623015372dSFam Zheng BlockdevCreateOptionsVmdk *opts;
28633015372dSFam Zheng
28643015372dSFam Zheng opts = &create_options->u.vmdk;
28653015372dSFam Zheng
28663015372dSFam Zheng /* Validate options */
28673015372dSFam Zheng if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
28683015372dSFam Zheng error_setg(errp, "Image size must be a multiple of 512 bytes");
2869851fd4a0SMarkus Armbruster return -EINVAL;
28703015372dSFam Zheng }
28713015372dSFam Zheng
2872851fd4a0SMarkus Armbruster return vmdk_co_do_create(opts->size,
28733015372dSFam Zheng opts->subformat,
28743015372dSFam Zheng opts->adapter_type,
28753015372dSFam Zheng opts->backing_file,
28763015372dSFam Zheng opts->hwversion,
2877f3d43dfdSThomas Weißschuh opts->toolsversion,
28783015372dSFam Zheng false,
28793015372dSFam Zheng opts->zeroed_grain,
28803015372dSFam Zheng vmdk_co_create_cb,
28813015372dSFam Zheng opts, errp);
2882019d6b8fSAnthony Liguori }
2883019d6b8fSAnthony Liguori
/*
 * Release the driver state of @bs: its extents, the create_type string and
 * the migration blocker.
 */
static void vmdk_close(BlockDriverState *bs)
{
    BDRVVmdkState *state = bs->opaque;

    vmdk_free_extents(bs);
    g_free(state->create_type);

    migrate_del_blocker(&state->migration_blocker);
}
2893019d6b8fSAnthony Liguori
2894de335638SEmanuele Giuseppe Esposito static int64_t coroutine_fn GRAPH_RDLOCK
vmdk_co_get_allocated_file_size(BlockDriverState * bs)289582618d7bSEmanuele Giuseppe Esposito vmdk_co_get_allocated_file_size(BlockDriverState *bs)
28964a1d5e1fSFam Zheng {
28974a1d5e1fSFam Zheng int i;
28984a1d5e1fSFam Zheng int64_t ret = 0;
28994a1d5e1fSFam Zheng int64_t r;
29004a1d5e1fSFam Zheng BDRVVmdkState *s = bs->opaque;
29014a1d5e1fSFam Zheng
290282618d7bSEmanuele Giuseppe Esposito ret = bdrv_co_get_allocated_file_size(bs->file->bs);
29034a1d5e1fSFam Zheng if (ret < 0) {
29044a1d5e1fSFam Zheng return ret;
29054a1d5e1fSFam Zheng }
29064a1d5e1fSFam Zheng for (i = 0; i < s->num_extents; i++) {
29079a4f4c31SKevin Wolf if (s->extents[i].file == bs->file) {
29084a1d5e1fSFam Zheng continue;
29094a1d5e1fSFam Zheng }
291082618d7bSEmanuele Giuseppe Esposito r = bdrv_co_get_allocated_file_size(s->extents[i].file->bs);
29114a1d5e1fSFam Zheng if (r < 0) {
29124a1d5e1fSFam Zheng return r;
29134a1d5e1fSFam Zheng }
29144a1d5e1fSFam Zheng ret += r;
29154a1d5e1fSFam Zheng }
29164a1d5e1fSFam Zheng return ret;
29174a1d5e1fSFam Zheng }
29180e7e1989SKevin Wolf
vmdk_has_zero_init(BlockDriverState * bs)291906717986SKevin Wolf static int GRAPH_RDLOCK vmdk_has_zero_init(BlockDriverState *bs)
2920da7a50f9SFam Zheng {
2921da7a50f9SFam Zheng int i;
2922da7a50f9SFam Zheng BDRVVmdkState *s = bs->opaque;
2923da7a50f9SFam Zheng
2924da7a50f9SFam Zheng /* If has a flat extent and its underlying storage doesn't have zero init,
2925da7a50f9SFam Zheng * return 0. */
2926da7a50f9SFam Zheng for (i = 0; i < s->num_extents; i++) {
2927da7a50f9SFam Zheng if (s->extents[i].flat) {
292824bc15d1SKevin Wolf if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
2929da7a50f9SFam Zheng return 0;
2930da7a50f9SFam Zheng }
2931da7a50f9SFam Zheng }
2932da7a50f9SFam Zheng }
2933da7a50f9SFam Zheng return 1;
2934da7a50f9SFam Zheng }
2935da7a50f9SFam Zheng
vmdk_get_extent_info(VmdkExtent * extent)2936b7cfc7d5SKevin Wolf static VmdkExtentInfo * GRAPH_RDLOCK vmdk_get_extent_info(VmdkExtent *extent)
2937f4c129a3SFam Zheng {
2938456e7517SHanna Reitz VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
2939f4c129a3SFam Zheng
2940f30c66baSMax Reitz bdrv_refresh_filename(extent->file->bs);
2941456e7517SHanna Reitz *info = (VmdkExtentInfo){
294224bc15d1SKevin Wolf .filename = g_strdup(extent->file->bs->filename),
2943f4c129a3SFam Zheng .format = g_strdup(extent->type),
2944f4c129a3SFam Zheng .virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
2945f4c129a3SFam Zheng .compressed = extent->compressed,
2946f4c129a3SFam Zheng .has_compressed = extent->compressed,
2947f4c129a3SFam Zheng .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE,
2948f4c129a3SFam Zheng .has_cluster_size = !extent->flat,
2949f4c129a3SFam Zheng };
2950f4c129a3SFam Zheng
2951f4c129a3SFam Zheng return info;
2952f4c129a3SFam Zheng }
2953f4c129a3SFam Zheng
2954b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
vmdk_co_check(BlockDriverState * bs,BdrvCheckResult * result,BdrvCheckMode fix)2955b9b10c35SKevin Wolf vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
2956f43aa8e1SPeter Lieven {
2957f43aa8e1SPeter Lieven BDRVVmdkState *s = bs->opaque;
2958f43aa8e1SPeter Lieven VmdkExtent *extent = NULL;
2959f43aa8e1SPeter Lieven int64_t sector_num = 0;
296017362398SPaolo Bonzini int64_t total_sectors = bdrv_co_nb_sectors(bs);
2961f43aa8e1SPeter Lieven int ret;
2962f43aa8e1SPeter Lieven uint64_t cluster_offset;
2963f43aa8e1SPeter Lieven
2964f43aa8e1SPeter Lieven if (fix) {
2965f43aa8e1SPeter Lieven return -ENOTSUP;
2966f43aa8e1SPeter Lieven }
2967f43aa8e1SPeter Lieven
2968f43aa8e1SPeter Lieven for (;;) {
2969f43aa8e1SPeter Lieven if (sector_num >= total_sectors) {
2970f43aa8e1SPeter Lieven return 0;
2971f43aa8e1SPeter Lieven }
2972f43aa8e1SPeter Lieven extent = find_extent(s, sector_num, extent);
2973f43aa8e1SPeter Lieven if (!extent) {
2974f43aa8e1SPeter Lieven fprintf(stderr,
2975f43aa8e1SPeter Lieven "ERROR: could not find extent for sector %" PRId64 "\n",
2976f43aa8e1SPeter Lieven sector_num);
29770e51b9b7SFam Zheng ret = -EINVAL;
2978f43aa8e1SPeter Lieven break;
2979f43aa8e1SPeter Lieven }
2980f43aa8e1SPeter Lieven ret = get_cluster_offset(bs, extent, NULL,
2981f43aa8e1SPeter Lieven sector_num << BDRV_SECTOR_BITS,
2982c6ac36e1SFam Zheng false, &cluster_offset, 0, 0);
2983f43aa8e1SPeter Lieven if (ret == VMDK_ERROR) {
2984f43aa8e1SPeter Lieven fprintf(stderr,
2985f43aa8e1SPeter Lieven "ERROR: could not get cluster_offset for sector %"
2986f43aa8e1SPeter Lieven PRId64 "\n", sector_num);
2987f43aa8e1SPeter Lieven break;
2988f43aa8e1SPeter Lieven }
29890e51b9b7SFam Zheng if (ret == VMDK_OK) {
29900af02bd1SPaolo Bonzini int64_t extent_len = bdrv_co_getlength(extent->file->bs);
29910e51b9b7SFam Zheng if (extent_len < 0) {
29920e51b9b7SFam Zheng fprintf(stderr,
29930e51b9b7SFam Zheng "ERROR: could not get extent file length for sector %"
29940e51b9b7SFam Zheng PRId64 "\n", sector_num);
29950e51b9b7SFam Zheng ret = extent_len;
29960e51b9b7SFam Zheng break;
29970e51b9b7SFam Zheng }
29980e51b9b7SFam Zheng if (cluster_offset >= extent_len) {
2999f43aa8e1SPeter Lieven fprintf(stderr,
3000f43aa8e1SPeter Lieven "ERROR: cluster offset for sector %"
3001f43aa8e1SPeter Lieven PRId64 " points after EOF\n", sector_num);
30020e51b9b7SFam Zheng ret = -EINVAL;
3003f43aa8e1SPeter Lieven break;
3004f43aa8e1SPeter Lieven }
30050e51b9b7SFam Zheng }
3006f43aa8e1SPeter Lieven sector_num += extent->cluster_sectors;
3007f43aa8e1SPeter Lieven }
3008f43aa8e1SPeter Lieven
3009f43aa8e1SPeter Lieven result->corruptions++;
30100e51b9b7SFam Zheng return ret;
3011f43aa8e1SPeter Lieven }
3012f43aa8e1SPeter Lieven
30133574499aSKevin Wolf static ImageInfoSpecific * GRAPH_RDLOCK
vmdk_get_specific_info(BlockDriverState * bs,Error ** errp)30143574499aSKevin Wolf vmdk_get_specific_info(BlockDriverState *bs, Error **errp)
3015f4c129a3SFam Zheng {
3016f4c129a3SFam Zheng int i;
3017f4c129a3SFam Zheng BDRVVmdkState *s = bs->opaque;
3018f4c129a3SFam Zheng ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
3019456e7517SHanna Reitz VmdkExtentInfoList **tail;
3020f4c129a3SFam Zheng
3021f4c129a3SFam Zheng *spec_info = (ImageInfoSpecific){
30226a8f9661SEric Blake .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
302332bafa8fSEric Blake .u = {
302432bafa8fSEric Blake .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
3025f4c129a3SFam Zheng },
3026f4c129a3SFam Zheng };
3027f4c129a3SFam Zheng
302832bafa8fSEric Blake *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
3029f4c129a3SFam Zheng .create_type = g_strdup(s->create_type),
3030f4c129a3SFam Zheng .cid = s->cid,
3031f4c129a3SFam Zheng .parent_cid = s->parent_cid,
3032f4c129a3SFam Zheng };
3033f4c129a3SFam Zheng
3034c3033fd3SEric Blake tail = &spec_info->u.vmdk.data->extents;
3035f4c129a3SFam Zheng for (i = 0; i < s->num_extents; i++) {
3036c3033fd3SEric Blake QAPI_LIST_APPEND(tail, vmdk_get_extent_info(&s->extents[i]));
3037f4c129a3SFam Zheng }
3038f4c129a3SFam Zheng
3039f4c129a3SFam Zheng return spec_info;
3040f4c129a3SFam Zheng }
3041f4c129a3SFam Zheng
vmdk_extents_type_eq(const VmdkExtent * a,const VmdkExtent * b)30425f583307SFam Zheng static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
30435f583307SFam Zheng {
30445f583307SFam Zheng return a->flat == b->flat &&
30455f583307SFam Zheng a->compressed == b->compressed &&
30465f583307SFam Zheng (a->flat || a->cluster_sectors == b->cluster_sectors);
30475f583307SFam Zheng }
30485f583307SFam Zheng
30493d47eb0aSEmanuele Giuseppe Esposito static int coroutine_fn
vmdk_co_get_info(BlockDriverState * bs,BlockDriverInfo * bdi)30503d47eb0aSEmanuele Giuseppe Esposito vmdk_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
305174fe188cSFam Zheng {
305274fe188cSFam Zheng int i;
305374fe188cSFam Zheng BDRVVmdkState *s = bs->opaque;
305474fe188cSFam Zheng assert(s->num_extents);
30555f583307SFam Zheng
30565f583307SFam Zheng /* See if we have multiple extents but they have different cases */
30575f583307SFam Zheng for (i = 1; i < s->num_extents; i++) {
30585f583307SFam Zheng if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
30595f583307SFam Zheng return -ENOTSUP;
30605f583307SFam Zheng }
30615f583307SFam Zheng }
306274fe188cSFam Zheng bdi->needs_compressed_writes = s->extents[0].compressed;
306374fe188cSFam Zheng if (!s->extents[0].flat) {
306474fe188cSFam Zheng bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
306574fe188cSFam Zheng }
306674fe188cSFam Zheng return 0;
306774fe188cSFam Zheng }
306874fe188cSFam Zheng
3069004915a9SKevin Wolf static void GRAPH_RDLOCK
vmdk_gather_child_options(BlockDriverState * bs,QDict * target,bool backing_overridden)3070004915a9SKevin Wolf vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
3071abc521a9SMax Reitz bool backing_overridden)
3072abc521a9SMax Reitz {
3073abc521a9SMax Reitz /* No children but file and backing can be explicitly specified (TODO) */
3074abc521a9SMax Reitz qdict_put(target, "file",
3075abc521a9SMax Reitz qobject_ref(bs->file->bs->full_open_options));
3076abc521a9SMax Reitz
3077abc521a9SMax Reitz if (backing_overridden) {
3078abc521a9SMax Reitz if (bs->backing) {
3079abc521a9SMax Reitz qdict_put(target, "backing",
3080abc521a9SMax Reitz qobject_ref(bs->backing->bs->full_open_options));
3081abc521a9SMax Reitz } else {
3082abc521a9SMax Reitz qdict_put_null(target, "backing");
3083abc521a9SMax Reitz }
3084abc521a9SMax Reitz }
3085abc521a9SMax Reitz }
3086abc521a9SMax Reitz
30875820f1daSChunyan Liu static QemuOptsList vmdk_create_opts = {
30885820f1daSChunyan Liu .name = "vmdk-create-opts",
30895820f1daSChunyan Liu .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
30905820f1daSChunyan Liu .desc = {
3091db08adf5SKevin Wolf {
3092db08adf5SKevin Wolf .name = BLOCK_OPT_SIZE,
30935820f1daSChunyan Liu .type = QEMU_OPT_SIZE,
3094db08adf5SKevin Wolf .help = "Virtual disk size"
3095db08adf5SKevin Wolf },
3096db08adf5SKevin Wolf {
30977f2039f6SOthmar Pasteka .name = BLOCK_OPT_ADAPTER_TYPE,
30985820f1daSChunyan Liu .type = QEMU_OPT_STRING,
30997f2039f6SOthmar Pasteka .help = "Virtual adapter type, can be one of "
31007f2039f6SOthmar Pasteka "ide (default), lsilogic, buslogic or legacyESX"
31017f2039f6SOthmar Pasteka },
31027f2039f6SOthmar Pasteka {
3103db08adf5SKevin Wolf .name = BLOCK_OPT_BACKING_FILE,
31045820f1daSChunyan Liu .type = QEMU_OPT_STRING,
3105db08adf5SKevin Wolf .help = "File name of a base image"
3106db08adf5SKevin Wolf },
3107db08adf5SKevin Wolf {
3108d51a814cSEric Blake .name = BLOCK_OPT_BACKING_FMT,
3109d51a814cSEric Blake .type = QEMU_OPT_STRING,
3110d51a814cSEric Blake .help = "Must be 'vmdk' if present",
3111d51a814cSEric Blake },
3112d51a814cSEric Blake {
3113db08adf5SKevin Wolf .name = BLOCK_OPT_COMPAT6,
31145820f1daSChunyan Liu .type = QEMU_OPT_BOOL,
31155820f1daSChunyan Liu .help = "VMDK version 6 image",
31165820f1daSChunyan Liu .def_value_str = "off"
3117db08adf5SKevin Wolf },
3118f66fd6c3SFam Zheng {
3119f249924eSJanne Karhunen .name = BLOCK_OPT_HWVERSION,
3120f249924eSJanne Karhunen .type = QEMU_OPT_STRING,
3121f249924eSJanne Karhunen .help = "VMDK hardware version",
3122f249924eSJanne Karhunen .def_value_str = "undefined"
3123f249924eSJanne Karhunen },
3124f249924eSJanne Karhunen {
3125f3d43dfdSThomas Weißschuh .name = BLOCK_OPT_TOOLSVERSION,
3126f3d43dfdSThomas Weißschuh .type = QEMU_OPT_STRING,
3127f3d43dfdSThomas Weißschuh .help = "VMware guest tools version",
3128f3d43dfdSThomas Weißschuh },
3129f3d43dfdSThomas Weißschuh {
3130f66fd6c3SFam Zheng .name = BLOCK_OPT_SUBFMT,
31315820f1daSChunyan Liu .type = QEMU_OPT_STRING,
3132f66fd6c3SFam Zheng .help =
3133f66fd6c3SFam Zheng "VMDK flat extent format, can be one of "
31346c031aacSFam Zheng "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
3135f66fd6c3SFam Zheng },
313669e0b6dfSFam Zheng {
313769e0b6dfSFam Zheng .name = BLOCK_OPT_ZEROED_GRAIN,
31385820f1daSChunyan Liu .type = QEMU_OPT_BOOL,
31395820f1daSChunyan Liu .help = "Enable efficient zero writes "
31405820f1daSChunyan Liu "using the zeroed-grain GTE feature"
314169e0b6dfSFam Zheng },
31425820f1daSChunyan Liu { /* end of list */ }
31435820f1daSChunyan Liu }
31440e7e1989SKevin Wolf };
31450e7e1989SKevin Wolf
3146019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = {
3147019d6b8fSAnthony Liguori .format_name = "vmdk",
3148019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVVmdkState),
3149019d6b8fSAnthony Liguori .bdrv_probe = vmdk_probe,
31506511ef77SKevin Wolf .bdrv_open = vmdk_open,
31512fd61638SPaolo Bonzini .bdrv_co_check = vmdk_co_check,
31523897575fSJeff Cody .bdrv_reopen_prepare = vmdk_reopen_prepare,
31536d17e287SHanna Reitz .bdrv_reopen_commit = vmdk_reopen_commit,
31546d17e287SHanna Reitz .bdrv_reopen_abort = vmdk_reopen_abort,
315569dca43dSMax Reitz .bdrv_child_perm = bdrv_default_perms,
3156f10cc243SKevin Wolf .bdrv_co_preadv = vmdk_co_preadv,
315737b1d7d8SKevin Wolf .bdrv_co_pwritev = vmdk_co_pwritev,
3158b2c622d3SPavel Butsykin .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
3159a620f2aeSEric Blake .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
3160019d6b8fSAnthony Liguori .bdrv_close = vmdk_close,
3161efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = vmdk_co_create_opts,
31623015372dSFam Zheng .bdrv_co_create = vmdk_co_create,
3163c72080b9SEric Blake .bdrv_co_block_status = vmdk_co_block_status,
316482618d7bSEmanuele Giuseppe Esposito .bdrv_co_get_allocated_file_size = vmdk_co_get_allocated_file_size,
3165da7a50f9SFam Zheng .bdrv_has_zero_init = vmdk_has_zero_init,
3166f4c129a3SFam Zheng .bdrv_get_specific_info = vmdk_get_specific_info,
3167d34682cdSKevin Wolf .bdrv_refresh_limits = vmdk_refresh_limits,
31683d47eb0aSEmanuele Giuseppe Esposito .bdrv_co_get_info = vmdk_co_get_info,
3169abc521a9SMax Reitz .bdrv_gather_child_options = vmdk_gather_child_options,
31700e7e1989SKevin Wolf
3171d67066d8SMax Reitz .is_format = true,
31728ee79e70SKevin Wolf .supports_backing = true,
31735820f1daSChunyan Liu .create_opts = &vmdk_create_opts,
3174019d6b8fSAnthony Liguori };
3175019d6b8fSAnthony Liguori
bdrv_vmdk_init(void)3176019d6b8fSAnthony Liguori static void bdrv_vmdk_init(void)
3177019d6b8fSAnthony Liguori {
3178019d6b8fSAnthony Liguori bdrv_register(&bdrv_vmdk);
3179019d6b8fSAnthony Liguori }
3180019d6b8fSAnthony Liguori
3181019d6b8fSAnthony Liguori block_init(bdrv_vmdk_init);
3182