xref: /openbmc/qemu/block/vmdk.c (revision 29cdb251)
1019d6b8fSAnthony Liguori /*
2019d6b8fSAnthony Liguori  * Block driver for the VMDK format
3019d6b8fSAnthony Liguori  *
4019d6b8fSAnthony Liguori  * Copyright (c) 2004 Fabrice Bellard
5019d6b8fSAnthony Liguori  * Copyright (c) 2005 Filip Navara
6019d6b8fSAnthony Liguori  *
7019d6b8fSAnthony Liguori  * Permission is hereby granted, free of charge, to any person obtaining a copy
8019d6b8fSAnthony Liguori  * of this software and associated documentation files (the "Software"), to deal
9019d6b8fSAnthony Liguori  * in the Software without restriction, including without limitation the rights
10019d6b8fSAnthony Liguori  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11019d6b8fSAnthony Liguori  * copies of the Software, and to permit persons to whom the Software is
12019d6b8fSAnthony Liguori  * furnished to do so, subject to the following conditions:
13019d6b8fSAnthony Liguori  *
14019d6b8fSAnthony Liguori  * The above copyright notice and this permission notice shall be included in
15019d6b8fSAnthony Liguori  * all copies or substantial portions of the Software.
16019d6b8fSAnthony Liguori  *
17019d6b8fSAnthony Liguori  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18019d6b8fSAnthony Liguori  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19019d6b8fSAnthony Liguori  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20019d6b8fSAnthony Liguori  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21019d6b8fSAnthony Liguori  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22019d6b8fSAnthony Liguori  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23019d6b8fSAnthony Liguori  * THE SOFTWARE.
24019d6b8fSAnthony Liguori  */
25019d6b8fSAnthony Liguori 
26019d6b8fSAnthony Liguori #include "qemu-common.h"
27019d6b8fSAnthony Liguori #include "block_int.h"
28019d6b8fSAnthony Liguori #include "module.h"
292bc3166cSKevin Wolf #include "migration.h"
302923d34fSStefan Weil #include <zlib.h>
31019d6b8fSAnthony Liguori 
/*
 * Magic words of sparse extents.  They are compared against the first four
 * bytes of the file interpreted as a big-endian 32-bit value, so the bytes on
 * disk read "COWD" (VMDK3) and "KDMV" (VMDK4).
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

/* VMDK4Header.compressAlgorithm value that marks an extent as compressed */
#define VMDK4_COMPRESSION_DEFLATE 1

/* Bits of VMDK4Header.flags */
#define VMDK4_FLAG_RGD (1 << 1)         /* rgd_offset is used as backup L1 */
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)     /* sets VmdkExtent.has_marker */
38019d6b8fSAnthony Liguori 
/*
 * Header of a VMDK3 ("COWD") sparse extent.  vmdk_open_vmdk3() reads it from
 * the bytes that follow the 32-bit magic and converts the fields it uses from
 * little-endian.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;          /* becomes the extent's sector count */
    uint32_t granularity;           /* becomes the extent's cluster_sectors */
    uint32_t l1dir_offset;          /* L1 table position; << 9 gives bytes */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
51019d6b8fSAnthony Liguori 
52019d6b8fSAnthony Liguori typedef struct {
53019d6b8fSAnthony Liguori     uint32_t version;
54019d6b8fSAnthony Liguori     uint32_t flags;
55019d6b8fSAnthony Liguori     int64_t capacity;
56019d6b8fSAnthony Liguori     int64_t granularity;
57019d6b8fSAnthony Liguori     int64_t desc_offset;
58019d6b8fSAnthony Liguori     int64_t desc_size;
59019d6b8fSAnthony Liguori     int32_t num_gtes_per_gte;
60019d6b8fSAnthony Liguori     int64_t gd_offset;
61bb45ded9SFam Zheng     int64_t rgd_offset;
62019d6b8fSAnthony Liguori     int64_t grain_offset;
63019d6b8fSAnthony Liguori     char filler[1];
64019d6b8fSAnthony Liguori     char check_bytes[4];
65432bb170SFam Zheng     uint16_t compressAlgorithm;
66541dc0d4SStefan Weil } QEMU_PACKED VMDK4Header;
67019d6b8fSAnthony Liguori 
68019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16
69019d6b8fSAnthony Liguori 
70b3976d3cSFam Zheng typedef struct VmdkExtent {
71b3976d3cSFam Zheng     BlockDriverState *file;
72b3976d3cSFam Zheng     bool flat;
73432bb170SFam Zheng     bool compressed;
74432bb170SFam Zheng     bool has_marker;
75b3976d3cSFam Zheng     int64_t sectors;
76b3976d3cSFam Zheng     int64_t end_sector;
777fa60fa3SFam Zheng     int64_t flat_start_offset;
78019d6b8fSAnthony Liguori     int64_t l1_table_offset;
79019d6b8fSAnthony Liguori     int64_t l1_backup_table_offset;
80019d6b8fSAnthony Liguori     uint32_t *l1_table;
81019d6b8fSAnthony Liguori     uint32_t *l1_backup_table;
82019d6b8fSAnthony Liguori     unsigned int l1_size;
83019d6b8fSAnthony Liguori     uint32_t l1_entry_sectors;
84019d6b8fSAnthony Liguori 
85019d6b8fSAnthony Liguori     unsigned int l2_size;
86019d6b8fSAnthony Liguori     uint32_t *l2_cache;
87019d6b8fSAnthony Liguori     uint32_t l2_cache_offsets[L2_CACHE_SIZE];
88019d6b8fSAnthony Liguori     uint32_t l2_cache_counts[L2_CACHE_SIZE];
89019d6b8fSAnthony Liguori 
90019d6b8fSAnthony Liguori     unsigned int cluster_sectors;
91b3976d3cSFam Zheng } VmdkExtent;
92b3976d3cSFam Zheng 
93b3976d3cSFam Zheng typedef struct BDRVVmdkState {
94848c66e8SPaolo Bonzini     CoMutex lock;
95e1da9b24SFam Zheng     int desc_offset;
9669b4d86dSFam Zheng     bool cid_updated;
97019d6b8fSAnthony Liguori     uint32_t parent_cid;
98b3976d3cSFam Zheng     int num_extents;
99b3976d3cSFam Zheng     /* Extent array with num_extents entries, ascend ordered by address */
100b3976d3cSFam Zheng     VmdkExtent *extents;
1012bc3166cSKevin Wolf     Error *migration_blocker;
102019d6b8fSAnthony Liguori } BDRVVmdkState;
103019d6b8fSAnthony Liguori 
/*
 * Bookkeeping record for a cluster lookup: an offset, the L1 and L2 indices
 * involved, an L2 offset and a validity flag.  The code that fills it in is
 * not part of this section of the file.
 */
typedef struct VmdkMetaData {
    uint32_t offset;
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    int valid;
} VmdkMetaData;
111019d6b8fSAnthony Liguori 
/*
 * Marker that precedes grain data: an LBA, the size of the payload and the
 * payload itself.  The payload is a C99 flexible array member (instead of the
 * GNU zero-length array), so it contributes nothing to sizeof(VmdkGrainMarker)
 * and the struct must be allocated with room for the data appended.
 */
typedef struct VmdkGrainMarker {
    uint64_t lba;
    uint32_t size;
    uint8_t  data[];
} VmdkGrainMarker;
117432bb170SFam Zheng 
118019d6b8fSAnthony Liguori static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
119019d6b8fSAnthony Liguori {
120019d6b8fSAnthony Liguori     uint32_t magic;
121019d6b8fSAnthony Liguori 
122ae261c86SFam Zheng     if (buf_size < 4) {
123019d6b8fSAnthony Liguori         return 0;
124ae261c86SFam Zheng     }
125019d6b8fSAnthony Liguori     magic = be32_to_cpu(*(uint32_t *)buf);
126019d6b8fSAnthony Liguori     if (magic == VMDK3_MAGIC ||
12701fc99d6SFam Zheng         magic == VMDK4_MAGIC) {
128019d6b8fSAnthony Liguori         return 100;
12901fc99d6SFam Zheng     } else {
13001fc99d6SFam Zheng         const char *p = (const char *)buf;
13101fc99d6SFam Zheng         const char *end = p + buf_size;
13201fc99d6SFam Zheng         while (p < end) {
13301fc99d6SFam Zheng             if (*p == '#') {
13401fc99d6SFam Zheng                 /* skip comment line */
13501fc99d6SFam Zheng                 while (p < end && *p != '\n') {
13601fc99d6SFam Zheng                     p++;
13701fc99d6SFam Zheng                 }
13801fc99d6SFam Zheng                 p++;
13901fc99d6SFam Zheng                 continue;
14001fc99d6SFam Zheng             }
14101fc99d6SFam Zheng             if (*p == ' ') {
14201fc99d6SFam Zheng                 while (p < end && *p == ' ') {
14301fc99d6SFam Zheng                     p++;
14401fc99d6SFam Zheng                 }
14501fc99d6SFam Zheng                 /* skip '\r' if windows line endings used. */
14601fc99d6SFam Zheng                 if (p < end && *p == '\r') {
14701fc99d6SFam Zheng                     p++;
14801fc99d6SFam Zheng                 }
14901fc99d6SFam Zheng                 /* only accept blank lines before 'version=' line */
15001fc99d6SFam Zheng                 if (p == end || *p != '\n') {
151019d6b8fSAnthony Liguori                     return 0;
152019d6b8fSAnthony Liguori                 }
15301fc99d6SFam Zheng                 p++;
15401fc99d6SFam Zheng                 continue;
15501fc99d6SFam Zheng             }
15601fc99d6SFam Zheng             if (end - p >= strlen("version=X\n")) {
15701fc99d6SFam Zheng                 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
15801fc99d6SFam Zheng                     strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
15901fc99d6SFam Zheng                     return 100;
16001fc99d6SFam Zheng                 }
16101fc99d6SFam Zheng             }
16201fc99d6SFam Zheng             if (end - p >= strlen("version=X\r\n")) {
16301fc99d6SFam Zheng                 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
16401fc99d6SFam Zheng                     strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
16501fc99d6SFam Zheng                     return 100;
16601fc99d6SFam Zheng                 }
16701fc99d6SFam Zheng             }
16801fc99d6SFam Zheng             return 0;
16901fc99d6SFam Zheng         }
17001fc99d6SFam Zheng         return 0;
17101fc99d6SFam Zheng     }
17201fc99d6SFam Zheng }
173019d6b8fSAnthony Liguori 
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * CID read from the backing file; otherwise every CID is accepted. */
#define CHECK_CID 1

/* Sizes in bytes */
#define SECTOR_SIZE 512
#define DESC_SIZE (20 * SECTOR_SIZE)    /* descriptor buffer: 20 sectors */
#define BUF_SIZE 4096
#define HEADER_SIZE SECTOR_SIZE         /* first sector of the file */
180019d6b8fSAnthony Liguori 
181b3976d3cSFam Zheng static void vmdk_free_extents(BlockDriverState *bs)
182b3976d3cSFam Zheng {
183b3976d3cSFam Zheng     int i;
184b3976d3cSFam Zheng     BDRVVmdkState *s = bs->opaque;
185b3c0bfb6SFam Zheng     VmdkExtent *e;
186b3976d3cSFam Zheng 
187b3976d3cSFam Zheng     for (i = 0; i < s->num_extents; i++) {
188b3c0bfb6SFam Zheng         e = &s->extents[i];
189b3c0bfb6SFam Zheng         g_free(e->l1_table);
190b3c0bfb6SFam Zheng         g_free(e->l2_cache);
191b3c0bfb6SFam Zheng         g_free(e->l1_backup_table);
192b3c0bfb6SFam Zheng         if (e->file != bs->file) {
193b3c0bfb6SFam Zheng             bdrv_delete(e->file);
194b3c0bfb6SFam Zheng         }
195b3976d3cSFam Zheng     }
1967267c094SAnthony Liguori     g_free(s->extents);
197b3976d3cSFam Zheng }
198b3976d3cSFam Zheng 
19986c6b429SFam Zheng static void vmdk_free_last_extent(BlockDriverState *bs)
20086c6b429SFam Zheng {
20186c6b429SFam Zheng     BDRVVmdkState *s = bs->opaque;
20286c6b429SFam Zheng 
20386c6b429SFam Zheng     if (s->num_extents == 0) {
20486c6b429SFam Zheng         return;
20586c6b429SFam Zheng     }
20686c6b429SFam Zheng     s->num_extents--;
20786c6b429SFam Zheng     s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent));
20886c6b429SFam Zheng }
20986c6b429SFam Zheng 
210019d6b8fSAnthony Liguori static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
211019d6b8fSAnthony Liguori {
212019d6b8fSAnthony Liguori     char desc[DESC_SIZE];
2138379e46dSPavel Borzenkov     uint32_t cid = 0xffffffff;
214019d6b8fSAnthony Liguori     const char *p_name, *cid_str;
215019d6b8fSAnthony Liguori     size_t cid_str_size;
216e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
21799f1835dSKevin Wolf     int ret;
218019d6b8fSAnthony Liguori 
21999f1835dSKevin Wolf     ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
22099f1835dSKevin Wolf     if (ret < 0) {
221019d6b8fSAnthony Liguori         return 0;
222e1da9b24SFam Zheng     }
223019d6b8fSAnthony Liguori 
224019d6b8fSAnthony Liguori     if (parent) {
225019d6b8fSAnthony Liguori         cid_str = "parentCID";
226019d6b8fSAnthony Liguori         cid_str_size = sizeof("parentCID");
227019d6b8fSAnthony Liguori     } else {
228019d6b8fSAnthony Liguori         cid_str = "CID";
229019d6b8fSAnthony Liguori         cid_str_size = sizeof("CID");
230019d6b8fSAnthony Liguori     }
231019d6b8fSAnthony Liguori 
23293897b9fSKevin Wolf     desc[DESC_SIZE - 1] = '\0';
233ae261c86SFam Zheng     p_name = strstr(desc, cid_str);
234ae261c86SFam Zheng     if (p_name != NULL) {
235019d6b8fSAnthony Liguori         p_name += cid_str_size;
236019d6b8fSAnthony Liguori         sscanf(p_name, "%x", &cid);
237019d6b8fSAnthony Liguori     }
238019d6b8fSAnthony Liguori 
239019d6b8fSAnthony Liguori     return cid;
240019d6b8fSAnthony Liguori }
241019d6b8fSAnthony Liguori 
242019d6b8fSAnthony Liguori static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
243019d6b8fSAnthony Liguori {
244019d6b8fSAnthony Liguori     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
245019d6b8fSAnthony Liguori     char *p_name, *tmp_str;
246e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
24799f1835dSKevin Wolf     int ret;
248019d6b8fSAnthony Liguori 
24999f1835dSKevin Wolf     ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
25099f1835dSKevin Wolf     if (ret < 0) {
25199f1835dSKevin Wolf         return ret;
252e1da9b24SFam Zheng     }
253019d6b8fSAnthony Liguori 
25493897b9fSKevin Wolf     desc[DESC_SIZE - 1] = '\0';
255019d6b8fSAnthony Liguori     tmp_str = strstr(desc, "parentCID");
25693897b9fSKevin Wolf     if (tmp_str == NULL) {
25793897b9fSKevin Wolf         return -EINVAL;
25893897b9fSKevin Wolf     }
25993897b9fSKevin Wolf 
260019d6b8fSAnthony Liguori     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
261ae261c86SFam Zheng     p_name = strstr(desc, "CID");
262ae261c86SFam Zheng     if (p_name != NULL) {
263019d6b8fSAnthony Liguori         p_name += sizeof("CID");
264019d6b8fSAnthony Liguori         snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
265019d6b8fSAnthony Liguori         pstrcat(desc, sizeof(desc), tmp_desc);
266019d6b8fSAnthony Liguori     }
267019d6b8fSAnthony Liguori 
26899f1835dSKevin Wolf     ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
26999f1835dSKevin Wolf     if (ret < 0) {
27099f1835dSKevin Wolf         return ret;
271e1da9b24SFam Zheng     }
27299f1835dSKevin Wolf 
273019d6b8fSAnthony Liguori     return 0;
274019d6b8fSAnthony Liguori }
275019d6b8fSAnthony Liguori 
276019d6b8fSAnthony Liguori static int vmdk_is_cid_valid(BlockDriverState *bs)
277019d6b8fSAnthony Liguori {
278019d6b8fSAnthony Liguori #ifdef CHECK_CID
279019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
280b171271aSKevin Wolf     BlockDriverState *p_bs = bs->backing_hd;
281019d6b8fSAnthony Liguori     uint32_t cur_pcid;
282019d6b8fSAnthony Liguori 
283019d6b8fSAnthony Liguori     if (p_bs) {
284019d6b8fSAnthony Liguori         cur_pcid = vmdk_read_cid(p_bs, 0);
285ae261c86SFam Zheng         if (s->parent_cid != cur_pcid) {
286ae261c86SFam Zheng             /* CID not valid */
287019d6b8fSAnthony Liguori             return 0;
288019d6b8fSAnthony Liguori         }
289ae261c86SFam Zheng     }
290019d6b8fSAnthony Liguori #endif
291ae261c86SFam Zheng     /* CID valid */
292019d6b8fSAnthony Liguori     return 1;
293019d6b8fSAnthony Liguori }
294019d6b8fSAnthony Liguori 
2959949f97eSKevin Wolf static int vmdk_parent_open(BlockDriverState *bs)
296019d6b8fSAnthony Liguori {
297019d6b8fSAnthony Liguori     char *p_name;
2987fa60fa3SFam Zheng     char desc[DESC_SIZE + 1];
299e1da9b24SFam Zheng     BDRVVmdkState *s = bs->opaque;
300588b65a3SPaolo Bonzini     int ret;
301019d6b8fSAnthony Liguori 
3027fa60fa3SFam Zheng     desc[DESC_SIZE] = '\0';
303588b65a3SPaolo Bonzini     ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
304588b65a3SPaolo Bonzini     if (ret < 0) {
305588b65a3SPaolo Bonzini         return ret;
306e1da9b24SFam Zheng     }
307019d6b8fSAnthony Liguori 
308ae261c86SFam Zheng     p_name = strstr(desc, "parentFileNameHint");
309ae261c86SFam Zheng     if (p_name != NULL) {
310019d6b8fSAnthony Liguori         char *end_name;
311019d6b8fSAnthony Liguori 
312019d6b8fSAnthony Liguori         p_name += sizeof("parentFileNameHint") + 1;
313ae261c86SFam Zheng         end_name = strchr(p_name, '\"');
314ae261c86SFam Zheng         if (end_name == NULL) {
315588b65a3SPaolo Bonzini             return -EINVAL;
316ae261c86SFam Zheng         }
317ae261c86SFam Zheng         if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
318588b65a3SPaolo Bonzini             return -EINVAL;
319ae261c86SFam Zheng         }
320019d6b8fSAnthony Liguori 
321b171271aSKevin Wolf         pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
322019d6b8fSAnthony Liguori     }
323019d6b8fSAnthony Liguori 
324019d6b8fSAnthony Liguori     return 0;
325019d6b8fSAnthony Liguori }
326019d6b8fSAnthony Liguori 
327b3976d3cSFam Zheng /* Create and append extent to the extent array. Return the added VmdkExtent
328b3976d3cSFam Zheng  * address. return NULL if allocation failed. */
329b3976d3cSFam Zheng static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
330b3976d3cSFam Zheng                            BlockDriverState *file, bool flat, int64_t sectors,
331b3976d3cSFam Zheng                            int64_t l1_offset, int64_t l1_backup_offset,
332b3976d3cSFam Zheng                            uint32_t l1_size,
333b3976d3cSFam Zheng                            int l2_size, unsigned int cluster_sectors)
334b3976d3cSFam Zheng {
335b3976d3cSFam Zheng     VmdkExtent *extent;
336b3976d3cSFam Zheng     BDRVVmdkState *s = bs->opaque;
337b3976d3cSFam Zheng 
3387267c094SAnthony Liguori     s->extents = g_realloc(s->extents,
339b3976d3cSFam Zheng                               (s->num_extents + 1) * sizeof(VmdkExtent));
340b3976d3cSFam Zheng     extent = &s->extents[s->num_extents];
341b3976d3cSFam Zheng     s->num_extents++;
342b3976d3cSFam Zheng 
343b3976d3cSFam Zheng     memset(extent, 0, sizeof(VmdkExtent));
344b3976d3cSFam Zheng     extent->file = file;
345b3976d3cSFam Zheng     extent->flat = flat;
346b3976d3cSFam Zheng     extent->sectors = sectors;
347b3976d3cSFam Zheng     extent->l1_table_offset = l1_offset;
348b3976d3cSFam Zheng     extent->l1_backup_table_offset = l1_backup_offset;
349b3976d3cSFam Zheng     extent->l1_size = l1_size;
350b3976d3cSFam Zheng     extent->l1_entry_sectors = l2_size * cluster_sectors;
351b3976d3cSFam Zheng     extent->l2_size = l2_size;
352b3976d3cSFam Zheng     extent->cluster_sectors = cluster_sectors;
353b3976d3cSFam Zheng 
354b3976d3cSFam Zheng     if (s->num_extents > 1) {
355b3976d3cSFam Zheng         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
356b3976d3cSFam Zheng     } else {
357b3976d3cSFam Zheng         extent->end_sector = extent->sectors;
358b3976d3cSFam Zheng     }
359b3976d3cSFam Zheng     bs->total_sectors = extent->end_sector;
360b3976d3cSFam Zheng     return extent;
361b3976d3cSFam Zheng }
362b3976d3cSFam Zheng 
363b4b3ab14SFam Zheng static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
364019d6b8fSAnthony Liguori {
365b4b3ab14SFam Zheng     int ret;
366b4b3ab14SFam Zheng     int l1_size, i;
367b4b3ab14SFam Zheng 
368b4b3ab14SFam Zheng     /* read the L1 table */
369b4b3ab14SFam Zheng     l1_size = extent->l1_size * sizeof(uint32_t);
3707267c094SAnthony Liguori     extent->l1_table = g_malloc(l1_size);
371b4b3ab14SFam Zheng     ret = bdrv_pread(extent->file,
372b4b3ab14SFam Zheng                     extent->l1_table_offset,
373b4b3ab14SFam Zheng                     extent->l1_table,
374b4b3ab14SFam Zheng                     l1_size);
375b4b3ab14SFam Zheng     if (ret < 0) {
376b4b3ab14SFam Zheng         goto fail_l1;
377b4b3ab14SFam Zheng     }
378b4b3ab14SFam Zheng     for (i = 0; i < extent->l1_size; i++) {
379b4b3ab14SFam Zheng         le32_to_cpus(&extent->l1_table[i]);
380b4b3ab14SFam Zheng     }
381b4b3ab14SFam Zheng 
382b4b3ab14SFam Zheng     if (extent->l1_backup_table_offset) {
3837267c094SAnthony Liguori         extent->l1_backup_table = g_malloc(l1_size);
384b4b3ab14SFam Zheng         ret = bdrv_pread(extent->file,
385b4b3ab14SFam Zheng                         extent->l1_backup_table_offset,
386b4b3ab14SFam Zheng                         extent->l1_backup_table,
387b4b3ab14SFam Zheng                         l1_size);
388b4b3ab14SFam Zheng         if (ret < 0) {
389b4b3ab14SFam Zheng             goto fail_l1b;
390b4b3ab14SFam Zheng         }
391b4b3ab14SFam Zheng         for (i = 0; i < extent->l1_size; i++) {
392b4b3ab14SFam Zheng             le32_to_cpus(&extent->l1_backup_table[i]);
393b4b3ab14SFam Zheng         }
394b4b3ab14SFam Zheng     }
395b4b3ab14SFam Zheng 
396b4b3ab14SFam Zheng     extent->l2_cache =
3977267c094SAnthony Liguori         g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
398b4b3ab14SFam Zheng     return 0;
399b4b3ab14SFam Zheng  fail_l1b:
4007267c094SAnthony Liguori     g_free(extent->l1_backup_table);
401b4b3ab14SFam Zheng  fail_l1:
4027267c094SAnthony Liguori     g_free(extent->l1_table);
403b4b3ab14SFam Zheng     return ret;
404b4b3ab14SFam Zheng }
405b4b3ab14SFam Zheng 
40686c6b429SFam Zheng static int vmdk_open_vmdk3(BlockDriverState *bs,
40786c6b429SFam Zheng                            BlockDriverState *file,
40886c6b429SFam Zheng                            int flags)
409b4b3ab14SFam Zheng {
410b4b3ab14SFam Zheng     int ret;
411019d6b8fSAnthony Liguori     uint32_t magic;
412019d6b8fSAnthony Liguori     VMDK3Header header;
413b4b3ab14SFam Zheng     VmdkExtent *extent;
414b4b3ab14SFam Zheng 
41586c6b429SFam Zheng     ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
416b4b3ab14SFam Zheng     if (ret < 0) {
41786c6b429SFam Zheng         return ret;
418b3976d3cSFam Zheng     }
419b4b3ab14SFam Zheng     extent = vmdk_add_extent(bs,
420b4b3ab14SFam Zheng                              bs->file, false,
421b3976d3cSFam Zheng                              le32_to_cpu(header.disk_sectors),
422b4b3ab14SFam Zheng                              le32_to_cpu(header.l1dir_offset) << 9,
423b4b3ab14SFam Zheng                              0, 1 << 6, 1 << 9,
424b4b3ab14SFam Zheng                              le32_to_cpu(header.granularity));
425b4b3ab14SFam Zheng     ret = vmdk_init_tables(bs, extent);
426b4b3ab14SFam Zheng     if (ret) {
42786c6b429SFam Zheng         /* free extent allocated by vmdk_add_extent */
42886c6b429SFam Zheng         vmdk_free_last_extent(bs);
429b4b3ab14SFam Zheng     }
430b4b3ab14SFam Zheng     return ret;
431b4b3ab14SFam Zheng }
432b4b3ab14SFam Zheng 
433f16f509dSFam Zheng static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
434f16f509dSFam Zheng                                int64_t desc_offset);
435f16f509dSFam Zheng 
43686c6b429SFam Zheng static int vmdk_open_vmdk4(BlockDriverState *bs,
43786c6b429SFam Zheng                            BlockDriverState *file,
43886c6b429SFam Zheng                            int flags)
439b4b3ab14SFam Zheng {
440b4b3ab14SFam Zheng     int ret;
441b4b3ab14SFam Zheng     uint32_t magic;
442b4b3ab14SFam Zheng     uint32_t l1_size, l1_entry_sectors;
443019d6b8fSAnthony Liguori     VMDK4Header header;
444b4b3ab14SFam Zheng     VmdkExtent *extent;
445bb45ded9SFam Zheng     int64_t l1_backup_offset = 0;
446b4b3ab14SFam Zheng 
44786c6b429SFam Zheng     ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
448b4b3ab14SFam Zheng     if (ret < 0) {
44986c6b429SFam Zheng         return ret;
450b3976d3cSFam Zheng     }
451f16f509dSFam Zheng     if (header.capacity == 0 && header.desc_offset) {
452f16f509dSFam Zheng         return vmdk_open_desc_file(bs, flags, header.desc_offset << 9);
453f16f509dSFam Zheng     }
454b3976d3cSFam Zheng     l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
455b3976d3cSFam Zheng                         * le64_to_cpu(header.granularity);
45675d12341SStefan Weil     if (l1_entry_sectors == 0) {
45786c6b429SFam Zheng         return -EINVAL;
45886c6b429SFam Zheng     }
459b3976d3cSFam Zheng     l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
460b3976d3cSFam Zheng                 / l1_entry_sectors;
461bb45ded9SFam Zheng     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
462bb45ded9SFam Zheng         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
463bb45ded9SFam Zheng     }
46486c6b429SFam Zheng     extent = vmdk_add_extent(bs, file, false,
465b3976d3cSFam Zheng                           le64_to_cpu(header.capacity),
466b3976d3cSFam Zheng                           le64_to_cpu(header.gd_offset) << 9,
467bb45ded9SFam Zheng                           l1_backup_offset,
468b3976d3cSFam Zheng                           l1_size,
469b3976d3cSFam Zheng                           le32_to_cpu(header.num_gtes_per_gte),
470b3976d3cSFam Zheng                           le64_to_cpu(header.granularity));
471432bb170SFam Zheng     extent->compressed =
472432bb170SFam Zheng         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
473432bb170SFam Zheng     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
474b4b3ab14SFam Zheng     ret = vmdk_init_tables(bs, extent);
475b4b3ab14SFam Zheng     if (ret) {
47686c6b429SFam Zheng         /* free extent allocated by vmdk_add_extent */
47786c6b429SFam Zheng         vmdk_free_last_extent(bs);
478019d6b8fSAnthony Liguori     }
479b4b3ab14SFam Zheng     return ret;
480b4b3ab14SFam Zheng }
481b4b3ab14SFam Zheng 
/*
 * Look up a quoted option value in a descriptor text.
 *
 * Finds the first occurrence of opt_name in desc, skips the two characters
 * after it (expected to be =") and copies everything up to the next '"' into
 * buf as a NUL-terminated string.
 *
 * Returns 0 on success and -1 when opt_name is not found, the text ends
 * before a closing quote, or the value plus terminator exceeds buf_size.
 */
static int vmdk_parse_description(const char *desc, const char *opt_name,
        char *buf, int buf_size)
{
    const char *desc_end = desc + strlen(desc);
    const char *value, *quote;

    value = strstr(desc, opt_name);
    if (value == NULL) {
        return -1;
    }
    /* Skip "=\"" following opt_name */
    value += strlen(opt_name) + 2;
    if (value >= desc_end) {
        return -1;
    }

    for (quote = value; quote < desc_end; quote++) {
        if (*quote == '"') {
            break;
        }
    }
    if (quote == desc_end || buf_size < quote - value + 1) {
        return -1;
    }

    /* copy exactly the characters between the quotes and terminate */
    memcpy(buf, value, quote - value);
    buf[quote - value] = '\0';
    return 0;
}
5087fa60fa3SFam Zheng 
50986c6b429SFam Zheng /* Open an extent file and append to bs array */
51086c6b429SFam Zheng static int vmdk_open_sparse(BlockDriverState *bs,
51186c6b429SFam Zheng                             BlockDriverState *file,
51286c6b429SFam Zheng                             int flags)
51386c6b429SFam Zheng {
51486c6b429SFam Zheng     uint32_t magic;
51586c6b429SFam Zheng 
51686c6b429SFam Zheng     if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
51786c6b429SFam Zheng         return -EIO;
51886c6b429SFam Zheng     }
51986c6b429SFam Zheng 
52086c6b429SFam Zheng     magic = be32_to_cpu(magic);
52186c6b429SFam Zheng     switch (magic) {
52286c6b429SFam Zheng         case VMDK3_MAGIC:
52386c6b429SFam Zheng             return vmdk_open_vmdk3(bs, file, flags);
52486c6b429SFam Zheng             break;
52586c6b429SFam Zheng         case VMDK4_MAGIC:
52686c6b429SFam Zheng             return vmdk_open_vmdk4(bs, file, flags);
52786c6b429SFam Zheng             break;
52886c6b429SFam Zheng         default:
52986c6b429SFam Zheng             return -EINVAL;
53086c6b429SFam Zheng             break;
53186c6b429SFam Zheng     }
53286c6b429SFam Zheng }
53386c6b429SFam Zheng 
5347fa60fa3SFam Zheng static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
5357fa60fa3SFam Zheng         const char *desc_file_path)
5367fa60fa3SFam Zheng {
5377fa60fa3SFam Zheng     int ret;
5387fa60fa3SFam Zheng     char access[11];
5397fa60fa3SFam Zheng     char type[11];
5407fa60fa3SFam Zheng     char fname[512];
5417fa60fa3SFam Zheng     const char *p = desc;
5427fa60fa3SFam Zheng     int64_t sectors = 0;
5437fa60fa3SFam Zheng     int64_t flat_offset;
54486c6b429SFam Zheng     char extent_path[PATH_MAX];
54586c6b429SFam Zheng     BlockDriverState *extent_file;
5467fa60fa3SFam Zheng 
5477fa60fa3SFam Zheng     while (*p) {
5487fa60fa3SFam Zheng         /* parse extent line:
5497fa60fa3SFam Zheng          * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
5507fa60fa3SFam Zheng          * or
5517fa60fa3SFam Zheng          * RW [size in sectors] SPARSE "file-name.vmdk"
5527fa60fa3SFam Zheng          */
5537fa60fa3SFam Zheng         flat_offset = -1;
5547fa60fa3SFam Zheng         ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
5557fa60fa3SFam Zheng                 access, &sectors, type, fname, &flat_offset);
5567fa60fa3SFam Zheng         if (ret < 4 || strcmp(access, "RW")) {
5577fa60fa3SFam Zheng             goto next_line;
5587fa60fa3SFam Zheng         } else if (!strcmp(type, "FLAT")) {
5597fa60fa3SFam Zheng             if (ret != 5 || flat_offset < 0) {
5607fa60fa3SFam Zheng                 return -EINVAL;
5617fa60fa3SFam Zheng             }
5627fa60fa3SFam Zheng         } else if (ret != 4) {
5637fa60fa3SFam Zheng             return -EINVAL;
5647fa60fa3SFam Zheng         }
5657fa60fa3SFam Zheng 
5667fa60fa3SFam Zheng         /* trim the quotation marks around */
5677fa60fa3SFam Zheng         if (fname[0] == '"') {
5687fa60fa3SFam Zheng             memmove(fname, fname + 1, strlen(fname));
5697fa60fa3SFam Zheng             if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
5707fa60fa3SFam Zheng                 return -EINVAL;
5717fa60fa3SFam Zheng             }
5727fa60fa3SFam Zheng             fname[strlen(fname) - 1] = '\0';
5737fa60fa3SFam Zheng         }
5747fa60fa3SFam Zheng         if (sectors <= 0 ||
5757fa60fa3SFam Zheng             (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
5767fa60fa3SFam Zheng             (strcmp(access, "RW"))) {
5777fa60fa3SFam Zheng             goto next_line;
5787fa60fa3SFam Zheng         }
5797fa60fa3SFam Zheng 
5807fa60fa3SFam Zheng         path_combine(extent_path, sizeof(extent_path),
5817fa60fa3SFam Zheng                 desc_file_path, fname);
5827fa60fa3SFam Zheng         ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
5837fa60fa3SFam Zheng         if (ret) {
5847fa60fa3SFam Zheng             return ret;
5857fa60fa3SFam Zheng         }
58686c6b429SFam Zheng 
58786c6b429SFam Zheng         /* save to extents array */
58886c6b429SFam Zheng         if (!strcmp(type, "FLAT")) {
58986c6b429SFam Zheng             /* FLAT extent */
59086c6b429SFam Zheng             VmdkExtent *extent;
59186c6b429SFam Zheng 
5927fa60fa3SFam Zheng             extent = vmdk_add_extent(bs, extent_file, true, sectors,
5937fa60fa3SFam Zheng                             0, 0, 0, 0, sectors);
594f16f509dSFam Zheng             extent->flat_start_offset = flat_offset << 9;
59586c6b429SFam Zheng         } else if (!strcmp(type, "SPARSE")) {
59686c6b429SFam Zheng             /* SPARSE extent */
59786c6b429SFam Zheng             ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
59886c6b429SFam Zheng             if (ret) {
59986c6b429SFam Zheng                 bdrv_delete(extent_file);
60086c6b429SFam Zheng                 return ret;
60186c6b429SFam Zheng             }
6027fa60fa3SFam Zheng         } else {
6037fa60fa3SFam Zheng             fprintf(stderr,
6047fa60fa3SFam Zheng                 "VMDK: Not supported extent type \"%s\""".\n", type);
6057fa60fa3SFam Zheng             return -ENOTSUP;
6067fa60fa3SFam Zheng         }
6077fa60fa3SFam Zheng next_line:
6087fa60fa3SFam Zheng         /* move to next line */
6097fa60fa3SFam Zheng         while (*p && *p != '\n') {
6107fa60fa3SFam Zheng             p++;
6117fa60fa3SFam Zheng         }
6127fa60fa3SFam Zheng         p++;
6137fa60fa3SFam Zheng     }
6147fa60fa3SFam Zheng     return 0;
6157fa60fa3SFam Zheng }
6167fa60fa3SFam Zheng 
617f16f509dSFam Zheng static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
618f16f509dSFam Zheng                                int64_t desc_offset)
6197fa60fa3SFam Zheng {
6207fa60fa3SFam Zheng     int ret;
6217fa60fa3SFam Zheng     char buf[2048];
6227fa60fa3SFam Zheng     char ct[128];
6237fa60fa3SFam Zheng     BDRVVmdkState *s = bs->opaque;
6247fa60fa3SFam Zheng 
625f16f509dSFam Zheng     ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
6267fa60fa3SFam Zheng     if (ret < 0) {
6277fa60fa3SFam Zheng         return ret;
6287fa60fa3SFam Zheng     }
6297fa60fa3SFam Zheng     buf[2047] = '\0';
6307fa60fa3SFam Zheng     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
6317fa60fa3SFam Zheng         return -EINVAL;
6327fa60fa3SFam Zheng     }
6336398de51SFam Zheng     if (strcmp(ct, "monolithicFlat") &&
63486c6b429SFam Zheng         strcmp(ct, "twoGbMaxExtentSparse") &&
6356398de51SFam Zheng         strcmp(ct, "twoGbMaxExtentFlat")) {
6367fa60fa3SFam Zheng         fprintf(stderr,
6377fa60fa3SFam Zheng                 "VMDK: Not supported image type \"%s\""".\n", ct);
6387fa60fa3SFam Zheng         return -ENOTSUP;
6397fa60fa3SFam Zheng     }
6407fa60fa3SFam Zheng     s->desc_offset = 0;
641bae0a0ccSPaolo Bonzini     return vmdk_parse_extents(buf, bs, bs->file->filename);
6427fa60fa3SFam Zheng }
6437fa60fa3SFam Zheng 
644b4b3ab14SFam Zheng static int vmdk_open(BlockDriverState *bs, int flags)
645b4b3ab14SFam Zheng {
64686c6b429SFam Zheng     int ret;
64786c6b429SFam Zheng     BDRVVmdkState *s = bs->opaque;
648b4b3ab14SFam Zheng 
64986c6b429SFam Zheng     if (vmdk_open_sparse(bs, bs->file, flags) == 0) {
65086c6b429SFam Zheng         s->desc_offset = 0x200;
651bae0a0ccSPaolo Bonzini     } else {
652bae0a0ccSPaolo Bonzini         ret = vmdk_open_desc_file(bs, flags, 0);
653bae0a0ccSPaolo Bonzini         if (ret) {
654bae0a0ccSPaolo Bonzini             goto fail;
655bae0a0ccSPaolo Bonzini         }
656bae0a0ccSPaolo Bonzini     }
65786c6b429SFam Zheng     /* try to open parent images, if exist */
65886c6b429SFam Zheng     ret = vmdk_parent_open(bs);
65986c6b429SFam Zheng     if (ret) {
660bae0a0ccSPaolo Bonzini         goto fail;
661b4b3ab14SFam Zheng     }
66286c6b429SFam Zheng     s->parent_cid = vmdk_read_cid(bs, 1);
663848c66e8SPaolo Bonzini     qemu_co_mutex_init(&s->lock);
6642bc3166cSKevin Wolf 
6652bc3166cSKevin Wolf     /* Disable migration when VMDK images are used */
6662bc3166cSKevin Wolf     error_set(&s->migration_blocker,
6672bc3166cSKevin Wolf               QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
6682bc3166cSKevin Wolf               "vmdk", bs->device_name, "live migration");
6692bc3166cSKevin Wolf     migrate_add_blocker(s->migration_blocker);
6702bc3166cSKevin Wolf 
6712bc3166cSKevin Wolf     return 0;
672bae0a0ccSPaolo Bonzini 
673bae0a0ccSPaolo Bonzini fail:
674bae0a0ccSPaolo Bonzini     vmdk_free_extents(bs);
675bae0a0ccSPaolo Bonzini     return ret;
676019d6b8fSAnthony Liguori }
677019d6b8fSAnthony Liguori 
678b3976d3cSFam Zheng static int get_whole_cluster(BlockDriverState *bs,
679b3976d3cSFam Zheng                 VmdkExtent *extent,
680b3976d3cSFam Zheng                 uint64_t cluster_offset,
681b3976d3cSFam Zheng                 uint64_t offset,
682b3976d3cSFam Zheng                 bool allocate)
683019d6b8fSAnthony Liguori {
684b3976d3cSFam Zheng     /* 128 sectors * 512 bytes each = grain size 64KB */
685b3976d3cSFam Zheng     uint8_t  whole_grain[extent->cluster_sectors * 512];
686019d6b8fSAnthony Liguori 
6870e69c543SFam Zheng     /* we will be here if it's first write on non-exist grain(cluster).
6880e69c543SFam Zheng      * try to read from parent image, if exist */
689b171271aSKevin Wolf     if (bs->backing_hd) {
690c336500dSKevin Wolf         int ret;
691019d6b8fSAnthony Liguori 
692ae261c86SFam Zheng         if (!vmdk_is_cid_valid(bs)) {
693019d6b8fSAnthony Liguori             return -1;
694ae261c86SFam Zheng         }
695019d6b8fSAnthony Liguori 
6960e69c543SFam Zheng         /* floor offset to cluster */
6970e69c543SFam Zheng         offset -= offset % (extent->cluster_sectors * 512);
698c336500dSKevin Wolf         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
699b3976d3cSFam Zheng                 extent->cluster_sectors);
700c336500dSKevin Wolf         if (ret < 0) {
701019d6b8fSAnthony Liguori             return -1;
702c336500dSKevin Wolf         }
703019d6b8fSAnthony Liguori 
7040e69c543SFam Zheng         /* Write grain only into the active image */
705b3976d3cSFam Zheng         ret = bdrv_write(extent->file, cluster_offset, whole_grain,
706b3976d3cSFam Zheng                 extent->cluster_sectors);
707c336500dSKevin Wolf         if (ret < 0) {
708019d6b8fSAnthony Liguori             return -1;
709019d6b8fSAnthony Liguori         }
710019d6b8fSAnthony Liguori     }
711019d6b8fSAnthony Liguori     return 0;
712019d6b8fSAnthony Liguori }
713019d6b8fSAnthony Liguori 
714b3976d3cSFam Zheng static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
715019d6b8fSAnthony Liguori {
716019d6b8fSAnthony Liguori     /* update L2 table */
717b3976d3cSFam Zheng     if (bdrv_pwrite_sync(
718b3976d3cSFam Zheng                 extent->file,
719b3976d3cSFam Zheng                 ((int64_t)m_data->l2_offset * 512)
720b3976d3cSFam Zheng                     + (m_data->l2_index * sizeof(m_data->offset)),
721b3976d3cSFam Zheng                 &(m_data->offset),
722b3976d3cSFam Zheng                 sizeof(m_data->offset)
723b3976d3cSFam Zheng             ) < 0) {
724019d6b8fSAnthony Liguori         return -1;
725b3976d3cSFam Zheng     }
726019d6b8fSAnthony Liguori     /* update backup L2 table */
727b3976d3cSFam Zheng     if (extent->l1_backup_table_offset != 0) {
728b3976d3cSFam Zheng         m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
729b3976d3cSFam Zheng         if (bdrv_pwrite_sync(
730b3976d3cSFam Zheng                     extent->file,
731b3976d3cSFam Zheng                     ((int64_t)m_data->l2_offset * 512)
732b3976d3cSFam Zheng                         + (m_data->l2_index * sizeof(m_data->offset)),
733b3976d3cSFam Zheng                     &(m_data->offset), sizeof(m_data->offset)
734b3976d3cSFam Zheng                 ) < 0) {
735019d6b8fSAnthony Liguori             return -1;
736019d6b8fSAnthony Liguori         }
737b3976d3cSFam Zheng     }
738019d6b8fSAnthony Liguori 
739019d6b8fSAnthony Liguori     return 0;
740019d6b8fSAnthony Liguori }
741019d6b8fSAnthony Liguori 
74291b85bd3SFam Zheng static int get_cluster_offset(BlockDriverState *bs,
743b3976d3cSFam Zheng                                     VmdkExtent *extent,
744b3976d3cSFam Zheng                                     VmdkMetaData *m_data,
74591b85bd3SFam Zheng                                     uint64_t offset,
74691b85bd3SFam Zheng                                     int allocate,
74791b85bd3SFam Zheng                                     uint64_t *cluster_offset)
748019d6b8fSAnthony Liguori {
749019d6b8fSAnthony Liguori     unsigned int l1_index, l2_offset, l2_index;
750019d6b8fSAnthony Liguori     int min_index, i, j;
751019d6b8fSAnthony Liguori     uint32_t min_count, *l2_table, tmp = 0;
752019d6b8fSAnthony Liguori 
753ae261c86SFam Zheng     if (m_data) {
754019d6b8fSAnthony Liguori         m_data->valid = 0;
755ae261c86SFam Zheng     }
75691b85bd3SFam Zheng     if (extent->flat) {
7577fa60fa3SFam Zheng         *cluster_offset = extent->flat_start_offset;
75891b85bd3SFam Zheng         return 0;
75991b85bd3SFam Zheng     }
760019d6b8fSAnthony Liguori 
7616398de51SFam Zheng     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
762b3976d3cSFam Zheng     l1_index = (offset >> 9) / extent->l1_entry_sectors;
763b3976d3cSFam Zheng     if (l1_index >= extent->l1_size) {
76491b85bd3SFam Zheng         return -1;
765b3976d3cSFam Zheng     }
766b3976d3cSFam Zheng     l2_offset = extent->l1_table[l1_index];
767b3976d3cSFam Zheng     if (!l2_offset) {
76891b85bd3SFam Zheng         return -1;
769b3976d3cSFam Zheng     }
770019d6b8fSAnthony Liguori     for (i = 0; i < L2_CACHE_SIZE; i++) {
771b3976d3cSFam Zheng         if (l2_offset == extent->l2_cache_offsets[i]) {
772019d6b8fSAnthony Liguori             /* increment the hit count */
773b3976d3cSFam Zheng             if (++extent->l2_cache_counts[i] == 0xffffffff) {
774019d6b8fSAnthony Liguori                 for (j = 0; j < L2_CACHE_SIZE; j++) {
775b3976d3cSFam Zheng                     extent->l2_cache_counts[j] >>= 1;
776019d6b8fSAnthony Liguori                 }
777019d6b8fSAnthony Liguori             }
778b3976d3cSFam Zheng             l2_table = extent->l2_cache + (i * extent->l2_size);
779019d6b8fSAnthony Liguori             goto found;
780019d6b8fSAnthony Liguori         }
781019d6b8fSAnthony Liguori     }
782019d6b8fSAnthony Liguori     /* not found: load a new entry in the least used one */
783019d6b8fSAnthony Liguori     min_index = 0;
784019d6b8fSAnthony Liguori     min_count = 0xffffffff;
785019d6b8fSAnthony Liguori     for (i = 0; i < L2_CACHE_SIZE; i++) {
786b3976d3cSFam Zheng         if (extent->l2_cache_counts[i] < min_count) {
787b3976d3cSFam Zheng             min_count = extent->l2_cache_counts[i];
788019d6b8fSAnthony Liguori             min_index = i;
789019d6b8fSAnthony Liguori         }
790019d6b8fSAnthony Liguori     }
791b3976d3cSFam Zheng     l2_table = extent->l2_cache + (min_index * extent->l2_size);
792b3976d3cSFam Zheng     if (bdrv_pread(
793b3976d3cSFam Zheng                 extent->file,
794b3976d3cSFam Zheng                 (int64_t)l2_offset * 512,
795b3976d3cSFam Zheng                 l2_table,
796b3976d3cSFam Zheng                 extent->l2_size * sizeof(uint32_t)
797b3976d3cSFam Zheng             ) != extent->l2_size * sizeof(uint32_t)) {
79891b85bd3SFam Zheng         return -1;
799b3976d3cSFam Zheng     }
800019d6b8fSAnthony Liguori 
801b3976d3cSFam Zheng     extent->l2_cache_offsets[min_index] = l2_offset;
802b3976d3cSFam Zheng     extent->l2_cache_counts[min_index] = 1;
803019d6b8fSAnthony Liguori  found:
804b3976d3cSFam Zheng     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
80591b85bd3SFam Zheng     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
806019d6b8fSAnthony Liguori 
80791b85bd3SFam Zheng     if (!*cluster_offset) {
80891b85bd3SFam Zheng         if (!allocate) {
80991b85bd3SFam Zheng             return -1;
81091b85bd3SFam Zheng         }
8119949f97eSKevin Wolf 
812ae261c86SFam Zheng         /* Avoid the L2 tables update for the images that have snapshots. */
81391b85bd3SFam Zheng         *cluster_offset = bdrv_getlength(extent->file);
8142b2c8c5dSFam Zheng         if (!extent->compressed) {
815b3976d3cSFam Zheng             bdrv_truncate(
816b3976d3cSFam Zheng                 extent->file,
81791b85bd3SFam Zheng                 *cluster_offset + (extent->cluster_sectors << 9)
818b3976d3cSFam Zheng             );
8192b2c8c5dSFam Zheng         }
820019d6b8fSAnthony Liguori 
82191b85bd3SFam Zheng         *cluster_offset >>= 9;
82291b85bd3SFam Zheng         tmp = cpu_to_le32(*cluster_offset);
823019d6b8fSAnthony Liguori         l2_table[l2_index] = tmp;
8249949f97eSKevin Wolf 
825019d6b8fSAnthony Liguori         /* First of all we write grain itself, to avoid race condition
826019d6b8fSAnthony Liguori          * that may to corrupt the image.
827019d6b8fSAnthony Liguori          * This problem may occur because of insufficient space on host disk
828019d6b8fSAnthony Liguori          * or inappropriate VM shutdown.
829019d6b8fSAnthony Liguori          */
830b3976d3cSFam Zheng         if (get_whole_cluster(
831ae261c86SFam Zheng                 bs, extent, *cluster_offset, offset, allocate) == -1) {
83291b85bd3SFam Zheng             return -1;
833ae261c86SFam Zheng         }
834019d6b8fSAnthony Liguori 
835019d6b8fSAnthony Liguori         if (m_data) {
836019d6b8fSAnthony Liguori             m_data->offset = tmp;
837019d6b8fSAnthony Liguori             m_data->l1_index = l1_index;
838019d6b8fSAnthony Liguori             m_data->l2_index = l2_index;
839019d6b8fSAnthony Liguori             m_data->l2_offset = l2_offset;
840019d6b8fSAnthony Liguori             m_data->valid = 1;
841019d6b8fSAnthony Liguori         }
842019d6b8fSAnthony Liguori     }
84391b85bd3SFam Zheng     *cluster_offset <<= 9;
84491b85bd3SFam Zheng     return 0;
845019d6b8fSAnthony Liguori }
846019d6b8fSAnthony Liguori 
847b3976d3cSFam Zheng static VmdkExtent *find_extent(BDRVVmdkState *s,
848b3976d3cSFam Zheng                                 int64_t sector_num, VmdkExtent *start_hint)
849b3976d3cSFam Zheng {
850b3976d3cSFam Zheng     VmdkExtent *extent = start_hint;
851b3976d3cSFam Zheng 
852b3976d3cSFam Zheng     if (!extent) {
853b3976d3cSFam Zheng         extent = &s->extents[0];
854b3976d3cSFam Zheng     }
855b3976d3cSFam Zheng     while (extent < &s->extents[s->num_extents]) {
856b3976d3cSFam Zheng         if (sector_num < extent->end_sector) {
857b3976d3cSFam Zheng             return extent;
858b3976d3cSFam Zheng         }
859b3976d3cSFam Zheng         extent++;
860b3976d3cSFam Zheng     }
861b3976d3cSFam Zheng     return NULL;
862b3976d3cSFam Zheng }
863b3976d3cSFam Zheng 
864f8a2e5e3SStefan Hajnoczi static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
865f8a2e5e3SStefan Hajnoczi         int64_t sector_num, int nb_sectors, int *pnum)
866019d6b8fSAnthony Liguori {
867019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
868b3976d3cSFam Zheng     int64_t index_in_cluster, n, ret;
869b3976d3cSFam Zheng     uint64_t offset;
870b3976d3cSFam Zheng     VmdkExtent *extent;
871b3976d3cSFam Zheng 
872b3976d3cSFam Zheng     extent = find_extent(s, sector_num, NULL);
873b3976d3cSFam Zheng     if (!extent) {
874b3976d3cSFam Zheng         return 0;
875b3976d3cSFam Zheng     }
876f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_lock(&s->lock);
87791b85bd3SFam Zheng     ret = get_cluster_offset(bs, extent, NULL,
87891b85bd3SFam Zheng                             sector_num * 512, 0, &offset);
879f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_unlock(&s->lock);
88091b85bd3SFam Zheng     /* get_cluster_offset returning 0 means success */
88191b85bd3SFam Zheng     ret = !ret;
88291b85bd3SFam Zheng 
883b3976d3cSFam Zheng     index_in_cluster = sector_num % extent->cluster_sectors;
884b3976d3cSFam Zheng     n = extent->cluster_sectors - index_in_cluster;
885ae261c86SFam Zheng     if (n > nb_sectors) {
886019d6b8fSAnthony Liguori         n = nb_sectors;
887ae261c86SFam Zheng     }
888019d6b8fSAnthony Liguori     *pnum = n;
889b3976d3cSFam Zheng     return ret;
890019d6b8fSAnthony Liguori }
891019d6b8fSAnthony Liguori 
892dd3f6ee2SFam Zheng static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
893dd3f6ee2SFam Zheng                             int64_t offset_in_cluster, const uint8_t *buf,
894dd3f6ee2SFam Zheng                             int nb_sectors, int64_t sector_num)
895dd3f6ee2SFam Zheng {
896dd3f6ee2SFam Zheng     int ret;
8972b2c8c5dSFam Zheng     VmdkGrainMarker *data = NULL;
8982b2c8c5dSFam Zheng     uLongf buf_len;
899dd3f6ee2SFam Zheng     const uint8_t *write_buf = buf;
900dd3f6ee2SFam Zheng     int write_len = nb_sectors * 512;
901dd3f6ee2SFam Zheng 
9022b2c8c5dSFam Zheng     if (extent->compressed) {
9032b2c8c5dSFam Zheng         if (!extent->has_marker) {
9042b2c8c5dSFam Zheng             ret = -EINVAL;
9052b2c8c5dSFam Zheng             goto out;
9062b2c8c5dSFam Zheng         }
9072b2c8c5dSFam Zheng         buf_len = (extent->cluster_sectors << 9) * 2;
9082b2c8c5dSFam Zheng         data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
9092b2c8c5dSFam Zheng         if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
9102b2c8c5dSFam Zheng                 buf_len == 0) {
9112b2c8c5dSFam Zheng             ret = -EINVAL;
9122b2c8c5dSFam Zheng             goto out;
9132b2c8c5dSFam Zheng         }
9142b2c8c5dSFam Zheng         data->lba = sector_num;
9152b2c8c5dSFam Zheng         data->size = buf_len;
9162b2c8c5dSFam Zheng         write_buf = (uint8_t *)data;
9172b2c8c5dSFam Zheng         write_len = buf_len + sizeof(VmdkGrainMarker);
9182b2c8c5dSFam Zheng     }
919dd3f6ee2SFam Zheng     ret = bdrv_pwrite(extent->file,
920dd3f6ee2SFam Zheng                         cluster_offset + offset_in_cluster,
921dd3f6ee2SFam Zheng                         write_buf,
922dd3f6ee2SFam Zheng                         write_len);
923dd3f6ee2SFam Zheng     if (ret != write_len) {
924dd3f6ee2SFam Zheng         ret = ret < 0 ? ret : -EIO;
925dd3f6ee2SFam Zheng         goto out;
926dd3f6ee2SFam Zheng     }
927dd3f6ee2SFam Zheng     ret = 0;
928dd3f6ee2SFam Zheng  out:
9292b2c8c5dSFam Zheng     g_free(data);
930dd3f6ee2SFam Zheng     return ret;
931dd3f6ee2SFam Zheng }
932dd3f6ee2SFam Zheng 
933dd3f6ee2SFam Zheng static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
934dd3f6ee2SFam Zheng                             int64_t offset_in_cluster, uint8_t *buf,
935dd3f6ee2SFam Zheng                             int nb_sectors)
936dd3f6ee2SFam Zheng {
937dd3f6ee2SFam Zheng     int ret;
9382b2c8c5dSFam Zheng     int cluster_bytes, buf_bytes;
9392b2c8c5dSFam Zheng     uint8_t *cluster_buf, *compressed_data;
9402b2c8c5dSFam Zheng     uint8_t *uncomp_buf;
9412b2c8c5dSFam Zheng     uint32_t data_len;
9422b2c8c5dSFam Zheng     VmdkGrainMarker *marker;
9432b2c8c5dSFam Zheng     uLongf buf_len;
944dd3f6ee2SFam Zheng 
9452b2c8c5dSFam Zheng 
9462b2c8c5dSFam Zheng     if (!extent->compressed) {
947dd3f6ee2SFam Zheng         ret = bdrv_pread(extent->file,
948dd3f6ee2SFam Zheng                           cluster_offset + offset_in_cluster,
949dd3f6ee2SFam Zheng                           buf, nb_sectors * 512);
950dd3f6ee2SFam Zheng         if (ret == nb_sectors * 512) {
951dd3f6ee2SFam Zheng             return 0;
952dd3f6ee2SFam Zheng         } else {
953dd3f6ee2SFam Zheng             return -EIO;
954dd3f6ee2SFam Zheng         }
955dd3f6ee2SFam Zheng     }
9562b2c8c5dSFam Zheng     cluster_bytes = extent->cluster_sectors * 512;
9572b2c8c5dSFam Zheng     /* Read two clusters in case GrainMarker + compressed data > one cluster */
9582b2c8c5dSFam Zheng     buf_bytes = cluster_bytes * 2;
9592b2c8c5dSFam Zheng     cluster_buf = g_malloc(buf_bytes);
9602b2c8c5dSFam Zheng     uncomp_buf = g_malloc(cluster_bytes);
9612b2c8c5dSFam Zheng     ret = bdrv_pread(extent->file,
9622b2c8c5dSFam Zheng                 cluster_offset,
9632b2c8c5dSFam Zheng                 cluster_buf, buf_bytes);
9642b2c8c5dSFam Zheng     if (ret < 0) {
9652b2c8c5dSFam Zheng         goto out;
9662b2c8c5dSFam Zheng     }
9672b2c8c5dSFam Zheng     compressed_data = cluster_buf;
9682b2c8c5dSFam Zheng     buf_len = cluster_bytes;
9692b2c8c5dSFam Zheng     data_len = cluster_bytes;
9702b2c8c5dSFam Zheng     if (extent->has_marker) {
9712b2c8c5dSFam Zheng         marker = (VmdkGrainMarker *)cluster_buf;
9722b2c8c5dSFam Zheng         compressed_data = marker->data;
9732b2c8c5dSFam Zheng         data_len = le32_to_cpu(marker->size);
9742b2c8c5dSFam Zheng     }
9752b2c8c5dSFam Zheng     if (!data_len || data_len > buf_bytes) {
9762b2c8c5dSFam Zheng         ret = -EINVAL;
9772b2c8c5dSFam Zheng         goto out;
9782b2c8c5dSFam Zheng     }
9792b2c8c5dSFam Zheng     ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
9802b2c8c5dSFam Zheng     if (ret != Z_OK) {
9812b2c8c5dSFam Zheng         ret = -EINVAL;
9822b2c8c5dSFam Zheng         goto out;
9832b2c8c5dSFam Zheng 
9842b2c8c5dSFam Zheng     }
9852b2c8c5dSFam Zheng     if (offset_in_cluster < 0 ||
9862b2c8c5dSFam Zheng             offset_in_cluster + nb_sectors * 512 > buf_len) {
9872b2c8c5dSFam Zheng         ret = -EINVAL;
9882b2c8c5dSFam Zheng         goto out;
9892b2c8c5dSFam Zheng     }
9902b2c8c5dSFam Zheng     memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
9912b2c8c5dSFam Zheng     ret = 0;
9922b2c8c5dSFam Zheng 
9932b2c8c5dSFam Zheng  out:
9942b2c8c5dSFam Zheng     g_free(uncomp_buf);
9952b2c8c5dSFam Zheng     g_free(cluster_buf);
9962b2c8c5dSFam Zheng     return ret;
9972b2c8c5dSFam Zheng }
998dd3f6ee2SFam Zheng 
999019d6b8fSAnthony Liguori static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
1000019d6b8fSAnthony Liguori                     uint8_t *buf, int nb_sectors)
1001019d6b8fSAnthony Liguori {
1002019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
1003b3976d3cSFam Zheng     int ret;
1004b3976d3cSFam Zheng     uint64_t n, index_in_cluster;
1005b3976d3cSFam Zheng     VmdkExtent *extent = NULL;
1006019d6b8fSAnthony Liguori     uint64_t cluster_offset;
1007019d6b8fSAnthony Liguori 
1008019d6b8fSAnthony Liguori     while (nb_sectors > 0) {
1009b3976d3cSFam Zheng         extent = find_extent(s, sector_num, extent);
1010b3976d3cSFam Zheng         if (!extent) {
1011b3976d3cSFam Zheng             return -EIO;
1012b3976d3cSFam Zheng         }
101391b85bd3SFam Zheng         ret = get_cluster_offset(
101491b85bd3SFam Zheng                             bs, extent, NULL,
101591b85bd3SFam Zheng                             sector_num << 9, 0, &cluster_offset);
1016b3976d3cSFam Zheng         index_in_cluster = sector_num % extent->cluster_sectors;
1017b3976d3cSFam Zheng         n = extent->cluster_sectors - index_in_cluster;
1018ae261c86SFam Zheng         if (n > nb_sectors) {
1019019d6b8fSAnthony Liguori             n = nb_sectors;
1020ae261c86SFam Zheng         }
102191b85bd3SFam Zheng         if (ret) {
102291b85bd3SFam Zheng             /* if not allocated, try to read from parent image, if exist */
1023b171271aSKevin Wolf             if (bs->backing_hd) {
1024ae261c86SFam Zheng                 if (!vmdk_is_cid_valid(bs)) {
10257fa60fa3SFam Zheng                     return -EINVAL;
1026ae261c86SFam Zheng                 }
1027b171271aSKevin Wolf                 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
1028ae261c86SFam Zheng                 if (ret < 0) {
10297fa60fa3SFam Zheng                     return ret;
1030ae261c86SFam Zheng                 }
1031019d6b8fSAnthony Liguori             } else {
1032019d6b8fSAnthony Liguori                 memset(buf, 0, 512 * n);
1033019d6b8fSAnthony Liguori             }
1034019d6b8fSAnthony Liguori         } else {
1035dd3f6ee2SFam Zheng             ret = vmdk_read_extent(extent,
1036dd3f6ee2SFam Zheng                             cluster_offset, index_in_cluster * 512,
1037dd3f6ee2SFam Zheng                             buf, n);
1038dd3f6ee2SFam Zheng             if (ret) {
10397fa60fa3SFam Zheng                 return ret;
10407fa60fa3SFam Zheng             }
1041019d6b8fSAnthony Liguori         }
1042019d6b8fSAnthony Liguori         nb_sectors -= n;
1043019d6b8fSAnthony Liguori         sector_num += n;
1044019d6b8fSAnthony Liguori         buf += n * 512;
1045019d6b8fSAnthony Liguori     }
1046019d6b8fSAnthony Liguori     return 0;
1047019d6b8fSAnthony Liguori }
1048019d6b8fSAnthony Liguori 
10492914caa0SPaolo Bonzini static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
10502914caa0SPaolo Bonzini                                      uint8_t *buf, int nb_sectors)
10512914caa0SPaolo Bonzini {
10522914caa0SPaolo Bonzini     int ret;
10532914caa0SPaolo Bonzini     BDRVVmdkState *s = bs->opaque;
10542914caa0SPaolo Bonzini     qemu_co_mutex_lock(&s->lock);
10552914caa0SPaolo Bonzini     ret = vmdk_read(bs, sector_num, buf, nb_sectors);
10562914caa0SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
10572914caa0SPaolo Bonzini     return ret;
10582914caa0SPaolo Bonzini }
10592914caa0SPaolo Bonzini 
1060019d6b8fSAnthony Liguori static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
1061019d6b8fSAnthony Liguori                      const uint8_t *buf, int nb_sectors)
1062019d6b8fSAnthony Liguori {
1063019d6b8fSAnthony Liguori     BDRVVmdkState *s = bs->opaque;
1064b3976d3cSFam Zheng     VmdkExtent *extent = NULL;
106591b85bd3SFam Zheng     int n, ret;
1066b3976d3cSFam Zheng     int64_t index_in_cluster;
1067019d6b8fSAnthony Liguori     uint64_t cluster_offset;
1068b3976d3cSFam Zheng     VmdkMetaData m_data;
1069019d6b8fSAnthony Liguori 
1070019d6b8fSAnthony Liguori     if (sector_num > bs->total_sectors) {
1071019d6b8fSAnthony Liguori         fprintf(stderr,
1072019d6b8fSAnthony Liguori                 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
1073019d6b8fSAnthony Liguori                 " total_sectors=0x%" PRIx64 "\n",
1074019d6b8fSAnthony Liguori                 sector_num, bs->total_sectors);
10757fa60fa3SFam Zheng         return -EIO;
1076019d6b8fSAnthony Liguori     }
1077019d6b8fSAnthony Liguori 
1078019d6b8fSAnthony Liguori     while (nb_sectors > 0) {
1079b3976d3cSFam Zheng         extent = find_extent(s, sector_num, extent);
1080b3976d3cSFam Zheng         if (!extent) {
1081b3976d3cSFam Zheng             return -EIO;
1082b3976d3cSFam Zheng         }
108391b85bd3SFam Zheng         ret = get_cluster_offset(
1084b3976d3cSFam Zheng                                 bs,
1085b3976d3cSFam Zheng                                 extent,
1086b3976d3cSFam Zheng                                 &m_data,
10872b2c8c5dSFam Zheng                                 sector_num << 9, !extent->compressed,
10882b2c8c5dSFam Zheng                                 &cluster_offset);
10892b2c8c5dSFam Zheng         if (extent->compressed) {
10902b2c8c5dSFam Zheng             if (ret == 0) {
10912b2c8c5dSFam Zheng                 /* Refuse write to allocated cluster for streamOptimized */
10922b2c8c5dSFam Zheng                 fprintf(stderr,
10932b2c8c5dSFam Zheng                         "VMDK: can't write to allocated cluster"
10942b2c8c5dSFam Zheng                         " for streamOptimized\n");
10952b2c8c5dSFam Zheng                 return -EIO;
10962b2c8c5dSFam Zheng             } else {
10972b2c8c5dSFam Zheng                 /* allocate */
10982b2c8c5dSFam Zheng                 ret = get_cluster_offset(
10992b2c8c5dSFam Zheng                                         bs,
11002b2c8c5dSFam Zheng                                         extent,
11012b2c8c5dSFam Zheng                                         &m_data,
110291b85bd3SFam Zheng                                         sector_num << 9, 1,
110391b85bd3SFam Zheng                                         &cluster_offset);
11042b2c8c5dSFam Zheng             }
11052b2c8c5dSFam Zheng         }
110691b85bd3SFam Zheng         if (ret) {
110791b85bd3SFam Zheng             return -EINVAL;
1108b3976d3cSFam Zheng         }
1109b3976d3cSFam Zheng         index_in_cluster = sector_num % extent->cluster_sectors;
1110b3976d3cSFam Zheng         n = extent->cluster_sectors - index_in_cluster;
1111b3976d3cSFam Zheng         if (n > nb_sectors) {
1112019d6b8fSAnthony Liguori             n = nb_sectors;
1113b3976d3cSFam Zheng         }
1114019d6b8fSAnthony Liguori 
1115dd3f6ee2SFam Zheng         ret = vmdk_write_extent(extent,
1116dd3f6ee2SFam Zheng                         cluster_offset, index_in_cluster * 512,
1117dd3f6ee2SFam Zheng                         buf, n, sector_num);
1118dd3f6ee2SFam Zheng         if (ret) {
11197fa60fa3SFam Zheng             return ret;
1120b3976d3cSFam Zheng         }
1121019d6b8fSAnthony Liguori         if (m_data.valid) {
1122019d6b8fSAnthony Liguori             /* update L2 tables */
1123b3976d3cSFam Zheng             if (vmdk_L2update(extent, &m_data) == -1) {
11247fa60fa3SFam Zheng                 return -EIO;
1125019d6b8fSAnthony Liguori             }
1126b3976d3cSFam Zheng         }
1127019d6b8fSAnthony Liguori         nb_sectors -= n;
1128019d6b8fSAnthony Liguori         sector_num += n;
1129019d6b8fSAnthony Liguori         buf += n * 512;
1130019d6b8fSAnthony Liguori 
1131ae261c86SFam Zheng         /* update CID on the first write every time the virtual disk is
1132ae261c86SFam Zheng          * opened */
113369b4d86dSFam Zheng         if (!s->cid_updated) {
113499f1835dSKevin Wolf             ret = vmdk_write_cid(bs, time(NULL));
113599f1835dSKevin Wolf             if (ret < 0) {
113699f1835dSKevin Wolf                 return ret;
113799f1835dSKevin Wolf             }
113869b4d86dSFam Zheng             s->cid_updated = true;
1139019d6b8fSAnthony Liguori         }
1140019d6b8fSAnthony Liguori     }
1141019d6b8fSAnthony Liguori     return 0;
1142019d6b8fSAnthony Liguori }
1143019d6b8fSAnthony Liguori 
1144e183ef75SPaolo Bonzini static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
1145e183ef75SPaolo Bonzini                                       const uint8_t *buf, int nb_sectors)
1146e183ef75SPaolo Bonzini {
1147e183ef75SPaolo Bonzini     int ret;
1148e183ef75SPaolo Bonzini     BDRVVmdkState *s = bs->opaque;
1149e183ef75SPaolo Bonzini     qemu_co_mutex_lock(&s->lock);
1150e183ef75SPaolo Bonzini     ret = vmdk_write(bs, sector_num, buf, nb_sectors);
1151e183ef75SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
1152e183ef75SPaolo Bonzini     return ret;
1153e183ef75SPaolo Bonzini }
1154e183ef75SPaolo Bonzini 
1155f66fd6c3SFam Zheng 
11566c031aacSFam Zheng static int vmdk_create_extent(const char *filename, int64_t filesize,
11576c031aacSFam Zheng                               bool flat, bool compress)
1158019d6b8fSAnthony Liguori {
1159f66fd6c3SFam Zheng     int ret, i;
1160f66fd6c3SFam Zheng     int fd = 0;
1161019d6b8fSAnthony Liguori     VMDK4Header header;
1162019d6b8fSAnthony Liguori     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
11630e7e1989SKevin Wolf 
1164f66fd6c3SFam Zheng     fd = open(
1165f66fd6c3SFam Zheng         filename,
1166f66fd6c3SFam Zheng         O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1167019d6b8fSAnthony Liguori         0644);
1168f66fd6c3SFam Zheng     if (fd < 0) {
1169b781cce5SJuan Quintela         return -errno;
1170f66fd6c3SFam Zheng     }
1171f66fd6c3SFam Zheng     if (flat) {
1172f66fd6c3SFam Zheng         ret = ftruncate(fd, filesize);
1173f66fd6c3SFam Zheng         if (ret < 0) {
1174f66fd6c3SFam Zheng             ret = -errno;
1175f66fd6c3SFam Zheng         }
1176f66fd6c3SFam Zheng         goto exit;
1177f66fd6c3SFam Zheng     }
1178019d6b8fSAnthony Liguori     magic = cpu_to_be32(VMDK4_MAGIC);
1179019d6b8fSAnthony Liguori     memset(&header, 0, sizeof(header));
118016372ff0SAlexander Graf     header.version = 1;
11816c031aacSFam Zheng     header.flags =
11826c031aacSFam Zheng         3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
11836c031aacSFam Zheng     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
1184f66fd6c3SFam Zheng     header.capacity = filesize / 512;
118516372ff0SAlexander Graf     header.granularity = 128;
118616372ff0SAlexander Graf     header.num_gtes_per_gte = 512;
1187019d6b8fSAnthony Liguori 
1188f66fd6c3SFam Zheng     grains = (filesize / 512 + header.granularity - 1) / header.granularity;
1189019d6b8fSAnthony Liguori     gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
1190f66fd6c3SFam Zheng     gt_count =
1191f66fd6c3SFam Zheng         (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
1192019d6b8fSAnthony Liguori     gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
1193019d6b8fSAnthony Liguori 
1194019d6b8fSAnthony Liguori     header.desc_offset = 1;
1195019d6b8fSAnthony Liguori     header.desc_size = 20;
1196019d6b8fSAnthony Liguori     header.rgd_offset = header.desc_offset + header.desc_size;
1197019d6b8fSAnthony Liguori     header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
1198019d6b8fSAnthony Liguori     header.grain_offset =
1199019d6b8fSAnthony Liguori        ((header.gd_offset + gd_size + (gt_size * gt_count) +
1200019d6b8fSAnthony Liguori          header.granularity - 1) / header.granularity) *
1201019d6b8fSAnthony Liguori         header.granularity;
120216372ff0SAlexander Graf     /* swap endianness for all header fields */
120316372ff0SAlexander Graf     header.version = cpu_to_le32(header.version);
120416372ff0SAlexander Graf     header.flags = cpu_to_le32(header.flags);
120516372ff0SAlexander Graf     header.capacity = cpu_to_le64(header.capacity);
120616372ff0SAlexander Graf     header.granularity = cpu_to_le64(header.granularity);
120716372ff0SAlexander Graf     header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
1208019d6b8fSAnthony Liguori     header.desc_offset = cpu_to_le64(header.desc_offset);
1209019d6b8fSAnthony Liguori     header.desc_size = cpu_to_le64(header.desc_size);
1210019d6b8fSAnthony Liguori     header.rgd_offset = cpu_to_le64(header.rgd_offset);
1211019d6b8fSAnthony Liguori     header.gd_offset = cpu_to_le64(header.gd_offset);
1212019d6b8fSAnthony Liguori     header.grain_offset = cpu_to_le64(header.grain_offset);
12136c031aacSFam Zheng     header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
1214019d6b8fSAnthony Liguori 
1215019d6b8fSAnthony Liguori     header.check_bytes[0] = 0xa;
1216019d6b8fSAnthony Liguori     header.check_bytes[1] = 0x20;
1217019d6b8fSAnthony Liguori     header.check_bytes[2] = 0xd;
1218019d6b8fSAnthony Liguori     header.check_bytes[3] = 0xa;
1219019d6b8fSAnthony Liguori 
1220019d6b8fSAnthony Liguori     /* write all the data */
12211640366cSKirill A. Shutemov     ret = qemu_write_full(fd, &magic, sizeof(magic));
12221640366cSKirill A. Shutemov     if (ret != sizeof(magic)) {
1223b781cce5SJuan Quintela         ret = -errno;
12241640366cSKirill A. Shutemov         goto exit;
12251640366cSKirill A. Shutemov     }
12261640366cSKirill A. Shutemov     ret = qemu_write_full(fd, &header, sizeof(header));
12271640366cSKirill A. Shutemov     if (ret != sizeof(header)) {
1228b781cce5SJuan Quintela         ret = -errno;
12291640366cSKirill A. Shutemov         goto exit;
12301640366cSKirill A. Shutemov     }
1231019d6b8fSAnthony Liguori 
123216372ff0SAlexander Graf     ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
12331640366cSKirill A. Shutemov     if (ret < 0) {
1234b781cce5SJuan Quintela         ret = -errno;
12351640366cSKirill A. Shutemov         goto exit;
12361640366cSKirill A. Shutemov     }
1237019d6b8fSAnthony Liguori 
1238019d6b8fSAnthony Liguori     /* write grain directory */
1239019d6b8fSAnthony Liguori     lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
124016372ff0SAlexander Graf     for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
12411640366cSKirill A. Shutemov          i < gt_count; i++, tmp += gt_size) {
12421640366cSKirill A. Shutemov         ret = qemu_write_full(fd, &tmp, sizeof(tmp));
12431640366cSKirill A. Shutemov         if (ret != sizeof(tmp)) {
1244b781cce5SJuan Quintela             ret = -errno;
12451640366cSKirill A. Shutemov             goto exit;
12461640366cSKirill A. Shutemov         }
12471640366cSKirill A. Shutemov     }
1248019d6b8fSAnthony Liguori 
1249019d6b8fSAnthony Liguori     /* write backup grain directory */
1250019d6b8fSAnthony Liguori     lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
125116372ff0SAlexander Graf     for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
12521640366cSKirill A. Shutemov          i < gt_count; i++, tmp += gt_size) {
12531640366cSKirill A. Shutemov         ret = qemu_write_full(fd, &tmp, sizeof(tmp));
12541640366cSKirill A. Shutemov         if (ret != sizeof(tmp)) {
1255b781cce5SJuan Quintela             ret = -errno;
12561640366cSKirill A. Shutemov             goto exit;
12571640366cSKirill A. Shutemov         }
12581640366cSKirill A. Shutemov     }
1259019d6b8fSAnthony Liguori 
1260f66fd6c3SFam Zheng     ret = 0;
1261f66fd6c3SFam Zheng  exit:
1262f66fd6c3SFam Zheng     close(fd);
1263f66fd6c3SFam Zheng     return ret;
1264f66fd6c3SFam Zheng }
1265019d6b8fSAnthony Liguori 
/*
 * Split @filename into three pieces:
 *   @path    - everything up to and including the last separator
 *              (empty when no separator is present),
 *   @prefix  - the base name up to (not including) its last '.',
 *   @postfix - the last '.' and everything after it (empty when the
 *              base name contains no '.').
 *
 * Separators are tried in the order '/', '\\', ':'; the first kind that
 * occurs anywhere in @filename is used, and its last occurrence is the
 * split point.
 *
 * Each output buffer must hold at least @buf_len bytes.
 *
 * Returns 0 on success, -1 if @filename is NULL or empty, or if the
 * directory part or the base name does not fit into @buf_len bytes.
 * On the latter failure @path may already have been written.
 */
static int filename_decompose(const char *filename, char *path, char *prefix,
        char *postfix, size_t buf_len)
{
    static const char separators[] = { '/', '\\', ':' };
    const char *last_sep = NULL;
    const char *base_name;
    const char *dot;
    size_t k;

    if (filename == NULL || filename[0] == '\0') {
        fprintf(stderr, "Vmdk: no filename provided.\n");
        return -1;
    }

    /* Stop at the first separator kind that is found at all */
    for (k = 0; last_sep == NULL && k < sizeof(separators); k++) {
        last_sep = strrchr(filename, separators[k]);
    }

    if (last_sep == NULL) {
        base_name = filename;
        path[0] = '\0';
    } else {
        /* The directory part keeps its trailing separator */
        base_name = last_sep + 1;
        if (base_name - filename >= buf_len) {
            return -1;
        }
        pstrcpy(path, base_name - filename + 1, filename);
    }

    dot = strrchr(base_name, '.');
    if (dot != NULL) {
        if (dot - base_name >= buf_len) {
            return -1;
        }
        pstrcpy(prefix, dot - base_name + 1, base_name);
        pstrcpy(postfix, buf_len, dot);
    } else {
        pstrcpy(prefix, buf_len, base_name);
        postfix[0] = '\0';
    }
    return 0;
}
1305f66fd6c3SFam Zheng 
/*
 * Write into @dest (capacity @dest_size bytes) a path that names @target
 * relative to the directory containing @base.
 *
 * If @target is absolute it is copied unchanged (truncated to fit).
 * Otherwise the leading directory components shared by @base and @target
 * are dropped, one ".." is emitted for every remaining directory level of
 * @base, and the remainder of @target is appended.  The result is
 * truncated if it does not fit.
 *
 * Only the platform separator ('\\' on Windows, '/' elsewhere) is
 * recognised as a component boundary.
 *
 * NOTE(review): both paths are compared purely textually; a relative
 * @target combined with an absolute @base is not resolved against the
 * current directory.
 *
 * Returns 0 on success, -1 if any pointer is NULL or @dest_size is not
 * positive.
 */
static int relative_path(char *dest, int dest_size,
        const char *base, const char *target)
{
    int i = 0;
    int n = 0;
    const char *p, *q;
#ifdef _WIN32
    const char *sep = "\\";
#else
    const char *sep = "/";
#endif

    if (!(dest && base && target) || dest_size <= 0) {
        return -1;
    }
    if (path_is_absolute(target)) {
        /* Terminate first: strncpy() does not when the source is too long */
        dest[dest_size - 1] = '\0';
        strncpy(dest, target, dest_size - 1);
        return 0;
    }
    /* Length of the common prefix; must stop at the terminating NUL,
     * otherwise identical strings would be scanned past their end. */
    while (base[i] != '\0' && base[i] == target[i]) {
        i++;
    }
    /* Only whole path components may be shared: back up to just after
     * the last separator inside the common prefix, so that e.g.
     * "disk.vmdk" vs "disk-base.vmdk" does not lose the leading "disk". */
    while (i > 0 && base[i - 1] != *sep) {
        i--;
    }
    p = &base[i];
    q = &target[i];
    /* Every separator left in base is one directory level to climb */
    while (*p) {
        if (*p == *sep) {
            n++;
        }
        p++;
    }
    dest[0] = '\0';
    for (; n; n--) {
        pstrcat(dest, dest_size, "..");
        pstrcat(dest, dest_size, sep);
    }
    pstrcat(dest, dest_size, q);
    return 0;
}
1345f66fd6c3SFam Zheng 
1346f66fd6c3SFam Zheng static int vmdk_create(const char *filename, QEMUOptionParameter *options)
1347f66fd6c3SFam Zheng {
1348f66fd6c3SFam Zheng     int fd, idx = 0;
1349f66fd6c3SFam Zheng     char desc[BUF_SIZE];
1350f66fd6c3SFam Zheng     int64_t total_size = 0, filesize;
1351f66fd6c3SFam Zheng     const char *backing_file = NULL;
1352f66fd6c3SFam Zheng     const char *fmt = NULL;
1353f66fd6c3SFam Zheng     int flags = 0;
1354f66fd6c3SFam Zheng     int ret = 0;
13556c031aacSFam Zheng     bool flat, split, compress;
1356f66fd6c3SFam Zheng     char ext_desc_lines[BUF_SIZE] = "";
1357f66fd6c3SFam Zheng     char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
1358f66fd6c3SFam Zheng     const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
1359f66fd6c3SFam Zheng     const char *desc_extent_line;
1360f66fd6c3SFam Zheng     char parent_desc_line[BUF_SIZE] = "";
1361f66fd6c3SFam Zheng     uint32_t parent_cid = 0xffffffff;
1362f66fd6c3SFam Zheng     const char desc_template[] =
1363f66fd6c3SFam Zheng         "# Disk DescriptorFile\n"
1364f66fd6c3SFam Zheng         "version=1\n"
1365f66fd6c3SFam Zheng         "CID=%x\n"
1366f66fd6c3SFam Zheng         "parentCID=%x\n"
1367f66fd6c3SFam Zheng         "createType=\"%s\"\n"
1368f66fd6c3SFam Zheng         "%s"
1369f66fd6c3SFam Zheng         "\n"
1370f66fd6c3SFam Zheng         "# Extent description\n"
1371f66fd6c3SFam Zheng         "%s"
1372f66fd6c3SFam Zheng         "\n"
1373f66fd6c3SFam Zheng         "# The Disk Data Base\n"
1374f66fd6c3SFam Zheng         "#DDB\n"
1375f66fd6c3SFam Zheng         "\n"
1376f66fd6c3SFam Zheng         "ddb.virtualHWVersion = \"%d\"\n"
1377f66fd6c3SFam Zheng         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
1378f66fd6c3SFam Zheng         "ddb.geometry.heads = \"16\"\n"
1379f66fd6c3SFam Zheng         "ddb.geometry.sectors = \"63\"\n"
1380f66fd6c3SFam Zheng         "ddb.adapterType = \"ide\"\n";
1381f66fd6c3SFam Zheng 
1382f66fd6c3SFam Zheng     if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
1383f66fd6c3SFam Zheng         return -EINVAL;
1384f66fd6c3SFam Zheng     }
1385f66fd6c3SFam Zheng     /* Read out options */
1386f66fd6c3SFam Zheng     while (options && options->name) {
1387f66fd6c3SFam Zheng         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1388f66fd6c3SFam Zheng             total_size = options->value.n;
1389f66fd6c3SFam Zheng         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1390f66fd6c3SFam Zheng             backing_file = options->value.s;
1391f66fd6c3SFam Zheng         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
1392f66fd6c3SFam Zheng             flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
1393f66fd6c3SFam Zheng         } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
1394f66fd6c3SFam Zheng             fmt = options->value.s;
1395f66fd6c3SFam Zheng         }
1396f66fd6c3SFam Zheng         options++;
1397f66fd6c3SFam Zheng     }
1398f66fd6c3SFam Zheng     if (!fmt) {
1399f66fd6c3SFam Zheng         /* Default format to monolithicSparse */
1400f66fd6c3SFam Zheng         fmt = "monolithicSparse";
1401f66fd6c3SFam Zheng     } else if (strcmp(fmt, "monolithicFlat") &&
1402f66fd6c3SFam Zheng                strcmp(fmt, "monolithicSparse") &&
1403f66fd6c3SFam Zheng                strcmp(fmt, "twoGbMaxExtentSparse") &&
14046c031aacSFam Zheng                strcmp(fmt, "twoGbMaxExtentFlat") &&
14056c031aacSFam Zheng                strcmp(fmt, "streamOptimized")) {
1406f66fd6c3SFam Zheng         fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
1407f66fd6c3SFam Zheng         return -EINVAL;
1408f66fd6c3SFam Zheng     }
1409f66fd6c3SFam Zheng     split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
1410f66fd6c3SFam Zheng               strcmp(fmt, "twoGbMaxExtentSparse"));
1411f66fd6c3SFam Zheng     flat = !(strcmp(fmt, "monolithicFlat") &&
1412f66fd6c3SFam Zheng              strcmp(fmt, "twoGbMaxExtentFlat"));
14136c031aacSFam Zheng     compress = !strcmp(fmt, "streamOptimized");
1414f66fd6c3SFam Zheng     if (flat) {
1415f66fd6c3SFam Zheng         desc_extent_line = "RW %lld FLAT \"%s\" 0\n";
1416f66fd6c3SFam Zheng     } else {
1417f66fd6c3SFam Zheng         desc_extent_line = "RW %lld SPARSE \"%s\"\n";
1418f66fd6c3SFam Zheng     }
1419f66fd6c3SFam Zheng     if (flat && backing_file) {
1420f66fd6c3SFam Zheng         /* not supporting backing file for flat image */
1421f66fd6c3SFam Zheng         return -ENOTSUP;
1422f66fd6c3SFam Zheng     }
1423f66fd6c3SFam Zheng     if (backing_file) {
1424f66fd6c3SFam Zheng         char parent_filename[PATH_MAX];
1425f66fd6c3SFam Zheng         BlockDriverState *bs = bdrv_new("");
1426f66fd6c3SFam Zheng         ret = bdrv_open(bs, backing_file, 0, NULL);
1427f66fd6c3SFam Zheng         if (ret != 0) {
1428f66fd6c3SFam Zheng             bdrv_delete(bs);
1429f66fd6c3SFam Zheng             return ret;
1430f66fd6c3SFam Zheng         }
1431f66fd6c3SFam Zheng         if (strcmp(bs->drv->format_name, "vmdk")) {
1432f66fd6c3SFam Zheng             bdrv_delete(bs);
1433f66fd6c3SFam Zheng             return -EINVAL;
1434f66fd6c3SFam Zheng         }
1435f66fd6c3SFam Zheng         parent_cid = vmdk_read_cid(bs, 0);
1436f66fd6c3SFam Zheng         bdrv_delete(bs);
1437f66fd6c3SFam Zheng         relative_path(parent_filename, sizeof(parent_filename),
1438f66fd6c3SFam Zheng                       filename, backing_file);
1439f66fd6c3SFam Zheng         snprintf(parent_desc_line, sizeof(parent_desc_line),
1440f66fd6c3SFam Zheng                 "parentFileNameHint=\"%s\"", parent_filename);
1441f66fd6c3SFam Zheng     }
1442f66fd6c3SFam Zheng 
1443f66fd6c3SFam Zheng     /* Create extents */
1444f66fd6c3SFam Zheng     filesize = total_size;
1445f66fd6c3SFam Zheng     while (filesize > 0) {
1446f66fd6c3SFam Zheng         char desc_line[BUF_SIZE];
1447f66fd6c3SFam Zheng         char ext_filename[PATH_MAX];
1448f66fd6c3SFam Zheng         char desc_filename[PATH_MAX];
1449f66fd6c3SFam Zheng         int64_t size = filesize;
1450f66fd6c3SFam Zheng 
1451f66fd6c3SFam Zheng         if (split && size > split_size) {
1452f66fd6c3SFam Zheng             size = split_size;
1453f66fd6c3SFam Zheng         }
1454f66fd6c3SFam Zheng         if (split) {
1455f66fd6c3SFam Zheng             snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
1456f66fd6c3SFam Zheng                     prefix, flat ? 'f' : 's', ++idx, postfix);
1457f66fd6c3SFam Zheng         } else if (flat) {
1458f66fd6c3SFam Zheng             snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
1459f66fd6c3SFam Zheng                     prefix, postfix);
1460f66fd6c3SFam Zheng         } else {
1461f66fd6c3SFam Zheng             snprintf(desc_filename, sizeof(desc_filename), "%s%s",
1462f66fd6c3SFam Zheng                     prefix, postfix);
1463f66fd6c3SFam Zheng         }
1464f66fd6c3SFam Zheng         snprintf(ext_filename, sizeof(ext_filename), "%s%s",
1465f66fd6c3SFam Zheng                 path, desc_filename);
1466f66fd6c3SFam Zheng 
14676c031aacSFam Zheng         if (vmdk_create_extent(ext_filename, size, flat, compress)) {
1468f66fd6c3SFam Zheng             return -EINVAL;
1469f66fd6c3SFam Zheng         }
1470f66fd6c3SFam Zheng         filesize -= size;
1471f66fd6c3SFam Zheng 
1472f66fd6c3SFam Zheng         /* Format description line */
1473f66fd6c3SFam Zheng         snprintf(desc_line, sizeof(desc_line),
1474f66fd6c3SFam Zheng                     desc_extent_line, size / 512, desc_filename);
1475f66fd6c3SFam Zheng         pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
1476f66fd6c3SFam Zheng     }
1477f66fd6c3SFam Zheng     /* generate descriptor file */
1478f66fd6c3SFam Zheng     snprintf(desc, sizeof(desc), desc_template,
1479f66fd6c3SFam Zheng             (unsigned int)time(NULL),
1480f66fd6c3SFam Zheng             parent_cid,
1481f66fd6c3SFam Zheng             fmt,
1482f66fd6c3SFam Zheng             parent_desc_line,
1483f66fd6c3SFam Zheng             ext_desc_lines,
1484f66fd6c3SFam Zheng             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
1485f66fd6c3SFam Zheng             total_size / (int64_t)(63 * 16 * 512));
1486f66fd6c3SFam Zheng     if (split || flat) {
1487f66fd6c3SFam Zheng         fd = open(
1488f66fd6c3SFam Zheng                 filename,
1489f66fd6c3SFam Zheng                 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1490f66fd6c3SFam Zheng                 0644);
1491f66fd6c3SFam Zheng     } else {
1492f66fd6c3SFam Zheng         fd = open(
1493f66fd6c3SFam Zheng                 filename,
1494f66fd6c3SFam Zheng                 O_WRONLY | O_BINARY | O_LARGEFILE,
1495f66fd6c3SFam Zheng                 0644);
1496f66fd6c3SFam Zheng     }
1497f66fd6c3SFam Zheng     if (fd < 0) {
1498f66fd6c3SFam Zheng         return -errno;
1499f66fd6c3SFam Zheng     }
1500f66fd6c3SFam Zheng     /* the descriptor offset = 0x200 */
1501f66fd6c3SFam Zheng     if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
1502f66fd6c3SFam Zheng         ret = -errno;
1503f66fd6c3SFam Zheng         goto exit;
1504f66fd6c3SFam Zheng     }
15051640366cSKirill A. Shutemov     ret = qemu_write_full(fd, desc, strlen(desc));
15061640366cSKirill A. Shutemov     if (ret != strlen(desc)) {
1507b781cce5SJuan Quintela         ret = -errno;
15081640366cSKirill A. Shutemov         goto exit;
15091640366cSKirill A. Shutemov     }
15101640366cSKirill A. Shutemov     ret = 0;
15111640366cSKirill A. Shutemov exit:
1512019d6b8fSAnthony Liguori     close(fd);
15131640366cSKirill A. Shutemov     return ret;
1514019d6b8fSAnthony Liguori }
1515019d6b8fSAnthony Liguori 
1516019d6b8fSAnthony Liguori static void vmdk_close(BlockDriverState *bs)
1517019d6b8fSAnthony Liguori {
15182bc3166cSKevin Wolf     BDRVVmdkState *s = bs->opaque;
15192bc3166cSKevin Wolf 
1520b3976d3cSFam Zheng     vmdk_free_extents(bs);
15212bc3166cSKevin Wolf 
15222bc3166cSKevin Wolf     migrate_del_blocker(s->migration_blocker);
15232bc3166cSKevin Wolf     error_free(s->migration_blocker);
1524019d6b8fSAnthony Liguori }
1525019d6b8fSAnthony Liguori 
15268b94ff85SPaolo Bonzini static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
1527019d6b8fSAnthony Liguori {
1528333c574dSFam Zheng     BDRVVmdkState *s = bs->opaque;
1529*29cdb251SPaolo Bonzini     int i, err;
1530*29cdb251SPaolo Bonzini     int ret = 0;
1531333c574dSFam Zheng 
1532333c574dSFam Zheng     for (i = 0; i < s->num_extents; i++) {
15338b94ff85SPaolo Bonzini         err = bdrv_co_flush(s->extents[i].file);
1534333c574dSFam Zheng         if (err < 0) {
1535333c574dSFam Zheng             ret = err;
1536333c574dSFam Zheng         }
1537333c574dSFam Zheng     }
1538333c574dSFam Zheng     return ret;
1539019d6b8fSAnthony Liguori }
1540019d6b8fSAnthony Liguori 
15414a1d5e1fSFam Zheng static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
15424a1d5e1fSFam Zheng {
15434a1d5e1fSFam Zheng     int i;
15444a1d5e1fSFam Zheng     int64_t ret = 0;
15454a1d5e1fSFam Zheng     int64_t r;
15464a1d5e1fSFam Zheng     BDRVVmdkState *s = bs->opaque;
15474a1d5e1fSFam Zheng 
15484a1d5e1fSFam Zheng     ret = bdrv_get_allocated_file_size(bs->file);
15494a1d5e1fSFam Zheng     if (ret < 0) {
15504a1d5e1fSFam Zheng         return ret;
15514a1d5e1fSFam Zheng     }
15524a1d5e1fSFam Zheng     for (i = 0; i < s->num_extents; i++) {
15534a1d5e1fSFam Zheng         if (s->extents[i].file == bs->file) {
15544a1d5e1fSFam Zheng             continue;
15554a1d5e1fSFam Zheng         }
15564a1d5e1fSFam Zheng         r = bdrv_get_allocated_file_size(s->extents[i].file);
15574a1d5e1fSFam Zheng         if (r < 0) {
15584a1d5e1fSFam Zheng             return r;
15594a1d5e1fSFam Zheng         }
15604a1d5e1fSFam Zheng         ret += r;
15614a1d5e1fSFam Zheng     }
15624a1d5e1fSFam Zheng     return ret;
15634a1d5e1fSFam Zheng }
15640e7e1989SKevin Wolf 
15650e7e1989SKevin Wolf static QEMUOptionParameter vmdk_create_options[] = {
1566db08adf5SKevin Wolf     {
1567db08adf5SKevin Wolf         .name = BLOCK_OPT_SIZE,
1568db08adf5SKevin Wolf         .type = OPT_SIZE,
1569db08adf5SKevin Wolf         .help = "Virtual disk size"
1570db08adf5SKevin Wolf     },
1571db08adf5SKevin Wolf     {
1572db08adf5SKevin Wolf         .name = BLOCK_OPT_BACKING_FILE,
1573db08adf5SKevin Wolf         .type = OPT_STRING,
1574db08adf5SKevin Wolf         .help = "File name of a base image"
1575db08adf5SKevin Wolf     },
1576db08adf5SKevin Wolf     {
1577db08adf5SKevin Wolf         .name = BLOCK_OPT_COMPAT6,
1578db08adf5SKevin Wolf         .type = OPT_FLAG,
1579db08adf5SKevin Wolf         .help = "VMDK version 6 image"
1580db08adf5SKevin Wolf     },
1581f66fd6c3SFam Zheng     {
1582f66fd6c3SFam Zheng         .name = BLOCK_OPT_SUBFMT,
1583f66fd6c3SFam Zheng         .type = OPT_STRING,
1584f66fd6c3SFam Zheng         .help =
1585f66fd6c3SFam Zheng             "VMDK flat extent format, can be one of "
15866c031aacSFam Zheng             "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
1587f66fd6c3SFam Zheng     },
15880e7e1989SKevin Wolf     { NULL }
15890e7e1989SKevin Wolf };
15900e7e1989SKevin Wolf 
1591019d6b8fSAnthony Liguori static BlockDriver bdrv_vmdk = {
1592019d6b8fSAnthony Liguori     .format_name    = "vmdk",
1593019d6b8fSAnthony Liguori     .instance_size  = sizeof(BDRVVmdkState),
1594019d6b8fSAnthony Liguori     .bdrv_probe     = vmdk_probe,
15956511ef77SKevin Wolf     .bdrv_open      = vmdk_open,
15962914caa0SPaolo Bonzini     .bdrv_read      = vmdk_co_read,
1597e183ef75SPaolo Bonzini     .bdrv_write     = vmdk_co_write,
1598019d6b8fSAnthony Liguori     .bdrv_close     = vmdk_close,
1599019d6b8fSAnthony Liguori     .bdrv_create    = vmdk_create,
1600c68b89acSKevin Wolf     .bdrv_co_flush_to_disk  = vmdk_co_flush,
1601f8a2e5e3SStefan Hajnoczi     .bdrv_co_is_allocated   = vmdk_co_is_allocated,
16024a1d5e1fSFam Zheng     .bdrv_get_allocated_file_size  = vmdk_get_allocated_file_size,
16030e7e1989SKevin Wolf 
16040e7e1989SKevin Wolf     .create_options = vmdk_create_options,
1605019d6b8fSAnthony Liguori };
1606019d6b8fSAnthony Liguori 
1607019d6b8fSAnthony Liguori static void bdrv_vmdk_init(void)
1608019d6b8fSAnthony Liguori {
1609019d6b8fSAnthony Liguori     bdrv_register(&bdrv_vmdk);
1610019d6b8fSAnthony Liguori }
1611019d6b8fSAnthony Liguori 
1612019d6b8fSAnthony Liguori block_init(bdrv_vmdk_init);
1613