xref: /openbmc/qemu/block/vhdx.h (revision 200280af0e19bfaeb9431eb0ee1ee2d8bf8d3a0a)
1 /*
2  * Block driver for Hyper-V VHDX Images
3  *
4  * Copyright (c) 2013 Red Hat, Inc.,
5  *
6  * Authors:
7  *  Jeff Cody <jcody@redhat.com>
8  *
9  *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
10  *  by Microsoft:
11  *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
12  *
13  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14  * See the COPYING.LIB file in the top-level directory.
15  *
16  */
17 
18 #ifndef BLOCK_VHDX_H
19 #define BLOCK_VHDX_H
20 
/* Binary size units.  KiB, MiB and GiB are plain int constants; TiB is
 * widened to uint64_t because 2^40 does not fit in a 32-bit int. */
#define KiB              (1 << 10)
#define MiB              (1 << 20)
#define GiB              (1 << 30)
#define TiB              ((uint64_t) 1 << 40)

/* Default size of the log: 1MiB */
#define DEFAULT_LOG_SIZE (1 << 20)
27 /* Structures and fields present in the VHDX file */
28 
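/* The header section has the following blocks,
 * each block is 64KB (a second copy of the region table, not drawn below,
 * is placed at 256KB; see VHDX_REGION_TABLE2_OFFSET):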
31  *
32  * _____________________________________________________________________________
33  * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
34  * |----------|---------------|------------|--------------|--------------------|
35  * |          |               |            |              |                    |
36  * 0.........64KB...........128KB........192KB..........256KB................1MB
37  */
38 
/* Size of each fixed block at the start of the file: 64KB */
#define VHDX_HEADER_BLOCK_SIZE      (64 << 10)

/* Byte offsets of the fixed blocks, as multiples of the block size */
#define VHDX_FILE_ID_OFFSET         0
#define VHDX_HEADER1_OFFSET         (1 * VHDX_HEADER_BLOCK_SIZE)
#define VHDX_HEADER2_OFFSET         (2 * VHDX_HEADER_BLOCK_SIZE)
#define VHDX_REGION_TABLE_OFFSET    (3 * VHDX_HEADER_BLOCK_SIZE)
#define VHDX_REGION_TABLE2_OFFSET   (4 * VHDX_HEADER_BLOCK_SIZE)

/* The header section ends at the 1MiB mark */
#define VHDX_HEADER_SECTION_END     (MiB * 1)
48 /*
49  * A note on the use of MS-GUID fields.  For more details on the GUID,
50  * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
51  *
52  * The VHDX specification only states that these are MS GUIDs, and which
53  * bytes are data1-data4. It makes no mention of what algorithm should be used
54  * to generate the GUID, nor what standard.  However, looking at the specified
55  * known GUID fields, it appears the GUIDs are:
56  *  Standard/DCE GUID type  (noted by 10b in the MSB of byte 0 of .data4)
57  *  Random algorithm        (noted by 0x4XXX for .data3)
58  */
59 
60 /* ---- HEADER SECTION STRUCTURES ---- */
61 
62 /* These structures are ones that are defined in the VHDX specification
63  * document */
64 
/* File type identifier: a 64-bit signature followed by an optional creator
 * string.  The signature constant spells "vhdxfile" when its bytes are read
 * in little-endian order (0x76 'v' is the least-significant byte). */
#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
    uint64_t    signature;              /* "vhdxfile" in ASCII */
    uint16_t    creator[256];           /* optional; utf-16 string to identify
                                           the vhdx file creator.  Diagnostic
                                           only.  256 code units = 512 bytes */
} VHDXFileIdentifier;
72 
73 
74 /* the guid is a 16 byte unique ID - the definition for this used by
75  * Microsoft is not just 16 bytes though - it is a structure that is defined,
76  * so we need to follow it here so that endianness does not trip us up */
77 
/* 16-byte GUID split into three integer fields plus 8 raw bytes.  Only the
 * integer fields are byte-swapped by leguid_to_cpus() / cpu_to_leguids();
 * data4 is a byte array and has no endianness. */
typedef struct QEMU_PACKED MSGUID {
    uint32_t  data1;                    /* 32-bit field, byte-swapped as one */
    uint16_t  data2;                    /* 16-bit field, byte-swapped as one */
    uint16_t  data3;                    /* 16-bit field, byte-swapped as one */
    uint8_t   data4[8];                 /* raw bytes, never swapped */
} MSGUID;
84 
/* Evaluates to 1 when the two MSGUID objects a and b are bytewise identical,
 * 0 otherwise.  Both arguments must be lvalues, as their address is taken. */
#define guid_eq(a, b) \
    (!memcmp(&(a), &(b), sizeof(MSGUID)))
87 
/* NOTE(review): the fields declared in VHDXHeader below add up to 80 bytes
 * (4 + 4 + 8 + 3 * 16 + 2 + 2 + 4 + 8); the "582 bytes" figure in the comment
 * that follows is not derivable from this struct - confirm against the spec */
#define VHDX_HEADER_SIZE (4 * 1024)   /* although the vhdx_header struct in disk
                                         is only 582 bytes, for purposes of crc
                                         the header is the first 4KB of the 64KB
                                         block */

/* The full header is 4KB, although the actual header data is much smaller.
 * But for the checksum calculation, it is over the entire 4KB structure,
 * not just the defined portion of it */
#define VHDX_HEADER_SIGNATURE 0x64616568    /* "head", little-endian bytes */
typedef struct QEMU_PACKED VHDXHeader {
    uint32_t    signature;              /* "head" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash of the whole header */
    uint64_t    sequence_number;        /* Seq number of this header.  Each
                                           VHDX file has 2 of these headers,
                                           and only the header with the highest
                                           sequence number is valid */
    MSGUID      file_write_guid;        /* 128 bit unique identifier. Must be
                                           updated to new, unique value before
                                           the first modification is made to
                                           file */
    MSGUID      data_write_guid;        /* 128 bit unique identifier. Must be
                                           updated to new, unique value before
                                           the first modification is made to
                                           visible data.   Visible data is
                                           defined as:
                                                    - system & user metadata
                                                    - raw block data
                                                    - disk size
                                                    - any change that will
                                                      cause the virtual disk
                                                      sector read to differ

                                           This does not need to change if
                                           blocks are re-arranged */
    MSGUID      log_guid;               /* 128 bit unique identifier. If zero,
                                           there is no valid log. If non-zero,
                                           log entries with this guid are
                                           valid. */
    uint16_t    log_version;            /* version of the log format. Must be
                                           set to zero */
    uint16_t    version;                /* version of the vhdx file.  Currently,
                                           only supported version is "1" */
    uint32_t    log_length;             /* length of the log.  Must be multiple
                                           of 1MB */
    uint64_t    log_offset;             /* byte offset in the file of the log.
                                           Must also be a multiple of 1MB */
} VHDXHeader;
135 
/* Header for the region table block.  The header itself is 16 bytes
 * (four 32-bit fields). */
#define VHDX_REGION_SIGNATURE  0x69676572  /* "regi" in ASCII */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
    uint32_t    signature;              /* "regi" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
    uint32_t    entry_count;            /* number of valid entries */
    uint32_t    reserved;               /* reserved; no meaning defined here */
} VHDXRegionTableHeader;
144 
/* Individual region table entry.  There may be a maximum of 2047 of these
 *
 *  There are two known region table properties.  Both are required.
 *  BAT (block allocation table):  2DC27766F62342009D64115E9BFD4A08
 *  Metadata:                      8B7CA20647904B9AB8FE575F050F886E
 *
 *  Each entry is 32 bytes (16 + 8 + 4 + 4).
 */
#define VHDX_REGION_ENTRY_REQUIRED  0x01    /* if set, parser must understand
                                               this entry in order to open
                                               file */
typedef struct QEMU_PACKED VHDXRegionTableEntry {
    MSGUID      guid;                   /* 128-bit unique identifier */
    uint64_t    file_offset;            /* offset of the object in the file.
                                           Must be multiple of 1MB */
    uint32_t    length;                 /* length, in bytes, of the object */
    uint32_t    data_bits;              /* flag bits; presumably the field that
                                           VHDX_REGION_ENTRY_REQUIRED applies
                                           to - confirm against the users */
} VHDXRegionTableEntry;
161 
162 
/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_MIN_SIZE (1024 * 1024)     /* 1MB */
#define VHDX_LOG_SECTOR_SIZE 4096
#define VHDX_LOG_HDR_SIZE 64                /* equals the summed field sizes of
                                               VHDXLogEntryHeader below */
#define VHDX_LOG_SIGNATURE 0x65676f6c       /* "loge", little-endian bytes */
typedef struct QEMU_PACKED VHDXLogEntryHeader {
    uint32_t    signature;              /* "loge" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash.  NOTE(review): this
                                           used to read "of the 64KB table",
                                           which looks copied from the region
                                           table header; the spec appears to
                                           define it over the whole log entry
                                           (entry_length bytes) - confirm */
    uint32_t    entry_length;           /* length in bytes of the log entry.
                                           NOTE(review): this used to read
                                           "multiple of 1MB"; the spec appears
                                           to require a multiple of 4KB
                                           (VHDX_LOG_SECTOR_SIZE) - confirm */
    uint32_t    tail;                   /* byte offset of first log entry of a
                                           seq, where this entry is the last
                                           entry */
    uint64_t    sequence_number;        /* incremented with each log entry.
                                           May not be zero. */
    uint32_t    descriptor_count;       /* number of descriptors in this log
                                           entry; zero is allowed */
    uint32_t    reserved;
    MSGUID      log_guid;               /* value of the log_guid from
                                           vhdx_header.  If not found in
                                           vhdx_header, it is invalid */
    uint64_t    flushed_file_offset;    /* see spec for full details - this
                                           should be vhdx file size in bytes */
    uint64_t    last_file_offset;       /* size in bytes that all allocated
                                           file structures fit into */
} VHDXLogEntryHeader;
188 
/* Log descriptor.  One layout serves two descriptor kinds, told apart by the
 * signature: a "zero" descriptor and a "desc" (data) descriptor.  The two
 * anonymous unions give the kind-specific names for the same bytes.  The
 * fields add up to VHDX_LOG_DESC_SIZE (4 + 4 + 8 + 8 + 8 = 32 bytes). */
#define VHDX_LOG_DESC_SIZE 32
#define VHDX_LOG_DESC_SIGNATURE 0x63736564  /* "desc", little-endian bytes */
#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a  /* "zero", little-endian bytes */
typedef struct QEMU_PACKED VHDXLogDescriptor {
    uint32_t    signature;              /* "zero" or "desc" in ASCII */
    union  {
        uint32_t    reserved;           /* zero desc */
        uint32_t    trailing_bytes;     /* data desc: bytes 4092-4095 (the last
                                           4 bytes) of the data sector */
    };
    union {
        uint64_t    zero_length;        /* zero desc: length of the section to
                                           zero */
        uint64_t    leading_bytes;      /* data desc: bytes 0-7 of the data
                                           sector */
    };
    uint64_t    file_offset;            /* file offset to write zeros - multiple
                                           of 4kB */
    uint64_t    sequence_number;        /* must match same field in
                                           vhdx_log_entry_header */
} VHDXLogDescriptor;
210 
/* One log data sector.  The fields add up to 4096 bytes (4 + 4 + 4084 + 4),
 * i.e. VHDX_LOG_SECTOR_SIZE.  The first 8 and last 4 bytes of the original
 * sector are displaced by the signature and sequence fields; they are carried
 * in the data descriptor instead (leading_bytes / trailing_bytes). */
#define VHDX_LOG_DATA_SIGNATURE 0x61746164  /* "data", little-endian bytes */
typedef struct QEMU_PACKED VHDXLogDataSector {
    uint32_t    data_signature;         /* "data" in ASCII */
    uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
    uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
                                           see the data descriptor field for the
                                           other missing bytes */
    uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
220 
221 
222 
/* Block state values.  A payload block and a sector block each have their
 * own set of states, so the same number can mean different things depending
 * on which kind of block the entry describes. */

/* payload block states (4 is not assigned here) */
#define PAYLOAD_BLOCK_NOT_PRESENT       0x0
#define PAYLOAD_BLOCK_UNDEFINED         0x1
#define PAYLOAD_BLOCK_ZERO              0x2
#define PAYLOAD_BLOCK_UNMAPPED          0x3
#define PAYLOAD_BLOCK_UNMAPPED_v095     0x5
#define PAYLOAD_BLOCK_FULLY_PRESENT     0x6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 0x7

/* sector block states */
#define SB_BLOCK_NOT_PRESENT    0x0
#define SB_BLOCK_PRESENT        0x6
236 
/* Upper bound on sectors per block, per the spec: 2^23 */
#define VHDX_MAX_SECTORS_PER_BLOCK  0x800000

/* Layout of a 64-bit BAT entry: the upper 44 bits are the file offset in 1MB
 * units, the lower 3 bits are the state, and the bits in between are
 * reserved. */
#define VHDX_BAT_STATE_BIT_MASK 0x7
#define VHDX_BAT_FILE_OFF_MASK  (~0xFFFFFULL)       /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
245 
246 /* ---- METADATA REGION STRUCTURES ---- */
247 
/* Metadata table sizing: the table is a header followed by up to
 * VHDX_METADATA_MAX_ENTRIES entries.  The header below is the same size as an
 * entry (8 + 2 + 2 + 5 * 4 = 32 bytes), which is why the maximum table size
 * is computed as (max entries + 1) * entry size = 64KB. */
#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
    uint64_t    signature;              /* "metadata" in ASCII */
    uint16_t    reserved;
    uint16_t    entry_count;            /* number table entries. <= 2047 */
    uint32_t    reserved2[5];           /* pads the header to 32 bytes */
} VHDXMetadataTableHeader;
259 
/* Flag bits held in VHDXMetadataTableEntry.data_bits */
#define VHDX_META_FLAGS_IS_USER         0x01    /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02    /* virtual disk metadata if set,
                                                   otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED     0x04    /* parser must understand this
                                                   entry to open the file */
/* One metadata table entry; 32 bytes (16 + 4 + 4 + 4 + 4), matching
 * VHDX_METADATA_ENTRY_SIZE */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
    MSGUID      item_id;                /* 128-bit identifier for metadata */
    uint32_t    offset;                 /* byte offset of the metadata.  At
                                           least 64kB.  Relative to start of
                                           metadata region */
                                        /* note: if length = 0, so is offset */
    uint32_t    length;                 /* length of metadata. <= 1MB. */
    uint32_t    data_bits;              /* least-significant 3 bits are flags,
                                           the rest are reserved (see above) */
    uint32_t    reserved2;
} VHDXMetadataTableEntry;
276 
/* Flag bits held in VHDXFileParameters.data_bits */
#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01   /* Do not change any blocks to
                                                   be BLOCK_NOT_PRESENT.
                                                   If set indicates a fixed
                                                   size VHDX file */
#define VHDX_PARAMS_HAS_PARENT           0x02    /* has parent / backing file */
/* Allowed range for VHDXFileParameters.block_size */
#define VHDX_BLOCK_SIZE_MIN             (1   * MiB)
#define VHDX_BLOCK_SIZE_MAX             (256 * MiB)
/* File parameters metadata item; 8 bytes */
typedef struct QEMU_PACKED VHDXFileParameters {
    uint32_t    block_size;             /* size of each payload block, always
                                           power of 2, <= 256MB and >= 1MB. */
    uint32_t data_bits;                 /* least-significant 2 bits are flags,
                                           the rest are reserved (see above) */
} VHDXFileParameters;
290 
/* Largest virtual disk size: 64 TiB.  The uint64_t cast keeps the
 * multiplication in 64-bit arithmetic. */
#define VHDX_MAX_IMAGE_SIZE  ((uint64_t) 64 * TiB)
/* Virtual disk size metadata item; a single 64-bit value */
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
    uint64_t    virtual_disk_size;      /* Size of the virtual disk, in bytes.
                                           Must be multiple of the sector size,
                                           max of 64TB */
} VHDXVirtualDiskSize;
297 
/* Page 83 data metadata item; a single 16-byte GUID */
typedef struct QEMU_PACKED VHDXPage83Data {
    MSGUID      page_83_data;           /* unique id for scsi devices that
                                           support page 0x83 */
} VHDXPage83Data;
302 
/* Logical sector size metadata item; a single 32-bit value */
typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
    uint32_t    logical_sector_size;    /* virtual disk sector size (in bytes).
                                           Can only be 512 or 4096 bytes */
} VHDXVirtualDiskLogicalSectorSize;
307 
/* Physical sector size metadata item; a single 32-bit value */
typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
    uint32_t    physical_sector_size;   /* physical sector size (in bytes).
                                           Can only be 512 or 4096 bytes */
} VHDXVirtualDiskPhysicalSectorSize;
312 
/* Header of the parent locator metadata item; 20 bytes (16 + 2 + 2).
 * key_value_count gives the number of VHDXParentLocatorEntry records. */
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
    MSGUID      locator_type;           /* type of the parent virtual disk. */
    uint16_t    reserved;
    uint16_t    key_value_count;        /* number of key/value pairs for this
                                           locator */
} VHDXParentLocatorHeader;
319 
/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs.
 * Each entry is 12 bytes (4 + 4 + 2 + 2) and describes one key/value pair by
 * offset and length. */
typedef struct QEMU_PACKED VHDXParentLocatorEntry {
    uint32_t    key_offset;             /* offset in metadata for key, > 0 */
    uint32_t    value_offset;           /* offset in metadata for value, >0 */
    uint16_t    key_length;             /* length of entry key, > 0 */
    uint16_t    value_length;           /* length of entry value, > 0 */
} VHDXParentLocatorEntry;
327 
328 
329 /* ----- END VHDX SPECIFICATION STRUCTURES ---- */
330 
/* Driver-side (not on-disk) collection holding one metadata table entry for
 * each metadata item this header defines a structure for. */
typedef struct VHDXMetadataEntries {
    VHDXMetadataTableEntry file_parameters_entry;
    VHDXMetadataTableEntry virtual_disk_size_entry;
    VHDXMetadataTableEntry page83_data_entry;
    VHDXMetadataTableEntry logical_sector_size_entry;
    VHDXMetadataTableEntry phys_sector_size_entry;
    VHDXMetadataTableEntry parent_locator_entry;
    uint16_t present;                   /* NOTE(review): presumably a bitmask
                                           of which entries above were found -
                                           confirm against the users */
} VHDXMetadataEntries;
340 
/* Driver-side (not on-disk) bookkeeping for the log.  The field meanings are
 * not established by this header; the notes below are inferred from names
 * and types and should be confirmed against the log implementation. */
typedef struct VHDXLogEntries {
    uint64_t offset;                    /* presumably file offset of the log */
    uint64_t length;                    /* presumably length of the log */
    uint32_t write;                     /* presumably current write position */
    uint32_t read;                      /* presumably current read position */
    VHDXLogEntryHeader *hdr;            /* pointer to a log entry header */
    void *desc_buffer;                  /* untyped buffer; presumably holds
                                           descriptors */
    uint64_t sequence;                  /* presumably a log sequence number */
    uint32_t tail;                      /* cf. VHDXLogEntryHeader.tail */
} VHDXLogEntries;
351 
/* One node of a QLIST of [start, end] ranges; BDRVVHDXState.regions is the
 * list head.  NOTE(review): whether 'end' is inclusive or exclusive is not
 * visible from this header - confirm against the users. */
typedef struct VHDXRegionEntry {
    uint64_t start;
    uint64_t end;
    QLIST_ENTRY(VHDXRegionEntry) entries;   /* list linkage */
} VHDXRegionEntry;
357 
/* Per-image driver state for an open VHDX file.  It mostly groups copies of
 * the structures declared above plus values derived from them.  Notes marked
 * "presumably" are inferred from names only and need confirming against the
 * driver source. */
typedef struct BDRVVHDXState {
    CoMutex lock;

    /* the two file headers */
    int curr_header;                     /* presumably the index into headers[]
                                            of the header in use */
    VHDXHeader *headers[2];

    /* region table */
    VHDXRegionTableHeader rt;
    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */

    /* metadata table */
    VHDXMetadataTableHeader metadata_hdr;
    VHDXMetadataEntries metadata_entries;

    /* file parameters; the *_bits fields are presumably log2 of their
     * counterparts */
    VHDXFileParameters params;
    uint32_t block_size;
    uint32_t block_size_bits;
    uint32_t sectors_per_block;
    uint32_t sectors_per_block_bits;

    /* virtual disk geometry */
    uint64_t virtual_disk_size;
    uint32_t logical_sector_size;
    uint32_t physical_sector_size;

    uint64_t chunk_ratio;
    uint32_t chunk_ratio_bits;
    uint32_t logical_sector_size_bits;

    /* block allocation table */
    uint32_t bat_entries;                /* presumably the number of entries in
                                            bat[] */
    VHDXBatEntry *bat;
    uint64_t bat_offset;

    bool first_visible_write;
    MSGUID session_guid;

    VHDXLogEntries log;

    /* parent locator */
    VHDXParentLocatorHeader parent_header;
    VHDXParentLocatorEntry *parent_entries;

    Error *migration_blocker;

    bool log_replayed_on_open;

    QLIST_HEAD(, VHDXRegionEntry) regions;  /* list of VHDXRegionEntry */
} BDRVVHDXState;
403 
/* The functions declared below are implemented outside this header.  The
 * notes on them are inferred from names and signatures only and should be
 * confirmed against the implementations. */

/* Presumably fills *guid with a newly generated GUID. */
void vhdx_guid_generate(MSGUID *guid);

/* Presumably rewrites the file headers; returns an int status. */
int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
                        MSGUID *log_guid);

/* Checksum helpers operating on a buffer of 'size' bytes.  crc_offset is
 * presumably the byte offset of the checksum field within buf. */
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset);

bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);

/* Log handling.  *flushed is an output flag; errors are reported through the
 * int return value and errp. */
int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
                   Error **errp);

/* Presumably writes 'length' bytes of 'data' destined for file 'offset'
 * through the log and flushes it. */
int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
                             void *data, uint32_t length, uint64_t offset);
420 
421 static inline void leguid_to_cpus(MSGUID *guid)
422 {
423     guid->data1 = le32_to_cpu(guid->data1);
424     guid->data2 = le16_to_cpu(guid->data2);
425     guid->data3 = le16_to_cpu(guid->data3);
426 }
427 
428 static inline void cpu_to_leguids(MSGUID *guid)
429 {
430     guid->data1 = cpu_to_le32(guid->data1);
431     guid->data2 = cpu_to_le16(guid->data2);
432     guid->data3 = cpu_to_le16(guid->data3);
433 }
434 
/* Byte-order conversion helpers, one import/export pair per structure.  They
 * are implemented outside this header.  By analogy with leguid_to_cpus() /
 * cpu_to_leguids(), "import" presumably converts little-endian fields to CPU
 * order and "export" the reverse - confirm against the implementation.
 * Note that vhdx_header_le_export() takes separate source and destination
 * headers, while the other helpers take a single pointer. */
void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
void vhdx_log_data_le_import(VHDXLogDataSector *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
/* Presumably called before a write that changes user-visible data (cf.
 * BDRVVHDXState.first_visible_write); returns an int status. */
int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
452 
453 #endif
454