xref: /openbmc/qemu/block/vhdx.h (revision ad6ef0a42e314a8c6ac6c96d5f6e607a1e5644b5)
1  /*
2   * Block driver for Hyper-V VHDX Images
3   *
4   * Copyright (c) 2013 Red Hat, Inc.,
5   *
6   * Authors:
7   *  Jeff Cody <jcody@redhat.com>
8   *
9   *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
10   *  by Microsoft:
11   *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
12   *
13   * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14   * See the COPYING.LIB file in the top-level directory.
15   *
16   */
17  
18  #ifndef BLOCK_VHDX_H
19  #define BLOCK_VHDX_H
20  #include "qemu/units.h"
21  
/* Default log size: 1 MiB.
 * Note: can't use 1 * MiB, because it's passed to stringify() */
#define DEFAULT_LOG_SIZE 1048576
24  
25  /* Structures and fields present in the VHDX file */
26  
/* The header section has the following blocks,
 * each block is 64KB:
 *
 * _____________________________________________________________________________
 * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
 * |----------|---------------|------------|--------------|--------------------|
 * |          |               |            |              |                    |
 * 0.........64KB...........128KB........192KB..........256KB................1MB
 *
 * Note that VHDX_REGION_TABLE2_OFFSET (the second region table) is at 256KB,
 * i.e. it occupies the first 64KB block of the area drawn as reserved above.
 */

#define VHDX_HEADER_BLOCK_SIZE      (64 * KiB)

/* Byte offsets of each block, from the start of the file */
#define VHDX_FILE_ID_OFFSET         0
#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 1)
#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 2)
#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE * 3)
#define VHDX_REGION_TABLE2_OFFSET   (VHDX_HEADER_BLOCK_SIZE * 4)

/* The header section ends at the 1MB mark */
#define VHDX_HEADER_SECTION_END     (1 * MiB)
/*
 * A note on the use of MS-GUID fields.  For more details on the GUID,
 * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
 *
 * The VHDX specification only states that these are MS GUIDs, and which
 * bytes are data1-data4. It makes no mention of what algorithm should be used
 * to generate the GUID, nor what standard.  However, looking at the specified
 * known GUID fields, it appears the GUIDs are:
 *  Standard/DCE GUID type  (noted by 10b in the MSB of byte 0 of .data4)
 *  Random algorithm        (noted by 0x4XXX for .data3)
 */
57  
/* ---- HEADER SECTION STRUCTURES ---- */

/* These structures are the ones that are defined in the VHDX specification
 * document */
62  
#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */

/* File type identifier, found at VHDX_FILE_ID_OFFSET */
typedef struct VHDXFileIdentifier {
    uint64_t    signature;              /* "vhdxfile" in ASCII */
    uint16_t    creator[256];           /* optional; utf-16 string to identify
                                           the vhdx file creator.  Diagnostic
                                           only */
} VHDXFileIdentifier;
70  
71  
72  /* the guid is a 16 byte unique ID - the definition for this used by
73   * Microsoft is not just 16 bytes though - it is a structure that is defined,
74   * so we need to follow it here so that endianness does not trip us up */
75  
76  typedef struct QEMU_PACKED MSGUID {
77      uint32_t  data1;
78      uint16_t  data2;
79      uint16_t  data3;
80      uint8_t   data4[8];
81  } MSGUID;
82  
/* Evaluates to true when the two MSGUID objects a and b are byte-for-byte
 * identical.  Both arguments must be lvalues, as their address is taken. */
#define guid_eq(a, b) \
    (memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
85  
86  #define VHDX_HEADER_SIZE (4 * KiB)    /* although the vhdx_header struct in disk
87                                           is only 582 bytes, for purposes of crc
88                                           the header is the first 4KB of the 64KB
89                                           block */
90  
91  /* The full header is 4KB, although the actual header data is much smaller.
92   * But for the checksum calculation, it is over the entire 4KB structure,
93   * not just the defined portion of it */
94  #define VHDX_HEADER_SIGNATURE 0x64616568
95  typedef struct QEMU_PACKED VHDXHeader {
96      uint32_t    signature;              /* "head" in ASCII */
97      uint32_t    checksum;               /* CRC-32C hash of the whole header */
98      uint64_t    sequence_number;        /* Seq number of this header.  Each
99                                             VHDX file has 2 of these headers,
100                                             and only the header with the highest
101                                             sequence number is valid */
102      MSGUID      file_write_guid;        /* 128 bit unique identifier. Must be
103                                             updated to new, unique value before
104                                             the first modification is made to
105                                             file */
106      MSGUID      data_write_guid;        /* 128 bit unique identifier. Must be
107                                             updated to new, unique value before
108                                             the first modification is made to
109                                             visible data.   Visbile data is
110                                             defined as:
111                                                      - system & user metadata
112                                                      - raw block data
113                                                      - disk size
114                                                      - any change that will
115                                                        cause the virtual disk
116                                                        sector read to differ
117  
118                                             This does not need to change if
119                                             blocks are re-arranged */
120      MSGUID      log_guid;               /* 128 bit unique identifier. If zero,
121                                             there is no valid log. If non-zero,
122                                             log entries with this guid are
123                                             valid. */
124      uint16_t    log_version;            /* version of the log format. Must be
125                                             set to zero */
126      uint16_t    version;                /* version of the vhdx file.  Currently,
127                                             only supported version is "1" */
128      uint32_t    log_length;             /* length of the log.  Must be multiple
129                                             of 1MB */
130      uint64_t    log_offset;             /* byte offset in the file of the log.
131                                             Must also be a multiple of 1MB */
132  } VHDXHeader;
133  
134  /* Header for the region table block */
135  #define VHDX_REGION_SIGNATURE  0x69676572  /* "regi" in ASCII */
136  typedef struct QEMU_PACKED VHDXRegionTableHeader {
137      uint32_t    signature;              /* "regi" in ASCII */
138      uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
139      uint32_t    entry_count;            /* number of valid entries */
140      uint32_t    reserved;
141  } VHDXRegionTableHeader;
142  
143  /* Individual region table entry.  There may be a maximum of 2047 of these
144   *
145   *  There are two known region table properties.  Both are required.
146   *  BAT (block allocation table):  2DC27766F62342009D64115E9BFD4A08
147   *  Metadata:                      8B7CA20647904B9AB8FE575F050F886E
148   */
149  #define VHDX_REGION_ENTRY_REQUIRED  0x01    /* if set, parser must understand
150                                                 this entry in order to open
151                                                 file */
152  typedef struct QEMU_PACKED VHDXRegionTableEntry {
153      MSGUID      guid;                   /* 128-bit unique identifier */
154      uint64_t    file_offset;            /* offset of the object in the file.
155                                             Must be multiple of 1MB */
156      uint32_t    length;                 /* length, in bytes, of the object */
157      uint32_t    data_bits;
158  } VHDXRegionTableEntry;
159  
160  
161  /* ---- LOG ENTRY STRUCTURES ---- */
162  #define VHDX_LOG_MIN_SIZE (1 * MiB)
163  #define VHDX_LOG_SECTOR_SIZE (4 * KiB)
164  #define VHDX_LOG_HDR_SIZE 64
165  #define VHDX_LOG_SIGNATURE 0x65676f6c
166  typedef struct QEMU_PACKED VHDXLogEntryHeader {
167      uint32_t    signature;              /* "loge" in ASCII */
168      uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
169      uint32_t    entry_length;           /* length in bytes, multiple of 1MB */
170      uint32_t    tail;                   /* byte offset of first log entry of a
171                                             seq, where this entry is the last
172                                             entry */
173      uint64_t    sequence_number;        /* incremented with each log entry.
174                                             May not be zero. */
175      uint32_t    descriptor_count;       /* number of descriptors in this log
176                                             entry, must be >= 0 */
177      uint32_t    reserved;
178      MSGUID      log_guid;               /* value of the log_guid from
179                                             vhdx_header.  If not found in
180                                             vhdx_header, it is invalid */
181      uint64_t    flushed_file_offset;    /* see spec for full details - this
182                                             should be vhdx file size in bytes */
183      uint64_t    last_file_offset;       /* size in bytes that all allocated
184                                             file structures fit into */
185  } VHDXLogEntryHeader;
186  
187  #define VHDX_LOG_DESC_SIZE 32
188  #define VHDX_LOG_DESC_SIGNATURE 0x63736564
189  #define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
190  typedef struct QEMU_PACKED VHDXLogDescriptor {
191      uint32_t    signature;              /* "zero" or "desc" in ASCII */
192      union  {
193          uint32_t    reserved;           /* zero desc */
194          uint32_t    trailing_bytes;     /* data desc: bytes 4092-4096 of the
195                                             data sector */
196      };
197      union {
198          uint64_t    zero_length;        /* zero desc: length of the section to
199                                             zero */
200          uint64_t    leading_bytes;      /* data desc: bytes 0-7 of the data
201                                             sector */
202      };
203      uint64_t    file_offset;            /* file offset to write zeros - multiple
204                                             of 4kB */
205      uint64_t    sequence_number;        /* must match same field in
206                                             vhdx_log_entry_header */
207  } VHDXLogDescriptor;
208  
209  #define VHDX_LOG_DATA_SIGNATURE 0x61746164
210  typedef struct QEMU_PACKED VHDXLogDataSector {
211      uint32_t    data_signature;         /* "data" in ASCII */
212      uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
213      uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
214                                             see the data descriptor field for the
215                                             other missing bytes */
216      uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
217  } VHDXLogDataSector;
218  
219  
220  
/* block states - different state values depending on whether it is a
 * payload block, or a sector block. */

/* payload block states */
#define PAYLOAD_BLOCK_NOT_PRESENT       0
#define PAYLOAD_BLOCK_UNDEFINED         1
#define PAYLOAD_BLOCK_ZERO              2
#define PAYLOAD_BLOCK_UNMAPPED          3
#define PAYLOAD_BLOCK_UNMAPPED_v095     5
#define PAYLOAD_BLOCK_FULLY_PRESENT     6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7

/* sector block states */
#define SB_BLOCK_NOT_PRESENT    0
#define SB_BLOCK_PRESENT        6
234  
/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK  (1 << 23)

/* A BAT entry is a 64-bit value: the upper 44 bits are the file offset in
 * 1MB units, the lower 3 bits are the state; the other bits are reserved */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000ULL /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
243  
/* ---- METADATA REGION STRUCTURES ---- */

#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
/* Maximum table size in bytes: the header slot plus the maximum number of
 * entries, each VHDX_METADATA_ENTRY_SIZE bytes */
#define VHDX_METADATA_TABLE_MAX_SIZE \
    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES + 1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
251  typedef struct QEMU_PACKED VHDXMetadataTableHeader {
252      uint64_t    signature;              /* "metadata" in ASCII */
253      uint16_t    reserved;
254      uint16_t    entry_count;            /* number table entries. <= 2047 */
255      uint32_t    reserved2[5];
256  } VHDXMetadataTableHeader;
257  
258  #define VHDX_META_FLAGS_IS_USER         0x01    /* max 1024 entries */
259  #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02    /* virtual disk metadata if set,
260                                                     otherwise file metadata */
261  #define VHDX_META_FLAGS_IS_REQUIRED     0x04    /* parse must understand this
262                                                     entry to open the file */
263  typedef struct QEMU_PACKED VHDXMetadataTableEntry {
264      MSGUID      item_id;                /* 128-bit identifier for metadata */
265      uint32_t    offset;                 /* byte offset of the metadata.  At
266                                             least 64kB.  Relative to start of
267                                             metadata region */
268                                          /* note: if length = 0, so is offset */
269      uint32_t    length;                 /* length of metadata. <= 1MB. */
270      uint32_t    data_bits;              /* least-significant 3 bits are flags,
271                                             the rest are reserved (see above) */
272      uint32_t    reserved2;
273  } VHDXMetadataTableEntry;
274  
275  #define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01   /* Do not change any blocks to
276                                                     be BLOCK_NOT_PRESENT.
277                                                     If set indicates a fixed
278                                                     size VHDX file */
279  #define VHDX_PARAMS_HAS_PARENT           0x02    /* has parent / backing file */
280  #define VHDX_BLOCK_SIZE_MIN             (1   * MiB)
281  #define VHDX_BLOCK_SIZE_MAX             (256 * MiB)
282  typedef struct QEMU_PACKED VHDXFileParameters {
283      uint32_t    block_size;             /* size of each payload block, always
284                                             power of 2, <= 256MB and >= 1MB. */
285      uint32_t data_bits;                 /* least-significant 2 bits are flags,
286                                             the rest are reserved (see above) */
287  } VHDXFileParameters;
288  
289  #define VHDX_MAX_IMAGE_SIZE  ((uint64_t) 64 * TiB)
290  typedef struct QEMU_PACKED VHDXVirtualDiskSize {
291      uint64_t    virtual_disk_size;      /* Size of the virtual disk, in bytes.
292                                             Must be multiple of the sector size,
293                                             max of 64TB */
294  } VHDXVirtualDiskSize;
295  
296  typedef struct QEMU_PACKED VHDXPage83Data {
297      MSGUID      page_83_data;           /* unique id for scsi devices that
298                                             support page 0x83 */
299  } VHDXPage83Data;
300  
301  typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
302      uint32_t    logical_sector_size;    /* virtual disk sector size (in bytes).
303                                             Can only be 512 or 4096 bytes */
304  } VHDXVirtualDiskLogicalSectorSize;
305  
306  typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
307      uint32_t    physical_sector_size;   /* physical sector size (in bytes).
308                                             Can only be 512 or 4096 bytes */
309  } VHDXVirtualDiskPhysicalSectorSize;
310  
311  typedef struct QEMU_PACKED VHDXParentLocatorHeader {
312      MSGUID      locator_type;           /* type of the parent virtual disk. */
313      uint16_t    reserved;
314      uint16_t    key_value_count;        /* number of key/value pairs for this
315                                             locator */
316  } VHDXParentLocatorHeader;
317  
318  /* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
319  typedef struct QEMU_PACKED VHDXParentLocatorEntry {
320      uint32_t    key_offset;             /* offset in metadata for key, > 0 */
321      uint32_t    value_offset;           /* offset in metadata for value, >0 */
322      uint16_t    key_length;             /* length of entry key, > 0 */
323      uint16_t    value_length;           /* length of entry value, > 0 */
324  } VHDXParentLocatorEntry;
325  
326  
327  /* ----- END VHDX SPECIFICATION STRUCTURES ---- */
328  
329  typedef struct VHDXMetadataEntries {
330      VHDXMetadataTableEntry file_parameters_entry;
331      VHDXMetadataTableEntry virtual_disk_size_entry;
332      VHDXMetadataTableEntry page83_data_entry;
333      VHDXMetadataTableEntry logical_sector_size_entry;
334      VHDXMetadataTableEntry phys_sector_size_entry;
335      VHDXMetadataTableEntry parent_locator_entry;
336      uint16_t present;
337  } VHDXMetadataEntries;
338  
339  typedef struct VHDXLogEntries {
340      uint64_t offset;
341      uint64_t length;
342      uint32_t write;
343      uint32_t read;
344      VHDXLogEntryHeader *hdr;
345      void *desc_buffer;
346      uint64_t sequence;
347      uint32_t tail;
348  } VHDXLogEntries;
349  
350  typedef struct VHDXRegionEntry {
351      uint64_t start;
352      uint64_t end;
353      QLIST_ENTRY(VHDXRegionEntry) entries;
354  } VHDXRegionEntry;
355  
356  typedef struct BDRVVHDXState {
357      CoMutex lock;
358  
359      int curr_header;
360      VHDXHeader *headers[2];
361  
362      VHDXRegionTableHeader rt;
363      VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
364      VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */
365  
366      VHDXMetadataTableHeader metadata_hdr;
367      VHDXMetadataEntries metadata_entries;
368  
369      VHDXFileParameters params;
370      uint32_t block_size;
371      uint32_t block_size_bits;
372      uint32_t sectors_per_block;
373      uint32_t sectors_per_block_bits;
374  
375      uint64_t virtual_disk_size;
376      uint32_t logical_sector_size;
377      uint32_t physical_sector_size;
378  
379      uint64_t chunk_ratio;
380      uint32_t chunk_ratio_bits;
381      uint32_t logical_sector_size_bits;
382  
383      uint32_t bat_entries;
384      VHDXBatEntry *bat;
385      uint64_t bat_offset;
386  
387      bool first_visible_write;
388      MSGUID session_guid;
389  
390      VHDXLogEntries log;
391  
392      VHDXParentLocatorHeader parent_header;
393      VHDXParentLocatorEntry *parent_entries;
394  
395      Error *migration_blocker;
396  
397      bool log_replayed_on_open;
398  
399      QLIST_HEAD(, VHDXRegionEntry) regions;
400  } BDRVVHDXState;
401  
402  void vhdx_guid_generate(MSGUID *guid);
403  
404  int GRAPH_RDLOCK
405  vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
406                      MSGUID *log_guid);
407  
408  uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
409  uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
410                              int crc_offset);
411  
412  bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
413  
414  int GRAPH_RDLOCK
415  vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
416                 Error **errp);
417  
418  int coroutine_fn GRAPH_RDLOCK
419  vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
420                           void *data, uint32_t length, uint64_t offset);
421  
/* Convert the multi-byte fields of *guid from little-endian to CPU byte
 * order, in place.  data4 is a byte array and is left untouched. */
static inline void leguid_to_cpus(MSGUID *guid)
{
    guid->data1 = le32_to_cpu(guid->data1);
    guid->data2 = le16_to_cpu(guid->data2);
    guid->data3 = le16_to_cpu(guid->data3);
}
428  
/* Convert the multi-byte fields of *guid from CPU byte order to
 * little-endian, in place.  data4 is a byte array and is left untouched. */
static inline void cpu_to_leguids(MSGUID *guid)
{
    guid->data1 = cpu_to_le32(guid->data1);
    guid->data2 = cpu_to_le16(guid->data2);
    guid->data3 = cpu_to_le16(guid->data3);
}
435  
436  void vhdx_header_le_import(VHDXHeader *h);
437  void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
438  void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
439  void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
440  void vhdx_log_data_le_import(VHDXLogDataSector *d);
441  void vhdx_log_data_le_export(VHDXLogDataSector *d);
442  void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
443  void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
444  void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
445  void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
446  void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
447  void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
448  void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
449  void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
450  void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
451  void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
452  
453  int GRAPH_RDLOCK
454  vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
455  
456  #endif
457