1 /* 2 * Block driver for Hyper-V VHDX Images 3 * 4 * Copyright (c) 2013 Red Hat, Inc., 5 * 6 * Authors: 7 * Jeff Cody <jcody@redhat.com> 8 * 9 * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 10 * by Microsoft: 11 * https://www.microsoft.com/en-us/download/details.aspx?id=34750 12 * 13 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 14 * See the COPYING.LIB file in the top-level directory. 15 * 16 */ 17 18 #ifndef BLOCK_VHDX_H 19 #define BLOCK_VHDX_H 20 21 #define KiB (1 * 1024) 22 #define MiB (KiB * 1024) 23 #define GiB (MiB * 1024) 24 #define TiB ((uint64_t) GiB * 1024) 25 26 /* Structures and fields present in the VHDX file */ 27 28 /* The header section has the following blocks, 29 * each block is 64KB: 30 * 31 * _____________________________________________________________________________ 32 * | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) | 33 * |----------|---------------|------------|--------------|--------------------| 34 * | | | | | | 35 * 0.........64KB...........128KB........192KB..........256KB................1MB 36 */ 37 38 #define VHDX_HEADER_BLOCK_SIZE (64 * 1024) 39 40 #define VHDX_FILE_ID_OFFSET 0 41 #define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) 42 #define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) 43 #define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) 44 #define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4) 45 46 #define VHDX_HEADER_SECTION_END (1 * MiB) 47 /* 48 * A note on the use of MS-GUID fields. For more details on the GUID, 49 * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. 50 * 51 * The VHDX specification only states that these are MS GUIDs, and which 52 * bytes are data1-data4. It makes no mention of what algorithm should be used 53 * to generate the GUID, nor what standard. However, looking at the specified 54 * known GUID fields, it appears the GUIDs are: 55 * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4) 56 * Random algorithm (noted by 0x4XXX for .data3) 57 */ 58 59 /* ---- HEADER SECTION STRUCTURES ---- */ 60 61 /* These structures are ones that are defined in the VHDX specification 62 * document */ 63 64 #define VHDX_FILE_SIGNATURE 0x656C696678646876 /* "vhdxfile" in ASCII */ 65 typedef struct VHDXFileIdentifier { 66 uint64_t signature; /* "vhdxfile" in ASCII */ 67 uint16_t creator[256]; /* optional; utf-16 string to identify 68 the vhdx file creator. Diagnostic 69 only */ 70 } VHDXFileIdentifier; 71 72 73 /* the guid is a 16 byte unique ID - the definition for this used by 74 * Microsoft is not just 16 bytes though - it is a structure that is defined, 75 * so we need to follow it here so that endianness does not trip us up */ 76 77 typedef struct QEMU_PACKED MSGUID { 78 uint32_t data1; 79 uint16_t data2; 80 uint16_t data3; 81 uint8_t data4[8]; 82 } MSGUID; 83 84 #define guid_eq(a, b) \ 85 (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) 86 87 #define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk 88 is only 582 bytes, for purposes of crc 89 the header is the first 4KB of the 64KB 90 block */ 91 92 /* The full header is 4KB, although the actual header data is much smaller. 93 * But for the checksum calculation, it is over the entire 4KB structure, 94 * not just the defined portion of it */ 95 #define VHDX_HEADER_SIGNATURE 0x64616568 96 typedef struct QEMU_PACKED VHDXHeader { 97 uint32_t signature; /* "head" in ASCII */ 98 uint32_t checksum; /* CRC-32C hash of the whole header */ 99 uint64_t sequence_number; /* Seq number of this header. Each 100 VHDX file has 2 of these headers, 101 and only the header with the highest 102 sequence number is valid */ 103 MSGUID file_write_guid; /* 128 bit unique identifier. Must be 104 updated to new, unique value before 105 the first modification is made to 106 file */ 107 MSGUID data_write_guid; /* 128 bit unique identifier. Must be 108 updated to new, unique value before 109 the first modification is made to 110 visible data. Visbile data is 111 defined as: 112 - system & user metadata 113 - raw block data 114 - disk size 115 - any change that will 116 cause the virtual disk 117 sector read to differ 118 119 This does not need to change if 120 blocks are re-arranged */ 121 MSGUID log_guid; /* 128 bit unique identifier. If zero, 122 there is no valid log. If non-zero, 123 log entries with this guid are 124 valid. */ 125 uint16_t log_version; /* version of the log format. Must be 126 set to zero */ 127 uint16_t version; /* version of the vhdx file. Currently, 128 only supported version is "1" */ 129 uint32_t log_length; /* length of the log. Must be multiple 130 of 1MB */ 131 uint64_t log_offset; /* byte offset in the file of the log. 132 Must also be a multiple of 1MB */ 133 } VHDXHeader; 134 135 /* Header for the region table block */ 136 #define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */ 137 typedef struct QEMU_PACKED VHDXRegionTableHeader { 138 uint32_t signature; /* "regi" in ASCII */ 139 uint32_t checksum; /* CRC-32C hash of the 64KB table */ 140 uint32_t entry_count; /* number of valid entries */ 141 uint32_t reserved; 142 } VHDXRegionTableHeader; 143 144 /* Individual region table entry. There may be a maximum of 2047 of these 145 * 146 * There are two known region table properties. Both are required. 147 * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08 148 * Metadata: 8B7CA20647904B9AB8FE575F050F886E 149 */ 150 #define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand 151 this entry in order to open 152 file */ 153 typedef struct QEMU_PACKED VHDXRegionTableEntry { 154 MSGUID guid; /* 128-bit unique identifier */ 155 uint64_t file_offset; /* offset of the object in the file. 156 Must be multiple of 1MB */ 157 uint32_t length; /* length, in bytes, of the object */ 158 uint32_t data_bits; 159 } VHDXRegionTableEntry; 160 161 162 /* ---- LOG ENTRY STRUCTURES ---- */ 163 #define VHDX_LOG_MIN_SIZE (1024 * 1024) 164 #define VHDX_LOG_SECTOR_SIZE 4096 165 #define VHDX_LOG_HDR_SIZE 64 166 #define VHDX_LOG_SIGNATURE 0x65676f6c 167 typedef struct QEMU_PACKED VHDXLogEntryHeader { 168 uint32_t signature; /* "loge" in ASCII */ 169 uint32_t checksum; /* CRC-32C hash of the 64KB table */ 170 uint32_t entry_length; /* length in bytes, multiple of 1MB */ 171 uint32_t tail; /* byte offset of first log entry of a 172 seq, where this entry is the last 173 entry */ 174 uint64_t sequence_number; /* incremented with each log entry. 175 May not be zero. */ 176 uint32_t descriptor_count; /* number of descriptors in this log 177 entry, must be >= 0 */ 178 uint32_t reserved; 179 MSGUID log_guid; /* value of the log_guid from 180 vhdx_header. If not found in 181 vhdx_header, it is invalid */ 182 uint64_t flushed_file_offset; /* see spec for full details - this 183 should be vhdx file size in bytes */ 184 uint64_t last_file_offset; /* size in bytes that all allocated 185 file structures fit into */ 186 } VHDXLogEntryHeader; 187 188 #define VHDX_LOG_DESC_SIZE 32 189 #define VHDX_LOG_DESC_SIGNATURE 0x63736564 190 #define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a 191 typedef struct QEMU_PACKED VHDXLogDescriptor { 192 uint32_t signature; /* "zero" or "desc" in ASCII */ 193 union { 194 uint32_t reserved; /* zero desc */ 195 uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the 196 data sector */ 197 }; 198 union { 199 uint64_t zero_length; /* zero desc: length of the section to 200 zero */ 201 uint64_t leading_bytes; /* data desc: bytes 0-7 of the data 202 sector */ 203 }; 204 uint64_t file_offset; /* file offset to write zeros - multiple 205 of 4kB */ 206 uint64_t sequence_number; /* must match same field in 207 vhdx_log_entry_header */ 208 } VHDXLogDescriptor; 209 210 #define VHDX_LOG_DATA_SIGNATURE 0x61746164 211 typedef struct QEMU_PACKED VHDXLogDataSector { 212 uint32_t data_signature; /* "data" in ASCII */ 213 uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ 214 uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive). 215 see the data descriptor field for the 216 other mising bytes */ 217 uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */ 218 } VHDXLogDataSector; 219 220 221 222 /* block states - different state values depending on whether it is a 223 * payload block, or a sector block. */ 224 225 #define PAYLOAD_BLOCK_NOT_PRESENT 0 226 #define PAYLOAD_BLOCK_UNDEFINED 1 227 #define PAYLOAD_BLOCK_ZERO 2 228 #define PAYLOAD_BLOCK_UNMAPPED 5 229 #define PAYLOAD_BLOCK_FULLY_PRESENT 6 230 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 231 232 #define SB_BLOCK_NOT_PRESENT 0 233 #define SB_BLOCK_PRESENT 6 234 235 /* per the spec */ 236 #define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) 237 238 /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state 239 other bits are reserved */ 240 #define VHDX_BAT_STATE_BIT_MASK 0x07 241 #define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */ 242 typedef uint64_t VHDXBatEntry; 243 244 /* ---- METADATA REGION STRUCTURES ---- */ 245 246 #define VHDX_METADATA_ENTRY_SIZE 32 247 #define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ 248 #define VHDX_METADATA_TABLE_MAX_SIZE \ 249 (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) 250 #define VHDX_METADATA_SIGNATURE 0x617461646174656D /* "metadata" in ASCII */ 251 typedef struct QEMU_PACKED VHDXMetadataTableHeader { 252 uint64_t signature; /* "metadata" in ASCII */ 253 uint16_t reserved; 254 uint16_t entry_count; /* number table entries. <= 2047 */ 255 uint32_t reserved2[5]; 256 } VHDXMetadataTableHeader; 257 258 #define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */ 259 #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set, 260 otherwise file metdata */ 261 #define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this 262 entry to open the file */ 263 typedef struct QEMU_PACKED VHDXMetadataTableEntry { 264 MSGUID item_id; /* 128-bit identifier for metadata */ 265 uint32_t offset; /* byte offset of the metadata. At 266 least 64kB. Relative to start of 267 metadata region */ 268 /* note: if length = 0, so is offset */ 269 uint32_t length; /* length of metadata. <= 1MB. */ 270 uint32_t data_bits; /* least-significant 3 bits are flags, 271 the rest are reserved (see above) */ 272 uint32_t reserved2; 273 } VHDXMetadataTableEntry; 274 275 #define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to 276 be BLOCK_NOT_PRESENT. 277 If set indicates a fixed 278 size VHDX file */ 279 #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ 280 #define VHDX_BLOCK_SIZE_MIN (1 * MiB) 281 #define VHDX_BLOCK_SIZE_MAX (256 * MiB) 282 typedef struct QEMU_PACKED VHDXFileParameters { 283 uint32_t block_size; /* size of each payload block, always 284 power of 2, <= 256MB and >= 1MB. */ 285 uint32_t data_bits; /* least-significant 2 bits are flags, 286 the rest are reserved (see above) */ 287 } VHDXFileParameters; 288 289 #define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB) 290 typedef struct QEMU_PACKED VHDXVirtualDiskSize { 291 uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. 292 Must be multiple of the sector size, 293 max of 64TB */ 294 } VHDXVirtualDiskSize; 295 296 typedef struct QEMU_PACKED VHDXPage83Data { 297 MSGUID page_83_data; /* unique id for scsi devices that 298 support page 0x83 */ 299 } VHDXPage83Data; 300 301 typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize { 302 uint32_t logical_sector_size; /* virtual disk sector size (in bytes). 303 Can only be 512 or 4096 bytes */ 304 } VHDXVirtualDiskLogicalSectorSize; 305 306 typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { 307 uint32_t physical_sector_size; /* physical sector size (in bytes). 308 Can only be 512 or 4096 bytes */ 309 } VHDXVirtualDiskPhysicalSectorSize; 310 311 typedef struct QEMU_PACKED VHDXParentLocatorHeader { 312 MSGUID locator_type; /* type of the parent virtual disk. */ 313 uint16_t reserved; 314 uint16_t key_value_count; /* number of key/value pairs for this 315 locator */ 316 } VHDXParentLocatorHeader; 317 318 /* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */ 319 typedef struct QEMU_PACKED VHDXParentLocatorEntry { 320 uint32_t key_offset; /* offset in metadata for key, > 0 */ 321 uint32_t value_offset; /* offset in metadata for value, >0 */ 322 uint16_t key_length; /* length of entry key, > 0 */ 323 uint16_t value_length; /* length of entry value, > 0 */ 324 } VHDXParentLocatorEntry; 325 326 327 /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ 328 329 typedef struct VHDXMetadataEntries { 330 VHDXMetadataTableEntry file_parameters_entry; 331 VHDXMetadataTableEntry virtual_disk_size_entry; 332 VHDXMetadataTableEntry page83_data_entry; 333 VHDXMetadataTableEntry logical_sector_size_entry; 334 VHDXMetadataTableEntry phys_sector_size_entry; 335 VHDXMetadataTableEntry parent_locator_entry; 336 uint16_t present; 337 } VHDXMetadataEntries; 338 339 typedef struct VHDXLogEntries { 340 uint64_t offset; 341 uint64_t length; 342 uint32_t write; 343 uint32_t read; 344 VHDXLogEntryHeader *hdr; 345 void *desc_buffer; 346 uint64_t sequence; 347 uint32_t tail; 348 } VHDXLogEntries; 349 350 typedef struct VHDXRegionEntry { 351 uint64_t start; 352 uint64_t end; 353 QLIST_ENTRY(VHDXRegionEntry) entries; 354 } VHDXRegionEntry; 355 356 typedef struct BDRVVHDXState { 357 CoMutex lock; 358 359 int curr_header; 360 VHDXHeader *headers[2]; 361 362 VHDXRegionTableHeader rt; 363 VHDXRegionTableEntry bat_rt; /* region table for the BAT */ 364 VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ 365 366 VHDXMetadataTableHeader metadata_hdr; 367 VHDXMetadataEntries metadata_entries; 368 369 VHDXFileParameters params; 370 uint32_t block_size; 371 uint32_t block_size_bits; 372 uint32_t sectors_per_block; 373 uint32_t sectors_per_block_bits; 374 375 uint64_t virtual_disk_size; 376 uint32_t logical_sector_size; 377 uint32_t physical_sector_size; 378 379 uint64_t chunk_ratio; 380 uint32_t chunk_ratio_bits; 381 uint32_t logical_sector_size_bits; 382 383 uint32_t bat_entries; 384 VHDXBatEntry *bat; 385 uint64_t bat_offset; 386 387 bool first_visible_write; 388 MSGUID session_guid; 389 390 VHDXLogEntries log; 391 392 VHDXParentLocatorHeader parent_header; 393 VHDXParentLocatorEntry *parent_entries; 394 395 Error *migration_blocker; 396 397 QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions; 398 } BDRVVHDXState; 399 400 void vhdx_guid_generate(MSGUID *guid); 401 402 int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw, 403 MSGUID *log_guid); 404 405 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); 406 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, 407 int crc_offset); 408 409 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); 410 411 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); 412 413 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, 414 void *data, uint32_t length, uint64_t offset); 415 416 static inline void leguid_to_cpus(MSGUID *guid) 417 { 418 le32_to_cpus(&guid->data1); 419 le16_to_cpus(&guid->data2); 420 le16_to_cpus(&guid->data3); 421 } 422 423 static inline void cpu_to_leguids(MSGUID *guid) 424 { 425 cpu_to_le32s(&guid->data1); 426 cpu_to_le16s(&guid->data2); 427 cpu_to_le16s(&guid->data3); 428 } 429 430 void vhdx_header_le_import(VHDXHeader *h); 431 void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); 432 void vhdx_log_desc_le_import(VHDXLogDescriptor *d); 433 void vhdx_log_desc_le_export(VHDXLogDescriptor *d); 434 void vhdx_log_data_le_export(VHDXLogDataSector *d); 435 void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); 436 void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); 437 void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr); 438 void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr); 439 void vhdx_region_entry_le_import(VHDXRegionTableEntry *e); 440 void vhdx_region_entry_le_export(VHDXRegionTableEntry *e); 441 void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr); 442 void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr); 443 void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e); 444 void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e); 445 int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); 446 447 #endif 448