1 /* 2 * Block driver for Hyper-V VHDX Images 3 * 4 * Copyright (c) 2013 Red Hat, Inc., 5 * 6 * Authors: 7 * Jeff Cody <jcody@redhat.com> 8 * 9 * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 10 * by Microsoft: 11 * https://www.microsoft.com/en-us/download/details.aspx?id=34750 12 * 13 * This file covers the functionality of the metadata log writing, parsing, and 14 * replay. 15 * 16 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 17 * See the COPYING.LIB file in the top-level directory. 18 * 19 */ 20 21 #include "qemu/osdep.h" 22 #include "qapi/error.h" 23 #include "block/block_int.h" 24 #include "qemu/error-report.h" 25 #include "qemu/bswap.h" 26 #include "vhdx.h" 27 28 29 typedef struct VHDXLogSequence { 30 bool valid; 31 uint32_t count; 32 VHDXLogEntries log; 33 VHDXLogEntryHeader hdr; 34 } VHDXLogSequence; 35 36 typedef struct VHDXLogDescEntries { 37 VHDXLogEntryHeader hdr; 38 VHDXLogDescriptor desc[]; 39 } VHDXLogDescEntries; 40 41 static const MSGUID zero_guid = { 0 }; 42 43 /* The log located on the disk is circular buffer containing 44 * sectors of 4096 bytes each. 45 * 46 * It is assumed for the read/write functions below that the 47 * circular buffer scheme uses a 'one sector open' to indicate 48 * the buffer is full. Given the validation methods used for each 49 * sector, this method should be compatible with other methods that 50 * do not waste a sector. 51 */ 52 53 54 /* Allow peeking at the hdr entry at the beginning of the current 55 * read index, without advancing the read index */ 56 static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, 57 VHDXLogEntryHeader *hdr) 58 { 59 int ret = 0; 60 uint64_t offset; 61 uint32_t read; 62 63 assert(hdr != NULL); 64 65 /* peek is only supported on sector boundaries */ 66 if (log->read % VHDX_LOG_SECTOR_SIZE) { 67 ret = -EFAULT; 68 goto exit; 69 } 70 71 read = log->read; 72 /* we are guaranteed that a) log sectors are 4096 bytes, 73 * and b) the log length is a multiple of 1MB. So, there 74 * is always a round number of sectors in the buffer */ 75 if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { 76 read = 0; 77 } 78 79 if (read == log->write) { 80 ret = -EINVAL; 81 goto exit; 82 } 83 84 offset = log->offset + read; 85 86 ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); 87 if (ret < 0) { 88 goto exit; 89 } 90 vhdx_log_entry_hdr_le_import(hdr); 91 92 exit: 93 return ret; 94 } 95 96 /* Index increment for log, based on sector boundaries */ 97 static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) 98 { 99 idx += VHDX_LOG_SECTOR_SIZE; 100 /* we are guaranteed that a) log sectors are 4096 bytes, 101 * and b) the log length is a multiple of 1MB. So, there 102 * is always a round number of sectors in the buffer */ 103 return idx >= length ? 0 : idx; 104 } 105 106 107 /* Reset the log to empty */ 108 static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) 109 { 110 MSGUID guid = { 0 }; 111 s->log.read = s->log.write = 0; 112 /* a log guid of 0 indicates an empty log to any parser of v0 113 * VHDX logs */ 114 vhdx_update_headers(bs, s, false, &guid); 115 } 116 117 /* Reads num_sectors from the log (all log sectors are 4096 bytes), 118 * into buffer 'buffer'. Upon return, *sectors_read will contain 119 * the number of sectors successfully read. 120 * 121 * It is assumed that 'buffer' is already allocated, and of sufficient 122 * size (i.e. >= 4096*num_sectors). 123 * 124 * If 'peek' is true, then the tail (read) pointer for the circular buffer is 125 * not modified. 126 * 127 * 0 is returned on success, -errno otherwise. */ 128 static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, 129 uint32_t *sectors_read, void *buffer, 130 uint32_t num_sectors, bool peek) 131 { 132 int ret = 0; 133 uint64_t offset; 134 uint32_t read; 135 136 read = log->read; 137 138 *sectors_read = 0; 139 while (num_sectors) { 140 if (read == log->write) { 141 /* empty */ 142 break; 143 } 144 offset = log->offset + read; 145 146 ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); 147 if (ret < 0) { 148 goto exit; 149 } 150 read = vhdx_log_inc_idx(read, log->length); 151 152 *sectors_read = *sectors_read + 1; 153 num_sectors--; 154 } 155 156 exit: 157 if (!peek) { 158 log->read = read; 159 } 160 return ret; 161 } 162 163 /* Writes num_sectors to the log (all log sectors are 4096 bytes), 164 * from buffer 'buffer'. Upon return, *sectors_written will contain 165 * the number of sectors successfully written. 166 * 167 * It is assumed that 'buffer' is at least 4096*num_sectors large. 168 * 169 * 0 is returned on success, -errno otherwise */ 170 static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, 171 uint32_t *sectors_written, void *buffer, 172 uint32_t num_sectors) 173 { 174 int ret = 0; 175 uint64_t offset; 176 uint32_t write; 177 void *buffer_tmp; 178 BDRVVHDXState *s = bs->opaque; 179 180 ret = vhdx_user_visible_write(bs, s); 181 if (ret < 0) { 182 goto exit; 183 } 184 185 write = log->write; 186 187 buffer_tmp = buffer; 188 while (num_sectors) { 189 190 offset = log->offset + write; 191 write = vhdx_log_inc_idx(write, log->length); 192 if (write == log->read) { 193 /* full */ 194 break; 195 } 196 ret = bdrv_pwrite(bs->file, offset, buffer_tmp, 197 VHDX_LOG_SECTOR_SIZE); 198 if (ret < 0) { 199 goto exit; 200 } 201 buffer_tmp += VHDX_LOG_SECTOR_SIZE; 202 203 log->write = write; 204 *sectors_written = *sectors_written + 1; 205 num_sectors--; 206 } 207 208 exit: 209 return ret; 210 } 211 212 213 /* Validates a log entry header */ 214 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, 215 BDRVVHDXState *s) 216 { 217 int valid = false; 218 219 if (hdr->signature != VHDX_LOG_SIGNATURE) { 220 goto exit; 221 } 222 223 /* if the individual entry length is larger than the whole log 224 * buffer, that is obviously invalid */ 225 if (log->length < hdr->entry_length) { 226 goto exit; 227 } 228 229 /* length of entire entry must be in units of 4KB (log sector size) */ 230 if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { 231 goto exit; 232 } 233 234 /* per spec, sequence # must be > 0 */ 235 if (hdr->sequence_number == 0) { 236 goto exit; 237 } 238 239 /* log entries are only valid if they match the file-wide log guid 240 * found in the active header */ 241 if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { 242 goto exit; 243 } 244 245 if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { 246 goto exit; 247 } 248 249 valid = true; 250 251 exit: 252 return valid; 253 } 254 255 /* 256 * Given a log header, this will validate that the descriptors and the 257 * corresponding data sectors (if applicable) 258 * 259 * Validation consists of: 260 * 1. Making sure the sequence numbers matches the entry header 261 * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) 262 * 3. File offset field is a multiple of 4KB 263 * 4. If a data descriptor, the corresponding data sector 264 * has its signature ('data') and matching sequence number 265 * 266 * @desc: the data buffer containing the descriptor 267 * @hdr: the log entry header 268 * 269 * Returns true if valid 270 */ 271 static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, 272 VHDXLogEntryHeader *hdr) 273 { 274 bool ret = false; 275 276 if (desc->sequence_number != hdr->sequence_number) { 277 goto exit; 278 } 279 if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { 280 goto exit; 281 } 282 283 if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { 284 if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { 285 /* valid */ 286 ret = true; 287 } 288 } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { 289 /* valid */ 290 ret = true; 291 } 292 293 exit: 294 return ret; 295 } 296 297 298 /* Prior to sector data for a log entry, there is the header 299 * and the descriptors referenced in the header: 300 * 301 * [] = 4KB sector 302 * 303 * [ hdr, desc ][ desc ][ ... ][ data ][ ... ] 304 * 305 * The first sector in a log entry has a 64 byte header, and 306 * up to 126 32-byte descriptors. If more descriptors than 307 * 126 are required, then subsequent sectors can have up to 128 308 * descriptors. Each sector is 4KB. Data follows the descriptor 309 * sectors. 310 * 311 * This will return the number of sectors needed to encompass 312 * the passed number of descriptors in desc_cnt. 313 * 314 * This will never return 0, even if desc_cnt is 0. 315 */ 316 static int vhdx_compute_desc_sectors(uint32_t desc_cnt) 317 { 318 uint32_t desc_sectors; 319 320 desc_cnt += 2; /* account for header in first sector */ 321 desc_sectors = desc_cnt / 128; 322 if (desc_cnt % 128) { 323 desc_sectors++; 324 } 325 326 return desc_sectors; 327 } 328 329 330 /* Reads the log header, and subsequent descriptors (if any). This 331 * will allocate all the space for buffer, which must be NULL when 332 * passed into this function. Each descriptor will also be validated, 333 * and error returned if any are invalid. */ 334 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, 335 VHDXLogEntries *log, VHDXLogDescEntries **buffer, 336 bool convert_endian) 337 { 338 int ret = 0; 339 uint32_t desc_sectors; 340 uint32_t sectors_read; 341 VHDXLogEntryHeader hdr; 342 VHDXLogDescEntries *desc_entries = NULL; 343 VHDXLogDescriptor desc; 344 int i; 345 346 assert(*buffer == NULL); 347 348 ret = vhdx_log_peek_hdr(bs, log, &hdr); 349 if (ret < 0) { 350 goto exit; 351 } 352 353 if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { 354 ret = -EINVAL; 355 goto exit; 356 } 357 358 desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); 359 desc_entries = qemu_try_blockalign(bs->file->bs, 360 desc_sectors * VHDX_LOG_SECTOR_SIZE); 361 if (desc_entries == NULL) { 362 ret = -ENOMEM; 363 goto exit; 364 } 365 366 ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries, 367 desc_sectors, false); 368 if (ret < 0) { 369 goto free_and_exit; 370 } 371 if (sectors_read != desc_sectors) { 372 ret = -EINVAL; 373 goto free_and_exit; 374 } 375 376 /* put in proper endianness, and validate each desc */ 377 for (i = 0; i < hdr.descriptor_count; i++) { 378 desc = desc_entries->desc[i]; 379 vhdx_log_desc_le_import(&desc); 380 if (convert_endian) { 381 desc_entries->desc[i] = desc; 382 } 383 if (vhdx_log_desc_is_valid(&desc, &hdr) == false) { 384 ret = -EINVAL; 385 goto free_and_exit; 386 } 387 } 388 if (convert_endian) { 389 desc_entries->hdr = hdr; 390 } 391 392 *buffer = desc_entries; 393 goto exit; 394 395 free_and_exit: 396 qemu_vfree(desc_entries); 397 exit: 398 return ret; 399 } 400 401 402 /* Flushes the descriptor described by desc to the VHDX image file. 403 * If the descriptor is a data descriptor, than 'data' must be non-NULL, 404 * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be 405 * written. 406 * 407 * Verification is performed to make sure the sequence numbers of a data 408 * descriptor match the sequence number in the desc. 409 * 410 * For a zero descriptor, it may describe multiple sectors to fill with zeroes. 411 * In this case, it should be noted that zeroes are written to disk, and the 412 * image file is not extended as a sparse file. */ 413 static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, 414 VHDXLogDataSector *data) 415 { 416 int ret = 0; 417 uint64_t seq, file_offset; 418 uint32_t offset = 0; 419 void *buffer = NULL; 420 uint64_t count = 1; 421 int i; 422 423 buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 424 425 if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { 426 /* data sector */ 427 if (data == NULL) { 428 ret = -EFAULT; 429 goto exit; 430 } 431 432 /* The sequence number of the data sector must match that 433 * in the descriptor */ 434 seq = data->sequence_high; 435 seq <<= 32; 436 seq |= data->sequence_low & 0xffffffff; 437 438 if (seq != desc->sequence_number) { 439 ret = -EINVAL; 440 goto exit; 441 } 442 443 /* Each data sector is in total 4096 bytes, however the first 444 * 8 bytes, and last 4 bytes, are located in the descriptor */ 445 memcpy(buffer, &desc->leading_bytes, 8); 446 offset += 8; 447 448 memcpy(buffer+offset, data->data, 4084); 449 offset += 4084; 450 451 memcpy(buffer+offset, &desc->trailing_bytes, 4); 452 453 } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { 454 /* write 'count' sectors of sector */ 455 memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); 456 count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; 457 } else { 458 error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32, 459 desc->signature); 460 ret = -EINVAL; 461 goto exit; 462 } 463 464 file_offset = desc->file_offset; 465 466 /* count is only > 1 if we are writing zeroes */ 467 for (i = 0; i < count; i++) { 468 ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, 469 VHDX_LOG_SECTOR_SIZE); 470 if (ret < 0) { 471 goto exit; 472 } 473 file_offset += VHDX_LOG_SECTOR_SIZE; 474 } 475 476 exit: 477 qemu_vfree(buffer); 478 return ret; 479 } 480 481 /* Flush the entire log (as described by 'logs') to the VHDX image 482 * file, and then set the log to 'empty' status once complete. 483 * 484 * The log entries should be validate prior to flushing */ 485 static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, 486 VHDXLogSequence *logs) 487 { 488 int ret = 0; 489 int i; 490 uint32_t cnt, sectors_read; 491 uint64_t new_file_size; 492 void *data = NULL; 493 int64_t file_length; 494 VHDXLogDescEntries *desc_entries = NULL; 495 VHDXLogEntryHeader hdr_tmp = { 0 }; 496 497 cnt = logs->count; 498 499 data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 500 501 ret = vhdx_user_visible_write(bs, s); 502 if (ret < 0) { 503 goto exit; 504 } 505 506 /* each iteration represents one log sequence, which may span multiple 507 * sectors */ 508 while (cnt--) { 509 ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); 510 if (ret < 0) { 511 goto exit; 512 } 513 file_length = bdrv_getlength(bs->file->bs); 514 if (file_length < 0) { 515 ret = file_length; 516 goto exit; 517 } 518 /* if the log shows a FlushedFileOffset larger than our current file 519 * size, then that means the file has been truncated / corrupted, and 520 * we must refused to open it / use it */ 521 if (hdr_tmp.flushed_file_offset > file_length) { 522 ret = -EINVAL; 523 goto exit; 524 } 525 526 ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true); 527 if (ret < 0) { 528 goto exit; 529 } 530 531 for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { 532 if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) { 533 /* data sector, so read a sector to flush */ 534 ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read, 535 data, 1, false); 536 if (ret < 0) { 537 goto exit; 538 } 539 if (sectors_read != 1) { 540 ret = -EINVAL; 541 goto exit; 542 } 543 vhdx_log_data_le_import(data); 544 } 545 546 ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); 547 if (ret < 0) { 548 goto exit; 549 } 550 } 551 if (file_length < desc_entries->hdr.last_file_offset) { 552 new_file_size = desc_entries->hdr.last_file_offset; 553 if (new_file_size % (1 * MiB)) { 554 /* round up to nearest 1MB boundary */ 555 new_file_size = QEMU_ALIGN_UP(new_file_size, MiB); 556 if (new_file_size > INT64_MAX) { 557 ret = -EINVAL; 558 goto exit; 559 } 560 ret = bdrv_truncate(bs->file, new_file_size, false, 561 PREALLOC_MODE_OFF, NULL); 562 if (ret < 0) { 563 goto exit; 564 } 565 } 566 } 567 qemu_vfree(desc_entries); 568 desc_entries = NULL; 569 } 570 571 ret = bdrv_flush(bs); 572 if (ret < 0) { 573 goto exit; 574 } 575 /* once the log is fully flushed, indicate that we have an empty log 576 * now. This also sets the log guid to 0, to indicate an empty log */ 577 vhdx_log_reset(bs, s); 578 579 exit: 580 qemu_vfree(data); 581 qemu_vfree(desc_entries); 582 return ret; 583 } 584 585 static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, 586 VHDXLogEntries *log, uint64_t seq, 587 bool *valid, VHDXLogEntryHeader *entry) 588 { 589 int ret = 0; 590 VHDXLogEntryHeader hdr; 591 void *buffer = NULL; 592 uint32_t i, desc_sectors, total_sectors, crc; 593 uint32_t sectors_read = 0; 594 VHDXLogDescEntries *desc_buffer = NULL; 595 596 *valid = false; 597 598 ret = vhdx_log_peek_hdr(bs, log, &hdr); 599 if (ret < 0) { 600 goto inc_and_exit; 601 } 602 603 if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { 604 goto inc_and_exit; 605 } 606 607 if (seq > 0) { 608 if (hdr.sequence_number != seq + 1) { 609 goto inc_and_exit; 610 } 611 } 612 613 desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); 614 615 /* Read all log sectors, and calculate log checksum */ 616 617 total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; 618 619 620 /* read_desc() will increment the read idx */ 621 ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false); 622 if (ret < 0) { 623 goto free_and_exit; 624 } 625 626 crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, 627 desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); 628 crc ^= 0xffffffff; 629 630 buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 631 if (total_sectors > desc_sectors) { 632 for (i = 0; i < total_sectors - desc_sectors; i++) { 633 sectors_read = 0; 634 ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer, 635 1, false); 636 if (ret < 0 || sectors_read != 1) { 637 goto free_and_exit; 638 } 639 crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); 640 crc ^= 0xffffffff; 641 } 642 } 643 crc ^= 0xffffffff; 644 if (crc != hdr.checksum) { 645 goto free_and_exit; 646 } 647 648 *valid = true; 649 *entry = hdr; 650 goto free_and_exit; 651 652 inc_and_exit: 653 log->read = vhdx_log_inc_idx(log->read, log->length); 654 655 free_and_exit: 656 qemu_vfree(buffer); 657 qemu_vfree(desc_buffer); 658 return ret; 659 } 660 661 /* Search through the log circular buffer, and find the valid, active 662 * log sequence, if any exists 663 * */ 664 static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, 665 VHDXLogSequence *logs) 666 { 667 int ret = 0; 668 uint32_t tail; 669 bool seq_valid = false; 670 VHDXLogSequence candidate = { 0 }; 671 VHDXLogEntryHeader hdr = { 0 }; 672 VHDXLogEntries curr_log; 673 674 memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); 675 curr_log.write = curr_log.length; /* assume log is full */ 676 curr_log.read = 0; 677 678 679 /* now we will go through the whole log sector by sector, until 680 * we find a valid, active log sequence, or reach the end of the 681 * log buffer */ 682 for (;;) { 683 uint64_t curr_seq = 0; 684 VHDXLogSequence current = { 0 }; 685 686 tail = curr_log.read; 687 688 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, 689 &seq_valid, &hdr); 690 if (ret < 0) { 691 goto exit; 692 } 693 694 if (seq_valid) { 695 current.valid = true; 696 current.log = curr_log; 697 current.log.read = tail; 698 current.log.write = curr_log.read; 699 current.count = 1; 700 current.hdr = hdr; 701 702 703 for (;;) { 704 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, 705 &seq_valid, &hdr); 706 if (ret < 0) { 707 goto exit; 708 } 709 if (seq_valid == false) { 710 break; 711 } 712 current.log.write = curr_log.read; 713 current.count++; 714 715 curr_seq = hdr.sequence_number; 716 } 717 } 718 719 if (current.valid) { 720 if (candidate.valid == false || 721 current.hdr.sequence_number > candidate.hdr.sequence_number) { 722 candidate = current; 723 } 724 } 725 726 if (curr_log.read < tail) { 727 break; 728 } 729 } 730 731 *logs = candidate; 732 733 if (candidate.valid) { 734 /* this is the next sequence number, for writes */ 735 s->log.sequence = candidate.hdr.sequence_number + 1; 736 } 737 738 739 exit: 740 return ret; 741 } 742 743 /* Parse the replay log. Per the VHDX spec, if the log is present 744 * it must be replayed prior to opening the file, even read-only. 745 * 746 * If read-only, we must replay the log in RAM (or refuse to open 747 * a dirty VHDX file read-only) */ 748 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, 749 Error **errp) 750 { 751 int ret = 0; 752 VHDXHeader *hdr; 753 VHDXLogSequence logs = { 0 }; 754 755 hdr = s->headers[s->curr_header]; 756 757 *flushed = false; 758 759 /* s->log.hdr is freed in vhdx_close() */ 760 if (s->log.hdr == NULL) { 761 s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); 762 } 763 764 s->log.offset = hdr->log_offset; 765 s->log.length = hdr->log_length; 766 767 if (s->log.offset < VHDX_LOG_MIN_SIZE || 768 s->log.offset % VHDX_LOG_MIN_SIZE) { 769 ret = -EINVAL; 770 goto exit; 771 } 772 773 /* per spec, only log version of 0 is supported */ 774 if (hdr->log_version != 0) { 775 ret = -EINVAL; 776 goto exit; 777 } 778 779 /* If either the log guid, or log length is zero, 780 * then a replay log is not present */ 781 if (guid_eq(hdr->log_guid, zero_guid)) { 782 goto exit; 783 } 784 785 if (hdr->log_length == 0) { 786 goto exit; 787 } 788 789 if (hdr->log_length % VHDX_LOG_MIN_SIZE) { 790 ret = -EINVAL; 791 goto exit; 792 } 793 794 795 /* The log is present, we need to find if and where there is an active 796 * sequence of valid entries present in the log. */ 797 798 ret = vhdx_log_search(bs, s, &logs); 799 if (ret < 0) { 800 goto exit; 801 } 802 803 if (logs.valid) { 804 if (bs->read_only) { 805 bdrv_refresh_filename(bs); 806 ret = -EPERM; 807 error_setg(errp, 808 "VHDX image file '%s' opened read-only, but " 809 "contains a log that needs to be replayed", 810 bs->filename); 811 error_append_hint(errp, "To replay the log, run:\n" 812 "qemu-img check -r all '%s'\n", 813 bs->filename); 814 goto exit; 815 } 816 /* now flush the log */ 817 ret = vhdx_log_flush(bs, s, &logs); 818 if (ret < 0) { 819 goto exit; 820 } 821 *flushed = true; 822 } 823 824 825 exit: 826 return ret; 827 } 828 829 830 831 static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, 832 VHDXLogDataSector *sector, void *data, 833 uint64_t seq) 834 { 835 /* 8 + 4084 + 4 = 4096, 1 log sector */ 836 memcpy(&desc->leading_bytes, data, 8); 837 data += 8; 838 desc->leading_bytes = cpu_to_le64(desc->leading_bytes); 839 memcpy(sector->data, data, 4084); 840 data += 4084; 841 memcpy(&desc->trailing_bytes, data, 4); 842 desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes); 843 data += 4; 844 845 sector->sequence_high = (uint32_t) (seq >> 32); 846 sector->sequence_low = (uint32_t) (seq & 0xffffffff); 847 sector->data_signature = VHDX_LOG_DATA_SIGNATURE; 848 849 vhdx_log_desc_le_export(desc); 850 vhdx_log_data_le_export(sector); 851 } 852 853 854 static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, 855 void *data, uint32_t length, uint64_t offset) 856 { 857 int ret = 0; 858 void *buffer = NULL; 859 void *merged_sector = NULL; 860 void *data_tmp, *sector_write; 861 unsigned int i; 862 int sector_offset; 863 uint32_t desc_sectors, sectors, total_length; 864 uint32_t sectors_written = 0; 865 uint32_t aligned_length; 866 uint32_t leading_length = 0; 867 uint32_t trailing_length = 0; 868 uint32_t partial_sectors = 0; 869 uint32_t bytes_written = 0; 870 uint64_t file_offset; 871 int64_t file_length; 872 VHDXHeader *header; 873 VHDXLogEntryHeader new_hdr; 874 VHDXLogDescriptor *new_desc = NULL; 875 VHDXLogDataSector *data_sector = NULL; 876 MSGUID new_guid = { 0 }; 877 878 header = s->headers[s->curr_header]; 879 880 /* need to have offset read data, and be on 4096 byte boundary */ 881 882 if (length > header->log_length) { 883 /* no log present. we could create a log here instead of failing */ 884 ret = -EINVAL; 885 goto exit; 886 } 887 888 if (guid_eq(header->log_guid, zero_guid)) { 889 vhdx_guid_generate(&new_guid); 890 vhdx_update_headers(bs, s, false, &new_guid); 891 } else { 892 /* currently, we require that the log be flushed after 893 * every write. */ 894 ret = -ENOTSUP; 895 goto exit; 896 } 897 898 /* 0 is an invalid sequence number, but may also represent the first 899 * log write (or a wrapped seq) */ 900 if (s->log.sequence == 0) { 901 s->log.sequence = 1; 902 } 903 904 sector_offset = offset % VHDX_LOG_SECTOR_SIZE; 905 file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE); 906 907 aligned_length = length; 908 909 /* add in the unaligned head and tail bytes */ 910 if (sector_offset) { 911 leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); 912 leading_length = leading_length > length ? length : leading_length; 913 aligned_length -= leading_length; 914 partial_sectors++; 915 } 916 917 sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; 918 trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); 919 if (trailing_length) { 920 partial_sectors++; 921 } 922 923 sectors += partial_sectors; 924 925 file_length = bdrv_getlength(bs->file->bs); 926 if (file_length < 0) { 927 ret = file_length; 928 goto exit; 929 } 930 931 /* sectors is now how many sectors the data itself takes, not 932 * including the header and descriptor metadata */ 933 934 new_hdr = (VHDXLogEntryHeader) { 935 .signature = VHDX_LOG_SIGNATURE, 936 .tail = s->log.tail, 937 .sequence_number = s->log.sequence, 938 .descriptor_count = sectors, 939 .reserved = 0, 940 .flushed_file_offset = file_length, 941 .last_file_offset = file_length, 942 .log_guid = header->log_guid, 943 }; 944 945 946 desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); 947 948 total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; 949 new_hdr.entry_length = total_length; 950 951 vhdx_log_entry_hdr_le_export(&new_hdr); 952 953 buffer = qemu_blockalign(bs, total_length); 954 memcpy(buffer, &new_hdr, sizeof(new_hdr)); 955 956 new_desc = buffer + sizeof(new_hdr); 957 data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); 958 data_tmp = data; 959 960 /* All log sectors are 4KB, so for any partial sectors we must 961 * merge the data with preexisting data from the final file 962 * destination */ 963 merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 964 965 for (i = 0; i < sectors; i++) { 966 new_desc->signature = VHDX_LOG_DESC_SIGNATURE; 967 new_desc->sequence_number = s->log.sequence; 968 new_desc->file_offset = file_offset; 969 970 if (i == 0 && leading_length) { 971 /* partial sector at the front of the buffer */ 972 ret = bdrv_pread(bs->file, file_offset, merged_sector, 973 VHDX_LOG_SECTOR_SIZE); 974 if (ret < 0) { 975 goto exit; 976 } 977 memcpy(merged_sector + sector_offset, data_tmp, leading_length); 978 bytes_written = leading_length; 979 sector_write = merged_sector; 980 } else if (i == sectors - 1 && trailing_length) { 981 /* partial sector at the end of the buffer */ 982 ret = bdrv_pread(bs->file, 983 file_offset, 984 merged_sector + trailing_length, 985 VHDX_LOG_SECTOR_SIZE - trailing_length); 986 if (ret < 0) { 987 goto exit; 988 } 989 memcpy(merged_sector, data_tmp, trailing_length); 990 bytes_written = trailing_length; 991 sector_write = merged_sector; 992 } else { 993 bytes_written = VHDX_LOG_SECTOR_SIZE; 994 sector_write = data_tmp; 995 } 996 997 /* populate the raw sector data into the proper structures, 998 * as well as update the descriptor, and convert to proper 999 * endianness */ 1000 vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, 1001 s->log.sequence); 1002 1003 data_tmp += bytes_written; 1004 data_sector++; 1005 new_desc++; 1006 file_offset += VHDX_LOG_SECTOR_SIZE; 1007 } 1008 1009 /* checksum covers entire entry, from the log header through the 1010 * last data sector */ 1011 vhdx_update_checksum(buffer, total_length, 1012 offsetof(VHDXLogEntryHeader, checksum)); 1013 1014 /* now write to the log */ 1015 ret = vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer, 1016 desc_sectors + sectors); 1017 if (ret < 0) { 1018 goto exit; 1019 } 1020 1021 if (sectors_written != desc_sectors + sectors) { 1022 /* instead of failing, we could flush the log here */ 1023 ret = -EINVAL; 1024 goto exit; 1025 } 1026 1027 s->log.sequence++; 1028 /* write new tail */ 1029 s->log.tail = s->log.write; 1030 1031 exit: 1032 qemu_vfree(buffer); 1033 qemu_vfree(merged_sector); 1034 return ret; 1035 } 1036 1037 /* Perform a log write, and then immediately flush the entire log */ 1038 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, 1039 void *data, uint32_t length, uint64_t offset) 1040 { 1041 int ret = 0; 1042 VHDXLogSequence logs = { .valid = true, 1043 .count = 1, 1044 .hdr = { 0 } }; 1045 1046 1047 /* Make sure data written (new and/or changed blocks) is stable 1048 * on disk, before creating log entry */ 1049 ret = bdrv_flush(bs); 1050 if (ret < 0) { 1051 goto exit; 1052 } 1053 1054 ret = vhdx_log_write(bs, s, data, length, offset); 1055 if (ret < 0) { 1056 goto exit; 1057 } 1058 logs.log = s->log; 1059 1060 /* Make sure log is stable on disk */ 1061 ret = bdrv_flush(bs); 1062 if (ret < 0) { 1063 goto exit; 1064 } 1065 1066 ret = vhdx_log_flush(bs, s, &logs); 1067 if (ret < 0) { 1068 goto exit; 1069 } 1070 1071 s->log = logs.log; 1072 1073 exit: 1074 return ret; 1075 } 1076 1077