/*
 * Block driver for Hyper-V VHDX Images
 *
 * Copyright (c) 2013 Red Hat, Inc.,
 *
 * Authors:
 *  Jeff Cody <jcody@redhat.com>
 *
 *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
 *  by Microsoft:
 *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
 *
 * This file covers the functionality of the metadata log writing, parsing, and
 * replay.
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "block/vhdx.h"


/* A run of consecutive valid log entries found in the circular buffer. */
typedef struct VHDXLogSequence {
    bool valid;             /* true once at least one valid entry was found */
    uint32_t count;         /* number of log entries in the sequence */
    VHDXLogEntries log;     /* log state; read/write bracket the sequence */
    VHDXLogEntryHeader hdr; /* header of the first entry of the sequence */
} VHDXLogSequence;

/* In-memory image of the descriptor sectors of one log entry: the entry
 * header, immediately followed by its descriptors. */
typedef struct VHDXLogDescEntries {
    VHDXLogEntryHeader hdr;
    VHDXLogDescriptor desc[];
} VHDXLogDescEntries;

/* All-zero GUID; a header log GUID equal to this means "no log present". */
static const MSGUID zero_guid = { 0 };

/* The log located on the disk is a circular buffer containing
 * sectors of 4096 bytes each.
 *
 * It is assumed for the read/write functions below that the
 * circular buffer scheme uses a 'one sector open' to indicate
 * the buffer is full.  Given the validation methods used for each
 * sector, this method should be compatible with other methods that
 * do not waste a sector.
51 */ 52 53 54 /* Allow peeking at the hdr entry at the beginning of the current 55 * read index, without advancing the read index */ 56 static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, 57 VHDXLogEntryHeader *hdr) 58 { 59 int ret = 0; 60 uint64_t offset; 61 uint32_t read; 62 63 assert(hdr != NULL); 64 65 /* peek is only supported on sector boundaries */ 66 if (log->read % VHDX_LOG_SECTOR_SIZE) { 67 ret = -EFAULT; 68 goto exit; 69 } 70 71 read = log->read; 72 /* we are guaranteed that a) log sectors are 4096 bytes, 73 * and b) the log length is a multiple of 1MB. So, there 74 * is always a round number of sectors in the buffer */ 75 if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { 76 read = 0; 77 } 78 79 if (read == log->write) { 80 ret = -EINVAL; 81 goto exit; 82 } 83 84 offset = log->offset + read; 85 86 ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader)); 87 if (ret < 0) { 88 goto exit; 89 } 90 vhdx_log_entry_hdr_le_import(hdr); 91 92 exit: 93 return ret; 94 } 95 96 /* Index increment for log, based on sector boundaries */ 97 static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) 98 { 99 idx += VHDX_LOG_SECTOR_SIZE; 100 /* we are guaranteed that a) log sectors are 4096 bytes, 101 * and b) the log length is a multiple of 1MB. So, there 102 * is always a round number of sectors in the buffer */ 103 return idx >= length ? 0 : idx; 104 } 105 106 107 /* Reset the log to empty */ 108 static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) 109 { 110 MSGUID guid = { 0 }; 111 s->log.read = s->log.write = 0; 112 /* a log guid of 0 indicates an empty log to any parser of v0 113 * VHDX logs */ 114 vhdx_update_headers(bs, s, false, &guid); 115 } 116 117 /* Reads num_sectors from the log (all log sectors are 4096 bytes), 118 * into buffer 'buffer'. Upon return, *sectors_read will contain 119 * the number of sectors successfully read. 
120 * 121 * It is assumed that 'buffer' is already allocated, and of sufficient 122 * size (i.e. >= 4096*num_sectors). 123 * 124 * If 'peek' is true, then the tail (read) pointer for the circular buffer is 125 * not modified. 126 * 127 * 0 is returned on success, -errno otherwise. */ 128 static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, 129 uint32_t *sectors_read, void *buffer, 130 uint32_t num_sectors, bool peek) 131 { 132 int ret = 0; 133 uint64_t offset; 134 uint32_t read; 135 136 read = log->read; 137 138 *sectors_read = 0; 139 while (num_sectors) { 140 if (read == log->write) { 141 /* empty */ 142 break; 143 } 144 offset = log->offset + read; 145 146 ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE); 147 if (ret < 0) { 148 goto exit; 149 } 150 read = vhdx_log_inc_idx(read, log->length); 151 152 *sectors_read = *sectors_read + 1; 153 num_sectors--; 154 } 155 156 exit: 157 if (!peek) { 158 log->read = read; 159 } 160 return ret; 161 } 162 163 /* Writes num_sectors to the log (all log sectors are 4096 bytes), 164 * from buffer 'buffer'. Upon return, *sectors_written will contain 165 * the number of sectors successfully written. 166 * 167 * It is assumed that 'buffer' is at least 4096*num_sectors large. 
168 * 169 * 0 is returned on success, -errno otherwise */ 170 static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, 171 uint32_t *sectors_written, void *buffer, 172 uint32_t num_sectors) 173 { 174 int ret = 0; 175 uint64_t offset; 176 uint32_t write; 177 void *buffer_tmp; 178 BDRVVHDXState *s = bs->opaque; 179 180 ret = vhdx_user_visible_write(bs, s); 181 if (ret < 0) { 182 goto exit; 183 } 184 185 write = log->write; 186 187 buffer_tmp = buffer; 188 while (num_sectors) { 189 190 offset = log->offset + write; 191 write = vhdx_log_inc_idx(write, log->length); 192 if (write == log->read) { 193 /* full */ 194 break; 195 } 196 ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp, 197 VHDX_LOG_SECTOR_SIZE); 198 if (ret < 0) { 199 goto exit; 200 } 201 buffer_tmp += VHDX_LOG_SECTOR_SIZE; 202 203 log->write = write; 204 *sectors_written = *sectors_written + 1; 205 num_sectors--; 206 } 207 208 exit: 209 return ret; 210 } 211 212 213 /* Validates a log entry header */ 214 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, 215 BDRVVHDXState *s) 216 { 217 int valid = false; 218 219 if (hdr->signature != VHDX_LOG_SIGNATURE) { 220 goto exit; 221 } 222 223 /* if the individual entry length is larger than the whole log 224 * buffer, that is obviously invalid */ 225 if (log->length < hdr->entry_length) { 226 goto exit; 227 } 228 229 /* length of entire entry must be in units of 4KB (log sector size) */ 230 if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { 231 goto exit; 232 } 233 234 /* per spec, sequence # must be > 0 */ 235 if (hdr->sequence_number == 0) { 236 goto exit; 237 } 238 239 /* log entries are only valid if they match the file-wide log guid 240 * found in the active header */ 241 if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { 242 goto exit; 243 } 244 245 if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { 246 goto exit; 247 } 248 249 valid = true; 250 251 exit: 252 return 
valid; 253 } 254 255 /* 256 * Given a log header, this will validate that the descriptors and the 257 * corresponding data sectors (if applicable) 258 * 259 * Validation consists of: 260 * 1. Making sure the sequence numbers matches the entry header 261 * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) 262 * 3. File offset field is a multiple of 4KB 263 * 4. If a data descriptor, the corresponding data sector 264 * has its signature ('data') and matching sequence number 265 * 266 * @desc: the data buffer containing the descriptor 267 * @hdr: the log entry header 268 * 269 * Returns true if valid 270 */ 271 static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, 272 VHDXLogEntryHeader *hdr) 273 { 274 bool ret = false; 275 276 if (desc->sequence_number != hdr->sequence_number) { 277 goto exit; 278 } 279 if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { 280 goto exit; 281 } 282 283 if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { 284 if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { 285 /* valid */ 286 ret = true; 287 } 288 } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { 289 /* valid */ 290 ret = true; 291 } 292 293 exit: 294 return ret; 295 } 296 297 298 /* Prior to sector data for a log entry, there is the header 299 * and the descriptors referenced in the header: 300 * 301 * [] = 4KB sector 302 * 303 * [ hdr, desc ][ desc ][ ... ][ data ][ ... ] 304 * 305 * The first sector in a log entry has a 64 byte header, and 306 * up to 126 32-byte descriptors. If more descriptors than 307 * 126 are required, then subsequent sectors can have up to 128 308 * descriptors. Each sector is 4KB. Data follows the descriptor 309 * sectors. 310 * 311 * This will return the number of sectors needed to encompass 312 * the passed number of descriptors in desc_cnt. 313 * 314 * This will never return 0, even if desc_cnt is 0. 
315 */ 316 static int vhdx_compute_desc_sectors(uint32_t desc_cnt) 317 { 318 uint32_t desc_sectors; 319 320 desc_cnt += 2; /* account for header in first sector */ 321 desc_sectors = desc_cnt / 128; 322 if (desc_cnt % 128) { 323 desc_sectors++; 324 } 325 326 return desc_sectors; 327 } 328 329 330 /* Reads the log header, and subsequent descriptors (if any). This 331 * will allocate all the space for buffer, which must be NULL when 332 * passed into this function. Each descriptor will also be validated, 333 * and error returned if any are invalid. */ 334 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, 335 VHDXLogEntries *log, VHDXLogDescEntries **buffer, 336 bool convert_endian) 337 { 338 int ret = 0; 339 uint32_t desc_sectors; 340 uint32_t sectors_read; 341 VHDXLogEntryHeader hdr; 342 VHDXLogDescEntries *desc_entries = NULL; 343 VHDXLogDescriptor desc; 344 int i; 345 346 assert(*buffer == NULL); 347 348 ret = vhdx_log_peek_hdr(bs, log, &hdr); 349 if (ret < 0) { 350 goto exit; 351 } 352 353 if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { 354 ret = -EINVAL; 355 goto exit; 356 } 357 358 desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); 359 desc_entries = qemu_try_blockalign(bs->file->bs, 360 desc_sectors * VHDX_LOG_SECTOR_SIZE); 361 if (desc_entries == NULL) { 362 ret = -ENOMEM; 363 goto exit; 364 } 365 366 ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries, 367 desc_sectors, false); 368 if (ret < 0) { 369 goto free_and_exit; 370 } 371 if (sectors_read != desc_sectors) { 372 ret = -EINVAL; 373 goto free_and_exit; 374 } 375 376 /* put in proper endianness, and validate each desc */ 377 for (i = 0; i < hdr.descriptor_count; i++) { 378 desc = desc_entries->desc[i]; 379 vhdx_log_desc_le_import(&desc); 380 if (convert_endian) { 381 desc_entries->desc[i] = desc; 382 } 383 if (vhdx_log_desc_is_valid(&desc, &hdr) == false) { 384 ret = -EINVAL; 385 goto free_and_exit; 386 } 387 } 388 if (convert_endian) { 389 desc_entries->hdr 
= hdr; 390 } 391 392 *buffer = desc_entries; 393 goto exit; 394 395 free_and_exit: 396 qemu_vfree(desc_entries); 397 exit: 398 return ret; 399 } 400 401 402 /* Flushes the descriptor described by desc to the VHDX image file. 403 * If the descriptor is a data descriptor, than 'data' must be non-NULL, 404 * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be 405 * written. 406 * 407 * Verification is performed to make sure the sequence numbers of a data 408 * descriptor match the sequence number in the desc. 409 * 410 * For a zero descriptor, it may describe multiple sectors to fill with zeroes. 411 * In this case, it should be noted that zeroes are written to disk, and the 412 * image file is not extended as a sparse file. */ 413 static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, 414 VHDXLogDataSector *data) 415 { 416 int ret = 0; 417 uint64_t seq, file_offset; 418 uint32_t offset = 0; 419 void *buffer = NULL; 420 uint64_t count = 1; 421 int i; 422 423 buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 424 425 if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { 426 /* data sector */ 427 if (data == NULL) { 428 ret = -EFAULT; 429 goto exit; 430 } 431 432 /* The sequence number of the data sector must match that 433 * in the descriptor */ 434 seq = data->sequence_high; 435 seq <<= 32; 436 seq |= data->sequence_low & 0xffffffff; 437 438 if (seq != desc->sequence_number) { 439 ret = -EINVAL; 440 goto exit; 441 } 442 443 /* Each data sector is in total 4096 bytes, however the first 444 * 8 bytes, and last 4 bytes, are located in the descriptor */ 445 memcpy(buffer, &desc->leading_bytes, 8); 446 offset += 8; 447 448 memcpy(buffer+offset, data->data, 4084); 449 offset += 4084; 450 451 memcpy(buffer+offset, &desc->trailing_bytes, 4); 452 453 } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { 454 /* write 'count' sectors of sector */ 455 memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); 456 count = desc->zero_length / 
VHDX_LOG_SECTOR_SIZE; 457 } else { 458 error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32, 459 desc->signature); 460 ret = -EINVAL; 461 goto exit; 462 } 463 464 file_offset = desc->file_offset; 465 466 /* count is only > 1 if we are writing zeroes */ 467 for (i = 0; i < count; i++) { 468 ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer, 469 VHDX_LOG_SECTOR_SIZE); 470 if (ret < 0) { 471 goto exit; 472 } 473 file_offset += VHDX_LOG_SECTOR_SIZE; 474 } 475 476 exit: 477 qemu_vfree(buffer); 478 return ret; 479 } 480 481 /* Flush the entire log (as described by 'logs') to the VHDX image 482 * file, and then set the log to 'empty' status once complete. 483 * 484 * The log entries should be validate prior to flushing */ 485 static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, 486 VHDXLogSequence *logs) 487 { 488 int ret = 0; 489 int i; 490 uint32_t cnt, sectors_read; 491 uint64_t new_file_size; 492 void *data = NULL; 493 VHDXLogDescEntries *desc_entries = NULL; 494 VHDXLogEntryHeader hdr_tmp = { 0 }; 495 496 cnt = logs->count; 497 498 data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 499 500 ret = vhdx_user_visible_write(bs, s); 501 if (ret < 0) { 502 goto exit; 503 } 504 505 /* each iteration represents one log sequence, which may span multiple 506 * sectors */ 507 while (cnt--) { 508 ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); 509 if (ret < 0) { 510 goto exit; 511 } 512 /* if the log shows a FlushedFileOffset larger than our current file 513 * size, then that means the file has been truncated / corrupted, and 514 * we must refused to open it / use it */ 515 if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) { 516 ret = -EINVAL; 517 goto exit; 518 } 519 520 ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true); 521 if (ret < 0) { 522 goto exit; 523 } 524 525 for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { 526 if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) { 527 /* data 
sector, so read a sector to flush */ 528 ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read, 529 data, 1, false); 530 if (ret < 0) { 531 goto exit; 532 } 533 if (sectors_read != 1) { 534 ret = -EINVAL; 535 goto exit; 536 } 537 vhdx_log_data_le_import(data); 538 } 539 540 ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); 541 if (ret < 0) { 542 goto exit; 543 } 544 } 545 if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) { 546 new_file_size = desc_entries->hdr.last_file_offset; 547 if (new_file_size % (1024*1024)) { 548 /* round up to nearest 1MB boundary */ 549 new_file_size = ((new_file_size >> 20) + 1) << 20; 550 bdrv_truncate(bs->file->bs, new_file_size); 551 } 552 } 553 qemu_vfree(desc_entries); 554 desc_entries = NULL; 555 } 556 557 bdrv_flush(bs); 558 /* once the log is fully flushed, indicate that we have an empty log 559 * now. This also sets the log guid to 0, to indicate an empty log */ 560 vhdx_log_reset(bs, s); 561 562 exit: 563 qemu_vfree(data); 564 qemu_vfree(desc_entries); 565 return ret; 566 } 567 568 static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, 569 VHDXLogEntries *log, uint64_t seq, 570 bool *valid, VHDXLogEntryHeader *entry) 571 { 572 int ret = 0; 573 VHDXLogEntryHeader hdr; 574 void *buffer = NULL; 575 uint32_t i, desc_sectors, total_sectors, crc; 576 uint32_t sectors_read = 0; 577 VHDXLogDescEntries *desc_buffer = NULL; 578 579 *valid = false; 580 581 ret = vhdx_log_peek_hdr(bs, log, &hdr); 582 if (ret < 0) { 583 goto inc_and_exit; 584 } 585 586 if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { 587 goto inc_and_exit; 588 } 589 590 if (seq > 0) { 591 if (hdr.sequence_number != seq + 1) { 592 goto inc_and_exit; 593 } 594 } 595 596 desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); 597 598 /* Read all log sectors, and calculate log checksum */ 599 600 total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; 601 602 603 /* read_desc() will increment the read idx */ 604 ret 
= vhdx_log_read_desc(bs, s, log, &desc_buffer, false); 605 if (ret < 0) { 606 goto free_and_exit; 607 } 608 609 crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, 610 desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); 611 crc ^= 0xffffffff; 612 613 buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 614 if (total_sectors > desc_sectors) { 615 for (i = 0; i < total_sectors - desc_sectors; i++) { 616 sectors_read = 0; 617 ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer, 618 1, false); 619 if (ret < 0 || sectors_read != 1) { 620 goto free_and_exit; 621 } 622 crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); 623 crc ^= 0xffffffff; 624 } 625 } 626 crc ^= 0xffffffff; 627 if (crc != hdr.checksum) { 628 goto free_and_exit; 629 } 630 631 *valid = true; 632 *entry = hdr; 633 goto free_and_exit; 634 635 inc_and_exit: 636 log->read = vhdx_log_inc_idx(log->read, log->length); 637 638 free_and_exit: 639 qemu_vfree(buffer); 640 qemu_vfree(desc_buffer); 641 return ret; 642 } 643 644 /* Search through the log circular buffer, and find the valid, active 645 * log sequence, if any exists 646 * */ 647 static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, 648 VHDXLogSequence *logs) 649 { 650 int ret = 0; 651 uint32_t tail; 652 bool seq_valid = false; 653 VHDXLogSequence candidate = { 0 }; 654 VHDXLogEntryHeader hdr = { 0 }; 655 VHDXLogEntries curr_log; 656 657 memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); 658 curr_log.write = curr_log.length; /* assume log is full */ 659 curr_log.read = 0; 660 661 662 /* now we will go through the whole log sector by sector, until 663 * we find a valid, active log sequence, or reach the end of the 664 * log buffer */ 665 for (;;) { 666 uint64_t curr_seq = 0; 667 VHDXLogSequence current = { 0 }; 668 669 tail = curr_log.read; 670 671 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, 672 &seq_valid, &hdr); 673 if (ret < 0) { 674 goto exit; 675 } 676 677 if (seq_valid) { 678 current.valid = true; 679 
current.log = curr_log; 680 current.log.read = tail; 681 current.log.write = curr_log.read; 682 current.count = 1; 683 current.hdr = hdr; 684 685 686 for (;;) { 687 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, 688 &seq_valid, &hdr); 689 if (ret < 0) { 690 goto exit; 691 } 692 if (seq_valid == false) { 693 break; 694 } 695 current.log.write = curr_log.read; 696 current.count++; 697 698 curr_seq = hdr.sequence_number; 699 } 700 } 701 702 if (current.valid) { 703 if (candidate.valid == false || 704 current.hdr.sequence_number > candidate.hdr.sequence_number) { 705 candidate = current; 706 } 707 } 708 709 if (curr_log.read < tail) { 710 break; 711 } 712 } 713 714 *logs = candidate; 715 716 if (candidate.valid) { 717 /* this is the next sequence number, for writes */ 718 s->log.sequence = candidate.hdr.sequence_number + 1; 719 } 720 721 722 exit: 723 return ret; 724 } 725 726 /* Parse the replay log. Per the VHDX spec, if the log is present 727 * it must be replayed prior to opening the file, even read-only. 
728 * 729 * If read-only, we must replay the log in RAM (or refuse to open 730 * a dirty VHDX file read-only) */ 731 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, 732 Error **errp) 733 { 734 int ret = 0; 735 VHDXHeader *hdr; 736 VHDXLogSequence logs = { 0 }; 737 738 hdr = s->headers[s->curr_header]; 739 740 *flushed = false; 741 742 /* s->log.hdr is freed in vhdx_close() */ 743 if (s->log.hdr == NULL) { 744 s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); 745 } 746 747 s->log.offset = hdr->log_offset; 748 s->log.length = hdr->log_length; 749 750 if (s->log.offset < VHDX_LOG_MIN_SIZE || 751 s->log.offset % VHDX_LOG_MIN_SIZE) { 752 ret = -EINVAL; 753 goto exit; 754 } 755 756 /* per spec, only log version of 0 is supported */ 757 if (hdr->log_version != 0) { 758 ret = -EINVAL; 759 goto exit; 760 } 761 762 /* If either the log guid, or log length is zero, 763 * then a replay log is not present */ 764 if (guid_eq(hdr->log_guid, zero_guid)) { 765 goto exit; 766 } 767 768 if (hdr->log_length == 0) { 769 goto exit; 770 } 771 772 if (hdr->log_length % VHDX_LOG_MIN_SIZE) { 773 ret = -EINVAL; 774 goto exit; 775 } 776 777 778 /* The log is present, we need to find if and where there is an active 779 * sequence of valid entries present in the log. 
*/ 780 781 ret = vhdx_log_search(bs, s, &logs); 782 if (ret < 0) { 783 goto exit; 784 } 785 786 if (logs.valid) { 787 if (bs->read_only) { 788 ret = -EPERM; 789 error_setg(errp, 790 "VHDX image file '%s' opened read-only, but " 791 "contains a log that needs to be replayed", 792 bs->filename); 793 error_append_hint(errp, "To replay the log, run:\n" 794 "qemu-img check -r all '%s'\n", 795 bs->filename); 796 goto exit; 797 } 798 /* now flush the log */ 799 ret = vhdx_log_flush(bs, s, &logs); 800 if (ret < 0) { 801 goto exit; 802 } 803 *flushed = true; 804 } 805 806 807 exit: 808 return ret; 809 } 810 811 812 813 static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, 814 VHDXLogDataSector *sector, void *data, 815 uint64_t seq) 816 { 817 /* 8 + 4084 + 4 = 4096, 1 log sector */ 818 memcpy(&desc->leading_bytes, data, 8); 819 data += 8; 820 cpu_to_le64s(&desc->leading_bytes); 821 memcpy(sector->data, data, 4084); 822 data += 4084; 823 memcpy(&desc->trailing_bytes, data, 4); 824 cpu_to_le32s(&desc->trailing_bytes); 825 data += 4; 826 827 sector->sequence_high = (uint32_t) (seq >> 32); 828 sector->sequence_low = (uint32_t) (seq & 0xffffffff); 829 sector->data_signature = VHDX_LOG_DATA_SIGNATURE; 830 831 vhdx_log_desc_le_export(desc); 832 vhdx_log_data_le_export(sector); 833 } 834 835 836 static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, 837 void *data, uint32_t length, uint64_t offset) 838 { 839 int ret = 0; 840 void *buffer = NULL; 841 void *merged_sector = NULL; 842 void *data_tmp, *sector_write; 843 unsigned int i; 844 int sector_offset; 845 uint32_t desc_sectors, sectors, total_length; 846 uint32_t sectors_written = 0; 847 uint32_t aligned_length; 848 uint32_t leading_length = 0; 849 uint32_t trailing_length = 0; 850 uint32_t partial_sectors = 0; 851 uint32_t bytes_written = 0; 852 uint64_t file_offset; 853 VHDXHeader *header; 854 VHDXLogEntryHeader new_hdr; 855 VHDXLogDescriptor *new_desc = NULL; 856 VHDXLogDataSector *data_sector = NULL; 857 
MSGUID new_guid = { 0 }; 858 859 header = s->headers[s->curr_header]; 860 861 /* need to have offset read data, and be on 4096 byte boundary */ 862 863 if (length > header->log_length) { 864 /* no log present. we could create a log here instead of failing */ 865 ret = -EINVAL; 866 goto exit; 867 } 868 869 if (guid_eq(header->log_guid, zero_guid)) { 870 vhdx_guid_generate(&new_guid); 871 vhdx_update_headers(bs, s, false, &new_guid); 872 } else { 873 /* currently, we require that the log be flushed after 874 * every write. */ 875 ret = -ENOTSUP; 876 goto exit; 877 } 878 879 /* 0 is an invalid sequence number, but may also represent the first 880 * log write (or a wrapped seq) */ 881 if (s->log.sequence == 0) { 882 s->log.sequence = 1; 883 } 884 885 sector_offset = offset % VHDX_LOG_SECTOR_SIZE; 886 file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; 887 888 aligned_length = length; 889 890 /* add in the unaligned head and tail bytes */ 891 if (sector_offset) { 892 leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); 893 leading_length = leading_length > length ? 
length : leading_length; 894 aligned_length -= leading_length; 895 partial_sectors++; 896 } 897 898 sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; 899 trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); 900 if (trailing_length) { 901 partial_sectors++; 902 } 903 904 sectors += partial_sectors; 905 906 /* sectors is now how many sectors the data itself takes, not 907 * including the header and descriptor metadata */ 908 909 new_hdr = (VHDXLogEntryHeader) { 910 .signature = VHDX_LOG_SIGNATURE, 911 .tail = s->log.tail, 912 .sequence_number = s->log.sequence, 913 .descriptor_count = sectors, 914 .reserved = 0, 915 .flushed_file_offset = bdrv_getlength(bs->file->bs), 916 .last_file_offset = bdrv_getlength(bs->file->bs), 917 }; 918 919 new_hdr.log_guid = header->log_guid; 920 921 desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); 922 923 total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; 924 new_hdr.entry_length = total_length; 925 926 vhdx_log_entry_hdr_le_export(&new_hdr); 927 928 buffer = qemu_blockalign(bs, total_length); 929 memcpy(buffer, &new_hdr, sizeof(new_hdr)); 930 931 new_desc = buffer + sizeof(new_hdr); 932 data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); 933 data_tmp = data; 934 935 /* All log sectors are 4KB, so for any partial sectors we must 936 * merge the data with preexisting data from the final file 937 * destination */ 938 merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); 939 940 for (i = 0; i < sectors; i++) { 941 new_desc->signature = VHDX_LOG_DESC_SIGNATURE; 942 new_desc->sequence_number = s->log.sequence; 943 new_desc->file_offset = file_offset; 944 945 if (i == 0 && leading_length) { 946 /* partial sector at the front of the buffer */ 947 ret = bdrv_pread(bs->file->bs, file_offset, merged_sector, 948 VHDX_LOG_SECTOR_SIZE); 949 if (ret < 0) { 950 goto exit; 951 } 952 memcpy(merged_sector + sector_offset, data_tmp, leading_length); 953 bytes_written = leading_length; 954 
sector_write = merged_sector; 955 } else if (i == sectors - 1 && trailing_length) { 956 /* partial sector at the end of the buffer */ 957 ret = bdrv_pread(bs->file->bs, 958 file_offset, 959 merged_sector + trailing_length, 960 VHDX_LOG_SECTOR_SIZE - trailing_length); 961 if (ret < 0) { 962 goto exit; 963 } 964 memcpy(merged_sector, data_tmp, trailing_length); 965 bytes_written = trailing_length; 966 sector_write = merged_sector; 967 } else { 968 bytes_written = VHDX_LOG_SECTOR_SIZE; 969 sector_write = data_tmp; 970 } 971 972 /* populate the raw sector data into the proper structures, 973 * as well as update the descriptor, and convert to proper 974 * endianness */ 975 vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, 976 s->log.sequence); 977 978 data_tmp += bytes_written; 979 data_sector++; 980 new_desc++; 981 file_offset += VHDX_LOG_SECTOR_SIZE; 982 } 983 984 /* checksum covers entire entry, from the log header through the 985 * last data sector */ 986 vhdx_update_checksum(buffer, total_length, 987 offsetof(VHDXLogEntryHeader, checksum)); 988 989 /* now write to the log */ 990 ret = vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer, 991 desc_sectors + sectors); 992 if (ret < 0) { 993 goto exit; 994 } 995 996 if (sectors_written != desc_sectors + sectors) { 997 /* instead of failing, we could flush the log here */ 998 ret = -EINVAL; 999 goto exit; 1000 } 1001 1002 s->log.sequence++; 1003 /* write new tail */ 1004 s->log.tail = s->log.write; 1005 1006 exit: 1007 qemu_vfree(buffer); 1008 qemu_vfree(merged_sector); 1009 return ret; 1010 } 1011 1012 /* Perform a log write, and then immediately flush the entire log */ 1013 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, 1014 void *data, uint32_t length, uint64_t offset) 1015 { 1016 int ret = 0; 1017 VHDXLogSequence logs = { .valid = true, 1018 .count = 1, 1019 .hdr = { 0 } }; 1020 1021 1022 /* Make sure data written (new and/or changed blocks) is stable 1023 * on disk, before 
creating log entry */ 1024 bdrv_flush(bs); 1025 ret = vhdx_log_write(bs, s, data, length, offset); 1026 if (ret < 0) { 1027 goto exit; 1028 } 1029 logs.log = s->log; 1030 1031 /* Make sure log is stable on disk */ 1032 bdrv_flush(bs); 1033 ret = vhdx_log_flush(bs, s, &logs); 1034 if (ret < 0) { 1035 goto exit; 1036 } 1037 1038 s->log = logs.log; 1039 1040 exit: 1041 return ret; 1042 } 1043 1044