xref: /openbmc/qemu/block/vhdx-log.c (revision d2dfe0b5)
1 /*
2  * Block driver for Hyper-V VHDX Images
3  *
4  * Copyright (c) 2013 Red Hat, Inc.,
5  *
6  * Authors:
7  *  Jeff Cody <jcody@redhat.com>
8  *
9  *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
10  *  by Microsoft:
11  *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
12  *
13  * This file covers the functionality of the metadata log writing, parsing, and
14  * replay.
15  *
16  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
17  * See the COPYING.LIB file in the top-level directory.
18  *
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qapi/error.h"
23 #include "block/block-io.h"
24 #include "block/block_int.h"
25 #include "qemu/error-report.h"
26 #include "qemu/bswap.h"
27 #include "qemu/memalign.h"
28 #include "vhdx.h"
29 
30 
31 typedef struct VHDXLogSequence {
32     bool valid;
33     uint32_t count;
34     VHDXLogEntries log;
35     VHDXLogEntryHeader hdr;
36 } VHDXLogSequence;
37 
38 typedef struct VHDXLogDescEntries {
39     VHDXLogEntryHeader hdr;
40     VHDXLogDescriptor desc[];
41 } VHDXLogDescEntries;
42 
43 static const MSGUID zero_guid = { 0 };
44 
45 /* The log located on the disk is circular buffer containing
46  * sectors of 4096 bytes each.
47  *
48  * It is assumed for the read/write functions below that the
49  * circular buffer scheme uses a 'one sector open' to indicate
50  * the buffer is full.  Given the validation methods used for each
51  * sector, this method should be compatible with other methods that
52  * do not waste a sector.
53  */
54 
55 
56 /* Allow peeking at the hdr entry at the beginning of the current
57  * read index, without advancing the read index */
58 static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
59                              VHDXLogEntryHeader *hdr)
60 {
61     int ret = 0;
62     uint64_t offset;
63     uint32_t read;
64 
65     assert(hdr != NULL);
66 
67     /* peek is only supported on sector boundaries */
68     if (log->read % VHDX_LOG_SECTOR_SIZE) {
69         ret = -EFAULT;
70         goto exit;
71     }
72 
73     read = log->read;
74     /* we are guaranteed that a) log sectors are 4096 bytes,
75      * and b) the log length is a multiple of 1MB. So, there
76      * is always a round number of sectors in the buffer */
77     if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
78         read = 0;
79     }
80 
81     if (read == log->write) {
82         ret = -EINVAL;
83         goto exit;
84     }
85 
86     offset = log->offset + read;
87 
88     ret = bdrv_pread(bs->file, offset, sizeof(VHDXLogEntryHeader), hdr, 0);
89     if (ret < 0) {
90         goto exit;
91     }
92     vhdx_log_entry_hdr_le_import(hdr);
93 
94 exit:
95     return ret;
96 }
97 
98 /* Index increment for log, based on sector boundaries */
99 static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
100 {
101     idx += VHDX_LOG_SECTOR_SIZE;
102     /* we are guaranteed that a) log sectors are 4096 bytes,
103      * and b) the log length is a multiple of 1MB. So, there
104      * is always a round number of sectors in the buffer */
105     return idx >= length ? 0 : idx;
106 }
107 
108 
109 /* Reset the log to empty */
110 static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
111 {
112     MSGUID guid = { 0 };
113     s->log.read = s->log.write = 0;
114     /* a log guid of 0 indicates an empty log to any parser of v0
115      * VHDX logs */
116     vhdx_update_headers(bs, s, false, &guid);
117 }
118 
119 /* Reads num_sectors from the log (all log sectors are 4096 bytes),
120  * into buffer 'buffer'.  Upon return, *sectors_read will contain
121  * the number of sectors successfully read.
122  *
123  * It is assumed that 'buffer' is already allocated, and of sufficient
124  * size (i.e. >= 4096*num_sectors).
125  *
126  * If 'peek' is true, then the tail (read) pointer for the circular buffer is
127  * not modified.
128  *
129  * 0 is returned on success, -errno otherwise.  */
130 static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
131                                  uint32_t *sectors_read, void *buffer,
132                                  uint32_t num_sectors, bool peek)
133 {
134     int ret = 0;
135     uint64_t offset;
136     uint32_t read;
137 
138     read = log->read;
139 
140     *sectors_read = 0;
141     while (num_sectors) {
142         if (read == log->write) {
143             /* empty */
144             break;
145         }
146         offset = log->offset + read;
147 
148         ret = bdrv_pread(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer, 0);
149         if (ret < 0) {
150             goto exit;
151         }
152         read = vhdx_log_inc_idx(read, log->length);
153 
154         *sectors_read = *sectors_read + 1;
155         num_sectors--;
156     }
157 
158 exit:
159     if (!peek) {
160         log->read = read;
161     }
162     return ret;
163 }
164 
165 /* Writes num_sectors to the log (all log sectors are 4096 bytes),
166  * from buffer 'buffer'.  Upon return, *sectors_written will contain
167  * the number of sectors successfully written.
168  *
169  * It is assumed that 'buffer' is at least 4096*num_sectors large.
170  *
171  * 0 is returned on success, -errno otherwise */
172 static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
173                                   uint32_t *sectors_written, void *buffer,
174                                   uint32_t num_sectors)
175 {
176     int ret = 0;
177     uint64_t offset;
178     uint32_t write;
179     void *buffer_tmp;
180     BDRVVHDXState *s = bs->opaque;
181 
182     ret = vhdx_user_visible_write(bs, s);
183     if (ret < 0) {
184         goto exit;
185     }
186 
187     write = log->write;
188 
189     buffer_tmp = buffer;
190     while (num_sectors) {
191 
192         offset = log->offset + write;
193         write = vhdx_log_inc_idx(write, log->length);
194         if (write == log->read) {
195             /* full */
196             break;
197         }
198         ret = bdrv_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp,
199                           0);
200         if (ret < 0) {
201             goto exit;
202         }
203         buffer_tmp += VHDX_LOG_SECTOR_SIZE;
204 
205         log->write = write;
206         *sectors_written = *sectors_written + 1;
207         num_sectors--;
208     }
209 
210 exit:
211     return ret;
212 }
213 
214 
215 /* Validates a log entry header */
216 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
217                                   BDRVVHDXState *s)
218 {
219     int valid = false;
220 
221     if (hdr->signature != VHDX_LOG_SIGNATURE) {
222         goto exit;
223     }
224 
225     /* if the individual entry length is larger than the whole log
226      * buffer, that is obviously invalid */
227     if (log->length < hdr->entry_length) {
228         goto exit;
229     }
230 
231     /* length of entire entry must be in units of 4KB (log sector size) */
232     if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
233         goto exit;
234     }
235 
236     /* per spec, sequence # must be > 0 */
237     if (hdr->sequence_number == 0) {
238         goto exit;
239     }
240 
241     /* log entries are only valid if they match the file-wide log guid
242      * found in the active header */
243     if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
244         goto exit;
245     }
246 
247     if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
248         goto exit;
249     }
250 
251     valid = true;
252 
253 exit:
254     return valid;
255 }
256 
257 /*
258  * Given a log header, this will validate that the descriptors and the
259  * corresponding data sectors (if applicable)
260  *
261  * Validation consists of:
262  *      1. Making sure the sequence numbers matches the entry header
263  *      2. Verifying a valid signature ('zero' or 'desc' for descriptors)
264  *      3. File offset field is a multiple of 4KB
265  *      4. If a data descriptor, the corresponding data sector
266  *         has its signature ('data') and matching sequence number
267  *
268  * @desc: the data buffer containing the descriptor
269  * @hdr:  the log entry header
270  *
271  * Returns true if valid
272  */
273 static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
274                                    VHDXLogEntryHeader *hdr)
275 {
276     bool ret = false;
277 
278     if (desc->sequence_number != hdr->sequence_number) {
279         goto exit;
280     }
281     if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
282         goto exit;
283     }
284 
285     if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
286         if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
287             /* valid */
288             ret = true;
289         }
290     } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
291             /* valid */
292             ret = true;
293     }
294 
295 exit:
296     return ret;
297 }
298 
299 
300 /* Prior to sector data for a log entry, there is the header
301  * and the descriptors referenced in the header:
302  *
303  * [] = 4KB sector
304  *
305  * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
306  *
307  * The first sector in a log entry has a 64 byte header, and
308  * up to 126 32-byte descriptors.  If more descriptors than
309  * 126 are required, then subsequent sectors can have up to 128
310  * descriptors.  Each sector is 4KB.  Data follows the descriptor
311  * sectors.
312  *
313  * This will return the number of sectors needed to encompass
314  * the passed number of descriptors in desc_cnt.
315  *
316  * This will never return 0, even if desc_cnt is 0.
317  */
318 static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
319 {
320     uint32_t desc_sectors;
321 
322     desc_cnt += 2; /* account for header in first sector */
323     desc_sectors = desc_cnt / 128;
324     if (desc_cnt % 128) {
325         desc_sectors++;
326     }
327 
328     return desc_sectors;
329 }
330 
331 
332 /* Reads the log header, and subsequent descriptors (if any).  This
333  * will allocate all the space for buffer, which must be NULL when
334  * passed into this function. Each descriptor will also be validated,
335  * and error returned if any are invalid. */
336 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
337                               VHDXLogEntries *log, VHDXLogDescEntries **buffer,
338                               bool convert_endian)
339 {
340     int ret = 0;
341     uint32_t desc_sectors;
342     uint32_t sectors_read;
343     VHDXLogEntryHeader hdr;
344     VHDXLogDescEntries *desc_entries = NULL;
345     VHDXLogDescriptor desc;
346     int i;
347 
348     assert(*buffer == NULL);
349 
350     ret = vhdx_log_peek_hdr(bs, log, &hdr);
351     if (ret < 0) {
352         goto exit;
353     }
354 
355     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
356         ret = -EINVAL;
357         goto exit;
358     }
359 
360     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
361     desc_entries = qemu_try_blockalign(bs->file->bs,
362                                        desc_sectors * VHDX_LOG_SECTOR_SIZE);
363     if (desc_entries == NULL) {
364         ret = -ENOMEM;
365         goto exit;
366     }
367 
368     ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
369                                 desc_sectors, false);
370     if (ret < 0) {
371         goto free_and_exit;
372     }
373     if (sectors_read != desc_sectors) {
374         ret = -EINVAL;
375         goto free_and_exit;
376     }
377 
378     /* put in proper endianness, and validate each desc */
379     for (i = 0; i < hdr.descriptor_count; i++) {
380         desc = desc_entries->desc[i];
381         vhdx_log_desc_le_import(&desc);
382         if (convert_endian) {
383             desc_entries->desc[i] = desc;
384         }
385         if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
386             ret = -EINVAL;
387             goto free_and_exit;
388         }
389     }
390     if (convert_endian) {
391         desc_entries->hdr = hdr;
392     }
393 
394     *buffer = desc_entries;
395     goto exit;
396 
397 free_and_exit:
398     qemu_vfree(desc_entries);
399 exit:
400     return ret;
401 }
402 
403 
404 /* Flushes the descriptor described by desc to the VHDX image file.
405  * If the descriptor is a data descriptor, than 'data' must be non-NULL,
406  * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
407  * written.
408  *
409  * Verification is performed to make sure the sequence numbers of a data
410  * descriptor match the sequence number in the desc.
411  *
412  * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
413  * In this case, it should be noted that zeroes are written to disk, and the
414  * image file is not extended as a sparse file.  */
415 static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
416                                VHDXLogDataSector *data)
417 {
418     int ret = 0;
419     uint64_t seq, file_offset;
420     uint32_t offset = 0;
421     void *buffer = NULL;
422     uint64_t count = 1;
423     int i;
424 
425     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
426 
427     if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
428         /* data sector */
429         if (data == NULL) {
430             ret = -EFAULT;
431             goto exit;
432         }
433 
434         /* The sequence number of the data sector must match that
435          * in the descriptor */
436         seq = data->sequence_high;
437         seq <<= 32;
438         seq |= data->sequence_low & 0xffffffff;
439 
440         if (seq != desc->sequence_number) {
441             ret = -EINVAL;
442             goto exit;
443         }
444 
445         /* Each data sector is in total 4096 bytes, however the first
446          * 8 bytes, and last 4 bytes, are located in the descriptor */
447         memcpy(buffer, &desc->leading_bytes, 8);
448         offset += 8;
449 
450         memcpy(buffer+offset, data->data, 4084);
451         offset += 4084;
452 
453         memcpy(buffer+offset, &desc->trailing_bytes, 4);
454 
455     } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
456         /* write 'count' sectors of sector */
457         memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
458         count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
459     } else {
460         error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
461                       desc->signature);
462         ret = -EINVAL;
463         goto exit;
464     }
465 
466     file_offset = desc->file_offset;
467 
468     /* count is only > 1 if we are writing zeroes */
469     for (i = 0; i < count; i++) {
470         ret = bdrv_pwrite_sync(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
471                                buffer, 0);
472         if (ret < 0) {
473             goto exit;
474         }
475         file_offset += VHDX_LOG_SECTOR_SIZE;
476     }
477 
478 exit:
479     qemu_vfree(buffer);
480     return ret;
481 }
482 
483 /* Flush the entire log (as described by 'logs') to the VHDX image
484  * file, and then set the log to 'empty' status once complete.
485  *
486  * The log entries should be validate prior to flushing */
487 static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
488                           VHDXLogSequence *logs)
489 {
490     int ret = 0;
491     int i;
492     uint32_t cnt, sectors_read;
493     uint64_t new_file_size;
494     void *data = NULL;
495     int64_t file_length;
496     VHDXLogDescEntries *desc_entries = NULL;
497     VHDXLogEntryHeader hdr_tmp = { 0 };
498 
499     cnt = logs->count;
500 
501     data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
502 
503     ret = vhdx_user_visible_write(bs, s);
504     if (ret < 0) {
505         goto exit;
506     }
507 
508     /* each iteration represents one log sequence, which may span multiple
509      * sectors */
510     while (cnt--) {
511         ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
512         if (ret < 0) {
513             goto exit;
514         }
515         file_length = bdrv_getlength(bs->file->bs);
516         if (file_length < 0) {
517             ret = file_length;
518             goto exit;
519         }
520         /* if the log shows a FlushedFileOffset larger than our current file
521          * size, then that means the file has been truncated / corrupted, and
522          * we must refused to open it / use it */
523         if (hdr_tmp.flushed_file_offset > file_length) {
524             ret = -EINVAL;
525             goto exit;
526         }
527 
528         ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
529         if (ret < 0) {
530             goto exit;
531         }
532 
533         for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
534             if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
535                 /* data sector, so read a sector to flush */
536                 ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
537                                             data, 1, false);
538                 if (ret < 0) {
539                     goto exit;
540                 }
541                 if (sectors_read != 1) {
542                     ret = -EINVAL;
543                     goto exit;
544                 }
545                 vhdx_log_data_le_import(data);
546             }
547 
548             ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
549             if (ret < 0) {
550                 goto exit;
551             }
552         }
553         if (file_length < desc_entries->hdr.last_file_offset) {
554             new_file_size = desc_entries->hdr.last_file_offset;
555             if (new_file_size % (1 * MiB)) {
556                 /* round up to nearest 1MB boundary */
557                 new_file_size = QEMU_ALIGN_UP(new_file_size, MiB);
558                 if (new_file_size > INT64_MAX) {
559                     ret = -EINVAL;
560                     goto exit;
561                 }
562                 ret = bdrv_truncate(bs->file, new_file_size, false,
563                                     PREALLOC_MODE_OFF, 0, NULL);
564                 if (ret < 0) {
565                     goto exit;
566                 }
567             }
568         }
569         qemu_vfree(desc_entries);
570         desc_entries = NULL;
571     }
572 
573     ret = bdrv_flush(bs);
574     if (ret < 0) {
575         goto exit;
576     }
577     /* once the log is fully flushed, indicate that we have an empty log
578      * now.  This also sets the log guid to 0, to indicate an empty log */
579     vhdx_log_reset(bs, s);
580 
581 exit:
582     qemu_vfree(data);
583     qemu_vfree(desc_entries);
584     return ret;
585 }
586 
587 static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
588                                    VHDXLogEntries *log, uint64_t seq,
589                                    bool *valid, VHDXLogEntryHeader *entry)
590 {
591     int ret = 0;
592     VHDXLogEntryHeader hdr;
593     void *buffer = NULL;
594     uint32_t i, desc_sectors, total_sectors, crc;
595     uint32_t sectors_read = 0;
596     VHDXLogDescEntries *desc_buffer = NULL;
597 
598     *valid = false;
599 
600     ret = vhdx_log_peek_hdr(bs, log, &hdr);
601     if (ret < 0) {
602         goto inc_and_exit;
603     }
604 
605     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
606         goto inc_and_exit;
607     }
608 
609     if (seq > 0) {
610         if (hdr.sequence_number != seq + 1) {
611             goto inc_and_exit;
612         }
613     }
614 
615     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
616 
617     /* Read all log sectors, and calculate log checksum */
618 
619     total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
620 
621 
622     /* read_desc() will increment the read idx */
623     ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
624     if (ret < 0) {
625         goto free_and_exit;
626     }
627 
628     crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
629                             desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
630     crc ^= 0xffffffff;
631 
632     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
633     if (total_sectors > desc_sectors) {
634         for (i = 0; i < total_sectors - desc_sectors; i++) {
635             sectors_read = 0;
636             ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
637                                         1, false);
638             if (ret < 0 || sectors_read != 1) {
639                 goto free_and_exit;
640             }
641             crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
642             crc ^= 0xffffffff;
643         }
644     }
645     crc ^= 0xffffffff;
646     if (crc != hdr.checksum) {
647         goto free_and_exit;
648     }
649 
650     *valid = true;
651     *entry = hdr;
652     goto free_and_exit;
653 
654 inc_and_exit:
655     log->read = vhdx_log_inc_idx(log->read, log->length);
656 
657 free_and_exit:
658     qemu_vfree(buffer);
659     qemu_vfree(desc_buffer);
660     return ret;
661 }
662 
663 /* Search through the log circular buffer, and find the valid, active
664  * log sequence, if any exists
665  * */
666 static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
667                            VHDXLogSequence *logs)
668 {
669     int ret = 0;
670     uint32_t tail;
671     bool seq_valid = false;
672     VHDXLogSequence candidate = { 0 };
673     VHDXLogEntryHeader hdr = { 0 };
674     VHDXLogEntries curr_log;
675 
676     memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
677     curr_log.write = curr_log.length;   /* assume log is full */
678     curr_log.read = 0;
679 
680 
681     /* now we will go through the whole log sector by sector, until
682      * we find a valid, active log sequence, or reach the end of the
683      * log buffer */
684     for (;;) {
685         uint64_t curr_seq = 0;
686         VHDXLogSequence current = { 0 };
687 
688         tail = curr_log.read;
689 
690         ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
691                                       &seq_valid, &hdr);
692         if (ret < 0) {
693             goto exit;
694         }
695 
696         if (seq_valid) {
697             current.valid     = true;
698             current.log       = curr_log;
699             current.log.read  = tail;
700             current.log.write = curr_log.read;
701             current.count     = 1;
702             current.hdr       = hdr;
703 
704 
705             for (;;) {
706                 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
707                                               &seq_valid, &hdr);
708                 if (ret < 0) {
709                     goto exit;
710                 }
711                 if (seq_valid == false) {
712                     break;
713                 }
714                 current.log.write = curr_log.read;
715                 current.count++;
716 
717                 curr_seq = hdr.sequence_number;
718             }
719         }
720 
721         if (current.valid) {
722             if (candidate.valid == false ||
723                 current.hdr.sequence_number > candidate.hdr.sequence_number) {
724                 candidate = current;
725             }
726         }
727 
728         if (curr_log.read < tail) {
729             break;
730         }
731     }
732 
733     *logs = candidate;
734 
735     if (candidate.valid) {
736         /* this is the next sequence number, for writes */
737         s->log.sequence = candidate.hdr.sequence_number + 1;
738     }
739 
740 
741 exit:
742     return ret;
743 }
744 
745 /* Parse the replay log.  Per the VHDX spec, if the log is present
746  * it must be replayed prior to opening the file, even read-only.
747  *
748  * If read-only, we must replay the log in RAM (or refuse to open
749  * a dirty VHDX file read-only) */
750 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
751                    Error **errp)
752 {
753     int ret = 0;
754     VHDXHeader *hdr;
755     VHDXLogSequence logs = { 0 };
756 
757     hdr = s->headers[s->curr_header];
758 
759     *flushed = false;
760 
761     /* s->log.hdr is freed in vhdx_close() */
762     if (s->log.hdr == NULL) {
763         s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
764     }
765 
766     s->log.offset = hdr->log_offset;
767     s->log.length = hdr->log_length;
768 
769     if (s->log.offset < VHDX_LOG_MIN_SIZE ||
770         s->log.offset % VHDX_LOG_MIN_SIZE) {
771         ret = -EINVAL;
772         goto exit;
773     }
774 
775     /* per spec, only log version of 0 is supported */
776     if (hdr->log_version != 0) {
777         ret = -EINVAL;
778         goto exit;
779     }
780 
781     /* If either the log guid, or log length is zero,
782      * then a replay log is not present */
783     if (guid_eq(hdr->log_guid, zero_guid)) {
784         goto exit;
785     }
786 
787     if (hdr->log_length == 0) {
788         goto exit;
789     }
790 
791     if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
792         ret = -EINVAL;
793         goto exit;
794     }
795 
796 
797     /* The log is present, we need to find if and where there is an active
798      * sequence of valid entries present in the log.  */
799 
800     ret = vhdx_log_search(bs, s, &logs);
801     if (ret < 0) {
802         goto exit;
803     }
804 
805     if (logs.valid) {
806         if (bdrv_is_read_only(bs)) {
807             bdrv_refresh_filename(bs);
808             ret = -EPERM;
809             error_setg(errp,
810                        "VHDX image file '%s' opened read-only, but "
811                        "contains a log that needs to be replayed",
812                        bs->filename);
813             error_append_hint(errp,  "To replay the log, run:\n"
814                               "qemu-img check -r all '%s'\n",
815                               bs->filename);
816             goto exit;
817         }
818         /* now flush the log */
819         ret = vhdx_log_flush(bs, s, &logs);
820         if (ret < 0) {
821             goto exit;
822         }
823         *flushed = true;
824     }
825 
826 
827 exit:
828     return ret;
829 }
830 
831 
832 
833 static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
834                                       VHDXLogDataSector *sector, void *data,
835                                       uint64_t seq)
836 {
837     /* 8 + 4084 + 4 = 4096, 1 log sector */
838     memcpy(&desc->leading_bytes, data, 8);
839     data += 8;
840     desc->leading_bytes = cpu_to_le64(desc->leading_bytes);
841     memcpy(sector->data, data, 4084);
842     data += 4084;
843     memcpy(&desc->trailing_bytes, data, 4);
844     desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes);
845     data += 4;
846 
847     sector->sequence_high  = (uint32_t) (seq >> 32);
848     sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
849     sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
850 
851     vhdx_log_desc_le_export(desc);
852     vhdx_log_data_le_export(sector);
853 }
854 
855 
856 static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
857                           void *data, uint32_t length, uint64_t offset)
858 {
859     int ret = 0;
860     void *buffer = NULL;
861     void *merged_sector = NULL;
862     void *data_tmp, *sector_write;
863     unsigned int i;
864     int sector_offset;
865     uint32_t desc_sectors, sectors, total_length;
866     uint32_t sectors_written = 0;
867     uint32_t aligned_length;
868     uint32_t leading_length = 0;
869     uint32_t trailing_length = 0;
870     uint32_t partial_sectors = 0;
871     uint32_t bytes_written = 0;
872     uint64_t file_offset;
873     int64_t file_length;
874     VHDXHeader *header;
875     VHDXLogEntryHeader new_hdr;
876     VHDXLogDescriptor *new_desc = NULL;
877     VHDXLogDataSector *data_sector = NULL;
878     MSGUID new_guid = { 0 };
879 
880     header = s->headers[s->curr_header];
881 
882     /* need to have offset read data, and be on 4096 byte boundary */
883 
884     if (length > header->log_length) {
885         /* no log present.  we could create a log here instead of failing */
886         ret = -EINVAL;
887         goto exit;
888     }
889 
890     if (guid_eq(header->log_guid, zero_guid)) {
891         vhdx_guid_generate(&new_guid);
892         vhdx_update_headers(bs, s, false, &new_guid);
893     } else {
894         /* currently, we require that the log be flushed after
895          * every write. */
896         ret = -ENOTSUP;
897         goto exit;
898     }
899 
900     /* 0 is an invalid sequence number, but may also represent the first
901      * log write (or a wrapped seq) */
902     if (s->log.sequence == 0) {
903         s->log.sequence = 1;
904     }
905 
906     sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
907     file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
908 
909     aligned_length = length;
910 
911     /* add in the unaligned head and tail bytes */
912     if (sector_offset) {
913         leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
914         leading_length = leading_length > length ? length : leading_length;
915         aligned_length -= leading_length;
916         partial_sectors++;
917     }
918 
919     sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
920     trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
921     if (trailing_length) {
922         partial_sectors++;
923     }
924 
925     sectors += partial_sectors;
926 
927     file_length = bdrv_getlength(bs->file->bs);
928     if (file_length < 0) {
929         ret = file_length;
930         goto exit;
931     }
932 
933     /* sectors is now how many sectors the data itself takes, not
934      * including the header and descriptor metadata */
935 
936     new_hdr = (VHDXLogEntryHeader) {
937                 .signature           = VHDX_LOG_SIGNATURE,
938                 .tail                = s->log.tail,
939                 .sequence_number     = s->log.sequence,
940                 .descriptor_count    = sectors,
941                 .reserved            = 0,
942                 .flushed_file_offset = file_length,
943                 .last_file_offset    = file_length,
944                 .log_guid            = header->log_guid,
945               };
946 
947 
948     desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
949 
950     total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
951     new_hdr.entry_length = total_length;
952 
953     vhdx_log_entry_hdr_le_export(&new_hdr);
954 
955     buffer = qemu_blockalign(bs, total_length);
956     memcpy(buffer, &new_hdr, sizeof(new_hdr));
957 
958     new_desc = buffer + sizeof(new_hdr);
959     data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
960     data_tmp = data;
961 
962     /* All log sectors are 4KB, so for any partial sectors we must
963      * merge the data with preexisting data from the final file
964      * destination */
965     merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
966 
967     for (i = 0; i < sectors; i++) {
968         new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
969         new_desc->sequence_number = s->log.sequence;
970         new_desc->file_offset     = file_offset;
971 
972         if (i == 0 && leading_length) {
973             /* partial sector at the front of the buffer */
974             ret = bdrv_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
975                              merged_sector, 0);
976             if (ret < 0) {
977                 goto exit;
978             }
979             memcpy(merged_sector + sector_offset, data_tmp, leading_length);
980             bytes_written = leading_length;
981             sector_write = merged_sector;
982         } else if (i == sectors - 1 && trailing_length) {
983             /* partial sector at the end of the buffer */
984             ret = bdrv_pread(bs->file, file_offset + trailing_length,
985                              VHDX_LOG_SECTOR_SIZE - trailing_length,
986                              merged_sector + trailing_length, 0);
987             if (ret < 0) {
988                 goto exit;
989             }
990             memcpy(merged_sector, data_tmp, trailing_length);
991             bytes_written = trailing_length;
992             sector_write = merged_sector;
993         } else {
994             bytes_written = VHDX_LOG_SECTOR_SIZE;
995             sector_write = data_tmp;
996         }
997 
998         /* populate the raw sector data into the proper structures,
999          * as well as update the descriptor, and convert to proper
1000          * endianness */
1001         vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
1002                                   s->log.sequence);
1003 
1004         data_tmp += bytes_written;
1005         data_sector++;
1006         new_desc++;
1007         file_offset += VHDX_LOG_SECTOR_SIZE;
1008     }
1009 
1010     /* checksum covers entire entry, from the log header through the
1011      * last data sector */
1012     vhdx_update_checksum(buffer, total_length,
1013                          offsetof(VHDXLogEntryHeader, checksum));
1014 
1015     /* now write to the log */
1016     ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
1017                                  desc_sectors + sectors);
1018     if (ret < 0) {
1019         goto exit;
1020     }
1021 
1022     if (sectors_written != desc_sectors + sectors) {
1023         /* instead of failing, we could flush the log here */
1024         ret = -EINVAL;
1025         goto exit;
1026     }
1027 
1028     s->log.sequence++;
1029     /* write new tail */
1030     s->log.tail = s->log.write;
1031 
1032 exit:
1033     qemu_vfree(buffer);
1034     qemu_vfree(merged_sector);
1035     return ret;
1036 }
1037 
1038 /* Perform a log write, and then immediately flush the entire log */
1039 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
1040                              void *data, uint32_t length, uint64_t offset)
1041 {
1042     int ret = 0;
1043     VHDXLogSequence logs = { .valid = true,
1044                              .count = 1,
1045                              .hdr = { 0 } };
1046 
1047 
1048     /* Make sure data written (new and/or changed blocks) is stable
1049      * on disk, before creating log entry */
1050     ret = bdrv_flush(bs);
1051     if (ret < 0) {
1052         goto exit;
1053     }
1054 
1055     ret = vhdx_log_write(bs, s, data, length, offset);
1056     if (ret < 0) {
1057         goto exit;
1058     }
1059     logs.log = s->log;
1060 
1061     /* Make sure log is stable on disk */
1062     ret = bdrv_flush(bs);
1063     if (ret < 0) {
1064         goto exit;
1065     }
1066 
1067     ret = vhdx_log_flush(bs, s, &logs);
1068     if (ret < 0) {
1069         goto exit;
1070     }
1071 
1072     s->log = logs.log;
1073 
1074 exit:
1075     return ret;
1076 }
1077 
1078