xref: /openbmc/qemu/block/qcow2.c (revision ec603b55)
1 /*
2  * Block driver for the QCOW version 2 format
3  *
4  * Copyright (c) 2004-2006 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "block/block_int.h"
26 #include "sysemu/block-backend.h"
27 #include "qemu/module.h"
28 #include <zlib.h>
29 #include "block/qcow2.h"
30 #include "qemu/error-report.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/types.h"
34 #include "qapi-event.h"
35 #include "trace.h"
36 #include "qemu/option_int.h"
37 #include "qemu/cutils.h"
38 #include "qemu/bswap.h"
39 #include "qapi/opts-visitor.h"
40 #include "qapi-visit.h"
41 #include "block/crypto.h"
42 
43 /*
44   Differences with QCOW:
45 
46   - Support for multiple incremental snapshots.
47   - Memory management by reference counts.
48   - Clusters which have a reference count of one have the bit
49     QCOW_OFLAG_COPIED to optimize write performance.
50   - Size of compressed clusters is stored in sectors to reduce bit usage
51     in the cluster offsets.
52   - Support for storing additional data (such as the VM state) in the
53     snapshots.
54   - If a backing store is used, the cluster size is not constrained
55     (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
57 */
58 
59 
/*
 * Fixed-size prefix of a header extension as read from the image file.
 * Both fields are converted from big-endian after reading (see
 * qcow2_read_extensions()).
 */
typedef struct {
    uint32_t magic; /* extension type, one of QCOW2_EXT_MAGIC_* or unknown */
    uint32_t len;   /* length in bytes of the extension payload that follows */
} QEMU_PACKED QCowExtension;
64 
/*
 * Header extension type identifiers, matched against QCowExtension.magic
 * in qcow2_read_extensions().  QCOW2_EXT_MAGIC_END stops the parsing.
 */
#define  QCOW2_EXT_MAGIC_END 0
#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
#define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
#define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
#define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
70 
71 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
72 {
73     const QCowHeader *cow_header = (const void *)buf;
74 
75     if (buf_size >= sizeof(QCowHeader) &&
76         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
77         be32_to_cpu(cow_header->version) >= 2)
78         return 100;
79     else
80         return 0;
81 }
82 
83 
84 static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
85                                           uint8_t *buf, size_t buflen,
86                                           void *opaque, Error **errp)
87 {
88     BlockDriverState *bs = opaque;
89     BDRVQcow2State *s = bs->opaque;
90     ssize_t ret;
91 
92     if ((offset + buflen) > s->crypto_header.length) {
93         error_setg(errp, "Request for data outside of extension header");
94         return -1;
95     }
96 
97     ret = bdrv_pread(bs->file,
98                      s->crypto_header.offset + offset, buf, buflen);
99     if (ret < 0) {
100         error_setg_errno(errp, -ret, "Could not read encryption header");
101         return -1;
102     }
103     return ret;
104 }
105 
106 
107 static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
108                                           void *opaque, Error **errp)
109 {
110     BlockDriverState *bs = opaque;
111     BDRVQcow2State *s = bs->opaque;
112     int64_t ret;
113     int64_t clusterlen;
114 
115     ret = qcow2_alloc_clusters(bs, headerlen);
116     if (ret < 0) {
117         error_setg_errno(errp, -ret,
118                          "Cannot allocate cluster for LUKS header size %zu",
119                          headerlen);
120         return -1;
121     }
122 
123     s->crypto_header.length = headerlen;
124     s->crypto_header.offset = ret;
125 
126     /* Zero fill remaining space in cluster so it has predictable
127      * content in case of future spec changes */
128     clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
129     assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0);
130     ret = bdrv_pwrite_zeroes(bs->file,
131                              ret + headerlen,
132                              clusterlen - headerlen, 0);
133     if (ret < 0) {
134         error_setg_errno(errp, -ret, "Could not zero fill encryption header");
135         return -1;
136     }
137 
138     return ret;
139 }
140 
141 
142 static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
143                                            const uint8_t *buf, size_t buflen,
144                                            void *opaque, Error **errp)
145 {
146     BlockDriverState *bs = opaque;
147     BDRVQcow2State *s = bs->opaque;
148     ssize_t ret;
149 
150     if ((offset + buflen) > s->crypto_header.length) {
151         error_setg(errp, "Request for data outside of extension header");
152         return -1;
153     }
154 
155     ret = bdrv_pwrite(bs->file,
156                       s->crypto_header.offset + offset, buf, buflen);
157     if (ret < 0) {
158         error_setg_errno(errp, -ret, "Could not read encryption header");
159         return -1;
160     }
161     return ret;
162 }
163 
164 
165 /*
166  * read qcow2 extension and fill bs
167  * start reading from start_offset
168  * finish reading upon magic of value 0 or when end_offset reached
169  * unknown magic is skipped (future extension this version knows nothing about)
170  * return 0 upon success, non-0 otherwise
171  */
172 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
173                                  uint64_t end_offset, void **p_feature_table,
174                                  int flags, bool *need_update_header,
175                                  Error **errp)
176 {
177     BDRVQcow2State *s = bs->opaque;
178     QCowExtension ext;
179     uint64_t offset;
180     int ret;
181     Qcow2BitmapHeaderExt bitmaps_ext;
182 
183     if (need_update_header != NULL) {
184         *need_update_header = false;
185     }
186 
187 #ifdef DEBUG_EXT
188     printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
189 #endif
190     offset = start_offset;
191     while (offset < end_offset) {
192 
193 #ifdef DEBUG_EXT
194         /* Sanity check */
195         if (offset > s->cluster_size)
196             printf("qcow2_read_extension: suspicious offset %lu\n", offset);
197 
198         printf("attempting to read extended header in offset %lu\n", offset);
199 #endif
200 
201         ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
202         if (ret < 0) {
203             error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
204                              "pread fail from offset %" PRIu64, offset);
205             return 1;
206         }
207         be32_to_cpus(&ext.magic);
208         be32_to_cpus(&ext.len);
209         offset += sizeof(ext);
210 #ifdef DEBUG_EXT
211         printf("ext.magic = 0x%x\n", ext.magic);
212 #endif
213         if (offset > end_offset || ext.len > end_offset - offset) {
214             error_setg(errp, "Header extension too large");
215             return -EINVAL;
216         }
217 
218         switch (ext.magic) {
219         case QCOW2_EXT_MAGIC_END:
220             return 0;
221 
222         case QCOW2_EXT_MAGIC_BACKING_FORMAT:
223             if (ext.len >= sizeof(bs->backing_format)) {
224                 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
225                            " too large (>=%zu)", ext.len,
226                            sizeof(bs->backing_format));
227                 return 2;
228             }
229             ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
230             if (ret < 0) {
231                 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
232                                  "Could not read format name");
233                 return 3;
234             }
235             bs->backing_format[ext.len] = '\0';
236             s->image_backing_format = g_strdup(bs->backing_format);
237 #ifdef DEBUG_EXT
238             printf("Qcow2: Got format extension %s\n", bs->backing_format);
239 #endif
240             break;
241 
242         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
243             if (p_feature_table != NULL) {
244                 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
245                 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
246                 if (ret < 0) {
247                     error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
248                                      "Could not read table");
249                     return ret;
250                 }
251 
252                 *p_feature_table = feature_table;
253             }
254             break;
255 
256         case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
257             unsigned int cflags = 0;
258             if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
259                 error_setg(errp, "CRYPTO header extension only "
260                            "expected with LUKS encryption method");
261                 return -EINVAL;
262             }
263             if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
264                 error_setg(errp, "CRYPTO header extension size %u, "
265                            "but expected size %zu", ext.len,
266                            sizeof(Qcow2CryptoHeaderExtension));
267                 return -EINVAL;
268             }
269 
270             ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
271             if (ret < 0) {
272                 error_setg_errno(errp, -ret,
273                                  "Unable to read CRYPTO header extension");
274                 return ret;
275             }
276             be64_to_cpus(&s->crypto_header.offset);
277             be64_to_cpus(&s->crypto_header.length);
278 
279             if ((s->crypto_header.offset % s->cluster_size) != 0) {
280                 error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
281                            "not a multiple of cluster size '%u'",
282                            s->crypto_header.offset, s->cluster_size);
283                 return -EINVAL;
284             }
285 
286             if (flags & BDRV_O_NO_IO) {
287                 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
288             }
289             s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
290                                            qcow2_crypto_hdr_read_func,
291                                            bs, cflags, errp);
292             if (!s->crypto) {
293                 return -EINVAL;
294             }
295         }   break;
296 
297         case QCOW2_EXT_MAGIC_BITMAPS:
298             if (ext.len != sizeof(bitmaps_ext)) {
299                 error_setg_errno(errp, -ret, "bitmaps_ext: "
300                                  "Invalid extension length");
301                 return -EINVAL;
302             }
303 
304             if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
305                 warn_report("a program lacking bitmap support "
306                             "modified this file, so all bitmaps are now "
307                             "considered inconsistent");
308                 error_printf("Some clusters may be leaked, "
309                              "run 'qemu-img check -r' on the image "
310                              "file to fix.");
311                 if (need_update_header != NULL) {
312                     /* Updating is needed to drop invalid bitmap extension. */
313                     *need_update_header = true;
314                 }
315                 break;
316             }
317 
318             ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
319             if (ret < 0) {
320                 error_setg_errno(errp, -ret, "bitmaps_ext: "
321                                  "Could not read ext header");
322                 return ret;
323             }
324 
325             if (bitmaps_ext.reserved32 != 0) {
326                 error_setg_errno(errp, -ret, "bitmaps_ext: "
327                                  "Reserved field is not zero");
328                 return -EINVAL;
329             }
330 
331             be32_to_cpus(&bitmaps_ext.nb_bitmaps);
332             be64_to_cpus(&bitmaps_ext.bitmap_directory_size);
333             be64_to_cpus(&bitmaps_ext.bitmap_directory_offset);
334 
335             if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
336                 error_setg(errp,
337                            "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
338                            "exceeding the QEMU supported maximum of %d",
339                            bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
340                 return -EINVAL;
341             }
342 
343             if (bitmaps_ext.nb_bitmaps == 0) {
344                 error_setg(errp, "found bitmaps extension with zero bitmaps");
345                 return -EINVAL;
346             }
347 
348             if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
349                 error_setg(errp, "bitmaps_ext: "
350                                  "invalid bitmap directory offset");
351                 return -EINVAL;
352             }
353 
354             if (bitmaps_ext.bitmap_directory_size >
355                 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
356                 error_setg(errp, "bitmaps_ext: "
357                                  "bitmap directory size (%" PRIu64 ") exceeds "
358                                  "the maximum supported size (%d)",
359                                  bitmaps_ext.bitmap_directory_size,
360                                  QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
361                 return -EINVAL;
362             }
363 
364             s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
365             s->bitmap_directory_offset =
366                     bitmaps_ext.bitmap_directory_offset;
367             s->bitmap_directory_size =
368                     bitmaps_ext.bitmap_directory_size;
369 
370 #ifdef DEBUG_EXT
371             printf("Qcow2: Got bitmaps extension: "
372                    "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
373                    s->bitmap_directory_offset, s->nb_bitmaps);
374 #endif
375             break;
376 
377         default:
378             /* unknown magic - save it in case we need to rewrite the header */
379             {
380                 Qcow2UnknownHeaderExtension *uext;
381 
382                 uext = g_malloc0(sizeof(*uext)  + ext.len);
383                 uext->magic = ext.magic;
384                 uext->len = ext.len;
385                 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
386 
387                 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
388                 if (ret < 0) {
389                     error_setg_errno(errp, -ret, "ERROR: unknown extension: "
390                                      "Could not read data");
391                     return ret;
392                 }
393             }
394             break;
395         }
396 
397         offset += ((ext.len + 7) & ~7);
398     }
399 
400     return 0;
401 }
402 
403 static void cleanup_unknown_header_ext(BlockDriverState *bs)
404 {
405     BDRVQcow2State *s = bs->opaque;
406     Qcow2UnknownHeaderExtension *uext, *next;
407 
408     QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
409         QLIST_REMOVE(uext, next);
410         g_free(uext);
411     }
412 }
413 
414 static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
415                                        uint64_t mask)
416 {
417     char *features = g_strdup("");
418     char *old;
419 
420     while (table && table->name[0] != '\0') {
421         if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
422             if (mask & (1ULL << table->bit)) {
423                 old = features;
424                 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
425                                            table->name);
426                 g_free(old);
427                 mask &= ~(1ULL << table->bit);
428             }
429         }
430         table++;
431     }
432 
433     if (mask) {
434         old = features;
435         features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
436                                    old, *old ? ", " : "", mask);
437         g_free(old);
438     }
439 
440     error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
441     g_free(features);
442 }
443 
444 /*
445  * Sets the dirty bit and flushes afterwards if necessary.
446  *
447  * The incompatible_features bit is only set if the image file header was
448  * updated successfully.  Therefore it is not required to check the return
449  * value of this function.
450  */
451 int qcow2_mark_dirty(BlockDriverState *bs)
452 {
453     BDRVQcow2State *s = bs->opaque;
454     uint64_t val;
455     int ret;
456 
457     assert(s->qcow_version >= 3);
458 
459     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
460         return 0; /* already dirty */
461     }
462 
463     val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
464     ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
465                       &val, sizeof(val));
466     if (ret < 0) {
467         return ret;
468     }
469     ret = bdrv_flush(bs->file->bs);
470     if (ret < 0) {
471         return ret;
472     }
473 
474     /* Only treat image as dirty if the header was updated successfully */
475     s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
476     return 0;
477 }
478 
479 /*
480  * Clears the dirty bit and flushes before if necessary.  Only call this
481  * function when there are no pending requests, it does not guard against
482  * concurrent requests dirtying the image.
483  */
484 static int qcow2_mark_clean(BlockDriverState *bs)
485 {
486     BDRVQcow2State *s = bs->opaque;
487 
488     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
489         int ret;
490 
491         s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
492 
493         ret = bdrv_flush(bs);
494         if (ret < 0) {
495             return ret;
496         }
497 
498         return qcow2_update_header(bs);
499     }
500     return 0;
501 }
502 
503 /*
504  * Marks the image as corrupt.
505  */
506 int qcow2_mark_corrupt(BlockDriverState *bs)
507 {
508     BDRVQcow2State *s = bs->opaque;
509 
510     s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
511     return qcow2_update_header(bs);
512 }
513 
514 /*
515  * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
516  * before if necessary.
517  */
518 int qcow2_mark_consistent(BlockDriverState *bs)
519 {
520     BDRVQcow2State *s = bs->opaque;
521 
522     if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
523         int ret = bdrv_flush(bs);
524         if (ret < 0) {
525             return ret;
526         }
527 
528         s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
529         return qcow2_update_header(bs);
530     }
531     return 0;
532 }
533 
534 static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
535                        BdrvCheckMode fix)
536 {
537     int ret = qcow2_check_refcounts(bs, result, fix);
538     if (ret < 0) {
539         return ret;
540     }
541 
542     if (fix && result->check_errors == 0 && result->corruptions == 0) {
543         ret = qcow2_mark_clean(bs);
544         if (ret < 0) {
545             return ret;
546         }
547         return qcow2_mark_consistent(bs);
548     }
549     return ret;
550 }
551 
552 static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
553                                  uint64_t entries, size_t entry_len)
554 {
555     BDRVQcow2State *s = bs->opaque;
556     uint64_t size;
557 
558     /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
559      * because values will be passed to qemu functions taking int64_t. */
560     if (entries > INT64_MAX / entry_len) {
561         return -EINVAL;
562     }
563 
564     size = entries * entry_len;
565 
566     if (INT64_MAX - size < offset) {
567         return -EINVAL;
568     }
569 
570     /* Tables must be cluster aligned */
571     if (offset_into_cluster(s, offset) != 0) {
572         return -EINVAL;
573     }
574 
575     return 0;
576 }
577 
/*
 * Runtime options understood by the qcow2 driver.  An options QDict is
 * absorbed into a QemuOpts created from this list in
 * qcow2_update_options_prepare().
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        /* One boolean per individual overlap check */
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        /* Metadata cache sizing, evaluated by read_cache_sizes() */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};
681 
/*
 * Maps each overlap check bit number (QCOW2_OL_*_BITNR) to the name of the
 * boolean runtime option associated with that check.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]    = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
};
692 
693 static void cache_clean_timer_cb(void *opaque)
694 {
695     BlockDriverState *bs = opaque;
696     BDRVQcow2State *s = bs->opaque;
697     qcow2_cache_clean_unused(bs, s->l2_table_cache);
698     qcow2_cache_clean_unused(bs, s->refcount_block_cache);
699     timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
700               (int64_t) s->cache_clean_interval * 1000);
701 }
702 
703 static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
704 {
705     BDRVQcow2State *s = bs->opaque;
706     if (s->cache_clean_interval > 0) {
707         s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
708                                              SCALE_MS, cache_clean_timer_cb,
709                                              bs);
710         timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
711                   (int64_t) s->cache_clean_interval * 1000);
712     }
713 }
714 
715 static void cache_clean_timer_del(BlockDriverState *bs)
716 {
717     BDRVQcow2State *s = bs->opaque;
718     if (s->cache_clean_timer) {
719         timer_del(s->cache_clean_timer);
720         timer_free(s->cache_clean_timer);
721         s->cache_clean_timer = NULL;
722     }
723 }
724 
/*
 * AioContext detach hook: tear down the cache cleaning timer.
 */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
729 
/*
 * AioContext attach hook: set up the cache cleaning timer in @new_context.
 */
static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}
735 
736 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
737                              uint64_t *l2_cache_size,
738                              uint64_t *refcount_cache_size, Error **errp)
739 {
740     BDRVQcow2State *s = bs->opaque;
741     uint64_t combined_cache_size;
742     bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
743 
744     combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
745     l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
746     refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
747 
748     combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
749     *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0);
750     *refcount_cache_size = qemu_opt_get_size(opts,
751                                              QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
752 
753     if (combined_cache_size_set) {
754         if (l2_cache_size_set && refcount_cache_size_set) {
755             error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
756                        " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
757                        "the same time");
758             return;
759         } else if (*l2_cache_size > combined_cache_size) {
760             error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
761                        QCOW2_OPT_CACHE_SIZE);
762             return;
763         } else if (*refcount_cache_size > combined_cache_size) {
764             error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
765                        QCOW2_OPT_CACHE_SIZE);
766             return;
767         }
768 
769         if (l2_cache_size_set) {
770             *refcount_cache_size = combined_cache_size - *l2_cache_size;
771         } else if (refcount_cache_size_set) {
772             *l2_cache_size = combined_cache_size - *refcount_cache_size;
773         } else {
774             *refcount_cache_size = combined_cache_size
775                                  / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
776             *l2_cache_size = combined_cache_size - *refcount_cache_size;
777         }
778     } else {
779         if (!l2_cache_size_set && !refcount_cache_size_set) {
780             *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
781                                  (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
782                                  * s->cluster_size);
783             *refcount_cache_size = *l2_cache_size
784                                  / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
785         } else if (!l2_cache_size_set) {
786             *l2_cache_size = *refcount_cache_size
787                            * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
788         } else if (!refcount_cache_size_set) {
789             *refcount_cache_size = *l2_cache_size
790                                  / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
791         }
792     }
793 }
794 
/*
 * Holding area for new option values; qcow2_update_options_prepare() stores
 * newly created caches here rather than in BDRVQcow2State.
 * NOTE(review): presumably applied to the driver state, or discarded, in a
 * later step outside this section; confirm against the callers.
 */
typedef struct Qcow2ReopenState {
    Qcow2Cache *l2_table_cache;        /* newly created L2 table cache */
    Qcow2Cache *refcount_block_cache;  /* newly created refcount block cache */
    bool use_lazy_refcounts;
    int overlap_check;
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    uint64_t cache_clean_interval;
    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
} Qcow2ReopenState;
804 
/*
 * Parses the qcow2 runtime options found in @options and stores the
 * resulting settings in @r, ready to be applied by
 * qcow2_update_options_commit().
 *
 * Besides filling in @r this has two side effects on the image: the
 * currently active L2 table and refcount block caches are flushed, and
 * qcow2_mark_clean() is called when lazy refcounts are being switched off.
 *
 * @bs:      block driver state whose opaque is the BDRVQcow2State
 * @r:       output; may be left partially filled on failure
 * @options: option dictionary; the "encrypt." sub-options are extracted
 *           from it and the remaining qcow2 options are absorbed
 * @flags:   open flags; BDRV_O_UNMAP provides the default for the
 *           discard-request passthrough
 * @errp:    error object filled in on failure
 *
 * Returns 0 on success and a negative errno value on failure. The callers
 * in this file invoke qcow2_update_options_abort() on @r after a failure to
 * release anything that was already allocated.
 */
static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    /* Split the "encrypt.*" options off; they are handled separately below */
    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* get L2 table/refcount block cache size from command line options */
    read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
                     &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* Turn the size into a number of clusters, clamped to the minimum */
    l2_cache_size /= s->cluster_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* Same conversion and clamping for the refcount block cache */
    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* alloc new L2 table/refcount block cache, flush old one */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    /* The new caches live in @r only; the old ones stay active for now */
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            s->cache_clean_interval);
#ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* lazy-refcounts; flush if going from enabled to disabled */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    /* Both spellings may be given, but only if they agree */
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    if (!opt_overlap_check) {
        /* Fall back to the template option, then to "cached" */
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /* overlap-check defines a template bitmask, but every flag may be
         * overwritten through the associated boolean option */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    /* NEVER and ALWAYS are fixed; the other discard types are configurable */
    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    /* The "encrypt.format" option, if given, must match the image header */
    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        /* "format" is dropped before the remaining options are converted */
        qdict_del(encryptopts, "format");
        r->crypto_opts = block_crypto_open_opts_init(
            Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_del(encryptopts, "format");
        r->crypto_opts = block_crypto_open_opts_init(
            Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp);
        break;

    default:
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    /* Covers a failed block_crypto_open_opts_init() as well as the
     * unsupported-method case above */
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
fail:
    /* Shared exit path: the temporaries are released on success too */
    QDECREF(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}
1027 
1028 static void qcow2_update_options_commit(BlockDriverState *bs,
1029                                         Qcow2ReopenState *r)
1030 {
1031     BDRVQcow2State *s = bs->opaque;
1032     int i;
1033 
1034     if (s->l2_table_cache) {
1035         qcow2_cache_destroy(bs, s->l2_table_cache);
1036     }
1037     if (s->refcount_block_cache) {
1038         qcow2_cache_destroy(bs, s->refcount_block_cache);
1039     }
1040     s->l2_table_cache = r->l2_table_cache;
1041     s->refcount_block_cache = r->refcount_block_cache;
1042 
1043     s->overlap_check = r->overlap_check;
1044     s->use_lazy_refcounts = r->use_lazy_refcounts;
1045 
1046     for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1047         s->discard_passthrough[i] = r->discard_passthrough[i];
1048     }
1049 
1050     if (s->cache_clean_interval != r->cache_clean_interval) {
1051         cache_clean_timer_del(bs);
1052         s->cache_clean_interval = r->cache_clean_interval;
1053         cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1054     }
1055 
1056     qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1057     s->crypto_opts = r->crypto_opts;
1058 }
1059 
1060 static void qcow2_update_options_abort(BlockDriverState *bs,
1061                                        Qcow2ReopenState *r)
1062 {
1063     if (r->l2_table_cache) {
1064         qcow2_cache_destroy(bs, r->l2_table_cache);
1065     }
1066     if (r->refcount_block_cache) {
1067         qcow2_cache_destroy(bs, r->refcount_block_cache);
1068     }
1069     qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1070 }
1071 
1072 static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1073                                 int flags, Error **errp)
1074 {
1075     Qcow2ReopenState r = {};
1076     int ret;
1077 
1078     ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1079     if (ret >= 0) {
1080         qcow2_update_options_commit(bs, &r);
1081     } else {
1082         qcow2_update_options_abort(bs, &r);
1083     }
1084 
1085     return ret;
1086 }
1087 
/*
 * Opens the qcow2 image stored in bs->file: reads and validates the header,
 * initialises the BDRVQcow2State in bs->opaque, loads the L1 table, applies
 * the runtime options from @options, reads header extensions, the backing
 * file name and the snapshot table, and finally repairs the image if it is
 * marked dirty and was opened writable.
 *
 * @bs:      block driver state; bs->file must already be set
 * @options: driver-specific runtime options
 * @flags:   BDRV_O_* open flags
 * @errp:    error object filled in on failure
 *
 * Returns a non-negative value on success and a negative errno value on
 * failure. On failure, everything set up so far is torn down at the fail
 * label.
 */
static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
    int ret = 0;
    QCowHeader header;
    Error *local_err = NULL;
    uint64_t ext_end;
    uint64_t l1_vm_state_index;
    bool update_header = false;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
    }
    /* Header fields are stored big-endian; convert the version 2 fields */
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.cluster_bits);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);
    be32_to_cpus(&header.l1_size);
    be64_to_cpus(&header.refcount_table_offset);
    be32_to_cpus(&header.refcount_table_clusters);
    be64_to_cpus(&header.snapshots_offset);
    be32_to_cpus(&header.nb_snapshots);

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
        ret = -EINVAL;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }

    s->qcow_version = header.version;

    /* Initialise cluster size */
    if (header.cluster_bits < MIN_CLUSTER_BITS ||
        header.cluster_bits > MAX_CLUSTER_BITS) {
        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
                   header.cluster_bits);
        ret = -EINVAL;
        goto fail;
    }

    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);

    /* Initialise version 3 header fields */
    if (header.version == 2) {
        /* Version 2 images get fixed values for these fields */
        header.incompatible_features    = 0;
        header.compatible_features      = 0;
        header.autoclear_features       = 0;
        header.refcount_order           = 4;
        header.header_length            = 72;
    } else {
        be64_to_cpus(&header.incompatible_features);
        be64_to_cpus(&header.compatible_features);
        be64_to_cpus(&header.autoclear_features);
        be32_to_cpus(&header.refcount_order);
        be32_to_cpus(&header.header_length);

        if (header.header_length < 104) {
            error_setg(errp, "qcow2 header too short");
            ret = -EINVAL;
            goto fail;
        }
    }

    if (header.header_length > s->cluster_size) {
        error_setg(errp, "qcow2 header exceeds cluster size");
        ret = -EINVAL;
        goto fail;
    }

    /* Keep header bytes beyond the known QCowHeader layout verbatim */
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
                             "fields");
            goto fail;
        }
    }

    if (header.backing_file_offset > s->cluster_size) {
        error_setg(errp, "Invalid backing file offset");
        ret = -EINVAL;
        goto fail;
    }

    /* Header extensions end at the backing file name if there is one,
     * otherwise at the end of the first cluster */
    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
        ext_end = 1 << header.cluster_bits;
    }

    /* Handle feature bits */
    s->incompatible_features    = header.incompatible_features;
    s->compatible_features      = header.compatible_features;
    s->autoclear_features       = header.autoclear_features;

    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
        /* The extensions are read here only to obtain the feature table
         * used for the error report */
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table, flags, NULL, NULL);
        report_unsupported_feature(errp, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
        g_free(feature_table);
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /* Corrupt images may not be written to unless they are being repaired
         */
        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
                       "read/write");
            ret = -EACCES;
            goto fail;
        }
    }

    /* Check support for various header values */
    if (header.refcount_order > 6) {
        error_setg(errp, "Reference count entry width too large; may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto fail;
    }
    s->refcount_order = header.refcount_order;
    s->refcount_bits = 1 << s->refcount_order;
    /* refcount_max = 2^refcount_bits - 1, built in two steps so that no
     * shift by the full refcount width is needed */
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
            s->crypt_method_header == QCOW_CRYPT_AES) {
            error_setg(errp,
                       "Use of AES-CBC encrypted qcow2 images is no longer "
                       "supported in system emulators");
            error_append_hint(errp,
                              "You can use 'qemu-img convert' to convert your "
                              "image to an alternative supported format, such "
                              "as unencrypted qcow2, or raw with the LUKS "
                              "format instead.\n");
            ret = -ENOSYS;
            goto fail;
        }

        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            s->crypt_physical_offset = false;
        } else {
            /* Assuming LUKS and any future crypt methods we
             * add will all use physical offsets, due to the
             * fact that the alternative is insecure...  */
            s->crypt_physical_offset = true;
        }

        bs->encrypted = true;
    }

    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;
    bs->total_sectors = header.size / 512;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;

    s->refcount_table_offset = header.refcount_table_offset;
    /* Size in 8-byte entries: clusters * (cluster_size / 8) */
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

    if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
        error_setg(errp, "Reference count table too large");
        ret = -EINVAL;
        goto fail;
    }

    /* A missing refcount table is only tolerated when checking the image */
    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
        error_setg(errp, "Image does not contain a reference count table");
        ret = -EINVAL;
        goto fail;
    }

    ret = validate_table_offset(bs, s->refcount_table_offset,
                                s->refcount_table_size, sizeof(uint64_t));
    if (ret < 0) {
        error_setg(errp, "Invalid reference count table offset");
        goto fail;
    }

    /* Snapshot table offset/length */
    if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
        error_setg(errp, "Too many snapshots");
        ret = -EINVAL;
        goto fail;
    }

    ret = validate_table_offset(bs, header.snapshots_offset,
                                header.nb_snapshots,
                                sizeof(QCowSnapshotHeader));
    if (ret < 0) {
        error_setg(errp, "Invalid snapshot table offset");
        goto fail;
    }

    /* read the level 1 table */
    if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
        error_setg(errp, "Active L1 table too large");
        ret = -EFBIG;
        goto fail;
    }
    s->l1_size = header.l1_size;

    l1_vm_state_index = size_to_l1(s, header.size);
    if (l1_vm_state_index > INT_MAX) {
        error_setg(errp, "Image is too big");
        ret = -EFBIG;
        goto fail;
    }
    s->l1_vm_state_index = l1_vm_state_index;

    /* the L1 table must contain at least enough entries to put
       header.size bytes */
    if (s->l1_size < s->l1_vm_state_index) {
        error_setg(errp, "L1 table is too small");
        ret = -EINVAL;
        goto fail;
    }

    ret = validate_table_offset(bs, header.l1_table_offset,
                                header.l1_size, sizeof(uint64_t));
    if (ret < 0) {
        error_setg(errp, "Invalid L1 table offset");
        goto fail;
    }
    s->l1_table_offset = header.l1_table_offset;


    if (s->l1_size > 0) {
        /* The allocation is rounded up to a multiple of 512 bytes */
        s->l1_table = qemu_try_blockalign(bs->file->bs,
            align_offset(s->l1_size * sizeof(uint64_t), 512));
        if (s->l1_table == NULL) {
            error_setg(errp, "Could not allocate L1 table");
            ret = -ENOMEM;
            goto fail;
        }
        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
            goto fail;
        }
        /* L1 entries are stored big-endian */
        for(i = 0;i < s->l1_size; i++) {
            be64_to_cpus(&s->l1_table[i]);
        }
    }

    /* Parse driver-specific options */
    ret = qcow2_update_options(bs, options, flags, errp);
    if (ret < 0) {
        goto fail;
    }

    s->cluster_cache_offset = -1;
    s->flags = flags;

    ret = qcow2_refcount_init(bs);
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
        goto fail;
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
                              flags, &update_header, &local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* qcow2_read_extension may have set up the crypto context
     * if the crypt method needs a header region, some methods
     * don't need header extensions, so must check here
     */
    if (s->crypt_method_header && !s->crypto) {
        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            unsigned int cflags = 0;
            if (flags & BDRV_O_NO_IO) {
                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
            }
            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                           NULL, NULL, cflags, errp);
            if (!s->crypto) {
                ret = -EINVAL;
                goto fail;
            }
        } else if (!(flags & BDRV_O_NO_IO)) {
            error_setg(errp, "Missing CRYPTO header for crypt method %d",
                       s->crypt_method_header);
            ret = -EINVAL;
            goto fail;
        }
    }

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        /* The name must fit in the first cluster and in bs->backing_file,
         * leaving room for the terminating NUL */
        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
            len >= sizeof(bs->backing_file)) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
            goto fail;
        }
        bs->backing_file[len] = '\0';
        s->image_backing_file = g_strdup(bs->backing_file);
    }

    /* Internal snapshots */
    s->snapshots_offset = header.snapshots_offset;
    s->nb_snapshots = header.nb_snapshots;

    ret = qcow2_read_snapshots(bs);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read snapshots");
        goto fail;
    }

    /* Clear unknown autoclear feature bits */
    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
    /* The header is only rewritten for writable, active images */
    update_header =
        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
    if (update_header) {
        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
    }

    /* A true return value suppresses the header update below */
    if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) {
        update_header = false;
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    if (update_header) {
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not update qcow2 header");
            goto fail;
        }
    }

    /* Initialise locks */
    qemu_co_mutex_init(&s->lock);
    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;

    /* Repair image if dirty */
    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not repair dirty image");
            goto fail;
        }
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return ret;

 fail:
    /* Undo whatever part of the setup was completed before the failure */
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);
    qcow2_free_snapshots(bs);
    qcow2_refcount_close(bs);
    qemu_vfree(s->l1_table);
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;
    cache_clean_timer_del(bs);
    if (s->l2_table_cache) {
        qcow2_cache_destroy(bs, s->l2_table_cache);
    }
    if (s->refcount_block_cache) {
        qcow2_cache_destroy(bs, s->refcount_block_cache);
    }
    qcrypto_block_free(s->crypto);
    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
    return ret;
}
1511 
1512 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1513                       Error **errp)
1514 {
1515     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1516                                false, errp);
1517     if (!bs->file) {
1518         return -EINVAL;
1519     }
1520 
1521     return qcow2_do_open(bs, options, flags, errp);
1522 }
1523 
1524 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1525 {
1526     BDRVQcow2State *s = bs->opaque;
1527 
1528     if (bs->encrypted) {
1529         /* Encryption works on a sector granularity */
1530         bs->bl.request_alignment = BDRV_SECTOR_SIZE;
1531     }
1532     bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1533     bs->bl.pdiscard_alignment = s->cluster_size;
1534 }
1535 
1536 static int qcow2_reopen_prepare(BDRVReopenState *state,
1537                                 BlockReopenQueue *queue, Error **errp)
1538 {
1539     Qcow2ReopenState *r;
1540     int ret;
1541 
1542     r = g_new0(Qcow2ReopenState, 1);
1543     state->opaque = r;
1544 
1545     ret = qcow2_update_options_prepare(state->bs, r, state->options,
1546                                        state->flags, errp);
1547     if (ret < 0) {
1548         goto fail;
1549     }
1550 
1551     /* We need to write out any unwritten data if we reopen read-only. */
1552     if ((state->flags & BDRV_O_RDWR) == 0) {
1553         ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1554         if (ret < 0) {
1555             goto fail;
1556         }
1557 
1558         ret = bdrv_flush(state->bs);
1559         if (ret < 0) {
1560             goto fail;
1561         }
1562 
1563         ret = qcow2_mark_clean(state->bs);
1564         if (ret < 0) {
1565             goto fail;
1566         }
1567     }
1568 
1569     return 0;
1570 
1571 fail:
1572     qcow2_update_options_abort(state->bs, r);
1573     g_free(r);
1574     return ret;
1575 }
1576 
1577 static void qcow2_reopen_commit(BDRVReopenState *state)
1578 {
1579     qcow2_update_options_commit(state->bs, state->opaque);
1580     g_free(state->opaque);
1581 }
1582 
1583 static void qcow2_reopen_abort(BDRVReopenState *state)
1584 {
1585     qcow2_update_options_abort(state->bs, state->opaque);
1586     g_free(state->opaque);
1587 }
1588 
1589 static void qcow2_join_options(QDict *options, QDict *old_options)
1590 {
1591     bool has_new_overlap_template =
1592         qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1593         qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1594     bool has_new_total_cache_size =
1595         qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1596     bool has_all_cache_options;
1597 
1598     /* New overlap template overrides all old overlap options */
1599     if (has_new_overlap_template) {
1600         qdict_del(old_options, QCOW2_OPT_OVERLAP);
1601         qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1602         qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1603         qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1604         qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1605         qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1606         qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1607         qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1608         qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1609         qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1610     }
1611 
1612     /* New total cache size overrides all old options */
1613     if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1614         qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1615         qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1616     }
1617 
1618     qdict_join(options, old_options, false);
1619 
1620     /*
1621      * If after merging all cache size options are set, an old total size is
1622      * overwritten. Do keep all options, however, if all three are new. The
1623      * resulting error message is what we want to happen.
1624      */
1625     has_all_cache_options =
1626         qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1627         qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1628         qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1629 
1630     if (has_all_cache_options && !has_new_total_cache_size) {
1631         qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1632     }
1633 }
1634 
1635 static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
1636         int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
1637 {
1638     BDRVQcow2State *s = bs->opaque;
1639     uint64_t cluster_offset;
1640     int index_in_cluster, ret;
1641     unsigned int bytes;
1642     int64_t status = 0;
1643 
1644     bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
1645     qemu_co_mutex_lock(&s->lock);
1646     ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes,
1647                                    &cluster_offset);
1648     qemu_co_mutex_unlock(&s->lock);
1649     if (ret < 0) {
1650         return ret;
1651     }
1652 
1653     *pnum = bytes >> BDRV_SECTOR_BITS;
1654 
1655     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
1656         !s->crypto) {
1657         index_in_cluster = sector_num & (s->cluster_sectors - 1);
1658         cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
1659         *file = bs->file->bs;
1660         status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
1661     }
1662     if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1663         status |= BDRV_BLOCK_ZERO;
1664     } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1665         status |= BDRV_BLOCK_DATA;
1666     }
1667     return status;
1668 }
1669 
1670 /* handle reading after the end of the backing file */
1671 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
1672                         int64_t offset, int bytes)
1673 {
1674     uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
1675     int n1;
1676 
1677     if ((offset + bytes) <= bs_size) {
1678         return bytes;
1679     }
1680 
1681     if (offset >= bs_size) {
1682         n1 = 0;
1683     } else {
1684         n1 = bs_size - offset;
1685     }
1686 
1687     qemu_iovec_memset(qiov, n1, 0, bytes - n1);
1688 
1689     return n1;
1690 }
1691 
1692 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
1693                                         uint64_t bytes, QEMUIOVector *qiov,
1694                                         int flags)
1695 {
1696     BDRVQcow2State *s = bs->opaque;
1697     int offset_in_cluster, n1;
1698     int ret;
1699     unsigned int cur_bytes; /* number of bytes in current iteration */
1700     uint64_t cluster_offset = 0;
1701     uint64_t bytes_done = 0;
1702     QEMUIOVector hd_qiov;
1703     uint8_t *cluster_data = NULL;
1704 
1705     qemu_iovec_init(&hd_qiov, qiov->niov);
1706 
1707     qemu_co_mutex_lock(&s->lock);
1708 
1709     while (bytes != 0) {
1710 
1711         /* prepare next request */
1712         cur_bytes = MIN(bytes, INT_MAX);
1713         if (s->crypto) {
1714             cur_bytes = MIN(cur_bytes,
1715                             QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
1716         }
1717 
1718         ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
1719         if (ret < 0) {
1720             goto fail;
1721         }
1722 
1723         offset_in_cluster = offset_into_cluster(s, offset);
1724 
1725         qemu_iovec_reset(&hd_qiov);
1726         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
1727 
1728         switch (ret) {
1729         case QCOW2_CLUSTER_UNALLOCATED:
1730 
1731             if (bs->backing) {
1732                 /* read from the base image */
1733                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
1734                                          offset, cur_bytes);
1735                 if (n1 > 0) {
1736                     QEMUIOVector local_qiov;
1737 
1738                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
1739                     qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
1740 
1741                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
1742                     qemu_co_mutex_unlock(&s->lock);
1743                     ret = bdrv_co_preadv(bs->backing, offset, n1,
1744                                          &local_qiov, 0);
1745                     qemu_co_mutex_lock(&s->lock);
1746 
1747                     qemu_iovec_destroy(&local_qiov);
1748 
1749                     if (ret < 0) {
1750                         goto fail;
1751                     }
1752                 }
1753             } else {
1754                 /* Note: in this case, no need to wait */
1755                 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
1756             }
1757             break;
1758 
1759         case QCOW2_CLUSTER_ZERO_PLAIN:
1760         case QCOW2_CLUSTER_ZERO_ALLOC:
1761             qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
1762             break;
1763 
1764         case QCOW2_CLUSTER_COMPRESSED:
1765             /* add AIO support for compressed blocks ? */
1766             ret = qcow2_decompress_cluster(bs, cluster_offset);
1767             if (ret < 0) {
1768                 goto fail;
1769             }
1770 
1771             qemu_iovec_from_buf(&hd_qiov, 0,
1772                                 s->cluster_cache + offset_in_cluster,
1773                                 cur_bytes);
1774             break;
1775 
1776         case QCOW2_CLUSTER_NORMAL:
1777             if ((cluster_offset & 511) != 0) {
1778                 ret = -EIO;
1779                 goto fail;
1780             }
1781 
1782             if (bs->encrypted) {
1783                 assert(s->crypto);
1784 
1785                 /*
1786                  * For encrypted images, read everything into a temporary
1787                  * contiguous buffer on which the AES functions can work.
1788                  */
1789                 if (!cluster_data) {
1790                     cluster_data =
1791                         qemu_try_blockalign(bs->file->bs,
1792                                             QCOW_MAX_CRYPT_CLUSTERS
1793                                             * s->cluster_size);
1794                     if (cluster_data == NULL) {
1795                         ret = -ENOMEM;
1796                         goto fail;
1797                     }
1798                 }
1799 
1800                 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
1801                 qemu_iovec_reset(&hd_qiov);
1802                 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
1803             }
1804 
1805             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
1806             qemu_co_mutex_unlock(&s->lock);
1807             ret = bdrv_co_preadv(bs->file,
1808                                  cluster_offset + offset_in_cluster,
1809                                  cur_bytes, &hd_qiov, 0);
1810             qemu_co_mutex_lock(&s->lock);
1811             if (ret < 0) {
1812                 goto fail;
1813             }
1814             if (bs->encrypted) {
1815                 assert(s->crypto);
1816                 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
1817                 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
1818                 if (qcrypto_block_decrypt(s->crypto,
1819                                           (s->crypt_physical_offset ?
1820                                            cluster_offset + offset_in_cluster :
1821                                            offset),
1822                                           cluster_data,
1823                                           cur_bytes,
1824                                           NULL) < 0) {
1825                     ret = -EIO;
1826                     goto fail;
1827                 }
1828                 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
1829             }
1830             break;
1831 
1832         default:
1833             g_assert_not_reached();
1834             ret = -EIO;
1835             goto fail;
1836         }
1837 
1838         bytes -= cur_bytes;
1839         offset += cur_bytes;
1840         bytes_done += cur_bytes;
1841     }
1842     ret = 0;
1843 
1844 fail:
1845     qemu_co_mutex_unlock(&s->lock);
1846 
1847     qemu_iovec_destroy(&hd_qiov);
1848     qemu_vfree(cluster_data);
1849 
1850     return ret;
1851 }
1852 
1853 /* Check if it's possible to merge a write request with the writing of
1854  * the data from the COW regions */
1855 static bool merge_cow(uint64_t offset, unsigned bytes,
1856                       QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
1857 {
1858     QCowL2Meta *m;
1859 
1860     for (m = l2meta; m != NULL; m = m->next) {
1861         /* If both COW regions are empty then there's nothing to merge */
1862         if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
1863             continue;
1864         }
1865 
1866         /* The data (middle) region must be immediately after the
1867          * start region */
1868         if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
1869             continue;
1870         }
1871 
1872         /* The end region must be immediately after the data (middle)
1873          * region */
1874         if (m->offset + m->cow_end.offset != offset + bytes) {
1875             continue;
1876         }
1877 
1878         /* Make sure that adding both COW regions to the QEMUIOVector
1879          * does not exceed IOV_MAX */
1880         if (hd_qiov->niov > IOV_MAX - 2) {
1881             continue;
1882         }
1883 
1884         m->data_qiov = hd_qiov;
1885         return true;
1886     }
1887 
1888     return false;
1889 }
1890 
1891 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
1892                                          uint64_t bytes, QEMUIOVector *qiov,
1893                                          int flags)
1894 {
1895     BDRVQcow2State *s = bs->opaque;
1896     int offset_in_cluster;
1897     int ret;
1898     unsigned int cur_bytes; /* number of sectors in current iteration */
1899     uint64_t cluster_offset;
1900     QEMUIOVector hd_qiov;
1901     uint64_t bytes_done = 0;
1902     uint8_t *cluster_data = NULL;
1903     QCowL2Meta *l2meta = NULL;
1904 
1905     trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
1906 
1907     qemu_iovec_init(&hd_qiov, qiov->niov);
1908 
1909     s->cluster_cache_offset = -1; /* disable compressed cache */
1910 
1911     qemu_co_mutex_lock(&s->lock);
1912 
1913     while (bytes != 0) {
1914 
1915         l2meta = NULL;
1916 
1917         trace_qcow2_writev_start_part(qemu_coroutine_self());
1918         offset_in_cluster = offset_into_cluster(s, offset);
1919         cur_bytes = MIN(bytes, INT_MAX);
1920         if (bs->encrypted) {
1921             cur_bytes = MIN(cur_bytes,
1922                             QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
1923                             - offset_in_cluster);
1924         }
1925 
1926         ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
1927                                          &cluster_offset, &l2meta);
1928         if (ret < 0) {
1929             goto fail;
1930         }
1931 
1932         assert((cluster_offset & 511) == 0);
1933 
1934         qemu_iovec_reset(&hd_qiov);
1935         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
1936 
1937         if (bs->encrypted) {
1938             assert(s->crypto);
1939             if (!cluster_data) {
1940                 cluster_data = qemu_try_blockalign(bs->file->bs,
1941                                                    QCOW_MAX_CRYPT_CLUSTERS
1942                                                    * s->cluster_size);
1943                 if (cluster_data == NULL) {
1944                     ret = -ENOMEM;
1945                     goto fail;
1946                 }
1947             }
1948 
1949             assert(hd_qiov.size <=
1950                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
1951             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
1952 
1953             if (qcrypto_block_encrypt(s->crypto,
1954                                       (s->crypt_physical_offset ?
1955                                        cluster_offset + offset_in_cluster :
1956                                        offset),
1957                                       cluster_data,
1958                                       cur_bytes, NULL) < 0) {
1959                 ret = -EIO;
1960                 goto fail;
1961             }
1962 
1963             qemu_iovec_reset(&hd_qiov);
1964             qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
1965         }
1966 
1967         ret = qcow2_pre_write_overlap_check(bs, 0,
1968                 cluster_offset + offset_in_cluster, cur_bytes);
1969         if (ret < 0) {
1970             goto fail;
1971         }
1972 
1973         /* If we need to do COW, check if it's possible to merge the
1974          * writing of the guest data together with that of the COW regions.
1975          * If it's not possible (or not necessary) then write the
1976          * guest data now. */
1977         if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
1978             qemu_co_mutex_unlock(&s->lock);
1979             BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
1980             trace_qcow2_writev_data(qemu_coroutine_self(),
1981                                     cluster_offset + offset_in_cluster);
1982             ret = bdrv_co_pwritev(bs->file,
1983                                   cluster_offset + offset_in_cluster,
1984                                   cur_bytes, &hd_qiov, 0);
1985             qemu_co_mutex_lock(&s->lock);
1986             if (ret < 0) {
1987                 goto fail;
1988             }
1989         }
1990 
1991         while (l2meta != NULL) {
1992             QCowL2Meta *next;
1993 
1994             ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
1995             if (ret < 0) {
1996                 goto fail;
1997             }
1998 
1999             /* Take the request off the list of running requests */
2000             if (l2meta->nb_clusters != 0) {
2001                 QLIST_REMOVE(l2meta, next_in_flight);
2002             }
2003 
2004             qemu_co_queue_restart_all(&l2meta->dependent_requests);
2005 
2006             next = l2meta->next;
2007             g_free(l2meta);
2008             l2meta = next;
2009         }
2010 
2011         bytes -= cur_bytes;
2012         offset += cur_bytes;
2013         bytes_done += cur_bytes;
2014         trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2015     }
2016     ret = 0;
2017 
2018 fail:
2019     while (l2meta != NULL) {
2020         QCowL2Meta *next;
2021 
2022         if (l2meta->nb_clusters != 0) {
2023             QLIST_REMOVE(l2meta, next_in_flight);
2024         }
2025         qemu_co_queue_restart_all(&l2meta->dependent_requests);
2026 
2027         next = l2meta->next;
2028         g_free(l2meta);
2029         l2meta = next;
2030     }
2031 
2032     qemu_co_mutex_unlock(&s->lock);
2033 
2034     qemu_iovec_destroy(&hd_qiov);
2035     qemu_vfree(cluster_data);
2036     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2037 
2038     return ret;
2039 }
2040 
2041 static int qcow2_inactivate(BlockDriverState *bs)
2042 {
2043     BDRVQcow2State *s = bs->opaque;
2044     int ret, result = 0;
2045     Error *local_err = NULL;
2046 
2047     qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
2048     if (local_err != NULL) {
2049         result = -EINVAL;
2050         error_report_err(local_err);
2051         error_report("Persistent bitmaps are lost for node '%s'",
2052                      bdrv_get_device_or_node_name(bs));
2053     }
2054 
2055     ret = qcow2_cache_flush(bs, s->l2_table_cache);
2056     if (ret) {
2057         result = ret;
2058         error_report("Failed to flush the L2 table cache: %s",
2059                      strerror(-ret));
2060     }
2061 
2062     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2063     if (ret) {
2064         result = ret;
2065         error_report("Failed to flush the refcount block cache: %s",
2066                      strerror(-ret));
2067     }
2068 
2069     if (result == 0) {
2070         qcow2_mark_clean(bs);
2071     }
2072 
2073     return result;
2074 }
2075 
2076 static void qcow2_close(BlockDriverState *bs)
2077 {
2078     BDRVQcow2State *s = bs->opaque;
2079     qemu_vfree(s->l1_table);
2080     /* else pre-write overlap checks in cache_destroy may crash */
2081     s->l1_table = NULL;
2082 
2083     if (!(s->flags & BDRV_O_INACTIVE)) {
2084         qcow2_inactivate(bs);
2085     }
2086 
2087     cache_clean_timer_del(bs);
2088     qcow2_cache_destroy(bs, s->l2_table_cache);
2089     qcow2_cache_destroy(bs, s->refcount_block_cache);
2090 
2091     qcrypto_block_free(s->crypto);
2092     s->crypto = NULL;
2093 
2094     g_free(s->unknown_header_fields);
2095     cleanup_unknown_header_ext(bs);
2096 
2097     g_free(s->image_backing_file);
2098     g_free(s->image_backing_format);
2099 
2100     g_free(s->cluster_cache);
2101     qemu_vfree(s->cluster_data);
2102     qcow2_refcount_close(bs);
2103     qcow2_free_snapshots(bs);
2104 }
2105 
2106 static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
2107 {
2108     BDRVQcow2State *s = bs->opaque;
2109     int flags = s->flags;
2110     QCryptoBlock *crypto = NULL;
2111     QDict *options;
2112     Error *local_err = NULL;
2113     int ret;
2114 
2115     /*
2116      * Backing files are read-only which makes all of their metadata immutable,
2117      * that means we don't have to worry about reopening them here.
2118      */
2119 
2120     crypto = s->crypto;
2121     s->crypto = NULL;
2122 
2123     qcow2_close(bs);
2124 
2125     memset(s, 0, sizeof(BDRVQcow2State));
2126     options = qdict_clone_shallow(bs->options);
2127 
2128     flags &= ~BDRV_O_INACTIVE;
2129     ret = qcow2_do_open(bs, options, flags, &local_err);
2130     QDECREF(options);
2131     if (local_err) {
2132         error_propagate(errp, local_err);
2133         error_prepend(errp, "Could not reopen qcow2 layer: ");
2134         bs->drv = NULL;
2135         return;
2136     } else if (ret < 0) {
2137         error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2138         bs->drv = NULL;
2139         return;
2140     }
2141 
2142     s->crypto = crypto;
2143 }
2144 
2145 static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2146     size_t len, size_t buflen)
2147 {
2148     QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2149     size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2150 
2151     if (buflen < ext_len) {
2152         return -ENOSPC;
2153     }
2154 
2155     *ext_backing_fmt = (QCowExtension) {
2156         .magic  = cpu_to_be32(magic),
2157         .len    = cpu_to_be32(len),
2158     };
2159 
2160     if (len) {
2161         memcpy(buf + sizeof(QCowExtension), s, len);
2162     }
2163 
2164     return ext_len;
2165 }
2166 
2167 /*
2168  * Updates the qcow2 header, including the variable length parts of it, i.e.
2169  * the backing file name and all extensions. qcow2 was not designed to allow
2170  * such changes, so if we run out of space (we can only use the first cluster)
2171  * this function may fail.
2172  *
2173  * Returns 0 on success, -errno in error cases.
2174  */
2175 int qcow2_update_header(BlockDriverState *bs)
2176 {
2177     BDRVQcow2State *s = bs->opaque;
2178     QCowHeader *header;
2179     char *buf;
2180     size_t buflen = s->cluster_size;
2181     int ret;
2182     uint64_t total_size;
2183     uint32_t refcount_table_clusters;
2184     size_t header_length;
2185     Qcow2UnknownHeaderExtension *uext;
2186 
2187     buf = qemu_blockalign(bs, buflen);
2188 
2189     /* Header structure */
2190     header = (QCowHeader*) buf;
2191 
2192     if (buflen < sizeof(*header)) {
2193         ret = -ENOSPC;
2194         goto fail;
2195     }
2196 
2197     header_length = sizeof(*header) + s->unknown_header_fields_size;
2198     total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2199     refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2200 
2201     *header = (QCowHeader) {
2202         /* Version 2 fields */
2203         .magic                  = cpu_to_be32(QCOW_MAGIC),
2204         .version                = cpu_to_be32(s->qcow_version),
2205         .backing_file_offset    = 0,
2206         .backing_file_size      = 0,
2207         .cluster_bits           = cpu_to_be32(s->cluster_bits),
2208         .size                   = cpu_to_be64(total_size),
2209         .crypt_method           = cpu_to_be32(s->crypt_method_header),
2210         .l1_size                = cpu_to_be32(s->l1_size),
2211         .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
2212         .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
2213         .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2214         .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
2215         .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
2216 
2217         /* Version 3 fields */
2218         .incompatible_features  = cpu_to_be64(s->incompatible_features),
2219         .compatible_features    = cpu_to_be64(s->compatible_features),
2220         .autoclear_features     = cpu_to_be64(s->autoclear_features),
2221         .refcount_order         = cpu_to_be32(s->refcount_order),
2222         .header_length          = cpu_to_be32(header_length),
2223     };
2224 
2225     /* For older versions, write a shorter header */
2226     switch (s->qcow_version) {
2227     case 2:
2228         ret = offsetof(QCowHeader, incompatible_features);
2229         break;
2230     case 3:
2231         ret = sizeof(*header);
2232         break;
2233     default:
2234         ret = -EINVAL;
2235         goto fail;
2236     }
2237 
2238     buf += ret;
2239     buflen -= ret;
2240     memset(buf, 0, buflen);
2241 
2242     /* Preserve any unknown field in the header */
2243     if (s->unknown_header_fields_size) {
2244         if (buflen < s->unknown_header_fields_size) {
2245             ret = -ENOSPC;
2246             goto fail;
2247         }
2248 
2249         memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2250         buf += s->unknown_header_fields_size;
2251         buflen -= s->unknown_header_fields_size;
2252     }
2253 
2254     /* Backing file format header extension */
2255     if (s->image_backing_format) {
2256         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2257                              s->image_backing_format,
2258                              strlen(s->image_backing_format),
2259                              buflen);
2260         if (ret < 0) {
2261             goto fail;
2262         }
2263 
2264         buf += ret;
2265         buflen -= ret;
2266     }
2267 
2268     /* Full disk encryption header pointer extension */
2269     if (s->crypto_header.offset != 0) {
2270         cpu_to_be64s(&s->crypto_header.offset);
2271         cpu_to_be64s(&s->crypto_header.length);
2272         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2273                              &s->crypto_header, sizeof(s->crypto_header),
2274                              buflen);
2275         be64_to_cpus(&s->crypto_header.offset);
2276         be64_to_cpus(&s->crypto_header.length);
2277         if (ret < 0) {
2278             goto fail;
2279         }
2280         buf += ret;
2281         buflen -= ret;
2282     }
2283 
2284     /* Feature table */
2285     if (s->qcow_version >= 3) {
2286         Qcow2Feature features[] = {
2287             {
2288                 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2289                 .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
2290                 .name = "dirty bit",
2291             },
2292             {
2293                 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2294                 .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
2295                 .name = "corrupt bit",
2296             },
2297             {
2298                 .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2299                 .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2300                 .name = "lazy refcounts",
2301             },
2302         };
2303 
2304         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2305                              features, sizeof(features), buflen);
2306         if (ret < 0) {
2307             goto fail;
2308         }
2309         buf += ret;
2310         buflen -= ret;
2311     }
2312 
2313     /* Bitmap extension */
2314     if (s->nb_bitmaps > 0) {
2315         Qcow2BitmapHeaderExt bitmaps_header = {
2316             .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2317             .bitmap_directory_size =
2318                     cpu_to_be64(s->bitmap_directory_size),
2319             .bitmap_directory_offset =
2320                     cpu_to_be64(s->bitmap_directory_offset)
2321         };
2322         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2323                              &bitmaps_header, sizeof(bitmaps_header),
2324                              buflen);
2325         if (ret < 0) {
2326             goto fail;
2327         }
2328         buf += ret;
2329         buflen -= ret;
2330     }
2331 
2332     /* Keep unknown header extensions */
2333     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2334         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2335         if (ret < 0) {
2336             goto fail;
2337         }
2338 
2339         buf += ret;
2340         buflen -= ret;
2341     }
2342 
2343     /* End of header extensions */
2344     ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2345     if (ret < 0) {
2346         goto fail;
2347     }
2348 
2349     buf += ret;
2350     buflen -= ret;
2351 
2352     /* Backing file name */
2353     if (s->image_backing_file) {
2354         size_t backing_file_len = strlen(s->image_backing_file);
2355 
2356         if (buflen < backing_file_len) {
2357             ret = -ENOSPC;
2358             goto fail;
2359         }
2360 
2361         /* Using strncpy is ok here, since buf is not NUL-terminated. */
2362         strncpy(buf, s->image_backing_file, buflen);
2363 
2364         header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2365         header->backing_file_size   = cpu_to_be32(backing_file_len);
2366     }
2367 
2368     /* Write the new header */
2369     ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2370     if (ret < 0) {
2371         goto fail;
2372     }
2373 
2374     ret = 0;
2375 fail:
2376     qemu_vfree(header);
2377     return ret;
2378 }
2379 
2380 static int qcow2_change_backing_file(BlockDriverState *bs,
2381     const char *backing_file, const char *backing_fmt)
2382 {
2383     BDRVQcow2State *s = bs->opaque;
2384 
2385     if (backing_file && strlen(backing_file) > 1023) {
2386         return -EINVAL;
2387     }
2388 
2389     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2390     pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2391 
2392     g_free(s->image_backing_file);
2393     g_free(s->image_backing_format);
2394 
2395     s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2396     s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2397 
2398     return qcow2_update_header(bs);
2399 }
2400 
2401 static int qcow2_crypt_method_from_format(const char *encryptfmt)
2402 {
2403     if (g_str_equal(encryptfmt, "luks")) {
2404         return QCOW_CRYPT_LUKS;
2405     } else if (g_str_equal(encryptfmt, "aes")) {
2406         return QCOW_CRYPT_AES;
2407     } else {
2408         return -EINVAL;
2409     }
2410 }
2411 
2412 static int qcow2_set_up_encryption(BlockDriverState *bs, const char *encryptfmt,
2413                                    QemuOpts *opts, Error **errp)
2414 {
2415     BDRVQcow2State *s = bs->opaque;
2416     QCryptoBlockCreateOptions *cryptoopts = NULL;
2417     QCryptoBlock *crypto = NULL;
2418     int ret = -EINVAL;
2419     QDict *options, *encryptopts;
2420     int fmt;
2421 
2422     options = qemu_opts_to_qdict(opts, NULL);
2423     qdict_extract_subqdict(options, &encryptopts, "encrypt.");
2424     QDECREF(options);
2425 
2426     fmt = qcow2_crypt_method_from_format(encryptfmt);
2427 
2428     switch (fmt) {
2429     case QCOW_CRYPT_LUKS:
2430         cryptoopts = block_crypto_create_opts_init(
2431             Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp);
2432         break;
2433     case QCOW_CRYPT_AES:
2434         cryptoopts = block_crypto_create_opts_init(
2435             Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
2436         break;
2437     default:
2438         error_setg(errp, "Unknown encryption format '%s'", encryptfmt);
2439         break;
2440     }
2441     if (!cryptoopts) {
2442         ret = -EINVAL;
2443         goto out;
2444     }
2445     s->crypt_method_header = fmt;
2446 
2447     crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2448                                   qcow2_crypto_hdr_init_func,
2449                                   qcow2_crypto_hdr_write_func,
2450                                   bs, errp);
2451     if (!crypto) {
2452         ret = -EINVAL;
2453         goto out;
2454     }
2455 
2456     ret = qcow2_update_header(bs);
2457     if (ret < 0) {
2458         error_setg_errno(errp, -ret, "Could not write encryption header");
2459         goto out;
2460     }
2461 
2462  out:
2463     QDECREF(encryptopts);
2464     qcrypto_block_free(crypto);
2465     qapi_free_QCryptoBlockCreateOptions(cryptoopts);
2466     return ret;
2467 }
2468 
2469 
2470 typedef struct PreallocCo {
2471     BlockDriverState *bs;
2472     uint64_t offset;
2473     uint64_t new_length;
2474 
2475     int ret;
2476 } PreallocCo;
2477 
2478 /**
2479  * Preallocates metadata structures for data clusters between @offset (in the
2480  * guest disk) and @new_length (which is thus generally the new guest disk
2481  * size).
2482  *
2483  * Returns: 0 on success, -errno on failure.
2484  */
2485 static void coroutine_fn preallocate_co(void *opaque)
2486 {
2487     PreallocCo *params = opaque;
2488     BlockDriverState *bs = params->bs;
2489     uint64_t offset = params->offset;
2490     uint64_t new_length = params->new_length;
2491     BDRVQcow2State *s = bs->opaque;
2492     uint64_t bytes;
2493     uint64_t host_offset = 0;
2494     unsigned int cur_bytes;
2495     int ret;
2496     QCowL2Meta *meta;
2497 
2498     qemu_co_mutex_lock(&s->lock);
2499 
2500     assert(offset <= new_length);
2501     bytes = new_length - offset;
2502 
2503     while (bytes) {
2504         cur_bytes = MIN(bytes, INT_MAX);
2505         ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2506                                          &host_offset, &meta);
2507         if (ret < 0) {
2508             goto done;
2509         }
2510 
2511         while (meta) {
2512             QCowL2Meta *next = meta->next;
2513 
2514             ret = qcow2_alloc_cluster_link_l2(bs, meta);
2515             if (ret < 0) {
2516                 qcow2_free_any_clusters(bs, meta->alloc_offset,
2517                                         meta->nb_clusters, QCOW2_DISCARD_NEVER);
2518                 goto done;
2519             }
2520 
2521             /* There are no dependent requests, but we need to remove our
2522              * request from the list of in-flight requests */
2523             QLIST_REMOVE(meta, next_in_flight);
2524 
2525             g_free(meta);
2526             meta = next;
2527         }
2528 
2529         /* TODO Preallocate data if requested */
2530 
2531         bytes -= cur_bytes;
2532         offset += cur_bytes;
2533     }
2534 
2535     /*
2536      * It is expected that the image file is large enough to actually contain
2537      * all of the allocated clusters (otherwise we get failing reads after
2538      * EOF). Extend the image to the last allocated sector.
2539      */
2540     if (host_offset != 0) {
2541         uint8_t data = 0;
2542         ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
2543                           &data, 1);
2544         if (ret < 0) {
2545             goto done;
2546         }
2547     }
2548 
2549     ret = 0;
2550 
2551 done:
2552     qemu_co_mutex_unlock(&s->lock);
2553     params->ret = ret;
2554 }
2555 
2556 static int preallocate(BlockDriverState *bs,
2557                        uint64_t offset, uint64_t new_length)
2558 {
2559     PreallocCo params = {
2560         .bs         = bs,
2561         .offset     = offset,
2562         .new_length = new_length,
2563         .ret        = -EINPROGRESS,
2564     };
2565 
2566     if (qemu_in_coroutine()) {
2567         preallocate_co(&params);
2568     } else {
2569         Coroutine *co = qemu_coroutine_create(preallocate_co, &params);
2570         bdrv_coroutine_enter(bs, co);
2571         BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS);
2572     }
2573     return params.ret;
2574 }
2575 
/* qcow2_refcount_metadata_size:
 * @clusters: number of clusters to refcount (including data and L1/L2 tables)
 * @cluster_size: size of a cluster, in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 * @generous_increase: allow for the refcount table to be 1.5x as large as it
 *                     needs to be
 * @refblock_count: if not NULL, receives the number of refcount block
 *                  clusters that were computed
 *
 * Returns: Number of bytes required for refcount blocks and table metadata.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /*
     * Every host cluster is reference-counted, including metadata (even
     * refcount metadata is recursively included).
     *
     * An accurate formula for the size of refcount metadata size is difficult
     * to derive.  An easier method of calculation is finding the fixed point
     * where no further refcount blocks or table clusters are required to
     * reference count every cluster.
     */
    int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order);
    int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t);
    int64_t blocks = 0;     /* number of refcount block clusters */
    int64_t table = 0;      /* number of refcount table clusters */
    int64_t total = 0;      /* clusters + blocks + table of this round */
    int64_t prev_total;     /* the same sum from the previous round */

    do {
        prev_total = total;
        blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block);
        table = DIV_ROUND_UP(blocks, blocks_per_table_cluster);
        total = clusters + blocks + table;

        if (generous_increase && total == prev_total) {
            /*
             * Fixed point reached: grow the input by half the table size
             * once and iterate again.
             */
            clusters += DIV_ROUND_UP(table, 2);
            total = 0; /* force another loop */
            generous_increase = false;
        }
    } while (total != prev_total);

    if (refblock_count) {
        *refblock_count = blocks;
    }

    return (blocks + table) * cluster_size;
}
2624 
/**
 * qcow2_calc_prealloc_size:
 * @total_size: virtual disk size in bytes
 * @cluster_size: cluster size in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 *
 * Adds up one header cluster, the L2 tables, the L1 table, and the refcount
 * metadata needed to cover all of that plus the cluster-aligned data area.
 *
 * Returns: Total number of bytes required for the fully allocated image
 * (including metadata).
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    const size_t entries_per_cluster = cluster_size / sizeof(uint64_t);
    const int64_t data_size = align_offset(total_size, cluster_size);
    uint64_t l2_entries, l1_entries;
    int64_t metadata;

    /* The header takes up one cluster */
    metadata = cluster_size;

    /* L2 tables: one entry per data cluster, rounded up to whole tables */
    l2_entries = align_offset(data_size / cluster_size, entries_per_cluster);
    metadata += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table, rounded up to whole clusters */
    l1_entries = align_offset(l2_entries * sizeof(uint64_t) / cluster_size,
                              entries_per_cluster);
    metadata += l1_entries * sizeof(uint64_t);

    /* Refcount table and blocks covering everything counted so far */
    metadata += qcow2_refcount_metadata_size(
            (metadata + data_size) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return metadata + data_size;
}
2662 
2663 static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
2664 {
2665     size_t cluster_size;
2666     int cluster_bits;
2667 
2668     cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
2669                                          DEFAULT_CLUSTER_SIZE);
2670     cluster_bits = ctz32(cluster_size);
2671     if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
2672         (1 << cluster_bits) != cluster_size)
2673     {
2674         error_setg(errp, "Cluster size must be a power of two between %d and "
2675                    "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
2676         return 0;
2677     }
2678     return cluster_size;
2679 }
2680 
2681 static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
2682 {
2683     char *buf;
2684     int ret;
2685 
2686     buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
2687     if (!buf) {
2688         ret = 3; /* default */
2689     } else if (!strcmp(buf, "0.10")) {
2690         ret = 2;
2691     } else if (!strcmp(buf, "1.1")) {
2692         ret = 3;
2693     } else {
2694         error_setg(errp, "Invalid compatibility level: '%s'", buf);
2695         ret = -EINVAL;
2696     }
2697     g_free(buf);
2698     return ret;
2699 }
2700 
2701 static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
2702                                                 Error **errp)
2703 {
2704     uint64_t refcount_bits;
2705 
2706     refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
2707     if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
2708         error_setg(errp, "Refcount width must be a power of two and may not "
2709                    "exceed 64 bits");
2710         return 0;
2711     }
2712 
2713     if (version < 3 && refcount_bits != 16) {
2714         error_setg(errp, "Different refcount widths than 16 bits require "
2715                    "compatibility level 1.1 or above (use compat=1.1 or "
2716                    "greater)");
2717         return 0;
2718     }
2719 
2720     return refcount_bits;
2721 }
2722 
2723 static int qcow2_create2(const char *filename, int64_t total_size,
2724                          const char *backing_file, const char *backing_format,
2725                          int flags, size_t cluster_size, PreallocMode prealloc,
2726                          QemuOpts *opts, int version, int refcount_order,
2727                          const char *encryptfmt, Error **errp)
2728 {
2729     QDict *options;
2730 
2731     /*
2732      * Open the image file and write a minimal qcow2 header.
2733      *
2734      * We keep things simple and start with a zero-sized image. We also
2735      * do without refcount blocks or a L1 table for now. We'll fix the
2736      * inconsistency later.
2737      *
2738      * We do need a refcount table because growing the refcount table means
2739      * allocating two new refcount blocks - the seconds of which would be at
2740      * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
2741      * size for any qcow2 image.
2742      */
2743     BlockBackend *blk;
2744     QCowHeader *header;
2745     uint64_t* refcount_table;
2746     Error *local_err = NULL;
2747     int ret;
2748 
2749     if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
2750         int64_t prealloc_size =
2751             qcow2_calc_prealloc_size(total_size, cluster_size, refcount_order);
2752         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, prealloc_size, &error_abort);
2753         qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_str(prealloc),
2754                      &error_abort);
2755     }
2756 
2757     ret = bdrv_create_file(filename, opts, &local_err);
2758     if (ret < 0) {
2759         error_propagate(errp, local_err);
2760         return ret;
2761     }
2762 
2763     blk = blk_new_open(filename, NULL, NULL,
2764                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
2765                        &local_err);
2766     if (blk == NULL) {
2767         error_propagate(errp, local_err);
2768         return -EIO;
2769     }
2770 
2771     blk_set_allow_write_beyond_eof(blk, true);
2772 
2773     /* Write the header */
2774     QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
2775     header = g_malloc0(cluster_size);
2776     *header = (QCowHeader) {
2777         .magic                      = cpu_to_be32(QCOW_MAGIC),
2778         .version                    = cpu_to_be32(version),
2779         .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
2780         .size                       = cpu_to_be64(0),
2781         .l1_table_offset            = cpu_to_be64(0),
2782         .l1_size                    = cpu_to_be32(0),
2783         .refcount_table_offset      = cpu_to_be64(cluster_size),
2784         .refcount_table_clusters    = cpu_to_be32(1),
2785         .refcount_order             = cpu_to_be32(refcount_order),
2786         .header_length              = cpu_to_be32(sizeof(*header)),
2787     };
2788 
2789     /* We'll update this to correct value later */
2790     header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
2791 
2792     if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
2793         header->compatible_features |=
2794             cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
2795     }
2796 
2797     ret = blk_pwrite(blk, 0, header, cluster_size, 0);
2798     g_free(header);
2799     if (ret < 0) {
2800         error_setg_errno(errp, -ret, "Could not write qcow2 header");
2801         goto out;
2802     }
2803 
2804     /* Write a refcount table with one refcount block */
2805     refcount_table = g_malloc0(2 * cluster_size);
2806     refcount_table[0] = cpu_to_be64(2 * cluster_size);
2807     ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
2808     g_free(refcount_table);
2809 
2810     if (ret < 0) {
2811         error_setg_errno(errp, -ret, "Could not write refcount table");
2812         goto out;
2813     }
2814 
2815     blk_unref(blk);
2816     blk = NULL;
2817 
2818     /*
2819      * And now open the image and make it consistent first (i.e. increase the
2820      * refcount of the cluster that is occupied by the header and the refcount
2821      * table)
2822      */
2823     options = qdict_new();
2824     qdict_put_str(options, "driver", "qcow2");
2825     blk = blk_new_open(filename, NULL, options,
2826                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
2827                        &local_err);
2828     if (blk == NULL) {
2829         error_propagate(errp, local_err);
2830         ret = -EIO;
2831         goto out;
2832     }
2833 
2834     ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
2835     if (ret < 0) {
2836         error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
2837                          "header and refcount table");
2838         goto out;
2839 
2840     } else if (ret != 0) {
2841         error_report("Huh, first cluster in empty image is already in use?");
2842         abort();
2843     }
2844 
2845     /* Create a full header (including things like feature table) */
2846     ret = qcow2_update_header(blk_bs(blk));
2847     if (ret < 0) {
2848         error_setg_errno(errp, -ret, "Could not update qcow2 header");
2849         goto out;
2850     }
2851 
2852     /* Okay, now that we have a valid image, let's give it the right size */
2853     ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
2854     if (ret < 0) {
2855         error_prepend(errp, "Could not resize image: ");
2856         goto out;
2857     }
2858 
2859     /* Want a backing file? There you go.*/
2860     if (backing_file) {
2861         ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
2862         if (ret < 0) {
2863             error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
2864                              "with format '%s'", backing_file, backing_format);
2865             goto out;
2866         }
2867     }
2868 
2869     /* Want encryption? There you go. */
2870     if (encryptfmt) {
2871         ret = qcow2_set_up_encryption(blk_bs(blk), encryptfmt, opts, errp);
2872         if (ret < 0) {
2873             goto out;
2874         }
2875     }
2876 
2877     /* And if we're supposed to preallocate metadata, do that now */
2878     if (prealloc != PREALLOC_MODE_OFF) {
2879         ret = preallocate(blk_bs(blk), 0, total_size);
2880         if (ret < 0) {
2881             error_setg_errno(errp, -ret, "Could not preallocate metadata");
2882             goto out;
2883         }
2884     }
2885 
2886     blk_unref(blk);
2887     blk = NULL;
2888 
2889     /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
2890      * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
2891      * have to setup decryption context. We're not doing any I/O on the top
2892      * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
2893      * not have effect.
2894      */
2895     options = qdict_new();
2896     qdict_put_str(options, "driver", "qcow2");
2897     blk = blk_new_open(filename, NULL, options,
2898                        BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
2899                        &local_err);
2900     if (blk == NULL) {
2901         error_propagate(errp, local_err);
2902         ret = -EIO;
2903         goto out;
2904     }
2905 
2906     ret = 0;
2907 out:
2908     if (blk) {
2909         blk_unref(blk);
2910     }
2911     return ret;
2912 }
2913 
2914 static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
2915 {
2916     char *backing_file = NULL;
2917     char *backing_fmt = NULL;
2918     char *buf = NULL;
2919     uint64_t size = 0;
2920     int flags = 0;
2921     size_t cluster_size = DEFAULT_CLUSTER_SIZE;
2922     PreallocMode prealloc;
2923     int version;
2924     uint64_t refcount_bits;
2925     int refcount_order;
2926     char *encryptfmt = NULL;
2927     Error *local_err = NULL;
2928     int ret;
2929 
2930     /* Read out options */
2931     size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2932                     BDRV_SECTOR_SIZE);
2933     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
2934     backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
2935     encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2936     if (encryptfmt) {
2937         if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
2938             error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
2939                        BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
2940             ret = -EINVAL;
2941             goto finish;
2942         }
2943     } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
2944         encryptfmt = g_strdup("aes");
2945     }
2946     cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
2947     if (local_err) {
2948         error_propagate(errp, local_err);
2949         ret = -EINVAL;
2950         goto finish;
2951     }
2952     buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
2953     prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
2954                                PREALLOC_MODE_OFF, &local_err);
2955     if (local_err) {
2956         error_propagate(errp, local_err);
2957         ret = -EINVAL;
2958         goto finish;
2959     }
2960 
2961     version = qcow2_opt_get_version_del(opts, &local_err);
2962     if (local_err) {
2963         error_propagate(errp, local_err);
2964         ret = -EINVAL;
2965         goto finish;
2966     }
2967 
2968     if (qemu_opt_get_bool_del(opts, BLOCK_OPT_LAZY_REFCOUNTS, false)) {
2969         flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
2970     }
2971 
2972     if (backing_file && prealloc != PREALLOC_MODE_OFF) {
2973         error_setg(errp, "Backing file and preallocation cannot be used at "
2974                    "the same time");
2975         ret = -EINVAL;
2976         goto finish;
2977     }
2978 
2979     if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
2980         error_setg(errp, "Lazy refcounts only supported with compatibility "
2981                    "level 1.1 and above (use compat=1.1 or greater)");
2982         ret = -EINVAL;
2983         goto finish;
2984     }
2985 
2986     refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
2987     if (local_err) {
2988         error_propagate(errp, local_err);
2989         ret = -EINVAL;
2990         goto finish;
2991     }
2992 
2993     refcount_order = ctz32(refcount_bits);
2994 
2995     ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
2996                         cluster_size, prealloc, opts, version, refcount_order,
2997                         encryptfmt, &local_err);
2998     error_propagate(errp, local_err);
2999 
3000 finish:
3001     g_free(backing_file);
3002     g_free(backing_fmt);
3003     g_free(encryptfmt);
3004     g_free(buf);
3005     return ret;
3006 }
3007 
3008 
3009 static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3010 {
3011     int64_t nr;
3012     int res;
3013 
3014     /* Clamp to image length, before checking status of underlying sectors */
3015     if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3016         bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3017     }
3018 
3019     if (!bytes) {
3020         return true;
3021     }
3022     res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3023     return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3024 }
3025 
3026 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3027     int64_t offset, int bytes, BdrvRequestFlags flags)
3028 {
3029     int ret;
3030     BDRVQcow2State *s = bs->opaque;
3031 
3032     uint32_t head = offset % s->cluster_size;
3033     uint32_t tail = (offset + bytes) % s->cluster_size;
3034 
3035     trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3036     if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3037         tail = 0;
3038     }
3039 
3040     if (head || tail) {
3041         uint64_t off;
3042         unsigned int nr;
3043 
3044         assert(head + bytes <= s->cluster_size);
3045 
3046         /* check whether remainder of cluster already reads as zero */
3047         if (!(is_zero(bs, offset - head, head) &&
3048               is_zero(bs, offset + bytes,
3049                       tail ? s->cluster_size - tail : 0))) {
3050             return -ENOTSUP;
3051         }
3052 
3053         qemu_co_mutex_lock(&s->lock);
3054         /* We can have new write after previous check */
3055         offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3056         bytes = s->cluster_size;
3057         nr = s->cluster_size;
3058         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3059         if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3060             ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3061             ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3062             qemu_co_mutex_unlock(&s->lock);
3063             return -ENOTSUP;
3064         }
3065     } else {
3066         qemu_co_mutex_lock(&s->lock);
3067     }
3068 
3069     trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3070 
3071     /* Whatever is left can use real zero clusters */
3072     ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3073     qemu_co_mutex_unlock(&s->lock);
3074 
3075     return ret;
3076 }
3077 
3078 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3079                                           int64_t offset, int bytes)
3080 {
3081     int ret;
3082     BDRVQcow2State *s = bs->opaque;
3083 
3084     if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3085         assert(bytes < s->cluster_size);
3086         /* Ignore partial clusters, except for the special case of the
3087          * complete partial cluster at the end of an unaligned file */
3088         if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3089             offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3090             return -ENOTSUP;
3091         }
3092     }
3093 
3094     qemu_co_mutex_lock(&s->lock);
3095     ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3096                                 false);
3097     qemu_co_mutex_unlock(&s->lock);
3098     return ret;
3099 }
3100 
3101 static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
3102                           PreallocMode prealloc, Error **errp)
3103 {
3104     BDRVQcow2State *s = bs->opaque;
3105     uint64_t old_length;
3106     int64_t new_l1_size;
3107     int ret;
3108 
3109     if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
3110         prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
3111     {
3112         error_setg(errp, "Unsupported preallocation mode '%s'",
3113                    PreallocMode_str(prealloc));
3114         return -ENOTSUP;
3115     }
3116 
3117     if (offset & 511) {
3118         error_setg(errp, "The new size must be a multiple of 512");
3119         return -EINVAL;
3120     }
3121 
3122     /* cannot proceed if image has snapshots */
3123     if (s->nb_snapshots) {
3124         error_setg(errp, "Can't resize an image which has snapshots");
3125         return -ENOTSUP;
3126     }
3127 
3128     /* cannot proceed if image has bitmaps */
3129     if (s->nb_bitmaps) {
3130         /* TODO: resize bitmaps in the image */
3131         error_setg(errp, "Can't resize an image which has bitmaps");
3132         return -ENOTSUP;
3133     }
3134 
3135     old_length = bs->total_sectors * 512;
3136     new_l1_size = size_to_l1(s, offset);
3137 
3138     if (offset < old_length) {
3139         int64_t last_cluster, old_file_size;
3140         if (prealloc != PREALLOC_MODE_OFF) {
3141             error_setg(errp,
3142                        "Preallocation can't be used for shrinking an image");
3143             return -EINVAL;
3144         }
3145 
3146         ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
3147                                     old_length - ROUND_UP(offset,
3148                                                           s->cluster_size),
3149                                     QCOW2_DISCARD_ALWAYS, true);
3150         if (ret < 0) {
3151             error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
3152             return ret;
3153         }
3154 
3155         ret = qcow2_shrink_l1_table(bs, new_l1_size);
3156         if (ret < 0) {
3157             error_setg_errno(errp, -ret,
3158                              "Failed to reduce the number of L2 tables");
3159             return ret;
3160         }
3161 
3162         ret = qcow2_shrink_reftable(bs);
3163         if (ret < 0) {
3164             error_setg_errno(errp, -ret,
3165                              "Failed to discard unused refblocks");
3166             return ret;
3167         }
3168 
3169         old_file_size = bdrv_getlength(bs->file->bs);
3170         if (old_file_size < 0) {
3171             error_setg_errno(errp, -old_file_size,
3172                              "Failed to inquire current file length");
3173             return old_file_size;
3174         }
3175         last_cluster = qcow2_get_last_cluster(bs, old_file_size);
3176         if (last_cluster < 0) {
3177             error_setg_errno(errp, -last_cluster,
3178                              "Failed to find the last cluster");
3179             return last_cluster;
3180         }
3181         if ((last_cluster + 1) * s->cluster_size < old_file_size) {
3182             Error *local_err = NULL;
3183 
3184             bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
3185                           PREALLOC_MODE_OFF, &local_err);
3186             if (local_err) {
3187                 warn_reportf_err(local_err,
3188                                  "Failed to truncate the tail of the image: ");
3189             }
3190         }
3191     } else {
3192         ret = qcow2_grow_l1_table(bs, new_l1_size, true);
3193         if (ret < 0) {
3194             error_setg_errno(errp, -ret, "Failed to grow the L1 table");
3195             return ret;
3196         }
3197     }
3198 
3199     switch (prealloc) {
3200     case PREALLOC_MODE_OFF:
3201         break;
3202 
3203     case PREALLOC_MODE_METADATA:
3204         ret = preallocate(bs, old_length, offset);
3205         if (ret < 0) {
3206             error_setg_errno(errp, -ret, "Preallocation failed");
3207             return ret;
3208         }
3209         break;
3210 
3211     case PREALLOC_MODE_FALLOC:
3212     case PREALLOC_MODE_FULL:
3213     {
3214         int64_t allocation_start, host_offset, guest_offset;
3215         int64_t clusters_allocated;
3216         int64_t old_file_size, new_file_size;
3217         uint64_t nb_new_data_clusters, nb_new_l2_tables;
3218 
3219         old_file_size = bdrv_getlength(bs->file->bs);
3220         if (old_file_size < 0) {
3221             error_setg_errno(errp, -old_file_size,
3222                              "Failed to inquire current file length");
3223             return old_file_size;
3224         }
3225         old_file_size = ROUND_UP(old_file_size, s->cluster_size);
3226 
3227         nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
3228                                             s->cluster_size);
3229 
3230         /* This is an overestimation; we will not actually allocate space for
3231          * these in the file but just make sure the new refcount structures are
3232          * able to cover them so we will not have to allocate new refblocks
3233          * while entering the data blocks in the potentially new L2 tables.
3234          * (We do not actually care where the L2 tables are placed. Maybe they
3235          *  are already allocated or they can be placed somewhere before
3236          *  @old_file_size. It does not matter because they will be fully
3237          *  allocated automatically, so they do not need to be covered by the
3238          *  preallocation. All that matters is that we will not have to allocate
3239          *  new refcount structures for them.) */
3240         nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
3241                                         s->cluster_size / sizeof(uint64_t));
3242         /* The cluster range may not be aligned to L2 boundaries, so add one L2
3243          * table for a potential head/tail */
3244         nb_new_l2_tables++;
3245 
3246         allocation_start = qcow2_refcount_area(bs, old_file_size,
3247                                                nb_new_data_clusters +
3248                                                nb_new_l2_tables,
3249                                                true, 0, 0);
3250         if (allocation_start < 0) {
3251             error_setg_errno(errp, -allocation_start,
3252                              "Failed to resize refcount structures");
3253             return allocation_start;
3254         }
3255 
3256         clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
3257                                                      nb_new_data_clusters);
3258         if (clusters_allocated < 0) {
3259             error_setg_errno(errp, -clusters_allocated,
3260                              "Failed to allocate data clusters");
3261             return -clusters_allocated;
3262         }
3263 
3264         assert(clusters_allocated == nb_new_data_clusters);
3265 
3266         /* Allocate the data area */
3267         new_file_size = allocation_start +
3268                         nb_new_data_clusters * s->cluster_size;
3269         ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp);
3270         if (ret < 0) {
3271             error_prepend(errp, "Failed to resize underlying file: ");
3272             qcow2_free_clusters(bs, allocation_start,
3273                                 nb_new_data_clusters * s->cluster_size,
3274                                 QCOW2_DISCARD_OTHER);
3275             return ret;
3276         }
3277 
3278         /* Create the necessary L2 entries */
3279         host_offset = allocation_start;
3280         guest_offset = old_length;
3281         while (nb_new_data_clusters) {
3282             int64_t guest_cluster = guest_offset >> s->cluster_bits;
3283             int64_t nb_clusters = MIN(nb_new_data_clusters,
3284                                       s->l2_size - guest_cluster % s->l2_size);
3285             QCowL2Meta allocation = {
3286                 .offset       = guest_offset,
3287                 .alloc_offset = host_offset,
3288                 .nb_clusters  = nb_clusters,
3289             };
3290             qemu_co_queue_init(&allocation.dependent_requests);
3291 
3292             ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
3293             if (ret < 0) {
3294                 error_setg_errno(errp, -ret, "Failed to update L2 tables");
3295                 qcow2_free_clusters(bs, host_offset,
3296                                     nb_new_data_clusters * s->cluster_size,
3297                                     QCOW2_DISCARD_OTHER);
3298                 return ret;
3299             }
3300 
3301             guest_offset += nb_clusters * s->cluster_size;
3302             host_offset += nb_clusters * s->cluster_size;
3303             nb_new_data_clusters -= nb_clusters;
3304         }
3305         break;
3306     }
3307 
3308     default:
3309         g_assert_not_reached();
3310     }
3311 
3312     if (prealloc != PREALLOC_MODE_OFF) {
3313         /* Flush metadata before actually changing the image size */
3314         ret = bdrv_flush(bs);
3315         if (ret < 0) {
3316             error_setg_errno(errp, -ret,
3317                              "Failed to flush the preallocated area to disk");
3318             return ret;
3319         }
3320     }
3321 
3322     /* write updated header.size */
3323     offset = cpu_to_be64(offset);
3324     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
3325                            &offset, sizeof(uint64_t));
3326     if (ret < 0) {
3327         error_setg_errno(errp, -ret, "Failed to update the image size");
3328         return ret;
3329     }
3330 
3331     s->l1_vm_state_index = new_l1_size;
3332     return 0;
3333 }
3334 
3335 /* XXX: put compressed sectors first, then all the cluster aligned
3336    tables to avoid losing bytes in alignment */
3337 static coroutine_fn int
3338 qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
3339                             uint64_t bytes, QEMUIOVector *qiov)
3340 {
3341     BDRVQcow2State *s = bs->opaque;
3342     QEMUIOVector hd_qiov;
3343     struct iovec iov;
3344     z_stream strm;
3345     int ret, out_len;
3346     uint8_t *buf, *out_buf;
3347     int64_t cluster_offset;
3348 
3349     if (bytes == 0) {
3350         /* align end of file to a sector boundary to ease reading with
3351            sector based I/Os */
3352         cluster_offset = bdrv_getlength(bs->file->bs);
3353         if (cluster_offset < 0) {
3354             return cluster_offset;
3355         }
3356         return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
3357     }
3358 
3359     buf = qemu_blockalign(bs, s->cluster_size);
3360     if (bytes != s->cluster_size) {
3361         if (bytes > s->cluster_size ||
3362             offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
3363         {
3364             qemu_vfree(buf);
3365             return -EINVAL;
3366         }
3367         /* Zero-pad last write if image size is not cluster aligned */
3368         memset(buf + bytes, 0, s->cluster_size - bytes);
3369     }
3370     qemu_iovec_to_buf(qiov, 0, buf, bytes);
3371 
3372     out_buf = g_malloc(s->cluster_size);
3373 
3374     /* best compression, small window, no zlib header */
3375     memset(&strm, 0, sizeof(strm));
3376     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
3377                        Z_DEFLATED, -12,
3378                        9, Z_DEFAULT_STRATEGY);
3379     if (ret != 0) {
3380         ret = -EINVAL;
3381         goto fail;
3382     }
3383 
3384     strm.avail_in = s->cluster_size;
3385     strm.next_in = (uint8_t *)buf;
3386     strm.avail_out = s->cluster_size;
3387     strm.next_out = out_buf;
3388 
3389     ret = deflate(&strm, Z_FINISH);
3390     if (ret != Z_STREAM_END && ret != Z_OK) {
3391         deflateEnd(&strm);
3392         ret = -EINVAL;
3393         goto fail;
3394     }
3395     out_len = strm.next_out - out_buf;
3396 
3397     deflateEnd(&strm);
3398 
3399     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
3400         /* could not compress: write normal cluster */
3401         ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
3402         if (ret < 0) {
3403             goto fail;
3404         }
3405         goto success;
3406     }
3407 
3408     qemu_co_mutex_lock(&s->lock);
3409     cluster_offset =
3410         qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
3411     if (!cluster_offset) {
3412         qemu_co_mutex_unlock(&s->lock);
3413         ret = -EIO;
3414         goto fail;
3415     }
3416     cluster_offset &= s->cluster_offset_mask;
3417 
3418     ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
3419     qemu_co_mutex_unlock(&s->lock);
3420     if (ret < 0) {
3421         goto fail;
3422     }
3423 
3424     iov = (struct iovec) {
3425         .iov_base   = out_buf,
3426         .iov_len    = out_len,
3427     };
3428     qemu_iovec_init_external(&hd_qiov, &iov, 1);
3429 
3430     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
3431     ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
3432     if (ret < 0) {
3433         goto fail;
3434     }
3435 success:
3436     ret = 0;
3437 fail:
3438     qemu_vfree(buf);
3439     g_free(out_buf);
3440     return ret;
3441 }
3442 
/*
 * Empty the image by rewriting its metadata in place instead of discarding
 * clusters one by one.
 *
 * The function zeroes the current L1 table, then lays out a fresh one-cluster
 * refcount table at cluster 1, the first refcount block at cluster 2 and the
 * L1 table starting at cluster 3, updates the image header accordingly, and
 * finally truncates the file to (3 + l1_clusters) clusters.
 *
 * The image is marked dirty for the duration of the operation and marked
 * clean again once in-memory and on-disk refcounts agree.
 *
 * Returns 0 on success and a negative errno value on failure.  Failures taken
 * through the fail_broken_refcounts label additionally set bs->drv to NULL.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /* Buffer for the three header fields that are rewritten with a single
     * write starting at offsetof(QCowHeader, l1_table_offset).
     * NOTE(review): this relies on these fields being adjacent in QCowHeader,
     * whose definition is not visible here */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    /* Empty both metadata caches before their backing clusters are
     * overwritten */
    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Refcounts will be broken utterly */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    /* l1_clusters: clusters occupied by the L1 table;
     * l1_size2: size of the L1 table in bytes */
    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* After this call, neither the in-memory nor the on-disk refcount
     * information accurately describe the actual references */

    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Overwrite enough clusters at the beginning of the sectors to place
     * the refcount table, a refcount block and the L1 table in; this may
     * overwrite parts of the existing refcount and L1 table, which is not
     * an issue because the dirty flag is set, complete data loss is in fact
     * desired and partial data loss is consequently fine as well */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* This call (even if it failed overall) may have overwritten on-disk
     * refcount structures; in that case, the in-memory refcount information
     * will probably differ from the on-disk information which makes the BDS
     * unusable */
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* "Create" an empty reftable (one cluster) directly after the image
     * header and an empty L1 table three clusters after the image header;
     * the cluster between those two will be used as the first refblock */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    /* The header now points at the new L1 location; mirror that in memory */
    s->l1_table_offset = 3 * s->cluster_size;

    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Ownership of the new table moves to the driver state; clearing the
     * local pointer keeps the g_free() at the fail label from releasing it */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /* Now the in-memory refcount information again corresponds to the on-disk
     * information (reftable is empty and no refblocks (the refblock cache is
     * empty)); however, this means some clusters (e.g. the image header) are
     * referenced, but not refcounted, but the normal qcow2 code assumes that
     * the in-memory information is always correct */

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refblock into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Allocate the header, reftable, refblock and L1 clusters through the
     * regular allocator so they get refcounted; the allocation is required
     * to land at offset 0, anything else aborts */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* Now finally the in-memory information corresponds to the on-disk
     * structures and is correct */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut the file down to the metadata that was just laid out */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /* The BDS is unusable at this point. If we wanted to make it usable, we
     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
     * again. However, because the functions which could have caused this error
     * path to be taken are used by those functions as well, it's very likely
     * that that sequence will fail as well. Therefore, just eject the BDS. */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
3593 
3594 static int qcow2_make_empty(BlockDriverState *bs)
3595 {
3596     BDRVQcow2State *s = bs->opaque;
3597     uint64_t offset, end_offset;
3598     int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
3599     int l1_clusters, ret = 0;
3600 
3601     l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
3602 
3603     if (s->qcow_version >= 3 && !s->snapshots &&
3604         3 + l1_clusters <= s->refcount_block_size) {
3605         /* The following function only works for qcow2 v3 images (it requires
3606          * the dirty flag) and only as long as there are no snapshots (because
3607          * it completely empties the image). Furthermore, the L1 table and three
3608          * additional clusters (image header, refcount table, one refcount
3609          * block) have to fit inside one refcount block. */
3610         return make_completely_empty(bs);
3611     }
3612 
3613     /* This fallback code simply discards every active cluster; this is slow,
3614      * but works in all cases */
3615     end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3616     for (offset = 0; offset < end_offset; offset += step) {
3617         /* As this function is generally used after committing an external
3618          * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
3619          * default action for this kind of discard is to pass the discard,
3620          * which will ideally result in an actually smaller image file, as
3621          * is probably desired. */
3622         ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
3623                                     QCOW2_DISCARD_SNAPSHOT, true);
3624         if (ret < 0) {
3625             break;
3626         }
3627     }
3628 
3629     return ret;
3630 }
3631 
3632 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
3633 {
3634     BDRVQcow2State *s = bs->opaque;
3635     int ret;
3636 
3637     qemu_co_mutex_lock(&s->lock);
3638     ret = qcow2_cache_write(bs, s->l2_table_cache);
3639     if (ret < 0) {
3640         qemu_co_mutex_unlock(&s->lock);
3641         return ret;
3642     }
3643 
3644     if (qcow2_need_accurate_refcounts(s)) {
3645         ret = qcow2_cache_write(bs, s->refcount_block_cache);
3646         if (ret < 0) {
3647             qemu_co_mutex_unlock(&s->lock);
3648             return ret;
3649         }
3650     }
3651     qemu_co_mutex_unlock(&s->lock);
3652 
3653     return 0;
3654 }
3655 
/*
 * Estimate the file size a new qcow2 image would need.
 *
 * @opts:  image creation options; cluster size, version, refcount bits,
 *         preallocation mode, backing file and size are read from it
 * @in_bs: optional source image; when given, its length replaces the size
 *         option as the virtual size and its block status determines how
 *         many data clusters are counted
 * @errp:  set on failure
 *
 * Returns a newly allocated BlockMeasureInfo with 'fully_allocated' and
 * 'required' filled in, or NULL on error.
 */
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
                                       Error **errp)
{
    Error *local_err = NULL;
    BlockMeasureInfo *info;
    uint64_t required = 0; /* bytes that contribute to required size */
    uint64_t virtual_size; /* disk size as seen by guest */
    uint64_t refcount_bits;
    uint64_t l2_tables;
    size_t cluster_size;
    int version;
    char *optstr;
    PreallocMode prealloc;
    bool has_backing_file;

    /* Parse image creation options */
    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    version = qcow2_opt_get_version_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
    if (local_err) {
        goto err;
    }

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
                               PREALLOC_MODE_OFF, &local_err);
    g_free(optstr);
    if (local_err) {
        goto err;
    }

    /* Only the presence of a backing file matters here, not its name */
    optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    has_backing_file = !!optstr;
    g_free(optstr);

    virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                                cluster_size);

    /* Check that virtual disk size is valid */
    /* NOTE(review): this check uses the size taken from the options; when
     * in_bs is given, virtual_size is replaced below and not re-checked */
    l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
                             cluster_size / sizeof(uint64_t));
    if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
        error_setg(&local_err, "The image size is too large "
                               "(try using a larger cluster size)");
        goto err;
    }

    /* Account for input image */
    if (in_bs) {
        int64_t ssize = bdrv_getlength(in_bs);
        if (ssize < 0) {
            error_setg_errno(&local_err, -ssize,
                             "Unable to get image virtual_size");
            goto err;
        }

        virtual_size = align_offset(ssize, cluster_size);

        if (has_backing_file) {
            /* We don't know how much of the backing chain is shared by the
             * input image and the new image file.  In the worst case the new
             * image's backing file has nothing in common with the input
             * image.  Be conservative and assume all clusters need to be
             * written.
             */
            required = virtual_size;
        } else {
            int64_t offset;
            int64_t pnum = 0;

            /* Walk the input image one block-status extent at a time; pnum
             * is the length of the current extent and the loop step */
            for (offset = 0; offset < ssize; offset += pnum) {
                int ret;

                ret = bdrv_block_status_above(in_bs, NULL, offset,
                                              ssize - offset, &pnum, NULL,
                                              NULL);
                if (ret < 0) {
                    error_setg_errno(&local_err, -ret,
                                     "Unable to get block status");
                    goto err;
                }

                if (ret & BDRV_BLOCK_ZERO) {
                    /* Skip zero regions (safe with no backing file) */
                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
                    /* Extend pnum to end of cluster for next iteration */
                    pnum = ROUND_UP(offset + pnum, cluster_size) - offset;

                    /* Count clusters we've seen */
                    required += offset % cluster_size + pnum;
                }
            }
        }
    }

    /* Take into account preallocation.  Nothing special is needed for
     * PREALLOC_MODE_METADATA since metadata is always counted.
     */
    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
        required = virtual_size;
    }

    info = g_new(BlockMeasureInfo, 1);
    info->fully_allocated =
        qcow2_calc_prealloc_size(virtual_size, cluster_size,
                                 ctz32(refcount_bits));

    /* Remove data clusters that are not required.  This overestimates the
     * required size because metadata needed for the fully allocated file is
     * still counted.
     */
    info->required = info->fully_allocated - virtual_size + required;
    return info;

err:
    error_propagate(errp, local_err);
    return NULL;
}
3782 
3783 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3784 {
3785     BDRVQcow2State *s = bs->opaque;
3786     bdi->unallocated_blocks_are_zero = true;
3787     bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3);
3788     bdi->cluster_size = s->cluster_size;
3789     bdi->vm_state_offset = qcow2_vm_state_offset(s);
3790     return 0;
3791 }
3792 
3793 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
3794 {
3795     BDRVQcow2State *s = bs->opaque;
3796     ImageInfoSpecific *spec_info;
3797     QCryptoBlockInfo *encrypt_info = NULL;
3798 
3799     if (s->crypto != NULL) {
3800         encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort);
3801     }
3802 
3803     spec_info = g_new(ImageInfoSpecific, 1);
3804     *spec_info = (ImageInfoSpecific){
3805         .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
3806         .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
3807     };
3808     if (s->qcow_version == 2) {
3809         *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
3810             .compat             = g_strdup("0.10"),
3811             .refcount_bits      = s->refcount_bits,
3812         };
3813     } else if (s->qcow_version == 3) {
3814         *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
3815             .compat             = g_strdup("1.1"),
3816             .lazy_refcounts     = s->compatible_features &
3817                                   QCOW2_COMPAT_LAZY_REFCOUNTS,
3818             .has_lazy_refcounts = true,
3819             .corrupt            = s->incompatible_features &
3820                                   QCOW2_INCOMPAT_CORRUPT,
3821             .has_corrupt        = true,
3822             .refcount_bits      = s->refcount_bits,
3823         };
3824     } else {
3825         /* if this assertion fails, this probably means a new version was
3826          * added without having it covered here */
3827         assert(false);
3828     }
3829 
3830     if (encrypt_info) {
3831         ImageInfoSpecificQCow2Encryption *qencrypt =
3832             g_new(ImageInfoSpecificQCow2Encryption, 1);
3833         switch (encrypt_info->format) {
3834         case Q_CRYPTO_BLOCK_FORMAT_QCOW:
3835             qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
3836             qencrypt->u.aes = encrypt_info->u.qcow;
3837             break;
3838         case Q_CRYPTO_BLOCK_FORMAT_LUKS:
3839             qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
3840             qencrypt->u.luks = encrypt_info->u.luks;
3841             break;
3842         default:
3843             abort();
3844         }
3845         /* Since we did shallow copy above, erase any pointers
3846          * in the original info */
3847         memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
3848         qapi_free_QCryptoBlockInfo(encrypt_info);
3849 
3850         spec_info->u.qcow2.data->has_encrypt = true;
3851         spec_info->u.qcow2.data->encrypt = qencrypt;
3852     }
3853 
3854     return spec_info;
3855 }
3856 
3857 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
3858                               int64_t pos)
3859 {
3860     BDRVQcow2State *s = bs->opaque;
3861 
3862     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
3863     return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
3864                                     qiov->size, qiov, 0);
3865 }
3866 
3867 static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
3868                               int64_t pos)
3869 {
3870     BDRVQcow2State *s = bs->opaque;
3871 
3872     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
3873     return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
3874                                    qiov->size, qiov, 0);
3875 }
3876 
3877 /*
3878  * Downgrades an image's version. To achieve this, any incompatible features
3879  * have to be removed.
3880  */
3881 static int qcow2_downgrade(BlockDriverState *bs, int target_version,
3882                            BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3883 {
3884     BDRVQcow2State *s = bs->opaque;
3885     int current_version = s->qcow_version;
3886     int ret;
3887 
3888     if (target_version == current_version) {
3889         return 0;
3890     } else if (target_version > current_version) {
3891         return -EINVAL;
3892     } else if (target_version != 2) {
3893         return -EINVAL;
3894     }
3895 
3896     if (s->refcount_order != 4) {
3897         error_report("compat=0.10 requires refcount_bits=16");
3898         return -ENOTSUP;
3899     }
3900 
3901     /* clear incompatible features */
3902     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
3903         ret = qcow2_mark_clean(bs);
3904         if (ret < 0) {
3905             return ret;
3906         }
3907     }
3908 
3909     /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
3910      * the first place; if that happens nonetheless, returning -ENOTSUP is the
3911      * best thing to do anyway */
3912 
3913     if (s->incompatible_features) {
3914         return -ENOTSUP;
3915     }
3916 
3917     /* since we can ignore compatible features, we can set them to 0 as well */
3918     s->compatible_features = 0;
3919     /* if lazy refcounts have been used, they have already been fixed through
3920      * clearing the dirty flag */
3921 
3922     /* clearing autoclear features is trivial */
3923     s->autoclear_features = 0;
3924 
3925     ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
3926     if (ret < 0) {
3927         return ret;
3928     }
3929 
3930     s->qcow_version = target_version;
3931     ret = qcow2_update_header(bs);
3932     if (ret < 0) {
3933         s->qcow_version = current_version;
3934         return ret;
3935     }
3936     return 0;
3937 }
3938 
/*
 * Identifies which long-running amend step is currently reporting progress
 * through qcow2_amend_helper_cb().
 */
typedef enum Qcow2AmendOperation {
    /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
     * statically initialized to so that the helper CB can discern the first
     * invocation from an operation change */
    QCOW2_NO_OPERATION = 0,

    /* Set by qcow2_amend_options() around qcow2_change_refcount_order() */
    QCOW2_CHANGING_REFCOUNT_ORDER,
    /* Set by qcow2_amend_options() around qcow2_downgrade() */
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
3948 
/*
 * Opaque state handed to qcow2_amend_helper_cb().  It lets the helper merge
 * the progress of several consecutive amend operations into one progress
 * report for the caller's original status callback.
 */
typedef struct Qcow2AmendHelperCBInfo {
    /* The code coordinating the amend operations should only modify
     * these four fields; the rest will be managed by the CB */
    BlockDriverAmendStatusCB *original_status_cb;
    void *original_cb_opaque;

    /* Operation whose progress is being reported right now */
    Qcow2AmendOperation current_operation;

    /* Total number of operations to perform (only set once) */
    int total_operations;

    /* The following fields are managed by the CB */

    /* Number of operations completed */
    int operations_completed;

    /* Cumulative offset of all completed operations */
    int64_t offset_completed;

    /* Operation seen on the previous CB invocation, and the work size it
     * reported then */
    Qcow2AmendOperation last_operation;
    int64_t last_work_size;
} Qcow2AmendHelperCBInfo;
3971 
3972 static void qcow2_amend_helper_cb(BlockDriverState *bs,
3973                                   int64_t operation_offset,
3974                                   int64_t operation_work_size, void *opaque)
3975 {
3976     Qcow2AmendHelperCBInfo *info = opaque;
3977     int64_t current_work_size;
3978     int64_t projected_work_size;
3979 
3980     if (info->current_operation != info->last_operation) {
3981         if (info->last_operation != QCOW2_NO_OPERATION) {
3982             info->offset_completed += info->last_work_size;
3983             info->operations_completed++;
3984         }
3985 
3986         info->last_operation = info->current_operation;
3987     }
3988 
3989     assert(info->total_operations > 0);
3990     assert(info->operations_completed < info->total_operations);
3991 
3992     info->last_work_size = operation_work_size;
3993 
3994     current_work_size = info->offset_completed + operation_work_size;
3995 
3996     /* current_work_size is the total work size for (operations_completed + 1)
3997      * operations (which includes this one), so multiply it by the number of
3998      * operations not covered and divide it by the number of operations
3999      * covered to get a projection for the operations not covered */
4000     projected_work_size = current_work_size * (info->total_operations -
4001                                                info->operations_completed - 1)
4002                                             / (info->operations_completed + 1);
4003 
4004     info->original_status_cb(bs, info->offset_completed + operation_offset,
4005                              current_work_size + projected_work_size,
4006                              info->original_cb_opaque);
4007 }
4008 
/*
 * Apply the options that are explicitly present in @opts to an existing
 * image.
 *
 * Changeable: compat level, size, backing file and backing format, lazy
 * refcounts, refcount width.  Setting the preallocation mode is rejected
 * with -ENOTSUP, as is any actual change to the encryption flag, the
 * encryption format or the cluster size.
 *
 * Steps run in this order: version upgrade, refcount width change, backing
 * file change, lazy refcounts toggle, resize, version downgrade.  A failure
 * returns immediately; steps that already completed are not undone.
 *
 * @status_cb / @cb_opaque receive progress for the long-running steps via
 * qcow2_amend_helper_cb().
 *
 * Errors are reported with error_report()/error_report_err().  Returns 0 on
 * success and a negative errno value on failure.
 */
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                               BlockDriverAmendStatusCB *status_cb,
                               void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    int old_version = s->qcow_version, new_version = old_version;
    uint64_t new_size = 0;
    const char *backing_file = NULL, *backing_format = NULL;
    bool lazy_refcounts = s->use_lazy_refcounts;
    const char *compat = NULL;
    uint64_t cluster_size = s->cluster_size;
    bool encrypt;
    int encformat;
    int refcount_bits = s->refcount_bits;
    Error *local_err = NULL;
    int ret;
    QemuOptDesc *desc = opts->list->desc;
    Qcow2AmendHelperCBInfo helper_cb_info;

    /* First pass: walk every known option and collect the requested values;
     * nothing is written to the image in this loop */
    while (desc && desc->name) {
        if (!qemu_opt_find(opts, desc->name)) {
            /* only change explicitly defined options */
            desc++;
            continue;
        }

        if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
            compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
            if (!compat) {
                /* preserve default */
            } else if (!strcmp(compat, "0.10")) {
                new_version = 2;
            } else if (!strcmp(compat, "1.1")) {
                new_version = 3;
            } else {
                error_report("Unknown compatibility level %s", compat);
                return -EINVAL;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
            error_report("Cannot change preallocation mode");
            return -ENOTSUP;
        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
            backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
            /* Accepted only if it matches the image's current state */
            encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
                                        !!s->crypto);

            if (encrypt != !!s->crypto) {
                error_report("Changing the encryption flag is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
            /* Accepted only if it matches the image's current state */
            encformat = qcow2_crypt_method_from_format(
                qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));

            if (encformat != s->crypt_method_header) {
                error_report("Changing the encryption format is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
            /* Accepted only if it matches the image's current state */
            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
                                             cluster_size);
            if (cluster_size != s->cluster_size) {
                error_report("Changing the cluster size is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
            lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
                                               lazy_refcounts);
        } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
            refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
                                                refcount_bits);

            if (refcount_bits <= 0 || refcount_bits > 64 ||
                !is_power_of_2(refcount_bits))
            {
                error_report("Refcount width must be a power of two and may "
                             "not exceed 64 bits");
                return -EINVAL;
            }
        } else {
            /* if this point is reached, this probably means a new option was
             * added without having it covered here */
            abort();
        }

        desc++;
    }

    /* Only the downgrade and the refcount width change report progress, so
     * only those count as operations.  Members not named here are
     * zero-initialized, which makes last_operation QCOW2_NO_OPERATION. */
    helper_cb_info = (Qcow2AmendHelperCBInfo){
        .original_status_cb = status_cb,
        .original_cb_opaque = cb_opaque,
        .total_operations = (new_version < old_version)
                          + (s->refcount_bits != refcount_bits)
    };

    /* Upgrade first (some features may require compat=1.1) */
    if (new_version > old_version) {
        s->qcow_version = new_version;
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            s->qcow_version = old_version;
            return ret;
        }
    }

    if (s->refcount_bits != refcount_bits) {
        /* refcount_bits was validated as a power of two above, so its
         * number of trailing zero bits is the refcount order */
        int refcount_order = ctz32(refcount_bits);

        if (new_version < 3 && refcount_bits != 16) {
            error_report("Different refcount widths than 16 bits require "
                         "compatibility level 1.1 or above (use compat=1.1 or "
                         "greater)");
            return -EINVAL;
        }

        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
        ret = qcow2_change_refcount_order(bs, refcount_order,
                                          &qcow2_amend_helper_cb,
                                          &helper_cb_info, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            return ret;
        }
    }

    /* If only one of backing file / backing format was given, the other one
     * keeps the value currently recorded for the image */
    if (backing_file || backing_format) {
        ret = qcow2_change_backing_file(bs,
                    backing_file ?: s->image_backing_file,
                    backing_format ?: s->image_backing_format);
        if (ret < 0) {
            return ret;
        }
    }

    if (s->use_lazy_refcounts != lazy_refcounts) {
        if (lazy_refcounts) {
            if (new_version < 3) {
                error_report("Lazy refcounts only supported with compatibility "
                             "level 1.1 and above (use compat=1.1 or greater)");
                return -EINVAL;
            }
            /* Set the feature bit, and take it back if the header update
             * fails */
            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
                return ret;
            }
            s->use_lazy_refcounts = true;
        } else {
            /* make image clean first */
            ret = qcow2_mark_clean(bs);
            if (ret < 0) {
                return ret;
            }
            /* now disallow lazy refcounts */
            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
                return ret;
            }
            s->use_lazy_refcounts = false;
        }
    }

    /* A size of 0 means "no resize requested" (new_size's initial value) */
    if (new_size) {
        /* Resize through a temporary BlockBackend that holds the resize
         * permission */
        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
        ret = blk_insert_bs(blk, bs, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            blk_unref(blk);
            return ret;
        }

        ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, &local_err);
        blk_unref(blk);
        if (ret < 0) {
            error_report_err(local_err);
            return ret;
        }
    }

    /* Downgrade last (so unsupported features can be removed before) */
    if (new_version < old_version) {
        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
                              &helper_cb_info);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
4208 
4209 /*
4210  * If offset or size are negative, respectively, they will not be included in
4211  * the BLOCK_IMAGE_CORRUPTED event emitted.
4212  * fatal will be ignored for read-only BDS; corruptions found there will always
4213  * be considered non-fatal.
4214  */
4215 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
4216                              int64_t size, const char *message_format, ...)
4217 {
4218     BDRVQcow2State *s = bs->opaque;
4219     const char *node_name;
4220     char *message;
4221     va_list ap;
4222 
4223     fatal = fatal && !bs->read_only;
4224 
4225     if (s->signaled_corruption &&
4226         (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
4227     {
4228         return;
4229     }
4230 
4231     va_start(ap, message_format);
4232     message = g_strdup_vprintf(message_format, ap);
4233     va_end(ap);
4234 
4235     if (fatal) {
4236         fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
4237                 "corruption events will be suppressed\n", message);
4238     } else {
4239         fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
4240                 "corruption events will be suppressed\n", message);
4241     }
4242 
4243     node_name = bdrv_get_node_name(bs);
4244     qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
4245                                           *node_name != '\0', node_name,
4246                                           message, offset >= 0, offset,
4247                                           size >= 0, size,
4248                                           fatal, &error_abort);
4249     g_free(message);
4250 
4251     if (fatal) {
4252         qcow2_mark_corrupt(bs);
4253         bs->drv = NULL; /* make BDS unusable */
4254     }
4255 
4256     s->signaled_corruption = true;
4257 }
4258 
4259 static QemuOptsList qcow2_create_opts = {
4260     .name = "qcow2-create-opts",
4261     .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
4262     .desc = {
4263         {
4264             .name = BLOCK_OPT_SIZE,
4265             .type = QEMU_OPT_SIZE,
4266             .help = "Virtual disk size"
4267         },
4268         {
4269             .name = BLOCK_OPT_COMPAT_LEVEL,
4270             .type = QEMU_OPT_STRING,
4271             .help = "Compatibility level (0.10 or 1.1)"
4272         },
4273         {
4274             .name = BLOCK_OPT_BACKING_FILE,
4275             .type = QEMU_OPT_STRING,
4276             .help = "File name of a base image"
4277         },
4278         {
4279             .name = BLOCK_OPT_BACKING_FMT,
4280             .type = QEMU_OPT_STRING,
4281             .help = "Image format of the base image"
4282         },
4283         {
4284             .name = BLOCK_OPT_ENCRYPT,
4285             .type = QEMU_OPT_BOOL,
4286             .help = "Encrypt the image with format 'aes'. (Deprecated "
4287                     "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
4288         },
4289         {
4290             .name = BLOCK_OPT_ENCRYPT_FORMAT,
4291             .type = QEMU_OPT_STRING,
4292             .help = "Encrypt the image, format choices: 'aes', 'luks'",
4293         },
4294         BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
4295             "ID of secret providing qcow AES key or LUKS passphrase"),
4296         BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
4297         BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
4298         BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
4299         BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
4300         BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
4301         BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
4302         {
4303             .name = BLOCK_OPT_CLUSTER_SIZE,
4304             .type = QEMU_OPT_SIZE,
4305             .help = "qcow2 cluster size",
4306             .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
4307         },
4308         {
4309             .name = BLOCK_OPT_PREALLOC,
4310             .type = QEMU_OPT_STRING,
4311             .help = "Preallocation mode (allowed values: off, metadata, "
4312                     "falloc, full)"
4313         },
4314         {
4315             .name = BLOCK_OPT_LAZY_REFCOUNTS,
4316             .type = QEMU_OPT_BOOL,
4317             .help = "Postpone refcount updates",
4318             .def_value_str = "off"
4319         },
4320         {
4321             .name = BLOCK_OPT_REFCOUNT_BITS,
4322             .type = QEMU_OPT_NUMBER,
4323             .help = "Width of a reference count entry in bits",
4324             .def_value_str = "16"
4325         },
4326         { /* end of list */ }
4327     }
4328 };
4329 
4330 BlockDriver bdrv_qcow2 = {
4331     .format_name        = "qcow2",
4332     .instance_size      = sizeof(BDRVQcow2State),
4333     .bdrv_probe         = qcow2_probe,
4334     .bdrv_open          = qcow2_open,
4335     .bdrv_close         = qcow2_close,
4336     .bdrv_reopen_prepare  = qcow2_reopen_prepare,
4337     .bdrv_reopen_commit   = qcow2_reopen_commit,
4338     .bdrv_reopen_abort    = qcow2_reopen_abort,
4339     .bdrv_join_options    = qcow2_join_options,
4340     .bdrv_child_perm      = bdrv_format_default_perms,
4341     .bdrv_create        = qcow2_create,
4342     .bdrv_has_zero_init = bdrv_has_zero_init_1,
4343     .bdrv_co_get_block_status = qcow2_co_get_block_status,
4344 
4345     .bdrv_co_preadv         = qcow2_co_preadv,
4346     .bdrv_co_pwritev        = qcow2_co_pwritev,
4347     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
4348 
4349     .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
4350     .bdrv_co_pdiscard       = qcow2_co_pdiscard,
4351     .bdrv_truncate          = qcow2_truncate,
4352     .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
4353     .bdrv_make_empty        = qcow2_make_empty,
4354 
4355     .bdrv_snapshot_create   = qcow2_snapshot_create,
4356     .bdrv_snapshot_goto     = qcow2_snapshot_goto,
4357     .bdrv_snapshot_delete   = qcow2_snapshot_delete,
4358     .bdrv_snapshot_list     = qcow2_snapshot_list,
4359     .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
4360     .bdrv_measure           = qcow2_measure,
4361     .bdrv_get_info          = qcow2_get_info,
4362     .bdrv_get_specific_info = qcow2_get_specific_info,
4363 
4364     .bdrv_save_vmstate    = qcow2_save_vmstate,
4365     .bdrv_load_vmstate    = qcow2_load_vmstate,
4366 
4367     .supports_backing           = true,
4368     .bdrv_change_backing_file   = qcow2_change_backing_file,
4369 
4370     .bdrv_refresh_limits        = qcow2_refresh_limits,
4371     .bdrv_invalidate_cache      = qcow2_invalidate_cache,
4372     .bdrv_inactivate            = qcow2_inactivate,
4373 
4374     .create_opts         = &qcow2_create_opts,
4375     .bdrv_check          = qcow2_check,
4376     .bdrv_amend_options  = qcow2_amend_options,
4377 
4378     .bdrv_detach_aio_context  = qcow2_detach_aio_context,
4379     .bdrv_attach_aio_context  = qcow2_attach_aio_context,
4380 
4381     .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
4382     .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
4383     .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
4384 };
4385 
/* Hand the qcow2 driver description (bdrv_qcow2) to bdrv_register(). */
static void bdrv_qcow2_init(void)
{
    bdrv_register(&bdrv_qcow2);
}

/*
 * NOTE(review): block_init() is defined outside this file (presumably in
 * qemu/module.h); it is expected to arrange for bdrv_qcow2_init() to run
 * during block-layer module initialization -- confirm against the macro.
 */
block_init(bdrv_qcow2_init);
4392