xref: /openbmc/qemu/block/qcow2.c (revision 56c39a41adadfc567e1ac22089670bfde6b35365)
1  /*
2   * Block driver for the QCOW version 2 format
3   *
4   * Copyright (c) 2004-2006 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  
27  #include "block/qdict.h"
28  #include "sysemu/block-backend.h"
29  #include "qemu/main-loop.h"
30  #include "qemu/module.h"
31  #include "qcow2.h"
32  #include "qemu/error-report.h"
33  #include "qapi/error.h"
34  #include "qapi/qapi-events-block-core.h"
35  #include "qapi/qmp/qdict.h"
36  #include "qapi/qmp/qstring.h"
37  #include "trace.h"
38  #include "qemu/option_int.h"
39  #include "qemu/cutils.h"
40  #include "qemu/bswap.h"
41  #include "qemu/memalign.h"
42  #include "qapi/qobject-input-visitor.h"
43  #include "qapi/qapi-visit-block-core.h"
44  #include "crypto.h"
45  #include "block/aio_task.h"
46  #include "block/dirty-bitmap.h"
47  
48  /*
49    Differences with QCOW:
50  
51    - Support for multiple incremental snapshots.
52    - Memory management by reference counts.
53    - Clusters which have a reference count of one have the bit
54      QCOW_OFLAG_COPIED to optimize write performance.
55    - Size of compressed clusters is stored in sectors to reduce bit usage
56      in the cluster offsets.
57    - Support for storing additional data (such as the VM state) in the
58      snapshots.
59    - If a backing store is used, the cluster size is not constrained
60      (could be backported to QCOW).
61    - L2 tables always have a size of one cluster.
62  */
63  
64  
/*
 * Fixed-size header that precedes the payload of each header extension.
 * qcow2_read_extensions() reads it raw from the image file and converts both
 * fields from big-endian before using them.
 */
65  typedef struct {
66      uint32_t magic; /* extension type, matched against QCOW2_EXT_MAGIC_* */
67      uint32_t len;   /* payload length in bytes, not counting this header */
68  } QEMU_PACKED QCowExtension;
69  
/*
 * Header extension magic values dispatched on by qcow2_read_extensions().
 * Any other magic is stored there as an unknown extension.
 */
70  #define  QCOW2_EXT_MAGIC_END 0 /* stops extension parsing */
71  #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca /* backing format name */
72  #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 /* feature table */
73  #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 /* LUKS header location */
74  #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875 /* bitmap directory info */
75  #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 /* data file name */
76  
/*
 * Forward declaration only; the definition lies outside this part of the
 * file.
 */
77  static int coroutine_fn
78  qcow2_co_preadv_compressed(BlockDriverState *bs,
79                             uint64_t l2_entry,
80                             uint64_t offset,
81                             uint64_t bytes,
82                             QEMUIOVector *qiov,
83                             size_t qiov_offset);
84  
85  static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
86  {
87      const QCowHeader *cow_header = (const void *)buf;
88  
89      if (buf_size >= sizeof(QCowHeader) &&
90          be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
91          be32_to_cpu(cow_header->version) >= 2)
92          return 100;
93      else
94          return 0;
95  }
96  
97  
98  static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
99                                        uint8_t *buf, size_t buflen,
100                                        void *opaque, Error **errp)
101  {
102      BlockDriverState *bs = opaque;
103      BDRVQcow2State *s = bs->opaque;
104      ssize_t ret;
105  
106      if ((offset + buflen) > s->crypto_header.length) {
107          error_setg(errp, "Request for data outside of extension header");
108          return -1;
109      }
110  
111      ret = bdrv_pread(bs->file, s->crypto_header.offset + offset, buflen, buf,
112                       0);
113      if (ret < 0) {
114          error_setg_errno(errp, -ret, "Could not read encryption header");
115          return -1;
116      }
117      return 0;
118  }
119  
120  
121  static int coroutine_fn GRAPH_RDLOCK
122  qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
123                             Error **errp)
124  {
125      BlockDriverState *bs = opaque;
126      BDRVQcow2State *s = bs->opaque;
127      int64_t ret;
128      int64_t clusterlen;
129  
130      ret = qcow2_alloc_clusters(bs, headerlen);
131      if (ret < 0) {
132          error_setg_errno(errp, -ret,
133                           "Cannot allocate cluster for LUKS header size %zu",
134                           headerlen);
135          return -1;
136      }
137  
138      s->crypto_header.length = headerlen;
139      s->crypto_header.offset = ret;
140  
141      /*
142       * Zero fill all space in cluster so it has predictable
143       * content, as we may not initialize some regions of the
144       * header (eg only 1 out of 8 key slots will be initialized)
145       */
146      clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
147      assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
148      ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
149      if (ret < 0) {
150          error_setg_errno(errp, -ret, "Could not zero fill encryption header");
151          return -1;
152      }
153  
154      return 0;
155  }
156  
157  
158  /* The graph lock must be held when called in coroutine context */
159  static int coroutine_mixed_fn
160  qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
161                              const uint8_t *buf, size_t buflen,
162                              void *opaque, Error **errp)
163  {
164      BlockDriverState *bs = opaque;
165      BDRVQcow2State *s = bs->opaque;
166      ssize_t ret;
167  
168      if ((offset + buflen) > s->crypto_header.length) {
169          error_setg(errp, "Request for data outside of extension header");
170          return -1;
171      }
172  
173      ret = bdrv_pwrite(bs->file, s->crypto_header.offset + offset, buflen, buf,
174                        0);
175      if (ret < 0) {
176          error_setg_errno(errp, -ret, "Could not read encryption header");
177          return -1;
178      }
179      return 0;
180  }
181  
182  static QDict*
183  qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
184  {
185      QDict *cryptoopts_qdict;
186      QDict *opts_qdict;
187  
188      /* Extract "encrypt." options into a qdict */
189      opts_qdict = qemu_opts_to_qdict(opts, NULL);
190      qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
191      qobject_unref(opts_qdict);
192      qdict_put_str(cryptoopts_qdict, "format", fmt);
193      return cryptoopts_qdict;
194  }
195  
196  /*
197   * read qcow2 extension and fill bs
198   * start reading from start_offset
199   * finish reading upon magic of value 0 or when end_offset reached
200   * unknown magic is skipped (future extension this version knows nothing about)
201   * return 0 upon success, non-0 otherwise
202   */
203  static int coroutine_fn GRAPH_RDLOCK
204  qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
205                        uint64_t end_offset, void **p_feature_table,
206                        int flags, bool *need_update_header, Error **errp)
207  {
208      BDRVQcow2State *s = bs->opaque;
209      QCowExtension ext;
210      uint64_t offset;
211      int ret;
212      Qcow2BitmapHeaderExt bitmaps_ext;
213  
214      if (need_update_header != NULL) {
215          *need_update_header = false;
216      }
217  
218  #ifdef DEBUG_EXT
219      printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
220  #endif
221      offset = start_offset;
222      while (offset < end_offset) {
223  
224  #ifdef DEBUG_EXT
225          /* Sanity check */
226          if (offset > s->cluster_size)
227              printf("qcow2_read_extension: suspicious offset %lu\n", offset);
228  
229          printf("attempting to read extended header in offset %lu\n", offset);
230  #endif
231  
232          ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0);
233          if (ret < 0) {
234              error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
235                               "pread fail from offset %" PRIu64, offset);
236              return 1;
237          }
238          ext.magic = be32_to_cpu(ext.magic);
239          ext.len = be32_to_cpu(ext.len);
240          offset += sizeof(ext);
241  #ifdef DEBUG_EXT
242          printf("ext.magic = 0x%x\n", ext.magic);
243  #endif
244          if (offset > end_offset || ext.len > end_offset - offset) {
245              error_setg(errp, "Header extension too large");
246              return -EINVAL;
247          }
248  
249          switch (ext.magic) {
250          case QCOW2_EXT_MAGIC_END:
251              return 0;
252  
253          case QCOW2_EXT_MAGIC_BACKING_FORMAT:
254              if (ext.len >= sizeof(bs->backing_format)) {
255                  error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
256                             " too large (>=%zu)", ext.len,
257                             sizeof(bs->backing_format));
258                  return 2;
259              }
260              ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0);
261              if (ret < 0) {
262                  error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
263                                   "Could not read format name");
264                  return 3;
265              }
266              bs->backing_format[ext.len] = '\0';
267              s->image_backing_format = g_strdup(bs->backing_format);
268  #ifdef DEBUG_EXT
269              printf("Qcow2: Got format extension %s\n", bs->backing_format);
270  #endif
271              break;
272  
273          case QCOW2_EXT_MAGIC_FEATURE_TABLE:
274              if (p_feature_table != NULL) {
275                  void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
276                  ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0);
277                  if (ret < 0) {
278                      error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
279                                       "Could not read table");
280                      g_free(feature_table);
281                      return ret;
282                  }
283  
284                  *p_feature_table = feature_table;
285              }
286              break;
287  
288          case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
289              unsigned int cflags = 0;
290              if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
291                  error_setg(errp, "CRYPTO header extension only "
292                             "expected with LUKS encryption method");
293                  return -EINVAL;
294              }
295              if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
296                  error_setg(errp, "CRYPTO header extension size %u, "
297                             "but expected size %zu", ext.len,
298                             sizeof(Qcow2CryptoHeaderExtension));
299                  return -EINVAL;
300              }
301  
302              ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
303              if (ret < 0) {
304                  error_setg_errno(errp, -ret,
305                                   "Unable to read CRYPTO header extension");
306                  return ret;
307              }
308              s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
309              s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
310  
311              if ((s->crypto_header.offset % s->cluster_size) != 0) {
312                  error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
313                             "not a multiple of cluster size '%u'",
314                             s->crypto_header.offset, s->cluster_size);
315                  return -EINVAL;
316              }
317  
318              if (flags & BDRV_O_NO_IO) {
319                  cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
320              }
321              s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
322                                             qcow2_crypto_hdr_read_func,
323                                             bs, cflags, QCOW2_MAX_THREADS, errp);
324              if (!s->crypto) {
325                  return -EINVAL;
326              }
327          }   break;
328  
329          case QCOW2_EXT_MAGIC_BITMAPS:
330              if (ext.len != sizeof(bitmaps_ext)) {
331                  error_setg_errno(errp, -ret, "bitmaps_ext: "
332                                   "Invalid extension length");
333                  return -EINVAL;
334              }
335  
336              if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
337                  if (s->qcow_version < 3) {
338                      /* Let's be a bit more specific */
339                      warn_report("This qcow2 v2 image contains bitmaps, but "
340                                  "they may have been modified by a program "
341                                  "without persistent bitmap support; so now "
342                                  "they must all be considered inconsistent");
343                  } else {
344                      warn_report("a program lacking bitmap support "
345                                  "modified this file, so all bitmaps are now "
346                                  "considered inconsistent");
347                  }
348                  error_printf("Some clusters may be leaked, "
349                               "run 'qemu-img check -r' on the image "
350                               "file to fix.");
351                  if (need_update_header != NULL) {
352                      /* Updating is needed to drop invalid bitmap extension. */
353                      *need_update_header = true;
354                  }
355                  break;
356              }
357  
358              ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
359              if (ret < 0) {
360                  error_setg_errno(errp, -ret, "bitmaps_ext: "
361                                   "Could not read ext header");
362                  return ret;
363              }
364  
365              if (bitmaps_ext.reserved32 != 0) {
366                  error_setg_errno(errp, -ret, "bitmaps_ext: "
367                                   "Reserved field is not zero");
368                  return -EINVAL;
369              }
370  
371              bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
372              bitmaps_ext.bitmap_directory_size =
373                  be64_to_cpu(bitmaps_ext.bitmap_directory_size);
374              bitmaps_ext.bitmap_directory_offset =
375                  be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
376  
377              if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
378                  error_setg(errp,
379                             "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
380                             "exceeding the QEMU supported maximum of %d",
381                             bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
382                  return -EINVAL;
383              }
384  
385              if (bitmaps_ext.nb_bitmaps == 0) {
386                  error_setg(errp, "found bitmaps extension with zero bitmaps");
387                  return -EINVAL;
388              }
389  
390              if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) {
391                  error_setg(errp, "bitmaps_ext: "
392                                   "invalid bitmap directory offset");
393                  return -EINVAL;
394              }
395  
396              if (bitmaps_ext.bitmap_directory_size >
397                  QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
398                  error_setg(errp, "bitmaps_ext: "
399                                   "bitmap directory size (%" PRIu64 ") exceeds "
400                                   "the maximum supported size (%d)",
401                                   bitmaps_ext.bitmap_directory_size,
402                                   QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
403                  return -EINVAL;
404              }
405  
406              s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
407              s->bitmap_directory_offset =
408                      bitmaps_ext.bitmap_directory_offset;
409              s->bitmap_directory_size =
410                      bitmaps_ext.bitmap_directory_size;
411  
412  #ifdef DEBUG_EXT
413              printf("Qcow2: Got bitmaps extension: "
414                     "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
415                     s->bitmap_directory_offset, s->nb_bitmaps);
416  #endif
417              break;
418  
419          case QCOW2_EXT_MAGIC_DATA_FILE:
420          {
421              s->image_data_file = g_malloc0(ext.len + 1);
422              ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0);
423              if (ret < 0) {
424                  error_setg_errno(errp, -ret,
425                                   "ERROR: Could not read data file name");
426                  return ret;
427              }
428  #ifdef DEBUG_EXT
429              printf("Qcow2: Got external data file %s\n", s->image_data_file);
430  #endif
431              break;
432          }
433  
434          default:
435              /* unknown magic - save it in case we need to rewrite the header */
436              /* If you add a new feature, make sure to also update the fast
437               * path of qcow2_make_empty() to deal with it. */
438              {
439                  Qcow2UnknownHeaderExtension *uext;
440  
441                  uext = g_malloc0(sizeof(*uext)  + ext.len);
442                  uext->magic = ext.magic;
443                  uext->len = ext.len;
444                  QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
445  
446                  ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0);
447                  if (ret < 0) {
448                      error_setg_errno(errp, -ret, "ERROR: unknown extension: "
449                                       "Could not read data");
450                      return ret;
451                  }
452              }
453              break;
454          }
455  
456          offset += ((ext.len + 7) & ~7);
457      }
458  
459      return 0;
460  }
461  
462  static void cleanup_unknown_header_ext(BlockDriverState *bs)
463  {
464      BDRVQcow2State *s = bs->opaque;
465      Qcow2UnknownHeaderExtension *uext, *next;
466  
467      QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
468          QLIST_REMOVE(uext, next);
469          g_free(uext);
470      }
471  }
472  
473  static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
474                                         uint64_t mask)
475  {
476      g_autoptr(GString) features = g_string_sized_new(60);
477  
478      while (table && table->name[0] != '\0') {
479          if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
480              if (mask & (1ULL << table->bit)) {
481                  if (features->len > 0) {
482                      g_string_append(features, ", ");
483                  }
484                  g_string_append_printf(features, "%.46s", table->name);
485                  mask &= ~(1ULL << table->bit);
486              }
487          }
488          table++;
489      }
490  
491      if (mask) {
492          if (features->len > 0) {
493              g_string_append(features, ", ");
494          }
495          g_string_append_printf(features,
496                                 "Unknown incompatible feature: %" PRIx64, mask);
497      }
498  
499      error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str);
500  }
501  
502  /*
503   * Sets the dirty bit and flushes afterwards if necessary.
504   *
505   * The incompatible_features bit is only set if the image file header was
506   * updated successfully.  Therefore it is not required to check the return
507   * value of this function.
508   */
509  int qcow2_mark_dirty(BlockDriverState *bs)
510  {
511      BDRVQcow2State *s = bs->opaque;
512      uint64_t val;
513      int ret;
514  
515      assert(s->qcow_version >= 3);
516  
517      if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
518          return 0; /* already dirty */
519      }
520  
521      val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
522      ret = bdrv_pwrite_sync(bs->file,
523                             offsetof(QCowHeader, incompatible_features),
524                             sizeof(val), &val, 0);
525      if (ret < 0) {
526          return ret;
527      }
528  
529      /* Only treat image as dirty if the header was updated successfully */
530      s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
531      return 0;
532  }
533  
534  /*
535   * Clears the dirty bit and flushes before if necessary.  Only call this
536   * function when there are no pending requests, it does not guard against
537   * concurrent requests dirtying the image.
538   */
539  static int qcow2_mark_clean(BlockDriverState *bs)
540  {
541      BDRVQcow2State *s = bs->opaque;
542  
543      if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
544          int ret;
545  
546          s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
547  
548          ret = qcow2_flush_caches(bs);
549          if (ret < 0) {
550              return ret;
551          }
552  
553          return qcow2_update_header(bs);
554      }
555      return 0;
556  }
557  
558  /*
559   * Marks the image as corrupt.
560   */
561  int qcow2_mark_corrupt(BlockDriverState *bs)
562  {
563      BDRVQcow2State *s = bs->opaque;
564  
565      s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
566      return qcow2_update_header(bs);
567  }
568  
569  /*
570   * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
571   * before if necessary.
572   */
573  static int coroutine_fn qcow2_mark_consistent(BlockDriverState *bs)
574  {
575      BDRVQcow2State *s = bs->opaque;
576  
577      if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
578          int ret = qcow2_flush_caches(bs);
579          if (ret < 0) {
580              return ret;
581          }
582  
583          s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
584          return qcow2_update_header(bs);
585      }
586      return 0;
587  }
588  
589  static void qcow2_add_check_result(BdrvCheckResult *out,
590                                     const BdrvCheckResult *src,
591                                     bool set_allocation_info)
592  {
593      out->corruptions += src->corruptions;
594      out->leaks += src->leaks;
595      out->check_errors += src->check_errors;
596      out->corruptions_fixed += src->corruptions_fixed;
597      out->leaks_fixed += src->leaks_fixed;
598  
599      if (set_allocation_info) {
600          out->image_end_offset = src->image_end_offset;
601          out->bfi = src->bfi;
602      }
603  }
604  
605  static int coroutine_fn GRAPH_RDLOCK
606  qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result,
607                        BdrvCheckMode fix)
608  {
609      BdrvCheckResult snapshot_res = {};
610      BdrvCheckResult refcount_res = {};
611      int ret;
612  
613      memset(result, 0, sizeof(*result));
614  
615      ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
616      if (ret < 0) {
617          qcow2_add_check_result(result, &snapshot_res, false);
618          return ret;
619      }
620  
621      ret = qcow2_check_refcounts(bs, &refcount_res, fix);
622      qcow2_add_check_result(result, &refcount_res, true);
623      if (ret < 0) {
624          qcow2_add_check_result(result, &snapshot_res, false);
625          return ret;
626      }
627  
628      ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
629      qcow2_add_check_result(result, &snapshot_res, false);
630      if (ret < 0) {
631          return ret;
632      }
633  
634      if (fix && result->check_errors == 0 && result->corruptions == 0) {
635          ret = qcow2_mark_clean(bs);
636          if (ret < 0) {
637              return ret;
638          }
639          return qcow2_mark_consistent(bs);
640      }
641      return ret;
642  }
643  
644  static int coroutine_fn GRAPH_RDLOCK
645  qcow2_co_check(BlockDriverState *bs, BdrvCheckResult *result,
646                 BdrvCheckMode fix)
647  {
648      BDRVQcow2State *s = bs->opaque;
649      int ret;
650  
651      qemu_co_mutex_lock(&s->lock);
652      ret = qcow2_co_check_locked(bs, result, fix);
653      qemu_co_mutex_unlock(&s->lock);
654      return ret;
655  }
656  
657  int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
658                           uint64_t entries, size_t entry_len,
659                           int64_t max_size_bytes, const char *table_name,
660                           Error **errp)
661  {
662      BDRVQcow2State *s = bs->opaque;
663  
664      if (entries > max_size_bytes / entry_len) {
665          error_setg(errp, "%s too large", table_name);
666          return -EFBIG;
667      }
668  
669      /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
670       * because values will be passed to qemu functions taking int64_t. */
671      if ((INT64_MAX - entries * entry_len < offset) ||
672          (offset_into_cluster(s, offset) != 0)) {
673          error_setg(errp, "%s offset invalid", table_name);
674          return -EINVAL;
675      }
676  
677      return 0;
678  }
679  
/*
 * NULL-terminated list of runtime option names.  Every name listed here
 * also has an entry in qcow2_runtime_opts.
 * NOTE(review): presumably the options that may be changed after the image
 * is open; the code consuming this list is outside this section -- confirm.
 */
680  static const char *const mutable_opts[] = {
681      QCOW2_OPT_LAZY_REFCOUNTS,
682      QCOW2_OPT_DISCARD_REQUEST,
683      QCOW2_OPT_DISCARD_SNAPSHOT,
684      QCOW2_OPT_DISCARD_OTHER,
685      QCOW2_OPT_DISCARD_NO_UNREF,
686      QCOW2_OPT_OVERLAP,
687      QCOW2_OPT_OVERLAP_TEMPLATE,
688      QCOW2_OPT_OVERLAP_MAIN_HEADER,
689      QCOW2_OPT_OVERLAP_ACTIVE_L1,
690      QCOW2_OPT_OVERLAP_ACTIVE_L2,
691      QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
692      QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
693      QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
694      QCOW2_OPT_OVERLAP_INACTIVE_L1,
695      QCOW2_OPT_OVERLAP_INACTIVE_L2,
696      QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
697      QCOW2_OPT_CACHE_SIZE,
698      QCOW2_OPT_L2_CACHE_SIZE,
699      QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
700      QCOW2_OPT_REFCOUNT_CACHE_SIZE,
701      QCOW2_OPT_CACHE_CLEAN_INTERVAL,
702      NULL /* terminator */
703  };
704  
/*
 * Option list named "qcow2" describing the driver's runtime options.  Each
 * entry gives an option's name, value type and help text; the list ends
 * with an empty entry.
 */
705  static QemuOptsList qcow2_runtime_opts = {
706      .name = "qcow2",
707      .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
708      .desc = {
        /* Refcount update and discard behaviour */
709          {
710              .name = QCOW2_OPT_LAZY_REFCOUNTS,
711              .type = QEMU_OPT_BOOL,
712              .help = "Postpone refcount updates",
713          },
714          {
715              .name = QCOW2_OPT_DISCARD_REQUEST,
716              .type = QEMU_OPT_BOOL,
717              .help = "Pass guest discard requests to the layer below",
718          },
719          {
720              .name = QCOW2_OPT_DISCARD_SNAPSHOT,
721              .type = QEMU_OPT_BOOL,
722              .help = "Generate discard requests when snapshot related space "
723                      "is freed",
724          },
725          {
726              .name = QCOW2_OPT_DISCARD_OTHER,
727              .type = QEMU_OPT_BOOL,
728              .help = "Generate discard requests when other clusters are freed",
729          },
730          {
731              .name = QCOW2_OPT_DISCARD_NO_UNREF,
732              .type = QEMU_OPT_BOOL,
733              .help = "Do not unreference discarded clusters",
734          },
        /*
         * Overlap check template selection; both option names below carry
         * the same help text.
         */
735          {
736              .name = QCOW2_OPT_OVERLAP,
737              .type = QEMU_OPT_STRING,
738              .help = "Selects which overlap checks to perform from a range of "
739                      "templates (none, constant, cached, all)",
740          },
741          {
742              .name = QCOW2_OPT_OVERLAP_TEMPLATE,
743              .type = QEMU_OPT_STRING,
744              .help = "Selects which overlap checks to perform from a range of "
745                      "templates (none, constant, cached, all)",
746          },
        /* Boolean switches for the individual overlap checks */
747          {
748              .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
749              .type = QEMU_OPT_BOOL,
750              .help = "Check for unintended writes into the main qcow2 header",
751          },
752          {
753              .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
754              .type = QEMU_OPT_BOOL,
755              .help = "Check for unintended writes into the active L1 table",
756          },
757          {
758              .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
759              .type = QEMU_OPT_BOOL,
760              .help = "Check for unintended writes into an active L2 table",
761          },
762          {
763              .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
764              .type = QEMU_OPT_BOOL,
765              .help = "Check for unintended writes into the refcount table",
766          },
767          {
768              .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
769              .type = QEMU_OPT_BOOL,
770              .help = "Check for unintended writes into a refcount block",
771          },
772          {
773              .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
774              .type = QEMU_OPT_BOOL,
775              .help = "Check for unintended writes into the snapshot table",
776          },
777          {
778              .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
779              .type = QEMU_OPT_BOOL,
780              .help = "Check for unintended writes into an inactive L1 table",
781          },
782          {
783              .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
784              .type = QEMU_OPT_BOOL,
785              .help = "Check for unintended writes into an inactive L2 table",
786          },
787          {
788              .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
789              .type = QEMU_OPT_BOOL,
790              .help = "Check for unintended writes into the bitmap directory",
791          },
        /* Metadata cache sizing and cleaning */
792          {
793              .name = QCOW2_OPT_CACHE_SIZE,
794              .type = QEMU_OPT_SIZE,
795              .help = "Maximum combined metadata (L2 tables and refcount blocks) "
796                      "cache size",
797          },
798          {
799              .name = QCOW2_OPT_L2_CACHE_SIZE,
800              .type = QEMU_OPT_SIZE,
801              .help = "Maximum L2 table cache size",
802          },
803          {
804              .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
805              .type = QEMU_OPT_SIZE,
806              .help = "Size of each entry in the L2 cache",
807          },
808          {
809              .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
810              .type = QEMU_OPT_SIZE,
811              .help = "Maximum refcount block cache size",
812          },
813          {
814              .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
815              .type = QEMU_OPT_NUMBER,
816              .help = "Clean unused cache entries after this time (in seconds)",
817          },
        /* Encryption key secret, registered under the "encrypt." prefix */
818          BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
819              "ID of secret providing qcow2 AES key or LUKS passphrase"),
820          { /* end of list */ }
821      },
822  };
823  
824  static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
825      [QCOW2_OL_MAIN_HEADER_BITNR]      = QCOW2_OPT_OVERLAP_MAIN_HEADER,
826      [QCOW2_OL_ACTIVE_L1_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L1,
827      [QCOW2_OL_ACTIVE_L2_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L2,
828      [QCOW2_OL_REFCOUNT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
829      [QCOW2_OL_REFCOUNT_BLOCK_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
830      [QCOW2_OL_SNAPSHOT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
831      [QCOW2_OL_INACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L1,
832      [QCOW2_OL_INACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L2,
833      [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
834  };
835  
836  static void cache_clean_timer_cb(void *opaque)
837  {
838      BlockDriverState *bs = opaque;
839      BDRVQcow2State *s = bs->opaque;
840      qcow2_cache_clean_unused(s->l2_table_cache);
841      qcow2_cache_clean_unused(s->refcount_block_cache);
842      timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
843                (int64_t) s->cache_clean_interval * 1000);
844  }
845  
846  static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
847  {
848      BDRVQcow2State *s = bs->opaque;
849      if (s->cache_clean_interval > 0) {
850          s->cache_clean_timer =
851              aio_timer_new_with_attrs(context, QEMU_CLOCK_VIRTUAL,
852                                       SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
853                                       cache_clean_timer_cb, bs);
854          timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
855                    (int64_t) s->cache_clean_interval * 1000);
856      }
857  }
858  
859  static void cache_clean_timer_del(BlockDriverState *bs)
860  {
861      BDRVQcow2State *s = bs->opaque;
862      if (s->cache_clean_timer) {
863          timer_free(s->cache_clean_timer);
864          s->cache_clean_timer = NULL;
865      }
866  }
867  
/*
 * AioContext detach hook: releases the cache-cleaning timer (if any) via
 * cache_clean_timer_del().
 */
868  static void qcow2_detach_aio_context(BlockDriverState *bs)
869  {
870      cache_clean_timer_del(bs);
871  }
872  
/*
 * AioContext attach hook: sets up the cache-cleaning timer in @new_context
 * via cache_clean_timer_init(), which only creates a timer when the
 * configured cleaning interval is greater than zero.
 */
873  static void qcow2_attach_aio_context(BlockDriverState *bs,
874                                       AioContext *new_context)
875  {
876      cache_clean_timer_init(bs, new_context);
877  }
878  
879  static bool read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
880                               uint64_t *l2_cache_size,
881                               uint64_t *l2_cache_entry_size,
882                               uint64_t *refcount_cache_size, Error **errp)
883  {
884      BDRVQcow2State *s = bs->opaque;
885      uint64_t combined_cache_size, l2_cache_max_setting;
886      bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
887      bool l2_cache_entry_size_set;
888      int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
889      uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
890      uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
891      /* An L2 table is always one cluster in size so the max cache size
892       * should be a multiple of the cluster size. */
893      uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s),
894                                       s->cluster_size);
895  
896      combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
897      l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
898      refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
899      l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
900  
901      combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
902      l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
903                                               DEFAULT_L2_CACHE_MAX_SIZE);
904      *refcount_cache_size = qemu_opt_get_size(opts,
905                                               QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
906  
907      *l2_cache_entry_size = qemu_opt_get_size(
908          opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
909  
910      *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
911  
912      if (combined_cache_size_set) {
913          if (l2_cache_size_set && refcount_cache_size_set) {
914              error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
915                         " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
916                         "at the same time");
917              return false;
918          } else if (l2_cache_size_set &&
919                     (l2_cache_max_setting > combined_cache_size)) {
920              error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
921                         QCOW2_OPT_CACHE_SIZE);
922              return false;
923          } else if (*refcount_cache_size > combined_cache_size) {
924              error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
925                         QCOW2_OPT_CACHE_SIZE);
926              return false;
927          }
928  
929          if (l2_cache_size_set) {
930              *refcount_cache_size = combined_cache_size - *l2_cache_size;
931          } else if (refcount_cache_size_set) {
932              *l2_cache_size = combined_cache_size - *refcount_cache_size;
933          } else {
934              /* Assign as much memory as possible to the L2 cache, and
935               * use the remainder for the refcount cache */
936              if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
937                  *l2_cache_size = max_l2_cache;
938                  *refcount_cache_size = combined_cache_size - *l2_cache_size;
939              } else {
940                  *refcount_cache_size =
941                      MIN(combined_cache_size, min_refcount_cache);
942                  *l2_cache_size = combined_cache_size - *refcount_cache_size;
943              }
944          }
945      }
946  
947      /*
948       * If the L2 cache is not enough to cover the whole disk then
949       * default to 4KB entries. Smaller entries reduce the cost of
950       * loads and evictions and increase I/O performance.
951       */
952      if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
953          *l2_cache_entry_size = MIN(s->cluster_size, 4096);
954      }
955  
956      /* l2_cache_size and refcount_cache_size are ensured to have at least
957       * their minimum values in qcow2_update_options_prepare() */
958  
959      if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
960          *l2_cache_entry_size > s->cluster_size ||
961          !is_power_of_2(*l2_cache_entry_size)) {
962          error_setg(errp, "L2 cache entry size must be a power of two "
963                     "between %d and the cluster size (%d)",
964                     1 << MIN_CLUSTER_BITS, s->cluster_size);
965          return false;
966      }
967  
968      return true;
969  }
970  
/*
 * Settings staged by qcow2_update_options_prepare(). They are either
 * copied into the live BDRVQcow2State by qcow2_update_options_commit()
 * or released by qcow2_update_options_abort().
 */
971  typedef struct Qcow2ReopenState {
972      Qcow2Cache *l2_table_cache; /* Newly created L2 table cache */
973      Qcow2Cache *refcount_block_cache; /* Newly created refcount block cache */
974      int l2_slice_size; /* Number of entries in a slice of the L2 table */
975      bool use_lazy_refcounts;
976      int overlap_check; /* Bitmask, one bit per QCOW2_OL_*_BITNR */
977      bool discard_passthrough[QCOW2_DISCARD_MAX];
978      bool discard_no_unref;
979      uint64_t cache_clean_interval; /* Option value; checked <= UINT_MAX */
980      QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
981  } Qcow2ReopenState;
982  
983  static int qcow2_update_options_prepare(BlockDriverState *bs,
984                                          Qcow2ReopenState *r,
985                                          QDict *options, int flags,
986                                          Error **errp)
987  {
988      BDRVQcow2State *s = bs->opaque;
989      QemuOpts *opts = NULL;
990      const char *opt_overlap_check, *opt_overlap_check_template;
991      int overlap_check_template = 0;
992      uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
993      int i;
994      const char *encryptfmt;
995      QDict *encryptopts = NULL;
996      int ret;
997  
998      qdict_extract_subqdict(options, &encryptopts, "encrypt.");
999      encryptfmt = qdict_get_try_str(encryptopts, "format");
1000  
1001      opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
1002      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1003          ret = -EINVAL;
1004          goto fail;
1005      }
1006  
1007      /* get L2 table/refcount block cache size from command line options */
1008      if (!read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
1009                            &refcount_cache_size, errp)) {
1010          ret = -EINVAL;
1011          goto fail;
1012      }
1013  
1014      l2_cache_size /= l2_cache_entry_size;
1015      if (l2_cache_size < MIN_L2_CACHE_SIZE) {
1016          l2_cache_size = MIN_L2_CACHE_SIZE;
1017      }
1018      if (l2_cache_size > INT_MAX) {
1019          error_setg(errp, "L2 cache size too big");
1020          ret = -EINVAL;
1021          goto fail;
1022      }
1023  
1024      refcount_cache_size /= s->cluster_size;
1025      if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
1026          refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
1027      }
1028      if (refcount_cache_size > INT_MAX) {
1029          error_setg(errp, "Refcount cache size too big");
1030          ret = -EINVAL;
1031          goto fail;
1032      }
1033  
1034      /* alloc new L2 table/refcount block cache, flush old one */
1035      if (s->l2_table_cache) {
1036          ret = qcow2_cache_flush(bs, s->l2_table_cache);
1037          if (ret) {
1038              error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
1039              goto fail;
1040          }
1041      }
1042  
1043      if (s->refcount_block_cache) {
1044          ret = qcow2_cache_flush(bs, s->refcount_block_cache);
1045          if (ret) {
1046              error_setg_errno(errp, -ret,
1047                               "Failed to flush the refcount block cache");
1048              goto fail;
1049          }
1050      }
1051  
1052      r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s);
1053      r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
1054                                             l2_cache_entry_size);
1055      r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
1056                                                   s->cluster_size);
1057      if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
1058          error_setg(errp, "Could not allocate metadata caches");
1059          ret = -ENOMEM;
1060          goto fail;
1061      }
1062  
1063      /* New interval for cache cleanup timer */
1064      r->cache_clean_interval =
1065          qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
1066                              DEFAULT_CACHE_CLEAN_INTERVAL);
1067  #ifndef CONFIG_LINUX
1068      if (r->cache_clean_interval != 0) {
1069          error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
1070                     " not supported on this host");
1071          ret = -EINVAL;
1072          goto fail;
1073      }
1074  #endif
1075      if (r->cache_clean_interval > UINT_MAX) {
1076          error_setg(errp, "Cache clean interval too big");
1077          ret = -EINVAL;
1078          goto fail;
1079      }
1080  
1081      /* lazy-refcounts; flush if going from enabled to disabled */
1082      r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
1083          (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
1084      if (r->use_lazy_refcounts && s->qcow_version < 3) {
1085          error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
1086                     "qemu 1.1 compatibility level");
1087          ret = -EINVAL;
1088          goto fail;
1089      }
1090  
1091      if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
1092          ret = qcow2_mark_clean(bs);
1093          if (ret < 0) {
1094              error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
1095              goto fail;
1096          }
1097      }
1098  
1099      /* Overlap check options */
1100      opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
1101      opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
1102      if (opt_overlap_check_template && opt_overlap_check &&
1103          strcmp(opt_overlap_check_template, opt_overlap_check))
1104      {
1105          error_setg(errp, "Conflicting values for qcow2 options '"
1106                     QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
1107                     "' ('%s')", opt_overlap_check, opt_overlap_check_template);
1108          ret = -EINVAL;
1109          goto fail;
1110      }
1111      if (!opt_overlap_check) {
1112          opt_overlap_check = opt_overlap_check_template ?: "cached";
1113      }
1114  
1115      if (!strcmp(opt_overlap_check, "none")) {
1116          overlap_check_template = 0;
1117      } else if (!strcmp(opt_overlap_check, "constant")) {
1118          overlap_check_template = QCOW2_OL_CONSTANT;
1119      } else if (!strcmp(opt_overlap_check, "cached")) {
1120          overlap_check_template = QCOW2_OL_CACHED;
1121      } else if (!strcmp(opt_overlap_check, "all")) {
1122          overlap_check_template = QCOW2_OL_ALL;
1123      } else {
1124          error_setg(errp, "Unsupported value '%s' for qcow2 option "
1125                     "'overlap-check'. Allowed are any of the following: "
1126                     "none, constant, cached, all", opt_overlap_check);
1127          ret = -EINVAL;
1128          goto fail;
1129      }
1130  
1131      r->overlap_check = 0;
1132      for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
1133          /* overlap-check defines a template bitmask, but every flag may be
1134           * overwritten through the associated boolean option */
1135          r->overlap_check |=
1136              qemu_opt_get_bool(opts, overlap_bool_option_names[i],
1137                                overlap_check_template & (1 << i)) << i;
1138      }
1139  
1140      r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
1141      r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
1142      r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
1143          qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
1144                            flags & BDRV_O_UNMAP);
1145      r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
1146          qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
1147      r->discard_passthrough[QCOW2_DISCARD_OTHER] =
1148          qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
1149  
1150      r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
1151                                              false);
1152      if (r->discard_no_unref && s->qcow_version < 3) {
1153          error_setg(errp,
1154                     "discard-no-unref is only supported since qcow2 version 3");
1155          ret = -EINVAL;
1156          goto fail;
1157      }
1158  
1159      switch (s->crypt_method_header) {
1160      case QCOW_CRYPT_NONE:
1161          if (encryptfmt) {
1162              error_setg(errp, "No encryption in image header, but options "
1163                         "specified format '%s'", encryptfmt);
1164              ret = -EINVAL;
1165              goto fail;
1166          }
1167          break;
1168  
1169      case QCOW_CRYPT_AES:
1170          if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
1171              error_setg(errp,
1172                         "Header reported 'aes' encryption format but "
1173                         "options specify '%s'", encryptfmt);
1174              ret = -EINVAL;
1175              goto fail;
1176          }
1177          qdict_put_str(encryptopts, "format", "qcow");
1178          r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1179          if (!r->crypto_opts) {
1180              ret = -EINVAL;
1181              goto fail;
1182          }
1183          break;
1184  
1185      case QCOW_CRYPT_LUKS:
1186          if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
1187              error_setg(errp,
1188                         "Header reported 'luks' encryption format but "
1189                         "options specify '%s'", encryptfmt);
1190              ret = -EINVAL;
1191              goto fail;
1192          }
1193          qdict_put_str(encryptopts, "format", "luks");
1194          r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1195          if (!r->crypto_opts) {
1196              ret = -EINVAL;
1197              goto fail;
1198          }
1199          break;
1200  
1201      default:
1202          error_setg(errp, "Unsupported encryption method %d",
1203                     s->crypt_method_header);
1204          ret = -EINVAL;
1205          goto fail;
1206      }
1207  
1208      ret = 0;
1209  fail:
1210      qobject_unref(encryptopts);
1211      qemu_opts_del(opts);
1212      opts = NULL;
1213      return ret;
1214  }
1215  
1216  static void qcow2_update_options_commit(BlockDriverState *bs,
1217                                          Qcow2ReopenState *r)
1218  {
1219      BDRVQcow2State *s = bs->opaque;
1220      int i;
1221  
1222      if (s->l2_table_cache) {
1223          qcow2_cache_destroy(s->l2_table_cache);
1224      }
1225      if (s->refcount_block_cache) {
1226          qcow2_cache_destroy(s->refcount_block_cache);
1227      }
1228      s->l2_table_cache = r->l2_table_cache;
1229      s->refcount_block_cache = r->refcount_block_cache;
1230      s->l2_slice_size = r->l2_slice_size;
1231  
1232      s->overlap_check = r->overlap_check;
1233      s->use_lazy_refcounts = r->use_lazy_refcounts;
1234  
1235      for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1236          s->discard_passthrough[i] = r->discard_passthrough[i];
1237      }
1238  
1239      s->discard_no_unref = r->discard_no_unref;
1240  
1241      if (s->cache_clean_interval != r->cache_clean_interval) {
1242          cache_clean_timer_del(bs);
1243          s->cache_clean_interval = r->cache_clean_interval;
1244          cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1245      }
1246  
1247      qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1248      s->crypto_opts = r->crypto_opts;
1249  }
1250  
1251  static void qcow2_update_options_abort(BlockDriverState *bs,
1252                                         Qcow2ReopenState *r)
1253  {
1254      if (r->l2_table_cache) {
1255          qcow2_cache_destroy(r->l2_table_cache);
1256      }
1257      if (r->refcount_block_cache) {
1258          qcow2_cache_destroy(r->refcount_block_cache);
1259      }
1260      qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1261  }
1262  
1263  static int coroutine_fn
1264  qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
1265                       Error **errp)
1266  {
1267      Qcow2ReopenState r = {};
1268      int ret;
1269  
1270      ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1271      if (ret >= 0) {
1272          qcow2_update_options_commit(bs, &r);
1273      } else {
1274          qcow2_update_options_abort(bs, &r);
1275      }
1276  
1277      return ret;
1278  }
1279  
1280  static int validate_compression_type(BDRVQcow2State *s, Error **errp)
1281  {
1282      switch (s->compression_type) {
1283      case QCOW2_COMPRESSION_TYPE_ZLIB:
1284  #ifdef CONFIG_ZSTD
1285      case QCOW2_COMPRESSION_TYPE_ZSTD:
1286  #endif
1287          break;
1288  
1289      default:
1290          error_setg(errp, "qcow2: unknown compression type: %u",
1291                     s->compression_type);
1292          return -ENOTSUP;
1293      }
1294  
1295      /*
1296       * if the compression type differs from QCOW2_COMPRESSION_TYPE_ZLIB
1297       * the incompatible feature flag must be set
1298       */
1299      if (s->compression_type == QCOW2_COMPRESSION_TYPE_ZLIB) {
1300          if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
1301              error_setg(errp, "qcow2: Compression type incompatible feature "
1302                               "bit must not be set");
1303              return -EINVAL;
1304          }
1305      } else {
1306          if (!(s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION)) {
1307              error_setg(errp, "qcow2: Compression type incompatible feature "
1308                               "bit must be set");
1309              return -EINVAL;
1310          }
1311      }
1312  
1313      return 0;
1314  }
1315  
1316  /* Called with s->lock held.  */
1317  static int coroutine_fn GRAPH_RDLOCK
1318  qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
1319                bool open_data_file, Error **errp)
1320  {
1321      ERRP_GUARD();
1322      BDRVQcow2State *s = bs->opaque;
1323      unsigned int len, i;
1324      int ret = 0;
1325      QCowHeader header;
1326      uint64_t ext_end;
1327      uint64_t l1_vm_state_index;
1328      bool update_header = false;
1329  
1330      ret = bdrv_co_pread(bs->file, 0, sizeof(header), &header, 0);
1331      if (ret < 0) {
1332          error_setg_errno(errp, -ret, "Could not read qcow2 header");
1333          goto fail;
1334      }
1335      header.magic = be32_to_cpu(header.magic);
1336      header.version = be32_to_cpu(header.version);
1337      header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
1338      header.backing_file_size = be32_to_cpu(header.backing_file_size);
1339      header.size = be64_to_cpu(header.size);
1340      header.cluster_bits = be32_to_cpu(header.cluster_bits);
1341      header.crypt_method = be32_to_cpu(header.crypt_method);
1342      header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
1343      header.l1_size = be32_to_cpu(header.l1_size);
1344      header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
1345      header.refcount_table_clusters =
1346          be32_to_cpu(header.refcount_table_clusters);
1347      header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
1348      header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
1349  
1350      if (header.magic != QCOW_MAGIC) {
1351          error_setg(errp, "Image is not in qcow2 format");
1352          ret = -EINVAL;
1353          goto fail;
1354      }
1355      if (header.version < 2 || header.version > 3) {
1356          error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
1357          ret = -ENOTSUP;
1358          goto fail;
1359      }
1360  
1361      s->qcow_version = header.version;
1362  
1363      /* Initialise cluster size */
1364      if (header.cluster_bits < MIN_CLUSTER_BITS ||
1365          header.cluster_bits > MAX_CLUSTER_BITS) {
1366          error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
1367                     header.cluster_bits);
1368          ret = -EINVAL;
1369          goto fail;
1370      }
1371  
1372      s->cluster_bits = header.cluster_bits;
1373      s->cluster_size = 1 << s->cluster_bits;
1374  
1375      /* Initialise version 3 header fields */
1376      if (header.version == 2) {
1377          header.incompatible_features    = 0;
1378          header.compatible_features      = 0;
1379          header.autoclear_features       = 0;
1380          header.refcount_order           = 4;
1381          header.header_length            = 72;
1382      } else {
1383          header.incompatible_features =
1384              be64_to_cpu(header.incompatible_features);
1385          header.compatible_features = be64_to_cpu(header.compatible_features);
1386          header.autoclear_features = be64_to_cpu(header.autoclear_features);
1387          header.refcount_order = be32_to_cpu(header.refcount_order);
1388          header.header_length = be32_to_cpu(header.header_length);
1389  
1390          if (header.header_length < 104) {
1391              error_setg(errp, "qcow2 header too short");
1392              ret = -EINVAL;
1393              goto fail;
1394          }
1395      }
1396  
1397      if (header.header_length > s->cluster_size) {
1398          error_setg(errp, "qcow2 header exceeds cluster size");
1399          ret = -EINVAL;
1400          goto fail;
1401      }
1402  
1403      if (header.header_length > sizeof(header)) {
1404          s->unknown_header_fields_size = header.header_length - sizeof(header);
1405          s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
1406          ret = bdrv_co_pread(bs->file, sizeof(header),
1407                              s->unknown_header_fields_size,
1408                              s->unknown_header_fields, 0);
1409          if (ret < 0) {
1410              error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
1411                               "fields");
1412              goto fail;
1413          }
1414      }
1415  
1416      if (header.backing_file_offset > s->cluster_size) {
1417          error_setg(errp, "Invalid backing file offset");
1418          ret = -EINVAL;
1419          goto fail;
1420      }
1421  
1422      if (header.backing_file_offset) {
1423          ext_end = header.backing_file_offset;
1424      } else {
1425          ext_end = 1 << header.cluster_bits;
1426      }
1427  
1428      /* Handle feature bits */
1429      s->incompatible_features    = header.incompatible_features;
1430      s->compatible_features      = header.compatible_features;
1431      s->autoclear_features       = header.autoclear_features;
1432  
1433      /*
1434       * Handle compression type
1435       * Older qcow2 images don't contain the compression type header.
1436       * Distinguish them by the header length and use
1437       * the only valid (default) compression type in that case
1438       */
1439      if (header.header_length > offsetof(QCowHeader, compression_type)) {
1440          s->compression_type = header.compression_type;
1441      } else {
1442          s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
1443      }
1444  
1445      ret = validate_compression_type(s, errp);
1446      if (ret) {
1447          goto fail;
1448      }
1449  
1450      if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
1451          void *feature_table = NULL;
1452          qcow2_read_extensions(bs, header.header_length, ext_end,
1453                                &feature_table, flags, NULL, NULL);
1454          report_unsupported_feature(errp, feature_table,
1455                                     s->incompatible_features &
1456                                     ~QCOW2_INCOMPAT_MASK);
1457          ret = -ENOTSUP;
1458          g_free(feature_table);
1459          goto fail;
1460      }
1461  
1462      if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
1463          /* Corrupt images may not be written to unless they are being repaired
1464           */
1465          if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
1466              error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
1467                         "read/write");
1468              ret = -EACCES;
1469              goto fail;
1470          }
1471      }
1472  
1473      s->subclusters_per_cluster =
1474          has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
1475      s->subcluster_size = s->cluster_size / s->subclusters_per_cluster;
1476      s->subcluster_bits = ctz32(s->subcluster_size);
1477  
1478      if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) {
1479          error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size);
1480          ret = -EINVAL;
1481          goto fail;
1482      }
1483  
1484      /* Check support for various header values */
1485      if (header.refcount_order > 6) {
1486          error_setg(errp, "Reference count entry width too large; may not "
1487                     "exceed 64 bits");
1488          ret = -EINVAL;
1489          goto fail;
1490      }
1491      s->refcount_order = header.refcount_order;
1492      s->refcount_bits = 1 << s->refcount_order;
1493      s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
1494      s->refcount_max += s->refcount_max - 1;
1495  
1496      s->crypt_method_header = header.crypt_method;
1497      if (s->crypt_method_header) {
1498          if (bdrv_uses_whitelist() &&
1499              s->crypt_method_header == QCOW_CRYPT_AES) {
1500              error_setg(errp,
1501                         "Use of AES-CBC encrypted qcow2 images is no longer "
1502                         "supported in system emulators");
1503              error_append_hint(errp,
1504                                "You can use 'qemu-img convert' to convert your "
1505                                "image to an alternative supported format, such "
1506                                "as unencrypted qcow2, or raw with the LUKS "
1507                                "format instead.\n");
1508              ret = -ENOSYS;
1509              goto fail;
1510          }
1511  
1512          if (s->crypt_method_header == QCOW_CRYPT_AES) {
1513              s->crypt_physical_offset = false;
1514          } else {
1515              /* Assuming LUKS and any future crypt methods we
1516               * add will all use physical offsets, due to the
1517               * fact that the alternative is insecure...  */
1518              s->crypt_physical_offset = true;
1519          }
1520  
1521          bs->encrypted = true;
1522      }
1523  
1524      s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s));
1525      s->l2_size = 1 << s->l2_bits;
1526      /* 2^(s->refcount_order - 3) is the refcount width in bytes */
1527      s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
1528      s->refcount_block_size = 1 << s->refcount_block_bits;
1529      bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
1530      s->csize_shift = (62 - (s->cluster_bits - 8));
1531      s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
1532      s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
1533  
1534      s->refcount_table_offset = header.refcount_table_offset;
1535      s->refcount_table_size =
1536          header.refcount_table_clusters << (s->cluster_bits - 3);
1537  
1538      if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
1539          error_setg(errp, "Image does not contain a reference count table");
1540          ret = -EINVAL;
1541          goto fail;
1542      }
1543  
1544      ret = qcow2_validate_table(bs, s->refcount_table_offset,
1545                                 header.refcount_table_clusters,
1546                                 s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
1547                                 "Reference count table", errp);
1548      if (ret < 0) {
1549          goto fail;
1550      }
1551  
1552      if (!(flags & BDRV_O_CHECK)) {
1553          /*
1554           * The total size in bytes of the snapshot table is checked in
1555           * qcow2_read_snapshots() because the size of each snapshot is
1556           * variable and we don't know it yet.
1557           * Here we only check the offset and number of snapshots.
1558           */
1559          ret = qcow2_validate_table(bs, header.snapshots_offset,
1560                                     header.nb_snapshots,
1561                                     sizeof(QCowSnapshotHeader),
1562                                     sizeof(QCowSnapshotHeader) *
1563                                         QCOW_MAX_SNAPSHOTS,
1564                                     "Snapshot table", errp);
1565          if (ret < 0) {
1566              goto fail;
1567          }
1568      }
1569  
1570      /* read the level 1 table */
1571      ret = qcow2_validate_table(bs, header.l1_table_offset,
1572                                 header.l1_size, L1E_SIZE,
1573                                 QCOW_MAX_L1_SIZE, "Active L1 table", errp);
1574      if (ret < 0) {
1575          goto fail;
1576      }
1577      s->l1_size = header.l1_size;
1578      s->l1_table_offset = header.l1_table_offset;
1579  
1580      l1_vm_state_index = size_to_l1(s, header.size);
1581      if (l1_vm_state_index > INT_MAX) {
1582          error_setg(errp, "Image is too big");
1583          ret = -EFBIG;
1584          goto fail;
1585      }
1586      s->l1_vm_state_index = l1_vm_state_index;
1587  
1588      /* the L1 table must contain at least enough entries to put
1589         header.size bytes */
1590      if (s->l1_size < s->l1_vm_state_index) {
1591          error_setg(errp, "L1 table is too small");
1592          ret = -EINVAL;
1593          goto fail;
1594      }
1595  
1596      if (s->l1_size > 0) {
1597          s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE);
1598          if (s->l1_table == NULL) {
1599              error_setg(errp, "Could not allocate L1 table");
1600              ret = -ENOMEM;
1601              goto fail;
1602          }
1603          ret = bdrv_co_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE,
1604                              s->l1_table, 0);
1605          if (ret < 0) {
1606              error_setg_errno(errp, -ret, "Could not read L1 table");
1607              goto fail;
1608          }
1609          for(i = 0;i < s->l1_size; i++) {
1610              s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
1611          }
1612      }
1613  
1614      /* Parse driver-specific options */
1615      ret = qcow2_update_options(bs, options, flags, errp);
1616      if (ret < 0) {
1617          goto fail;
1618      }
1619  
1620      s->flags = flags;
1621  
1622      ret = qcow2_refcount_init(bs);
1623      if (ret != 0) {
1624          error_setg_errno(errp, -ret, "Could not initialize refcount handling");
1625          goto fail;
1626      }
1627  
1628      QLIST_INIT(&s->cluster_allocs);
1629      QTAILQ_INIT(&s->discards);
1630  
1631      /* read qcow2 extensions */
1632      if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
1633                                flags, &update_header, errp)) {
1634          ret = -EINVAL;
1635          goto fail;
1636      }
1637  
1638      if (open_data_file) {
1639          /* Open external data file */
1640          bdrv_graph_co_rdunlock();
1641          s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
1642                                            &child_of_bds, BDRV_CHILD_DATA,
1643                                            true, errp);
1644          bdrv_graph_co_rdlock();
1645          if (*errp) {
1646              ret = -EINVAL;
1647              goto fail;
1648          }
1649  
1650          if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
1651              if (!s->data_file && s->image_data_file) {
1652                  bdrv_graph_co_rdunlock();
1653                  s->data_file = bdrv_co_open_child(s->image_data_file, options,
1654                                                    "data-file", bs,
1655                                                    &child_of_bds,
1656                                                    BDRV_CHILD_DATA, false, errp);
1657                  bdrv_graph_co_rdlock();
1658                  if (!s->data_file) {
1659                      ret = -EINVAL;
1660                      goto fail;
1661                  }
1662              }
1663              if (!s->data_file) {
1664                  error_setg(errp, "'data-file' is required for this image");
1665                  ret = -EINVAL;
1666                  goto fail;
1667              }
1668  
1669              /* No data here */
1670              bs->file->role &= ~BDRV_CHILD_DATA;
1671  
1672              /* Must succeed because we have given up permissions if anything */
1673              bdrv_child_refresh_perms(bs, bs->file, &error_abort);
1674          } else {
1675              if (s->data_file) {
1676                  error_setg(errp, "'data-file' can only be set for images with "
1677                                   "an external data file");
1678                  ret = -EINVAL;
1679                  goto fail;
1680              }
1681  
1682              s->data_file = bs->file;
1683  
1684              if (data_file_is_raw(bs)) {
1685                  error_setg(errp, "data-file-raw requires a data file");
1686                  ret = -EINVAL;
1687                  goto fail;
1688              }
1689          }
1690      }
1691  
1692      /* qcow2_read_extension may have set up the crypto context
1693       * if the crypt method needs a header region, some methods
1694       * don't need header extensions, so must check here
1695       */
1696      if (s->crypt_method_header && !s->crypto) {
1697          if (s->crypt_method_header == QCOW_CRYPT_AES) {
1698              unsigned int cflags = 0;
1699              if (flags & BDRV_O_NO_IO) {
1700                  cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
1701              }
1702              s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
1703                                             NULL, NULL, cflags,
1704                                             QCOW2_MAX_THREADS, errp);
1705              if (!s->crypto) {
1706                  ret = -EINVAL;
1707                  goto fail;
1708              }
1709          } else if (!(flags & BDRV_O_NO_IO)) {
1710              error_setg(errp, "Missing CRYPTO header for crypt method %d",
1711                         s->crypt_method_header);
1712              ret = -EINVAL;
1713              goto fail;
1714          }
1715      }
1716  
1717      /* read the backing file name */
1718      if (header.backing_file_offset != 0) {
1719          len = header.backing_file_size;
1720          if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
1721              len >= sizeof(bs->backing_file)) {
1722              error_setg(errp, "Backing file name too long");
1723              ret = -EINVAL;
1724              goto fail;
1725          }
1726  
1727          s->image_backing_file = g_malloc(len + 1);
1728          ret = bdrv_co_pread(bs->file, header.backing_file_offset, len,
1729                              s->image_backing_file, 0);
1730          if (ret < 0) {
1731              error_setg_errno(errp, -ret, "Could not read backing file name");
1732              goto fail;
1733          }
1734          s->image_backing_file[len] = '\0';
1735  
1736          /*
1737           * Update only when something has changed.  This function is called by
1738           * qcow2_co_invalidate_cache(), and we do not want to reset
1739           * auto_backing_file unless necessary.
1740           */
1741          if (!g_str_equal(s->image_backing_file, bs->backing_file)) {
1742              pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1743                      s->image_backing_file);
1744              pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
1745                      s->image_backing_file);
1746          }
1747      }
1748  
1749      /*
1750       * Internal snapshots; skip reading them in check mode, because
1751       * we do not need them then, and we do not want to abort because
1752       * of a broken table.
1753       */
1754      if (!(flags & BDRV_O_CHECK)) {
1755          s->snapshots_offset = header.snapshots_offset;
1756          s->nb_snapshots = header.nb_snapshots;
1757  
1758          ret = qcow2_read_snapshots(bs, errp);
1759          if (ret < 0) {
1760              goto fail;
1761          }
1762      }
1763  
1764      /* Clear unknown autoclear feature bits */
1765      update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
1766      update_header = update_header && bdrv_is_writable(bs);
1767      if (update_header) {
1768          s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
1769      }
1770  
1771      /* == Handle persistent dirty bitmaps ==
1772       *
     * We want to load dirty bitmaps in three cases:
1774       *
1775       * 1. Normal open of the disk in active mode, not related to invalidation
1776       *    after migration.
1777       *
1778       * 2. Invalidation of the target vm after pre-copy phase of migration, if
1779       *    bitmaps are _not_ migrating through migration channel, i.e.
1780       *    'dirty-bitmaps' capability is disabled.
1781       *
1782       * 3. Invalidation of source vm after failed or canceled migration.
1783       *    This is a very interesting case. There are two possible types of
1784       *    bitmaps:
1785       *
1786       *    A. Stored on inactivation and removed. They should be loaded from the
1787       *       image.
1788       *
1789       *    B. Not stored: not-persistent bitmaps and bitmaps, migrated through
1790       *       the migration channel (with dirty-bitmaps capability).
1791       *
1792       *    On the other hand, there are two possible sub-cases:
1793       *
     *    3.1 disk was changed by somebody else while we were inactive. In
     *        this case all in-RAM dirty bitmaps (both persistent and not) are
     *        definitely invalid. And we don't have any method to determine
     *        this.
1798       *
     *        Simple and safe thing is to just drop all the bitmaps of type B on
     *        inactivation. But in this case we lose bitmaps in valid 3.2 case.
1801       *
1802       *        On the other hand, resuming source vm, if disk was already changed
1803       *        is a bad thing anyway: not only bitmaps, the whole vm state is
1804       *        out of sync with disk.
1805       *
1806       *        This means, that user or management tool, who for some reason
1807       *        decided to resume source vm, after disk was already changed by
1808       *        target vm, should at least drop all dirty bitmaps by hand.
1809       *
1810       *        So, we can ignore this case for now, but TODO: "generation"
1811       *        extension for qcow2, to determine, that image was changed after
1812       *        last inactivation. And if it is changed, we will drop (or at least
1813       *        mark as 'invalid' all the bitmaps of type B, both persistent
1814       *        and not).
1815       *
     *    3.2 disk was _not_ changed while we were inactive. Bitmaps may be
     *        saved to disk ('dirty-bitmaps' capability disabled), or not saved
     *        ('dirty-bitmaps' capability enabled), but we don't need to care
     *        about that: let's load bitmaps as always: stored bitmaps will be
     *        loaded, and the ones not stored have flag IN_USE=1 in the image
     *        and will be skipped on loading.
1822       *
     * One remaining possible case when we don't want to load bitmaps:
     *
     * 4. Open disk in inactive mode in target vm (bitmaps are migrating or
     *    will be loaded on invalidation, no need to try loading them before)
1827       */
1828  
1829      if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
1830          /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
1831          bool header_updated;
1832          if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) {
1833              ret = -EINVAL;
1834              goto fail;
1835          }
1836  
1837          update_header = update_header && !header_updated;
1838      }
1839  
1840      if (update_header) {
1841          ret = qcow2_update_header(bs);
1842          if (ret < 0) {
1843              error_setg_errno(errp, -ret, "Could not update qcow2 header");
1844              goto fail;
1845          }
1846      }
1847  
1848      bs->supported_zero_flags = header.version >= 3 ?
1849                                 BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
1850      bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
1851  
1852      /* Repair image if dirty */
1853      if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
1854          (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
1855          BdrvCheckResult result = {0};
1856  
1857          ret = qcow2_co_check_locked(bs, &result,
1858                                      BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
1859          if (ret < 0 || result.check_errors) {
1860              if (ret >= 0) {
1861                  ret = -EIO;
1862              }
1863              error_setg_errno(errp, -ret, "Could not repair dirty image");
1864              goto fail;
1865          }
1866      }
1867  
1868  #ifdef DEBUG_ALLOC
1869      {
1870          BdrvCheckResult result = {0};
1871          qcow2_check_refcounts(bs, &result, 0);
1872      }
1873  #endif
1874  
1875      qemu_co_queue_init(&s->thread_task_queue);
1876  
1877      return ret;
1878  
1879   fail:
1880      g_free(s->image_data_file);
1881      if (open_data_file && has_data_file(bs)) {
1882          bdrv_graph_co_rdunlock();
1883          bdrv_unref_child(bs, s->data_file);
1884          bdrv_graph_co_rdlock();
1885          s->data_file = NULL;
1886      }
1887      g_free(s->unknown_header_fields);
1888      cleanup_unknown_header_ext(bs);
1889      qcow2_free_snapshots(bs);
1890      qcow2_refcount_close(bs);
1891      qemu_vfree(s->l1_table);
1892      /* else pre-write overlap checks in cache_destroy may crash */
1893      s->l1_table = NULL;
1894      cache_clean_timer_del(bs);
1895      if (s->l2_table_cache) {
1896          qcow2_cache_destroy(s->l2_table_cache);
1897      }
1898      if (s->refcount_block_cache) {
1899          qcow2_cache_destroy(s->refcount_block_cache);
1900      }
1901      qcrypto_block_free(s->crypto);
1902      qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1903      return ret;
1904  }
1905  
1906  typedef struct QCow2OpenCo {
1907      BlockDriverState *bs;
1908      QDict *options;
1909      int flags;
1910      Error **errp;
1911      int ret;
1912  } QCow2OpenCo;
1913  
1914  static void coroutine_fn qcow2_open_entry(void *opaque)
1915  {
1916      QCow2OpenCo *qoc = opaque;
1917      BDRVQcow2State *s = qoc->bs->opaque;
1918  
1919      GRAPH_RDLOCK_GUARD();
1920  
1921      qemu_co_mutex_lock(&s->lock);
1922      qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
1923                               qoc->errp);
1924      qemu_co_mutex_unlock(&s->lock);
1925  
1926      aio_wait_kick();
1927  }
1928  
1929  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1930                        Error **errp)
1931  {
1932      BDRVQcow2State *s = bs->opaque;
1933      QCow2OpenCo qoc = {
1934          .bs = bs,
1935          .options = options,
1936          .flags = flags,
1937          .errp = errp,
1938          .ret = -EINPROGRESS
1939      };
1940      int ret;
1941  
1942      ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
1943      if (ret < 0) {
1944          return ret;
1945      }
1946  
1947      /* Initialise locks */
1948      qemu_co_mutex_init(&s->lock);
1949  
1950      assert(!qemu_in_coroutine());
1951      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1952  
1953      aio_co_enter(bdrv_get_aio_context(bs),
1954                   qemu_coroutine_create(qcow2_open_entry, &qoc));
1955      AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);
1956  
1957      return qoc.ret;
1958  }
1959  
1960  static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1961  {
1962      BDRVQcow2State *s = bs->opaque;
1963  
1964      if (bs->encrypted) {
1965          /* Encryption works on a sector granularity */
1966          bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1967      }
1968      bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
1969      bs->bl.pdiscard_alignment = s->cluster_size;
1970  }
1971  
1972  static int qcow2_reopen_prepare(BDRVReopenState *state,
1973                                  BlockReopenQueue *queue, Error **errp)
1974  {
1975      BDRVQcow2State *s = state->bs->opaque;
1976      Qcow2ReopenState *r;
1977      int ret;
1978  
1979      r = g_new0(Qcow2ReopenState, 1);
1980      state->opaque = r;
1981  
1982      ret = qcow2_update_options_prepare(state->bs, r, state->options,
1983                                         state->flags, errp);
1984      if (ret < 0) {
1985          goto fail;
1986      }
1987  
1988      /* We need to write out any unwritten data if we reopen read-only. */
1989      if ((state->flags & BDRV_O_RDWR) == 0) {
1990          ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1991          if (ret < 0) {
1992              goto fail;
1993          }
1994  
1995          ret = bdrv_flush(state->bs);
1996          if (ret < 0) {
1997              goto fail;
1998          }
1999  
2000          ret = qcow2_mark_clean(state->bs);
2001          if (ret < 0) {
2002              goto fail;
2003          }
2004      }
2005  
2006      /*
2007       * Without an external data file, s->data_file points to the same BdrvChild
2008       * as bs->file. It needs to be resynced after reopen because bs->file may
2009       * be changed. We can't use it in the meantime.
2010       */
2011      if (!has_data_file(state->bs)) {
2012          assert(s->data_file == state->bs->file);
2013          s->data_file = NULL;
2014      }
2015  
2016      return 0;
2017  
2018  fail:
2019      qcow2_update_options_abort(state->bs, r);
2020      g_free(r);
2021      return ret;
2022  }
2023  
2024  static void qcow2_reopen_commit(BDRVReopenState *state)
2025  {
2026      BDRVQcow2State *s = state->bs->opaque;
2027  
2028      qcow2_update_options_commit(state->bs, state->opaque);
2029      if (!s->data_file) {
2030          /*
2031           * If we don't have an external data file, s->data_file was cleared by
2032           * qcow2_reopen_prepare() and needs to be updated.
2033           */
2034          s->data_file = state->bs->file;
2035      }
2036      g_free(state->opaque);
2037  }
2038  
2039  static void qcow2_reopen_commit_post(BDRVReopenState *state)
2040  {
2041      if (state->flags & BDRV_O_RDWR) {
2042          Error *local_err = NULL;
2043  
2044          if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
2045              /*
2046               * This is not fatal, bitmaps just left read-only, so all following
2047               * writes will fail. User can remove read-only bitmaps to unblock
2048               * writes or retry reopen.
2049               */
2050              error_reportf_err(local_err,
2051                                "%s: Failed to make dirty bitmaps writable: ",
2052                                bdrv_get_node_name(state->bs));
2053          }
2054      }
2055  }
2056  
2057  static void qcow2_reopen_abort(BDRVReopenState *state)
2058  {
2059      BDRVQcow2State *s = state->bs->opaque;
2060  
2061      if (!s->data_file) {
2062          /*
2063           * If we don't have an external data file, s->data_file was cleared by
2064           * qcow2_reopen_prepare() and needs to be restored.
2065           */
2066          s->data_file = state->bs->file;
2067      }
2068      qcow2_update_options_abort(state->bs, state->opaque);
2069      g_free(state->opaque);
2070  }
2071  
2072  static void qcow2_join_options(QDict *options, QDict *old_options)
2073  {
2074      bool has_new_overlap_template =
2075          qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
2076          qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
2077      bool has_new_total_cache_size =
2078          qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
2079      bool has_all_cache_options;
2080  
2081      /* New overlap template overrides all old overlap options */
2082      if (has_new_overlap_template) {
2083          qdict_del(old_options, QCOW2_OPT_OVERLAP);
2084          qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
2085          qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
2086          qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
2087          qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
2088          qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
2089          qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
2090          qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
2091          qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
2092          qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
2093      }
2094  
2095      /* New total cache size overrides all old options */
2096      if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
2097          qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
2098          qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
2099      }
2100  
2101      qdict_join(options, old_options, false);
2102  
2103      /*
2104       * If after merging all cache size options are set, an old total size is
2105       * overwritten. Do keep all options, however, if all three are new. The
2106       * resulting error message is what we want to happen.
2107       */
2108      has_all_cache_options =
2109          qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
2110          qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
2111          qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
2112  
2113      if (has_all_cache_options && !has_new_total_cache_size) {
2114          qdict_del(options, QCOW2_OPT_CACHE_SIZE);
2115      }
2116  }
2117  
2118  static int coroutine_fn GRAPH_RDLOCK
2119  qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
2120                        int64_t count, int64_t *pnum, int64_t *map,
2121                        BlockDriverState **file)
2122  {
2123      BDRVQcow2State *s = bs->opaque;
2124      uint64_t host_offset;
2125      unsigned int bytes;
2126      QCow2SubclusterType type;
2127      int ret, status = 0;
2128  
2129      qemu_co_mutex_lock(&s->lock);
2130  
2131      if (!s->metadata_preallocation_checked) {
2132          ret = qcow2_detect_metadata_preallocation(bs);
2133          s->metadata_preallocation = (ret == 1);
2134          s->metadata_preallocation_checked = true;
2135      }
2136  
2137      bytes = MIN(INT_MAX, count);
2138      ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type);
2139      qemu_co_mutex_unlock(&s->lock);
2140      if (ret < 0) {
2141          return ret;
2142      }
2143  
2144      *pnum = bytes;
2145  
2146      if ((type == QCOW2_SUBCLUSTER_NORMAL ||
2147           type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
2148           type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) {
2149          *map = host_offset;
2150          *file = s->data_file->bs;
2151          status |= BDRV_BLOCK_OFFSET_VALID;
2152      }
2153      if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
2154          type == QCOW2_SUBCLUSTER_ZERO_ALLOC) {
2155          status |= BDRV_BLOCK_ZERO;
2156      } else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
2157                 type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) {
2158          status |= BDRV_BLOCK_DATA;
2159      }
2160      if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
2161          (status & BDRV_BLOCK_OFFSET_VALID))
2162      {
2163          status |= BDRV_BLOCK_RECURSE;
2164      }
2165      return status;
2166  }
2167  
2168  static int coroutine_fn GRAPH_RDLOCK
2169  qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta **pl2meta, bool link_l2)
2170  {
2171      int ret = 0;
2172      QCowL2Meta *l2meta = *pl2meta;
2173  
2174      while (l2meta != NULL) {
2175          QCowL2Meta *next;
2176  
2177          if (link_l2) {
2178              ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
2179              if (ret) {
2180                  goto out;
2181              }
2182          } else {
2183              qcow2_alloc_cluster_abort(bs, l2meta);
2184          }
2185  
2186          /* Take the request off the list of running requests */
2187          QLIST_REMOVE(l2meta, next_in_flight);
2188  
2189          qemu_co_queue_restart_all(&l2meta->dependent_requests);
2190  
2191          next = l2meta->next;
2192          g_free(l2meta);
2193          l2meta = next;
2194      }
2195  out:
2196      *pl2meta = l2meta;
2197      return ret;
2198  }
2199  
2200  static int coroutine_fn GRAPH_RDLOCK
2201  qcow2_co_preadv_encrypted(BlockDriverState *bs,
2202                             uint64_t host_offset,
2203                             uint64_t offset,
2204                             uint64_t bytes,
2205                             QEMUIOVector *qiov,
2206                             uint64_t qiov_offset)
2207  {
2208      int ret;
2209      BDRVQcow2State *s = bs->opaque;
2210      uint8_t *buf;
2211  
2212      assert(bs->encrypted && s->crypto);
2213      assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2214  
2215      /*
2216       * For encrypted images, read everything into a temporary
2217       * contiguous buffer on which the AES functions can work.
2218       * Also, decryption in a separate buffer is better as it
2219       * prevents the guest from learning information about the
2220       * encrypted nature of the virtual disk.
2221       */
2222  
2223      buf = qemu_try_blockalign(s->data_file->bs, bytes);
2224      if (buf == NULL) {
2225          return -ENOMEM;
2226      }
2227  
2228      BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
2229      ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0);
2230      if (ret < 0) {
2231          goto fail;
2232      }
2233  
2234      if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0)
2235      {
2236          ret = -EIO;
2237          goto fail;
2238      }
2239      qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
2240  
2241  fail:
2242      qemu_vfree(buf);
2243  
2244      return ret;
2245  }
2246  
2247  typedef struct Qcow2AioTask {
2248      AioTask task;
2249  
2250      BlockDriverState *bs;
2251      QCow2SubclusterType subcluster_type; /* only for read */
2252      uint64_t host_offset; /* or l2_entry for compressed read */
2253      uint64_t offset;
2254      uint64_t bytes;
2255      QEMUIOVector *qiov;
2256      uint64_t qiov_offset;
2257      QCowL2Meta *l2meta; /* only for write */
2258  } Qcow2AioTask;
2259  
2260  static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
2261  static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
2262                                         AioTaskPool *pool,
2263                                         AioTaskFunc func,
2264                                         QCow2SubclusterType subcluster_type,
2265                                         uint64_t host_offset,
2266                                         uint64_t offset,
2267                                         uint64_t bytes,
2268                                         QEMUIOVector *qiov,
2269                                         size_t qiov_offset,
2270                                         QCowL2Meta *l2meta)
2271  {
2272      Qcow2AioTask local_task;
2273      Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
2274  
2275      *task = (Qcow2AioTask) {
2276          .task.func = func,
2277          .bs = bs,
2278          .subcluster_type = subcluster_type,
2279          .qiov = qiov,
2280          .host_offset = host_offset,
2281          .offset = offset,
2282          .bytes = bytes,
2283          .qiov_offset = qiov_offset,
2284          .l2meta = l2meta,
2285      };
2286  
2287      trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
2288                           func == qcow2_co_preadv_task_entry ? "read" : "write",
2289                           subcluster_type, host_offset, offset, bytes,
2290                           qiov, qiov_offset);
2291  
2292      if (!pool) {
2293          return func(&task->task);
2294      }
2295  
2296      aio_task_pool_start_task(pool, &task->task);
2297  
2298      return 0;
2299  }
2300  
2301  static int coroutine_fn GRAPH_RDLOCK
2302  qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
2303                       uint64_t host_offset, uint64_t offset, uint64_t bytes,
2304                       QEMUIOVector *qiov, size_t qiov_offset)
2305  {
2306      BDRVQcow2State *s = bs->opaque;
2307  
2308      switch (subc_type) {
2309      case QCOW2_SUBCLUSTER_ZERO_PLAIN:
2310      case QCOW2_SUBCLUSTER_ZERO_ALLOC:
2311          /* Both zero types are handled in qcow2_co_preadv_part */
2312          g_assert_not_reached();
2313  
2314      case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
2315      case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
2316          assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
2317  
2318          BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2319          return bdrv_co_preadv_part(bs->backing, offset, bytes,
2320                                     qiov, qiov_offset, 0);
2321  
2322      case QCOW2_SUBCLUSTER_COMPRESSED:
2323          return qcow2_co_preadv_compressed(bs, host_offset,
2324                                            offset, bytes, qiov, qiov_offset);
2325  
2326      case QCOW2_SUBCLUSTER_NORMAL:
2327          if (bs->encrypted) {
2328              return qcow2_co_preadv_encrypted(bs, host_offset,
2329                                               offset, bytes, qiov, qiov_offset);
2330          }
2331  
2332          BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
2333          return bdrv_co_preadv_part(s->data_file, host_offset,
2334                                     bytes, qiov, qiov_offset, 0);
2335  
2336      default:
2337          g_assert_not_reached();
2338      }
2339  
2340      g_assert_not_reached();
2341  }
2342  
2343  /*
2344   * This function can count as GRAPH_RDLOCK because qcow2_co_preadv_part() holds
2345   * the graph lock and keeps it until this coroutine has terminated.
2346   */
2347  static int coroutine_fn GRAPH_RDLOCK qcow2_co_preadv_task_entry(AioTask *task)
2348  {
2349      Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2350  
2351      assert(!t->l2meta);
2352  
2353      return qcow2_co_preadv_task(t->bs, t->subcluster_type,
2354                                  t->host_offset, t->offset, t->bytes,
2355                                  t->qiov, t->qiov_offset);
2356  }
2357  
2358  static int coroutine_fn GRAPH_RDLOCK
2359  qcow2_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
2360                       QEMUIOVector *qiov, size_t qiov_offset,
2361                       BdrvRequestFlags flags)
2362  {
2363      BDRVQcow2State *s = bs->opaque;
2364      int ret = 0;
2365      unsigned int cur_bytes; /* number of bytes in current iteration */
2366      uint64_t host_offset = 0;
2367      QCow2SubclusterType type;
2368      AioTaskPool *aio = NULL;
2369  
2370      while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2371          /* prepare next request */
2372          cur_bytes = MIN(bytes, INT_MAX);
2373          if (s->crypto) {
2374              cur_bytes = MIN(cur_bytes,
2375                              QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2376          }
2377  
2378          qemu_co_mutex_lock(&s->lock);
2379          ret = qcow2_get_host_offset(bs, offset, &cur_bytes,
2380                                      &host_offset, &type);
2381          qemu_co_mutex_unlock(&s->lock);
2382          if (ret < 0) {
2383              goto out;
2384          }
2385  
2386          if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
2387              type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
2388              (type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) ||
2389              (type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing))
2390          {
2391              qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
2392          } else {
2393              if (!aio && cur_bytes != bytes) {
2394                  aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2395              }
2396              ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type,
2397                                   host_offset, offset, cur_bytes,
2398                                   qiov, qiov_offset, NULL);
2399              if (ret < 0) {
2400                  goto out;
2401              }
2402          }
2403  
2404          bytes -= cur_bytes;
2405          offset += cur_bytes;
2406          qiov_offset += cur_bytes;
2407      }
2408  
2409  out:
2410      if (aio) {
2411          aio_task_pool_wait_all(aio);
2412          if (ret == 0) {
2413              ret = aio_task_pool_status(aio);
2414          }
2415          g_free(aio);
2416      }
2417  
2418      return ret;
2419  }
2420  
2421  /* Check if it's possible to merge a write request with the writing of
2422   * the data from the COW regions */
2423  static bool merge_cow(uint64_t offset, unsigned bytes,
2424                        QEMUIOVector *qiov, size_t qiov_offset,
2425                        QCowL2Meta *l2meta)
2426  {
2427      QCowL2Meta *m;
2428  
2429      for (m = l2meta; m != NULL; m = m->next) {
2430          /* If both COW regions are empty then there's nothing to merge */
2431          if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2432              continue;
2433          }
2434  
2435          /* If COW regions are handled already, skip this too */
2436          if (m->skip_cow) {
2437              continue;
2438          }
2439  
2440          /*
2441           * The write request should start immediately after the first
2442           * COW region. This does not always happen because the area
2443           * touched by the request can be larger than the one defined
2444           * by @m (a single request can span an area consisting of a
2445           * mix of previously unallocated and allocated clusters, that
2446           * is why @l2meta is a list).
2447           */
2448          if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2449              /* In this case the request starts before this region */
2450              assert(offset < l2meta_cow_start(m));
2451              assert(m->cow_start.nb_bytes == 0);
2452              continue;
2453          }
2454  
2455          /* The write request should end immediately before the second
2456           * COW region (see above for why it does not always happen) */
2457          if (m->offset + m->cow_end.offset != offset + bytes) {
2458              assert(offset + bytes > m->offset + m->cow_end.offset);
2459              assert(m->cow_end.nb_bytes == 0);
2460              continue;
2461          }
2462  
2463          /* Make sure that adding both COW regions to the QEMUIOVector
2464           * does not exceed IOV_MAX */
2465          if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
2466              continue;
2467          }
2468  
2469          m->data_qiov = qiov;
2470          m->data_qiov_offset = qiov_offset;
2471          return true;
2472      }
2473  
2474      return false;
2475  }
2476  
2477  /*
2478   * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
2479   * Note that returning 0 does not guarantee non-zero data.
2480   */
2481  static int coroutine_fn GRAPH_RDLOCK
2482  is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
2483  {
2484      /*
2485       * This check is designed for optimization shortcut so it must be
2486       * efficient.
2487       * Instead of is_zero(), use bdrv_co_is_zero_fast() as it is
2488       * faster (but not as accurate and can result in false negatives).
2489       */
2490      int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset,
2491                                     m->cow_start.nb_bytes);
2492      if (ret <= 0) {
2493          return ret;
2494      }
2495  
2496      return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset,
2497                                  m->cow_end.nb_bytes);
2498  }
2499  
2500  static int coroutine_fn GRAPH_RDLOCK
2501  handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
2502  {
2503      BDRVQcow2State *s = bs->opaque;
2504      QCowL2Meta *m;
2505  
2506      if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
2507          return 0;
2508      }
2509  
2510      if (bs->encrypted) {
2511          return 0;
2512      }
2513  
2514      for (m = l2meta; m != NULL; m = m->next) {
2515          int ret;
2516          uint64_t start_offset = m->alloc_offset + m->cow_start.offset;
2517          unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes -
2518              m->cow_start.offset;
2519  
2520          if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
2521              continue;
2522          }
2523  
2524          ret = is_zero_cow(bs, m);
2525          if (ret < 0) {
2526              return ret;
2527          } else if (ret == 0) {
2528              continue;
2529          }
2530  
2531          /*
2532           * instead of writing zero COW buffers,
2533           * efficiently zero out the whole clusters
2534           */
2535  
2536          ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes,
2537                                              true);
2538          if (ret < 0) {
2539              return ret;
2540          }
2541  
2542          BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
2543          ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes,
2544                                      BDRV_REQ_NO_FALLBACK);
2545          if (ret < 0) {
2546              if (ret != -ENOTSUP && ret != -EAGAIN) {
2547                  return ret;
2548              }
2549              continue;
2550          }
2551  
2552          trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
2553          m->skip_cow = true;
2554      }
2555      return 0;
2556  }
2557  
2558  /*
2559   * qcow2_co_pwritev_task
2560   * Called with s->lock unlocked
2561   * l2meta  - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
2562   *           not use it somehow after qcow2_co_pwritev_task() call
2563   */
2564  static coroutine_fn GRAPH_RDLOCK
2565  int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
2566                            uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
2567                            uint64_t qiov_offset, QCowL2Meta *l2meta)
2568  {
2569      int ret;
2570      BDRVQcow2State *s = bs->opaque;
2571      void *crypt_buf = NULL;
2572      QEMUIOVector encrypted_qiov;
2573  
2574      if (bs->encrypted) {
2575          assert(s->crypto);
2576          assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2577          crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
2578          if (crypt_buf == NULL) {
2579              ret = -ENOMEM;
2580              goto out_unlocked;
2581          }
2582          qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
2583  
2584          if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) {
2585              ret = -EIO;
2586              goto out_unlocked;
2587          }
2588  
2589          qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
2590          qiov = &encrypted_qiov;
2591          qiov_offset = 0;
2592      }
2593  
2594      /* Try to efficiently initialize the physical space with zeroes */
2595      ret = handle_alloc_space(bs, l2meta);
2596      if (ret < 0) {
2597          goto out_unlocked;
2598      }
2599  
2600      /*
2601       * If we need to do COW, check if it's possible to merge the
2602       * writing of the guest data together with that of the COW regions.
2603       * If it's not possible (or not necessary) then write the
2604       * guest data now.
2605       */
2606      if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
2607          BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
2608          trace_qcow2_writev_data(qemu_coroutine_self(), host_offset);
2609          ret = bdrv_co_pwritev_part(s->data_file, host_offset,
2610                                     bytes, qiov, qiov_offset, 0);
2611          if (ret < 0) {
2612              goto out_unlocked;
2613          }
2614      }
2615  
2616      qemu_co_mutex_lock(&s->lock);
2617  
2618      ret = qcow2_handle_l2meta(bs, &l2meta, true);
2619      goto out_locked;
2620  
2621  out_unlocked:
2622      qemu_co_mutex_lock(&s->lock);
2623  
2624  out_locked:
2625      qcow2_handle_l2meta(bs, &l2meta, false);
2626      qemu_co_mutex_unlock(&s->lock);
2627  
2628      qemu_vfree(crypt_buf);
2629  
2630      return ret;
2631  }
2632  
2633  /*
2634   * This function can count as GRAPH_RDLOCK because qcow2_co_pwritev_part() holds
2635   * the graph lock and keeps it until this coroutine has terminated.
2636   */
2637  static coroutine_fn GRAPH_RDLOCK int qcow2_co_pwritev_task_entry(AioTask *task)
2638  {
2639      Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2640  
2641      assert(!t->subcluster_type);
2642  
2643      return qcow2_co_pwritev_task(t->bs, t->host_offset,
2644                                   t->offset, t->bytes, t->qiov, t->qiov_offset,
2645                                   t->l2meta);
2646  }
2647  
2648  static int coroutine_fn GRAPH_RDLOCK
2649  qcow2_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
2650                        QEMUIOVector *qiov, size_t qiov_offset,
2651                        BdrvRequestFlags flags)
2652  {
2653      BDRVQcow2State *s = bs->opaque;
2654      int offset_in_cluster;
2655      int ret;
2656      unsigned int cur_bytes; /* number of sectors in current iteration */
2657      uint64_t host_offset;
2658      QCowL2Meta *l2meta = NULL;
2659      AioTaskPool *aio = NULL;
2660  
2661      trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
2662  
2663      while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2664  
2665          l2meta = NULL;
2666  
2667          trace_qcow2_writev_start_part(qemu_coroutine_self());
2668          offset_in_cluster = offset_into_cluster(s, offset);
2669          cur_bytes = MIN(bytes, INT_MAX);
2670          if (bs->encrypted) {
2671              cur_bytes = MIN(cur_bytes,
2672                              QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
2673                              - offset_in_cluster);
2674          }
2675  
2676          qemu_co_mutex_lock(&s->lock);
2677  
2678          ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
2679                                        &host_offset, &l2meta);
2680          if (ret < 0) {
2681              goto out_locked;
2682          }
2683  
2684          ret = qcow2_pre_write_overlap_check(bs, 0, host_offset,
2685                                              cur_bytes, true);
2686          if (ret < 0) {
2687              goto out_locked;
2688          }
2689  
2690          qemu_co_mutex_unlock(&s->lock);
2691  
2692          if (!aio && cur_bytes != bytes) {
2693              aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2694          }
2695          ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
2696                               host_offset, offset,
2697                               cur_bytes, qiov, qiov_offset, l2meta);
2698          l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
2699          if (ret < 0) {
2700              goto fail_nometa;
2701          }
2702  
2703          bytes -= cur_bytes;
2704          offset += cur_bytes;
2705          qiov_offset += cur_bytes;
2706          trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2707      }
2708      ret = 0;
2709  
2710      qemu_co_mutex_lock(&s->lock);
2711  
2712  out_locked:
2713      qcow2_handle_l2meta(bs, &l2meta, false);
2714  
2715      qemu_co_mutex_unlock(&s->lock);
2716  
2717  fail_nometa:
2718      if (aio) {
2719          aio_task_pool_wait_all(aio);
2720          if (ret == 0) {
2721              ret = aio_task_pool_status(aio);
2722          }
2723          g_free(aio);
2724      }
2725  
2726      trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2727  
2728      return ret;
2729  }
2730  
2731  static int qcow2_inactivate(BlockDriverState *bs)
2732  {
2733      BDRVQcow2State *s = bs->opaque;
2734      int ret, result = 0;
2735      Error *local_err = NULL;
2736  
2737      qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
2738      if (local_err != NULL) {
2739          result = -EINVAL;
2740          error_reportf_err(local_err, "Lost persistent bitmaps during "
2741                            "inactivation of node '%s': ",
2742                            bdrv_get_device_or_node_name(bs));
2743      }
2744  
2745      ret = qcow2_cache_flush(bs, s->l2_table_cache);
2746      if (ret) {
2747          result = ret;
2748          error_report("Failed to flush the L2 table cache: %s",
2749                       strerror(-ret));
2750      }
2751  
2752      ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2753      if (ret) {
2754          result = ret;
2755          error_report("Failed to flush the refcount block cache: %s",
2756                       strerror(-ret));
2757      }
2758  
2759      if (result == 0) {
2760          qcow2_mark_clean(bs);
2761      }
2762  
2763      return result;
2764  }
2765  
2766  static void qcow2_do_close(BlockDriverState *bs, bool close_data_file)
2767  {
2768      BDRVQcow2State *s = bs->opaque;
2769      qemu_vfree(s->l1_table);
2770      /* else pre-write overlap checks in cache_destroy may crash */
2771      s->l1_table = NULL;
2772  
2773      if (!(s->flags & BDRV_O_INACTIVE)) {
2774          qcow2_inactivate(bs);
2775      }
2776  
2777      cache_clean_timer_del(bs);
2778      qcow2_cache_destroy(s->l2_table_cache);
2779      qcow2_cache_destroy(s->refcount_block_cache);
2780  
2781      qcrypto_block_free(s->crypto);
2782      s->crypto = NULL;
2783      qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
2784  
2785      g_free(s->unknown_header_fields);
2786      cleanup_unknown_header_ext(bs);
2787  
2788      g_free(s->image_data_file);
2789      g_free(s->image_backing_file);
2790      g_free(s->image_backing_format);
2791  
2792      if (close_data_file && has_data_file(bs)) {
2793          bdrv_unref_child(bs, s->data_file);
2794          s->data_file = NULL;
2795      }
2796  
2797      qcow2_refcount_close(bs);
2798      qcow2_free_snapshots(bs);
2799  }
2800  
2801  static void qcow2_close(BlockDriverState *bs)
2802  {
2803      qcow2_do_close(bs, true);
2804  }
2805  
2806  static void coroutine_fn GRAPH_RDLOCK
2807  qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
2808  {
2809      ERRP_GUARD();
2810      BDRVQcow2State *s = bs->opaque;
2811      BdrvChild *data_file;
2812      int flags = s->flags;
2813      QCryptoBlock *crypto = NULL;
2814      QDict *options;
2815      int ret;
2816  
2817      /*
2818       * Backing files are read-only which makes all of their metadata immutable,
2819       * that means we don't have to worry about reopening them here.
2820       */
2821  
2822      crypto = s->crypto;
2823      s->crypto = NULL;
2824  
2825      /*
2826       * Do not reopen s->data_file (i.e., have qcow2_do_close() not close it,
2827       * and then prevent qcow2_do_open() from opening it), because this function
2828       * runs in the I/O path and as such we must not invoke global-state
2829       * functions like bdrv_unref_child() and bdrv_open_child().
2830       */
2831  
2832      qcow2_do_close(bs, false);
2833  
2834      data_file = s->data_file;
2835      memset(s, 0, sizeof(BDRVQcow2State));
2836      s->data_file = data_file;
2837  
2838      options = qdict_clone_shallow(bs->options);
2839  
2840      flags &= ~BDRV_O_INACTIVE;
2841      qemu_co_mutex_lock(&s->lock);
2842      ret = qcow2_do_open(bs, options, flags, false, errp);
2843      qemu_co_mutex_unlock(&s->lock);
2844      qobject_unref(options);
2845      if (ret < 0) {
2846          error_prepend(errp, "Could not reopen qcow2 layer: ");
2847          bs->drv = NULL;
2848          return;
2849      }
2850  
2851      s->crypto = crypto;
2852  }
2853  
2854  static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2855      size_t len, size_t buflen)
2856  {
2857      QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2858      size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2859  
2860      if (buflen < ext_len) {
2861          return -ENOSPC;
2862      }
2863  
2864      *ext_backing_fmt = (QCowExtension) {
2865          .magic  = cpu_to_be32(magic),
2866          .len    = cpu_to_be32(len),
2867      };
2868  
2869      if (len) {
2870          memcpy(buf + sizeof(QCowExtension), s, len);
2871      }
2872  
2873      return ext_len;
2874  }
2875  
2876  /*
2877   * Updates the qcow2 header, including the variable length parts of it, i.e.
2878   * the backing file name and all extensions. qcow2 was not designed to allow
2879   * such changes, so if we run out of space (we can only use the first cluster)
2880   * this function may fail.
2881   *
 * The header is assembled in a buffer of s->cluster_size bytes in this order:
 * the fixed header fields (cut short for version 2), any unknown header
 * fields, the backing format extension, the external data file extension,
 * the crypto header extension, the feature table, the bitmaps extension, all
 * unknown extensions, the end-of-extensions marker and finally the backing
 * file name.  Parts whose condition is not met are left out.  The whole
 * buffer is then written to offset 0 of bs->file.
 *
 * -ENOSPC is returned when a part does not fit into the buffer and -EINVAL
 * when s->qcow_version is neither 2 nor 3.
 *
2882   * Returns 0 on success, -errno in error cases.
2883   */
2884  int qcow2_update_header(BlockDriverState *bs)
2885  {
2886      BDRVQcow2State *s = bs->opaque;
2887      QCowHeader *header;
2888      char *buf;
2889      size_t buflen = s->cluster_size;
2890      int ret;
2891      uint64_t total_size;
2892      uint32_t refcount_table_clusters;
2893      size_t header_length;
2894      Qcow2UnknownHeaderExtension *uext;
2895  
2896      buf = qemu_blockalign(bs, buflen);
2897  
    /*
     * header keeps pointing at the start of the buffer for the rest of the
     * function, while buf is advanced past every part that gets added.
     */
2898      /* Header structure */
2899      header = (QCowHeader*) buf;
2900  
2901      if (buflen < sizeof(*header)) {
2902          ret = -ENOSPC;
2903          goto fail;
2904      }
2905  
2906      header_length = sizeof(*header) + s->unknown_header_fields_size;
2907      total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2908      refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2909  
2910      ret = validate_compression_type(s, NULL);
2911      if (ret) {
2912          goto fail;
2913      }
2914  
    /* Multi-byte fields are converted to big-endian as they are stored */
2915      *header = (QCowHeader) {
2916          /* Version 2 fields */
2917          .magic                  = cpu_to_be32(QCOW_MAGIC),
2918          .version                = cpu_to_be32(s->qcow_version),
2919          .backing_file_offset    = 0,
2920          .backing_file_size      = 0,
2921          .cluster_bits           = cpu_to_be32(s->cluster_bits),
2922          .size                   = cpu_to_be64(total_size),
2923          .crypt_method           = cpu_to_be32(s->crypt_method_header),
2924          .l1_size                = cpu_to_be32(s->l1_size),
2925          .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
2926          .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
2927          .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2928          .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
2929          .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
2930  
2931          /* Version 3 fields */
2932          .incompatible_features  = cpu_to_be64(s->incompatible_features),
2933          .compatible_features    = cpu_to_be64(s->compatible_features),
2934          .autoclear_features     = cpu_to_be64(s->autoclear_features),
2935          .refcount_order         = cpu_to_be32(s->refcount_order),
2936          .header_length          = cpu_to_be32(header_length),
2937          .compression_type       = s->compression_type,
2938      };
2939  
2940      /* For older versions, write a shorter header */
    /* ret temporarily holds the number of fixed header bytes to keep */
2941      switch (s->qcow_version) {
2942      case 2:
2943          ret = offsetof(QCowHeader, incompatible_features);
2944          break;
2945      case 3:
2946          ret = sizeof(*header);
2947          break;
2948      default:
2949          ret = -EINVAL;
2950          goto fail;
2951      }
2952  
    /*
     * Skip the fixed header and zero the rest of the buffer.  For version 2
     * this also wipes the version 3 fields that were filled in above.
     */
2953      buf += ret;
2954      buflen -= ret;
2955      memset(buf, 0, buflen);
2956  
2957      /* Preserve any unknown field in the header */
2958      if (s->unknown_header_fields_size) {
2959          if (buflen < s->unknown_header_fields_size) {
2960              ret = -ENOSPC;
2961              goto fail;
2962          }
2963  
2964          memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2965          buf += s->unknown_header_fields_size;
2966          buflen -= s->unknown_header_fields_size;
2967      }
2968  
2969      /* Backing file format header extension */
2970      if (s->image_backing_format) {
2971          ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2972                               s->image_backing_format,
2973                               strlen(s->image_backing_format),
2974                               buflen);
2975          if (ret < 0) {
2976              goto fail;
2977          }
2978  
2979          buf += ret;
2980          buflen -= ret;
2981      }
2982  
2983      /* External data file header extension */
2984      if (has_data_file(bs) && s->image_data_file) {
2985          ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2986                               s->image_data_file, strlen(s->image_data_file),
2987                               buflen);
2988          if (ret < 0) {
2989              goto fail;
2990          }
2991  
2992          buf += ret;
2993          buflen -= ret;
2994      }
2995  
2996      /* Full disk encryption header pointer extension */
2997      if (s->crypto_header.offset != 0) {
        /*
         * s->crypto_header is converted to big-endian in place for the copy
         * into the buffer and converted back right afterwards, before the
         * result of header_ext_add() is checked.
         */
2998          s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2999          s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
3000          ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
3001                               &s->crypto_header, sizeof(s->crypto_header),
3002                               buflen);
3003          s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
3004          s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
3005          if (ret < 0) {
3006              goto fail;
3007          }
3008          buf += ret;
3009          buflen -= ret;
3010      }
3011  
3012      /*
3013       * Feature table.  A mere 8 feature names occupies 392 bytes, and
3014       * when coupled with the v3 minimum header of 104 bytes plus the
3015       * 8-byte end-of-extension marker, that would leave only 8 bytes
3016       * for a backing file name in an image with 512-byte clusters.
3017       * Thus, we choose to omit this header for cluster sizes 4k and
3018       * smaller.
3019       */
3020      if (s->qcow_version >= 3 && s->cluster_size > 4096) {
3021          static const Qcow2Feature features[] = {
3022              {
3023                  .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
3024                  .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
3025                  .name = "dirty bit",
3026              },
3027              {
3028                  .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
3029                  .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
3030                  .name = "corrupt bit",
3031              },
3032              {
3033                  .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
3034                  .bit  = QCOW2_INCOMPAT_DATA_FILE_BITNR,
3035                  .name = "external data file",
3036              },
3037              {
3038                  .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
3039                  .bit  = QCOW2_INCOMPAT_COMPRESSION_BITNR,
3040                  .name = "compression type",
3041              },
3042              {
3043                  .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
3044                  .bit  = QCOW2_INCOMPAT_EXTL2_BITNR,
3045                  .name = "extended L2 entries",
3046              },
3047              {
3048                  .type = QCOW2_FEAT_TYPE_COMPATIBLE,
3049                  .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
3050                  .name = "lazy refcounts",
3051              },
3052              {
3053                  .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
3054                  .bit  = QCOW2_AUTOCLEAR_BITMAPS_BITNR,
3055                  .name = "bitmaps",
3056              },
3057              {
3058                  .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
3059                  .bit  = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,
3060                  .name = "raw external data",
3061              },
3062          };
3063  
3064          ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
3065                               features, sizeof(features), buflen);
3066          if (ret < 0) {
3067              goto fail;
3068          }
3069          buf += ret;
3070          buflen -= ret;
3071      }
3072  
3073      /* Bitmap extension */
3074      if (s->nb_bitmaps > 0) {
3075          Qcow2BitmapHeaderExt bitmaps_header = {
3076              .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
3077              .bitmap_directory_size =
3078                      cpu_to_be64(s->bitmap_directory_size),
3079              .bitmap_directory_offset =
3080                      cpu_to_be64(s->bitmap_directory_offset)
3081          };
3082          ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
3083                               &bitmaps_header, sizeof(bitmaps_header),
3084                               buflen);
3085          if (ret < 0) {
3086              goto fail;
3087          }
3088          buf += ret;
3089          buflen -= ret;
3090      }
3091  
3092      /* Keep unknown header extensions */
3093      QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
3094          ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
3095          if (ret < 0) {
3096              goto fail;
3097          }
3098  
3099          buf += ret;
3100          buflen -= ret;
3101      }
3102  
3103      /* End of header extensions */
3104      ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
3105      if (ret < 0) {
3106          goto fail;
3107      }
3108  
3109      buf += ret;
3110      buflen -= ret;
3111  
3112      /* Backing file name */
3113      if (s->image_backing_file) {
3114          size_t backing_file_len = strlen(s->image_backing_file);
3115  
3116          if (buflen < backing_file_len) {
3117              ret = -ENOSPC;
3118              goto fail;
3119          }
3120  
3121          /* Using strncpy is ok here, since buf is not NUL-terminated. */
3122          strncpy(buf, s->image_backing_file, buflen);
3123  
        /* The name's offset is its distance from the start of the buffer */
3124          header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
3125          header->backing_file_size   = cpu_to_be32(backing_file_len);
3126      }
3127  
3128      /* Write the new header */
3129      ret = bdrv_pwrite(bs->file, 0, s->cluster_size, header, 0);
3130      if (ret < 0) {
3131          goto fail;
3132      }
3133  
3134      ret = 0;
3135  fail:
    /* Free through header: buf no longer points at the allocation start */
3136      qemu_vfree(header);
3137      return ret;
3138  }
3139  
3140  static int qcow2_change_backing_file(BlockDriverState *bs,
3141      const char *backing_file, const char *backing_fmt)
3142  {
3143      BDRVQcow2State *s = bs->opaque;
3144  
3145      /* Adding a backing file means that the external data file alone won't be
3146       * enough to make sense of the content */
3147      if (backing_file && data_file_is_raw(bs)) {
3148          return -EINVAL;
3149      }
3150  
3151      if (backing_file && strlen(backing_file) > 1023) {
3152          return -EINVAL;
3153      }
3154  
3155      pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
3156              backing_file ?: "");
3157      pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
3158      pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
3159  
3160      g_free(s->image_backing_file);
3161      g_free(s->image_backing_format);
3162  
3163      s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
3164      s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
3165  
3166      return qcow2_update_header(bs);
3167  }
3168  
3169  static int coroutine_fn GRAPH_RDLOCK
3170  qcow2_set_up_encryption(BlockDriverState *bs,
3171                          QCryptoBlockCreateOptions *cryptoopts,
3172                          Error **errp)
3173  {
3174      BDRVQcow2State *s = bs->opaque;
3175      QCryptoBlock *crypto = NULL;
3176      int fmt, ret;
3177  
3178      switch (cryptoopts->format) {
3179      case Q_CRYPTO_BLOCK_FORMAT_LUKS:
3180          fmt = QCOW_CRYPT_LUKS;
3181          break;
3182      case Q_CRYPTO_BLOCK_FORMAT_QCOW:
3183          fmt = QCOW_CRYPT_AES;
3184          break;
3185      default:
3186          error_setg(errp, "Crypto format not supported in qcow2");
3187          return -EINVAL;
3188      }
3189  
3190      s->crypt_method_header = fmt;
3191  
3192      crypto = qcrypto_block_create(cryptoopts, "encrypt.",
3193                                    qcow2_crypto_hdr_init_func,
3194                                    qcow2_crypto_hdr_write_func,
3195                                    bs, errp);
3196      if (!crypto) {
3197          return -EINVAL;
3198      }
3199  
3200      ret = qcow2_update_header(bs);
3201      if (ret < 0) {
3202          error_setg_errno(errp, -ret, "Could not write encryption header");
3203          goto out;
3204      }
3205  
3206      ret = 0;
3207   out:
3208      qcrypto_block_free(crypto);
3209      return ret;
3210  }
3211  
3212  /**
3213   * Preallocates metadata structures for data clusters between @offset (in the
3214   * guest disk) and @new_length (which is thus generally the new guest disk
3215   * size).
3216   *
3217   * Returns: 0 on success, -errno on failure.
3218   */
3219  static int coroutine_fn GRAPH_RDLOCK
3220  preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length,
3221                 PreallocMode mode, Error **errp)
3222  {
3223      BDRVQcow2State *s = bs->opaque;
3224      uint64_t bytes;
3225      uint64_t host_offset = 0;
3226      int64_t file_length;
3227      unsigned int cur_bytes;
3228      int ret;
3229      QCowL2Meta *meta = NULL, *m;
3230  
3231      assert(offset <= new_length);
3232      bytes = new_length - offset;
3233  
3234      while (bytes) {
3235          cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
3236          ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
3237                                        &host_offset, &meta);
3238          if (ret < 0) {
3239              error_setg_errno(errp, -ret, "Allocating clusters failed");
3240              goto out;
3241          }
3242  
3243          for (m = meta; m != NULL; m = m->next) {
3244              m->prealloc = true;
3245          }
3246  
3247          ret = qcow2_handle_l2meta(bs, &meta, true);
3248          if (ret < 0) {
3249              error_setg_errno(errp, -ret, "Mapping clusters failed");
3250              goto out;
3251          }
3252  
3253          /* TODO Preallocate data if requested */
3254  
3255          bytes -= cur_bytes;
3256          offset += cur_bytes;
3257      }
3258  
3259      /*
3260       * It is expected that the image file is large enough to actually contain
3261       * all of the allocated clusters (otherwise we get failing reads after
3262       * EOF). Extend the image to the last allocated sector.
3263       */
3264      file_length = bdrv_co_getlength(s->data_file->bs);
3265      if (file_length < 0) {
3266          error_setg_errno(errp, -file_length, "Could not get file size");
3267          ret = file_length;
3268          goto out;
3269      }
3270  
3271      if (host_offset + cur_bytes > file_length) {
3272          if (mode == PREALLOC_MODE_METADATA) {
3273              mode = PREALLOC_MODE_OFF;
3274          }
3275          ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
3276                                 mode, 0, errp);
3277          if (ret < 0) {
3278              goto out;
3279          }
3280      }
3281  
3282      ret = 0;
3283  
3284  out:
3285      qcow2_handle_l2meta(bs, &meta, false);
3286      return ret;
3287  }
3288  
3289  /* qcow2_refcount_metadata_size:
3290   * @clusters: number of clusters to refcount (including data and L1/L2 tables)
3291   * @cluster_size: size of a cluster, in bytes
3292   * @refcount_order: refcount bits power-of-2 exponent
3293   * @generous_increase: allow for the refcount table to be 1.5x as large as it
3294   *                     needs to be
3295   *
3296   * Returns: Number of bytes required for refcount blocks and table metadata.
3297   */
3298  int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
3299                                       int refcount_order, bool generous_increase,
3300                                       uint64_t *refblock_count)
3301  {
3302      /*
3303       * Every host cluster is reference-counted, including metadata (even
3304       * refcount metadata is recursively included).
3305       *
3306       * An accurate formula for the size of refcount metadata size is difficult
3307       * to derive.  An easier method of calculation is finding the fixed point
3308       * where no further refcount blocks or table clusters are required to
3309       * reference count every cluster.
3310       */
3311      int64_t blocks_per_table_cluster = cluster_size / REFTABLE_ENTRY_SIZE;
3312      int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order);
3313      int64_t table = 0;  /* number of refcount table clusters */
3314      int64_t blocks = 0; /* number of refcount block clusters */
3315      int64_t last;
3316      int64_t n = 0;
3317  
3318      do {
3319          last = n;
3320          blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block);
3321          table = DIV_ROUND_UP(blocks, blocks_per_table_cluster);
3322          n = clusters + blocks + table;
3323  
3324          if (n == last && generous_increase) {
3325              clusters += DIV_ROUND_UP(table, 2);
3326              n = 0; /* force another loop */
3327              generous_increase = false;
3328          }
3329      } while (n != last);
3330  
3331      if (refblock_count) {
3332          *refblock_count = blocks;
3333      }
3334  
3335      return (blocks + table) * cluster_size;
3336  }
3337  
3338  /**
3339   * qcow2_calc_prealloc_size:
3340   * @total_size: virtual disk size in bytes
3341   * @cluster_size: cluster size in bytes
3342   * @refcount_order: refcount bits power-of-2 exponent
3343   * @extended_l2: true if the image has extended L2 entries
3344   *
3345   * Returns: Total number of bytes required for the fully allocated image
3346   * (including metadata).
3347   */
3348  static int64_t qcow2_calc_prealloc_size(int64_t total_size,
3349                                          size_t cluster_size,
3350                                          int refcount_order,
3351                                          bool extended_l2)
3352  {
3353      int64_t meta_size = 0;
3354      uint64_t nl1e, nl2e;
3355      int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
3356      size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
3357  
3358      /* header: 1 cluster */
3359      meta_size += cluster_size;
3360  
3361      /* total size of L2 tables */
3362      nl2e = aligned_total_size / cluster_size;
3363      nl2e = ROUND_UP(nl2e, cluster_size / l2e_size);
3364      meta_size += nl2e * l2e_size;
3365  
3366      /* total size of L1 tables */
3367      nl1e = nl2e * l2e_size / cluster_size;
3368      nl1e = ROUND_UP(nl1e, cluster_size / L1E_SIZE);
3369      meta_size += nl1e * L1E_SIZE;
3370  
3371      /* total size of refcount table and blocks */
3372      meta_size += qcow2_refcount_metadata_size(
3373              (meta_size + aligned_total_size) / cluster_size,
3374              cluster_size, refcount_order, false, NULL);
3375  
3376      return meta_size + aligned_total_size;
3377  }
3378  
3379  static bool validate_cluster_size(size_t cluster_size, bool extended_l2,
3380                                    Error **errp)
3381  {
3382      int cluster_bits = ctz32(cluster_size);
3383      if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
3384          (1 << cluster_bits) != cluster_size)
3385      {
3386          error_setg(errp, "Cluster size must be a power of two between %d and "
3387                     "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
3388          return false;
3389      }
3390  
3391      if (extended_l2) {
3392          unsigned min_cluster_size =
3393              (1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER;
3394          if (cluster_size < min_cluster_size) {
3395              error_setg(errp, "Extended L2 entries are only supported with "
3396                         "cluster sizes of at least %u bytes", min_cluster_size);
3397              return false;
3398          }
3399      }
3400  
3401      return true;
3402  }
3403  
3404  static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, bool extended_l2,
3405                                               Error **errp)
3406  {
3407      size_t cluster_size;
3408  
3409      cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
3410                                           DEFAULT_CLUSTER_SIZE);
3411      if (!validate_cluster_size(cluster_size, extended_l2, errp)) {
3412          return 0;
3413      }
3414      return cluster_size;
3415  }
3416  
3417  static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
3418  {
3419      char *buf;
3420      int ret;
3421  
3422      buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
3423      if (!buf) {
3424          ret = 3; /* default */
3425      } else if (!strcmp(buf, "0.10")) {
3426          ret = 2;
3427      } else if (!strcmp(buf, "1.1")) {
3428          ret = 3;
3429      } else {
3430          error_setg(errp, "Invalid compatibility level: '%s'", buf);
3431          ret = -EINVAL;
3432      }
3433      g_free(buf);
3434      return ret;
3435  }
3436  
3437  static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
3438                                                  Error **errp)
3439  {
3440      uint64_t refcount_bits;
3441  
3442      refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
3443      if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
3444          error_setg(errp, "Refcount width must be a power of two and may not "
3445                     "exceed 64 bits");
3446          return 0;
3447      }
3448  
3449      if (version < 3 && refcount_bits != 16) {
3450          error_setg(errp, "Different refcount widths than 16 bits require "
3451                     "compatibility level 1.1 or above (use compat=1.1 or "
3452                     "greater)");
3453          return 0;
3454      }
3455  
3456      return refcount_bits;
3457  }
3458  
/*
 * qcow2_co_create:
 * @create_options: creation options; the driver must be qcow2 (asserted)
 * @errp: set on failure
 *
 * Formats the node referenced by qcow2_opts->file (plus the optional
 * qcow2_opts->data_file node) as a qcow2 image.  The steps are:
 *
 *  1. validate the options and fill in defaults for the optional ones;
 *  2. write a minimal header cluster followed by a refcount table cluster
 *     and one refcount block cluster directly to the file node;
 *  3. open the result with the qcow2 driver, allocate the three clusters
 *     written in step 2, and write the full header;
 *  4. truncate the image to the requested size with the requested
 *     preallocation mode, then set the backing file and encryption if
 *     requested;
 *  5. reopen the image once more without BDRV_O_NO_FLUSH.
 *
 * Returns: 0 on success, a negative errno value on failure.
 */
static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
    BlockdevCreateOptionsQcow2 *qcow2_opts;
    QDict *options;

    /*
     * Open the image file and write a minimal qcow2 header.
     *
     * We keep things simple and start with a zero-sized image. We also
     * do without refcount blocks or a L1 table for now. We'll fix the
     * inconsistency later.
     *
     * We do need a refcount table because growing the refcount table means
     * allocating two new refcount blocks - the second of which would be at
     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
     * size for any qcow2 image.
     */
    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;
    BlockDriverState *data_bs = NULL;
    QCowHeader *header;
    size_t cluster_size;
    int version;
    int refcount_order;
    uint64_t *refcount_table;
    int ret;
    uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;

    assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
    qcow2_opts = &create_options->u.qcow2;

    bs = bdrv_co_open_blockdev_ref(qcow2_opts->file, errp);
    if (bs == NULL) {
        /* Nothing has been acquired yet, so no cleanup is needed */
        return -EIO;
    }

    /* Validate options and set default values */
    if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Image size must be a multiple of %u bytes",
                   (unsigned) BDRV_SECTOR_SIZE);
        ret = -EINVAL;
        goto out;
    }

    /* Default to version 3 when no version was given */
    if (qcow2_opts->has_version) {
        switch (qcow2_opts->version) {
        case BLOCKDEV_QCOW2_VERSION_V2:
            version = 2;
            break;
        case BLOCKDEV_QCOW2_VERSION_V3:
            version = 3;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        version = 3;
    }

    if (qcow2_opts->has_cluster_size) {
        cluster_size = qcow2_opts->cluster_size;
    } else {
        cluster_size = DEFAULT_CLUSTER_SIZE;
    }

    if (!qcow2_opts->has_extended_l2) {
        qcow2_opts->extended_l2 = false;
    }
    if (qcow2_opts->extended_l2) {
        if (version < 3) {
            error_setg(errp, "Extended L2 entries are only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            ret = -EINVAL;
            goto out;
        }
    }

    if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) {
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_preallocation) {
        qcow2_opts->preallocation = PREALLOC_MODE_OFF;
    }
    if (qcow2_opts->backing_file &&
        qcow2_opts->preallocation != PREALLOC_MODE_OFF &&
        !qcow2_opts->extended_l2)
    {
        error_setg(errp, "Backing file and preallocation can only be used at "
                   "the same time if extended_l2 is on");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->has_backing_fmt && !qcow2_opts->backing_file) {
        error_setg(errp, "Backing format cannot be used without backing file");
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_lazy_refcounts) {
        qcow2_opts->lazy_refcounts = false;
    }
    if (version < 3 && qcow2_opts->lazy_refcounts) {
        error_setg(errp, "Lazy refcounts only supported with compatibility "
                   "level 1.1 and above (use version=v3 or greater)");
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_refcount_bits) {
        qcow2_opts->refcount_bits = 16;
    }
    if (qcow2_opts->refcount_bits > 64 ||
        !is_power_of_2(qcow2_opts->refcount_bits))
    {
        error_setg(errp, "Refcount width must be a power of two and may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto out;
    }
    if (version < 3 && qcow2_opts->refcount_bits != 16) {
        error_setg(errp, "Different refcount widths than 16 bits require "
                   "compatibility level 1.1 or above (use version=v3 or "
                   "greater)");
        ret = -EINVAL;
        goto out;
    }
    /* refcount_bits is a power of two here, so this yields its exponent */
    refcount_order = ctz32(qcow2_opts->refcount_bits);

    if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
        error_setg(errp, "data-file-raw requires data-file");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->data_file_raw && qcow2_opts->backing_file) {
        error_setg(errp, "Backing file and data-file-raw cannot be used at "
                   "the same time");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->data_file_raw &&
        qcow2_opts->preallocation == PREALLOC_MODE_OFF)
    {
        /*
         * data-file-raw means that "the external data file can be
         * read as a consistent standalone raw image without looking
         * at the qcow2 metadata."  It does not say that the metadata
         * must be ignored, though (and the qcow2 driver in fact does
         * not ignore it), so the L1/L2 tables must be present and
         * give a 1:1 mapping, so you get the same result regardless
         * of whether you look at the metadata or whether you ignore
         * it.
         */
        qcow2_opts->preallocation = PREALLOC_MODE_METADATA;

        /*
         * Cannot use preallocation with backing files, but giving a
         * backing file when specifying data_file_raw is an error
         * anyway.
         */
        assert(!qcow2_opts->backing_file);
    }

    if (qcow2_opts->data_file) {
        if (version < 3) {
            error_setg(errp, "External data files are only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            ret = -EINVAL;
            goto out;
        }
        data_bs = bdrv_co_open_blockdev_ref(qcow2_opts->data_file, errp);
        if (data_bs == NULL) {
            ret = -EIO;
            goto out;
        }
    }

    if (qcow2_opts->has_compression_type &&
        qcow2_opts->compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {

        /* Preset for both error exits below */
        ret = -EINVAL;

        if (version < 3) {
            error_setg(errp, "Non-zlib compression type is only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            goto out;
        }

        /* Without CONFIG_ZSTD, every non-zlib type takes the default case */
        switch (qcow2_opts->compression_type) {
#ifdef CONFIG_ZSTD
        case QCOW2_COMPRESSION_TYPE_ZSTD:
            break;
#endif
        default:
            error_setg(errp, "Unknown compression type");
            goto out;
        }

        compression_type = qcow2_opts->compression_type;
    }

    /* Create BlockBackend to write to the image */
    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
                             errp);
    if (!blk) {
        ret = -EPERM;
        goto out;
    }
    blk_set_allow_write_beyond_eof(blk, true);

    /* Write the header */
    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
    /* A whole zeroed cluster is written; the header sits at its start */
    header = g_malloc0(cluster_size);
    *header = (QCowHeader) {
        .magic                      = cpu_to_be32(QCOW_MAGIC),
        .version                    = cpu_to_be32(version),
        .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
        .size                       = cpu_to_be64(0),
        .l1_table_offset            = cpu_to_be64(0),
        .l1_size                    = cpu_to_be32(0),
        .refcount_table_offset      = cpu_to_be64(cluster_size),
        .refcount_table_clusters    = cpu_to_be32(1),
        .refcount_order             = cpu_to_be32(refcount_order),
        /* don't deal with endianness since compression_type is 1 byte long */
        .compression_type           = compression_type,
        .header_length              = cpu_to_be32(sizeof(*header)),
    };

    /* We'll update this to correct value later */
    header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);

    if (qcow2_opts->lazy_refcounts) {
        header->compatible_features |=
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }
    if (data_bs) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
    }
    if (qcow2_opts->data_file_raw) {
        header->autoclear_features |=
            cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
    }
    if (compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION);
    }

    if (qcow2_opts->extended_l2) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_EXTL2);
    }

    ret = blk_co_pwrite(blk, 0, cluster_size, header, 0);
    /* Free before checking ret so that the error path does not leak */
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
        goto out;
    }

    /*
     * Write a refcount table with one refcount block: two clusters starting
     * at offset cluster_size.  The first table entry points at offset
     * 2 * cluster_size, i.e. the second (all-zero) cluster of this write.
     */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
    ret = blk_co_pwrite(blk, cluster_size, 2 * cluster_size, refcount_table, 0);
    g_free(refcount_table);

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write refcount table");
        goto out;
    }

    /* Reset blk so that the cleanup at "out" does not unref it again */
    blk_co_unref(blk);
    blk = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
     * refcount of the cluster that is occupied by the header and the refcount
     * table)
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    if (data_bs) {
        qdict_put_str(options, "data-file", data_bs->node_name);
    }
    blk = blk_co_new_open(NULL, NULL, options,
                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
                          errp);
    if (blk == NULL) {
        ret = -EIO;
        goto out;
    }

    /*
     * The read lock taken here is held across the cluster allocation and the
     * header update; it is dropped either on the allocation error path or
     * right after qcow2_update_header().
     */
    bdrv_graph_co_rdlock();
    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
    if (ret < 0) {
        bdrv_graph_co_rdunlock();
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
        goto out;

    } else if (ret != 0) {
        /* The allocation was expected to be placed at offset 0 */
        error_report("Huh, first cluster in empty image is already in use?");
        abort();
    }

    /* Set the external data file if necessary */
    if (data_bs) {
        BDRVQcow2State *s = blk_bs(blk)->opaque;
        s->image_data_file = g_strdup(data_bs->filename);
    }

    /* Create a full header (including things like feature table) */
    ret = qcow2_update_header(blk_bs(blk));
    bdrv_graph_co_rdunlock();

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update qcow2 header");
        goto out;
    }

    /* Okay, now that we have a valid image, let's give it the right size */
    ret = blk_co_truncate(blk, qcow2_opts->size, false,
                          qcow2_opts->preallocation, 0, errp);
    if (ret < 0) {
        error_prepend(errp, "Could not resize image: ");
        goto out;
    }

    /* Want a backing file? There you go. */
    if (qcow2_opts->backing_file) {
        const char *backing_format = NULL;

        if (qcow2_opts->has_backing_fmt) {
            backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
        }

        ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
                                       backing_format, false);
        if (ret < 0) {
            /*
             * NOTE(review): backing_format is still NULL here when no
             * backing format was given, and is then passed to a '%s'
             * conversion - confirm the formatter tolerates NULL.
             */
            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                             "with format '%s'", qcow2_opts->backing_file,
                             backing_format);
            goto out;
        }
    }

    /* Want encryption? There you go. */
    if (qcow2_opts->encrypt) {
        bdrv_graph_co_rdlock();
        ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
        bdrv_graph_co_rdunlock();

        if (ret < 0) {
            goto out;
        }
    }

    blk_co_unref(blk);
    blk = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
     * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
     * have to setup decryption context. We're not doing any I/O on the top
     * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
     * not have effect.
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    if (data_bs) {
        qdict_put_str(options, "data-file", data_bs->node_name);
    }
    blk = blk_co_new_open(NULL, NULL, options,
                          BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
                          errp);
    if (blk == NULL) {
        ret = -EIO;
        goto out;
    }

    ret = 0;
out:
    /*
     * Common exit for success and failure.  blk and data_bs can still be
     * NULL when this is reached, so the unref helpers are presumably
     * NULL-tolerant.
     */
    blk_co_unref(blk);
    bdrv_co_unref(bs);
    bdrv_co_unref(data_bs);
    return ret;
}
3852  
3853  static int coroutine_fn GRAPH_UNLOCKED
3854  qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
3855                       Error **errp)
3856  {
3857      BlockdevCreateOptions *create_options = NULL;
3858      QDict *qdict;
3859      Visitor *v;
3860      BlockDriverState *bs = NULL;
3861      BlockDriverState *data_bs = NULL;
3862      const char *val;
3863      int ret;
3864  
3865      /* Only the keyval visitor supports the dotted syntax needed for
3866       * encryption, so go through a QDict before getting a QAPI type. Ignore
3867       * options meant for the protocol layer so that the visitor doesn't
3868       * complain. */
3869      qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3870                                          true);
3871  
3872      /* Handle encryption options */
3873      val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3874      if (val && !strcmp(val, "on")) {
3875          qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3876      } else if (val && !strcmp(val, "off")) {
3877          qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3878      }
3879  
3880      val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3881      if (val && !strcmp(val, "aes")) {
3882          qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3883      }
3884  
3885      /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
3886       * version=v2/v3 below. */
3887      val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3888      if (val && !strcmp(val, "0.10")) {
3889          qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3890      } else if (val && !strcmp(val, "1.1")) {
3891          qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3892      }
3893  
3894      /* Change legacy command line options into QMP ones */
3895      static const QDictRenames opt_renames[] = {
3896          { BLOCK_OPT_BACKING_FILE,       "backing-file" },
3897          { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
3898          { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
3899          { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
3900          { BLOCK_OPT_EXTL2,              "extended-l2" },
3901          { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
3902          { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
3903          { BLOCK_OPT_COMPAT_LEVEL,       "version" },
3904          { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
3905          { BLOCK_OPT_COMPRESSION_TYPE,   "compression-type" },
3906          { NULL, NULL },
3907      };
3908  
3909      if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3910          ret = -EINVAL;
3911          goto finish;
3912      }
3913  
3914      /* Create and open the file (protocol layer) */
3915      ret = bdrv_co_create_file(filename, opts, errp);
3916      if (ret < 0) {
3917          goto finish;
3918      }
3919  
3920      bs = bdrv_co_open(filename, NULL, NULL,
3921                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3922      if (bs == NULL) {
3923          ret = -EIO;
3924          goto finish;
3925      }
3926  
3927      /* Create and open an external data file (protocol layer) */
3928      val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3929      if (val) {
3930          ret = bdrv_co_create_file(val, opts, errp);
3931          if (ret < 0) {
3932              goto finish;
3933          }
3934  
3935          data_bs = bdrv_co_open(val, NULL, NULL,
3936                                 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3937                                 errp);
3938          if (data_bs == NULL) {
3939              ret = -EIO;
3940              goto finish;
3941          }
3942  
3943          qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3944          qdict_put_str(qdict, "data-file", data_bs->node_name);
3945      }
3946  
3947      /* Set 'driver' and 'node' options */
3948      qdict_put_str(qdict, "driver", "qcow2");
3949      qdict_put_str(qdict, "file", bs->node_name);
3950  
3951      /* Now get the QAPI type BlockdevCreateOptions */
3952      v = qobject_input_visitor_new_flat_confused(qdict, errp);
3953      if (!v) {
3954          ret = -EINVAL;
3955          goto finish;
3956      }
3957  
3958      visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
3959      visit_free(v);
3960      if (!create_options) {
3961          ret = -EINVAL;
3962          goto finish;
3963      }
3964  
3965      /* Silently round up size */
3966      create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3967                                              BDRV_SECTOR_SIZE);
3968  
3969      /* Create the qcow2 image (format layer) */
3970      ret = qcow2_co_create(create_options, errp);
3971  finish:
3972      if (ret < 0) {
3973          bdrv_graph_co_rdlock();
3974          bdrv_co_delete_file_noerr(bs);
3975          bdrv_co_delete_file_noerr(data_bs);
3976          bdrv_graph_co_rdunlock();
3977      } else {
3978          ret = 0;
3979      }
3980  
3981      qobject_unref(qdict);
3982      bdrv_co_unref(bs);
3983      bdrv_co_unref(data_bs);
3984      qapi_free_BlockdevCreateOptions(create_options);
3985      return ret;
3986  }
3987  
3988  
3989  static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3990  {
3991      int64_t nr;
3992      int res;
3993  
3994      /* Clamp to image length, before checking status of underlying sectors */
3995      if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3996          bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3997      }
3998  
3999      if (!bytes) {
4000          return true;
4001      }
4002  
4003      /*
4004       * bdrv_block_status_above doesn't merge different types of zeros, for
4005       * example, zeros which come from the region which is unallocated in
4006       * the whole backing chain, and zeros which come because of a short
4007       * backing file. So, we need a loop.
4008       */
4009      do {
4010          res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
4011          offset += nr;
4012          bytes -= nr;
4013      } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
4014  
4015      return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
4016  }
4017  
4018  static int coroutine_fn GRAPH_RDLOCK
4019  qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
4020                         BdrvRequestFlags flags)
4021  {
4022      int ret;
4023      BDRVQcow2State *s = bs->opaque;
4024  
4025      uint32_t head = offset_into_subcluster(s, offset);
4026      uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) -
4027          (offset + bytes);
4028  
4029      trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
4030      if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
4031          tail = 0;
4032      }
4033  
4034      if (head || tail) {
4035          uint64_t off;
4036          unsigned int nr;
4037          QCow2SubclusterType type;
4038  
4039          assert(head + bytes + tail <= s->subcluster_size);
4040  
4041          /* check whether remainder of cluster already reads as zero */
4042          if (!(is_zero(bs, offset - head, head) &&
4043                is_zero(bs, offset + bytes, tail))) {
4044              return -ENOTSUP;
4045          }
4046  
4047          qemu_co_mutex_lock(&s->lock);
4048          /* We can have new write after previous check */
4049          offset -= head;
4050          bytes = s->subcluster_size;
4051          nr = s->subcluster_size;
4052          ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type);
4053          if (ret < 0 ||
4054              (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
4055               type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC &&
4056               type != QCOW2_SUBCLUSTER_ZERO_PLAIN &&
4057               type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
4058              qemu_co_mutex_unlock(&s->lock);
4059              return ret < 0 ? ret : -ENOTSUP;
4060          }
4061      } else {
4062          qemu_co_mutex_lock(&s->lock);
4063      }
4064  
4065      trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
4066  
4067      /* Whatever is left can use real zero subclusters */
4068      ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags);
4069      qemu_co_mutex_unlock(&s->lock);
4070  
4071      return ret;
4072  }
4073  
4074  static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
4075                                            int64_t offset, int64_t bytes)
4076  {
4077      int ret;
4078      BDRVQcow2State *s = bs->opaque;
4079  
4080      /* If the image does not support QCOW_OFLAG_ZERO then discarding
4081       * clusters could expose stale data from the backing file. */
4082      if (s->qcow_version < 3 && bs->backing) {
4083          return -ENOTSUP;
4084      }
4085  
4086      if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
4087          assert(bytes < s->cluster_size);
4088          /* Ignore partial clusters, except for the special case of the
4089           * complete partial cluster at the end of an unaligned file */
4090          if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
4091              offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
4092              return -ENOTSUP;
4093          }
4094      }
4095  
4096      qemu_co_mutex_lock(&s->lock);
4097      ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
4098                                  false);
4099      qemu_co_mutex_unlock(&s->lock);
4100      return ret;
4101  }
4102  
4103  static int coroutine_fn GRAPH_RDLOCK
4104  qcow2_co_copy_range_from(BlockDriverState *bs,
4105                           BdrvChild *src, int64_t src_offset,
4106                           BdrvChild *dst, int64_t dst_offset,
4107                           int64_t bytes, BdrvRequestFlags read_flags,
4108                           BdrvRequestFlags write_flags)
4109  {
4110      BDRVQcow2State *s = bs->opaque;
4111      int ret;
4112      unsigned int cur_bytes; /* number of bytes in current iteration */
4113      BdrvChild *child = NULL;
4114      BdrvRequestFlags cur_write_flags;
4115  
4116      assert(!bs->encrypted);
4117      qemu_co_mutex_lock(&s->lock);
4118  
4119      while (bytes != 0) {
4120          uint64_t copy_offset = 0;
4121          QCow2SubclusterType type;
4122          /* prepare next request */
4123          cur_bytes = MIN(bytes, INT_MAX);
4124          cur_write_flags = write_flags;
4125  
4126          ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes,
4127                                      &copy_offset, &type);
4128          if (ret < 0) {
4129              goto out;
4130          }
4131  
4132          switch (type) {
4133          case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
4134          case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
4135              if (bs->backing && bs->backing->bs) {
4136                  int64_t backing_length = bdrv_co_getlength(bs->backing->bs);
4137                  if (src_offset >= backing_length) {
4138                      cur_write_flags |= BDRV_REQ_ZERO_WRITE;
4139                  } else {
4140                      child = bs->backing;
4141                      cur_bytes = MIN(cur_bytes, backing_length - src_offset);
4142                      copy_offset = src_offset;
4143                  }
4144              } else {
4145                  cur_write_flags |= BDRV_REQ_ZERO_WRITE;
4146              }
4147              break;
4148  
4149          case QCOW2_SUBCLUSTER_ZERO_PLAIN:
4150          case QCOW2_SUBCLUSTER_ZERO_ALLOC:
4151              cur_write_flags |= BDRV_REQ_ZERO_WRITE;
4152              break;
4153  
4154          case QCOW2_SUBCLUSTER_COMPRESSED:
4155              ret = -ENOTSUP;
4156              goto out;
4157  
4158          case QCOW2_SUBCLUSTER_NORMAL:
4159              child = s->data_file;
4160              break;
4161  
4162          default:
4163              abort();
4164          }
4165          qemu_co_mutex_unlock(&s->lock);
4166          ret = bdrv_co_copy_range_from(child,
4167                                        copy_offset,
4168                                        dst, dst_offset,
4169                                        cur_bytes, read_flags, cur_write_flags);
4170          qemu_co_mutex_lock(&s->lock);
4171          if (ret < 0) {
4172              goto out;
4173          }
4174  
4175          bytes -= cur_bytes;
4176          src_offset += cur_bytes;
4177          dst_offset += cur_bytes;
4178      }
4179      ret = 0;
4180  
4181  out:
4182      qemu_co_mutex_unlock(&s->lock);
4183      return ret;
4184  }
4185  
4186  static int coroutine_fn GRAPH_RDLOCK
4187  qcow2_co_copy_range_to(BlockDriverState *bs,
4188                         BdrvChild *src, int64_t src_offset,
4189                         BdrvChild *dst, int64_t dst_offset,
4190                         int64_t bytes, BdrvRequestFlags read_flags,
4191                         BdrvRequestFlags write_flags)
4192  {
4193      BDRVQcow2State *s = bs->opaque;
4194      int ret;
4195      unsigned int cur_bytes; /* number of sectors in current iteration */
4196      uint64_t host_offset;
4197      QCowL2Meta *l2meta = NULL;
4198  
4199      assert(!bs->encrypted);
4200  
4201      qemu_co_mutex_lock(&s->lock);
4202  
4203      while (bytes != 0) {
4204  
4205          l2meta = NULL;
4206  
4207          cur_bytes = MIN(bytes, INT_MAX);
4208  
4209          /* TODO:
4210           * If src->bs == dst->bs, we could simply copy by incrementing
4211           * the refcnt, without copying user data.
4212           * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
4213          ret = qcow2_alloc_host_offset(bs, dst_offset, &cur_bytes,
4214                                        &host_offset, &l2meta);
4215          if (ret < 0) {
4216              goto fail;
4217          }
4218  
4219          ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, cur_bytes,
4220                                              true);
4221          if (ret < 0) {
4222              goto fail;
4223          }
4224  
4225          qemu_co_mutex_unlock(&s->lock);
4226          ret = bdrv_co_copy_range_to(src, src_offset, s->data_file, host_offset,
4227                                      cur_bytes, read_flags, write_flags);
4228          qemu_co_mutex_lock(&s->lock);
4229          if (ret < 0) {
4230              goto fail;
4231          }
4232  
4233          ret = qcow2_handle_l2meta(bs, &l2meta, true);
4234          if (ret) {
4235              goto fail;
4236          }
4237  
4238          bytes -= cur_bytes;
4239          src_offset += cur_bytes;
4240          dst_offset += cur_bytes;
4241      }
4242      ret = 0;
4243  
4244  fail:
4245      qcow2_handle_l2meta(bs, &l2meta, false);
4246  
4247      qemu_co_mutex_unlock(&s->lock);
4248  
4249      trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
4250  
4251      return ret;
4252  }
4253  
/*
 * Resize the guest-visible size of the image to @offset bytes.
 *
 * @offset must be a multiple of BDRV_SECTOR_SIZE.  Shrinking is only
 * accepted with @prealloc == PREALLOC_MODE_OFF.  When growing, the added
 * area is preallocated according to @prealloc and, if @flags contains
 * BDRV_REQ_ZERO_WRITE, is made to read as zeroes.  @exact is only passed on
 * to the external data file in the PREALLOC_MODE_OFF case; every truncate of
 * bs->file issued here passes exact=false.
 *
 * s->lock is taken after the argument checks and released on return; it is
 * dropped temporarily around the qcow2_co_pwritev_part() call that zeroes an
 * unaligned head.
 *
 * Returns 0 on success or a negative errno on failure; error details are
 * reported through @errp.
 */
static int coroutine_fn GRAPH_RDLOCK
qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "The new size must be a multiple of %u",
                   (unsigned) BDRV_SECTOR_SIZE);
        return -EINVAL;
    }

    /* From here on every exit goes through the 'fail' label, which unlocks */
    qemu_co_mutex_lock(&s->lock);

    /*
     * Even though we store snapshot size for all images, it was not
     * required until v3, so it is not safe to proceed for v2.
     */
    if (s->nb_snapshots && s->qcow_version < 3) {
        error_setg(errp, "Can't resize a v2 image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* See qcow2-bitmap.c for which bitmap scenarios prevent a resize. */
    if (qcow2_truncate_bitmaps_check(bs, errp)) {
        ret = -ENOTSUP;
        goto fail;
    }

    /* Current guest size in bytes and the L1 size needed for the new one */
    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        /* Discard everything from the first cluster boundary at or after
         * the new end up to the old end */
        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_co_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        /* Cut off the part of the image file behind the last cluster */
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /*
             * Do not pass @exact here: It will not help the user if
             * we get an error here just because they wanted to shrink
             * their qcow2 image (on a block device) with qemu-img.
             * (And on the qcow2 layer, the @exact requirement is
             * always fulfilled, so there is no need to pass it on.)
             */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             false, PREALLOC_MODE_OFF, 0, &local_err);
            /* A failure here is only warned about; the resize continues */
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing, or keeping the size unchanged */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }

        if (data_file_is_raw(bs) && prealloc == PREALLOC_MODE_OFF) {
            /*
             * When creating a qcow2 image with data-file-raw, we enforce
             * at least prealloc=metadata, so that the L1/L2 tables are
             * fully allocated and reading from the data file will return
             * the same data as reading from the qcow2 image.  When the
             * image is grown, we must consequently preallocate the
             * metadata structures to cover the added area.
             */
            prealloc = PREALLOC_MODE_METADATA;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        if (has_data_file(bs)) {
            /*
             * If the caller wants an exact resize, the external data
             * file should be resized to the exact target size, too,
             * so we pass @exact here.
             */
            ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
                                   errp);
            if (ret < 0) {
                goto fail;
            }
        }
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
        if (ret < 0) {
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, last_cluster, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;
        bool subclusters_need_allocation = false;

        /* With a data file, preallocation means just allocating the metadata
         * and forwarding the truncate request to the data file */
        if (has_data_file(bs)) {
            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
            break;
        }

        old_file_size = bdrv_co_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }

        /* Use the end of the last cluster as the file size if it can be
         * determined, else the current file length rounded up to a cluster */
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster >= 0) {
            old_file_size = (last_cluster + 1) * s->cluster_size;
        } else {
            old_file_size = ROUND_UP(old_file_size, s->cluster_size);
        }

        /* Number of clusters between the start of the cluster containing
         * the old end and the new end rounded up to a cluster boundary */
        nb_new_data_clusters = (ROUND_UP(offset, s->cluster_size) -
            start_of_cluster(s, old_length)) >> s->cluster_bits;

        /* This is an overestimation; we will not actually allocate space for
         * these in the file but just make sure the new refcount structures are
         * able to cover them so we will not have to allocate new refblocks
         * while entering the data blocks in the potentially new L2 tables.
         * (We do not actually care where the L2 tables are placed. Maybe they
         *  are already allocated or they can be placed somewhere before
         *  @old_file_size. It does not matter because they will be fully
         *  allocated automatically, so they do not need to be covered by the
         *  preallocation. All that matters is that we will not have to allocate
         *  new refcount structures for them.) */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / l2_entry_size(s));
        /* The cluster range may not be aligned to L2 boundaries, so add one L2
         * table for a potential head/tail */
        nb_new_l2_tables++;

        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Allocate the data area */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;
        /*
         * Image file grows, so @exact does not matter.
         *
         * If we need to zero out the new area, try first whether the protocol
         * driver can already take care of this.
         */
        if (flags & BDRV_REQ_ZERO_WRITE) {
            ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
                                   BDRV_REQ_ZERO_WRITE, NULL);
            if (ret >= 0) {
                /* Zeroing is done; skip the explicit zeroing further down */
                flags &= ~BDRV_REQ_ZERO_WRITE;
                /* Ensure that we read zeroes and not backing file data */
                subclusters_need_allocation = true;
            }
        } else {
            /* No zeroing requested: force the plain truncate below */
            ret = -1;
        }
        if (ret < 0) {
            ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
                                   errp);
        }
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            /* Give back the clusters allocated above */
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /* Create the necessary L2 entries */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            /* Link at most up to the end of the current L2 slice per pass */
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            unsigned cow_start_length = offset_into_cluster(s, guest_offset);
            QCowL2Meta allocation;
            guest_offset = start_of_cluster(s, guest_offset);
            allocation = (QCowL2Meta) {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
                .cow_start    = {
                    .offset       = 0,
                    .nb_bytes     = cow_start_length,
                },
                .cow_end      = {
                    .offset       = nb_clusters << s->cluster_bits,
                    .nb_bytes     = 0,
                },
                .prealloc     = !subclusters_need_allocation,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Free the clusters that have not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    /* Zero the added area unless the FALLOC/FULL path above already did */
    if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
        uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size);

        /*
         * Use zero clusters as much as we can. qcow2_subcluster_zeroize()
         * requires a subcluster-aligned start. The end may be unaligned if
         * it is at the end of the image (which it is here).
         */
        if (offset > zero_start) {
            ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start,
                                           0);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to zero out new clusters");
                goto fail;
            }
        }

        /* Write explicit zeros for the unaligned head */
        if (zero_start > old_length) {
            uint64_t len = MIN(zero_start, offset) - old_length;
            uint8_t *buf = qemu_blockalign0(bs, len);
            QEMUIOVector qiov;
            qemu_iovec_init_buf(&qiov, buf, len);

            /* s->lock is dropped around the write and re-taken afterwards */
            qemu_co_mutex_unlock(&s->lock);
            ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
            qemu_co_mutex_lock(&s->lock);

            qemu_vfree(buf);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to zero out the new area");
                goto fail;
            }
        }
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Flush metadata before actually changing the image size */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    /* write updated header.size */
    /* @offset holds the big-endian on-disk value from here on */
    offset = cpu_to_be64(offset);
    ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                              sizeof(offset), &offset, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Update cache sizes */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    /* Common exit for success and failure: release s->lock */
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
4629  
4630  static int coroutine_fn GRAPH_RDLOCK
4631  qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
4632                                   uint64_t offset, uint64_t bytes,
4633                                   QEMUIOVector *qiov, size_t qiov_offset)
4634  {
4635      BDRVQcow2State *s = bs->opaque;
4636      int ret;
4637      ssize_t out_len;
4638      uint8_t *buf, *out_buf;
4639      uint64_t cluster_offset;
4640  
4641      assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
4642             (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
4643  
4644      buf = qemu_blockalign(bs, s->cluster_size);
4645      if (bytes < s->cluster_size) {
4646          /* Zero-pad last write if image size is not cluster aligned */
4647          memset(buf + bytes, 0, s->cluster_size - bytes);
4648      }
4649      qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
4650  
4651      out_buf = g_malloc(s->cluster_size);
4652  
4653      out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4654                                  buf, s->cluster_size);
4655      if (out_len == -ENOMEM) {
4656          /* could not compress: write normal cluster */
4657          ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
4658          if (ret < 0) {
4659              goto fail;
4660          }
4661          goto success;
4662      } else if (out_len < 0) {
4663          ret = -EINVAL;
4664          goto fail;
4665      }
4666  
4667      qemu_co_mutex_lock(&s->lock);
4668      ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4669                                                  &cluster_offset);
4670      if (ret < 0) {
4671          qemu_co_mutex_unlock(&s->lock);
4672          goto fail;
4673      }
4674  
4675      ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4676      qemu_co_mutex_unlock(&s->lock);
4677      if (ret < 0) {
4678          goto fail;
4679      }
4680  
4681      BLKDBG_CO_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4682      ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
4683      if (ret < 0) {
4684          goto fail;
4685      }
4686  success:
4687      ret = 0;
4688  fail:
4689      qemu_vfree(buf);
4690      g_free(out_buf);
4691      return ret;
4692  }
4693  
4694  /*
4695   * This function can count as GRAPH_RDLOCK because
4696   * qcow2_co_pwritev_compressed_part() holds the graph lock and keeps it until
4697   * this coroutine has terminated.
4698   */
4699  static int coroutine_fn GRAPH_RDLOCK
4700  qcow2_co_pwritev_compressed_task_entry(AioTask *task)
4701  {
4702      Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
4703  
4704      assert(!t->subcluster_type && !t->l2meta);
4705  
4706      return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov,
4707                                              t->qiov_offset);
4708  }
4709  
4710  /*
4711   * XXX: put compressed sectors first, then all the cluster aligned
4712   * tables to avoid losing bytes in alignment
4713   */
4714  static int coroutine_fn GRAPH_RDLOCK
4715  qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
4716                                   int64_t offset, int64_t bytes,
4717                                   QEMUIOVector *qiov, size_t qiov_offset)
4718  {
4719      BDRVQcow2State *s = bs->opaque;
4720      AioTaskPool *aio = NULL;
4721      int ret = 0;
4722  
4723      if (has_data_file(bs)) {
4724          return -ENOTSUP;
4725      }
4726  
4727      if (bytes == 0) {
4728          /*
4729           * align end of file to a sector boundary to ease reading with
4730           * sector based I/Os
4731           */
4732          int64_t len = bdrv_co_getlength(bs->file->bs);
4733          if (len < 0) {
4734              return len;
4735          }
4736          return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
4737                                  NULL);
4738      }
4739  
4740      if (offset_into_cluster(s, offset)) {
4741          return -EINVAL;
4742      }
4743  
4744      if (offset_into_cluster(s, bytes) &&
4745          (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) {
4746          return -EINVAL;
4747      }
4748  
4749      while (bytes && aio_task_pool_status(aio) == 0) {
4750          uint64_t chunk_size = MIN(bytes, s->cluster_size);
4751  
4752          if (!aio && chunk_size != bytes) {
4753              aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
4754          }
4755  
4756          ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
4757                               0, 0, offset, chunk_size, qiov, qiov_offset, NULL);
4758          if (ret < 0) {
4759              break;
4760          }
4761          qiov_offset += chunk_size;
4762          offset += chunk_size;
4763          bytes -= chunk_size;
4764      }
4765  
4766      if (aio) {
4767          aio_task_pool_wait_all(aio);
4768          if (ret == 0) {
4769              ret = aio_task_pool_status(aio);
4770          }
4771          g_free(aio);
4772      }
4773  
4774      return ret;
4775  }
4776  
4777  static int coroutine_fn GRAPH_RDLOCK
4778  qcow2_co_preadv_compressed(BlockDriverState *bs,
4779                             uint64_t l2_entry,
4780                             uint64_t offset,
4781                             uint64_t bytes,
4782                             QEMUIOVector *qiov,
4783                             size_t qiov_offset)
4784  {
4785      BDRVQcow2State *s = bs->opaque;
4786      int ret = 0, csize;
4787      uint64_t coffset;
4788      uint8_t *buf, *out_buf;
4789      int offset_in_cluster = offset_into_cluster(s, offset);
4790  
4791      qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
4792  
4793      buf = g_try_malloc(csize);
4794      if (!buf) {
4795          return -ENOMEM;
4796      }
4797  
4798      out_buf = qemu_blockalign(bs, s->cluster_size);
4799  
4800      BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4801      ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
4802      if (ret < 0) {
4803          goto fail;
4804      }
4805  
4806      if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4807          ret = -EIO;
4808          goto fail;
4809      }
4810  
4811      qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
4812  
4813  fail:
4814      qemu_vfree(out_buf);
4815      g_free(buf);
4816  
4817      return ret;
4818  }
4819  
4820  static int make_completely_empty(BlockDriverState *bs)
4821  {
4822      BDRVQcow2State *s = bs->opaque;
4823      Error *local_err = NULL;
4824      int ret, l1_clusters;
4825      int64_t offset;
4826      uint64_t *new_reftable = NULL;
4827      uint64_t rt_entry, l1_size2;
4828      struct {
4829          uint64_t l1_offset;
4830          uint64_t reftable_offset;
4831          uint32_t reftable_clusters;
4832      } QEMU_PACKED l1_ofs_rt_ofs_cls;
4833  
4834      ret = qcow2_cache_empty(bs, s->l2_table_cache);
4835      if (ret < 0) {
4836          goto fail;
4837      }
4838  
4839      ret = qcow2_cache_empty(bs, s->refcount_block_cache);
4840      if (ret < 0) {
4841          goto fail;
4842      }
4843  
4844      /* Refcounts will be broken utterly */
4845      ret = qcow2_mark_dirty(bs);
4846      if (ret < 0) {
4847          goto fail;
4848      }
4849  
4850      BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
4851  
4852      l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
4853      l1_size2 = (uint64_t)s->l1_size * L1E_SIZE;
4854  
4855      /* After this call, neither the in-memory nor the on-disk refcount
4856       * information accurately describe the actual references */
4857  
4858      ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
4859                               l1_clusters * s->cluster_size, 0);
4860      if (ret < 0) {
4861          goto fail_broken_refcounts;
4862      }
4863      memset(s->l1_table, 0, l1_size2);
4864  
4865      BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
4866  
4867      /* Overwrite enough clusters at the beginning of the sectors to place
4868       * the refcount table, a refcount block and the L1 table in; this may
4869       * overwrite parts of the existing refcount and L1 table, which is not
4870       * an issue because the dirty flag is set, complete data loss is in fact
4871       * desired and partial data loss is consequently fine as well */
4872      ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
4873                               (2 + l1_clusters) * s->cluster_size, 0);
4874      /* This call (even if it failed overall) may have overwritten on-disk
4875       * refcount structures; in that case, the in-memory refcount information
4876       * will probably differ from the on-disk information which makes the BDS
4877       * unusable */
4878      if (ret < 0) {
4879          goto fail_broken_refcounts;
4880      }
4881  
4882      BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
4883      BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
4884  
4885      /* "Create" an empty reftable (one cluster) directly after the image
4886       * header and an empty L1 table three clusters after the image header;
4887       * the cluster between those two will be used as the first refblock */
4888      l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
4889      l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
4890      l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
4891      ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
4892                             sizeof(l1_ofs_rt_ofs_cls), &l1_ofs_rt_ofs_cls, 0);
4893      if (ret < 0) {
4894          goto fail_broken_refcounts;
4895      }
4896  
4897      s->l1_table_offset = 3 * s->cluster_size;
4898  
4899      new_reftable = g_try_new0(uint64_t, s->cluster_size / REFTABLE_ENTRY_SIZE);
4900      if (!new_reftable) {
4901          ret = -ENOMEM;
4902          goto fail_broken_refcounts;
4903      }
4904  
4905      s->refcount_table_offset = s->cluster_size;
4906      s->refcount_table_size   = s->cluster_size / REFTABLE_ENTRY_SIZE;
4907      s->max_refcount_table_index = 0;
4908  
4909      g_free(s->refcount_table);
4910      s->refcount_table = new_reftable;
4911      new_reftable = NULL;
4912  
4913      /* Now the in-memory refcount information again corresponds to the on-disk
4914       * information (reftable is empty and no refblocks (the refblock cache is
4915       * empty)); however, this means some clusters (e.g. the image header) are
4916       * referenced, but not refcounted, but the normal qcow2 code assumes that
4917       * the in-memory information is always correct */
4918  
4919      BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
4920  
4921      /* Enter the first refblock into the reftable */
4922      rt_entry = cpu_to_be64(2 * s->cluster_size);
4923      ret = bdrv_pwrite_sync(bs->file, s->cluster_size, sizeof(rt_entry),
4924                             &rt_entry, 0);
4925      if (ret < 0) {
4926          goto fail_broken_refcounts;
4927      }
4928      s->refcount_table[0] = 2 * s->cluster_size;
4929  
4930      s->free_cluster_index = 0;
4931      assert(3 + l1_clusters <= s->refcount_block_size);
4932      offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
4933      if (offset < 0) {
4934          ret = offset;
4935          goto fail_broken_refcounts;
4936      } else if (offset > 0) {
4937          error_report("First cluster in emptied image is in use");
4938          abort();
4939      }
4940  
4941      /* Now finally the in-memory information corresponds to the on-disk
4942       * structures and is correct */
4943      ret = qcow2_mark_clean(bs);
4944      if (ret < 0) {
4945          goto fail;
4946      }
4947  
4948      ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
4949                          PREALLOC_MODE_OFF, 0, &local_err);
4950      if (ret < 0) {
4951          error_report_err(local_err);
4952          goto fail;
4953      }
4954  
4955      return 0;
4956  
4957  fail_broken_refcounts:
4958      /* The BDS is unusable at this point. If we wanted to make it usable, we
4959       * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
4960       * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
4961       * again. However, because the functions which could have caused this error
4962       * path to be taken are used by those functions as well, it's very likely
4963       * that that sequence will fail as well. Therefore, just eject the BDS. */
4964      bs->drv = NULL;
4965  
4966  fail:
4967      g_free(new_reftable);
4968      return ret;
4969  }
4970  
4971  static int qcow2_make_empty(BlockDriverState *bs)
4972  {
4973      BDRVQcow2State *s = bs->opaque;
4974      uint64_t offset, end_offset;
4975      int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4976      int l1_clusters, ret = 0;
4977  
4978      l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
4979  
4980      if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4981          3 + l1_clusters <= s->refcount_block_size &&
4982          s->crypt_method_header != QCOW_CRYPT_LUKS &&
4983          !has_data_file(bs)) {
4984          /* The following function only works for qcow2 v3 images (it
4985           * requires the dirty flag) and only as long as there are no
4986           * features that reserve extra clusters (such as snapshots,
4987           * LUKS header, or persistent bitmaps), because it completely
4988           * empties the image.  Furthermore, the L1 table and three
4989           * additional clusters (image header, refcount table, one
4990           * refcount block) have to fit inside one refcount block. It
4991           * only resets the image file, i.e. does not work with an
4992           * external data file. */
4993          return make_completely_empty(bs);
4994      }
4995  
4996      /* This fallback code simply discards every active cluster; this is slow,
4997       * but works in all cases */
4998      end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4999      for (offset = 0; offset < end_offset; offset += step) {
5000          /* As this function is generally used after committing an external
5001           * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
5002           * default action for this kind of discard is to pass the discard,
5003           * which will ideally result in an actually smaller image file, as
5004           * is probably desired. */
5005          ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
5006                                      QCOW2_DISCARD_SNAPSHOT, true);
5007          if (ret < 0) {
5008              break;
5009          }
5010      }
5011  
5012      return ret;
5013  }
5014  
5015  static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
5016  {
5017      BDRVQcow2State *s = bs->opaque;
5018      int ret;
5019  
5020      qemu_co_mutex_lock(&s->lock);
5021      ret = qcow2_write_caches(bs);
5022      qemu_co_mutex_unlock(&s->lock);
5023  
5024      return ret;
5025  }
5026  
5027  static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
5028                                         Error **errp)
5029  {
5030      Error *local_err = NULL;
5031      BlockMeasureInfo *info;
5032      uint64_t required = 0; /* bytes that contribute to required size */
5033      uint64_t virtual_size; /* disk size as seen by guest */
5034      uint64_t refcount_bits;
5035      uint64_t l2_tables;
5036      uint64_t luks_payload_size = 0;
5037      size_t cluster_size;
5038      int version;
5039      char *optstr;
5040      PreallocMode prealloc;
5041      bool has_backing_file;
5042      bool has_luks;
5043      bool extended_l2;
5044      size_t l2e_size;
5045  
5046      /* Parse image creation options */
5047      extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false);
5048  
5049      cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2,
5050                                                    &local_err);
5051      if (local_err) {
5052          goto err;
5053      }
5054  
5055      version = qcow2_opt_get_version_del(opts, &local_err);
5056      if (local_err) {
5057          goto err;
5058      }
5059  
5060      refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
5061      if (local_err) {
5062          goto err;
5063      }
5064  
5065      optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
5066      prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
5067                                 PREALLOC_MODE_OFF, &local_err);
5068      g_free(optstr);
5069      if (local_err) {
5070          goto err;
5071      }
5072  
5073      optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
5074      has_backing_file = !!optstr;
5075      g_free(optstr);
5076  
5077      optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
5078      has_luks = optstr && strcmp(optstr, "luks") == 0;
5079      g_free(optstr);
5080  
5081      if (has_luks) {
5082          g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
5083          QDict *cryptoopts = qcow2_extract_crypto_opts(opts, "luks", errp);
5084          size_t headerlen;
5085  
5086          create_opts = block_crypto_create_opts_init(cryptoopts, errp);
5087          qobject_unref(cryptoopts);
5088          if (!create_opts) {
5089              goto err;
5090          }
5091  
5092          if (!qcrypto_block_calculate_payload_offset(create_opts,
5093                                                      "encrypt.",
5094                                                      &headerlen,
5095                                                      &local_err)) {
5096              goto err;
5097          }
5098  
5099          luks_payload_size = ROUND_UP(headerlen, cluster_size);
5100      }
5101  
5102      virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
5103      virtual_size = ROUND_UP(virtual_size, cluster_size);
5104  
5105      /* Check that virtual disk size is valid */
5106      l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
5107      l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
5108                               cluster_size / l2e_size);
5109      if (l2_tables * L1E_SIZE > QCOW_MAX_L1_SIZE) {
5110          error_setg(&local_err, "The image size is too large "
5111                                 "(try using a larger cluster size)");
5112          goto err;
5113      }
5114  
5115      /* Account for input image */
5116      if (in_bs) {
5117          int64_t ssize = bdrv_getlength(in_bs);
5118          if (ssize < 0) {
5119              error_setg_errno(&local_err, -ssize,
5120                               "Unable to get image virtual_size");
5121              goto err;
5122          }
5123  
5124          virtual_size = ROUND_UP(ssize, cluster_size);
5125  
5126          if (has_backing_file) {
5127              /* We don't how much of the backing chain is shared by the input
5128               * image and the new image file.  In the worst case the new image's
5129               * backing file has nothing in common with the input image.  Be
5130               * conservative and assume all clusters need to be written.
5131               */
5132              required = virtual_size;
5133          } else {
5134              int64_t offset;
5135              int64_t pnum = 0;
5136  
5137              for (offset = 0; offset < ssize; offset += pnum) {
5138                  int ret;
5139  
5140                  ret = bdrv_block_status_above(in_bs, NULL, offset,
5141                                                ssize - offset, &pnum, NULL,
5142                                                NULL);
5143                  if (ret < 0) {
5144                      error_setg_errno(&local_err, -ret,
5145                                       "Unable to get block status");
5146                      goto err;
5147                  }
5148  
5149                  if (ret & BDRV_BLOCK_ZERO) {
5150                      /* Skip zero regions (safe with no backing file) */
5151                  } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
5152                             (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
5153                      /* Extend pnum to end of cluster for next iteration */
5154                      pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
5155  
5156                      /* Count clusters we've seen */
5157                      required += offset % cluster_size + pnum;
5158                  }
5159              }
5160          }
5161      }
5162  
5163      /* Take into account preallocation.  Nothing special is needed for
5164       * PREALLOC_MODE_METADATA since metadata is always counted.
5165       */
5166      if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
5167          required = virtual_size;
5168      }
5169  
5170      info = g_new0(BlockMeasureInfo, 1);
5171      info->fully_allocated = luks_payload_size +
5172          qcow2_calc_prealloc_size(virtual_size, cluster_size,
5173                                   ctz32(refcount_bits), extended_l2);
5174  
5175      /*
5176       * Remove data clusters that are not required.  This overestimates the
5177       * required size because metadata needed for the fully allocated file is
5178       * still counted.  Show bitmaps only if both source and destination
5179       * would support them.
5180       */
5181      info->required = info->fully_allocated - virtual_size + required;
5182      info->has_bitmaps = version >= 3 && in_bs &&
5183          bdrv_supports_persistent_dirty_bitmap(in_bs);
5184      if (info->has_bitmaps) {
5185          info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs,
5186                                                                 cluster_size);
5187      }
5188      return info;
5189  
5190  err:
5191      error_propagate(errp, local_err);
5192      return NULL;
5193  }
5194  
5195  static int coroutine_fn
5196  qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
5197  {
5198      BDRVQcow2State *s = bs->opaque;
5199      bdi->cluster_size = s->cluster_size;
5200      bdi->subcluster_size = s->subcluster_size;
5201      bdi->vm_state_offset = qcow2_vm_state_offset(s);
5202      bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
5203      return 0;
5204  }
5205  
5206  static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
5207                                                    Error **errp)
5208  {
5209      BDRVQcow2State *s = bs->opaque;
5210      ImageInfoSpecific *spec_info;
5211      QCryptoBlockInfo *encrypt_info = NULL;
5212  
5213      if (s->crypto != NULL) {
5214          encrypt_info = qcrypto_block_get_info(s->crypto, errp);
5215          if (!encrypt_info) {
5216              return NULL;
5217          }
5218      }
5219  
5220      spec_info = g_new(ImageInfoSpecific, 1);
5221      *spec_info = (ImageInfoSpecific){
5222          .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
5223          .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
5224      };
5225      if (s->qcow_version == 2) {
5226          *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
5227              .compat             = g_strdup("0.10"),
5228              .refcount_bits      = s->refcount_bits,
5229          };
5230      } else if (s->qcow_version == 3) {
5231          Qcow2BitmapInfoList *bitmaps;
5232          if (!qcow2_get_bitmap_info_list(bs, &bitmaps, errp)) {
5233              qapi_free_ImageInfoSpecific(spec_info);
5234              qapi_free_QCryptoBlockInfo(encrypt_info);
5235              return NULL;
5236          }
5237          *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
5238              .compat             = g_strdup("1.1"),
5239              .lazy_refcounts     = s->compatible_features &
5240                                    QCOW2_COMPAT_LAZY_REFCOUNTS,
5241              .has_lazy_refcounts = true,
5242              .corrupt            = s->incompatible_features &
5243                                    QCOW2_INCOMPAT_CORRUPT,
5244              .has_corrupt        = true,
5245              .has_extended_l2    = true,
5246              .extended_l2        = has_subclusters(s),
5247              .refcount_bits      = s->refcount_bits,
5248              .has_bitmaps        = !!bitmaps,
5249              .bitmaps            = bitmaps,
5250              .data_file          = g_strdup(s->image_data_file),
5251              .has_data_file_raw  = has_data_file(bs),
5252              .data_file_raw      = data_file_is_raw(bs),
5253              .compression_type   = s->compression_type,
5254          };
5255      } else {
5256          /* if this assertion fails, this probably means a new version was
5257           * added without having it covered here */
5258          assert(false);
5259      }
5260  
5261      if (encrypt_info) {
5262          ImageInfoSpecificQCow2Encryption *qencrypt =
5263              g_new(ImageInfoSpecificQCow2Encryption, 1);
5264          switch (encrypt_info->format) {
5265          case Q_CRYPTO_BLOCK_FORMAT_QCOW:
5266              qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
5267              break;
5268          case Q_CRYPTO_BLOCK_FORMAT_LUKS:
5269              qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
5270              qencrypt->u.luks = encrypt_info->u.luks;
5271              break;
5272          default:
5273              abort();
5274          }
5275          /* Since we did shallow copy above, erase any pointers
5276           * in the original info */
5277          memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
5278          qapi_free_QCryptoBlockInfo(encrypt_info);
5279  
5280          spec_info->u.qcow2.data->encrypt = qencrypt;
5281      }
5282  
5283      return spec_info;
5284  }
5285  
5286  static int qcow2_has_zero_init(BlockDriverState *bs)
5287  {
5288      BDRVQcow2State *s = bs->opaque;
5289      bool preallocated;
5290  
5291      if (qemu_in_coroutine()) {
5292          qemu_co_mutex_lock(&s->lock);
5293      }
5294      /*
5295       * Check preallocation status: Preallocated images have all L2
5296       * tables allocated, nonpreallocated images have none.  It is
5297       * therefore enough to check the first one.
5298       */
5299      preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
5300      if (qemu_in_coroutine()) {
5301          qemu_co_mutex_unlock(&s->lock);
5302      }
5303  
5304      if (!preallocated) {
5305          return 1;
5306      } else if (bs->encrypted) {
5307          return 0;
5308      } else {
5309          return bdrv_has_zero_init(s->data_file->bs);
5310      }
5311  }
5312  
5313  /*
5314   * Check the request to vmstate. On success return
5315   *      qcow2_vm_state_offset(bs) + @pos
5316   */
5317  static int64_t qcow2_check_vmstate_request(BlockDriverState *bs,
5318                                             QEMUIOVector *qiov, int64_t pos)
5319  {
5320      BDRVQcow2State *s = bs->opaque;
5321      int64_t vmstate_offset = qcow2_vm_state_offset(s);
5322      int ret;
5323  
5324      /* Incoming requests must be OK */
5325      bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort);
5326  
5327      if (INT64_MAX - pos < vmstate_offset) {
5328          return -EIO;
5329      }
5330  
5331      pos += vmstate_offset;
5332      ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
5333      if (ret < 0) {
5334          return ret;
5335      }
5336  
5337      return pos;
5338  }
5339  
5340  static int coroutine_fn GRAPH_RDLOCK
5341  qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
5342  {
5343      int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
5344      if (offset < 0) {
5345          return offset;
5346      }
5347  
5348      BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
5349      return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
5350  }
5351  
5352  static int coroutine_fn GRAPH_RDLOCK
5353  qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
5354  {
5355      int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
5356      if (offset < 0) {
5357          return offset;
5358      }
5359  
5360      BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
5361      return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
5362  }
5363  
5364  static int qcow2_has_compressed_clusters(BlockDriverState *bs)
5365  {
5366      int64_t offset = 0;
5367      int64_t bytes = bdrv_getlength(bs);
5368  
5369      if (bytes < 0) {
5370          return bytes;
5371      }
5372  
5373      while (bytes != 0) {
5374          int ret;
5375          QCow2SubclusterType type;
5376          unsigned int cur_bytes = MIN(INT_MAX, bytes);
5377          uint64_t host_offset;
5378  
5379          ret = qcow2_get_host_offset(bs, offset, &cur_bytes, &host_offset,
5380                                      &type);
5381          if (ret < 0) {
5382              return ret;
5383          }
5384  
5385          if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
5386              return 1;
5387          }
5388  
5389          offset += cur_bytes;
5390          bytes -= cur_bytes;
5391      }
5392  
5393      return 0;
5394  }
5395  
5396  /*
5397   * Downgrades an image's version. To achieve this, any incompatible features
5398   * have to be removed.
5399   */
5400  static int qcow2_downgrade(BlockDriverState *bs, int target_version,
5401                             BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
5402                             Error **errp)
5403  {
5404      BDRVQcow2State *s = bs->opaque;
5405      int current_version = s->qcow_version;
5406      int ret;
5407      int i;
5408  
5409      /* This is qcow2_downgrade(), not qcow2_upgrade() */
5410      assert(target_version < current_version);
5411  
5412      /* There are no other versions (now) that you can downgrade to */
5413      assert(target_version == 2);
5414  
5415      if (s->refcount_order != 4) {
5416          error_setg(errp, "compat=0.10 requires refcount_bits=16");
5417          return -ENOTSUP;
5418      }
5419  
5420      if (has_data_file(bs)) {
5421          error_setg(errp, "Cannot downgrade an image with a data file");
5422          return -ENOTSUP;
5423      }
5424  
5425      /*
5426       * If any internal snapshot has a different size than the current
5427       * image size, or VM state size that exceeds 32 bits, downgrading
5428       * is unsafe.  Even though we would still use v3-compliant output
5429       * to preserve that data, other v2 programs might not realize
5430       * those optional fields are important.
5431       */
5432      for (i = 0; i < s->nb_snapshots; i++) {
5433          if (s->snapshots[i].vm_state_size > UINT32_MAX ||
5434              s->snapshots[i].disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
5435              error_setg(errp, "Internal snapshots prevent downgrade of image");
5436              return -ENOTSUP;
5437          }
5438      }
5439  
5440      /* clear incompatible features */
5441      if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
5442          ret = qcow2_mark_clean(bs);
5443          if (ret < 0) {
5444              error_setg_errno(errp, -ret, "Failed to make the image clean");
5445              return ret;
5446          }
5447      }
5448  
5449      /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
5450       * the first place; if that happens nonetheless, returning -ENOTSUP is the
5451       * best thing to do anyway */
5452  
5453      if (s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION) {
5454          error_setg(errp, "Cannot downgrade an image with incompatible features "
5455                     "0x%" PRIx64 " set",
5456                     s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION);
5457          return -ENOTSUP;
5458      }
5459  
5460      /* since we can ignore compatible features, we can set them to 0 as well */
5461      s->compatible_features = 0;
5462      /* if lazy refcounts have been used, they have already been fixed through
5463       * clearing the dirty flag */
5464  
5465      /* clearing autoclear features is trivial */
5466      s->autoclear_features = 0;
5467  
5468      ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
5469      if (ret < 0) {
5470          error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
5471          return ret;
5472      }
5473  
5474      if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
5475          ret = qcow2_has_compressed_clusters(bs);
5476          if (ret < 0) {
5477              error_setg(errp, "Failed to check block status");
5478              return -EINVAL;
5479          }
5480          if (ret) {
5481              error_setg(errp, "Cannot downgrade an image with zstd compression "
5482                         "type and existing compressed clusters");
5483              return -ENOTSUP;
5484          }
5485          /*
5486           * No compressed clusters for now, so just chose default zlib
5487           * compression.
5488           */
5489          s->incompatible_features &= ~QCOW2_INCOMPAT_COMPRESSION;
5490          s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
5491      }
5492  
5493      assert(s->incompatible_features == 0);
5494  
5495      s->qcow_version = target_version;
5496      ret = qcow2_update_header(bs);
5497      if (ret < 0) {
5498          s->qcow_version = current_version;
5499          error_setg_errno(errp, -ret, "Failed to update the image header");
5500          return ret;
5501      }
5502      return 0;
5503  }
5504  
5505  /*
5506   * Upgrades an image's version.  While newer versions encompass all
5507   * features of older versions, some things may have to be presented
5508   * differently.
5509   */
5510  static int qcow2_upgrade(BlockDriverState *bs, int target_version,
5511                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
5512                           Error **errp)
5513  {
5514      BDRVQcow2State *s = bs->opaque;
5515      bool need_snapshot_update;
5516      int current_version = s->qcow_version;
5517      int i;
5518      int ret;
5519  
5520      /* This is qcow2_upgrade(), not qcow2_downgrade() */
5521      assert(target_version > current_version);
5522  
5523      /* There are no other versions (yet) that you can upgrade to */
5524      assert(target_version == 3);
5525  
5526      status_cb(bs, 0, 2, cb_opaque);
5527  
5528      /*
5529       * In v2, snapshots do not need to have extra data.  v3 requires
5530       * the 64-bit VM state size and the virtual disk size to be
5531       * present.
5532       * qcow2_write_snapshots() will always write the list in the
5533       * v3-compliant format.
5534       */
5535      need_snapshot_update = false;
5536      for (i = 0; i < s->nb_snapshots; i++) {
5537          if (s->snapshots[i].extra_data_size <
5538              sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
5539              sizeof_field(QCowSnapshotExtraData, disk_size))
5540          {
5541              need_snapshot_update = true;
5542              break;
5543          }
5544      }
5545      if (need_snapshot_update) {
5546          ret = qcow2_write_snapshots(bs);
5547          if (ret < 0) {
5548              error_setg_errno(errp, -ret, "Failed to update the snapshot table");
5549              return ret;
5550          }
5551      }
5552      status_cb(bs, 1, 2, cb_opaque);
5553  
5554      s->qcow_version = target_version;
5555      ret = qcow2_update_header(bs);
5556      if (ret < 0) {
5557          s->qcow_version = current_version;
5558          error_setg_errno(errp, -ret, "Failed to update the image header");
5559          return ret;
5560      }
5561      status_cb(bs, 2, 2, cb_opaque);
5562  
5563      return 0;
5564  }
5565  
/* Identifies the amend step for which progress is being reported */
typedef enum Qcow2AmendOperation {
    /*
     * Value that Qcow2AmendHelperCBInfo::last_operation is statically
     * initialized to.  It lets the helper CB tell its very first
     * invocation apart from a change of operation.
     */
    QCOW2_NO_OPERATION            = 0,

    QCOW2_UPGRADING               = 1,
    QCOW2_UPDATING_ENCRYPTION     = 2,
    QCOW2_CHANGING_REFCOUNT_ORDER = 3,
    QCOW2_DOWNGRADING             = 4,
} Qcow2AmendOperation;
5577  
5578  typedef struct Qcow2AmendHelperCBInfo {
5579      /* The code coordinating the amend operations should only modify
5580       * these four fields; the rest will be managed by the CB */
5581      BlockDriverAmendStatusCB *original_status_cb;
5582      void *original_cb_opaque;
5583  
5584      Qcow2AmendOperation current_operation;
5585  
5586      /* Total number of operations to perform (only set once) */
5587      int total_operations;
5588  
5589      /* The following fields are managed by the CB */
5590  
5591      /* Number of operations completed */
5592      int operations_completed;
5593  
5594      /* Cumulative offset of all completed operations */
5595      int64_t offset_completed;
5596  
5597      Qcow2AmendOperation last_operation;
5598      int64_t last_work_size;
5599  } Qcow2AmendHelperCBInfo;
5600  
5601  static void qcow2_amend_helper_cb(BlockDriverState *bs,
5602                                    int64_t operation_offset,
5603                                    int64_t operation_work_size, void *opaque)
5604  {
5605      Qcow2AmendHelperCBInfo *info = opaque;
5606      int64_t current_work_size;
5607      int64_t projected_work_size;
5608  
5609      if (info->current_operation != info->last_operation) {
5610          if (info->last_operation != QCOW2_NO_OPERATION) {
5611              info->offset_completed += info->last_work_size;
5612              info->operations_completed++;
5613          }
5614  
5615          info->last_operation = info->current_operation;
5616      }
5617  
5618      assert(info->total_operations > 0);
5619      assert(info->operations_completed < info->total_operations);
5620  
5621      info->last_work_size = operation_work_size;
5622  
5623      current_work_size = info->offset_completed + operation_work_size;
5624  
5625      /* current_work_size is the total work size for (operations_completed + 1)
5626       * operations (which includes this one), so multiply it by the number of
5627       * operations not covered and divide it by the number of operations
5628       * covered to get a projection for the operations not covered */
5629      projected_work_size = current_work_size * (info->total_operations -
5630                                                 info->operations_completed - 1)
5631                                              / (info->operations_completed + 1);
5632  
5633      info->original_status_cb(bs, info->offset_completed + operation_offset,
5634                               current_work_size + projected_work_size,
5635                               info->original_cb_opaque);
5636  }
5637  
5638  static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
5639                                 BlockDriverAmendStatusCB *status_cb,
5640                                 void *cb_opaque,
5641                                 bool force,
5642                                 Error **errp)
5643  {
5644      BDRVQcow2State *s = bs->opaque;
5645      int old_version = s->qcow_version, new_version = old_version;
5646      uint64_t new_size = 0;
5647      const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
5648      bool lazy_refcounts = s->use_lazy_refcounts;
5649      bool data_file_raw = data_file_is_raw(bs);
5650      const char *compat = NULL;
5651      int refcount_bits = s->refcount_bits;
5652      int ret;
5653      QemuOptDesc *desc = opts->list->desc;
5654      Qcow2AmendHelperCBInfo helper_cb_info;
5655      bool encryption_update = false;
5656  
5657      while (desc && desc->name) {
5658          if (!qemu_opt_find(opts, desc->name)) {
5659              /* only change explicitly defined options */
5660              desc++;
5661              continue;
5662          }
5663  
5664          if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
5665              compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
5666              if (!compat) {
5667                  /* preserve default */
5668              } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
5669                  new_version = 2;
5670              } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
5671                  new_version = 3;
5672              } else {
5673                  error_setg(errp, "Unknown compatibility level %s", compat);
5674                  return -EINVAL;
5675              }
5676          } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
5677              new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5678          } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
5679              backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5680          } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
5681              backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5682          } else if (g_str_has_prefix(desc->name, "encrypt.")) {
5683              if (!s->crypto) {
5684                  error_setg(errp,
5685                             "Can't amend encryption options - encryption not present");
5686                  return -EINVAL;
5687              }
5688              if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
5689                  error_setg(errp,
5690                             "Only LUKS encryption options can be amended");
5691                  return -ENOTSUP;
5692              }
5693              encryption_update = true;
5694          } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
5695              lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
5696                                                 lazy_refcounts);
5697          } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
5698              refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
5699                                                  refcount_bits);
5700  
5701              if (refcount_bits <= 0 || refcount_bits > 64 ||
5702                  !is_power_of_2(refcount_bits))
5703              {
5704                  error_setg(errp, "Refcount width must be a power of two and "
5705                             "may not exceed 64 bits");
5706                  return -EINVAL;
5707              }
5708          } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
5709              data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
5710              if (data_file && !has_data_file(bs)) {
5711                  error_setg(errp, "data-file can only be set for images that "
5712                                   "use an external data file");
5713                  return -EINVAL;
5714              }
5715          } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
5716              data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
5717                                                data_file_raw);
5718              if (data_file_raw && !data_file_is_raw(bs)) {
5719                  error_setg(errp, "data-file-raw cannot be set on existing "
5720                                   "images");
5721                  return -EINVAL;
5722              }
5723          } else {
5724              /* if this point is reached, this probably means a new option was
5725               * added without having it covered here */
5726              abort();
5727          }
5728  
5729          desc++;
5730      }
5731  
5732      helper_cb_info = (Qcow2AmendHelperCBInfo){
5733          .original_status_cb = status_cb,
5734          .original_cb_opaque = cb_opaque,
5735          .total_operations = (new_version != old_version)
5736                            + (s->refcount_bits != refcount_bits) +
5737                              (encryption_update == true)
5738      };
5739  
5740      /* Upgrade first (some features may require compat=1.1) */
5741      if (new_version > old_version) {
5742          helper_cb_info.current_operation = QCOW2_UPGRADING;
5743          ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
5744                              &helper_cb_info, errp);
5745          if (ret < 0) {
5746              return ret;
5747          }
5748      }
5749  
5750      if (encryption_update) {
5751          QDict *amend_opts_dict;
5752          QCryptoBlockAmendOptions *amend_opts;
5753  
5754          helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION;
5755          amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp);
5756          if (!amend_opts_dict) {
5757              return -EINVAL;
5758          }
5759          amend_opts = block_crypto_amend_opts_init(amend_opts_dict, errp);
5760          qobject_unref(amend_opts_dict);
5761          if (!amend_opts) {
5762              return -EINVAL;
5763          }
5764          ret = qcrypto_block_amend_options(s->crypto,
5765                                            qcow2_crypto_hdr_read_func,
5766                                            qcow2_crypto_hdr_write_func,
5767                                            bs,
5768                                            amend_opts,
5769                                            force,
5770                                            errp);
5771          qapi_free_QCryptoBlockAmendOptions(amend_opts);
5772          if (ret < 0) {
5773              return ret;
5774          }
5775      }
5776  
5777      if (s->refcount_bits != refcount_bits) {
5778          int refcount_order = ctz32(refcount_bits);
5779  
5780          if (new_version < 3 && refcount_bits != 16) {
5781              error_setg(errp, "Refcount widths other than 16 bits require "
5782                         "compatibility level 1.1 or above (use compat=1.1 or "
5783                         "greater)");
5784              return -EINVAL;
5785          }
5786  
5787          helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5788          ret = qcow2_change_refcount_order(bs, refcount_order,
5789                                            &qcow2_amend_helper_cb,
5790                                            &helper_cb_info, errp);
5791          if (ret < 0) {
5792              return ret;
5793          }
5794      }
5795  
5796      /* data-file-raw blocks backing files, so clear it first if requested */
5797      if (data_file_raw) {
5798          s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5799      } else {
5800          s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5801      }
5802  
5803      if (data_file) {
5804          g_free(s->image_data_file);
5805          s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5806      }
5807  
5808      ret = qcow2_update_header(bs);
5809      if (ret < 0) {
5810          error_setg_errno(errp, -ret, "Failed to update the image header");
5811          return ret;
5812      }
5813  
5814      if (backing_file || backing_format) {
5815          if (g_strcmp0(backing_file, s->image_backing_file) ||
5816              g_strcmp0(backing_format, s->image_backing_format)) {
5817              error_setg(errp, "Cannot amend the backing file");
5818              error_append_hint(errp,
5819                                "You can use 'qemu-img rebase' instead.\n");
5820              return -EINVAL;
5821          }
5822      }
5823  
5824      if (s->use_lazy_refcounts != lazy_refcounts) {
5825          if (lazy_refcounts) {
5826              if (new_version < 3) {
5827                  error_setg(errp, "Lazy refcounts only supported with "
5828                             "compatibility level 1.1 and above (use compat=1.1 "
5829                             "or greater)");
5830                  return -EINVAL;
5831              }
5832              s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5833              ret = qcow2_update_header(bs);
5834              if (ret < 0) {
5835                  s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5836                  error_setg_errno(errp, -ret, "Failed to update the image header");
5837                  return ret;
5838              }
5839              s->use_lazy_refcounts = true;
5840          } else {
5841              /* make image clean first */
5842              ret = qcow2_mark_clean(bs);
5843              if (ret < 0) {
5844                  error_setg_errno(errp, -ret, "Failed to make the image clean");
5845                  return ret;
5846              }
5847              /* now disallow lazy refcounts */
5848              s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5849              ret = qcow2_update_header(bs);
5850              if (ret < 0) {
5851                  s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5852                  error_setg_errno(errp, -ret, "Failed to update the image header");
5853                  return ret;
5854              }
5855              s->use_lazy_refcounts = false;
5856          }
5857      }
5858  
5859      if (new_size) {
5860          BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
5861                                              errp);
5862          if (!blk) {
5863              return -EPERM;
5864          }
5865  
5866          /*
5867           * Amending image options should ensure that the image has
5868           * exactly the given new values, so pass exact=true here.
5869           */
5870          ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
5871          blk_unref(blk);
5872          if (ret < 0) {
5873              return ret;
5874          }
5875      }
5876  
5877      /* Downgrade last (so unsupported features can be removed before) */
5878      if (new_version < old_version) {
5879          helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5880          ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5881                                &helper_cb_info, errp);
5882          if (ret < 0) {
5883              return ret;
5884          }
5885      }
5886  
5887      return 0;
5888  }
5889  
5890  static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
5891                                         BlockdevAmendOptions *opts,
5892                                         bool force,
5893                                         Error **errp)
5894  {
5895      BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2;
5896      BDRVQcow2State *s = bs->opaque;
5897      int ret = 0;
5898  
5899      if (qopts->encrypt) {
5900          if (!s->crypto) {
5901              error_setg(errp, "image is not encrypted, can't amend");
5902              return -EOPNOTSUPP;
5903          }
5904  
5905          if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
5906              error_setg(errp,
5907                         "Amend can't be used to change the qcow2 encryption format");
5908              return -EOPNOTSUPP;
5909          }
5910  
5911          if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
5912              error_setg(errp,
5913                         "Only LUKS encryption options can be amended for qcow2 with blockdev-amend");
5914              return -EOPNOTSUPP;
5915          }
5916  
5917          ret = qcrypto_block_amend_options(s->crypto,
5918                                            qcow2_crypto_hdr_read_func,
5919                                            qcow2_crypto_hdr_write_func,
5920                                            bs,
5921                                            qopts->encrypt,
5922                                            force,
5923                                            errp);
5924      }
5925      return ret;
5926  }
5927  
5928  /*
5929   * If offset or size are negative, respectively, they will not be included in
5930   * the BLOCK_IMAGE_CORRUPTED event emitted.
5931   * fatal will be ignored for read-only BDS; corruptions found there will always
5932   * be considered non-fatal.
5933   */
5934  void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5935                               int64_t size, const char *message_format, ...)
5936  {
5937      BDRVQcow2State *s = bs->opaque;
5938      const char *node_name;
5939      char *message;
5940      va_list ap;
5941  
5942      fatal = fatal && bdrv_is_writable(bs);
5943  
5944      if (s->signaled_corruption &&
5945          (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5946      {
5947          return;
5948      }
5949  
5950      va_start(ap, message_format);
5951      message = g_strdup_vprintf(message_format, ap);
5952      va_end(ap);
5953  
5954      if (fatal) {
5955          fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5956                  "corruption events will be suppressed\n", message);
5957      } else {
5958          fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5959                  "corruption events will be suppressed\n", message);
5960      }
5961  
5962      node_name = bdrv_get_node_name(bs);
5963      qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5964                                            *node_name ? node_name : NULL,
5965                                            message, offset >= 0, offset,
5966                                            size >= 0, size,
5967                                            fatal);
5968      g_free(message);
5969  
5970      if (fatal) {
5971          qcow2_mark_corrupt(bs);
5972          bs->drv = NULL; /* make BDS unusable */
5973      }
5974  
5975      s->signaled_corruption = true;
5976  }
5977  
/*
 * Option descriptors used by both qcow2_create_opts and qcow2_amend_opts.
 * The expansion is a comma-separated run of initializers without a trailing
 * comma, so a comma has to follow the macro where it is used.
 */
#define QCOW_COMMON_OPTIONS                                                 \
    {                                                                       \
        .name = BLOCK_OPT_SIZE,                                             \
        .type = QEMU_OPT_SIZE,                                              \
        .help = "Virtual disk size"                                         \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_COMPAT_LEVEL,                                     \
        .type = QEMU_OPT_STRING,                                            \
        .help = "Compatibility level (v2 [0.10] or v3 [1.1])"               \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_BACKING_FILE,                                     \
        .type = QEMU_OPT_STRING,                                            \
        .help = "File name of a base image"                                 \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_BACKING_FMT,                                      \
        .type = QEMU_OPT_STRING,                                            \
        .help = "Image format of the base image"                            \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_DATA_FILE,                                        \
        .type = QEMU_OPT_STRING,                                            \
        .help = "File name of an external data file"                        \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_DATA_FILE_RAW,                                    \
        .type = QEMU_OPT_BOOL,                                              \
        .help = "The external data file must stay valid as a raw image"     \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_LAZY_REFCOUNTS,                                   \
        .type = QEMU_OPT_BOOL,                                              \
        .help = "Postpone refcount updates",                                \
        .def_value_str = "off"                                              \
    },                                                                      \
    {                                                                       \
        .name = BLOCK_OPT_REFCOUNT_BITS,                                    \
        .type = QEMU_OPT_NUMBER,                                            \
        .help = "Width of a reference count entry in bits",                 \
        .def_value_str = "16"                                               \
    }
6022  
6023  static QemuOptsList qcow2_create_opts = {
6024      .name = "qcow2-create-opts",
6025      .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
6026      .desc = {
6027          {                                                               \
6028              .name = BLOCK_OPT_ENCRYPT,                                  \
6029              .type = QEMU_OPT_BOOL,                                      \
6030              .help = "Encrypt the image with format 'aes'. (Deprecated " \
6031                      "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",    \
6032          },                                                              \
6033          {                                                               \
6034              .name = BLOCK_OPT_ENCRYPT_FORMAT,                           \
6035              .type = QEMU_OPT_STRING,                                    \
6036              .help = "Encrypt the image, format choices: 'aes', 'luks'", \
6037          },                                                              \
6038          BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",                     \
6039              "ID of secret providing qcow AES key or LUKS passphrase"),  \
6040          BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),               \
6041          BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),              \
6042          BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),                \
6043          BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),           \
6044          BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),                 \
6045          BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),                \
6046          {                                                               \
6047              .name = BLOCK_OPT_CLUSTER_SIZE,                             \
6048              .type = QEMU_OPT_SIZE,                                      \
6049              .help = "qcow2 cluster size",                               \
6050              .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)            \
6051          },                                                              \
6052          {                                                               \
6053              .name = BLOCK_OPT_EXTL2,                                    \
6054              .type = QEMU_OPT_BOOL,                                      \
6055              .help = "Extended L2 tables",                               \
6056              .def_value_str = "off"                                      \
6057          },                                                              \
6058          {                                                               \
6059              .name = BLOCK_OPT_PREALLOC,                                 \
6060              .type = QEMU_OPT_STRING,                                    \
6061              .help = "Preallocation mode (allowed values: off, "         \
6062                      "metadata, falloc, full)"                           \
6063          },                                                              \
6064          {                                                               \
6065              .name = BLOCK_OPT_COMPRESSION_TYPE,                         \
6066              .type = QEMU_OPT_STRING,                                    \
6067              .help = "Compression method used for image cluster "        \
6068                      "compression",                                      \
6069              .def_value_str = "zlib"                                     \
6070          },
6071          QCOW_COMMON_OPTIONS,
6072          { /* end of list */ }
6073      }
6074  };
6075  
6076  static QemuOptsList qcow2_amend_opts = {
6077      .name = "qcow2-amend-opts",
6078      .head = QTAILQ_HEAD_INITIALIZER(qcow2_amend_opts.head),
6079      .desc = {
6080          BLOCK_CRYPTO_OPT_DEF_LUKS_STATE("encrypt."),
6081          BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT("encrypt."),
6082          BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET("encrypt."),
6083          BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET("encrypt."),
6084          BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
6085          QCOW_COMMON_OPTIONS,
6086          { /* end of list */ }
6087      }
6088  };
6089  
6090  static const char *const qcow2_strong_runtime_opts[] = {
6091      "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
6092  
6093      NULL
6094  };
6095  
6096  BlockDriver bdrv_qcow2 = {
6097      .format_name        = "qcow2",
6098      .instance_size      = sizeof(BDRVQcow2State),
6099      .bdrv_probe         = qcow2_probe,
6100      .bdrv_open          = qcow2_open,
6101      .bdrv_close         = qcow2_close,
6102      .bdrv_reopen_prepare  = qcow2_reopen_prepare,
6103      .bdrv_reopen_commit   = qcow2_reopen_commit,
6104      .bdrv_reopen_commit_post = qcow2_reopen_commit_post,
6105      .bdrv_reopen_abort    = qcow2_reopen_abort,
6106      .bdrv_join_options    = qcow2_join_options,
6107      .bdrv_child_perm      = bdrv_default_perms,
6108      .bdrv_co_create_opts  = qcow2_co_create_opts,
6109      .bdrv_co_create       = qcow2_co_create,
6110      .bdrv_has_zero_init   = qcow2_has_zero_init,
6111      .bdrv_co_block_status = qcow2_co_block_status,
6112  
6113      .bdrv_co_preadv_part    = qcow2_co_preadv_part,
6114      .bdrv_co_pwritev_part   = qcow2_co_pwritev_part,
6115      .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
6116  
6117      .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
6118      .bdrv_co_pdiscard       = qcow2_co_pdiscard,
6119      .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
6120      .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
6121      .bdrv_co_truncate       = qcow2_co_truncate,
6122      .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
6123      .bdrv_make_empty        = qcow2_make_empty,
6124  
6125      .bdrv_snapshot_create   = qcow2_snapshot_create,
6126      .bdrv_snapshot_goto     = qcow2_snapshot_goto,
6127      .bdrv_snapshot_delete   = qcow2_snapshot_delete,
6128      .bdrv_snapshot_list     = qcow2_snapshot_list,
6129      .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
6130      .bdrv_measure           = qcow2_measure,
6131      .bdrv_co_get_info       = qcow2_co_get_info,
6132      .bdrv_get_specific_info = qcow2_get_specific_info,
6133  
6134      .bdrv_co_save_vmstate   = qcow2_co_save_vmstate,
6135      .bdrv_co_load_vmstate   = qcow2_co_load_vmstate,
6136  
6137      .is_format                  = true,
6138      .supports_backing           = true,
6139      .bdrv_change_backing_file   = qcow2_change_backing_file,
6140  
6141      .bdrv_refresh_limits        = qcow2_refresh_limits,
6142      .bdrv_co_invalidate_cache   = qcow2_co_invalidate_cache,
6143      .bdrv_inactivate            = qcow2_inactivate,
6144  
6145      .create_opts         = &qcow2_create_opts,
6146      .amend_opts          = &qcow2_amend_opts,
6147      .strong_runtime_opts = qcow2_strong_runtime_opts,
6148      .mutable_opts        = mutable_opts,
6149      .bdrv_co_check       = qcow2_co_check,
6150      .bdrv_amend_options  = qcow2_amend_options,
6151      .bdrv_co_amend       = qcow2_co_amend,
6152  
6153      .bdrv_detach_aio_context  = qcow2_detach_aio_context,
6154      .bdrv_attach_aio_context  = qcow2_attach_aio_context,
6155  
6156      .bdrv_supports_persistent_dirty_bitmap =
6157              qcow2_supports_persistent_dirty_bitmap,
6158      .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
6159      .bdrv_co_remove_persistent_dirty_bitmap =
6160              qcow2_co_remove_persistent_dirty_bitmap,
6161  };
6162  
6163  static void bdrv_qcow2_init(void)
6164  {
6165      bdrv_register(&bdrv_qcow2);
6166  }
6167  
6168  block_init(bdrv_qcow2_init);
6169