xref: /openbmc/qemu/block.c (revision 48805df9c22a0700fba4b3b548fafaa21726ca68)
1  /*
2   * QEMU System Emulator block driver
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   * Copyright (c) 2020 Virtuozzo International GmbH.
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  #include "block/trace.h"
28  #include "block/block_int.h"
29  #include "block/blockjob.h"
30  #include "block/dirty-bitmap.h"
31  #include "block/fuse.h"
32  #include "block/nbd.h"
33  #include "block/qdict.h"
34  #include "qemu/error-report.h"
35  #include "block/module_block.h"
36  #include "qemu/main-loop.h"
37  #include "qemu/module.h"
38  #include "qapi/error.h"
39  #include "qapi/qmp/qdict.h"
40  #include "qapi/qmp/qjson.h"
41  #include "qapi/qmp/qnull.h"
42  #include "qapi/qmp/qstring.h"
43  #include "qapi/qobject-output-visitor.h"
44  #include "qapi/qapi-visit-block-core.h"
45  #include "sysemu/block-backend.h"
46  #include "qemu/notify.h"
47  #include "qemu/option.h"
48  #include "qemu/coroutine.h"
49  #include "block/qapi.h"
50  #include "qemu/timer.h"
51  #include "qemu/cutils.h"
52  #include "qemu/id.h"
53  #include "qemu/range.h"
54  #include "qemu/rcu.h"
55  #include "block/coroutines.h"
56  
57  #ifdef CONFIG_BSD
58  #include <sys/ioctl.h>
59  #include <sys/queue.h>
60  #if defined(HAVE_SYS_DISK_H)
61  #include <sys/disk.h>
62  #endif
63  #endif
64  
65  #ifdef _WIN32
66  #include <windows.h>
67  #endif
68  
69  #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
70  
71  /* Protected by BQL */
72  static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
73      QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
74  
75  /* Protected by BQL */
76  static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
77      QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
78  
79  /* Protected by BQL */
80  static QLIST_HEAD(, BlockDriver) bdrv_drivers =
81      QLIST_HEAD_INITIALIZER(bdrv_drivers);
82  
83  static BlockDriverState *bdrv_open_inherit(const char *filename,
84                                             const char *reference,
85                                             QDict *options, int flags,
86                                             BlockDriverState *parent,
87                                             const BdrvChildClass *child_class,
88                                             BdrvChildRole child_role,
89                                             Error **errp);
90  
91  static bool bdrv_recurse_has_child(BlockDriverState *bs,
92                                     BlockDriverState *child);
93  
94  static void bdrv_replace_child_noperm(BdrvChild *child,
95                                        BlockDriverState *new_bs);
96  static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
97  
98  static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
99                                 BlockReopenQueue *queue,
100                                 Transaction *change_child_tran, Error **errp);
101  static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
102  static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
103  
104  static bool bdrv_backing_overridden(BlockDriverState *bs);
105  
106  static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
107                                      GHashTable *visited, Transaction *tran,
108                                      Error **errp);
109  
110  /* If non-zero, use only whitelisted block drivers */
111  static int use_bdrv_whitelist;
112  
113  #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: the string may be empty. */
        return 0;
    }

    return filename[1] == ':';
}
120  
121  int is_windows_drive(const char *filename)
122  {
123      if (is_windows_drive_prefix(filename) &&
124          filename[2] == '\0')
125          return 1;
126      if (strstart(filename, "\\\\.\\", NULL) ||
127          strstart(filename, "//./", NULL))
128          return 1;
129      return 0;
130  }
131  #endif
132  
133  size_t bdrv_opt_mem_align(BlockDriverState *bs)
134  {
135      if (!bs || !bs->drv) {
136          /* page size or 4k (hdd sector size) should be on the safe side */
137          return MAX(4096, qemu_real_host_page_size());
138      }
139      IO_CODE();
140  
141      return bs->bl.opt_mem_alignment;
142  }
143  
144  size_t bdrv_min_mem_align(BlockDriverState *bs)
145  {
146      if (!bs || !bs->drv) {
147          /* page size or 4k (hdd sector size) should be on the safe side */
148          return MAX(4096, qemu_real_host_page_size());
149      }
150      IO_CODE();
151  
152      return bs->bl.min_mem_alignment;
153  }
154  
/*
 * Return 1 if @path starts with "<protocol>:", i.e. a ':' appears before
 * any path separator; return 0 otherwise.
 *
 * On Windows, drive specifications are never treated as protocols and '\'
 * counts as a path separator as well.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    /* Either ':' , a separator, or the terminating NUL stopped the scan */
    return path[prefix_len] == ':';
}
172  
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows, a leading '\'
 * and anything recognized as a drive specification also count.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
185  
/*
 * Build a path for @filename, interpreting it relative to @base_path.
 *
 * If @filename is absolute, a duplicate of it is returned.  Otherwise the
 * result is the "directory part" of @base_path followed by @filename, where
 * the directory part ends after the last path separator, or after the
 * "<protocol>:" prefix if that comes later (so URLs are supported).
 *
 * The returned string is newly allocated with the GLib allocator.
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *after_protocol = base_path;
    const char *last_sep;
    const char *dir_end;
    size_t filename_len;
    char *combined;
    int dir_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            after_protocol = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif

    /* The directory part ends at whichever boundary comes last */
    dir_end = last_sep ? last_sep + 1 : base_path;
    if (dir_end < after_protocol) {
        dir_end = after_protocol;
    }
    dir_len = dir_end - base_path;

    filename_len = strlen(filename);
    combined = g_malloc(dir_len + filename_len + 1);
    memcpy(combined, base_path, dir_len);
    /* Copy @filename including its terminating NUL */
    memcpy(combined + dir_len, filename, filename_len + 1);

    return combined;
}
234  
235  /*
236   * Helper function for bdrv_parse_filename() implementations to remove optional
237   * protocol prefixes (especially "file:") from a filename and for putting the
238   * stripped filename into the options QDict if there is such a prefix.
239   */
240  void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
241                                        QDict *options)
242  {
243      if (strstart(filename, prefix, &filename)) {
244          /* Stripping the explicit protocol prefix may result in a protocol
245           * prefix being (wrongly) detected (if the filename contains a colon) */
246          if (path_has_protocol(filename)) {
247              GString *fat_filename;
248  
249              /* This means there is some colon before the first slash; therefore,
250               * this cannot be an absolute path */
251              assert(!path_is_absolute(filename));
252  
253              /* And we can thus fix the protocol detection issue by prefixing it
254               * by "./" */
255              fat_filename = g_string_new("./");
256              g_string_append(fat_filename, filename);
257  
258              assert(!path_has_protocol(fat_filename->str));
259  
260              qdict_put(options, "filename",
261                        qstring_from_gstring(fat_filename));
262          } else {
263              /* If no protocol prefix was detected, we can use the shortened
264               * filename as-is */
265              qdict_put_str(options, "filename", filename);
266          }
267      }
268  }
269  
270  
271  /* Returns whether the image file is opened as read-only. Note that this can
272   * return false and writing to the image file is still not possible because the
273   * image is inactivated. */
274  bool bdrv_is_read_only(BlockDriverState *bs)
275  {
276      IO_CODE();
277      return !(bs->open_flags & BDRV_O_RDWR);
278  }
279  
280  static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
281                                    bool ignore_allow_rdw, Error **errp)
282  {
283      IO_CODE();
284  
285      /* Do not set read_only if copy_on_read is enabled */
286      if (bs->copy_on_read && read_only) {
287          error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
288                     bdrv_get_device_or_node_name(bs));
289          return -EINVAL;
290      }
291  
292      /* Do not clear read_only if it is prohibited */
293      if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
294          !ignore_allow_rdw)
295      {
296          error_setg(errp, "Node '%s' is read only",
297                     bdrv_get_device_or_node_name(bs));
298          return -EPERM;
299      }
300  
301      return 0;
302  }
303  
304  /*
305   * Called by a driver that can only provide a read-only image.
306   *
307   * Returns 0 if the node is already read-only or it could switch the node to
308   * read-only because BDRV_O_AUTO_RDONLY is set.
309   *
310   * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
311   * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
312   * is not NULL, it is used as the error message for the Error object.
313   */
314  int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
315                                Error **errp)
316  {
317      int ret = 0;
318      IO_CODE();
319  
320      if (!(bs->open_flags & BDRV_O_RDWR)) {
321          return 0;
322      }
323      if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
324          goto fail;
325      }
326  
327      ret = bdrv_can_set_read_only(bs, true, false, NULL);
328      if (ret < 0) {
329          goto fail;
330      }
331  
332      bs->open_flags &= ~BDRV_O_RDWR;
333  
334      return 0;
335  
336  fail:
337      error_setg(errp, "%s", errmsg ?: "Image is read-only");
338      return -EACCES;
339  }
340  
341  /*
342   * If @backing is empty, this function returns NULL without setting
343   * @errp.  In all other cases, NULL will only be returned with @errp
344   * set.
345   *
346   * Therefore, a return value of NULL without @errp set means that
347   * there is no backing file; if @errp is set, there is one but its
348   * absolute filename cannot be generated.
349   */
350  char *bdrv_get_full_backing_filename_from_filename(const char *backed,
351                                                     const char *backing,
352                                                     Error **errp)
353  {
354      if (backing[0] == '\0') {
355          return NULL;
356      } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
357          return g_strdup(backing);
358      } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
359          error_setg(errp, "Cannot use relative backing file names for '%s'",
360                     backed);
361          return NULL;
362      } else {
363          return path_combine(backed, backing);
364      }
365  }
366  
367  /*
368   * If @filename is empty or NULL, this function returns NULL without
369   * setting @errp.  In all other cases, NULL will only be returned with
370   * @errp set.
371   */
372  static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
373                                           const char *filename, Error **errp)
374  {
375      char *dir, *full_name;
376  
377      if (!filename || filename[0] == '\0') {
378          return NULL;
379      } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
380          return g_strdup(filename);
381      }
382  
383      dir = bdrv_dirname(relative_to, errp);
384      if (!dir) {
385          return NULL;
386      }
387  
388      full_name = g_strconcat(dir, filename, NULL);
389      g_free(dir);
390      return full_name;
391  }
392  
393  char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
394  {
395      GLOBAL_STATE_CODE();
396      return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
397  }
398  
399  void bdrv_register(BlockDriver *bdrv)
400  {
401      assert(bdrv->format_name);
402      GLOBAL_STATE_CODE();
403      QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
404  }
405  
406  BlockDriverState *bdrv_new(void)
407  {
408      BlockDriverState *bs;
409      int i;
410  
411      GLOBAL_STATE_CODE();
412  
413      bs = g_new0(BlockDriverState, 1);
414      QLIST_INIT(&bs->dirty_bitmaps);
415      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
416          QLIST_INIT(&bs->op_blockers[i]);
417      }
418      qemu_co_mutex_init(&bs->reqs_lock);
419      qemu_mutex_init(&bs->dirty_bitmap_mutex);
420      bs->refcnt = 1;
421      bs->aio_context = qemu_get_aio_context();
422  
423      qemu_co_queue_init(&bs->flush_queue);
424  
425      qemu_co_mutex_init(&bs->bsc_modify_lock);
426      bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
427  
428      for (i = 0; i < bdrv_drain_all_count; i++) {
429          bdrv_drained_begin(bs);
430      }
431  
432      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
433  
434      return bs;
435  }
436  
437  static BlockDriver *bdrv_do_find_format(const char *format_name)
438  {
439      BlockDriver *drv1;
440      GLOBAL_STATE_CODE();
441  
442      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
443          if (!strcmp(drv1->format_name, format_name)) {
444              return drv1;
445          }
446      }
447  
448      return NULL;
449  }
450  
451  BlockDriver *bdrv_find_format(const char *format_name)
452  {
453      BlockDriver *drv1;
454      int i;
455  
456      GLOBAL_STATE_CODE();
457  
458      drv1 = bdrv_do_find_format(format_name);
459      if (drv1) {
460          return drv1;
461      }
462  
463      /* The driver isn't registered, maybe we need to load a module */
464      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
465          if (!strcmp(block_driver_modules[i].format_name, format_name)) {
466              Error *local_err = NULL;
467              int rv = block_module_load(block_driver_modules[i].library_name,
468                                         &local_err);
469              if (rv > 0) {
470                  return bdrv_do_find_format(format_name);
471              } else if (rv < 0) {
472                  error_report_err(local_err);
473              }
474              break;
475          }
476      }
477      return NULL;
478  }
479  
480  static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
481  {
482      static const char *whitelist_rw[] = {
483          CONFIG_BDRV_RW_WHITELIST
484          NULL
485      };
486      static const char *whitelist_ro[] = {
487          CONFIG_BDRV_RO_WHITELIST
488          NULL
489      };
490      const char **p;
491  
492      if (!whitelist_rw[0] && !whitelist_ro[0]) {
493          return 1;               /* no whitelist, anything goes */
494      }
495  
496      for (p = whitelist_rw; *p; p++) {
497          if (!strcmp(format_name, *p)) {
498              return 1;
499          }
500      }
501      if (read_only) {
502          for (p = whitelist_ro; *p; p++) {
503              if (!strcmp(format_name, *p)) {
504                  return 1;
505              }
506          }
507      }
508      return 0;
509  }
510  
511  int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
512  {
513      GLOBAL_STATE_CODE();
514      return bdrv_format_is_whitelisted(drv->format_name, read_only);
515  }
516  
517  bool bdrv_uses_whitelist(void)
518  {
519      return use_bdrv_whitelist;
520  }
521  
/*
 * Bundle of a driver, a filename, creation options, a return code and an
 * Error object.
 *
 * NOTE(review): nothing in the visible part of this file uses this type;
 * presumably it carried arguments and results of an image-creation call
 * into a coroutine -- confirm whether it is still needed.
 */
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;
529  
530  int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
531                                  QemuOpts *opts, Error **errp)
532  {
533      int ret;
534      GLOBAL_STATE_CODE();
535      ERRP_GUARD();
536      assert_bdrv_graph_readable();
537  
538      if (!drv->bdrv_co_create_opts) {
539          error_setg(errp, "Driver '%s' does not support image creation",
540                     drv->format_name);
541          return -ENOTSUP;
542      }
543  
544      ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
545      if (ret < 0 && !*errp) {
546          error_setg_errno(errp, -ret, "Could not create image");
547      }
548  
549      return ret;
550  }
551  
552  /**
553   * Helper function for bdrv_create_file_fallback(): Resize @blk to at
554   * least the given @minimum_size.
555   *
556   * On success, return @blk's actual length.
557   * Otherwise, return -errno.
558   */
559  static int64_t create_file_fallback_truncate(BlockBackend *blk,
560                                               int64_t minimum_size, Error **errp)
561  {
562      Error *local_err = NULL;
563      int64_t size;
564      int ret;
565  
566      GLOBAL_STATE_CODE();
567  
568      ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
569                         &local_err);
570      if (ret < 0 && ret != -ENOTSUP) {
571          error_propagate(errp, local_err);
572          return ret;
573      }
574  
575      size = blk_getlength(blk);
576      if (size < 0) {
577          error_free(local_err);
578          error_setg_errno(errp, -size,
579                           "Failed to inquire the new image file's length");
580          return size;
581      }
582  
583      if (size < minimum_size) {
584          /* Need to grow the image, but we failed to do that */
585          error_propagate(errp, local_err);
586          return -ENOTSUP;
587      }
588  
589      error_free(local_err);
590      local_err = NULL;
591  
592      return size;
593  }
594  
595  /**
596   * Helper function for bdrv_create_file_fallback(): Zero the first
597   * sector to remove any potentially pre-existing image header.
598   */
599  static int coroutine_fn
600  create_file_fallback_zero_first_sector(BlockBackend *blk,
601                                         int64_t current_size,
602                                         Error **errp)
603  {
604      int64_t bytes_to_clear;
605      int ret;
606  
607      GLOBAL_STATE_CODE();
608  
609      bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
610      if (bytes_to_clear) {
611          ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
612          if (ret < 0) {
613              error_setg_errno(errp, -ret,
614                               "Failed to clear the new image's first sector");
615              return ret;
616          }
617      }
618  
619      return 0;
620  }
621  
622  /**
623   * Simple implementation of bdrv_co_create_opts for protocol drivers
624   * which only support creation via opening a file
625   * (usually existing raw storage device)
626   */
627  int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
628                                              const char *filename,
629                                              QemuOpts *opts,
630                                              Error **errp)
631  {
632      BlockBackend *blk;
633      QDict *options;
634      int64_t size = 0;
635      char *buf = NULL;
636      PreallocMode prealloc;
637      Error *local_err = NULL;
638      int ret;
639  
640      GLOBAL_STATE_CODE();
641  
642      size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
643      buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
644      prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
645                                 PREALLOC_MODE_OFF, &local_err);
646      g_free(buf);
647      if (local_err) {
648          error_propagate(errp, local_err);
649          return -EINVAL;
650      }
651  
652      if (prealloc != PREALLOC_MODE_OFF) {
653          error_setg(errp, "Unsupported preallocation mode '%s'",
654                     PreallocMode_str(prealloc));
655          return -ENOTSUP;
656      }
657  
658      options = qdict_new();
659      qdict_put_str(options, "driver", drv->format_name);
660  
661      blk = blk_co_new_open(filename, NULL, options,
662                            BDRV_O_RDWR | BDRV_O_RESIZE, errp);
663      if (!blk) {
664          error_prepend(errp, "Protocol driver '%s' does not support image "
665                        "creation, and opening the image failed: ",
666                        drv->format_name);
667          return -EINVAL;
668      }
669  
670      size = create_file_fallback_truncate(blk, size, errp);
671      if (size < 0) {
672          ret = size;
673          goto out;
674      }
675  
676      ret = create_file_fallback_zero_first_sector(blk, size, errp);
677      if (ret < 0) {
678          goto out;
679      }
680  
681      ret = 0;
682  out:
683      blk_unref(blk);
684      return ret;
685  }
686  
687  int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
688                                       Error **errp)
689  {
690      QemuOpts *protocol_opts;
691      BlockDriver *drv;
692      QDict *qdict;
693      int ret;
694  
695      GLOBAL_STATE_CODE();
696  
697      drv = bdrv_find_protocol(filename, true, errp);
698      if (drv == NULL) {
699          return -ENOENT;
700      }
701  
702      if (!drv->create_opts) {
703          error_setg(errp, "Driver '%s' does not support image creation",
704                     drv->format_name);
705          return -ENOTSUP;
706      }
707  
708      /*
709       * 'opts' contains a QemuOptsList with a combination of format and protocol
710       * default values.
711       *
712       * The format properly removes its options, but the default values remain
713       * in 'opts->list'.  So if the protocol has options with the same name
714       * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
715       * of the format, since for overlapping options, the format wins.
716       *
717       * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
718       * only the set options, and then convert it back to QemuOpts, using the
719       * create_opts of the protocol. So the new QemuOpts, will contain only the
720       * protocol defaults.
721       */
722      qdict = qemu_opts_to_qdict(opts, NULL);
723      protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
724      if (protocol_opts == NULL) {
725          ret = -EINVAL;
726          goto out;
727      }
728  
729      ret = bdrv_co_create(drv, filename, protocol_opts, errp);
730  out:
731      qemu_opts_del(protocol_opts);
732      qobject_unref(qdict);
733      return ret;
734  }
735  
736  int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
737  {
738      Error *local_err = NULL;
739      int ret;
740  
741      IO_CODE();
742      assert(bs != NULL);
743      assert_bdrv_graph_readable();
744  
745      if (!bs->drv) {
746          error_setg(errp, "Block node '%s' is not opened", bs->filename);
747          return -ENOMEDIUM;
748      }
749  
750      if (!bs->drv->bdrv_co_delete_file) {
751          error_setg(errp, "Driver '%s' does not support image deletion",
752                     bs->drv->format_name);
753          return -ENOTSUP;
754      }
755  
756      ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
757      if (ret < 0) {
758          error_propagate(errp, local_err);
759      }
760  
761      return ret;
762  }
763  
764  void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
765  {
766      Error *local_err = NULL;
767      int ret;
768      IO_CODE();
769  
770      if (!bs) {
771          return;
772      }
773  
774      ret = bdrv_co_delete_file(bs, &local_err);
775      /*
776       * ENOTSUP will happen if the block driver doesn't support
777       * the 'bdrv_co_delete_file' interface. This is a predictable
778       * scenario and shouldn't be reported back to the user.
779       */
780      if (ret == -ENOTSUP) {
781          error_free(local_err);
782      } else if (ret < 0) {
783          error_report_err(local_err);
784      }
785  }
786  
787  /**
788   * Try to get @bs's logical and physical block size.
789   * On success, store them in @bsz struct and return 0.
790   * On failure return -errno.
791   * @bs must not be empty.
792   */
793  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
794  {
795      BlockDriver *drv = bs->drv;
796      BlockDriverState *filtered = bdrv_filter_bs(bs);
797      GLOBAL_STATE_CODE();
798  
799      if (drv && drv->bdrv_probe_blocksizes) {
800          return drv->bdrv_probe_blocksizes(bs, bsz);
801      } else if (filtered) {
802          return bdrv_probe_blocksizes(filtered, bsz);
803      }
804  
805      return -ENOTSUP;
806  }
807  
808  /**
809   * Try to get @bs's geometry (cyls, heads, sectors).
810   * On success, store them in @geo struct and return 0.
811   * On failure return -errno.
812   * @bs must not be empty.
813   */
814  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
815  {
816      BlockDriver *drv = bs->drv;
817      BlockDriverState *filtered = bdrv_filter_bs(bs);
818      GLOBAL_STATE_CODE();
819  
820      if (drv && drv->bdrv_probe_geometry) {
821          return drv->bdrv_probe_geometry(bs, geo);
822      } else if (filtered) {
823          return bdrv_probe_geometry(filtered, geo);
824      }
825  
826      return -ENOTSUP;
827  }
828  
829  /*
830   * Create a uniquely-named empty temporary file.
831   * Return the actual file name used upon success, otherwise NULL.
832   * This string should be freed with g_free() when not needed any longer.
833   *
834   * Note: creating a temporary file for the caller to (re)open is
835   * inherently racy. Use g_file_open_tmp() instead whenever practical.
836   */
837  char *create_tmp_file(Error **errp)
838  {
839      int fd;
840      const char *tmpdir;
841      g_autofree char *filename = NULL;
842  
843      tmpdir = g_get_tmp_dir();
844  #ifndef _WIN32
845      /*
846       * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot")
847       *
848       * This function is used to create temporary disk images (like -snapshot),
849       * so the files can become very large. /tmp is often a tmpfs where as
850       * /var/tmp is usually on a disk, so more appropriate for disk images.
851       */
852      if (!g_strcmp0(tmpdir, "/tmp")) {
853          tmpdir = "/var/tmp";
854      }
855  #endif
856  
857      filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir);
858      fd = g_mkstemp(filename);
859      if (fd < 0) {
860          error_setg_errno(errp, errno, "Could not open temporary file '%s'",
861                           filename);
862          return NULL;
863      }
864      close(fd);
865  
866      return g_steal_pointer(&filename);
867  }
868  
869  /*
870   * Detect host devices. By convention, /dev/cdrom[N] is always
871   * recognized as a host CDROM.
872   */
873  static BlockDriver *find_hdev_driver(const char *filename)
874  {
875      int score_max = 0, score;
876      BlockDriver *drv = NULL, *d;
877      GLOBAL_STATE_CODE();
878  
879      QLIST_FOREACH(d, &bdrv_drivers, list) {
880          if (d->bdrv_probe_device) {
881              score = d->bdrv_probe_device(filename);
882              if (score > score_max) {
883                  score_max = score;
884                  drv = d;
885              }
886          }
887      }
888  
889      return drv;
890  }
891  
892  static BlockDriver *bdrv_do_find_protocol(const char *protocol)
893  {
894      BlockDriver *drv1;
895      GLOBAL_STATE_CODE();
896  
897      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
898          if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
899              return drv1;
900          }
901      }
902  
903      return NULL;
904  }
905  
906  BlockDriver *bdrv_find_protocol(const char *filename,
907                                  bool allow_protocol_prefix,
908                                  Error **errp)
909  {
910      BlockDriver *drv1;
911      char protocol[128];
912      int len;
913      const char *p;
914      int i;
915  
916      GLOBAL_STATE_CODE();
917      /* TODO Drivers without bdrv_file_open must be specified explicitly */
918  
919      /*
920       * XXX(hch): we really should not let host device detection
921       * override an explicit protocol specification, but moving this
922       * later breaks access to device names with colons in them.
923       * Thanks to the brain-dead persistent naming schemes on udev-
924       * based Linux systems those actually are quite common.
925       */
926      drv1 = find_hdev_driver(filename);
927      if (drv1) {
928          return drv1;
929      }
930  
931      if (!path_has_protocol(filename) || !allow_protocol_prefix) {
932          return &bdrv_file;
933      }
934  
935      p = strchr(filename, ':');
936      assert(p != NULL);
937      len = p - filename;
938      if (len > sizeof(protocol) - 1)
939          len = sizeof(protocol) - 1;
940      memcpy(protocol, filename, len);
941      protocol[len] = '\0';
942  
943      drv1 = bdrv_do_find_protocol(protocol);
944      if (drv1) {
945          return drv1;
946      }
947  
948      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
949          if (block_driver_modules[i].protocol_name &&
950              !strcmp(block_driver_modules[i].protocol_name, protocol)) {
951              int rv = block_module_load(block_driver_modules[i].library_name, errp);
952              if (rv > 0) {
953                  drv1 = bdrv_do_find_protocol(protocol);
954              } else if (rv < 0) {
955                  return NULL;
956              }
957              break;
958          }
959      }
960  
961      if (!drv1) {
962          error_setg(errp, "Unknown protocol '%s'", protocol);
963      }
964      return drv1;
965  }
966  
967  /*
968   * Guess image format by probing its contents.
969   * This is not a good idea when your image is raw (CVE-2008-2004), but
970   * we do it anyway for backward compatibility.
971   *
972   * @buf         contains the image's first @buf_size bytes.
973   * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
974   *              but can be smaller if the image file is smaller)
975   * @filename    is its filename.
976   *
977   * For all block drivers, call the bdrv_probe() method to get its
978   * probing score.
979   * Return the first block driver with the highest probing score.
980   */
981  BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
982                              const char *filename)
983  {
984      int score_max = 0, score;
985      BlockDriver *drv = NULL, *d;
986      IO_CODE();
987  
988      QLIST_FOREACH(d, &bdrv_drivers, list) {
989          if (d->bdrv_probe) {
990              score = d->bdrv_probe(buf, buf_size, filename);
991              if (score > score_max) {
992                  score_max = score;
993                  drv = d;
994              }
995          }
996      }
997  
998      return drv;
999  }
1000  
1001  static int find_image_format(BlockBackend *file, const char *filename,
1002                               BlockDriver **pdrv, Error **errp)
1003  {
1004      BlockDriver *drv;
1005      uint8_t buf[BLOCK_PROBE_BUF_SIZE];
1006      int ret = 0;
1007  
1008      GLOBAL_STATE_CODE();
1009  
1010      /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
1011      if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
1012          *pdrv = &bdrv_raw;
1013          return ret;
1014      }
1015  
1016      ret = blk_pread(file, 0, sizeof(buf), buf, 0);
1017      if (ret < 0) {
1018          error_setg_errno(errp, -ret, "Could not read image for determining its "
1019                           "format");
1020          *pdrv = NULL;
1021          return ret;
1022      }
1023  
1024      drv = bdrv_probe_all(buf, sizeof(buf), filename);
1025      if (!drv) {
1026          error_setg(errp, "Could not determine image format: No compatible "
1027                     "driver found");
1028          *pdrv = NULL;
1029          return -ENOENT;
1030      }
1031  
1032      *pdrv = drv;
1033      return 0;
1034  }
1035  
1036  /**
1037   * Set the current 'total_sectors' value
1038   * Return 0 on success, -errno on error.
1039   */
1040  int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
1041                                                 int64_t hint)
1042  {
1043      BlockDriver *drv = bs->drv;
1044      IO_CODE();
1045      assert_bdrv_graph_readable();
1046  
1047      if (!drv) {
1048          return -ENOMEDIUM;
1049      }
1050  
1051      /* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */
1052      if (bdrv_is_sg(bs))
1053          return 0;
1054  
1055      /* query actual device if possible, otherwise just trust the hint */
1056      if (drv->bdrv_co_getlength) {
1057          int64_t length = drv->bdrv_co_getlength(bs);
1058          if (length < 0) {
1059              return length;
1060          }
1061          hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
1062      }
1063  
1064      bs->total_sectors = hint;
1065  
1066      if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
1067          return -EFBIG;
1068      }
1069  
1070      return 0;
1071  }
1072  
1073  /**
1074   * Combines a QDict of new block driver @options with any missing options taken
1075   * from @old_options, so that leaving out an option defaults to its old value.
1076   */
1077  static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1078                                QDict *old_options)
1079  {
1080      GLOBAL_STATE_CODE();
1081      if (bs->drv && bs->drv->bdrv_join_options) {
1082          bs->drv->bdrv_join_options(options, old_options);
1083      } else {
1084          qdict_join(options, old_options, false);
1085      }
1086  }
1087  
1088  static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1089                                                              int open_flags,
1090                                                              Error **errp)
1091  {
1092      Error *local_err = NULL;
1093      char *value = qemu_opt_get_del(opts, "detect-zeroes");
1094      BlockdevDetectZeroesOptions detect_zeroes =
1095          qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1096                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
1097      GLOBAL_STATE_CODE();
1098      g_free(value);
1099      if (local_err) {
1100          error_propagate(errp, local_err);
1101          return detect_zeroes;
1102      }
1103  
1104      if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1105          !(open_flags & BDRV_O_UNMAP))
1106      {
1107          error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1108                     "without setting discard operation to unmap");
1109      }
1110  
1111      return detect_zeroes;
1112  }
1113  
1114  /**
1115   * Set open flags for aio engine
1116   *
1117   * Return 0 on success, -1 if the engine specified is invalid
1118   */
1119  int bdrv_parse_aio(const char *mode, int *flags)
1120  {
1121      if (!strcmp(mode, "threads")) {
1122          /* do nothing, default */
1123      } else if (!strcmp(mode, "native")) {
1124          *flags |= BDRV_O_NATIVE_AIO;
1125  #ifdef CONFIG_LINUX_IO_URING
1126      } else if (!strcmp(mode, "io_uring")) {
1127          *flags |= BDRV_O_IO_URING;
1128  #endif
1129      } else {
1130          return -1;
1131      }
1132  
1133      return 0;
1134  }
1135  
1136  /**
1137   * Set open flags for a given discard mode
1138   *
1139   * Return 0 on success, -1 if the discard mode was invalid.
1140   */
1141  int bdrv_parse_discard_flags(const char *mode, int *flags)
1142  {
1143      *flags &= ~BDRV_O_UNMAP;
1144  
1145      if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1146          /* do nothing */
1147      } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1148          *flags |= BDRV_O_UNMAP;
1149      } else {
1150          return -1;
1151      }
1152  
1153      return 0;
1154  }
1155  
1156  /**
1157   * Set open flags for a given cache mode
1158   *
1159   * Return 0 on success, -1 if the cache mode was invalid.
1160   */
1161  int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
1162  {
1163      *flags &= ~BDRV_O_CACHE_MASK;
1164  
1165      if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
1166          *writethrough = false;
1167          *flags |= BDRV_O_NOCACHE;
1168      } else if (!strcmp(mode, "directsync")) {
1169          *writethrough = true;
1170          *flags |= BDRV_O_NOCACHE;
1171      } else if (!strcmp(mode, "writeback")) {
1172          *writethrough = false;
1173      } else if (!strcmp(mode, "unsafe")) {
1174          *writethrough = false;
1175          *flags |= BDRV_O_NO_FLUSH;
1176      } else if (!strcmp(mode, "writethrough")) {
1177          *writethrough = true;
1178      } else {
1179          return -1;
1180      }
1181  
1182      return 0;
1183  }
1184  
1185  static char *bdrv_child_get_parent_desc(BdrvChild *c)
1186  {
1187      BlockDriverState *parent = c->opaque;
1188      return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
1189  }
1190  
1191  static void bdrv_child_cb_drained_begin(BdrvChild *child)
1192  {
1193      BlockDriverState *bs = child->opaque;
1194      bdrv_do_drained_begin_quiesce(bs, NULL);
1195  }
1196  
1197  static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1198  {
1199      BlockDriverState *bs = child->opaque;
1200      return bdrv_drain_poll(bs, NULL, false);
1201  }
1202  
1203  static void bdrv_child_cb_drained_end(BdrvChild *child)
1204  {
1205      BlockDriverState *bs = child->opaque;
1206      bdrv_drained_end(bs);
1207  }
1208  
1209  static int bdrv_child_cb_inactivate(BdrvChild *child)
1210  {
1211      BlockDriverState *bs = child->opaque;
1212      GLOBAL_STATE_CODE();
1213      assert(bs->open_flags & BDRV_O_INACTIVE);
1214      return 0;
1215  }
1216  
1217  static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
1218                                           GHashTable *visited, Transaction *tran,
1219                                           Error **errp)
1220  {
1221      BlockDriverState *bs = child->opaque;
1222      return bdrv_change_aio_context(bs, ctx, visited, tran, errp);
1223  }
1224  
1225  /*
1226   * Returns the options and flags that a temporary snapshot should get, based on
1227   * the originally requested flags (the originally requested image will have
1228   * flags like a backing file)
1229   */
1230  static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1231                                         int parent_flags, QDict *parent_options)
1232  {
1233      GLOBAL_STATE_CODE();
1234      *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1235  
1236      /* For temporary files, unconditional cache=unsafe is fine */
1237      qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1238      qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
1239  
1240      /* Copy the read-only and discard options from the parent */
1241      qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1242      qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
1243  
1244      /* aio=native doesn't work for cache.direct=off, so disable it for the
1245       * temporary snapshot */
1246      *child_flags &= ~BDRV_O_NATIVE_AIO;
1247  }
1248  
1249  static void bdrv_backing_attach(BdrvChild *c)
1250  {
1251      BlockDriverState *parent = c->opaque;
1252      BlockDriverState *backing_hd = c->bs;
1253  
1254      GLOBAL_STATE_CODE();
1255      assert(!parent->backing_blocker);
1256      error_setg(&parent->backing_blocker,
1257                 "node is used as backing hd of '%s'",
1258                 bdrv_get_device_or_node_name(parent));
1259  
1260      bdrv_refresh_filename(backing_hd);
1261  
1262      parent->open_flags &= ~BDRV_O_NO_BACKING;
1263  
1264      bdrv_op_block_all(backing_hd, parent->backing_blocker);
1265      /* Otherwise we won't be able to commit or stream */
1266      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1267                      parent->backing_blocker);
1268      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1269                      parent->backing_blocker);
1270      /*
1271       * We do backup in 3 ways:
1272       * 1. drive backup
1273       *    The target bs is new opened, and the source is top BDS
1274       * 2. blockdev backup
1275       *    Both the source and the target are top BDSes.
1276       * 3. internal backup(used for block replication)
1277       *    Both the source and the target are backing file
1278       *
1279       * In case 1 and 2, neither the source nor the target is the backing file.
1280       * In case 3, we will block the top BDS, so there is only one block job
1281       * for the top BDS and its backing chain.
1282       */
1283      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1284                      parent->backing_blocker);
1285      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1286                      parent->backing_blocker);
1287  }
1288  
1289  static void bdrv_backing_detach(BdrvChild *c)
1290  {
1291      BlockDriverState *parent = c->opaque;
1292  
1293      GLOBAL_STATE_CODE();
1294      assert(parent->backing_blocker);
1295      bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1296      error_free(parent->backing_blocker);
1297      parent->backing_blocker = NULL;
1298  }
1299  
1300  static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1301                                          const char *filename, Error **errp)
1302  {
1303      BlockDriverState *parent = c->opaque;
1304      bool read_only = bdrv_is_read_only(parent);
1305      int ret;
1306      GLOBAL_STATE_CODE();
1307  
1308      if (read_only) {
1309          ret = bdrv_reopen_set_read_only(parent, false, errp);
1310          if (ret < 0) {
1311              return ret;
1312          }
1313      }
1314  
1315      ret = bdrv_change_backing_file(parent, filename,
1316                                     base->drv ? base->drv->format_name : "",
1317                                     false);
1318      if (ret < 0) {
1319          error_setg_errno(errp, -ret, "Could not update backing file link");
1320      }
1321  
1322      if (read_only) {
1323          bdrv_reopen_set_read_only(parent, true, NULL);
1324      }
1325  
1326      return ret;
1327  }
1328  
1329  /*
1330   * Returns the options and flags that a generic child of a BDS should
1331   * get, based on the given options and flags for the parent BDS.
1332   */
1333  static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1334                                     int *child_flags, QDict *child_options,
1335                                     int parent_flags, QDict *parent_options)
1336  {
1337      int flags = parent_flags;
1338      GLOBAL_STATE_CODE();
1339  
1340      /*
1341       * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1342       * Generally, the question to answer is: Should this child be
1343       * format-probed by default?
1344       */
1345  
1346      /*
1347       * Pure and non-filtered data children of non-format nodes should
1348       * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1349       * set).  This only affects a very limited set of drivers (namely
1350       * quorum and blkverify when this comment was written).
1351       * Force-clear BDRV_O_PROTOCOL then.
1352       */
1353      if (!parent_is_format &&
1354          (role & BDRV_CHILD_DATA) &&
1355          !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1356      {
1357          flags &= ~BDRV_O_PROTOCOL;
1358      }
1359  
1360      /*
1361       * All children of format nodes (except for COW children) and all
1362       * metadata children in general should never be format-probed.
1363       * Force-set BDRV_O_PROTOCOL then.
1364       */
1365      if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1366          (role & BDRV_CHILD_METADATA))
1367      {
1368          flags |= BDRV_O_PROTOCOL;
1369      }
1370  
1371      /*
1372       * If the cache mode isn't explicitly set, inherit direct and no-flush from
1373       * the parent.
1374       */
1375      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1376      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1377      qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1378  
1379      if (role & BDRV_CHILD_COW) {
1380          /* backing files are opened read-only by default */
1381          qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1382          qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1383      } else {
1384          /* Inherit the read-only option from the parent if it's not set */
1385          qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1386          qdict_copy_default(child_options, parent_options,
1387                             BDRV_OPT_AUTO_READ_ONLY);
1388      }
1389  
1390      /*
1391       * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1392       * can default to enable it on lower layers regardless of the
1393       * parent option.
1394       */
1395      qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1396  
1397      /* Clear flags that only apply to the top layer */
1398      flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1399  
1400      if (role & BDRV_CHILD_METADATA) {
1401          flags &= ~BDRV_O_NO_IO;
1402      }
1403      if (role & BDRV_CHILD_COW) {
1404          flags &= ~BDRV_O_TEMPORARY;
1405      }
1406  
1407      *child_flags = flags;
1408  }
1409  
1410  static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
1411  {
1412      BlockDriverState *bs = child->opaque;
1413  
1414      assert_bdrv_graph_writable();
1415      QLIST_INSERT_HEAD(&bs->children, child, next);
1416      if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
1417          /*
1418           * Here we handle filters and block/raw-format.c when it behave like
1419           * filter. They generally have a single PRIMARY child, which is also the
1420           * FILTERED child, and that they may have multiple more children, which
1421           * are neither PRIMARY nor FILTERED. And never we have a COW child here.
1422           * So bs->file will be the PRIMARY child, unless the PRIMARY child goes
1423           * into bs->backing on exceptional cases; and bs->backing will be
1424           * nothing else.
1425           */
1426          assert(!(child->role & BDRV_CHILD_COW));
1427          if (child->role & BDRV_CHILD_PRIMARY) {
1428              assert(child->role & BDRV_CHILD_FILTERED);
1429              assert(!bs->backing);
1430              assert(!bs->file);
1431  
1432              if (bs->drv->filtered_child_is_backing) {
1433                  bs->backing = child;
1434              } else {
1435                  bs->file = child;
1436              }
1437          } else {
1438              assert(!(child->role & BDRV_CHILD_FILTERED));
1439          }
1440      } else if (child->role & BDRV_CHILD_COW) {
1441          assert(bs->drv->supports_backing);
1442          assert(!(child->role & BDRV_CHILD_PRIMARY));
1443          assert(!bs->backing);
1444          bs->backing = child;
1445          bdrv_backing_attach(child);
1446      } else if (child->role & BDRV_CHILD_PRIMARY) {
1447          assert(!bs->file);
1448          bs->file = child;
1449      }
1450  }
1451  
1452  static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
1453  {
1454      BlockDriverState *bs = child->opaque;
1455  
1456      if (child->role & BDRV_CHILD_COW) {
1457          bdrv_backing_detach(child);
1458      }
1459  
1460      assert_bdrv_graph_writable();
1461      QLIST_REMOVE(child, next);
1462      if (child == bs->backing) {
1463          assert(child != bs->file);
1464          bs->backing = NULL;
1465      } else if (child == bs->file) {
1466          bs->file = NULL;
1467      }
1468  }
1469  
1470  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1471                                           const char *filename, Error **errp)
1472  {
1473      if (c->role & BDRV_CHILD_COW) {
1474          return bdrv_backing_update_filename(c, base, filename, errp);
1475      }
1476      return 0;
1477  }
1478  
1479  AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
1480  {
1481      BlockDriverState *bs = c->opaque;
1482      IO_CODE();
1483  
1484      return bdrv_get_aio_context(bs);
1485  }
1486  
1487  const BdrvChildClass child_of_bds = {
1488      .parent_is_bds   = true,
1489      .get_parent_desc = bdrv_child_get_parent_desc,
1490      .inherit_options = bdrv_inherited_options,
1491      .drained_begin   = bdrv_child_cb_drained_begin,
1492      .drained_poll    = bdrv_child_cb_drained_poll,
1493      .drained_end     = bdrv_child_cb_drained_end,
1494      .attach          = bdrv_child_cb_attach,
1495      .detach          = bdrv_child_cb_detach,
1496      .inactivate      = bdrv_child_cb_inactivate,
1497      .change_aio_ctx  = bdrv_child_cb_change_aio_ctx,
1498      .update_filename = bdrv_child_cb_update_filename,
1499      .get_parent_aio_context = child_of_bds_get_parent_aio_context,
1500  };
1501  
1502  AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
1503  {
1504      IO_CODE();
1505      return c->klass->get_parent_aio_context(c);
1506  }
1507  
1508  static int bdrv_open_flags(BlockDriverState *bs, int flags)
1509  {
1510      int open_flags = flags;
1511      GLOBAL_STATE_CODE();
1512  
1513      /*
1514       * Clear flags that are internal to the block layer before opening the
1515       * image.
1516       */
1517      open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1518  
1519      return open_flags;
1520  }
1521  
1522  static void update_flags_from_options(int *flags, QemuOpts *opts)
1523  {
1524      GLOBAL_STATE_CODE();
1525  
1526      *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
1527  
1528      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1529          *flags |= BDRV_O_NO_FLUSH;
1530      }
1531  
1532      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1533          *flags |= BDRV_O_NOCACHE;
1534      }
1535  
1536      if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
1537          *flags |= BDRV_O_RDWR;
1538      }
1539  
1540      if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1541          *flags |= BDRV_O_AUTO_RDONLY;
1542      }
1543  }
1544  
1545  static void update_options_from_flags(QDict *options, int flags)
1546  {
1547      GLOBAL_STATE_CODE();
1548      if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1549          qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1550      }
1551      if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1552          qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1553                         flags & BDRV_O_NO_FLUSH);
1554      }
1555      if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1556          qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1557      }
1558      if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1559          qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1560                         flags & BDRV_O_AUTO_RDONLY);
1561      }
1562  }
1563  
1564  static void bdrv_assign_node_name(BlockDriverState *bs,
1565                                    const char *node_name,
1566                                    Error **errp)
1567  {
1568      char *gen_node_name = NULL;
1569      GLOBAL_STATE_CODE();
1570  
1571      if (!node_name) {
1572          node_name = gen_node_name = id_generate(ID_BLOCK);
1573      } else if (!id_wellformed(node_name)) {
1574          /*
1575           * Check for empty string or invalid characters, but not if it is
1576           * generated (generated names use characters not available to the user)
1577           */
1578          error_setg(errp, "Invalid node-name: '%s'", node_name);
1579          return;
1580      }
1581  
1582      /* takes care of avoiding namespaces collisions */
1583      if (blk_by_name(node_name)) {
1584          error_setg(errp, "node-name=%s is conflicting with a device id",
1585                     node_name);
1586          goto out;
1587      }
1588  
1589      /* takes care of avoiding duplicates node names */
1590      if (bdrv_find_node(node_name)) {
1591          error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
1592          goto out;
1593      }
1594  
1595      /* Make sure that the node name isn't truncated */
1596      if (strlen(node_name) >= sizeof(bs->node_name)) {
1597          error_setg(errp, "Node name too long");
1598          goto out;
1599      }
1600  
1601      /* copy node name into the bs and insert it into the graph list */
1602      pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1603      QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1604  out:
1605      g_free(gen_node_name);
1606  }
1607  
1608  /*
1609   * The caller must always hold @bs AioContext lock, because this function calls
1610   * bdrv_refresh_total_sectors() which polls when called from non-coroutine
1611   * context.
1612   */
1613  static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1614                              const char *node_name, QDict *options,
1615                              int open_flags, Error **errp)
1616  {
1617      Error *local_err = NULL;
1618      int i, ret;
1619      GLOBAL_STATE_CODE();
1620  
1621      bdrv_assign_node_name(bs, node_name, &local_err);
1622      if (local_err) {
1623          error_propagate(errp, local_err);
1624          return -EINVAL;
1625      }
1626  
1627      bs->drv = drv;
1628      bs->opaque = g_malloc0(drv->instance_size);
1629  
1630      if (drv->bdrv_file_open) {
1631          assert(!drv->bdrv_needs_filename || bs->filename[0]);
1632          ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1633      } else if (drv->bdrv_open) {
1634          ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1635      } else {
1636          ret = 0;
1637      }
1638  
1639      if (ret < 0) {
1640          if (local_err) {
1641              error_propagate(errp, local_err);
1642          } else if (bs->filename[0]) {
1643              error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1644          } else {
1645              error_setg_errno(errp, -ret, "Could not open image");
1646          }
1647          goto open_failed;
1648      }
1649  
1650      assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
1651      assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
1652  
1653      /*
1654       * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
1655       * drivers that pass read/write requests through to a child the trouble of
1656       * declaring support explicitly.
1657       *
1658       * Drivers must not propagate this flag accidentally when they initiate I/O
1659       * to a bounce buffer. That case should be rare though.
1660       */
1661      bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
1662      bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
1663  
1664      ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
1665      if (ret < 0) {
1666          error_setg_errno(errp, -ret, "Could not refresh total sector count");
1667          return ret;
1668      }
1669  
1670      bdrv_refresh_limits(bs, NULL, &local_err);
1671      if (local_err) {
1672          error_propagate(errp, local_err);
1673          return -EINVAL;
1674      }
1675  
1676      assert(bdrv_opt_mem_align(bs) != 0);
1677      assert(bdrv_min_mem_align(bs) != 0);
1678      assert(is_power_of_2(bs->bl.request_alignment));
1679  
1680      for (i = 0; i < bs->quiesce_counter; i++) {
1681          if (drv->bdrv_drain_begin) {
1682              drv->bdrv_drain_begin(bs);
1683          }
1684      }
1685  
1686      return 0;
1687  open_failed:
1688      bs->drv = NULL;
1689      if (bs->file != NULL) {
1690          bdrv_unref_child(bs, bs->file);
1691          assert(!bs->file);
1692      }
1693      g_free(bs->opaque);
1694      bs->opaque = NULL;
1695      return ret;
1696  }
1697  
1698  /*
1699   * Create and open a block node.
1700   *
1701   * @options is a QDict of options to pass to the block drivers, or NULL for an
1702   * empty set of options. The reference to the QDict belongs to the block layer
1703   * after the call (even on failure), so if the caller intends to reuse the
1704   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
1705   */
1706  BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
1707                                              const char *node_name,
1708                                              QDict *options, int flags,
1709                                              Error **errp)
1710  {
1711      BlockDriverState *bs;
1712      int ret;
1713  
1714      GLOBAL_STATE_CODE();
1715  
1716      bs = bdrv_new();
1717      bs->open_flags = flags;
1718      bs->options = options ?: qdict_new();
1719      bs->explicit_options = qdict_clone_shallow(bs->options);
1720      bs->opaque = NULL;
1721  
1722      update_options_from_flags(bs->options, flags);
1723  
1724      ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1725      if (ret < 0) {
1726          qobject_unref(bs->explicit_options);
1727          bs->explicit_options = NULL;
1728          qobject_unref(bs->options);
1729          bs->options = NULL;
1730          bdrv_unref(bs);
1731          return NULL;
1732      }
1733  
1734      return bs;
1735  }
1736  
1737  /* Create and open a block node. */
1738  BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1739                                         int flags, Error **errp)
1740  {
1741      GLOBAL_STATE_CODE();
1742      return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
1743  }
1744  
1745  QemuOptsList bdrv_runtime_opts = {
1746      .name = "bdrv_common",
1747      .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1748      .desc = {
1749          {
1750              .name = "node-name",
1751              .type = QEMU_OPT_STRING,
1752              .help = "Node name of the block device node",
1753          },
1754          {
1755              .name = "driver",
1756              .type = QEMU_OPT_STRING,
1757              .help = "Block driver to use for the node",
1758          },
1759          {
1760              .name = BDRV_OPT_CACHE_DIRECT,
1761              .type = QEMU_OPT_BOOL,
1762              .help = "Bypass software writeback cache on the host",
1763          },
1764          {
1765              .name = BDRV_OPT_CACHE_NO_FLUSH,
1766              .type = QEMU_OPT_BOOL,
1767              .help = "Ignore flush requests",
1768          },
1769          {
1770              .name = BDRV_OPT_READ_ONLY,
1771              .type = QEMU_OPT_BOOL,
1772              .help = "Node is opened in read-only mode",
1773          },
1774          {
1775              .name = BDRV_OPT_AUTO_READ_ONLY,
1776              .type = QEMU_OPT_BOOL,
1777              .help = "Node can become read-only if opening read-write fails",
1778          },
1779          {
1780              .name = "detect-zeroes",
1781              .type = QEMU_OPT_STRING,
1782              .help = "try to optimize zero writes (off, on, unmap)",
1783          },
1784          {
1785              .name = BDRV_OPT_DISCARD,
1786              .type = QEMU_OPT_STRING,
1787              .help = "discard operation (ignore/off, unmap/on)",
1788          },
1789          {
1790              .name = BDRV_OPT_FORCE_SHARE,
1791              .type = QEMU_OPT_BOOL,
1792              .help = "always accept other writers (default: off)",
1793          },
1794          { /* end of list */ }
1795      },
1796  };
1797  
1798  QemuOptsList bdrv_create_opts_simple = {
1799      .name = "simple-create-opts",
1800      .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
1801      .desc = {
1802          {
1803              .name = BLOCK_OPT_SIZE,
1804              .type = QEMU_OPT_SIZE,
1805              .help = "Virtual disk size"
1806          },
1807          {
1808              .name = BLOCK_OPT_PREALLOC,
1809              .type = QEMU_OPT_STRING,
1810              .help = "Preallocation mode (allowed values: off)"
1811          },
1812          { /* end of list */ }
1813      }
1814  };
1815  
1816  /*
1817   * Common part for opening disk images and files
1818   *
1819   * Removes all processed options from *options.
1820   */
1821  static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1822                              QDict *options, Error **errp)
1823  {
1824      int ret, open_flags;
1825      const char *filename;
1826      const char *driver_name = NULL;
1827      const char *node_name = NULL;
1828      const char *discard;
1829      QemuOpts *opts;
1830      BlockDriver *drv;
1831      Error *local_err = NULL;
1832      bool ro;
1833  
1834      assert(bs->file == NULL);
1835      assert(options != NULL && bs->options != options);
1836      GLOBAL_STATE_CODE();
1837  
1838      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1839      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1840          ret = -EINVAL;
1841          goto fail_opts;
1842      }
1843  
1844      update_flags_from_options(&bs->open_flags, opts);
1845  
1846      driver_name = qemu_opt_get(opts, "driver");
1847      drv = bdrv_find_format(driver_name);
1848      assert(drv != NULL);
1849  
1850      bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1851  
1852      if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1853          error_setg(errp,
1854                     BDRV_OPT_FORCE_SHARE
1855                     "=on can only be used with read-only images");
1856          ret = -EINVAL;
1857          goto fail_opts;
1858      }
1859  
1860      if (file != NULL) {
1861          bdrv_refresh_filename(blk_bs(file));
1862          filename = blk_bs(file)->filename;
1863      } else {
1864          /*
1865           * Caution: while qdict_get_try_str() is fine, getting
1866           * non-string types would require more care.  When @options
1867           * come from -blockdev or blockdev_add, its members are typed
1868           * according to the QAPI schema, but when they come from
1869           * -drive, they're all QString.
1870           */
1871          filename = qdict_get_try_str(options, "filename");
1872      }
1873  
1874      if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1875          error_setg(errp, "The '%s' block driver requires a file name",
1876                     drv->format_name);
1877          ret = -EINVAL;
1878          goto fail_opts;
1879      }
1880  
1881      trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1882                             drv->format_name);
1883  
1884      ro = bdrv_is_read_only(bs);
1885  
1886      if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
1887          if (!ro && bdrv_is_whitelisted(drv, true)) {
1888              ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
1889          } else {
1890              ret = -ENOTSUP;
1891          }
1892          if (ret < 0) {
1893              error_setg(errp,
1894                         !ro && bdrv_is_whitelisted(drv, true)
1895                         ? "Driver '%s' can only be used for read-only devices"
1896                         : "Driver '%s' is not whitelisted",
1897                         drv->format_name);
1898              goto fail_opts;
1899          }
1900      }
1901  
1902      /* bdrv_new() and bdrv_close() make it so */
1903      assert(qatomic_read(&bs->copy_on_read) == 0);
1904  
1905      if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1906          if (!ro) {
1907              bdrv_enable_copy_on_read(bs);
1908          } else {
1909              error_setg(errp, "Can't use copy-on-read on read-only device");
1910              ret = -EINVAL;
1911              goto fail_opts;
1912          }
1913      }
1914  
1915      discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
1916      if (discard != NULL) {
1917          if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1918              error_setg(errp, "Invalid discard option");
1919              ret = -EINVAL;
1920              goto fail_opts;
1921          }
1922      }
1923  
1924      bs->detect_zeroes =
1925          bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1926      if (local_err) {
1927          error_propagate(errp, local_err);
1928          ret = -EINVAL;
1929          goto fail_opts;
1930      }
1931  
1932      if (filename != NULL) {
1933          pstrcpy(bs->filename, sizeof(bs->filename), filename);
1934      } else {
1935          bs->filename[0] = '\0';
1936      }
1937      pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1938  
1939      /* Open the image, either directly or using a protocol */
1940      open_flags = bdrv_open_flags(bs, bs->open_flags);
1941      node_name = qemu_opt_get(opts, "node-name");
1942  
1943      assert(!drv->bdrv_file_open || file == NULL);
1944      ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1945      if (ret < 0) {
1946          goto fail_opts;
1947      }
1948  
1949      qemu_opts_del(opts);
1950      return 0;
1951  
1952  fail_opts:
1953      qemu_opts_del(opts);
1954      return ret;
1955  }
1956  
1957  static QDict *parse_json_filename(const char *filename, Error **errp)
1958  {
1959      QObject *options_obj;
1960      QDict *options;
1961      int ret;
1962      GLOBAL_STATE_CODE();
1963  
1964      ret = strstart(filename, "json:", &filename);
1965      assert(ret);
1966  
1967      options_obj = qobject_from_json(filename, errp);
1968      if (!options_obj) {
1969          error_prepend(errp, "Could not parse the JSON options: ");
1970          return NULL;
1971      }
1972  
1973      options = qobject_to(QDict, options_obj);
1974      if (!options) {
1975          qobject_unref(options_obj);
1976          error_setg(errp, "Invalid JSON object given");
1977          return NULL;
1978      }
1979  
1980      qdict_flatten(options);
1981  
1982      return options;
1983  }
1984  
1985  static void parse_json_protocol(QDict *options, const char **pfilename,
1986                                  Error **errp)
1987  {
1988      QDict *json_options;
1989      Error *local_err = NULL;
1990      GLOBAL_STATE_CODE();
1991  
1992      /* Parse json: pseudo-protocol */
1993      if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1994          return;
1995      }
1996  
1997      json_options = parse_json_filename(*pfilename, &local_err);
1998      if (local_err) {
1999          error_propagate(errp, local_err);
2000          return;
2001      }
2002  
2003      /* Options given in the filename have lower priority than options
2004       * specified directly */
2005      qdict_join(options, json_options, false);
2006      qobject_unref(json_options);
2007      *pfilename = NULL;
2008  }
2009  
2010  /*
2011   * Fills in default options for opening images and converts the legacy
2012   * filename/flags pair to option QDict entries.
2013   * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
2014   * block driver has been specified explicitly.
2015   */
2016  static int bdrv_fill_options(QDict **options, const char *filename,
2017                               int *flags, Error **errp)
2018  {
2019      const char *drvname;
2020      bool protocol = *flags & BDRV_O_PROTOCOL;
2021      bool parse_filename = false;
2022      BlockDriver *drv = NULL;
2023      Error *local_err = NULL;
2024  
2025      GLOBAL_STATE_CODE();
2026  
2027      /*
2028       * Caution: while qdict_get_try_str() is fine, getting non-string
2029       * types would require more care.  When @options come from
2030       * -blockdev or blockdev_add, its members are typed according to
2031       * the QAPI schema, but when they come from -drive, they're all
2032       * QString.
2033       */
2034      drvname = qdict_get_try_str(*options, "driver");
2035      if (drvname) {
2036          drv = bdrv_find_format(drvname);
2037          if (!drv) {
2038              error_setg(errp, "Unknown driver '%s'", drvname);
2039              return -ENOENT;
2040          }
2041          /* If the user has explicitly specified the driver, this choice should
2042           * override the BDRV_O_PROTOCOL flag */
2043          protocol = drv->bdrv_file_open;
2044      }
2045  
2046      if (protocol) {
2047          *flags |= BDRV_O_PROTOCOL;
2048      } else {
2049          *flags &= ~BDRV_O_PROTOCOL;
2050      }
2051  
2052      /* Translate cache options from flags into options */
2053      update_options_from_flags(*options, *flags);
2054  
2055      /* Fetch the file name from the options QDict if necessary */
2056      if (protocol && filename) {
2057          if (!qdict_haskey(*options, "filename")) {
2058              qdict_put_str(*options, "filename", filename);
2059              parse_filename = true;
2060          } else {
2061              error_setg(errp, "Can't specify 'file' and 'filename' options at "
2062                               "the same time");
2063              return -EINVAL;
2064          }
2065      }
2066  
2067      /* Find the right block driver */
2068      /* See cautionary note on accessing @options above */
2069      filename = qdict_get_try_str(*options, "filename");
2070  
2071      if (!drvname && protocol) {
2072          if (filename) {
2073              drv = bdrv_find_protocol(filename, parse_filename, errp);
2074              if (!drv) {
2075                  return -EINVAL;
2076              }
2077  
2078              drvname = drv->format_name;
2079              qdict_put_str(*options, "driver", drvname);
2080          } else {
2081              error_setg(errp, "Must specify either driver or file");
2082              return -EINVAL;
2083          }
2084      }
2085  
2086      assert(drv || !protocol);
2087  
2088      /* Driver-specific filename parsing */
2089      if (drv && drv->bdrv_parse_filename && parse_filename) {
2090          drv->bdrv_parse_filename(filename, *options, &local_err);
2091          if (local_err) {
2092              error_propagate(errp, local_err);
2093              return -EINVAL;
2094          }
2095  
2096          if (!drv->bdrv_needs_filename) {
2097              qdict_del(*options, "filename");
2098          }
2099      }
2100  
2101      return 0;
2102  }
2103  
2104  typedef struct BlockReopenQueueEntry {
2105       bool prepared;
2106       bool perms_checked;
2107       BDRVReopenState state;
2108       QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
2109  } BlockReopenQueueEntry;
2110  
2111  /*
2112   * Return the flags that @bs will have after the reopens in @q have
2113   * successfully completed. If @q is NULL (or @bs is not contained in @q),
2114   * return the current flags.
2115   */
2116  static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
2117  {
2118      BlockReopenQueueEntry *entry;
2119  
2120      if (q != NULL) {
2121          QTAILQ_FOREACH(entry, q, entry) {
2122              if (entry->state.bs == bs) {
2123                  return entry->state.flags;
2124              }
2125          }
2126      }
2127  
2128      return bs->open_flags;
2129  }
2130  
2131  /* Returns whether the image file can be written to after the reopen queue @q
2132   * has been successfully applied, or right now if @q is NULL. */
2133  static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
2134                                            BlockReopenQueue *q)
2135  {
2136      int flags = bdrv_reopen_get_flags(q, bs);
2137  
2138      return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
2139  }
2140  
2141  /*
2142   * Return whether the BDS can be written to.  This is not necessarily
2143   * the same as !bdrv_is_read_only(bs), as inactivated images may not
2144   * be written to but do not count as read-only images.
2145   */
2146  bool bdrv_is_writable(BlockDriverState *bs)
2147  {
2148      IO_CODE();
2149      return bdrv_is_writable_after_reopen(bs, NULL);
2150  }
2151  
2152  static char *bdrv_child_user_desc(BdrvChild *c)
2153  {
2154      GLOBAL_STATE_CODE();
2155      return c->klass->get_parent_desc(c);
2156  }
2157  
2158  /*
2159   * Check that @a allows everything that @b needs. @a and @b must reference same
2160   * child node.
2161   */
2162  static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
2163  {
2164      const char *child_bs_name;
2165      g_autofree char *a_user = NULL;
2166      g_autofree char *b_user = NULL;
2167      g_autofree char *perms = NULL;
2168  
2169      assert(a->bs);
2170      assert(a->bs == b->bs);
2171      GLOBAL_STATE_CODE();
2172  
2173      if ((b->perm & a->shared_perm) == b->perm) {
2174          return true;
2175      }
2176  
2177      child_bs_name = bdrv_get_node_name(b->bs);
2178      a_user = bdrv_child_user_desc(a);
2179      b_user = bdrv_child_user_desc(b);
2180      perms = bdrv_perm_names(b->perm & ~a->shared_perm);
2181  
2182      error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
2183                 "both required by %s (uses node '%s' as '%s' child) and "
2184                 "unshared by %s (uses node '%s' as '%s' child).",
2185                 child_bs_name, perms,
2186                 b_user, child_bs_name, b->name,
2187                 a_user, child_bs_name, a->name);
2188  
2189      return false;
2190  }
2191  
2192  static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
2193  {
2194      BdrvChild *a, *b;
2195      GLOBAL_STATE_CODE();
2196  
2197      /*
2198       * During the loop we'll look at each pair twice. That's correct because
2199       * bdrv_a_allow_b() is asymmetric and we should check each pair in both
2200       * directions.
2201       */
2202      QLIST_FOREACH(a, &bs->parents, next_parent) {
2203          QLIST_FOREACH(b, &bs->parents, next_parent) {
2204              if (a == b) {
2205                  continue;
2206              }
2207  
2208              if (!bdrv_a_allow_b(a, b, errp)) {
2209                  return true;
2210              }
2211          }
2212      }
2213  
2214      return false;
2215  }
2216  
2217  static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
2218                              BdrvChild *c, BdrvChildRole role,
2219                              BlockReopenQueue *reopen_queue,
2220                              uint64_t parent_perm, uint64_t parent_shared,
2221                              uint64_t *nperm, uint64_t *nshared)
2222  {
2223      assert(bs->drv && bs->drv->bdrv_child_perm);
2224      GLOBAL_STATE_CODE();
2225      bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
2226                               parent_perm, parent_shared,
2227                               nperm, nshared);
2228      /* TODO Take force_share from reopen_queue */
2229      if (child_bs && child_bs->force_share) {
2230          *nshared = BLK_PERM_ALL;
2231      }
2232  }
2233  
2234  /*
2235   * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
2236   * nodes that are already in the @list, of course) so that final list is
2237   * topologically sorted. Return the result (GSList @list object is updated, so
2238   * don't use old reference after function call).
2239   *
2240   * On function start @list must be already topologically sorted and for any node
2241   * in the @list the whole subtree of the node must be in the @list as well. The
2242   * simplest way to satisfy this criteria: use only result of
2243   * bdrv_topological_dfs() or NULL as @list parameter.
2244   */
2245  static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
2246                                      BlockDriverState *bs)
2247  {
2248      BdrvChild *child;
2249      g_autoptr(GHashTable) local_found = NULL;
2250  
2251      GLOBAL_STATE_CODE();
2252  
2253      if (!found) {
2254          assert(!list);
2255          found = local_found = g_hash_table_new(NULL, NULL);
2256      }
2257  
2258      if (g_hash_table_contains(found, bs)) {
2259          return list;
2260      }
2261      g_hash_table_add(found, bs);
2262  
2263      QLIST_FOREACH(child, &bs->children, next) {
2264          list = bdrv_topological_dfs(list, found, child->bs);
2265      }
2266  
2267      return g_slist_prepend(list, bs);
2268  }
2269  
2270  typedef struct BdrvChildSetPermState {
2271      BdrvChild *child;
2272      uint64_t old_perm;
2273      uint64_t old_shared_perm;
2274  } BdrvChildSetPermState;
2275  
2276  static void bdrv_child_set_perm_abort(void *opaque)
2277  {
2278      BdrvChildSetPermState *s = opaque;
2279  
2280      GLOBAL_STATE_CODE();
2281  
2282      s->child->perm = s->old_perm;
2283      s->child->shared_perm = s->old_shared_perm;
2284  }
2285  
2286  static TransactionActionDrv bdrv_child_set_pem_drv = {
2287      .abort = bdrv_child_set_perm_abort,
2288      .clean = g_free,
2289  };
2290  
2291  static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
2292                                  uint64_t shared, Transaction *tran)
2293  {
2294      BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
2295      GLOBAL_STATE_CODE();
2296  
2297      *s = (BdrvChildSetPermState) {
2298          .child = c,
2299          .old_perm = c->perm,
2300          .old_shared_perm = c->shared_perm,
2301      };
2302  
2303      c->perm = perm;
2304      c->shared_perm = shared;
2305  
2306      tran_add(tran, &bdrv_child_set_pem_drv, s);
2307  }
2308  
2309  static void bdrv_drv_set_perm_commit(void *opaque)
2310  {
2311      BlockDriverState *bs = opaque;
2312      uint64_t cumulative_perms, cumulative_shared_perms;
2313      GLOBAL_STATE_CODE();
2314  
2315      if (bs->drv->bdrv_set_perm) {
2316          bdrv_get_cumulative_perm(bs, &cumulative_perms,
2317                                   &cumulative_shared_perms);
2318          bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2319      }
2320  }
2321  
2322  static void bdrv_drv_set_perm_abort(void *opaque)
2323  {
2324      BlockDriverState *bs = opaque;
2325      GLOBAL_STATE_CODE();
2326  
2327      if (bs->drv->bdrv_abort_perm_update) {
2328          bs->drv->bdrv_abort_perm_update(bs);
2329      }
2330  }
2331  
2332  TransactionActionDrv bdrv_drv_set_perm_drv = {
2333      .abort = bdrv_drv_set_perm_abort,
2334      .commit = bdrv_drv_set_perm_commit,
2335  };
2336  
2337  static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
2338                               uint64_t shared_perm, Transaction *tran,
2339                               Error **errp)
2340  {
2341      GLOBAL_STATE_CODE();
2342      if (!bs->drv) {
2343          return 0;
2344      }
2345  
2346      if (bs->drv->bdrv_check_perm) {
2347          int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
2348          if (ret < 0) {
2349              return ret;
2350          }
2351      }
2352  
2353      if (tran) {
2354          tran_add(tran, &bdrv_drv_set_perm_drv, bs);
2355      }
2356  
2357      return 0;
2358  }
2359  
2360  typedef struct BdrvReplaceChildState {
2361      BdrvChild *child;
2362      BlockDriverState *old_bs;
2363  } BdrvReplaceChildState;
2364  
2365  static void bdrv_replace_child_commit(void *opaque)
2366  {
2367      BdrvReplaceChildState *s = opaque;
2368      GLOBAL_STATE_CODE();
2369  
2370      bdrv_unref(s->old_bs);
2371  }
2372  
2373  static void bdrv_replace_child_abort(void *opaque)
2374  {
2375      BdrvReplaceChildState *s = opaque;
2376      BlockDriverState *new_bs = s->child->bs;
2377  
2378      GLOBAL_STATE_CODE();
2379      /* old_bs reference is transparently moved from @s to @s->child */
2380      if (!s->child->bs) {
2381          /*
2382           * The parents were undrained when removing old_bs from the child. New
2383           * requests can't have been made, though, because the child was empty.
2384           *
2385           * TODO Make bdrv_replace_child_noperm() transactionable to avoid
2386           * undraining the parent in the first place. Once this is done, having
2387           * new_bs drained when calling bdrv_replace_child_tran() is not a
2388           * requirement any more.
2389           */
2390          bdrv_parent_drained_begin_single(s->child);
2391          assert(!bdrv_parent_drained_poll_single(s->child));
2392      }
2393      assert(s->child->quiesced_parent);
2394      bdrv_replace_child_noperm(s->child, s->old_bs);
2395      bdrv_unref(new_bs);
2396  }
2397  
2398  static TransactionActionDrv bdrv_replace_child_drv = {
2399      .commit = bdrv_replace_child_commit,
2400      .abort = bdrv_replace_child_abort,
2401      .clean = g_free,
2402  };
2403  
2404  /*
2405   * bdrv_replace_child_tran
2406   *
2407   * Note: real unref of old_bs is done only on commit.
2408   *
2409   * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
2410   * kept drained until the transaction is completed.
2411   *
2412   * The function doesn't update permissions, caller is responsible for this.
2413   */
2414  static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
2415                                      Transaction *tran)
2416  {
2417      BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
2418  
2419      assert(child->quiesced_parent);
2420      assert(!new_bs || new_bs->quiesce_counter);
2421  
2422      *s = (BdrvReplaceChildState) {
2423          .child = child,
2424          .old_bs = child->bs,
2425      };
2426      tran_add(tran, &bdrv_replace_child_drv, s);
2427  
2428      if (new_bs) {
2429          bdrv_ref(new_bs);
2430      }
2431      bdrv_replace_child_noperm(child, new_bs);
2432      /* old_bs reference is transparently moved from @child to @s */
2433  }
2434  
2435  /*
2436   * Refresh permissions in @bs subtree. The function is intended to be called
2437   * after some graph modification that was done without permission update.
2438   */
2439  static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
2440                                    Transaction *tran, Error **errp)
2441  {
2442      BlockDriver *drv = bs->drv;
2443      BdrvChild *c;
2444      int ret;
2445      uint64_t cumulative_perms, cumulative_shared_perms;
2446      GLOBAL_STATE_CODE();
2447  
2448      bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
2449  
2450      /* Write permissions never work with read-only images */
2451      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2452          !bdrv_is_writable_after_reopen(bs, q))
2453      {
2454          if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2455              error_setg(errp, "Block node is read-only");
2456          } else {
2457              error_setg(errp, "Read-only block node '%s' cannot support "
2458                         "read-write users", bdrv_get_node_name(bs));
2459          }
2460  
2461          return -EPERM;
2462      }
2463  
2464      /*
2465       * Unaligned requests will automatically be aligned to bl.request_alignment
2466       * and without RESIZE we can't extend requests to write to space beyond the
2467       * end of the image, so it's required that the image size is aligned.
2468       */
2469      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2470          !(cumulative_perms & BLK_PERM_RESIZE))
2471      {
2472          if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2473              error_setg(errp, "Cannot get 'write' permission without 'resize': "
2474                               "Image size is not a multiple of request "
2475                               "alignment");
2476              return -EPERM;
2477          }
2478      }
2479  
2480      /* Check this node */
2481      if (!drv) {
2482          return 0;
2483      }
2484  
2485      ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
2486                              errp);
2487      if (ret < 0) {
2488          return ret;
2489      }
2490  
2491      /* Drivers that never have children can omit .bdrv_child_perm() */
2492      if (!drv->bdrv_child_perm) {
2493          assert(QLIST_EMPTY(&bs->children));
2494          return 0;
2495      }
2496  
2497      /* Check all children */
2498      QLIST_FOREACH(c, &bs->children, next) {
2499          uint64_t cur_perm, cur_shared;
2500  
2501          bdrv_child_perm(bs, c->bs, c, c->role, q,
2502                          cumulative_perms, cumulative_shared_perms,
2503                          &cur_perm, &cur_shared);
2504          bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
2505      }
2506  
2507      return 0;
2508  }
2509  
2510  /*
2511   * @list is a product of bdrv_topological_dfs() (may be called several times) -
2512   * a topologically sorted subgraph.
2513   */
2514  static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q,
2515                                   Transaction *tran, Error **errp)
2516  {
2517      int ret;
2518      BlockDriverState *bs;
2519      GLOBAL_STATE_CODE();
2520  
2521      for ( ; list; list = list->next) {
2522          bs = list->data;
2523  
2524          if (bdrv_parent_perms_conflict(bs, errp)) {
2525              return -EINVAL;
2526          }
2527  
2528          ret = bdrv_node_refresh_perm(bs, q, tran, errp);
2529          if (ret < 0) {
2530              return ret;
2531          }
2532      }
2533  
2534      return 0;
2535  }
2536  
2537  /*
2538   * @list is any list of nodes. List is completed by all subtrees and
2539   * topologically sorted. It's not a problem if some node occurs in the @list
2540   * several times.
2541   */
2542  static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
2543                                     Transaction *tran, Error **errp)
2544  {
2545      g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
2546      g_autoptr(GSList) refresh_list = NULL;
2547  
2548      for ( ; list; list = list->next) {
2549          refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
2550      }
2551  
2552      return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
2553  }
2554  
2555  void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2556                                uint64_t *shared_perm)
2557  {
2558      BdrvChild *c;
2559      uint64_t cumulative_perms = 0;
2560      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2561  
2562      GLOBAL_STATE_CODE();
2563  
2564      QLIST_FOREACH(c, &bs->parents, next_parent) {
2565          cumulative_perms |= c->perm;
2566          cumulative_shared_perms &= c->shared_perm;
2567      }
2568  
2569      *perm = cumulative_perms;
2570      *shared_perm = cumulative_shared_perms;
2571  }
2572  
2573  char *bdrv_perm_names(uint64_t perm)
2574  {
2575      struct perm_name {
2576          uint64_t perm;
2577          const char *name;
2578      } permissions[] = {
2579          { BLK_PERM_CONSISTENT_READ, "consistent read" },
2580          { BLK_PERM_WRITE,           "write" },
2581          { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2582          { BLK_PERM_RESIZE,          "resize" },
2583          { 0, NULL }
2584      };
2585  
2586      GString *result = g_string_sized_new(30);
2587      struct perm_name *p;
2588  
2589      for (p = permissions; p->name; p++) {
2590          if (perm & p->perm) {
2591              if (result->len > 0) {
2592                  g_string_append(result, ", ");
2593              }
2594              g_string_append(result, p->name);
2595          }
2596      }
2597  
2598      return g_string_free(result, FALSE);
2599  }
2600  
2601  
2602  /* @tran is allowed to be NULL. In this case no rollback is possible */
2603  static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran,
2604                                Error **errp)
2605  {
2606      int ret;
2607      Transaction *local_tran = NULL;
2608      g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
2609      GLOBAL_STATE_CODE();
2610  
2611      if (!tran) {
2612          tran = local_tran = tran_new();
2613      }
2614  
2615      ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
2616  
2617      if (local_tran) {
2618          tran_finalize(local_tran, ret);
2619      }
2620  
2621      return ret;
2622  }
2623  
2624  int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2625                              Error **errp)
2626  {
2627      Error *local_err = NULL;
2628      Transaction *tran = tran_new();
2629      int ret;
2630  
2631      GLOBAL_STATE_CODE();
2632  
2633      bdrv_child_set_perm(c, perm, shared, tran);
2634  
2635      ret = bdrv_refresh_perms(c->bs, tran, &local_err);
2636  
2637      tran_finalize(tran, ret);
2638  
2639      if (ret < 0) {
2640          if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2641              /* tighten permissions */
2642              error_propagate(errp, local_err);
2643          } else {
2644              /*
2645               * Our caller may intend to only loosen restrictions and
2646               * does not expect this function to fail.  Errors are not
2647               * fatal in such a case, so we can just hide them from our
2648               * caller.
2649               */
2650              error_free(local_err);
2651              ret = 0;
2652          }
2653      }
2654  
2655      return ret;
2656  }
2657  
2658  int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2659  {
2660      uint64_t parent_perms, parent_shared;
2661      uint64_t perms, shared;
2662  
2663      GLOBAL_STATE_CODE();
2664  
2665      bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
2666      bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2667                      parent_perms, parent_shared, &perms, &shared);
2668  
2669      return bdrv_child_try_set_perm(c, perms, shared, errp);
2670  }
2671  
2672  /*
2673   * Default implementation for .bdrv_child_perm() for block filters:
2674   * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2675   * filtered child.
2676   */
2677  static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
2678                                        BdrvChildRole role,
2679                                        BlockReopenQueue *reopen_queue,
2680                                        uint64_t perm, uint64_t shared,
2681                                        uint64_t *nperm, uint64_t *nshared)
2682  {
2683      GLOBAL_STATE_CODE();
2684      *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2685      *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
2686  }
2687  
2688  static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
2689                                         BdrvChildRole role,
2690                                         BlockReopenQueue *reopen_queue,
2691                                         uint64_t perm, uint64_t shared,
2692                                         uint64_t *nperm, uint64_t *nshared)
2693  {
2694      assert(role & BDRV_CHILD_COW);
2695      GLOBAL_STATE_CODE();
2696  
2697      /*
2698       * We want consistent read from backing files if the parent needs it.
2699       * No other operations are performed on backing files.
2700       */
2701      perm &= BLK_PERM_CONSISTENT_READ;
2702  
2703      /*
2704       * If the parent can deal with changing data, we're okay with a
2705       * writable and resizable backing file.
2706       * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2707       */
2708      if (shared & BLK_PERM_WRITE) {
2709          shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2710      } else {
2711          shared = 0;
2712      }
2713  
2714      shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
2715  
2716      if (bs->open_flags & BDRV_O_INACTIVE) {
2717          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2718      }
2719  
2720      *nperm = perm;
2721      *nshared = shared;
2722  }
2723  
2724  static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
2725                                             BdrvChildRole role,
2726                                             BlockReopenQueue *reopen_queue,
2727                                             uint64_t perm, uint64_t shared,
2728                                             uint64_t *nperm, uint64_t *nshared)
2729  {
2730      int flags;
2731  
2732      GLOBAL_STATE_CODE();
2733      assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
2734  
2735      flags = bdrv_reopen_get_flags(reopen_queue, bs);
2736  
2737      /*
2738       * Apart from the modifications below, the same permissions are
2739       * forwarded and left alone as for filters
2740       */
2741      bdrv_filter_default_perms(bs, c, role, reopen_queue,
2742                                perm, shared, &perm, &shared);
2743  
2744      if (role & BDRV_CHILD_METADATA) {
2745          /* Format drivers may touch metadata even if the guest doesn't write */
2746          if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2747              perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2748          }
2749  
2750          /*
2751           * bs->file always needs to be consistent because of the
2752           * metadata. We can never allow other users to resize or write
2753           * to it.
2754           */
2755          if (!(flags & BDRV_O_NO_IO)) {
2756              perm |= BLK_PERM_CONSISTENT_READ;
2757          }
2758          shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
2759      }
2760  
2761      if (role & BDRV_CHILD_DATA) {
2762          /*
2763           * Technically, everything in this block is a subset of the
2764           * BDRV_CHILD_METADATA path taken above, and so this could
2765           * be an "else if" branch.  However, that is not obvious, and
2766           * this function is not performance critical, therefore we let
2767           * this be an independent "if".
2768           */
2769  
2770          /*
2771           * We cannot allow other users to resize the file because the
2772           * format driver might have some assumptions about the size
2773           * (e.g. because it is stored in metadata, or because the file
2774           * is split into fixed-size data files).
2775           */
2776          shared &= ~BLK_PERM_RESIZE;
2777  
2778          /*
2779           * WRITE_UNCHANGED often cannot be performed as such on the
2780           * data file.  For example, the qcow2 driver may still need to
2781           * write copied clusters on copy-on-read.
2782           */
2783          if (perm & BLK_PERM_WRITE_UNCHANGED) {
2784              perm |= BLK_PERM_WRITE;
2785          }
2786  
2787          /*
2788           * If the data file is written to, the format driver may
2789           * expect to be able to resize it by writing beyond the EOF.
2790           */
2791          if (perm & BLK_PERM_WRITE) {
2792              perm |= BLK_PERM_RESIZE;
2793          }
2794      }
2795  
2796      if (bs->open_flags & BDRV_O_INACTIVE) {
2797          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2798      }
2799  
2800      *nperm = perm;
2801      *nshared = shared;
2802  }
2803  
2804  void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
2805                          BdrvChildRole role, BlockReopenQueue *reopen_queue,
2806                          uint64_t perm, uint64_t shared,
2807                          uint64_t *nperm, uint64_t *nshared)
2808  {
2809      GLOBAL_STATE_CODE();
2810      if (role & BDRV_CHILD_FILTERED) {
2811          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2812                           BDRV_CHILD_COW)));
2813          bdrv_filter_default_perms(bs, c, role, reopen_queue,
2814                                    perm, shared, nperm, nshared);
2815      } else if (role & BDRV_CHILD_COW) {
2816          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
2817          bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2818                                     perm, shared, nperm, nshared);
2819      } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
2820          bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2821                                         perm, shared, nperm, nshared);
2822      } else {
2823          g_assert_not_reached();
2824      }
2825  }
2826  
2827  uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2828  {
2829      static const uint64_t permissions[] = {
2830          [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
2831          [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
2832          [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
2833          [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
2834      };
2835  
2836      QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2837      QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2838  
2839      assert(qapi_perm < BLOCK_PERMISSION__MAX);
2840  
2841      return permissions[qapi_perm];
2842  }
2843  
2844  /*
2845   * Replaces the node that a BdrvChild points to without updating permissions.
2846   *
2847   * If @new_bs is non-NULL, the parent of @child must already be drained through
2848   * @child.
2849   */
2850  static void bdrv_replace_child_noperm(BdrvChild *child,
2851                                        BlockDriverState *new_bs)
2852  {
2853      BlockDriverState *old_bs = child->bs;
2854      int new_bs_quiesce_counter;
2855  
2856      assert(!child->frozen);
2857  
2858      /*
2859       * If we want to change the BdrvChild to point to a drained node as its new
2860       * child->bs, we need to make sure that its new parent is drained, too. In
2861       * other words, either child->quiesce_parent must already be true or we must
2862       * be able to set it and keep the parent's quiesce_counter consistent with
2863       * that, but without polling or starting new requests (this function
2864       * guarantees that it doesn't poll, and starting new requests would be
2865       * against the invariants of drain sections).
2866       *
2867       * To keep things simple, we pick the first option (child->quiesce_parent
2868       * must already be true). We also generalise the rule a bit to make it
2869       * easier to verify in callers and more likely to be covered in test cases:
2870       * The parent must be quiesced through this child even if new_bs isn't
2871       * currently drained.
2872       *
2873       * The only exception is for callers that always pass new_bs == NULL. In
2874       * this case, we obviously never need to consider the case of a drained
2875       * new_bs, so we can keep the callers simpler by allowing them not to drain
2876       * the parent.
2877       */
2878      assert(!new_bs || child->quiesced_parent);
2879      assert(old_bs != new_bs);
2880      GLOBAL_STATE_CODE();
2881  
2882      if (old_bs && new_bs) {
2883          assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
2884      }
2885  
2886      /* TODO Pull this up into the callers to avoid polling here */
2887      bdrv_graph_wrlock();
2888      if (old_bs) {
2889          if (child->klass->detach) {
2890              child->klass->detach(child);
2891          }
2892          QLIST_REMOVE(child, next_parent);
2893      }
2894  
2895      child->bs = new_bs;
2896  
2897      if (new_bs) {
2898          QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
2899          if (child->klass->attach) {
2900              child->klass->attach(child);
2901          }
2902      }
2903      bdrv_graph_wrunlock();
2904  
2905      /*
2906       * If the parent was drained through this BdrvChild previously, but new_bs
2907       * is not drained, allow requests to come in only after the new node has
2908       * been attached.
2909       */
2910      new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
2911      if (!new_bs_quiesce_counter && child->quiesced_parent) {
2912          bdrv_parent_drained_end_single(child);
2913      }
2914  }
2915  
2916  /**
2917   * Free the given @child.
2918   *
2919   * The child must be empty (i.e. `child->bs == NULL`) and it must be
2920   * unused (i.e. not in a children list).
2921   */
2922  static void bdrv_child_free(BdrvChild *child)
2923  {
2924      assert(!child->bs);
2925      GLOBAL_STATE_CODE();
2926      assert(!child->next.le_prev); /* not in children list */
2927  
2928      g_free(child->name);
2929      g_free(child);
2930  }
2931  
/*
 * State recorded by bdrv_attach_child_common() for its transaction action,
 * so that bdrv_attach_child_common_abort() can undo the attachment.
 */
typedef struct BdrvAttachChildCommonState {
    /* The BdrvChild created by bdrv_attach_child_common() */
    BdrvChild *child;
    /* AioContext of the parent as sampled before the attach was attempted */
    AioContext *old_parent_ctx;
    /* AioContext of the child node as sampled before the attach */
    AioContext *old_child_ctx;
} BdrvAttachChildCommonState;
2937  
2938  static void bdrv_attach_child_common_abort(void *opaque)
2939  {
2940      BdrvAttachChildCommonState *s = opaque;
2941      BlockDriverState *bs = s->child->bs;
2942  
2943      GLOBAL_STATE_CODE();
2944      bdrv_replace_child_noperm(s->child, NULL);
2945  
2946      if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
2947          bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort);
2948      }
2949  
2950      if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) {
2951          Transaction *tran;
2952          GHashTable *visited;
2953          bool ret;
2954  
2955          tran = tran_new();
2956  
2957          /* No need to visit `child`, because it has been detached already */
2958          visited = g_hash_table_new(NULL, NULL);
2959          ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx,
2960                                                visited, tran, &error_abort);
2961          g_hash_table_destroy(visited);
2962  
2963          /* transaction is supposed to always succeed */
2964          assert(ret == true);
2965          tran_commit(tran);
2966      }
2967  
2968      bdrv_unref(bs);
2969      bdrv_child_free(s->child);
2970  }
2971  
/*
 * Transaction action registered by bdrv_attach_child_common(): .abort undoes
 * the attachment, .clean releases the BdrvAttachChildCommonState with
 * g_free().
 */
static TransactionActionDrv bdrv_attach_child_common_drv = {
    .abort = bdrv_attach_child_common_abort,
    .clean = g_free,
};
2976  
2977  /*
2978   * Common part of attaching bdrv child to bs or to blk or to job
2979   *
2980   * Function doesn't update permissions, caller is responsible for this.
2981   *
2982   * Returns new created child.
2983   */
2984  static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
2985                                             const char *child_name,
2986                                             const BdrvChildClass *child_class,
2987                                             BdrvChildRole child_role,
2988                                             uint64_t perm, uint64_t shared_perm,
2989                                             void *opaque,
2990                                             Transaction *tran, Error **errp)
2991  {
2992      BdrvChild *new_child;
2993      AioContext *parent_ctx;
2994      AioContext *child_ctx = bdrv_get_aio_context(child_bs);
2995  
2996      assert(child_class->get_parent_desc);
2997      GLOBAL_STATE_CODE();
2998  
2999      new_child = g_new(BdrvChild, 1);
3000      *new_child = (BdrvChild) {
3001          .bs             = NULL,
3002          .name           = g_strdup(child_name),
3003          .klass          = child_class,
3004          .role           = child_role,
3005          .perm           = perm,
3006          .shared_perm    = shared_perm,
3007          .opaque         = opaque,
3008      };
3009  
3010      /*
3011       * If the AioContexts don't match, first try to move the subtree of
3012       * child_bs into the AioContext of the new parent. If this doesn't work,
3013       * try moving the parent into the AioContext of child_bs instead.
3014       */
3015      parent_ctx = bdrv_child_get_parent_aio_context(new_child);
3016      if (child_ctx != parent_ctx) {
3017          Error *local_err = NULL;
3018          int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL,
3019                                                &local_err);
3020  
3021          if (ret < 0 && child_class->change_aio_ctx) {
3022              Transaction *tran = tran_new();
3023              GHashTable *visited = g_hash_table_new(NULL, NULL);
3024              bool ret_child;
3025  
3026              g_hash_table_add(visited, new_child);
3027              ret_child = child_class->change_aio_ctx(new_child, child_ctx,
3028                                                      visited, tran, NULL);
3029              if (ret_child == true) {
3030                  error_free(local_err);
3031                  ret = 0;
3032              }
3033              tran_finalize(tran, ret_child == true ? 0 : -1);
3034              g_hash_table_destroy(visited);
3035          }
3036  
3037          if (ret < 0) {
3038              error_propagate(errp, local_err);
3039              bdrv_child_free(new_child);
3040              return NULL;
3041          }
3042      }
3043  
3044      bdrv_ref(child_bs);
3045      /*
3046       * Let every new BdrvChild start with a drained parent. Inserting the child
3047       * in the graph with bdrv_replace_child_noperm() will undrain it if
3048       * @child_bs is not drained.
3049       *
3050       * The child was only just created and is not yet visible in global state
3051       * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
3052       * could have sent requests and polling is not necessary.
3053       *
3054       * Note that this means that the parent isn't fully drained yet, we only
3055       * stop new requests from coming in. This is fine, we don't care about the
3056       * old requests here, they are not for this child. If another place enters a
3057       * drain section for the same parent, but wants it to be fully quiesced, it
3058       * will not run most of the the code in .drained_begin() again (which is not
3059       * a problem, we already did this), but it will still poll until the parent
3060       * is fully quiesced, so it will not be negatively affected either.
3061       */
3062      bdrv_parent_drained_begin_single(new_child);
3063      bdrv_replace_child_noperm(new_child, child_bs);
3064  
3065      BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
3066      *s = (BdrvAttachChildCommonState) {
3067          .child = new_child,
3068          .old_parent_ctx = parent_ctx,
3069          .old_child_ctx = child_ctx,
3070      };
3071      tran_add(tran, &bdrv_attach_child_common_drv, s);
3072  
3073      return new_child;
3074  }
3075  
3076  /*
3077   * Function doesn't update permissions, caller is responsible for this.
3078   */
3079  static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs,
3080                                             BlockDriverState *child_bs,
3081                                             const char *child_name,
3082                                             const BdrvChildClass *child_class,
3083                                             BdrvChildRole child_role,
3084                                             Transaction *tran,
3085                                             Error **errp)
3086  {
3087      uint64_t perm, shared_perm;
3088  
3089      assert(parent_bs->drv);
3090      GLOBAL_STATE_CODE();
3091  
3092      if (bdrv_recurse_has_child(child_bs, parent_bs)) {
3093          error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
3094                     child_bs->node_name, child_name, parent_bs->node_name);
3095          return NULL;
3096      }
3097  
3098      bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
3099      bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
3100                      perm, shared_perm, &perm, &shared_perm);
3101  
3102      return bdrv_attach_child_common(child_bs, child_name, child_class,
3103                                      child_role, perm, shared_perm, parent_bs,
3104                                      tran, errp);
3105  }
3106  
3107  /*
3108   * This function steals the reference to child_bs from the caller.
3109   * That reference is later dropped by bdrv_root_unref_child().
3110   *
3111   * On failure NULL is returned, errp is set and the reference to
3112   * child_bs is also dropped.
3113   *
3114   * The caller must hold the AioContext lock @child_bs, but not that of @ctx
3115   * (unless @child_bs is already in @ctx).
3116   */
3117  BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
3118                                    const char *child_name,
3119                                    const BdrvChildClass *child_class,
3120                                    BdrvChildRole child_role,
3121                                    uint64_t perm, uint64_t shared_perm,
3122                                    void *opaque, Error **errp)
3123  {
3124      int ret;
3125      BdrvChild *child;
3126      Transaction *tran = tran_new();
3127  
3128      GLOBAL_STATE_CODE();
3129  
3130      child = bdrv_attach_child_common(child_bs, child_name, child_class,
3131                                     child_role, perm, shared_perm, opaque,
3132                                     tran, errp);
3133      if (!child) {
3134          ret = -EINVAL;
3135          goto out;
3136      }
3137  
3138      ret = bdrv_refresh_perms(child_bs, tran, errp);
3139  
3140  out:
3141      tran_finalize(tran, ret);
3142  
3143      bdrv_unref(child_bs);
3144  
3145      return ret < 0 ? NULL : child;
3146  }
3147  
3148  /*
3149   * This function transfers the reference to child_bs from the caller
3150   * to parent_bs. That reference is later dropped by parent_bs on
3151   * bdrv_close() or if someone calls bdrv_unref_child().
3152   *
3153   * On failure NULL is returned, errp is set and the reference to
3154   * child_bs is also dropped.
3155   *
3156   * If @parent_bs and @child_bs are in different AioContexts, the caller must
3157   * hold the AioContext lock for @child_bs, but not for @parent_bs.
3158   */
3159  BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
3160                               BlockDriverState *child_bs,
3161                               const char *child_name,
3162                               const BdrvChildClass *child_class,
3163                               BdrvChildRole child_role,
3164                               Error **errp)
3165  {
3166      int ret;
3167      BdrvChild *child;
3168      Transaction *tran = tran_new();
3169  
3170      GLOBAL_STATE_CODE();
3171  
3172      child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name,
3173                                       child_class, child_role, tran, errp);
3174      if (!child) {
3175          ret = -EINVAL;
3176          goto out;
3177      }
3178  
3179      ret = bdrv_refresh_perms(parent_bs, tran, errp);
3180      if (ret < 0) {
3181          goto out;
3182      }
3183  
3184  out:
3185      tran_finalize(tran, ret);
3186  
3187      bdrv_unref(child_bs);
3188  
3189      return ret < 0 ? NULL : child;
3190  }
3191  
3192  /* Callers must ensure that child->frozen is false. */
3193  void bdrv_root_unref_child(BdrvChild *child)
3194  {
3195      BlockDriverState *child_bs = child->bs;
3196  
3197      GLOBAL_STATE_CODE();
3198      bdrv_replace_child_noperm(child, NULL);
3199      bdrv_child_free(child);
3200  
3201      if (child_bs) {
3202          /*
3203           * Update permissions for old node. We're just taking a parent away, so
3204           * we're loosening restrictions. Errors of permission update are not
3205           * fatal in this case, ignore them.
3206           */
3207          bdrv_refresh_perms(child_bs, NULL, NULL);
3208  
3209          /*
3210           * When the parent requiring a non-default AioContext is removed, the
3211           * node moves back to the main AioContext
3212           */
3213          bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL,
3214                                      NULL);
3215      }
3216  
3217      bdrv_unref(child_bs);
3218  }
3219  
/*
 * State recorded by bdrv_set_inherits_from() so that
 * bdrv_set_inherits_from_abort() can restore the previous pointer.
 */
typedef struct BdrvSetInheritsFrom {
    /* Node whose inherits_from pointer was changed */
    BlockDriverState *bs;
    /* Value of bs->inherits_from before the change */
    BlockDriverState *old_inherits_from;
} BdrvSetInheritsFrom;
3224  
3225  static void bdrv_set_inherits_from_abort(void *opaque)
3226  {
3227      BdrvSetInheritsFrom *s = opaque;
3228  
3229      s->bs->inherits_from = s->old_inherits_from;
3230  }
3231  
/*
 * Transaction action registered by bdrv_set_inherits_from(): .abort restores
 * the old inherits_from pointer, .clean releases the BdrvSetInheritsFrom
 * with g_free().
 */
static TransactionActionDrv bdrv_set_inherits_from_drv = {
    .abort = bdrv_set_inherits_from_abort,
    .clean = g_free,
};
3236  
3237  /* @tran is allowed to be NULL. In this case no rollback is possible */
3238  static void bdrv_set_inherits_from(BlockDriverState *bs,
3239                                     BlockDriverState *new_inherits_from,
3240                                     Transaction *tran)
3241  {
3242      if (tran) {
3243          BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
3244  
3245          *s = (BdrvSetInheritsFrom) {
3246              .bs = bs,
3247              .old_inherits_from = bs->inherits_from,
3248          };
3249  
3250          tran_add(tran, &bdrv_set_inherits_from_drv, s);
3251      }
3252  
3253      bs->inherits_from = new_inherits_from;
3254  }
3255  
3256  /**
3257   * Clear all inherits_from pointers from children and grandchildren of
3258   * @root that point to @root, where necessary.
3259   * @tran is allowed to be NULL. In this case no rollback is possible
3260   */
3261  static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
3262                                       Transaction *tran)
3263  {
3264      BdrvChild *c;
3265  
3266      if (child->bs->inherits_from == root) {
3267          /*
3268           * Remove inherits_from only when the last reference between root and
3269           * child->bs goes away.
3270           */
3271          QLIST_FOREACH(c, &root->children, next) {
3272              if (c != child && c->bs == child->bs) {
3273                  break;
3274              }
3275          }
3276          if (c == NULL) {
3277              bdrv_set_inherits_from(child->bs, NULL, tran);
3278          }
3279      }
3280  
3281      QLIST_FOREACH(c, &child->bs->children, next) {
3282          bdrv_unset_inherits_from(root, c, tran);
3283      }
3284  }
3285  
3286  /* Callers must ensure that child->frozen is false. */
3287  void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
3288  {
3289      GLOBAL_STATE_CODE();
3290      if (child == NULL) {
3291          return;
3292      }
3293  
3294      bdrv_unset_inherits_from(parent, child, NULL);
3295      bdrv_root_unref_child(child);
3296  }
3297  
3298  
3299  static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
3300  {
3301      BdrvChild *c;
3302      GLOBAL_STATE_CODE();
3303      QLIST_FOREACH(c, &bs->parents, next_parent) {
3304          if (c->klass->change_media) {
3305              c->klass->change_media(c, load);
3306          }
3307      }
3308  }
3309  
3310  /* Return true if you can reach parent going through child->inherits_from
3311   * recursively. If parent or child are NULL, return false */
3312  static bool bdrv_inherits_from_recursive(BlockDriverState *child,
3313                                           BlockDriverState *parent)
3314  {
3315      while (child && child != parent) {
3316          child = child->inherits_from;
3317      }
3318  
3319      return child != NULL;
3320  }
3321  
3322  /*
3323   * Return the BdrvChildRole for @bs's backing child.  bs->backing is
3324   * mostly used for COW backing children (role = COW), but also for
3325   * filtered children (role = FILTERED | PRIMARY).
3326   */
3327  static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
3328  {
3329      if (bs->drv && bs->drv->is_filter) {
3330          return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3331      } else {
3332          return BDRV_CHILD_COW;
3333      }
3334  }
3335  
3336  /*
3337   * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
3338   * callers which don't need their own reference any more must call bdrv_unref().
3339   *
3340   * Function doesn't update permissions, caller is responsible for this.
3341   */
3342  static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
3343                                             BlockDriverState *child_bs,
3344                                             bool is_backing,
3345                                             Transaction *tran, Error **errp)
3346  {
3347      bool update_inherits_from =
3348          bdrv_inherits_from_recursive(child_bs, parent_bs);
3349      BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
3350      BdrvChildRole role;
3351  
3352      GLOBAL_STATE_CODE();
3353  
3354      if (!parent_bs->drv) {
3355          /*
3356           * Node without drv is an object without a class :/. TODO: finally fix
3357           * qcow2 driver to never clear bs->drv and implement format corruption
3358           * handling in other way.
3359           */
3360          error_setg(errp, "Node corrupted");
3361          return -EINVAL;
3362      }
3363  
3364      if (child && child->frozen) {
3365          error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
3366                     child->name, parent_bs->node_name, child->bs->node_name);
3367          return -EPERM;
3368      }
3369  
3370      if (is_backing && !parent_bs->drv->is_filter &&
3371          !parent_bs->drv->supports_backing)
3372      {
3373          error_setg(errp, "Driver '%s' of node '%s' does not support backing "
3374                     "files", parent_bs->drv->format_name, parent_bs->node_name);
3375          return -EINVAL;
3376      }
3377  
3378      if (parent_bs->drv->is_filter) {
3379          role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3380      } else if (is_backing) {
3381          role = BDRV_CHILD_COW;
3382      } else {
3383          /*
3384           * We only can use same role as it is in existing child. We don't have
3385           * infrastructure to determine role of file child in generic way
3386           */
3387          if (!child) {
3388              error_setg(errp, "Cannot set file child to format node without "
3389                         "file child");
3390              return -EINVAL;
3391          }
3392          role = child->role;
3393      }
3394  
3395      if (child) {
3396          bdrv_unset_inherits_from(parent_bs, child, tran);
3397          bdrv_remove_child(child, tran);
3398      }
3399  
3400      if (!child_bs) {
3401          goto out;
3402      }
3403  
3404      child = bdrv_attach_child_noperm(parent_bs, child_bs,
3405                                       is_backing ? "backing" : "file",
3406                                       &child_of_bds, role,
3407                                       tran, errp);
3408      if (!child) {
3409          return -EINVAL;
3410      }
3411  
3412  
3413      /*
3414       * If inherits_from pointed recursively to bs then let's update it to
3415       * point directly to bs (else it will become NULL).
3416       */
3417      if (update_inherits_from) {
3418          bdrv_set_inherits_from(child_bs, parent_bs, tran);
3419      }
3420  
3421  out:
3422      bdrv_refresh_limits(parent_bs, tran, NULL);
3423  
3424      return 0;
3425  }
3426  
3427  static int bdrv_set_backing_noperm(BlockDriverState *bs,
3428                                     BlockDriverState *backing_hd,
3429                                     Transaction *tran, Error **errp)
3430  {
3431      GLOBAL_STATE_CODE();
3432      return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
3433  }
3434  
3435  int bdrv_set_backing_hd_drained(BlockDriverState *bs,
3436                                  BlockDriverState *backing_hd,
3437                                  Error **errp)
3438  {
3439      int ret;
3440      Transaction *tran = tran_new();
3441  
3442      GLOBAL_STATE_CODE();
3443      assert(bs->quiesce_counter > 0);
3444  
3445      ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
3446      if (ret < 0) {
3447          goto out;
3448      }
3449  
3450      ret = bdrv_refresh_perms(bs, tran, errp);
3451  out:
3452      tran_finalize(tran, ret);
3453      return ret;
3454  }
3455  
3456  int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
3457                          Error **errp)
3458  {
3459      int ret;
3460      GLOBAL_STATE_CODE();
3461  
3462      bdrv_drained_begin(bs);
3463      ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
3464      bdrv_drained_end(bs);
3465  
3466      return ret;
3467  }
3468  
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * @parent_options may be NULL, which is treated as an empty set of options.
 *
 * Returns 0 on success, and also when there is nothing to do (a backing
 * child is already attached, or no backing file is configured at all).
 * On failure a negative errno is returned and @errp is set.
 *
 * TODO Can this be unified with bdrv_open_image()?
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                           const char *bdref_key, Error **errp)
{
    char *backing_filename = NULL;
    char *bdref_key_dot;
    const char *reference = NULL;
    int ret = 0;
    bool implicit_backing = false;
    BlockDriverState *backing_hd;
    QDict *options;
    QDict *tmp_parent_options = NULL;
    Error *local_err = NULL;

    GLOBAL_STATE_CODE();

    /* Already has a backing child: nothing to do, ret is still 0 */
    if (bs->backing != NULL) {
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (parent_options == NULL) {
        tmp_parent_options = qdict_new();
        parent_options = tmp_parent_options;
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;

    /* Split the "${bdref_key}."-prefixed entries off into @options */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @parent_options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        /* keep backing_filename NULL */
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file recorded in the image and none requested */
        qobject_unref(options);
        goto free_exit;
    } else {
        if (qdict_size(options) == 0) {
            /* If the user specifies options that do not modify the
             * backing file's behavior, we might still consider it the
             * implicit backing file.  But it's easier this way, and
             * just specifying some of the backing BDS's options is
             * only possible with -drive anyway (otherwise the QAPI
             * schema forces the user to specify everything). */
            implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
        }

        backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            qobject_unref(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        qobject_unref(options);
        goto free_exit;
    }

    /*
     * Without a node reference, fall back to the backing format stored in
     * @bs as the driver, unless the options already name one.
     */
    if (!reference &&
        bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put_str(options, "driver", bs->backing_format);
    }

    /*
     * NOTE(review): @options is not unreferenced on any path from here on,
     * so bdrv_open_inherit() presumably takes ownership of it on success as
     * well as on failure -- confirm against its definition.
     */
    backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
                                   &child_of_bds, bdrv_backing_role(bs), errp);
    if (!backing_hd) {
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_prepend(errp, "Could not open backing file: ");
        ret = -EINVAL;
        goto free_exit;
    }

    /* Keep auto_backing_file in sync with the filename actually opened */
    if (implicit_backing) {
        bdrv_refresh_filename(backing_hd);
        pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
                backing_hd->filename);
    }

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
    ret = bdrv_set_backing_hd(bs, backing_hd, errp);
    bdrv_unref(backing_hd);
    if (ret < 0) {
        goto free_exit;
    }

    /* The reference has been consumed; remove it from the parent's options */
    qdict_del(parent_options, bdref_key);

free_exit:
    g_free(backing_filename);
    qobject_unref(tmp_parent_options);
    return ret;
}
3585  
3586  static BlockDriverState *
3587  bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
3588                     BlockDriverState *parent, const BdrvChildClass *child_class,
3589                     BdrvChildRole child_role, bool allow_none, Error **errp)
3590  {
3591      BlockDriverState *bs = NULL;
3592      QDict *image_options;
3593      char *bdref_key_dot;
3594      const char *reference;
3595  
3596      assert(child_class != NULL);
3597  
3598      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3599      qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3600      g_free(bdref_key_dot);
3601  
3602      /*
3603       * Caution: while qdict_get_try_str() is fine, getting non-string
3604       * types would require more care.  When @options come from
3605       * -blockdev or blockdev_add, its members are typed according to
3606       * the QAPI schema, but when they come from -drive, they're all
3607       * QString.
3608       */
3609      reference = qdict_get_try_str(options, bdref_key);
3610      if (!filename && !reference && !qdict_size(image_options)) {
3611          if (!allow_none) {
3612              error_setg(errp, "A block device must be specified for \"%s\"",
3613                         bdref_key);
3614          }
3615          qobject_unref(image_options);
3616          goto done;
3617      }
3618  
3619      bs = bdrv_open_inherit(filename, reference, image_options, 0,
3620                             parent, child_class, child_role, errp);
3621      if (!bs) {
3622          goto done;
3623      }
3624  
3625  done:
3626      qdict_del(options, bdref_key);
3627      return bs;
3628  }
3629  
3630  /*
3631   * Opens a disk image whose options are given as BlockdevRef in another block
3632   * device's options.
3633   *
3634   * If allow_none is true, no image will be opened if filename is false and no
3635   * BlockdevRef is given. NULL will be returned, but errp remains unset.
3636   *
3637   * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3638   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3639   * itself, all options starting with "${bdref_key}." are considered part of the
3640   * BlockdevRef.
3641   *
3642   * The BlockdevRef will be removed from the options QDict.
3643   */
3644  BdrvChild *bdrv_open_child(const char *filename,
3645                             QDict *options, const char *bdref_key,
3646                             BlockDriverState *parent,
3647                             const BdrvChildClass *child_class,
3648                             BdrvChildRole child_role,
3649                             bool allow_none, Error **errp)
3650  {
3651      BlockDriverState *bs;
3652  
3653      GLOBAL_STATE_CODE();
3654  
3655      bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3656                              child_role, allow_none, errp);
3657      if (bs == NULL) {
3658          return NULL;
3659      }
3660  
3661      return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3662                               errp);
3663  }
3664  
3665  /*
3666   * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
3667   */
3668  int bdrv_open_file_child(const char *filename,
3669                           QDict *options, const char *bdref_key,
3670                           BlockDriverState *parent, Error **errp)
3671  {
3672      BdrvChildRole role;
3673  
3674      /* commit_top and mirror_top don't use this function */
3675      assert(!parent->drv->filtered_child_is_backing);
3676      role = parent->drv->is_filter ?
3677          (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
3678  
3679      if (!bdrv_open_child(filename, options, bdref_key, parent,
3680                           &child_of_bds, role, false, errp))
3681      {
3682          return -EINVAL;
3683      }
3684  
3685      return 0;
3686  }
3687  
3688  /*
3689   * TODO Future callers may need to specify parent/child_class in order for
3690   * option inheritance to work. Existing callers use it for the root node.
3691   */
3692  BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3693  {
3694      BlockDriverState *bs = NULL;
3695      QObject *obj = NULL;
3696      QDict *qdict = NULL;
3697      const char *reference = NULL;
3698      Visitor *v = NULL;
3699  
3700      GLOBAL_STATE_CODE();
3701  
3702      if (ref->type == QTYPE_QSTRING) {
3703          reference = ref->u.reference;
3704      } else {
3705          BlockdevOptions *options = &ref->u.definition;
3706          assert(ref->type == QTYPE_QDICT);
3707  
3708          v = qobject_output_visitor_new(&obj);
3709          visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3710          visit_complete(v, &obj);
3711  
3712          qdict = qobject_to(QDict, obj);
3713          qdict_flatten(qdict);
3714  
3715          /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3716           * compatibility with other callers) rather than what we want as the
3717           * real defaults. Apply the defaults here instead. */
3718          qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3719          qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3720          qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
3721          qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3722  
3723      }
3724  
3725      bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
3726      obj = NULL;
3727      qobject_unref(obj);
3728      visit_free(v);
3729      return bs;
3730  }
3731  
3732  static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3733                                                     int flags,
3734                                                     QDict *snapshot_options,
3735                                                     Error **errp)
3736  {
3737      g_autofree char *tmp_filename = NULL;
3738      int64_t total_size;
3739      QemuOpts *opts = NULL;
3740      BlockDriverState *bs_snapshot = NULL;
3741      int ret;
3742  
3743      GLOBAL_STATE_CODE();
3744  
3745      /* if snapshot, we create a temporary backing file and open it
3746         instead of opening 'filename' directly */
3747  
3748      /* Get the required size from the image */
3749      total_size = bdrv_getlength(bs);
3750      if (total_size < 0) {
3751          error_setg_errno(errp, -total_size, "Could not get image size");
3752          goto out;
3753      }
3754  
3755      /* Create the temporary image */
3756      tmp_filename = create_tmp_file(errp);
3757      if (!tmp_filename) {
3758          goto out;
3759      }
3760  
3761      opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
3762                              &error_abort);
3763      qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
3764      ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
3765      qemu_opts_del(opts);
3766      if (ret < 0) {
3767          error_prepend(errp, "Could not create temporary overlay '%s': ",
3768                        tmp_filename);
3769          goto out;
3770      }
3771  
3772      /* Prepare options QDict for the temporary file */
3773      qdict_put_str(snapshot_options, "file.driver", "file");
3774      qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3775      qdict_put_str(snapshot_options, "driver", "qcow2");
3776  
3777      bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
3778      snapshot_options = NULL;
3779      if (!bs_snapshot) {
3780          goto out;
3781      }
3782  
3783      ret = bdrv_append(bs_snapshot, bs, errp);
3784      if (ret < 0) {
3785          bs_snapshot = NULL;
3786          goto out;
3787      }
3788  
3789  out:
3790      qobject_unref(snapshot_options);
3791      return bs_snapshot;
3792  }
3793  
3794  /*
3795   * Opens a disk image (raw, qcow2, vmdk, ...)
3796   *
3797   * options is a QDict of options to pass to the block drivers, or NULL for an
3798   * empty set of options. The reference to the QDict belongs to the block layer
3799   * after the call (even on failure), so if the caller intends to reuse the
3800   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
3801   *
3802   * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
3803   * If it is not NULL, the referenced BDS will be reused.
3804   *
3805   * The reference parameter may be used to specify an existing block device which
3806   * should be opened. If specified, neither options nor a filename may be given,
3807   * nor can an existing BDS be reused (that is, *pbs has to be NULL).
3808   *
3809   * The caller must always hold @filename AioContext lock, because this
3810   * function eventually calls bdrv_refresh_total_sectors() which polls
3811   * when called from non-coroutine context.
3812   */
3813  static BlockDriverState * no_coroutine_fn
3814  bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
3815                    int flags, BlockDriverState *parent,
3816                    const BdrvChildClass *child_class, BdrvChildRole child_role,
3817                    Error **errp)
3818  {
3819      int ret;
3820      BlockBackend *file = NULL;
3821      BlockDriverState *bs;
3822      BlockDriver *drv = NULL;
3823      BdrvChild *child;
3824      const char *drvname;
3825      const char *backing;
3826      Error *local_err = NULL;
3827      QDict *snapshot_options = NULL;
3828      int snapshot_flags = 0;
3829  
3830      assert(!child_class || !flags);
3831      assert(!child_class == !parent);
3832      GLOBAL_STATE_CODE();
3833      assert(!qemu_in_coroutine());
3834  
3835      if (reference) {
3836          bool options_non_empty = options ? qdict_size(options) : false;
3837          qobject_unref(options);
3838  
3839          if (filename || options_non_empty) {
3840              error_setg(errp, "Cannot reference an existing block device with "
3841                         "additional options or a new filename");
3842              return NULL;
3843          }
3844  
3845          bs = bdrv_lookup_bs(reference, reference, errp);
3846          if (!bs) {
3847              return NULL;
3848          }
3849  
3850          bdrv_ref(bs);
3851          return bs;
3852      }
3853  
3854      bs = bdrv_new();
3855  
3856      /* NULL means an empty set of options */
3857      if (options == NULL) {
3858          options = qdict_new();
3859      }
3860  
3861      /* json: syntax counts as explicit options, as if in the QDict */
3862      parse_json_protocol(options, &filename, &local_err);
3863      if (local_err) {
3864          goto fail;
3865      }
3866  
3867      bs->explicit_options = qdict_clone_shallow(options);
3868  
3869      if (child_class) {
3870          bool parent_is_format;
3871  
3872          if (parent->drv) {
3873              parent_is_format = parent->drv->is_format;
3874          } else {
3875              /*
3876               * parent->drv is not set yet because this node is opened for
3877               * (potential) format probing.  That means that @parent is going
3878               * to be a format node.
3879               */
3880              parent_is_format = true;
3881          }
3882  
3883          bs->inherits_from = parent;
3884          child_class->inherit_options(child_role, parent_is_format,
3885                                       &flags, options,
3886                                       parent->open_flags, parent->options);
3887      }
3888  
3889      ret = bdrv_fill_options(&options, filename, &flags, &local_err);
3890      if (ret < 0) {
3891          goto fail;
3892      }
3893  
3894      /*
3895       * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
3896       * Caution: getting a boolean member of @options requires care.
3897       * When @options come from -blockdev or blockdev_add, members are
3898       * typed according to the QAPI schema, but when they come from
3899       * -drive, they're all QString.
3900       */
3901      if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
3902          !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
3903          flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
3904      } else {
3905          flags &= ~BDRV_O_RDWR;
3906      }
3907  
3908      if (flags & BDRV_O_SNAPSHOT) {
3909          snapshot_options = qdict_new();
3910          bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
3911                                     flags, options);
3912          /* Let bdrv_backing_options() override "read-only" */
3913          qdict_del(options, BDRV_OPT_READ_ONLY);
3914          bdrv_inherited_options(BDRV_CHILD_COW, true,
3915                                 &flags, options, flags, options);
3916      }
3917  
3918      bs->open_flags = flags;
3919      bs->options = options;
3920      options = qdict_clone_shallow(options);
3921  
3922      /* Find the right image format driver */
3923      /* See cautionary note on accessing @options above */
3924      drvname = qdict_get_try_str(options, "driver");
3925      if (drvname) {
3926          drv = bdrv_find_format(drvname);
3927          if (!drv) {
3928              error_setg(errp, "Unknown driver: '%s'", drvname);
3929              goto fail;
3930          }
3931      }
3932  
3933      assert(drvname || !(flags & BDRV_O_PROTOCOL));
3934  
3935      /* See cautionary note on accessing @options above */
3936      backing = qdict_get_try_str(options, "backing");
3937      if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
3938          (backing && *backing == '\0'))
3939      {
3940          if (backing) {
3941              warn_report("Use of \"backing\": \"\" is deprecated; "
3942                          "use \"backing\": null instead");
3943          }
3944          flags |= BDRV_O_NO_BACKING;
3945          qdict_del(bs->explicit_options, "backing");
3946          qdict_del(bs->options, "backing");
3947          qdict_del(options, "backing");
3948      }
3949  
3950      /* Open image file without format layer. This BlockBackend is only used for
3951       * probing, the block drivers will do their own bdrv_open_child() for the
3952       * same BDS, which is why we put the node name back into options. */
3953      if ((flags & BDRV_O_PROTOCOL) == 0) {
3954          BlockDriverState *file_bs;
3955  
3956          file_bs = bdrv_open_child_bs(filename, options, "file", bs,
3957                                       &child_of_bds, BDRV_CHILD_IMAGE,
3958                                       true, &local_err);
3959          if (local_err) {
3960              goto fail;
3961          }
3962          if (file_bs != NULL) {
3963              /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
3964               * looking at the header to guess the image format. This works even
3965               * in cases where a guest would not see a consistent state. */
3966              file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
3967              blk_insert_bs(file, file_bs, &local_err);
3968              bdrv_unref(file_bs);
3969              if (local_err) {
3970                  goto fail;
3971              }
3972  
3973              qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
3974          }
3975      }
3976  
3977      /* Image format probing */
3978      bs->probed = !drv;
3979      if (!drv && file) {
3980          ret = find_image_format(file, filename, &drv, &local_err);
3981          if (ret < 0) {
3982              goto fail;
3983          }
3984          /*
3985           * This option update would logically belong in bdrv_fill_options(),
3986           * but we first need to open bs->file for the probing to work, while
3987           * opening bs->file already requires the (mostly) final set of options
3988           * so that cache mode etc. can be inherited.
3989           *
3990           * Adding the driver later is somewhat ugly, but it's not an option
3991           * that would ever be inherited, so it's correct. We just need to make
3992           * sure to update both bs->options (which has the full effective
3993           * options for bs) and options (which has file.* already removed).
3994           */
3995          qdict_put_str(bs->options, "driver", drv->format_name);
3996          qdict_put_str(options, "driver", drv->format_name);
3997      } else if (!drv) {
3998          error_setg(errp, "Must specify either driver or file");
3999          goto fail;
4000      }
4001  
4002      /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
4003      assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
4004      /* file must be NULL if a protocol BDS is about to be created
4005       * (the inverse results in an error message from bdrv_open_common()) */
4006      assert(!(flags & BDRV_O_PROTOCOL) || !file);
4007  
4008      /* Open the image */
4009      ret = bdrv_open_common(bs, file, options, &local_err);
4010      if (ret < 0) {
4011          goto fail;
4012      }
4013  
4014      if (file) {
4015          blk_unref(file);
4016          file = NULL;
4017      }
4018  
4019      /* If there is a backing file, use it */
4020      if ((flags & BDRV_O_NO_BACKING) == 0) {
4021          ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
4022          if (ret < 0) {
4023              goto close_and_fail;
4024          }
4025      }
4026  
4027      /* Remove all children options and references
4028       * from bs->options and bs->explicit_options */
4029      QLIST_FOREACH(child, &bs->children, next) {
4030          char *child_key_dot;
4031          child_key_dot = g_strdup_printf("%s.", child->name);
4032          qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
4033          qdict_extract_subqdict(bs->options, NULL, child_key_dot);
4034          qdict_del(bs->explicit_options, child->name);
4035          qdict_del(bs->options, child->name);
4036          g_free(child_key_dot);
4037      }
4038  
4039      /* Check if any unknown options were used */
4040      if (qdict_size(options) != 0) {
4041          const QDictEntry *entry = qdict_first(options);
4042          if (flags & BDRV_O_PROTOCOL) {
4043              error_setg(errp, "Block protocol '%s' doesn't support the option "
4044                         "'%s'", drv->format_name, entry->key);
4045          } else {
4046              error_setg(errp,
4047                         "Block format '%s' does not support the option '%s'",
4048                         drv->format_name, entry->key);
4049          }
4050  
4051          goto close_and_fail;
4052      }
4053  
4054      bdrv_parent_cb_change_media(bs, true);
4055  
4056      qobject_unref(options);
4057      options = NULL;
4058  
4059      /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
4060       * temporary snapshot afterwards. */
4061      if (snapshot_flags) {
4062          BlockDriverState *snapshot_bs;
4063          snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
4064                                                  snapshot_options, &local_err);
4065          snapshot_options = NULL;
4066          if (local_err) {
4067              goto close_and_fail;
4068          }
4069          /* We are not going to return bs but the overlay on top of it
4070           * (snapshot_bs); thus, we have to drop the strong reference to bs
4071           * (which we obtained by calling bdrv_new()). bs will not be deleted,
4072           * though, because the overlay still has a reference to it. */
4073          bdrv_unref(bs);
4074          bs = snapshot_bs;
4075      }
4076  
4077      return bs;
4078  
4079  fail:
4080      blk_unref(file);
4081      qobject_unref(snapshot_options);
4082      qobject_unref(bs->explicit_options);
4083      qobject_unref(bs->options);
4084      qobject_unref(options);
4085      bs->options = NULL;
4086      bs->explicit_options = NULL;
4087      bdrv_unref(bs);
4088      error_propagate(errp, local_err);
4089      return NULL;
4090  
4091  close_and_fail:
4092      bdrv_unref(bs);
4093      qobject_unref(snapshot_options);
4094      qobject_unref(options);
4095      error_propagate(errp, local_err);
4096      return NULL;
4097  }
4098  
4099  /*
4100   * The caller must always hold @filename AioContext lock, because this
4101   * function eventually calls bdrv_refresh_total_sectors() which polls
4102   * when called from non-coroutine context.
4103   */
4104  BlockDriverState *bdrv_open(const char *filename, const char *reference,
4105                              QDict *options, int flags, Error **errp)
4106  {
4107      GLOBAL_STATE_CODE();
4108  
4109      return bdrv_open_inherit(filename, reference, options, flags, NULL,
4110                               NULL, 0, errp);
4111  }
4112  
/*
 * Return true if the NULL-terminated @list contains @str.
 *
 * A NULL @str or a NULL @list never matches.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *entry;

    if (!str || !list) {
        return false;
    }

    /* Walk the array up to its NULL terminator */
    for (entry = list; *entry != NULL; entry++) {
        if (strcmp(str, *entry) == 0) {
            return true;
        }
    }

    return false;
}
4126  
4127  /*
4128   * Check that every option set in @bs->options is also set in
4129   * @new_opts.
4130   *
4131   * Options listed in the common_options list and in
4132   * @bs->drv->mutable_opts are skipped.
4133   *
4134   * Return 0 on success, otherwise return -EINVAL and set @errp.
4135   */
4136  static int bdrv_reset_options_allowed(BlockDriverState *bs,
4137                                        const QDict *new_opts, Error **errp)
4138  {
4139      const QDictEntry *e;
4140      /* These options are common to all block drivers and are handled
4141       * in bdrv_reopen_prepare() so they can be left out of @new_opts */
4142      const char *const common_options[] = {
4143          "node-name", "discard", "cache.direct", "cache.no-flush",
4144          "read-only", "auto-read-only", "detect-zeroes", NULL
4145      };
4146  
4147      for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
4148          if (!qdict_haskey(new_opts, e->key) &&
4149              !is_str_in_list(e->key, common_options) &&
4150              !is_str_in_list(e->key, bs->drv->mutable_opts)) {
4151              error_setg(errp, "Option '%s' cannot be reset "
4152                         "to its default value", e->key);
4153              return -EINVAL;
4154          }
4155      }
4156  
4157      return 0;
4158  }
4159  
4160  /*
4161   * Returns true if @child can be reached recursively from @bs
4162   */
4163  static bool bdrv_recurse_has_child(BlockDriverState *bs,
4164                                     BlockDriverState *child)
4165  {
4166      BdrvChild *c;
4167  
4168      if (bs == child) {
4169          return true;
4170      }
4171  
4172      QLIST_FOREACH(c, &bs->children, next) {
4173          if (bdrv_recurse_has_child(c->bs, child)) {
4174              return true;
4175          }
4176      }
4177  
4178      return false;
4179  }
4180  
4181  /*
4182   * Adds a BlockDriverState to a simple queue for an atomic, transactional
4183   * reopen of multiple devices.
4184   *
4185   * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
4186   * already performed, or alternatively may be NULL a new BlockReopenQueue will
4187   * be created and initialized. This newly created BlockReopenQueue should be
4188   * passed back in for subsequent calls that are intended to be of the same
4189   * atomic 'set'.
4190   *
4191   * bs is the BlockDriverState to add to the reopen queue.
4192   *
4193   * options contains the changed options for the associated bs
4194   * (the BlockReopenQueue takes ownership)
4195   *
4196   * flags contains the open flags for the associated bs
4197   *
4198   * returns a pointer to bs_queue, which is either the newly allocated
4199   * bs_queue, or the existing bs_queue being used.
4200   *
4201   * bs is drained here and undrained by bdrv_reopen_queue_free().
4202   *
4203   * To be called with bs->aio_context locked.
4204   */
4205  static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
4206                                                   BlockDriverState *bs,
4207                                                   QDict *options,
4208                                                   const BdrvChildClass *klass,
4209                                                   BdrvChildRole role,
4210                                                   bool parent_is_format,
4211                                                   QDict *parent_options,
4212                                                   int parent_flags,
4213                                                   bool keep_old_opts)
4214  {
4215      assert(bs != NULL);
4216  
4217      BlockReopenQueueEntry *bs_entry;
4218      BdrvChild *child;
4219      QDict *old_options, *explicit_options, *options_copy;
4220      int flags;
4221      QemuOpts *opts;
4222  
4223      GLOBAL_STATE_CODE();
4224  
4225      bdrv_drained_begin(bs);
4226  
4227      if (bs_queue == NULL) {
4228          bs_queue = g_new0(BlockReopenQueue, 1);
4229          QTAILQ_INIT(bs_queue);
4230      }
4231  
4232      if (!options) {
4233          options = qdict_new();
4234      }
4235  
4236      /* Check if this BlockDriverState is already in the queue */
4237      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
4238          if (bs == bs_entry->state.bs) {
4239              break;
4240          }
4241      }
4242  
4243      /*
4244       * Precedence of options:
4245       * 1. Explicitly passed in options (highest)
4246       * 2. Retained from explicitly set options of bs
4247       * 3. Inherited from parent node
4248       * 4. Retained from effective options of bs
4249       */
4250  
4251      /* Old explicitly set values (don't overwrite by inherited value) */
4252      if (bs_entry || keep_old_opts) {
4253          old_options = qdict_clone_shallow(bs_entry ?
4254                                            bs_entry->state.explicit_options :
4255                                            bs->explicit_options);
4256          bdrv_join_options(bs, options, old_options);
4257          qobject_unref(old_options);
4258      }
4259  
4260      explicit_options = qdict_clone_shallow(options);
4261  
4262      /* Inherit from parent node */
4263      if (parent_options) {
4264          flags = 0;
4265          klass->inherit_options(role, parent_is_format, &flags, options,
4266                                 parent_flags, parent_options);
4267      } else {
4268          flags = bdrv_get_flags(bs);
4269      }
4270  
4271      if (keep_old_opts) {
4272          /* Old values are used for options that aren't set yet */
4273          old_options = qdict_clone_shallow(bs->options);
4274          bdrv_join_options(bs, options, old_options);
4275          qobject_unref(old_options);
4276      }
4277  
4278      /* We have the final set of options so let's update the flags */
4279      options_copy = qdict_clone_shallow(options);
4280      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4281      qemu_opts_absorb_qdict(opts, options_copy, NULL);
4282      update_flags_from_options(&flags, opts);
4283      qemu_opts_del(opts);
4284      qobject_unref(options_copy);
4285  
4286      /* bdrv_open_inherit() sets and clears some additional flags internally */
4287      flags &= ~BDRV_O_PROTOCOL;
4288      if (flags & BDRV_O_RDWR) {
4289          flags |= BDRV_O_ALLOW_RDWR;
4290      }
4291  
4292      if (!bs_entry) {
4293          bs_entry = g_new0(BlockReopenQueueEntry, 1);
4294          QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
4295      } else {
4296          qobject_unref(bs_entry->state.options);
4297          qobject_unref(bs_entry->state.explicit_options);
4298      }
4299  
4300      bs_entry->state.bs = bs;
4301      bs_entry->state.options = options;
4302      bs_entry->state.explicit_options = explicit_options;
4303      bs_entry->state.flags = flags;
4304  
4305      /*
4306       * If keep_old_opts is false then it means that unspecified
4307       * options must be reset to their original value. We don't allow
4308       * resetting 'backing' but we need to know if the option is
4309       * missing in order to decide if we have to return an error.
4310       */
4311      if (!keep_old_opts) {
4312          bs_entry->state.backing_missing =
4313              !qdict_haskey(options, "backing") &&
4314              !qdict_haskey(options, "backing.driver");
4315      }
4316  
4317      QLIST_FOREACH(child, &bs->children, next) {
4318          QDict *new_child_options = NULL;
4319          bool child_keep_old = keep_old_opts;
4320  
4321          /* reopen can only change the options of block devices that were
4322           * implicitly created and inherited options. For other (referenced)
4323           * block devices, a syntax like "backing.foo" results in an error. */
4324          if (child->bs->inherits_from != bs) {
4325              continue;
4326          }
4327  
4328          /* Check if the options contain a child reference */
4329          if (qdict_haskey(options, child->name)) {
4330              const char *childref = qdict_get_try_str(options, child->name);
4331              /*
4332               * The current child must not be reopened if the child
4333               * reference is null or points to a different node.
4334               */
4335              if (g_strcmp0(childref, child->bs->node_name)) {
4336                  continue;
4337              }
4338              /*
4339               * If the child reference points to the current child then
4340               * reopen it with its existing set of options (note that
4341               * it can still inherit new options from the parent).
4342               */
4343              child_keep_old = true;
4344          } else {
4345              /* Extract child options ("child-name.*") */
4346              char *child_key_dot = g_strdup_printf("%s.", child->name);
4347              qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
4348              qdict_extract_subqdict(options, &new_child_options, child_key_dot);
4349              g_free(child_key_dot);
4350          }
4351  
4352          bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
4353                                  child->klass, child->role, bs->drv->is_format,
4354                                  options, flags, child_keep_old);
4355      }
4356  
4357      return bs_queue;
4358  }
4359  
4360  /* To be called with bs->aio_context locked */
4361  BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
4362                                      BlockDriverState *bs,
4363                                      QDict *options, bool keep_old_opts)
4364  {
4365      GLOBAL_STATE_CODE();
4366  
4367      return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
4368                                     NULL, 0, keep_old_opts);
4369  }
4370  
4371  void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
4372  {
4373      GLOBAL_STATE_CODE();
4374      if (bs_queue) {
4375          BlockReopenQueueEntry *bs_entry, *next;
4376          QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4377              AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
4378  
4379              aio_context_acquire(ctx);
4380              bdrv_drained_end(bs_entry->state.bs);
4381              aio_context_release(ctx);
4382  
4383              qobject_unref(bs_entry->state.explicit_options);
4384              qobject_unref(bs_entry->state.options);
4385              g_free(bs_entry);
4386          }
4387          g_free(bs_queue);
4388      }
4389  }
4390  
/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags.  All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 * All affected nodes must be drained between bdrv_reopen_queue() and
 * bdrv_reopen_multiple().
 *
 * To be called from the main thread, with all other AioContexts unlocked.
 *
 * @bs_queue must not be NULL.  It is always released with
 * bdrv_reopen_queue_free() before returning, on success and on failure.
 *
 * Returns 0 on success.  On failure the negative value reported by the
 * failing step (flush, prepare or permission refresh) is returned.
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    AioContext *ctx;
    Transaction *tran = tran_new();
    g_autoptr(GSList) refresh_list = NULL;

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bs_queue != NULL);
    GLOBAL_STATE_CODE();

    /* Phase 1: flush every queued node, each under its own AioContext lock */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        ret = bdrv_flush(bs_entry->state.bs);
        aio_context_release(ctx);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Error flushing drive");
            goto abort;
        }
    }

    /*
     * Phase 2: prepare every node.  Entries are marked as prepared only
     * after bdrv_reopen_prepare() succeeded, so that the abort path below
     * knows which entries need bdrv_reopen_abort().
     */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        assert(bs_entry->state.bs->quiesce_counter > 0);
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
        aio_context_release(ctx);
        if (ret < 0) {
            goto abort;
        }
        bs_entry->prepared = true;
    }

    /*
     * Phase 3: collect the nodes whose permissions must be refreshed: every
     * reopened node plus any old backing/file child recorded during prepare.
     */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        BDRVReopenState *state = &bs_entry->state;

        refresh_list = g_slist_prepend(refresh_list, state->bs);
        if (state->old_backing_bs) {
            refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs);
        }
        if (state->old_file_bs) {
            refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
        }
    }

    /*
     * Note that the file-posix driver relies on the permission update done
     * during reopen (even if no permission changed), because it wants "new"
     * permissions for reconfiguring the fd and that's why it does it in
     * raw_check_perm(), not in raw_reopen_prepare() which is called with
     * "old" permissions.
     */
    ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
    if (ret < 0) {
        goto abort;
    }

    /*
     * If we reach this point, we have success and just need to apply the
     * changes.
     *
     * Reverse order is used to accommodate the qcow2 driver: on commit it
     * needs to write the IN_USE flag to the image, to mark bitmaps in the
     * image as invalid. But children usually come after parents in the
     * reopen queue, so go from the last to the first element.
     */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        ctx = bdrv_get_aio_context(bs_entry->state.bs);
        aio_context_acquire(ctx);
        bdrv_reopen_commit(&bs_entry->state);
        aio_context_release(ctx);
    }

    tran_commit(tran);

    /* Optional per-driver hook, run only after the transaction is committed */
    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
        BlockDriverState *bs = bs_entry->state.bs;

        if (bs->drv->bdrv_reopen_commit_post) {
            ctx = bdrv_get_aio_context(bs);
            aio_context_acquire(ctx);
            bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
            aio_context_release(ctx);
        }
    }

    ret = 0;
    goto cleanup;

abort:
    /* Abort the transaction first, then every entry that was prepared */
    tran_abort(tran);
    QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (bs_entry->prepared) {
            ctx = bdrv_get_aio_context(bs_entry->state.bs);
            aio_context_acquire(ctx);
            bdrv_reopen_abort(&bs_entry->state);
            aio_context_release(ctx);
        }
    }

cleanup:
    /* Frees all entries and the queue on both the success and abort paths */
    bdrv_reopen_queue_free(bs_queue);

    return ret;
}
4516  
4517  int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
4518                  Error **errp)
4519  {
4520      AioContext *ctx = bdrv_get_aio_context(bs);
4521      BlockReopenQueue *queue;
4522      int ret;
4523  
4524      GLOBAL_STATE_CODE();
4525  
4526      queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
4527  
4528      if (ctx != qemu_get_aio_context()) {
4529          aio_context_release(ctx);
4530      }
4531      ret = bdrv_reopen_multiple(queue, errp);
4532  
4533      if (ctx != qemu_get_aio_context()) {
4534          aio_context_acquire(ctx);
4535      }
4536  
4537      return ret;
4538  }
4539  
4540  int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
4541                                Error **errp)
4542  {
4543      QDict *opts = qdict_new();
4544  
4545      GLOBAL_STATE_CODE();
4546  
4547      qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
4548  
4549      return bdrv_reopen(bs, opts, true, errp);
4550  }
4551  
4552  /*
4553   * Take a BDRVReopenState and check if the value of 'backing' in the
4554   * reopen_state->options QDict is valid or not.
4555   *
4556   * If 'backing' is missing from the QDict then return 0.
4557   *
4558   * If 'backing' contains the node name of the backing file of
4559   * reopen_state->bs then return 0.
4560   *
4561   * If 'backing' contains a different node name (or is null) then check
4562   * whether the current backing file can be replaced with the new one.
4563   * If that's the case then reopen_state->replace_backing_bs is set to
4564   * true and reopen_state->new_backing_bs contains a pointer to the new
4565   * backing BlockDriverState (or NULL).
4566   *
4567   * Return 0 on success, otherwise return < 0 and set @errp.
4568   */
4569  static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
4570                                               bool is_backing, Transaction *tran,
4571                                               Error **errp)
4572  {
4573      BlockDriverState *bs = reopen_state->bs;
4574      BlockDriverState *new_child_bs;
4575      BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) :
4576                                                    child_bs(bs->file);
4577      const char *child_name = is_backing ? "backing" : "file";
4578      QObject *value;
4579      const char *str;
4580  
4581      GLOBAL_STATE_CODE();
4582  
4583      value = qdict_get(reopen_state->options, child_name);
4584      if (value == NULL) {
4585          return 0;
4586      }
4587  
4588      switch (qobject_type(value)) {
4589      case QTYPE_QNULL:
4590          assert(is_backing); /* The 'file' option does not allow a null value */
4591          new_child_bs = NULL;
4592          break;
4593      case QTYPE_QSTRING:
4594          str = qstring_get_str(qobject_to(QString, value));
4595          new_child_bs = bdrv_lookup_bs(NULL, str, errp);
4596          if (new_child_bs == NULL) {
4597              return -EINVAL;
4598          } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
4599              error_setg(errp, "Making '%s' a %s child of '%s' would create a "
4600                         "cycle", str, child_name, bs->node_name);
4601              return -EINVAL;
4602          }
4603          break;
4604      default:
4605          /*
4606           * The options QDict has been flattened, so 'backing' and 'file'
4607           * do not allow any other data type here.
4608           */
4609          g_assert_not_reached();
4610      }
4611  
4612      if (old_child_bs == new_child_bs) {
4613          return 0;
4614      }
4615  
4616      if (old_child_bs) {
4617          if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
4618              return 0;
4619          }
4620  
4621          if (old_child_bs->implicit) {
4622              error_setg(errp, "Cannot replace implicit %s child of %s",
4623                         child_name, bs->node_name);
4624              return -EPERM;
4625          }
4626      }
4627  
4628      if (bs->drv->is_filter && !old_child_bs) {
4629          /*
4630           * Filters always have a file or a backing child, so we are trying to
4631           * change wrong child
4632           */
4633          error_setg(errp, "'%s' is a %s filter node that does not support a "
4634                     "%s child", bs->node_name, bs->drv->format_name, child_name);
4635          return -EINVAL;
4636      }
4637  
4638      if (is_backing) {
4639          reopen_state->old_backing_bs = old_child_bs;
4640      } else {
4641          reopen_state->old_file_bs = old_child_bs;
4642      }
4643  
4644      return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
4645                                             tran, errp);
4646  }
4647  
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * @reopen_state:      state of the node being reopened (node, new flags and
 *                     new options)
 * @queue:             the reopen queue, passed on to the driver's
 *                     .bdrv_reopen_prepare()
 * @change_child_tran: transaction passed on when the 'backing' or 'file'
 *                     child is changed
 * @errp:              set on error
 *
 * Returns 0 on success, non-zero on error.  On error errp will be set
 * as well.
 *
 * If the driver's .bdrv_reopen_prepare() succeeded but a later check in
 * this function fails, the driver's .bdrv_reopen_abort() is called here
 * before returning.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue,
                               Transaction *change_child_tran, Error **errp)
{
    int ret = -1;
    int old_flags;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    QDict *orig_reopen_opts;
    char *discard = NULL;
    bool read_only;
    bool drv_prepared = false;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    GLOBAL_STATE_CODE();
    drv = reopen_state->bs->drv;

    /* This function and each driver's bdrv_reopen_prepare() remove
     * entries from reopen_state->options as they are processed, so
     * we need to make a copy of the original QDict. */
    orig_reopen_opts = qdict_clone_shallow(reopen_state->options);

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
        ret = -EINVAL;
        goto error;
    }

    /* This was already called in bdrv_reopen_queue_child() so the flags
     * are up-to-date. This time we simply want to remove the options from
     * QemuOpts in order to indicate that they have been processed. */
    old_flags = reopen_state->flags;
    update_flags_from_options(&reopen_state->flags, opts);
    assert(old_flags == reopen_state->flags);

    /* 'discard' is taken out of opts; the returned string is freed below */
    discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto error;
        }
    }

    reopen_state->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* All other options (including node-name and driver) must be unchanged.
     * Put them back into the QDict, so that they are checked at the end
     * of this function. */
    qemu_opts_to_qdict(opts, reopen_state->options);

    /* If we are to stay read-only, do not allow permission change
     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
     * not set, or if the BDS still has copy_on_read enabled */
    read_only = !(reopen_state->flags & BDRV_O_RDWR);
    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        /*
         * If a driver-specific option is missing, it means that we
         * should reset it to its default value.
         * But not all options allow that, so we need to check it first.
         */
        ret = bdrv_reset_options_allowed(reopen_state->bs,
                                         reopen_state->options, errp);
        if (ret) {
            goto error;
        }

        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* The driver failed without an error: report a generic one */
                bdrv_refresh_filename(reopen_state->bs);
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* From here on, a failure must undo the driver's prepare (see 'error') */
    drv_prepared = true;

    /*
     * We must provide the 'backing' option if the BDS has a backing
     * file or if the image file has a backing file name as part of
     * its metadata. Otherwise the 'backing' option can be omitted.
     */
    if (drv->supports_backing && reopen_state->backing_missing &&
        (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
        error_setg(errp, "backing is missing for '%s'",
                   reopen_state->bs->node_name);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Allow changing the 'backing' option. The new value can be
     * either a reference to an existing node (using its node name)
     * or NULL to simply detach the current backing file.
     */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "backing");

    /* Allow changing the 'file' option. In this case NULL is not allowed */
    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
                                            change_child_tran, errp);
    if (ret < 0) {
        goto error;
    }
    qdict_del(reopen_state->options, "file");

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            QObject *new = entry->value;
            QObject *old = qdict_get(reopen_state->bs->options, entry->key);

            /* Allow child references (child_name=node_name) as long as they
             * point to the current child (i.e. everything stays the same). */
            if (qobject_type(new) == QTYPE_QSTRING) {
                BdrvChild *child;
                QLIST_FOREACH(child, &reopen_state->bs->children, next) {
                    if (!strcmp(child->name, entry->key)) {
                        break;
                    }
                }

                if (child) {
                    if (!strcmp(child->bs->node_name,
                                qstring_get_str(qobject_to(QString, new)))) {
                        continue; /* Found child with this name, skip option */
                    }
                }
            }

            /*
             * TODO: When using -drive to specify blockdev options, all values
             * will be strings; however, when using -blockdev, blockdev-add or
             * filenames using the json:{} pseudo-protocol, they will be
             * correctly typed.
             * In contrast, reopening options are (currently) always strings
             * (because you can only specify them through qemu-io; all other
             * callers do not specify any options).
             * Therefore, when using anything other than -drive to create a BDS,
             * this cannot detect non-string options as unchanged, because
             * qobject_is_equal() always returns false for objects of different
             * type.  In the future, this should be remedied by correctly typing
             * all options.  For now, this is not too big of an issue because
             * the user can simply omit options which cannot be changed anyway,
             * so they will stay unchanged.
             */
            if (!qobject_is_equal(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

    /* Restore the original reopen_state->options QDict */
    qobject_unref(reopen_state->options);
    reopen_state->options = qobject_ref(orig_reopen_opts);

error:
    /* Reached on success as well; the abort below only runs when ret < 0 */
    if (ret < 0 && drv_prepared) {
        /* drv->bdrv_reopen_prepare() has succeeded, so we need to
         * call drv->bdrv_reopen_abort() before signaling an error
         * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
         * when the respective bdrv_reopen_prepare() has failed) */
        if (drv->bdrv_reopen_abort) {
            drv->bdrv_reopen_abort(reopen_state);
        }
    }
    qemu_opts_del(opts);
    qobject_unref(orig_reopen_opts);
    g_free(discard);
    return ret;
}
4876  
4877  /*
4878   * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
4879   * makes them final by swapping the staging BlockDriverState contents into
4880   * the active BlockDriverState contents.
4881   */
4882  static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
4883  {
4884      BlockDriver *drv;
4885      BlockDriverState *bs;
4886      BdrvChild *child;
4887  
4888      assert(reopen_state != NULL);
4889      bs = reopen_state->bs;
4890      drv = bs->drv;
4891      assert(drv != NULL);
4892      GLOBAL_STATE_CODE();
4893  
4894      /* If there are any driver level actions to take */
4895      if (drv->bdrv_reopen_commit) {
4896          drv->bdrv_reopen_commit(reopen_state);
4897      }
4898  
4899      /* set BDS specific flags now */
4900      qobject_unref(bs->explicit_options);
4901      qobject_unref(bs->options);
4902      qobject_ref(reopen_state->explicit_options);
4903      qobject_ref(reopen_state->options);
4904  
4905      bs->explicit_options   = reopen_state->explicit_options;
4906      bs->options            = reopen_state->options;
4907      bs->open_flags         = reopen_state->flags;
4908      bs->detect_zeroes      = reopen_state->detect_zeroes;
4909  
4910      /* Remove child references from bs->options and bs->explicit_options.
4911       * Child options were already removed in bdrv_reopen_queue_child() */
4912      QLIST_FOREACH(child, &bs->children, next) {
4913          qdict_del(bs->explicit_options, child->name);
4914          qdict_del(bs->options, child->name);
4915      }
4916      /* backing is probably removed, so it's not handled by previous loop */
4917      qdict_del(bs->explicit_options, "backing");
4918      qdict_del(bs->options, "backing");
4919  
4920      bdrv_refresh_limits(bs, NULL, NULL);
4921  }
4922  
4923  /*
4924   * Abort the reopen, and delete and free the staged changes in
4925   * reopen_state
4926   */
4927  static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
4928  {
4929      BlockDriver *drv;
4930  
4931      assert(reopen_state != NULL);
4932      drv = reopen_state->bs->drv;
4933      assert(drv != NULL);
4934      GLOBAL_STATE_CODE();
4935  
4936      if (drv->bdrv_reopen_abort) {
4937          drv->bdrv_reopen_abort(reopen_state);
4938      }
4939  }
4940  
4941  
/*
 * Close @bs, which must no longer be referenced (bs->refcnt == 0).
 *
 * Inside a drained section: flushes the node, calls the driver's
 * .bdrv_close (if any) and clears bs->drv, unrefs all children, and resets
 * the per-image state (opaque data, backing file/format strings, size,
 * option dicts, block-status cache, named dirty bitmaps, AIO notifiers).
 */
static void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;
    BdrvChild *child, *next;

    GLOBAL_STATE_CODE();
    assert(!bs->refcnt);

    bdrv_drained_begin(bs); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */

    if (bs->drv) {
        if (bs->drv->bdrv_close) {
            /* Must unfreeze all children, so bdrv_unref_child() works */
            bs->drv->bdrv_close(bs);
        }
        bs->drv = NULL;
    }

    /* _SAFE variant: the list is modified while children are dropped */
    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
        bdrv_unref_child(bs, child);
    }

    /* Dropping all children must have cleared these two links as well */
    assert(!bs->backing);
    assert(!bs->file);
    g_free(bs->opaque);
    bs->opaque = NULL;
    qatomic_set(&bs->copy_on_read, 0);
    bs->backing_file[0] = '\0';
    bs->backing_format[0] = '\0';
    bs->total_sectors = 0;
    bs->encrypted = false;
    bs->sg = false;
    qobject_unref(bs->options);
    qobject_unref(bs->explicit_options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    qobject_unref(bs->full_open_options);
    bs->full_open_options = NULL;
    g_free(bs->block_status_cache);
    bs->block_status_cache = NULL;

    bdrv_release_named_dirty_bitmaps(bs);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    /* Free the remaining AIO notifiers and reset the list head */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
    bdrv_drained_end(bs);

    /*
     * If we're still inside some bdrv_drain_all_begin()/end() sections, end
     * them now since this BDS won't exist anymore when bdrv_drain_all_end()
     * gets called.
     */
    if (bs->quiesce_counter) {
        bdrv_drain_all_end_quiesce(bs);
    }
}
5003  
/*
 * Close all block nodes: drain everything, remove the nodes from all
 * BlockBackends and close the remaining blockdev-owned nodes.
 *
 * No job may exist any more when this is called, and all_bdrv_states must
 * be empty afterwards; both conditions are asserted.
 */
void bdrv_close_all(void)
{
    GLOBAL_STATE_CODE();
    assert(job_next(NULL) == NULL);

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
    bdrv_drain_all();

    blk_remove_all_bs();
    blockdev_close_all_bdrv_states();

    assert(QTAILQ_EMPTY(&all_bdrv_states));
}
5018  
5019  static bool should_update_child(BdrvChild *c, BlockDriverState *to)
5020  {
5021      GQueue *queue;
5022      GHashTable *found;
5023      bool ret;
5024  
5025      if (c->klass->stay_at_node) {
5026          return false;
5027      }
5028  
5029      /* If the child @c belongs to the BDS @to, replacing the current
5030       * c->bs by @to would mean to create a loop.
5031       *
5032       * Such a case occurs when appending a BDS to a backing chain.
5033       * For instance, imagine the following chain:
5034       *
5035       *   guest device -> node A -> further backing chain...
5036       *
5037       * Now we create a new BDS B which we want to put on top of this
5038       * chain, so we first attach A as its backing node:
5039       *
5040       *                   node B
5041       *                     |
5042       *                     v
5043       *   guest device -> node A -> further backing chain...
5044       *
5045       * Finally we want to replace A by B.  When doing that, we want to
5046       * replace all pointers to A by pointers to B -- except for the
5047       * pointer from B because (1) that would create a loop, and (2)
5048       * that pointer should simply stay intact:
5049       *
5050       *   guest device -> node B
5051       *                     |
5052       *                     v
5053       *                   node A -> further backing chain...
5054       *
5055       * In general, when replacing a node A (c->bs) by a node B (@to),
5056       * if A is a child of B, that means we cannot replace A by B there
5057       * because that would create a loop.  Silently detaching A from B
5058       * is also not really an option.  So overall just leaving A in
5059       * place there is the most sensible choice.
5060       *
5061       * We would also create a loop in any cases where @c is only
5062       * indirectly referenced by @to. Prevent this by returning false
5063       * if @c is found (by breadth-first search) anywhere in the whole
5064       * subtree of @to.
5065       */
5066  
5067      ret = true;
5068      found = g_hash_table_new(NULL, NULL);
5069      g_hash_table_add(found, to);
5070      queue = g_queue_new();
5071      g_queue_push_tail(queue, to);
5072  
5073      while (!g_queue_is_empty(queue)) {
5074          BlockDriverState *v = g_queue_pop_head(queue);
5075          BdrvChild *c2;
5076  
5077          QLIST_FOREACH(c2, &v->children, next) {
5078              if (c2 == c) {
5079                  ret = false;
5080                  break;
5081              }
5082  
5083              if (g_hash_table_contains(found, c2->bs)) {
5084                  continue;
5085              }
5086  
5087              g_queue_push_tail(queue, c2->bs);
5088              g_hash_table_add(found, c2->bs);
5089          }
5090      }
5091  
5092      g_queue_free(queue);
5093      g_hash_table_destroy(found);
5094  
5095      return ret;
5096  }
5097  
5098  static void bdrv_remove_child_commit(void *opaque)
5099  {
5100      GLOBAL_STATE_CODE();
5101      bdrv_child_free(opaque);
5102  }
5103  
5104  static TransactionActionDrv bdrv_remove_child_drv = {
5105      .commit = bdrv_remove_child_commit,
5106  };
5107  
5108  /* Function doesn't update permissions, caller is responsible for this. */
5109  static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
5110  {
5111      if (!child) {
5112          return;
5113      }
5114  
5115      if (child->bs) {
5116          BlockDriverState *bs = child->bs;
5117          bdrv_drained_begin(bs);
5118          bdrv_replace_child_tran(child, NULL, tran);
5119          bdrv_drained_end(bs);
5120      }
5121  
5122      tran_add(tran, &bdrv_remove_child_drv, child);
5123  }
5124  
5125  static void undrain_on_clean_cb(void *opaque)
5126  {
5127      bdrv_drained_end(opaque);
5128  }
5129  
5130  static TransactionActionDrv undrain_on_clean = {
5131      .clean = undrain_on_clean_cb,
5132  };
5133  
5134  static int bdrv_replace_node_noperm(BlockDriverState *from,
5135                                      BlockDriverState *to,
5136                                      bool auto_skip, Transaction *tran,
5137                                      Error **errp)
5138  {
5139      BdrvChild *c, *next;
5140  
5141      GLOBAL_STATE_CODE();
5142  
5143      bdrv_drained_begin(from);
5144      bdrv_drained_begin(to);
5145      tran_add(tran, &undrain_on_clean, from);
5146      tran_add(tran, &undrain_on_clean, to);
5147  
5148      QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
5149          assert(c->bs == from);
5150          if (!should_update_child(c, to)) {
5151              if (auto_skip) {
5152                  continue;
5153              }
5154              error_setg(errp, "Should not change '%s' link to '%s'",
5155                         c->name, from->node_name);
5156              return -EINVAL;
5157          }
5158          if (c->frozen) {
5159              error_setg(errp, "Cannot change '%s' link to '%s'",
5160                         c->name, from->node_name);
5161              return -EPERM;
5162          }
5163          bdrv_replace_child_tran(c, to, tran);
5164      }
5165  
5166      return 0;
5167  }
5168  
/*
 * Replace @from by @to for the parents of @from, then refresh the
 * permissions of both nodes; everything is done in one transaction that is
 * finalized with the resulting return code.
 *
 * With auto_skip=true bdrv_replace_node_common skips updating those parents
 * of @from for which the update would create a parent-child relation loop
 * or if the parent is a block-job.
 *
 * With auto_skip=false an error is returned if @from has a parent which
 * should not be updated.
 *
 * With @detach_subchain=true @to must be in a backing chain of @from. In this
 * case the backing link of the cow-parent of @to is removed.
 *
 * Returns 0 on success, a negative value with @errp set on failure.
 */
static int bdrv_replace_node_common(BlockDriverState *from,
                                    BlockDriverState *to,
                                    bool auto_skip, bool detach_subchain,
                                    Error **errp)
{
    Transaction *tran = tran_new();
    g_autoptr(GSList) refresh_list = NULL;
    BlockDriverState *to_cow_parent = NULL;
    int ret;

    GLOBAL_STATE_CODE();

    if (detach_subchain) {
        assert(bdrv_chain_contains(from, to));
        assert(from != to);
        /* Walk down from @from to the node whose filter/COW child is @to */
        for (to_cow_parent = from;
             bdrv_filter_or_cow_bs(to_cow_parent) != to;
             to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
        {
            ;
        }
    }

    /* Make sure that @from doesn't go away until we have successfully attached
     * all of its parents to @to. */
    bdrv_ref(from);

    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
    /* Paired with the bdrv_drained_end(from) after the 'out' label */
    bdrv_drained_begin(from);

    /*
     * Do the replacement without permission update.
     * Replacement may influence the permissions, we should calculate new
     * permissions based on new graph. If we fail, we'll roll-back the
     * replacement.
     */
    ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
    if (ret < 0) {
        goto out;
    }

    if (detach_subchain) {
        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
    }

    refresh_list = g_slist_prepend(refresh_list, to);
    refresh_list = g_slist_prepend(refresh_list, from);

    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
    if (ret < 0) {
        goto out;
    }

    ret = 0;

out:
    /* NOTE(review): presumably commits when ret == 0 and aborts otherwise */
    tran_finalize(tran, ret);

    bdrv_drained_end(from);
    bdrv_unref(from);

    return ret;
}
5243  
5244  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
5245                        Error **errp)
5246  {
5247      GLOBAL_STATE_CODE();
5248  
5249      return bdrv_replace_node_common(from, to, true, false, errp);
5250  }
5251  
5252  int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
5253  {
5254      GLOBAL_STATE_CODE();
5255  
5256      return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
5257                                      errp);
5258  }
5259  
5260  /*
5261   * Add new bs contents at the top of an image chain while the chain is
5262   * live, while keeping required fields on the top layer.
5263   *
5264   * This will modify the BlockDriverState fields, and swap contents
5265   * between bs_new and bs_top. Both bs_new and bs_top are modified.
5266   *
5267   * bs_new must not be attached to a BlockBackend and must not have backing
5268   * child.
5269   *
5270   * This function does not create any image files.
5271   *
5272   * The caller must hold the AioContext lock for @bs_top.
5273   */
5274  int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
5275                  Error **errp)
5276  {
5277      int ret;
5278      BdrvChild *child;
5279      Transaction *tran = tran_new();
5280      AioContext *old_context, *new_context = NULL;
5281  
5282      GLOBAL_STATE_CODE();
5283  
5284      assert(!bs_new->backing);
5285  
5286      old_context = bdrv_get_aio_context(bs_top);
5287  
5288      child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
5289                                       &child_of_bds, bdrv_backing_role(bs_new),
5290                                       tran, errp);
5291      if (!child) {
5292          ret = -EINVAL;
5293          goto out;
5294      }
5295  
5296      /*
5297       * bdrv_attach_child_noperm could change the AioContext of bs_top.
5298       * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
5299       * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
5300       * that assumes the new lock is taken.
5301       */
5302      new_context = bdrv_get_aio_context(bs_top);
5303  
5304      if (old_context != new_context) {
5305          aio_context_release(old_context);
5306          aio_context_acquire(new_context);
5307      }
5308  
5309      ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
5310      if (ret < 0) {
5311          goto out;
5312      }
5313  
5314      ret = bdrv_refresh_perms(bs_new, tran, errp);
5315  out:
5316      tran_finalize(tran, ret);
5317  
5318      bdrv_refresh_limits(bs_top, NULL, NULL);
5319  
5320      if (new_context && old_context != new_context) {
5321          aio_context_release(new_context);
5322          aio_context_acquire(old_context);
5323      }
5324  
5325      return ret;
5326  }
5327  
5328  /* Not for empty child */
5329  int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
5330                            Error **errp)
5331  {
5332      int ret;
5333      Transaction *tran = tran_new();
5334      g_autoptr(GSList) refresh_list = NULL;
5335      BlockDriverState *old_bs = child->bs;
5336  
5337      GLOBAL_STATE_CODE();
5338  
5339      bdrv_ref(old_bs);
5340      bdrv_drained_begin(old_bs);
5341      bdrv_drained_begin(new_bs);
5342  
5343      bdrv_replace_child_tran(child, new_bs, tran);
5344  
5345      refresh_list = g_slist_prepend(refresh_list, old_bs);
5346      refresh_list = g_slist_prepend(refresh_list, new_bs);
5347  
5348      ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5349  
5350      tran_finalize(tran, ret);
5351  
5352      bdrv_drained_end(old_bs);
5353      bdrv_drained_end(new_bs);
5354      bdrv_unref(old_bs);
5355  
5356      return ret;
5357  }
5358  
5359  static void bdrv_delete(BlockDriverState *bs)
5360  {
5361      assert(bdrv_op_blocker_is_empty(bs));
5362      assert(!bs->refcnt);
5363      GLOBAL_STATE_CODE();
5364  
5365      /* remove from list, if necessary */
5366      if (bs->node_name[0] != '\0') {
5367          QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
5368      }
5369      QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
5370  
5371      bdrv_close(bs);
5372  
5373      g_free(bs);
5374  }
5375  
5376  
5377  /*
5378   * Replace @bs by newly created block node.
5379   *
5380   * @options is a QDict of options to pass to the block drivers, or NULL for an
5381   * empty set of options. The reference to the QDict belongs to the block layer
5382   * after the call (even on failure), so if the caller intends to reuse the
5383   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
5384   */
5385  BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
5386                                     int flags, Error **errp)
5387  {
5388      ERRP_GUARD();
5389      int ret;
5390      BlockDriverState *new_node_bs = NULL;
5391      const char *drvname, *node_name;
5392      BlockDriver *drv;
5393  
5394      drvname = qdict_get_try_str(options, "driver");
5395      if (!drvname) {
5396          error_setg(errp, "driver is not specified");
5397          goto fail;
5398      }
5399  
5400      drv = bdrv_find_format(drvname);
5401      if (!drv) {
5402          error_setg(errp, "Unknown driver: '%s'", drvname);
5403          goto fail;
5404      }
5405  
5406      node_name = qdict_get_try_str(options, "node-name");
5407  
5408      GLOBAL_STATE_CODE();
5409  
5410      new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
5411                                              errp);
5412      options = NULL; /* bdrv_new_open_driver() eats options */
5413      if (!new_node_bs) {
5414          error_prepend(errp, "Could not create node: ");
5415          goto fail;
5416      }
5417  
5418      bdrv_drained_begin(bs);
5419      ret = bdrv_replace_node(bs, new_node_bs, errp);
5420      bdrv_drained_end(bs);
5421  
5422      if (ret < 0) {
5423          error_prepend(errp, "Could not replace node: ");
5424          goto fail;
5425      }
5426  
5427      return new_node_bs;
5428  
5429  fail:
5430      qobject_unref(options);
5431      bdrv_unref(new_node_bs);
5432      return NULL;
5433  }
5434  
5435  /*
5436   * Run consistency checks on an image
5437   *
5438   * Returns 0 if the check could be completed (it doesn't mean that the image is
5439   * free of errors) or -errno when an internal error occurred. The results of the
5440   * check are stored in res.
5441   */
5442  int coroutine_fn bdrv_co_check(BlockDriverState *bs,
5443                                 BdrvCheckResult *res, BdrvCheckMode fix)
5444  {
5445      IO_CODE();
5446      assert_bdrv_graph_readable();
5447      if (bs->drv == NULL) {
5448          return -ENOMEDIUM;
5449      }
5450      if (bs->drv->bdrv_co_check == NULL) {
5451          return -ENOTSUP;
5452      }
5453  
5454      memset(res, 0, sizeof(*res));
5455      return bs->drv->bdrv_co_check(bs, res, fix);
5456  }
5457  
5458  /*
5459   * Return values:
5460   * 0        - success
5461   * -EINVAL  - backing format specified, but no file
5462   * -ENOSPC  - can't update the backing file because no space is left in the
5463   *            image file header
5464   * -ENOTSUP - format driver doesn't support changing the backing file
5465   */
5466  int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
5467                               const char *backing_fmt, bool require)
5468  {
5469      BlockDriver *drv = bs->drv;
5470      int ret;
5471  
5472      GLOBAL_STATE_CODE();
5473  
5474      if (!drv) {
5475          return -ENOMEDIUM;
5476      }
5477  
5478      /* Backing file format doesn't make sense without a backing file */
5479      if (backing_fmt && !backing_file) {
5480          return -EINVAL;
5481      }
5482  
5483      if (require && backing_file && !backing_fmt) {
5484          return -EINVAL;
5485      }
5486  
5487      if (drv->bdrv_change_backing_file != NULL) {
5488          ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
5489      } else {
5490          ret = -ENOTSUP;
5491      }
5492  
5493      if (ret == 0) {
5494          pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
5495          pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
5496          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
5497                  backing_file ?: "");
5498      }
5499      return ret;
5500  }
5501  
5502  /*
5503   * Finds the first non-filter node above bs in the chain between
5504   * active and bs.  The returned node is either an immediate parent of
5505   * bs, or there are only filter nodes between the two.
5506   *
5507   * Returns NULL if bs is not found in active's image chain,
5508   * or if active == bs.
5509   *
5510   * Returns the bottommost base image if bs == NULL.
5511   */
5512  BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
5513                                      BlockDriverState *bs)
5514  {
5515  
5516      GLOBAL_STATE_CODE();
5517  
5518      bs = bdrv_skip_filters(bs);
5519      active = bdrv_skip_filters(active);
5520  
5521      while (active) {
5522          BlockDriverState *next = bdrv_backing_chain_next(active);
5523          if (bs == next) {
5524              return active;
5525          }
5526          active = next;
5527      }
5528  
5529      return NULL;
5530  }
5531  
5532  /* Given a BDS, searches for the base layer. */
5533  BlockDriverState *bdrv_find_base(BlockDriverState *bs)
5534  {
5535      GLOBAL_STATE_CODE();
5536  
5537      return bdrv_find_overlay(bs, NULL);
5538  }
5539  
5540  /*
5541   * Return true if at least one of the COW (backing) and filter links
5542   * between @bs and @base is frozen. @errp is set if that's the case.
5543   * @base must be reachable from @bs, or NULL.
5544   */
5545  bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
5546                                    Error **errp)
5547  {
5548      BlockDriverState *i;
5549      BdrvChild *child;
5550  
5551      GLOBAL_STATE_CODE();
5552  
5553      for (i = bs; i != base; i = child_bs(child)) {
5554          child = bdrv_filter_or_cow_child(i);
5555  
5556          if (child && child->frozen) {
5557              error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
5558                         child->name, i->node_name, child->bs->node_name);
5559              return true;
5560          }
5561      }
5562  
5563      return false;
5564  }
5565  
5566  /*
5567   * Freeze all COW (backing) and filter links between @bs and @base.
5568   * If any of the links is already frozen the operation is aborted and
5569   * none of the links are modified.
5570   * @base must be reachable from @bs, or NULL.
5571   * Returns 0 on success. On failure returns < 0 and sets @errp.
5572   */
5573  int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
5574                                Error **errp)
5575  {
5576      BlockDriverState *i;
5577      BdrvChild *child;
5578  
5579      GLOBAL_STATE_CODE();
5580  
5581      if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
5582          return -EPERM;
5583      }
5584  
5585      for (i = bs; i != base; i = child_bs(child)) {
5586          child = bdrv_filter_or_cow_child(i);
5587          if (child && child->bs->never_freeze) {
5588              error_setg(errp, "Cannot freeze '%s' link to '%s'",
5589                         child->name, child->bs->node_name);
5590              return -EPERM;
5591          }
5592      }
5593  
5594      for (i = bs; i != base; i = child_bs(child)) {
5595          child = bdrv_filter_or_cow_child(i);
5596          if (child) {
5597              child->frozen = true;
5598          }
5599      }
5600  
5601      return 0;
5602  }
5603  
5604  /*
5605   * Unfreeze all COW (backing) and filter links between @bs and @base.
5606   * The caller must ensure that all links are frozen before using this
5607   * function.
5608   * @base must be reachable from @bs, or NULL.
5609   */
5610  void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
5611  {
5612      BlockDriverState *i;
5613      BdrvChild *child;
5614  
5615      GLOBAL_STATE_CODE();
5616  
5617      for (i = bs; i != base; i = child_bs(child)) {
5618          child = bdrv_filter_or_cow_child(i);
5619          if (child) {
5620              assert(child->frozen);
5621              child->frozen = false;
5622          }
5623      }
5624  }
5625  
5626  /*
5627   * Drops images above 'base' up to and including 'top', and sets the image
5628   * above 'top' to have base as its backing file.
5629   *
5630   * Requires that the overlay to 'top' is opened r/w, so that the backing file
5631   * information in 'bs' can be properly updated.
5632   *
5633   * E.g., this will convert the following chain:
5634   * bottom <- base <- intermediate <- top <- active
5635   *
5636   * to
5637   *
5638   * bottom <- base <- active
5639   *
5640   * It is allowed for bottom==base, in which case it converts:
5641   *
5642   * base <- intermediate <- top <- active
5643   *
5644   * to
5645   *
5646   * base <- active
5647   *
5648   * If backing_file_str is non-NULL, it will be used when modifying top's
5649   * overlay image metadata.
5650   *
5651   * Error conditions:
5652   *  if active == top, that is considered an error
5653   *
5654   */
5655  int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
5656                             const char *backing_file_str)
5657  {
5658      BlockDriverState *explicit_top = top;
5659      bool update_inherits_from;
5660      BdrvChild *c;
5661      Error *local_err = NULL;
5662      int ret = -EIO;
5663      g_autoptr(GSList) updated_children = NULL;
5664      GSList *p;
5665  
5666      GLOBAL_STATE_CODE();
5667  
5668      bdrv_ref(top);
5669      bdrv_drained_begin(base);
5670  
5671      if (!top->drv || !base->drv) {
5672          goto exit;
5673      }
5674  
5675      /* Make sure that base is in the backing chain of top */
5676      if (!bdrv_chain_contains(top, base)) {
5677          goto exit;
5678      }
5679  
5680      /* If 'base' recursively inherits from 'top' then we should set
5681       * base->inherits_from to top->inherits_from after 'top' and all
5682       * other intermediate nodes have been dropped.
5683       * If 'top' is an implicit node (e.g. "commit_top") we should skip
5684       * it because no one inherits from it. We use explicit_top for that. */
5685      explicit_top = bdrv_skip_implicit_filters(explicit_top);
5686      update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
5687  
5688      /* success - we can delete the intermediate states, and link top->base */
5689      if (!backing_file_str) {
5690          bdrv_refresh_filename(base);
5691          backing_file_str = base->filename;
5692      }
5693  
5694      QLIST_FOREACH(c, &top->parents, next_parent) {
5695          updated_children = g_slist_prepend(updated_children, c);
5696      }
5697  
5698      /*
5699       * It seems correct to pass detach_subchain=true here, but it triggers
5700       * one more yet not fixed bug, when due to nested aio_poll loop we switch to
5701       * another drained section, which modify the graph (for example, removing
5702       * the child, which we keep in updated_children list). So, it's a TODO.
5703       *
5704       * Note, bug triggered if pass detach_subchain=true here and run
5705       * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
5706       * That's a FIXME.
5707       */
5708      bdrv_replace_node_common(top, base, false, false, &local_err);
5709      if (local_err) {
5710          error_report_err(local_err);
5711          goto exit;
5712      }
5713  
5714      for (p = updated_children; p; p = p->next) {
5715          c = p->data;
5716  
5717          if (c->klass->update_filename) {
5718              ret = c->klass->update_filename(c, base, backing_file_str,
5719                                              &local_err);
5720              if (ret < 0) {
5721                  /*
5722                   * TODO: Actually, we want to rollback all previous iterations
5723                   * of this loop, and (which is almost impossible) previous
5724                   * bdrv_replace_node()...
5725                   *
5726                   * Note, that c->klass->update_filename may lead to permission
5727                   * update, so it's a bad idea to call it inside permission
5728                   * update transaction of bdrv_replace_node.
5729                   */
5730                  error_report_err(local_err);
5731                  goto exit;
5732              }
5733          }
5734      }
5735  
5736      if (update_inherits_from) {
5737          base->inherits_from = explicit_top->inherits_from;
5738      }
5739  
5740      ret = 0;
5741  exit:
5742      bdrv_drained_end(base);
5743      bdrv_unref(top);
5744      return ret;
5745  }
5746  
5747  /**
5748   * Implementation of BlockDriver.bdrv_co_get_allocated_file_size() that
5749   * sums the size of all data-bearing children.  (This excludes backing
5750   * children.)
5751   */
5752  static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
5753  {
5754      BdrvChild *child;
5755      int64_t child_size, sum = 0;
5756  
5757      QLIST_FOREACH(child, &bs->children, next) {
5758          if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5759                             BDRV_CHILD_FILTERED))
5760          {
5761              child_size = bdrv_co_get_allocated_file_size(child->bs);
5762              if (child_size < 0) {
5763                  return child_size;
5764              }
5765              sum += child_size;
5766          }
5767      }
5768  
5769      return sum;
5770  }
5771  
5772  /**
5773   * Length of a allocated file in bytes. Sparse files are counted by actual
5774   * allocated space. Return < 0 if error or unknown.
5775   */
5776  int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs)
5777  {
5778      BlockDriver *drv = bs->drv;
5779      IO_CODE();
5780  
5781      if (!drv) {
5782          return -ENOMEDIUM;
5783      }
5784      if (drv->bdrv_co_get_allocated_file_size) {
5785          return drv->bdrv_co_get_allocated_file_size(bs);
5786      }
5787  
5788      if (drv->bdrv_file_open) {
5789          /*
5790           * Protocol drivers default to -ENOTSUP (most of their data is
5791           * not stored in any of their children (if they even have any),
5792           * so there is no generic way to figure it out).
5793           */
5794          return -ENOTSUP;
5795      } else if (drv->is_filter) {
5796          /* Filter drivers default to the size of their filtered child */
5797          return bdrv_co_get_allocated_file_size(bdrv_filter_bs(bs));
5798      } else {
5799          /* Other drivers default to summing their children's sizes */
5800          return bdrv_sum_allocated_file_size(bs);
5801      }
5802  }
5803  
5804  /*
5805   * bdrv_measure:
5806   * @drv: Format driver
5807   * @opts: Creation options for new image
5808   * @in_bs: Existing image containing data for new image (may be NULL)
5809   * @errp: Error object
5810   * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
5811   *          or NULL on error
5812   *
5813   * Calculate file size required to create a new image.
5814   *
5815   * If @in_bs is given then space for allocated clusters and zero clusters
5816   * from that image are included in the calculation.  If @opts contains a
5817   * backing file that is shared by @in_bs then backing clusters may be omitted
5818   * from the calculation.
5819   *
5820   * If @in_bs is NULL then the calculation includes no allocated clusters
5821   * unless a preallocation option is given in @opts.
5822   *
5823   * Note that @in_bs may use a different BlockDriver from @drv.
5824   *
5825   * If an error occurs the @errp pointer is set.
5826   */
5827  BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
5828                                 BlockDriverState *in_bs, Error **errp)
5829  {
5830      IO_CODE();
5831      if (!drv->bdrv_measure) {
5832          error_setg(errp, "Block driver '%s' does not support size measurement",
5833                     drv->format_name);
5834          return NULL;
5835      }
5836  
5837      return drv->bdrv_measure(opts, in_bs, errp);
5838  }
5839  
5840  /**
5841   * Return number of sectors on success, -errno on error.
5842   */
5843  int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
5844  {
5845      BlockDriver *drv = bs->drv;
5846      IO_CODE();
5847      assert_bdrv_graph_readable();
5848  
5849      if (!drv)
5850          return -ENOMEDIUM;
5851  
5852      if (drv->has_variable_length) {
5853          int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
5854          if (ret < 0) {
5855              return ret;
5856          }
5857      }
5858      return bs->total_sectors;
5859  }
5860  
5861  /**
5862   * Return length in bytes on success, -errno on error.
5863   * The length is always a multiple of BDRV_SECTOR_SIZE.
5864   */
5865  int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
5866  {
5867      int64_t ret;
5868      IO_CODE();
5869      assert_bdrv_graph_readable();
5870  
5871      ret = bdrv_co_nb_sectors(bs);
5872      if (ret < 0) {
5873          return ret;
5874      }
5875      if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
5876          return -EFBIG;
5877      }
5878      return ret * BDRV_SECTOR_SIZE;
5879  }
5880  
5881  /* return 0 as number of sectors if no device present or error */
5882  void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
5883  {
5884      int64_t nb_sectors = bdrv_nb_sectors(bs);
5885      IO_CODE();
5886  
5887      *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
5888  }
5889  
5890  bool bdrv_is_sg(BlockDriverState *bs)
5891  {
5892      IO_CODE();
5893      return bs->sg;
5894  }
5895  
5896  /**
5897   * Return whether the given node supports compressed writes.
5898   */
5899  bool bdrv_supports_compressed_writes(BlockDriverState *bs)
5900  {
5901      BlockDriverState *filtered;
5902      IO_CODE();
5903  
5904      if (!bs->drv || !block_driver_can_compress(bs->drv)) {
5905          return false;
5906      }
5907  
5908      filtered = bdrv_filter_bs(bs);
5909      if (filtered) {
5910          /*
5911           * Filters can only forward compressed writes, so we have to
5912           * check the child.
5913           */
5914          return bdrv_supports_compressed_writes(filtered);
5915      }
5916  
5917      return true;
5918  }
5919  
5920  const char *bdrv_get_format_name(BlockDriverState *bs)
5921  {
5922      IO_CODE();
5923      return bs->drv ? bs->drv->format_name : NULL;
5924  }
5925  
/*
 * qsort() comparator for arrays of C strings: @a and @b each point to an
 * array element, i.e. to a 'char *'.  The pointed-to strings are compared
 * with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
5930  
5931  void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
5932                           void *opaque, bool read_only)
5933  {
5934      BlockDriver *drv;
5935      int count = 0;
5936      int i;
5937      const char **formats = NULL;
5938  
5939      GLOBAL_STATE_CODE();
5940  
5941      QLIST_FOREACH(drv, &bdrv_drivers, list) {
5942          if (drv->format_name) {
5943              bool found = false;
5944              int i = count;
5945  
5946              if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
5947                  continue;
5948              }
5949  
5950              while (formats && i && !found) {
5951                  found = !strcmp(formats[--i], drv->format_name);
5952              }
5953  
5954              if (!found) {
5955                  formats = g_renew(const char *, formats, count + 1);
5956                  formats[count++] = drv->format_name;
5957              }
5958          }
5959      }
5960  
5961      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
5962          const char *format_name = block_driver_modules[i].format_name;
5963  
5964          if (format_name) {
5965              bool found = false;
5966              int j = count;
5967  
5968              if (use_bdrv_whitelist &&
5969                  !bdrv_format_is_whitelisted(format_name, read_only)) {
5970                  continue;
5971              }
5972  
5973              while (formats && j && !found) {
5974                  found = !strcmp(formats[--j], format_name);
5975              }
5976  
5977              if (!found) {
5978                  formats = g_renew(const char *, formats, count + 1);
5979                  formats[count++] = format_name;
5980              }
5981          }
5982      }
5983  
5984      qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
5985  
5986      for (i = 0; i < count; i++) {
5987          it(opaque, formats[i]);
5988      }
5989  
5990      g_free(formats);
5991  }
5992  
5993  /* This function is to find a node in the bs graph */
5994  BlockDriverState *bdrv_find_node(const char *node_name)
5995  {
5996      BlockDriverState *bs;
5997  
5998      assert(node_name);
5999      GLOBAL_STATE_CODE();
6000  
6001      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
6002          if (!strcmp(node_name, bs->node_name)) {
6003              return bs;
6004          }
6005      }
6006      return NULL;
6007  }
6008  
6009  /* Put this QMP function here so it can access the static graph_bdrv_states. */
6010  BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
6011                                             Error **errp)
6012  {
6013      BlockDeviceInfoList *list;
6014      BlockDriverState *bs;
6015  
6016      GLOBAL_STATE_CODE();
6017  
6018      list = NULL;
6019      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
6020          BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
6021          if (!info) {
6022              qapi_free_BlockDeviceInfoList(list);
6023              return NULL;
6024          }
6025          QAPI_LIST_PREPEND(list, info);
6026      }
6027  
6028      return list;
6029  }
6030  
6031  typedef struct XDbgBlockGraphConstructor {
6032      XDbgBlockGraph *graph;
6033      GHashTable *graph_nodes;
6034  } XDbgBlockGraphConstructor;
6035  
6036  static XDbgBlockGraphConstructor *xdbg_graph_new(void)
6037  {
6038      XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
6039  
6040      gr->graph = g_new0(XDbgBlockGraph, 1);
6041      gr->graph_nodes = g_hash_table_new(NULL, NULL);
6042  
6043      return gr;
6044  }
6045  
6046  static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
6047  {
6048      XDbgBlockGraph *graph = gr->graph;
6049  
6050      g_hash_table_destroy(gr->graph_nodes);
6051      g_free(gr);
6052  
6053      return graph;
6054  }
6055  
6056  static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
6057  {
6058      uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
6059  
6060      if (ret != 0) {
6061          return ret;
6062      }
6063  
6064      /*
6065       * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
6066       * answer of g_hash_table_lookup.
6067       */
6068      ret = g_hash_table_size(gr->graph_nodes) + 1;
6069      g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
6070  
6071      return ret;
6072  }
6073  
6074  static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
6075                                  XDbgBlockGraphNodeType type, const char *name)
6076  {
6077      XDbgBlockGraphNode *n;
6078  
6079      n = g_new0(XDbgBlockGraphNode, 1);
6080  
6081      n->id = xdbg_graph_node_num(gr, node);
6082      n->type = type;
6083      n->name = g_strdup(name);
6084  
6085      QAPI_LIST_PREPEND(gr->graph->nodes, n);
6086  }
6087  
6088  static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
6089                                  const BdrvChild *child)
6090  {
6091      BlockPermission qapi_perm;
6092      XDbgBlockGraphEdge *edge;
6093      GLOBAL_STATE_CODE();
6094  
6095      edge = g_new0(XDbgBlockGraphEdge, 1);
6096  
6097      edge->parent = xdbg_graph_node_num(gr, parent);
6098      edge->child = xdbg_graph_node_num(gr, child->bs);
6099      edge->name = g_strdup(child->name);
6100  
6101      for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
6102          uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
6103  
6104          if (flag & child->perm) {
6105              QAPI_LIST_PREPEND(edge->perm, qapi_perm);
6106          }
6107          if (flag & child->shared_perm) {
6108              QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
6109          }
6110      }
6111  
6112      QAPI_LIST_PREPEND(gr->graph->edges, edge);
6113  }
6114  
6115  
6116  XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
6117  {
6118      BlockBackend *blk;
6119      BlockJob *job;
6120      BlockDriverState *bs;
6121      BdrvChild *child;
6122      XDbgBlockGraphConstructor *gr = xdbg_graph_new();
6123  
6124      GLOBAL_STATE_CODE();
6125  
6126      for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
6127          char *allocated_name = NULL;
6128          const char *name = blk_name(blk);
6129  
6130          if (!*name) {
6131              name = allocated_name = blk_get_attached_dev_id(blk);
6132          }
6133          xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
6134                             name);
6135          g_free(allocated_name);
6136          if (blk_root(blk)) {
6137              xdbg_graph_add_edge(gr, blk, blk_root(blk));
6138          }
6139      }
6140  
6141      WITH_JOB_LOCK_GUARD() {
6142          for (job = block_job_next_locked(NULL); job;
6143               job = block_job_next_locked(job)) {
6144              GSList *el;
6145  
6146              xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
6147                                  job->job.id);
6148              for (el = job->nodes; el; el = el->next) {
6149                  xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
6150              }
6151          }
6152      }
6153  
6154      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
6155          xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
6156                             bs->node_name);
6157          QLIST_FOREACH(child, &bs->children, next) {
6158              xdbg_graph_add_edge(gr, bs, child);
6159          }
6160      }
6161  
6162      return xdbg_graph_finalize(gr);
6163  }
6164  
6165  BlockDriverState *bdrv_lookup_bs(const char *device,
6166                                   const char *node_name,
6167                                   Error **errp)
6168  {
6169      BlockBackend *blk;
6170      BlockDriverState *bs;
6171  
6172      GLOBAL_STATE_CODE();
6173  
6174      if (device) {
6175          blk = blk_by_name(device);
6176  
6177          if (blk) {
6178              bs = blk_bs(blk);
6179              if (!bs) {
6180                  error_setg(errp, "Device '%s' has no medium", device);
6181              }
6182  
6183              return bs;
6184          }
6185      }
6186  
6187      if (node_name) {
6188          bs = bdrv_find_node(node_name);
6189  
6190          if (bs) {
6191              return bs;
6192          }
6193      }
6194  
6195      error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
6196                       device ? device : "",
6197                       node_name ? node_name : "");
6198      return NULL;
6199  }
6200  
6201  /* If 'base' is in the same chain as 'top', return true. Otherwise,
6202   * return false.  If either argument is NULL, return false. */
6203  bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
6204  {
6205  
6206      GLOBAL_STATE_CODE();
6207  
6208      while (top && top != base) {
6209          top = bdrv_filter_or_cow_bs(top);
6210      }
6211  
6212      return top != NULL;
6213  }
6214  
6215  BlockDriverState *bdrv_next_node(BlockDriverState *bs)
6216  {
6217      GLOBAL_STATE_CODE();
6218      if (!bs) {
6219          return QTAILQ_FIRST(&graph_bdrv_states);
6220      }
6221      return QTAILQ_NEXT(bs, node_list);
6222  }
6223  
6224  BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
6225  {
6226      GLOBAL_STATE_CODE();
6227      if (!bs) {
6228          return QTAILQ_FIRST(&all_bdrv_states);
6229      }
6230      return QTAILQ_NEXT(bs, bs_list);
6231  }
6232  
6233  const char *bdrv_get_node_name(const BlockDriverState *bs)
6234  {
6235      IO_CODE();
6236      return bs->node_name;
6237  }
6238  
6239  const char *bdrv_get_parent_name(const BlockDriverState *bs)
6240  {
6241      BdrvChild *c;
6242      const char *name;
6243      IO_CODE();
6244  
6245      /* If multiple parents have a name, just pick the first one. */
6246      QLIST_FOREACH(c, &bs->parents, next_parent) {
6247          if (c->klass->get_name) {
6248              name = c->klass->get_name(c);
6249              if (name && *name) {
6250                  return name;
6251              }
6252          }
6253      }
6254  
6255      return NULL;
6256  }
6257  
6258  /* TODO check what callers really want: bs->node_name or blk_name() */
6259  const char *bdrv_get_device_name(const BlockDriverState *bs)
6260  {
6261      IO_CODE();
6262      return bdrv_get_parent_name(bs) ?: "";
6263  }
6264  
6265  /* This can be used to identify nodes that might not have a device
6266   * name associated. Since node and device names live in the same
6267   * namespace, the result is unambiguous. The exception is if both are
6268   * absent, then this returns an empty (non-null) string. */
6269  const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
6270  {
6271      IO_CODE();
6272      return bdrv_get_parent_name(bs) ?: bs->node_name;
6273  }
6274  
6275  int bdrv_get_flags(BlockDriverState *bs)
6276  {
6277      IO_CODE();
6278      return bs->open_flags;
6279  }
6280  
/*
 * Unconditionally report 1 ("has zero init") regardless of @bs.
 * NOTE(review): presumably meant as a ready-made .bdrv_has_zero_init
 * callback for drivers whose new images always read as zeroes -- confirm
 * against the drivers that reference it.
 */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();
    return 1;
}
6286  
6287  int bdrv_has_zero_init(BlockDriverState *bs)
6288  {
6289      BlockDriverState *filtered;
6290      GLOBAL_STATE_CODE();
6291  
6292      if (!bs->drv) {
6293          return 0;
6294      }
6295  
6296      /* If BS is a copy on write image, it is initialized to
6297         the contents of the base image, which may not be zeroes.  */
6298      if (bdrv_cow_child(bs)) {
6299          return 0;
6300      }
6301      if (bs->drv->bdrv_has_zero_init) {
6302          return bs->drv->bdrv_has_zero_init(bs);
6303      }
6304  
6305      filtered = bdrv_filter_bs(bs);
6306      if (filtered) {
6307          return bdrv_has_zero_init(filtered);
6308      }
6309  
6310      /* safe default */
6311      return 0;
6312  }
6313  
6314  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
6315  {
6316      IO_CODE();
6317      if (!(bs->open_flags & BDRV_O_UNMAP)) {
6318          return false;
6319      }
6320  
6321      return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
6322  }
6323  
6324  void bdrv_get_backing_filename(BlockDriverState *bs,
6325                                 char *filename, int filename_size)
6326  {
6327      IO_CODE();
6328      pstrcpy(filename, filename_size, bs->backing_file);
6329  }
6330  
6331  int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
6332  {
6333      int ret;
6334      BlockDriver *drv = bs->drv;
6335      IO_CODE();
6336      /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
6337      if (!drv) {
6338          return -ENOMEDIUM;
6339      }
6340      if (!drv->bdrv_co_get_info) {
6341          BlockDriverState *filtered = bdrv_filter_bs(bs);
6342          if (filtered) {
6343              return bdrv_co_get_info(filtered, bdi);
6344          }
6345          return -ENOTSUP;
6346      }
6347      memset(bdi, 0, sizeof(*bdi));
6348      ret = drv->bdrv_co_get_info(bs, bdi);
6349      if (ret < 0) {
6350          return ret;
6351      }
6352  
6353      if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
6354          return -EINVAL;
6355      }
6356  
6357      return 0;
6358  }
6359  
6360  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
6361                                            Error **errp)
6362  {
6363      BlockDriver *drv = bs->drv;
6364      IO_CODE();
6365      if (drv && drv->bdrv_get_specific_info) {
6366          return drv->bdrv_get_specific_info(bs, errp);
6367      }
6368      return NULL;
6369  }
6370  
6371  BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
6372  {
6373      BlockDriver *drv = bs->drv;
6374      IO_CODE();
6375      if (!drv || !drv->bdrv_get_specific_stats) {
6376          return NULL;
6377      }
6378      return drv->bdrv_get_specific_stats(bs);
6379  }
6380  
6381  void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
6382  {
6383      IO_CODE();
6384      if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) {
6385          return;
6386      }
6387  
6388      bs->drv->bdrv_co_debug_event(bs, event);
6389  }
6390  
6391  static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
6392  {
6393      GLOBAL_STATE_CODE();
6394      while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
6395          bs = bdrv_primary_bs(bs);
6396      }
6397  
6398      if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
6399          assert(bs->drv->bdrv_debug_remove_breakpoint);
6400          return bs;
6401      }
6402  
6403      return NULL;
6404  }
6405  
6406  int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
6407                            const char *tag)
6408  {
6409      GLOBAL_STATE_CODE();
6410      bs = bdrv_find_debug_node(bs);
6411      if (bs) {
6412          return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
6413      }
6414  
6415      return -ENOTSUP;
6416  }
6417  
6418  int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
6419  {
6420      GLOBAL_STATE_CODE();
6421      bs = bdrv_find_debug_node(bs);
6422      if (bs) {
6423          return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
6424      }
6425  
6426      return -ENOTSUP;
6427  }
6428  
6429  int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
6430  {
6431      GLOBAL_STATE_CODE();
6432      while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
6433          bs = bdrv_primary_bs(bs);
6434      }
6435  
6436      if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
6437          return bs->drv->bdrv_debug_resume(bs, tag);
6438      }
6439  
6440      return -ENOTSUP;
6441  }
6442  
6443  bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
6444  {
6445      GLOBAL_STATE_CODE();
6446      while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
6447          bs = bdrv_primary_bs(bs);
6448      }
6449  
6450      if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
6451          return bs->drv->bdrv_debug_is_suspended(bs, tag);
6452      }
6453  
6454      return false;
6455  }
6456  
/*
 * Search the backing chain of @bs for the node whose backing file
 * reference matches @backing_file, and return the node below it.
 *
 * backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the matching backing node, or NULL if @bs is NULL, has no
 * driver, @backing_file is NULL, or nothing in the chain matches.
 */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    /* PATH_MAX-sized scratch buffers that receive realpath() output */
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    bool filenames_refreshed = false;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;
    BlockDriverState *bs_below;

    GLOBAL_STATE_CODE();

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /*
     * Being largely a legacy function, skip any filters here
     * (because filters do not have normal filenames, so they cannot
     * match anyway; and allowing json:{} filenames is a bit out of
     * scope).
     */
    for (curr_bs = bdrv_skip_filters(bs);
         bdrv_cow_child(curr_bs) != NULL;
         curr_bs = bs_below)
    {
        /*
         * Set before any 'continue' below, so the loop's increment
         * expression always sees a valid successor.
         */
        bs_below = bdrv_backing_chain_next(curr_bs);

        if (bdrv_backing_overridden(curr_bs)) {
            /*
             * If the backing file was overridden, we can only compare
             * directly against the backing node's filename.
             */

            if (!filenames_refreshed) {
                /*
                 * This will automatically refresh all of the
                 * filenames in the rest of the backing chain, so we
                 * only need to do this once.
                 */
                bdrv_refresh_filename(bs_below);
                filenames_refreshed = true;
            }

            if (strcmp(backing_file, bs_below->filename) == 0) {
                retval = bs_below;
                break;
            }
        } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            /*
             * If either of the filename paths is actually a protocol, then
             * compare unmodified paths; otherwise make paths relative.
             */
            char *backing_file_full_ret;

            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = bs_below;
                break;
            }
            /* Also check against the full backing filename for the image */
            backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
                                                                   NULL);
            if (backing_file_full_ret) {
                /* Compare first, then free: the string is owned here */
                bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
                g_free(backing_file_full_ret);
                if (equal) {
                    retval = bs_below;
                    break;
                }
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
                                                       NULL);
            /* We are going to compare canonicalized absolute pathnames */
            if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
                /* Cannot resolve the candidate at this level; try the next */
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
            if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
                /* Cannot resolve this level's backing file; try the next */
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = bs_below;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    return retval;
}
6570  
/*
 * Run the MODULE_INIT_BLOCK module initializers.  When built with
 * CONFIG_BDRV_WHITELIST_TOOLS, use_bdrv_whitelist is set first.
 */
void bdrv_init(void)
{
#ifdef CONFIG_BDRV_WHITELIST_TOOLS
    use_bdrv_whitelist = 1;
#endif
    module_call_init(MODULE_INIT_BLOCK);
}
6578  
/*
 * Same as bdrv_init(), but sets use_bdrv_whitelist unconditionally
 * before running it.
 */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
6584  
/*
 * Activate @bs: clear BDRV_O_INACTIVE on it after first activating all of
 * its children, then notify its parents.
 *
 * Returns 0 on success or a negative errno with @errp set:
 *   -ENOMEDIUM  @bs has no driver (@errp is not set in this case)
 *   -EINVAL     a child or a parent's activate callback reported an error
 *   otherwise the error from bdrv_refresh_perms(),
 *   bdrv_invalidate_cache() or bdrv_refresh_total_sectors()
 *
 * On failure after the flag was cleared, BDRV_O_INACTIVE is set again.
 */
int bdrv_activate(BlockDriverState *bs, Error **errp)
{
    BdrvChild *child, *parent;
    Error *local_err = NULL;
    int ret;
    BdrvDirtyBitmap *bm;

    GLOBAL_STATE_CODE();

    if (!bs->drv)  {
        return -ENOMEDIUM;
    }

    /*
     * Children first.  The recursive call's return value is ignored;
     * failure is detected through local_err and reported as -EINVAL.
     */
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_activate(child->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }
    }

    /*
     * Update permissions, they may differ for inactive nodes.
     *
     * Note that the required permissions of inactive images are always a
     * subset of the permissions required after activating the image. This
     * allows us to just get the permissions upfront without restricting
     * bdrv_co_invalidate_cache().
     *
     * It also means that in error cases, we don't have to try and revert to
     * the old permissions (which is an operation that could fail, too). We can
     * just keep the extended permissions for the next time that an activation
     * of the image is tried.
     */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        /* Clear the flag first; every failure path below restores it */
        bs->open_flags &= ~BDRV_O_INACTIVE;
        ret = bdrv_refresh_perms(bs, NULL, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        ret = bdrv_invalidate_cache(bs, errp);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }

        /* Turn off skip_store on every dirty bitmap of this node */
        FOR_EACH_DIRTY_BITMAP(bs, bm) {
            bdrv_dirty_bitmap_skip_store(bm, false);
        }

        ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_setg_errno(errp, -ret, "Could not refresh total sector count");
            return ret;
        }
    }

    /*
     * Let parents that implement ->activate react.  Note that a failure
     * here sets BDRV_O_INACTIVE even if the node was already active when
     * this function was entered.
     */
    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->activate) {
            parent->klass->activate(parent, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }
    }

    return 0;
}
6658  
6659  int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
6660  {
6661      Error *local_err = NULL;
6662      IO_CODE();
6663  
6664      assert(!(bs->open_flags & BDRV_O_INACTIVE));
6665      assert_bdrv_graph_readable();
6666  
6667      if (bs->drv->bdrv_co_invalidate_cache) {
6668          bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
6669          if (local_err) {
6670              error_propagate(errp, local_err);
6671              return -EINVAL;
6672          }
6673      }
6674  
6675      return 0;
6676  }
6677  
6678  void bdrv_activate_all(Error **errp)
6679  {
6680      BlockDriverState *bs;
6681      BdrvNextIterator it;
6682  
6683      GLOBAL_STATE_CODE();
6684  
6685      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6686          AioContext *aio_context = bdrv_get_aio_context(bs);
6687          int ret;
6688  
6689          aio_context_acquire(aio_context);
6690          ret = bdrv_activate(bs, errp);
6691          aio_context_release(aio_context);
6692          if (ret < 0) {
6693              bdrv_next_cleanup(&it);
6694              return;
6695          }
6696      }
6697  }
6698  
6699  static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
6700  {
6701      BdrvChild *parent;
6702      GLOBAL_STATE_CODE();
6703  
6704      QLIST_FOREACH(parent, &bs->parents, next_parent) {
6705          if (parent->klass->parent_is_bds) {
6706              BlockDriverState *parent_bs = parent->opaque;
6707              if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
6708                  return true;
6709              }
6710          }
6711      }
6712  
6713      return false;
6714  }
6715  
/*
 * Inactivate @bs and then, recursively, its children.
 *
 * Returns 0 on success (including the case where @bs is skipped because
 * it still has an active BDS parent), or a negative errno:
 *   -ENOMEDIUM  @bs has no driver
 *   -EPERM      parents still hold write permissions after inactivation
 *   otherwise the error from the driver's or a parent's inactivate
 *   callback, or from a child's recursion
 */
static int bdrv_inactivate_recurse(BlockDriverState *bs)
{
    BdrvChild *child, *parent;
    int ret;
    uint64_t cumulative_perms, cumulative_shared_perms;

    GLOBAL_STATE_CODE();

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /* Make sure that we don't inactivate a child before its parent.
     * It will be covered by recursion from the yet active parent. */
    if (bdrv_has_bds_parent(bs, true)) {
        return 0;
    }

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

    /* Inactivate this node */
    if (bs->drv->bdrv_inactivate) {
        ret = bs->drv->bdrv_inactivate(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* Give every parent that implements ->inactivate a chance to veto */
    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->inactivate) {
            ret = parent->klass->inactivate(parent);
            if (ret < 0) {
                return ret;
            }
        }
    }

    bdrv_get_cumulative_perm(bs, &cumulative_perms,
                             &cumulative_shared_perms);
    if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
        /* Our inactive parents still need write access. Inactivation failed. */
        return -EPERM;
    }

    /* Only now is the node marked inactive */
    bs->open_flags |= BDRV_O_INACTIVE;

    /*
     * Update permissions, they may differ for inactive nodes.
     * We only tried to loosen restrictions, so errors are not fatal, ignore
     * them.
     */
    bdrv_refresh_perms(bs, NULL, NULL);

    /* Recursively inactivate children */
    QLIST_FOREACH(child, &bs->children, next) {
        ret = bdrv_inactivate_recurse(child->bs);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
6779  
6780  int bdrv_inactivate_all(void)
6781  {
6782      BlockDriverState *bs = NULL;
6783      BdrvNextIterator it;
6784      int ret = 0;
6785      GSList *aio_ctxs = NULL, *ctx;
6786  
6787      GLOBAL_STATE_CODE();
6788  
6789      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6790          AioContext *aio_context = bdrv_get_aio_context(bs);
6791  
6792          if (!g_slist_find(aio_ctxs, aio_context)) {
6793              aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
6794              aio_context_acquire(aio_context);
6795          }
6796      }
6797  
6798      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6799          /* Nodes with BDS parents are covered by recursion from the last
6800           * parent that gets inactivated. Don't inactivate them a second
6801           * time if that has already happened. */
6802          if (bdrv_has_bds_parent(bs, false)) {
6803              continue;
6804          }
6805          ret = bdrv_inactivate_recurse(bs);
6806          if (ret < 0) {
6807              bdrv_next_cleanup(&it);
6808              goto out;
6809          }
6810      }
6811  
6812  out:
6813      for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
6814          AioContext *aio_context = ctx->data;
6815          aio_context_release(aio_context);
6816      }
6817      g_slist_free(aio_ctxs);
6818  
6819      return ret;
6820  }
6821  
6822  /**************************************************************/
6823  /* removable device support */
6824  
6825  /**
6826   * Return TRUE if the media is present
6827   */
6828  bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs)
6829  {
6830      BlockDriver *drv = bs->drv;
6831      BdrvChild *child;
6832      IO_CODE();
6833      assert_bdrv_graph_readable();
6834  
6835      if (!drv) {
6836          return false;
6837      }
6838      if (drv->bdrv_co_is_inserted) {
6839          return drv->bdrv_co_is_inserted(bs);
6840      }
6841      QLIST_FOREACH(child, &bs->children, next) {
6842          if (!bdrv_co_is_inserted(child->bs)) {
6843              return false;
6844          }
6845      }
6846      return true;
6847  }
6848  
6849  /**
6850   * If eject_flag is TRUE, eject the media. Otherwise, close the tray
6851   */
6852  void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag)
6853  {
6854      BlockDriver *drv = bs->drv;
6855      IO_CODE();
6856      assert_bdrv_graph_readable();
6857  
6858      if (drv && drv->bdrv_co_eject) {
6859          drv->bdrv_co_eject(bs, eject_flag);
6860      }
6861  }
6862  
6863  /**
6864   * Lock or unlock the media (if it is locked, the user won't be able
6865   * to eject it manually).
6866   */
6867  void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked)
6868  {
6869      BlockDriver *drv = bs->drv;
6870      IO_CODE();
6871      assert_bdrv_graph_readable();
6872      trace_bdrv_lock_medium(bs, locked);
6873  
6874      if (drv && drv->bdrv_co_lock_medium) {
6875          drv->bdrv_co_lock_medium(bs, locked);
6876      }
6877  }
6878  
6879  /* Get a reference to bs */
6880  void bdrv_ref(BlockDriverState *bs)
6881  {
6882      GLOBAL_STATE_CODE();
6883      bs->refcnt++;
6884  }
6885  
6886  /* Release a previously grabbed reference to bs.
6887   * If after releasing, reference count is zero, the BlockDriverState is
6888   * deleted. */
6889  void bdrv_unref(BlockDriverState *bs)
6890  {
6891      GLOBAL_STATE_CODE();
6892      if (!bs) {
6893          return;
6894      }
6895      assert(bs->refcnt > 0);
6896      if (--bs->refcnt == 0) {
6897          bdrv_delete(bs);
6898      }
6899  }
6900  
/* One entry in a node's per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Identifies the blocker and supplies the text reported to callers */
    Error *reason;
    /* Linkage within the op_blockers list */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
6905  
6906  bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
6907  {
6908      BdrvOpBlocker *blocker;
6909      GLOBAL_STATE_CODE();
6910      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6911      if (!QLIST_EMPTY(&bs->op_blockers[op])) {
6912          blocker = QLIST_FIRST(&bs->op_blockers[op]);
6913          error_propagate_prepend(errp, error_copy(blocker->reason),
6914                                  "Node '%s' is busy: ",
6915                                  bdrv_get_device_or_node_name(bs));
6916          return true;
6917      }
6918      return false;
6919  }
6920  
6921  void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
6922  {
6923      BdrvOpBlocker *blocker;
6924      GLOBAL_STATE_CODE();
6925      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6926  
6927      blocker = g_new0(BdrvOpBlocker, 1);
6928      blocker->reason = reason;
6929      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
6930  }
6931  
6932  void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
6933  {
6934      BdrvOpBlocker *blocker, *next;
6935      GLOBAL_STATE_CODE();
6936      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6937      QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
6938          if (blocker->reason == reason) {
6939              QLIST_REMOVE(blocker, list);
6940              g_free(blocker);
6941          }
6942      }
6943  }
6944  
6945  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
6946  {
6947      int i;
6948      GLOBAL_STATE_CODE();
6949      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6950          bdrv_op_block(bs, i, reason);
6951      }
6952  }
6953  
6954  void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
6955  {
6956      int i;
6957      GLOBAL_STATE_CODE();
6958      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6959          bdrv_op_unblock(bs, i, reason);
6960      }
6961  }
6962  
6963  bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
6964  {
6965      int i;
6966      GLOBAL_STATE_CODE();
6967      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6968          if (!QLIST_EMPTY(&bs->op_blockers[i])) {
6969              return false;
6970          }
6971      }
6972      return true;
6973  }
6974  
6975  /*
6976   * Must not be called while holding the lock of an AioContext other than the
6977   * current one.
6978   */
6979  void bdrv_img_create(const char *filename, const char *fmt,
6980                       const char *base_filename, const char *base_fmt,
6981                       char *options, uint64_t img_size, int flags, bool quiet,
6982                       Error **errp)
6983  {
6984      QemuOptsList *create_opts = NULL;
6985      QemuOpts *opts = NULL;
6986      const char *backing_fmt, *backing_file;
6987      int64_t size;
6988      BlockDriver *drv, *proto_drv;
6989      Error *local_err = NULL;
6990      int ret = 0;
6991  
6992      GLOBAL_STATE_CODE();
6993  
6994      /* Find driver and parse its options */
6995      drv = bdrv_find_format(fmt);
6996      if (!drv) {
6997          error_setg(errp, "Unknown file format '%s'", fmt);
6998          return;
6999      }
7000  
7001      proto_drv = bdrv_find_protocol(filename, true, errp);
7002      if (!proto_drv) {
7003          return;
7004      }
7005  
7006      if (!drv->create_opts) {
7007          error_setg(errp, "Format driver '%s' does not support image creation",
7008                     drv->format_name);
7009          return;
7010      }
7011  
7012      if (!proto_drv->create_opts) {
7013          error_setg(errp, "Protocol driver '%s' does not support image creation",
7014                     proto_drv->format_name);
7015          return;
7016      }
7017  
7018      /* Create parameter list */
7019      create_opts = qemu_opts_append(create_opts, drv->create_opts);
7020      create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
7021  
7022      opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
7023  
7024      /* Parse -o options */
7025      if (options) {
7026          if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
7027              goto out;
7028          }
7029      }
7030  
7031      if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
7032          qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
7033      } else if (img_size != UINT64_C(-1)) {
7034          error_setg(errp, "The image size must be specified only once");
7035          goto out;
7036      }
7037  
7038      if (base_filename) {
7039          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
7040                            NULL)) {
7041              error_setg(errp, "Backing file not supported for file format '%s'",
7042                         fmt);
7043              goto out;
7044          }
7045      }
7046  
7047      if (base_fmt) {
7048          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
7049              error_setg(errp, "Backing file format not supported for file "
7050                               "format '%s'", fmt);
7051              goto out;
7052          }
7053      }
7054  
7055      backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
7056      if (backing_file) {
7057          if (!strcmp(filename, backing_file)) {
7058              error_setg(errp, "Error: Trying to create an image with the "
7059                               "same filename as the backing file");
7060              goto out;
7061          }
7062          if (backing_file[0] == '\0') {
7063              error_setg(errp, "Expected backing file name, got empty string");
7064              goto out;
7065          }
7066      }
7067  
7068      backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
7069  
7070      /* The size for the image must always be specified, unless we have a backing
7071       * file and we have not been forbidden from opening it. */
7072      size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
7073      if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
7074          BlockDriverState *bs;
7075          char *full_backing;
7076          int back_flags;
7077          QDict *backing_options = NULL;
7078  
7079          full_backing =
7080              bdrv_get_full_backing_filename_from_filename(filename, backing_file,
7081                                                           &local_err);
7082          if (local_err) {
7083              goto out;
7084          }
7085          assert(full_backing);
7086  
7087          /*
7088           * No need to do I/O here, which allows us to open encrypted
7089           * backing images without needing the secret
7090           */
7091          back_flags = flags;
7092          back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
7093          back_flags |= BDRV_O_NO_IO;
7094  
7095          backing_options = qdict_new();
7096          if (backing_fmt) {
7097              qdict_put_str(backing_options, "driver", backing_fmt);
7098          }
7099          qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
7100  
7101          bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
7102                         &local_err);
7103          g_free(full_backing);
7104          if (!bs) {
7105              error_append_hint(&local_err, "Could not open backing image.\n");
7106              goto out;
7107          } else {
7108              if (!backing_fmt) {
7109                  error_setg(&local_err,
7110                             "Backing file specified without backing format");
7111                  error_append_hint(&local_err, "Detected format of %s.",
7112                                    bs->drv->format_name);
7113                  goto out;
7114              }
7115              if (size == -1) {
7116                  /* Opened BS, have no size */
7117                  size = bdrv_getlength(bs);
7118                  if (size < 0) {
7119                      error_setg_errno(errp, -size, "Could not get size of '%s'",
7120                                       backing_file);
7121                      bdrv_unref(bs);
7122                      goto out;
7123                  }
7124                  qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
7125              }
7126              bdrv_unref(bs);
7127          }
7128          /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
7129      } else if (backing_file && !backing_fmt) {
7130          error_setg(&local_err,
7131                     "Backing file specified without backing format");
7132          goto out;
7133      }
7134  
7135      if (size == -1) {
7136          error_setg(errp, "Image creation needs a size parameter");
7137          goto out;
7138      }
7139  
7140      if (!quiet) {
7141          printf("Formatting '%s', fmt=%s ", filename, fmt);
7142          qemu_opts_print(opts, " ");
7143          puts("");
7144          fflush(stdout);
7145      }
7146  
7147      ret = bdrv_create(drv, filename, opts, &local_err);
7148  
7149      if (ret == -EFBIG) {
7150          /* This is generally a better message than whatever the driver would
7151           * deliver (especially because of the cluster_size_hint), since that
7152           * is most probably not much different from "image too large". */
7153          const char *cluster_size_hint = "";
7154          if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
7155              cluster_size_hint = " (try using a larger cluster size)";
7156          }
7157          error_setg(errp, "The image size is too large for file format '%s'"
7158                     "%s", fmt, cluster_size_hint);
7159          error_free(local_err);
7160          local_err = NULL;
7161      }
7162  
7163  out:
7164      qemu_opts_del(opts);
7165      qemu_opts_free(create_opts);
7166      error_propagate(errp, local_err);
7167  }
7168  
7169  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
7170  {
7171      IO_CODE();
7172      return bs ? bs->aio_context : qemu_get_aio_context();
7173  }
7174  
7175  AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
7176  {
7177      Coroutine *self = qemu_coroutine_self();
7178      AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
7179      AioContext *new_ctx;
7180      IO_CODE();
7181  
7182      /*
7183       * Increase bs->in_flight to ensure that this operation is completed before
7184       * moving the node to a different AioContext. Read new_ctx only afterwards.
7185       */
7186      bdrv_inc_in_flight(bs);
7187  
7188      new_ctx = bdrv_get_aio_context(bs);
7189      aio_co_reschedule_self(new_ctx);
7190      return old_ctx;
7191  }
7192  
/*
 * Counterpart of bdrv_co_enter(): reschedule the calling coroutine into
 * @old_ctx and only then drop the in-flight reference on @bs.
 */
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
    IO_CODE();
    aio_co_reschedule_self(old_ctx);
    bdrv_dec_in_flight(bs);
}
7199  
7200  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
7201  {
7202      AioContext *ctx = bdrv_get_aio_context(bs);
7203  
7204      /* In the main thread, bs->aio_context won't change concurrently */
7205      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
7206  
7207      /*
7208       * We're in coroutine context, so we already hold the lock of the main
7209       * loop AioContext. Don't lock it twice to avoid deadlocks.
7210       */
7211      assert(qemu_in_coroutine());
7212      if (ctx != qemu_get_aio_context()) {
7213          aio_context_acquire(ctx);
7214      }
7215  }
7216  
7217  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
7218  {
7219      AioContext *ctx = bdrv_get_aio_context(bs);
7220  
7221      assert(qemu_in_coroutine());
7222      if (ctx != qemu_get_aio_context()) {
7223          aio_context_release(ctx);
7224      }
7225  }
7226  
/*
 * Unlink @ban from the notifier list it is on and free it.  @ban must not be
 * used by the caller afterwards.
 */
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
    GLOBAL_STATE_CODE();
    QLIST_REMOVE(ban, list);
    g_free(ban);
}
7233  
7234  static void bdrv_detach_aio_context(BlockDriverState *bs)
7235  {
7236      BdrvAioNotifier *baf, *baf_tmp;
7237  
7238      assert(!bs->walking_aio_notifiers);
7239      GLOBAL_STATE_CODE();
7240      bs->walking_aio_notifiers = true;
7241      QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
7242          if (baf->deleted) {
7243              bdrv_do_remove_aio_context_notifier(baf);
7244          } else {
7245              baf->detach_aio_context(baf->opaque);
7246          }
7247      }
7248      /* Never mind iterating again to check for ->deleted.  bdrv_close() will
7249       * remove remaining aio notifiers if we aren't called again.
7250       */
7251      bs->walking_aio_notifiers = false;
7252  
7253      if (bs->drv && bs->drv->bdrv_detach_aio_context) {
7254          bs->drv->bdrv_detach_aio_context(bs);
7255      }
7256  
7257      if (bs->quiesce_counter) {
7258          aio_enable_external(bs->aio_context);
7259      }
7260      bs->aio_context = NULL;
7261  }
7262  
7263  static void bdrv_attach_aio_context(BlockDriverState *bs,
7264                                      AioContext *new_context)
7265  {
7266      BdrvAioNotifier *ban, *ban_tmp;
7267      GLOBAL_STATE_CODE();
7268  
7269      if (bs->quiesce_counter) {
7270          aio_disable_external(new_context);
7271      }
7272  
7273      bs->aio_context = new_context;
7274  
7275      if (bs->drv && bs->drv->bdrv_attach_aio_context) {
7276          bs->drv->bdrv_attach_aio_context(bs, new_context);
7277      }
7278  
7279      assert(!bs->walking_aio_notifiers);
7280      bs->walking_aio_notifiers = true;
7281      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
7282          if (ban->deleted) {
7283              bdrv_do_remove_aio_context_notifier(ban);
7284          } else {
7285              ban->attached_aio_context(new_context, ban->opaque);
7286          }
7287      }
7288      bs->walking_aio_notifiers = false;
7289  }
7290  
/*
 * Per-node state queued on a Transaction by bdrv_change_aio_context() and
 * consumed by the set_aio_context commit/clean callbacks.
 */
typedef struct BdrvStateSetAioContext {
    AioContext *new_ctx;    /* context the node is switched to on commit */
    BlockDriverState *bs;   /* node whose AioContext is being changed */
} BdrvStateSetAioContext;
7295  
7296  static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
7297                                             GHashTable *visited,
7298                                             Transaction *tran,
7299                                             Error **errp)
7300  {
7301      GLOBAL_STATE_CODE();
7302      if (g_hash_table_contains(visited, c)) {
7303          return true;
7304      }
7305      g_hash_table_add(visited, c);
7306  
7307      /*
7308       * A BdrvChildClass that doesn't handle AioContext changes cannot
7309       * tolerate any AioContext changes
7310       */
7311      if (!c->klass->change_aio_ctx) {
7312          char *user = bdrv_child_user_desc(c);
7313          error_setg(errp, "Changing iothreads is not supported by %s", user);
7314          g_free(user);
7315          return false;
7316      }
7317      if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) {
7318          assert(!errp || *errp);
7319          return false;
7320      }
7321      return true;
7322  }
7323  
7324  bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
7325                                     GHashTable *visited, Transaction *tran,
7326                                     Error **errp)
7327  {
7328      GLOBAL_STATE_CODE();
7329      if (g_hash_table_contains(visited, c)) {
7330          return true;
7331      }
7332      g_hash_table_add(visited, c);
7333      return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp);
7334  }
7335  
7336  static void bdrv_set_aio_context_clean(void *opaque)
7337  {
7338      BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
7339      BlockDriverState *bs = (BlockDriverState *) state->bs;
7340  
7341      /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */
7342      bdrv_drained_end(bs);
7343  
7344      g_free(state);
7345  }
7346  
7347  static void bdrv_set_aio_context_commit(void *opaque)
7348  {
7349      BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
7350      BlockDriverState *bs = (BlockDriverState *) state->bs;
7351      AioContext *new_context = state->new_ctx;
7352      AioContext *old_context = bdrv_get_aio_context(bs);
7353  
7354      /*
7355       * Take the old AioContex when detaching it from bs.
7356       * At this point, new_context lock is already acquired, and we are now
7357       * also taking old_context. This is safe as long as bdrv_detach_aio_context
7358       * does not call AIO_POLL_WHILE().
7359       */
7360      if (old_context != qemu_get_aio_context()) {
7361          aio_context_acquire(old_context);
7362      }
7363      bdrv_detach_aio_context(bs);
7364      if (old_context != qemu_get_aio_context()) {
7365          aio_context_release(old_context);
7366      }
7367      bdrv_attach_aio_context(bs, new_context);
7368  }
7369  
/*
 * Transaction action queued once per node by bdrv_change_aio_context():
 * .commit switches the node to its new AioContext, .clean ends the drained
 * section and frees the per-node state.
 */
static TransactionActionDrv set_aio_context = {
    .commit = bdrv_set_aio_context_commit,
    .clean = bdrv_set_aio_context_clean,
};
7374  
7375  /*
7376   * Changes the AioContext used for fd handlers, timers, and BHs by this
7377   * BlockDriverState and all its children and parents.
7378   *
7379   * Must be called from the main AioContext.
7380   *
7381   * The caller must own the AioContext lock for the old AioContext of bs, but it
7382   * must not own the AioContext lock for new_context (unless new_context is the
7383   * same as the current context of bs).
7384   *
7385   * @visited will accumulate all visited BdrvChild objects. The caller is
7386   * responsible for freeing the list afterwards.
7387   */
7388  static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
7389                                      GHashTable *visited, Transaction *tran,
7390                                      Error **errp)
7391  {
7392      BdrvChild *c;
7393      BdrvStateSetAioContext *state;
7394  
7395      GLOBAL_STATE_CODE();
7396  
7397      if (bdrv_get_aio_context(bs) == ctx) {
7398          return true;
7399      }
7400  
7401      QLIST_FOREACH(c, &bs->parents, next_parent) {
7402          if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
7403              return false;
7404          }
7405      }
7406  
7407      QLIST_FOREACH(c, &bs->children, next) {
7408          if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
7409              return false;
7410          }
7411      }
7412  
7413      state = g_new(BdrvStateSetAioContext, 1);
7414      *state = (BdrvStateSetAioContext) {
7415          .new_ctx = ctx,
7416          .bs = bs,
7417      };
7418  
7419      /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */
7420      bdrv_drained_begin(bs);
7421  
7422      tran_add(tran, &set_aio_context, state);
7423  
7424      return true;
7425  }
7426  
7427  /*
7428   * Change bs's and recursively all of its parents' and children's AioContext
7429   * to the given new context, returning an error if that isn't possible.
7430   *
7431   * If ignore_child is not NULL, that child (and its subgraph) will not
7432   * be touched.
7433   *
7434   * This function still requires the caller to take the bs current
7435   * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
7436   * assumes the lock is always held if bs is in another AioContext.
7437   * For the same reason, it temporarily also holds the new AioContext, since
7438   * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
7439   * Therefore the new AioContext lock must not be taken by the caller.
7440   */
7441  int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
7442                                  BdrvChild *ignore_child, Error **errp)
7443  {
7444      Transaction *tran;
7445      GHashTable *visited;
7446      int ret;
7447      AioContext *old_context = bdrv_get_aio_context(bs);
7448      GLOBAL_STATE_CODE();
7449  
7450      /*
7451       * Recursion phase: go through all nodes of the graph.
7452       * Take care of checking that all nodes support changing AioContext
7453       * and drain them, builing a linear list of callbacks to run if everything
7454       * is successful (the transaction itself).
7455       */
7456      tran = tran_new();
7457      visited = g_hash_table_new(NULL, NULL);
7458      if (ignore_child) {
7459          g_hash_table_add(visited, ignore_child);
7460      }
7461      ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp);
7462      g_hash_table_destroy(visited);
7463  
7464      /*
7465       * Linear phase: go through all callbacks collected in the transaction.
7466       * Run all callbacks collected in the recursion to switch all nodes
7467       * AioContext lock (transaction commit), or undo all changes done in the
7468       * recursion (transaction abort).
7469       */
7470  
7471      if (!ret) {
7472          /* Just run clean() callbacks. No AioContext changed. */
7473          tran_abort(tran);
7474          return -EPERM;
7475      }
7476  
7477      /*
7478       * Release old AioContext, it won't be needed anymore, as all
7479       * bdrv_drained_begin() have been called already.
7480       */
7481      if (qemu_get_aio_context() != old_context) {
7482          aio_context_release(old_context);
7483      }
7484  
7485      /*
7486       * Acquire new AioContext since bdrv_drained_end() is going to be called
7487       * after we switched all nodes in the new AioContext, and the function
7488       * assumes that the lock of the bs is always taken.
7489       */
7490      if (qemu_get_aio_context() != ctx) {
7491          aio_context_acquire(ctx);
7492      }
7493  
7494      tran_commit(tran);
7495  
7496      if (qemu_get_aio_context() != ctx) {
7497          aio_context_release(ctx);
7498      }
7499  
7500      /* Re-acquire the old AioContext, since the caller takes and releases it. */
7501      if (qemu_get_aio_context() != old_context) {
7502          aio_context_acquire(old_context);
7503      }
7504  
7505      return 0;
7506  }
7507  
7508  void bdrv_add_aio_context_notifier(BlockDriverState *bs,
7509          void (*attached_aio_context)(AioContext *new_context, void *opaque),
7510          void (*detach_aio_context)(void *opaque), void *opaque)
7511  {
7512      BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
7513      *ban = (BdrvAioNotifier){
7514          .attached_aio_context = attached_aio_context,
7515          .detach_aio_context   = detach_aio_context,
7516          .opaque               = opaque
7517      };
7518      GLOBAL_STATE_CODE();
7519  
7520      QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
7521  }
7522  
7523  void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
7524                                        void (*attached_aio_context)(AioContext *,
7525                                                                     void *),
7526                                        void (*detach_aio_context)(void *),
7527                                        void *opaque)
7528  {
7529      BdrvAioNotifier *ban, *ban_next;
7530      GLOBAL_STATE_CODE();
7531  
7532      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
7533          if (ban->attached_aio_context == attached_aio_context &&
7534              ban->detach_aio_context   == detach_aio_context   &&
7535              ban->opaque               == opaque               &&
7536              ban->deleted              == false)
7537          {
7538              if (bs->walking_aio_notifiers) {
7539                  ban->deleted = true;
7540              } else {
7541                  bdrv_do_remove_aio_context_notifier(ban);
7542              }
7543              return;
7544          }
7545      }
7546  
7547      abort();
7548  }
7549  
7550  int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
7551                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
7552                         bool force,
7553                         Error **errp)
7554  {
7555      GLOBAL_STATE_CODE();
7556      if (!bs->drv) {
7557          error_setg(errp, "Node is ejected");
7558          return -ENOMEDIUM;
7559      }
7560      if (!bs->drv->bdrv_amend_options) {
7561          error_setg(errp, "Block driver '%s' does not support option amendment",
7562                     bs->drv->format_name);
7563          return -ENOTSUP;
7564      }
7565      return bs->drv->bdrv_amend_options(bs, opts, status_cb,
7566                                         cb_opaque, force, errp);
7567  }
7568  
7569  /*
7570   * This function checks whether the given @to_replace is allowed to be
7571   * replaced by a node that always shows the same data as @bs.  This is
7572   * used for example to verify whether the mirror job can replace
7573   * @to_replace by the target mirrored from @bs.
7574   * To be replaceable, @bs and @to_replace may either be guaranteed to
7575   * always show the same data (because they are only connected through
7576   * filters), or some driver may allow replacing one of its children
7577   * because it can guarantee that this child's data is not visible at
7578   * all (for example, for dissenting quorum children that have no other
7579   * parents).
7580   */
7581  bool bdrv_recurse_can_replace(BlockDriverState *bs,
7582                                BlockDriverState *to_replace)
7583  {
7584      BlockDriverState *filtered;
7585  
7586      GLOBAL_STATE_CODE();
7587  
7588      if (!bs || !bs->drv) {
7589          return false;
7590      }
7591  
7592      if (bs == to_replace) {
7593          return true;
7594      }
7595  
7596      /* See what the driver can do */
7597      if (bs->drv->bdrv_recurse_can_replace) {
7598          return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
7599      }
7600  
7601      /* For filters without an own implementation, we can recurse on our own */
7602      filtered = bdrv_filter_bs(bs);
7603      if (filtered) {
7604          return bdrv_recurse_can_replace(filtered, to_replace);
7605      }
7606  
7607      /* Safe default */
7608      return false;
7609  }
7610  
7611  /*
7612   * Check whether the given @node_name can be replaced by a node that
7613   * has the same data as @parent_bs.  If so, return @node_name's BDS;
7614   * NULL otherwise.
7615   *
7616   * @node_name must be a (recursive) *child of @parent_bs (or this
7617   * function will return NULL).
7618   *
7619   * The result (whether the node can be replaced or not) is only valid
7620   * for as long as no graph or permission changes occur.
7621   */
7622  BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
7623                                          const char *node_name, Error **errp)
7624  {
7625      BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
7626      AioContext *aio_context;
7627  
7628      GLOBAL_STATE_CODE();
7629  
7630      if (!to_replace_bs) {
7631          error_setg(errp, "Failed to find node with node-name='%s'", node_name);
7632          return NULL;
7633      }
7634  
7635      aio_context = bdrv_get_aio_context(to_replace_bs);
7636      aio_context_acquire(aio_context);
7637  
7638      if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
7639          to_replace_bs = NULL;
7640          goto out;
7641      }
7642  
7643      /* We don't want arbitrary node of the BDS chain to be replaced only the top
7644       * most non filter in order to prevent data corruption.
7645       * Another benefit is that this tests exclude backing files which are
7646       * blocked by the backing blockers.
7647       */
7648      if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
7649          error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
7650                     "because it cannot be guaranteed that doing so would not "
7651                     "lead to an abrupt change of visible data",
7652                     node_name, parent_bs->node_name);
7653          to_replace_bs = NULL;
7654          goto out;
7655      }
7656  
7657  out:
7658      aio_context_release(aio_context);
7659      return to_replace_bs;
7660  }
7661  
7662  /**
7663   * Iterates through the list of runtime option keys that are said to
7664   * be "strong" for a BDS.  An option is called "strong" if it changes
7665   * a BDS's data.  For example, the null block driver's "size" and
7666   * "read-zeroes" options are strong, but its "latency-ns" option is
7667   * not.
7668   *
7669   * If a key returned by this function ends with a dot, all options
7670   * starting with that prefix are strong.
7671   */
7672  static const char *const *strong_options(BlockDriverState *bs,
7673                                           const char *const *curopt)
7674  {
7675      static const char *const global_options[] = {
7676          "driver", "filename", NULL
7677      };
7678  
7679      if (!curopt) {
7680          return &global_options[0];
7681      }
7682  
7683      curopt++;
7684      if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
7685          curopt = bs->drv->strong_runtime_opts;
7686      }
7687  
7688      return (curopt && *curopt) ? curopt : NULL;
7689  }
7690  
7691  /**
7692   * Copies all strong runtime options from bs->options to the given
7693   * QDict.  The set of strong option keys is determined by invoking
7694   * strong_options().
7695   *
7696   * Returns true iff any strong option was present in bs->options (and
7697   * thus copied to the target QDict) with the exception of "filename"
7698   * and "driver".  The caller is expected to use this value to decide
7699   * whether the existence of strong options prevents the generation of
7700   * a plain filename.
7701   */
7702  static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
7703  {
7704      bool found_any = false;
7705      const char *const *option_name = NULL;
7706  
7707      if (!bs->drv) {
7708          return false;
7709      }
7710  
7711      while ((option_name = strong_options(bs, option_name))) {
7712          bool option_given = false;
7713  
7714          assert(strlen(*option_name) > 0);
7715          if ((*option_name)[strlen(*option_name) - 1] != '.') {
7716              QObject *entry = qdict_get(bs->options, *option_name);
7717              if (!entry) {
7718                  continue;
7719              }
7720  
7721              qdict_put_obj(d, *option_name, qobject_ref(entry));
7722              option_given = true;
7723          } else {
7724              const QDictEntry *entry;
7725              for (entry = qdict_first(bs->options); entry;
7726                   entry = qdict_next(bs->options, entry))
7727              {
7728                  if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
7729                      qdict_put_obj(d, qdict_entry_key(entry),
7730                                    qobject_ref(qdict_entry_value(entry)));
7731                      option_given = true;
7732                  }
7733              }
7734          }
7735  
7736          /* While "driver" and "filename" need to be included in a JSON filename,
7737           * their existence does not prohibit generation of a plain filename. */
7738          if (!found_any && option_given &&
7739              strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
7740          {
7741              found_any = true;
7742          }
7743      }
7744  
7745      if (!qdict_haskey(d, "driver")) {
7746          /* Drivers created with bdrv_new_open_driver() may not have a
7747           * @driver option.  Add it here. */
7748          qdict_put_str(d, "driver", bs->drv->format_name);
7749      }
7750  
7751      return found_any;
7752  }
7753  
7754  /* Note: This function may return false positives; it may return true
7755   * even if opening the backing file specified by bs's image header
7756   * would result in exactly bs->backing. */
7757  static bool bdrv_backing_overridden(BlockDriverState *bs)
7758  {
7759      GLOBAL_STATE_CODE();
7760      if (bs->backing) {
7761          return strcmp(bs->auto_backing_file,
7762                        bs->backing->bs->filename);
7763      } else {
7764          /* No backing BDS, so if the image header reports any backing
7765           * file, it must have been suppressed */
7766          return bs->auto_backing_file[0] != '\0';
7767      }
7768  }
7769  
7770  /* Updates the following BDS fields:
7771   *  - exact_filename: A filename which may be used for opening a block device
7772   *                    which (mostly) equals the given BDS (even without any
7773   *                    other options; so reading and writing must return the same
7774   *                    results, but caching etc. may be different)
7775   *  - full_open_options: Options which, when given when opening a block device
7776   *                       (without a filename), result in a BDS (mostly)
7777   *                       equalling the given one
7778   *  - filename: If exact_filename is set, it is copied here. Otherwise,
7779   *              full_open_options is converted to a JSON object, prefixed with
7780   *              "json:" (for use through the JSON pseudo protocol) and put here.
7781   */
7782  void bdrv_refresh_filename(BlockDriverState *bs)
7783  {
7784      BlockDriver *drv = bs->drv;
7785      BdrvChild *child;
7786      BlockDriverState *primary_child_bs;
7787      QDict *opts;
7788      bool backing_overridden;
7789      bool generate_json_filename; /* Whether our default implementation should
7790                                      fill exact_filename (false) or not (true) */
7791  
7792      GLOBAL_STATE_CODE();
7793  
7794      if (!drv) {
7795          return;
7796      }
7797  
7798      /* This BDS's file name may depend on any of its children's file names, so
7799       * refresh those first */
7800      QLIST_FOREACH(child, &bs->children, next) {
7801          bdrv_refresh_filename(child->bs);
7802      }
7803  
7804      if (bs->implicit) {
7805          /* For implicit nodes, just copy everything from the single child */
7806          child = QLIST_FIRST(&bs->children);
7807          assert(QLIST_NEXT(child, next) == NULL);
7808  
7809          pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
7810                  child->bs->exact_filename);
7811          pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
7812  
7813          qobject_unref(bs->full_open_options);
7814          bs->full_open_options = qobject_ref(child->bs->full_open_options);
7815  
7816          return;
7817      }
7818  
7819      backing_overridden = bdrv_backing_overridden(bs);
7820  
7821      if (bs->open_flags & BDRV_O_NO_IO) {
7822          /* Without I/O, the backing file does not change anything.
7823           * Therefore, in such a case (primarily qemu-img), we can
7824           * pretend the backing file has not been overridden even if
7825           * it technically has been. */
7826          backing_overridden = false;
7827      }
7828  
7829      /* Gather the options QDict */
7830      opts = qdict_new();
7831      generate_json_filename = append_strong_runtime_options(opts, bs);
7832      generate_json_filename |= backing_overridden;
7833  
7834      if (drv->bdrv_gather_child_options) {
7835          /* Some block drivers may not want to present all of their children's
7836           * options, or name them differently from BdrvChild.name */
7837          drv->bdrv_gather_child_options(bs, opts, backing_overridden);
7838      } else {
7839          QLIST_FOREACH(child, &bs->children, next) {
7840              if (child == bs->backing && !backing_overridden) {
7841                  /* We can skip the backing BDS if it has not been overridden */
7842                  continue;
7843              }
7844  
7845              qdict_put(opts, child->name,
7846                        qobject_ref(child->bs->full_open_options));
7847          }
7848  
7849          if (backing_overridden && !bs->backing) {
7850              /* Force no backing file */
7851              qdict_put_null(opts, "backing");
7852          }
7853      }
7854  
7855      qobject_unref(bs->full_open_options);
7856      bs->full_open_options = opts;
7857  
7858      primary_child_bs = bdrv_primary_bs(bs);
7859  
7860      if (drv->bdrv_refresh_filename) {
7861          /* Obsolete information is of no use here, so drop the old file name
7862           * information before refreshing it */
7863          bs->exact_filename[0] = '\0';
7864  
7865          drv->bdrv_refresh_filename(bs);
7866      } else if (primary_child_bs) {
7867          /*
7868           * Try to reconstruct valid information from the underlying
7869           * file -- this only works for format nodes (filter nodes
7870           * cannot be probed and as such must be selected by the user
7871           * either through an options dict, or through a special
7872           * filename which the filter driver must construct in its
7873           * .bdrv_refresh_filename() implementation).
7874           */
7875  
7876          bs->exact_filename[0] = '\0';
7877  
7878          /*
7879           * We can use the underlying file's filename if:
7880           * - it has a filename,
7881           * - the current BDS is not a filter,
7882           * - the file is a protocol BDS, and
7883           * - opening that file (as this BDS's format) will automatically create
7884           *   the BDS tree we have right now, that is:
7885           *   - the user did not significantly change this BDS's behavior with
7886           *     some explicit (strong) options
7887           *   - no non-file child of this BDS has been overridden by the user
7888           *   Both of these conditions are represented by generate_json_filename.
7889           */
7890          if (primary_child_bs->exact_filename[0] &&
7891              primary_child_bs->drv->bdrv_file_open &&
7892              !drv->is_filter && !generate_json_filename)
7893          {
7894              strcpy(bs->exact_filename, primary_child_bs->exact_filename);
7895          }
7896      }
7897  
7898      if (bs->exact_filename[0]) {
7899          pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
7900      } else {
7901          GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
7902          if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
7903                       json->str) >= sizeof(bs->filename)) {
7904              /* Give user a hint if we truncated things. */
7905              strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
7906          }
7907          g_string_free(json, true);
7908      }
7909  }
7910  
7911  char *bdrv_dirname(BlockDriverState *bs, Error **errp)
7912  {
7913      BlockDriver *drv = bs->drv;
7914      BlockDriverState *child_bs;
7915  
7916      GLOBAL_STATE_CODE();
7917  
7918      if (!drv) {
7919          error_setg(errp, "Node '%s' is ejected", bs->node_name);
7920          return NULL;
7921      }
7922  
7923      if (drv->bdrv_dirname) {
7924          return drv->bdrv_dirname(bs, errp);
7925      }
7926  
7927      child_bs = bdrv_primary_bs(bs);
7928      if (child_bs) {
7929          return bdrv_dirname(child_bs, errp);
7930      }
7931  
7932      bdrv_refresh_filename(bs);
7933      if (bs->exact_filename[0] != '\0') {
7934          return path_combine(bs->exact_filename, "");
7935      }
7936  
7937      error_setg(errp, "Cannot generate a base directory for %s nodes",
7938                 drv->format_name);
7939      return NULL;
7940  }
7941  
7942  /*
7943   * Hot add/remove a BDS's child. So the user can take a child offline when
7944   * it is broken and take a new child online
7945   */
7946  void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
7947                      Error **errp)
7948  {
7949      GLOBAL_STATE_CODE();
7950      if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
7951          error_setg(errp, "The node %s does not support adding a child",
7952                     bdrv_get_device_or_node_name(parent_bs));
7953          return;
7954      }
7955  
7956      if (!QLIST_EMPTY(&child_bs->parents)) {
7957          error_setg(errp, "The node %s already has a parent",
7958                     child_bs->node_name);
7959          return;
7960      }
7961  
7962      parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
7963  }
7964  
7965  void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
7966  {
7967      BdrvChild *tmp;
7968  
7969      GLOBAL_STATE_CODE();
7970      if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
7971          error_setg(errp, "The node %s does not support removing a child",
7972                     bdrv_get_device_or_node_name(parent_bs));
7973          return;
7974      }
7975  
7976      QLIST_FOREACH(tmp, &parent_bs->children, next) {
7977          if (tmp == child) {
7978              break;
7979          }
7980      }
7981  
7982      if (!tmp) {
7983          error_setg(errp, "The node %s does not have a child named %s",
7984                     bdrv_get_device_or_node_name(parent_bs),
7985                     bdrv_get_device_or_node_name(child->bs));
7986          return;
7987      }
7988  
7989      parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
7990  }
7991  
7992  int bdrv_make_empty(BdrvChild *c, Error **errp)
7993  {
7994      BlockDriver *drv = c->bs->drv;
7995      int ret;
7996  
7997      GLOBAL_STATE_CODE();
7998      assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
7999  
8000      if (!drv->bdrv_make_empty) {
8001          error_setg(errp, "%s does not support emptying nodes",
8002                     drv->format_name);
8003          return -ENOTSUP;
8004      }
8005  
8006      ret = drv->bdrv_make_empty(c->bs);
8007      if (ret < 0) {
8008          error_setg_errno(errp, -ret, "Failed to empty %s",
8009                           c->bs->filename);
8010          return ret;
8011      }
8012  
8013      return 0;
8014  }
8015  
8016  /*
8017   * Return the child that @bs acts as an overlay for, and from which data may be
8018   * copied in COW or COR operations.  Usually this is the backing file.
8019   */
8020  BdrvChild *bdrv_cow_child(BlockDriverState *bs)
8021  {
8022      IO_CODE();
8023  
8024      if (!bs || !bs->drv) {
8025          return NULL;
8026      }
8027  
8028      if (bs->drv->is_filter) {
8029          return NULL;
8030      }
8031  
8032      if (!bs->backing) {
8033          return NULL;
8034      }
8035  
8036      assert(bs->backing->role & BDRV_CHILD_COW);
8037      return bs->backing;
8038  }
8039  
8040  /*
8041   * If @bs acts as a filter for exactly one of its children, return
8042   * that child.
8043   */
8044  BdrvChild *bdrv_filter_child(BlockDriverState *bs)
8045  {
8046      BdrvChild *c;
8047      IO_CODE();
8048  
8049      if (!bs || !bs->drv) {
8050          return NULL;
8051      }
8052  
8053      if (!bs->drv->is_filter) {
8054          return NULL;
8055      }
8056  
8057      /* Only one of @backing or @file may be used */
8058      assert(!(bs->backing && bs->file));
8059  
8060      c = bs->backing ?: bs->file;
8061      if (!c) {
8062          return NULL;
8063      }
8064  
8065      assert(c->role & BDRV_CHILD_FILTERED);
8066      return c;
8067  }
8068  
8069  /*
8070   * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
8071   * whichever is non-NULL.
8072   *
8073   * Return NULL if both are NULL.
8074   */
8075  BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
8076  {
8077      BdrvChild *cow_child = bdrv_cow_child(bs);
8078      BdrvChild *filter_child = bdrv_filter_child(bs);
8079      IO_CODE();
8080  
8081      /* Filter nodes cannot have COW backing files */
8082      assert(!(cow_child && filter_child));
8083  
8084      return cow_child ?: filter_child;
8085  }
8086  
8087  /*
8088   * Return the primary child of this node: For filters, that is the
8089   * filtered child.  For other nodes, that is usually the child storing
8090   * metadata.
8091   * (A generally more helpful description is that this is (usually) the
8092   * child that has the same filename as @bs.)
8093   *
8094   * Drivers do not necessarily have a primary child; for example quorum
8095   * does not.
8096   */
8097  BdrvChild *bdrv_primary_child(BlockDriverState *bs)
8098  {
8099      BdrvChild *c, *found = NULL;
8100      IO_CODE();
8101  
8102      QLIST_FOREACH(c, &bs->children, next) {
8103          if (c->role & BDRV_CHILD_PRIMARY) {
8104              assert(!found);
8105              found = c;
8106          }
8107      }
8108  
8109      return found;
8110  }
8111  
8112  static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
8113                                                bool stop_on_explicit_filter)
8114  {
8115      BdrvChild *c;
8116  
8117      if (!bs) {
8118          return NULL;
8119      }
8120  
8121      while (!(stop_on_explicit_filter && !bs->implicit)) {
8122          c = bdrv_filter_child(bs);
8123          if (!c) {
8124              /*
8125               * A filter that is embedded in a working block graph must
8126               * have a child.  Assert this here so this function does
8127               * not return a filter node that is not expected by the
8128               * caller.
8129               */
8130              assert(!bs->drv || !bs->drv->is_filter);
8131              break;
8132          }
8133          bs = c->bs;
8134      }
8135      /*
8136       * Note that this treats nodes with bs->drv == NULL as not being
8137       * filters (bs->drv == NULL should be replaced by something else
8138       * anyway).
8139       * The advantage of this behavior is that this function will thus
8140       * always return a non-NULL value (given a non-NULL @bs).
8141       */
8142  
8143      return bs;
8144  }
8145  
8146  /*
8147   * Return the first BDS that has not been added implicitly or that
8148   * does not have a filtered child down the chain starting from @bs
8149   * (including @bs itself).
8150   */
8151  BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
8152  {
8153      GLOBAL_STATE_CODE();
8154      return bdrv_do_skip_filters(bs, true);
8155  }
8156  
8157  /*
8158   * Return the first BDS that does not have a filtered child down the
8159   * chain starting from @bs (including @bs itself).
8160   */
8161  BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
8162  {
8163      IO_CODE();
8164      return bdrv_do_skip_filters(bs, false);
8165  }
8166  
8167  /*
8168   * For a backing chain, return the first non-filter backing image of
8169   * the first non-filter image.
8170   */
8171  BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
8172  {
8173      IO_CODE();
8174      return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
8175  }
8176  
8177  /**
8178   * Check whether [offset, offset + bytes) overlaps with the cached
8179   * block-status data region.
8180   *
8181   * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
8182   * which is what bdrv_bsc_is_data()'s interface needs.
8183   * Otherwise, *pnum is not touched.
8184   */
8185  static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
8186                                             int64_t offset, int64_t bytes,
8187                                             int64_t *pnum)
8188  {
8189      BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
8190      bool overlaps;
8191  
8192      overlaps =
8193          qatomic_read(&bsc->valid) &&
8194          ranges_overlap(offset, bytes, bsc->data_start,
8195                         bsc->data_end - bsc->data_start);
8196  
8197      if (overlaps && pnum) {
8198          *pnum = bsc->data_end - offset;
8199      }
8200  
8201      return overlaps;
8202  }
8203  
8204  /**
8205   * See block_int.h for this function's documentation.
8206   */
8207  bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
8208  {
8209      IO_CODE();
8210      RCU_READ_LOCK_GUARD();
8211      return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
8212  }
8213  
8214  /**
8215   * See block_int.h for this function's documentation.
8216   */
8217  void bdrv_bsc_invalidate_range(BlockDriverState *bs,
8218                                 int64_t offset, int64_t bytes)
8219  {
8220      IO_CODE();
8221      RCU_READ_LOCK_GUARD();
8222  
8223      if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
8224          qatomic_set(&bs->block_status_cache->valid, false);
8225      }
8226  }
8227  
8228  /**
8229   * See block_int.h for this function's documentation.
8230   */
8231  void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
8232  {
8233      BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
8234      BdrvBlockStatusCache *old_bsc;
8235      IO_CODE();
8236  
8237      *new_bsc = (BdrvBlockStatusCache) {
8238          .valid = true,
8239          .data_start = offset,
8240          .data_end = offset + bytes,
8241      };
8242  
8243      QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
8244  
8245      old_bsc = qatomic_rcu_read(&bs->block_status_cache);
8246      qatomic_rcu_set(&bs->block_status_cache, new_bsc);
8247      if (old_bsc) {
8248          g_free_rcu(old_bsc, rcu);
8249      }
8250  }
8251