xref: /openbmc/qemu/block.c (revision dcc99bd833840c6c4e909d391df17b71e47dea62)
1  /*
2   * QEMU System Emulator block driver
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   * Copyright (c) 2020 Virtuozzo International GmbH.
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  #include "block/trace.h"
28  #include "block/block_int.h"
29  #include "block/blockjob.h"
30  #include "block/fuse.h"
31  #include "block/nbd.h"
32  #include "block/qdict.h"
33  #include "qemu/error-report.h"
34  #include "block/module_block.h"
35  #include "qemu/main-loop.h"
36  #include "qemu/module.h"
37  #include "qapi/error.h"
38  #include "qapi/qmp/qdict.h"
39  #include "qapi/qmp/qjson.h"
40  #include "qapi/qmp/qnull.h"
41  #include "qapi/qmp/qstring.h"
42  #include "qapi/qobject-output-visitor.h"
43  #include "qapi/qapi-visit-block-core.h"
44  #include "sysemu/block-backend.h"
45  #include "qemu/notify.h"
46  #include "qemu/option.h"
47  #include "qemu/coroutine.h"
48  #include "block/qapi.h"
49  #include "qemu/timer.h"
50  #include "qemu/cutils.h"
51  #include "qemu/id.h"
52  #include "qemu/range.h"
53  #include "qemu/rcu.h"
54  #include "block/coroutines.h"
55  
56  #ifdef CONFIG_BSD
57  #include <sys/ioctl.h>
58  #include <sys/queue.h>
59  #if defined(HAVE_SYS_DISK_H)
60  #include <sys/disk.h>
61  #endif
62  #endif
63  
64  #ifdef _WIN32
65  #include <windows.h>
66  #endif
67  
68  #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
69  
/*
 * NOTE(review): graph_bdrv_states is not referenced in the part of the
 * file visible here; presumably it tracks the nodes that are part of the
 * named graph -- confirm against its users.
 */
70  static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
71      QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
72  
/* bdrv_new() appends every BlockDriverState it allocates to this list. */
73  static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
74      QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
75  
/*
 * Registered block drivers.  bdrv_register() inserts at the head; the
 * format, protocol, host-device and probing lookups iterate over it.
 */
76  static QLIST_HEAD(, BlockDriver) bdrv_drivers =
77      QLIST_HEAD_INITIALIZER(bdrv_drivers);
78  
79  static BlockDriverState *bdrv_open_inherit(const char *filename,
80                                             const char *reference,
81                                             QDict *options, int flags,
82                                             BlockDriverState *parent,
83                                             const BdrvChildClass *child_class,
84                                             BdrvChildRole child_role,
85                                             Error **errp);
86  
87  static void bdrv_replace_child_noperm(BdrvChild *child,
88                                        BlockDriverState *new_bs);
89  static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
90                                                BdrvChild *child,
91                                                Transaction *tran);
92  static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
93                                              Transaction *tran);
94  
95  static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
96                                 BlockReopenQueue *queue,
97                                 Transaction *change_child_tran, Error **errp);
98  static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
99  static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
100  
101  /* If non-zero, use only whitelisted block drivers */
102  static int use_bdrv_whitelist;
103  
104  #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), and 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]; the string may be empty */
        return 0;
    }

    return filename[1] == ':';
}
111  
112  int is_windows_drive(const char *filename)
113  {
114      if (is_windows_drive_prefix(filename) &&
115          filename[2] == '\0')
116          return 1;
117      if (strstart(filename, "\\\\.\\", NULL) ||
118          strstart(filename, "//./", NULL))
119          return 1;
120      return 0;
121  }
122  #endif
123  
124  size_t bdrv_opt_mem_align(BlockDriverState *bs)
125  {
126      if (!bs || !bs->drv) {
127          /* page size or 4k (hdd sector size) should be on the safe side */
128          return MAX(4096, qemu_real_host_page_size);
129      }
130  
131      return bs->bl.opt_mem_alignment;
132  }
133  
134  size_t bdrv_min_mem_align(BlockDriverState *bs)
135  {
136      if (!bs || !bs->drv) {
137          /* page size or 4k (hdd sector size) should be on the safe side */
138          return MAX(4096, qemu_real_host_page_size);
139      }
140  
141      return bs->bl.min_mem_alignment;
142  }
143  
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' shows
 * up before the first directory separator.  On Windows, drive
 * specifications are never treated as a protocol.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_set = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_set = ":/";
#endif

    /* Index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, stop_set)] == ':';
}
161  
/*
 * Return 1 if @path is absolute: it starts with '/', or, on Windows, it
 * starts with '\' or is recognised as a drive specification.
 */
int path_is_absolute(const char *path)
{
    char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
174  
/*
 * If @filename is absolute, return a duplicate of it.  Otherwise build a
 * path to it relative to @base_path: everything of @base_path up to and
 * including its last directory separator (or at least its "<protocol>:"
 * prefix, if it has one) is kept and @filename is appended.  URLs are
 * supported.
 *
 * The result is newly allocated; the caller must g_free() it.
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *prefix_end = base_path;
    const char *sep;
    char *combined;
    int prefix_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* Never cut into the "<protocol>:" part of the base */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }

    /* Find the last directory separator of the base */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!sep || backslash > sep) {
            sep = backslash;
        }
    }
#endif

    /* The directory part wins if it reaches beyond the protocol prefix */
    if (sep && sep + 1 > prefix_end) {
        prefix_end = sep + 1;
    }
    prefix_len = prefix_end - base_path;

    combined = g_malloc(prefix_len + strlen(filename) + 1);
    memcpy(combined, base_path, prefix_len);
    strcpy(combined + prefix_len, filename);

    return combined;
}
223  
224  /*
225   * Helper function for bdrv_parse_filename() implementations to remove optional
226   * protocol prefixes (especially "file:") from a filename and for putting the
227   * stripped filename into the options QDict if there is such a prefix.
228   */
229  void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
230                                        QDict *options)
231  {
232      if (strstart(filename, prefix, &filename)) {
233          /* Stripping the explicit protocol prefix may result in a protocol
234           * prefix being (wrongly) detected (if the filename contains a colon) */
235          if (path_has_protocol(filename)) {
236              GString *fat_filename;
237  
238              /* This means there is some colon before the first slash; therefore,
239               * this cannot be an absolute path */
240              assert(!path_is_absolute(filename));
241  
242              /* And we can thus fix the protocol detection issue by prefixing it
243               * by "./" */
244              fat_filename = g_string_new("./");
245              g_string_append(fat_filename, filename);
246  
247              assert(!path_has_protocol(fat_filename->str));
248  
249              qdict_put(options, "filename",
250                        qstring_from_gstring(fat_filename));
251          } else {
252              /* If no protocol prefix was detected, we can use the shortened
253               * filename as-is */
254              qdict_put_str(options, "filename", filename);
255          }
256      }
257  }
258  
259  
260  /* Returns whether the image file is opened as read-only. Note that this can
261   * return false and writing to the image file is still not possible because the
262   * image is inactivated. */
263  bool bdrv_is_read_only(BlockDriverState *bs)
264  {
265      return !(bs->open_flags & BDRV_O_RDWR);
266  }
267  
268  int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
269                             bool ignore_allow_rdw, Error **errp)
270  {
271      /* Do not set read_only if copy_on_read is enabled */
272      if (bs->copy_on_read && read_only) {
273          error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
274                     bdrv_get_device_or_node_name(bs));
275          return -EINVAL;
276      }
277  
278      /* Do not clear read_only if it is prohibited */
279      if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
280          !ignore_allow_rdw)
281      {
282          error_setg(errp, "Node '%s' is read only",
283                     bdrv_get_device_or_node_name(bs));
284          return -EPERM;
285      }
286  
287      return 0;
288  }
289  
290  /*
291   * Called by a driver that can only provide a read-only image.
292   *
293   * Returns 0 if the node is already read-only or it could switch the node to
294   * read-only because BDRV_O_AUTO_RDONLY is set.
295   *
296   * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
297   * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
298   * is not NULL, it is used as the error message for the Error object.
299   */
300  int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
301                                Error **errp)
302  {
303      int ret = 0;
304  
305      if (!(bs->open_flags & BDRV_O_RDWR)) {
306          return 0;
307      }
308      if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
309          goto fail;
310      }
311  
312      ret = bdrv_can_set_read_only(bs, true, false, NULL);
313      if (ret < 0) {
314          goto fail;
315      }
316  
317      bs->open_flags &= ~BDRV_O_RDWR;
318  
319      return 0;
320  
321  fail:
322      error_setg(errp, "%s", errmsg ?: "Image is read-only");
323      return -EACCES;
324  }
325  
326  /*
327   * If @backing is empty, this function returns NULL without setting
328   * @errp.  In all other cases, NULL will only be returned with @errp
329   * set.
330   *
331   * Therefore, a return value of NULL without @errp set means that
332   * there is no backing file; if @errp is set, there is one but its
333   * absolute filename cannot be generated.
334   */
335  char *bdrv_get_full_backing_filename_from_filename(const char *backed,
336                                                     const char *backing,
337                                                     Error **errp)
338  {
339      if (backing[0] == '\0') {
340          return NULL;
341      } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
342          return g_strdup(backing);
343      } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
344          error_setg(errp, "Cannot use relative backing file names for '%s'",
345                     backed);
346          return NULL;
347      } else {
348          return path_combine(backed, backing);
349      }
350  }
351  
352  /*
353   * If @filename is empty or NULL, this function returns NULL without
354   * setting @errp.  In all other cases, NULL will only be returned with
355   * @errp set.
356   */
357  static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
358                                           const char *filename, Error **errp)
359  {
360      char *dir, *full_name;
361  
362      if (!filename || filename[0] == '\0') {
363          return NULL;
364      } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
365          return g_strdup(filename);
366      }
367  
368      dir = bdrv_dirname(relative_to, errp);
369      if (!dir) {
370          return NULL;
371      }
372  
373      full_name = g_strconcat(dir, filename, NULL);
374      g_free(dir);
375      return full_name;
376  }
377  
378  char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
379  {
380      return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
381  }
382  
383  void bdrv_register(BlockDriver *bdrv)
384  {
385      assert(bdrv->format_name);
386      QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
387  }
388  
389  BlockDriverState *bdrv_new(void)
390  {
391      BlockDriverState *bs;
392      int i;
393  
394      bs = g_new0(BlockDriverState, 1);
395      QLIST_INIT(&bs->dirty_bitmaps);
396      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
397          QLIST_INIT(&bs->op_blockers[i]);
398      }
399      qemu_co_mutex_init(&bs->reqs_lock);
400      qemu_mutex_init(&bs->dirty_bitmap_mutex);
401      bs->refcnt = 1;
402      bs->aio_context = qemu_get_aio_context();
403  
404      qemu_co_queue_init(&bs->flush_queue);
405  
406      qemu_co_mutex_init(&bs->bsc_modify_lock);
407      bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
408  
409      for (i = 0; i < bdrv_drain_all_count; i++) {
410          bdrv_drained_begin(bs);
411      }
412  
413      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
414  
415      return bs;
416  }
417  
418  static BlockDriver *bdrv_do_find_format(const char *format_name)
419  {
420      BlockDriver *drv1;
421  
422      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
423          if (!strcmp(drv1->format_name, format_name)) {
424              return drv1;
425          }
426      }
427  
428      return NULL;
429  }
430  
431  BlockDriver *bdrv_find_format(const char *format_name)
432  {
433      BlockDriver *drv1;
434      int i;
435  
436      drv1 = bdrv_do_find_format(format_name);
437      if (drv1) {
438          return drv1;
439      }
440  
441      /* The driver isn't registered, maybe we need to load a module */
442      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
443          if (!strcmp(block_driver_modules[i].format_name, format_name)) {
444              block_module_load_one(block_driver_modules[i].library_name);
445              break;
446          }
447      }
448  
449      return bdrv_do_find_format(format_name);
450  }
451  
452  static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
453  {
454      static const char *whitelist_rw[] = {
455          CONFIG_BDRV_RW_WHITELIST
456          NULL
457      };
458      static const char *whitelist_ro[] = {
459          CONFIG_BDRV_RO_WHITELIST
460          NULL
461      };
462      const char **p;
463  
464      if (!whitelist_rw[0] && !whitelist_ro[0]) {
465          return 1;               /* no whitelist, anything goes */
466      }
467  
468      for (p = whitelist_rw; *p; p++) {
469          if (!strcmp(format_name, *p)) {
470              return 1;
471          }
472      }
473      if (read_only) {
474          for (p = whitelist_ro; *p; p++) {
475              if (!strcmp(format_name, *p)) {
476                  return 1;
477              }
478          }
479      }
480      return 0;
481  }
482  
483  int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
484  {
485      return bdrv_format_is_whitelisted(drv->format_name, read_only);
486  }
487  
488  bool bdrv_uses_whitelist(void)
489  {
490      return use_bdrv_whitelist;
491  }
492  
493  typedef struct CreateCo {
494      BlockDriver *drv;
495      char *filename;
496      QemuOpts *opts;
497      int ret;
498      Error *err;
499  } CreateCo;
500  
501  static void coroutine_fn bdrv_create_co_entry(void *opaque)
502  {
503      Error *local_err = NULL;
504      int ret;
505  
506      CreateCo *cco = opaque;
507      assert(cco->drv);
508  
509      ret = cco->drv->bdrv_co_create_opts(cco->drv,
510                                          cco->filename, cco->opts, &local_err);
511      error_propagate(&cco->err, local_err);
512      cco->ret = ret;
513  }
514  
515  int bdrv_create(BlockDriver *drv, const char* filename,
516                  QemuOpts *opts, Error **errp)
517  {
518      int ret;
519  
520      Coroutine *co;
521      CreateCo cco = {
522          .drv = drv,
523          .filename = g_strdup(filename),
524          .opts = opts,
525          .ret = NOT_DONE,
526          .err = NULL,
527      };
528  
529      if (!drv->bdrv_co_create_opts) {
530          error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
531          ret = -ENOTSUP;
532          goto out;
533      }
534  
535      if (qemu_in_coroutine()) {
536          /* Fast-path if already in coroutine context */
537          bdrv_create_co_entry(&cco);
538      } else {
539          co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
540          qemu_coroutine_enter(co);
541          while (cco.ret == NOT_DONE) {
542              aio_poll(qemu_get_aio_context(), true);
543          }
544      }
545  
546      ret = cco.ret;
547      if (ret < 0) {
548          if (cco.err) {
549              error_propagate(errp, cco.err);
550          } else {
551              error_setg_errno(errp, -ret, "Could not create image");
552          }
553      }
554  
555  out:
556      g_free(cco.filename);
557      return ret;
558  }
559  
560  /**
561   * Helper function for bdrv_create_file_fallback(): Resize @blk to at
562   * least the given @minimum_size.
563   *
564   * On success, return @blk's actual length.
565   * Otherwise, return -errno.
566   */
567  static int64_t create_file_fallback_truncate(BlockBackend *blk,
568                                               int64_t minimum_size, Error **errp)
569  {
570      Error *local_err = NULL;
571      int64_t size;
572      int ret;
573  
574      ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
575                         &local_err);
576      if (ret < 0 && ret != -ENOTSUP) {
577          error_propagate(errp, local_err);
578          return ret;
579      }
580  
581      size = blk_getlength(blk);
582      if (size < 0) {
583          error_free(local_err);
584          error_setg_errno(errp, -size,
585                           "Failed to inquire the new image file's length");
586          return size;
587      }
588  
589      if (size < minimum_size) {
590          /* Need to grow the image, but we failed to do that */
591          error_propagate(errp, local_err);
592          return -ENOTSUP;
593      }
594  
595      error_free(local_err);
596      local_err = NULL;
597  
598      return size;
599  }
600  
601  /**
602   * Helper function for bdrv_create_file_fallback(): Zero the first
603   * sector to remove any potentially pre-existing image header.
604   */
605  static int create_file_fallback_zero_first_sector(BlockBackend *blk,
606                                                    int64_t current_size,
607                                                    Error **errp)
608  {
609      int64_t bytes_to_clear;
610      int ret;
611  
612      bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
613      if (bytes_to_clear) {
614          ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
615          if (ret < 0) {
616              error_setg_errno(errp, -ret,
617                               "Failed to clear the new image's first sector");
618              return ret;
619          }
620      }
621  
622      return 0;
623  }
624  
625  /**
626   * Simple implementation of bdrv_co_create_opts for protocol drivers
627   * which only support creation via opening a file
628   * (usually existing raw storage device)
629   */
630  int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
631                                              const char *filename,
632                                              QemuOpts *opts,
633                                              Error **errp)
634  {
635      BlockBackend *blk;
636      QDict *options;
637      int64_t size = 0;
638      char *buf = NULL;
639      PreallocMode prealloc;
640      Error *local_err = NULL;
641      int ret;
642  
643      size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
644      buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
645      prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
646                                 PREALLOC_MODE_OFF, &local_err);
647      g_free(buf);
648      if (local_err) {
649          error_propagate(errp, local_err);
650          return -EINVAL;
651      }
652  
653      if (prealloc != PREALLOC_MODE_OFF) {
654          error_setg(errp, "Unsupported preallocation mode '%s'",
655                     PreallocMode_str(prealloc));
656          return -ENOTSUP;
657      }
658  
659      options = qdict_new();
660      qdict_put_str(options, "driver", drv->format_name);
661  
662      blk = blk_new_open(filename, NULL, options,
663                         BDRV_O_RDWR | BDRV_O_RESIZE, errp);
664      if (!blk) {
665          error_prepend(errp, "Protocol driver '%s' does not support image "
666                        "creation, and opening the image failed: ",
667                        drv->format_name);
668          return -EINVAL;
669      }
670  
671      size = create_file_fallback_truncate(blk, size, errp);
672      if (size < 0) {
673          ret = size;
674          goto out;
675      }
676  
677      ret = create_file_fallback_zero_first_sector(blk, size, errp);
678      if (ret < 0) {
679          goto out;
680      }
681  
682      ret = 0;
683  out:
684      blk_unref(blk);
685      return ret;
686  }
687  
688  int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
689  {
690      QemuOpts *protocol_opts;
691      BlockDriver *drv;
692      QDict *qdict;
693      int ret;
694  
695      drv = bdrv_find_protocol(filename, true, errp);
696      if (drv == NULL) {
697          return -ENOENT;
698      }
699  
700      if (!drv->create_opts) {
701          error_setg(errp, "Driver '%s' does not support image creation",
702                     drv->format_name);
703          return -ENOTSUP;
704      }
705  
706      /*
707       * 'opts' contains a QemuOptsList with a combination of format and protocol
708       * default values.
709       *
710       * The format properly removes its options, but the default values remain
711       * in 'opts->list'.  So if the protocol has options with the same name
712       * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
713       * of the format, since for overlapping options, the format wins.
714       *
715       * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
716       * only the set options, and then convert it back to QemuOpts, using the
717       * create_opts of the protocol. So the new QemuOpts, will contain only the
718       * protocol defaults.
719       */
720      qdict = qemu_opts_to_qdict(opts, NULL);
721      protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
722      if (protocol_opts == NULL) {
723          ret = -EINVAL;
724          goto out;
725      }
726  
727      ret = bdrv_create(drv, filename, protocol_opts, errp);
728  out:
729      qemu_opts_del(protocol_opts);
730      qobject_unref(qdict);
731      return ret;
732  }
733  
734  int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
735  {
736      Error *local_err = NULL;
737      int ret;
738  
739      assert(bs != NULL);
740  
741      if (!bs->drv) {
742          error_setg(errp, "Block node '%s' is not opened", bs->filename);
743          return -ENOMEDIUM;
744      }
745  
746      if (!bs->drv->bdrv_co_delete_file) {
747          error_setg(errp, "Driver '%s' does not support image deletion",
748                     bs->drv->format_name);
749          return -ENOTSUP;
750      }
751  
752      ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
753      if (ret < 0) {
754          error_propagate(errp, local_err);
755      }
756  
757      return ret;
758  }
759  
760  void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
761  {
762      Error *local_err = NULL;
763      int ret;
764  
765      if (!bs) {
766          return;
767      }
768  
769      ret = bdrv_co_delete_file(bs, &local_err);
770      /*
771       * ENOTSUP will happen if the block driver doesn't support
772       * the 'bdrv_co_delete_file' interface. This is a predictable
773       * scenario and shouldn't be reported back to the user.
774       */
775      if (ret == -ENOTSUP) {
776          error_free(local_err);
777      } else if (ret < 0) {
778          error_report_err(local_err);
779      }
780  }
781  
782  /**
783   * Try to get @bs's logical and physical block size.
784   * On success, store them in @bsz struct and return 0.
785   * On failure return -errno.
786   * @bs must not be empty.
787   */
788  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
789  {
790      BlockDriver *drv = bs->drv;
791      BlockDriverState *filtered = bdrv_filter_bs(bs);
792  
793      if (drv && drv->bdrv_probe_blocksizes) {
794          return drv->bdrv_probe_blocksizes(bs, bsz);
795      } else if (filtered) {
796          return bdrv_probe_blocksizes(filtered, bsz);
797      }
798  
799      return -ENOTSUP;
800  }
801  
802  /**
803   * Try to get @bs's geometry (cyls, heads, sectors).
804   * On success, store them in @geo struct and return 0.
805   * On failure return -errno.
806   * @bs must not be empty.
807   */
808  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
809  {
810      BlockDriver *drv = bs->drv;
811      BlockDriverState *filtered = bdrv_filter_bs(bs);
812  
813      if (drv && drv->bdrv_probe_geometry) {
814          return drv->bdrv_probe_geometry(bs, geo);
815      } else if (filtered) {
816          return bdrv_probe_geometry(filtered, geo);
817      }
818  
819      return -ENOTSUP;
820  }
821  
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.  On POSIX hosts the file is created in $TMPDIR, or in
 * /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: a failing
         * unlink() would overwrite errno and make us report the wrong
         * error.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
857  
858  /*
859   * Detect host devices. By convention, /dev/cdrom[N] is always
860   * recognized as a host CDROM.
861   */
862  static BlockDriver *find_hdev_driver(const char *filename)
863  {
864      int score_max = 0, score;
865      BlockDriver *drv = NULL, *d;
866  
867      QLIST_FOREACH(d, &bdrv_drivers, list) {
868          if (d->bdrv_probe_device) {
869              score = d->bdrv_probe_device(filename);
870              if (score > score_max) {
871                  score_max = score;
872                  drv = d;
873              }
874          }
875      }
876  
877      return drv;
878  }
879  
880  static BlockDriver *bdrv_do_find_protocol(const char *protocol)
881  {
882      BlockDriver *drv1;
883  
884      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
885          if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
886              return drv1;
887          }
888      }
889  
890      return NULL;
891  }
892  
893  BlockDriver *bdrv_find_protocol(const char *filename,
894                                  bool allow_protocol_prefix,
895                                  Error **errp)
896  {
897      BlockDriver *drv1;
898      char protocol[128];
899      int len;
900      const char *p;
901      int i;
902  
903      /* TODO Drivers without bdrv_file_open must be specified explicitly */
904  
905      /*
906       * XXX(hch): we really should not let host device detection
907       * override an explicit protocol specification, but moving this
908       * later breaks access to device names with colons in them.
909       * Thanks to the brain-dead persistent naming schemes on udev-
910       * based Linux systems those actually are quite common.
911       */
912      drv1 = find_hdev_driver(filename);
913      if (drv1) {
914          return drv1;
915      }
916  
917      if (!path_has_protocol(filename) || !allow_protocol_prefix) {
918          return &bdrv_file;
919      }
920  
921      p = strchr(filename, ':');
922      assert(p != NULL);
923      len = p - filename;
924      if (len > sizeof(protocol) - 1)
925          len = sizeof(protocol) - 1;
926      memcpy(protocol, filename, len);
927      protocol[len] = '\0';
928  
929      drv1 = bdrv_do_find_protocol(protocol);
930      if (drv1) {
931          return drv1;
932      }
933  
934      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
935          if (block_driver_modules[i].protocol_name &&
936              !strcmp(block_driver_modules[i].protocol_name, protocol)) {
937              block_module_load_one(block_driver_modules[i].library_name);
938              break;
939          }
940      }
941  
942      drv1 = bdrv_do_find_protocol(protocol);
943      if (!drv1) {
944          error_setg(errp, "Unknown protocol '%s'", protocol);
945      }
946      return drv1;
947  }
948  
949  /*
950   * Guess image format by probing its contents.
951   * This is not a good idea when your image is raw (CVE-2008-2004), but
952   * we do it anyway for backward compatibility.
953   *
954   * @buf         contains the image's first @buf_size bytes.
955   * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
956   *              but can be smaller if the image file is smaller)
957   * @filename    is its filename.
958   *
959   * For all block drivers, call the bdrv_probe() method to get its
960   * probing score.
961   * Return the first block driver with the highest probing score.
962   */
963  BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
964                              const char *filename)
965  {
966      int score_max = 0, score;
967      BlockDriver *drv = NULL, *d;
968  
969      QLIST_FOREACH(d, &bdrv_drivers, list) {
970          if (d->bdrv_probe) {
971              score = d->bdrv_probe(buf, buf_size, filename);
972              if (score > score_max) {
973                  score_max = score;
974                  drv = d;
975              }
976          }
977      }
978  
979      return drv;
980  }
981  
982  static int find_image_format(BlockBackend *file, const char *filename,
983                               BlockDriver **pdrv, Error **errp)
984  {
985      BlockDriver *drv;
986      uint8_t buf[BLOCK_PROBE_BUF_SIZE];
987      int ret = 0;
988  
989      /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
990      if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
991          *pdrv = &bdrv_raw;
992          return ret;
993      }
994  
995      ret = blk_pread(file, 0, buf, sizeof(buf));
996      if (ret < 0) {
997          error_setg_errno(errp, -ret, "Could not read image for determining its "
998                           "format");
999          *pdrv = NULL;
1000          return ret;
1001      }
1002  
1003      drv = bdrv_probe_all(buf, ret, filename);
1004      if (!drv) {
1005          error_setg(errp, "Could not determine image format: No compatible "
1006                     "driver found");
1007          ret = -ENOENT;
1008      }
1009      *pdrv = drv;
1010      return ret;
1011  }
1012  
1013  /**
1014   * Set the current 'total_sectors' value
1015   * Return 0 on success, -errno on error.
1016   */
1017  int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
1018  {
1019      BlockDriver *drv = bs->drv;
1020  
1021      if (!drv) {
1022          return -ENOMEDIUM;
1023      }
1024  
1025      /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
1026      if (bdrv_is_sg(bs))
1027          return 0;
1028  
1029      /* query actual device if possible, otherwise just trust the hint */
1030      if (drv->bdrv_getlength) {
1031          int64_t length = drv->bdrv_getlength(bs);
1032          if (length < 0) {
1033              return length;
1034          }
1035          hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
1036      }
1037  
1038      bs->total_sectors = hint;
1039  
1040      if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
1041          return -EFBIG;
1042      }
1043  
1044      return 0;
1045  }
1046  
1047  /**
1048   * Combines a QDict of new block driver @options with any missing options taken
1049   * from @old_options, so that leaving out an option defaults to its old value.
1050   */
1051  static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1052                                QDict *old_options)
1053  {
1054      if (bs->drv && bs->drv->bdrv_join_options) {
1055          bs->drv->bdrv_join_options(options, old_options);
1056      } else {
1057          qdict_join(options, old_options, false);
1058      }
1059  }
1060  
1061  static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1062                                                              int open_flags,
1063                                                              Error **errp)
1064  {
1065      Error *local_err = NULL;
1066      char *value = qemu_opt_get_del(opts, "detect-zeroes");
1067      BlockdevDetectZeroesOptions detect_zeroes =
1068          qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1069                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
1070      g_free(value);
1071      if (local_err) {
1072          error_propagate(errp, local_err);
1073          return detect_zeroes;
1074      }
1075  
1076      if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1077          !(open_flags & BDRV_O_UNMAP))
1078      {
1079          error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1080                     "without setting discard operation to unmap");
1081      }
1082  
1083      return detect_zeroes;
1084  }
1085  
1086  /**
1087   * Set open flags for aio engine
1088   *
1089   * Return 0 on success, -1 if the engine specified is invalid
1090   */
1091  int bdrv_parse_aio(const char *mode, int *flags)
1092  {
1093      if (!strcmp(mode, "threads")) {
1094          /* do nothing, default */
1095      } else if (!strcmp(mode, "native")) {
1096          *flags |= BDRV_O_NATIVE_AIO;
1097  #ifdef CONFIG_LINUX_IO_URING
1098      } else if (!strcmp(mode, "io_uring")) {
1099          *flags |= BDRV_O_IO_URING;
1100  #endif
1101      } else {
1102          return -1;
1103      }
1104  
1105      return 0;
1106  }
1107  
1108  /**
1109   * Set open flags for a given discard mode
1110   *
1111   * Return 0 on success, -1 if the discard mode was invalid.
1112   */
1113  int bdrv_parse_discard_flags(const char *mode, int *flags)
1114  {
1115      *flags &= ~BDRV_O_UNMAP;
1116  
1117      if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1118          /* do nothing */
1119      } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1120          *flags |= BDRV_O_UNMAP;
1121      } else {
1122          return -1;
1123      }
1124  
1125      return 0;
1126  }
1127  
1128  /**
1129   * Set open flags for a given cache mode
1130   *
1131   * Return 0 on success, -1 if the cache mode was invalid.
1132   */
1133  int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
1134  {
1135      *flags &= ~BDRV_O_CACHE_MASK;
1136  
1137      if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
1138          *writethrough = false;
1139          *flags |= BDRV_O_NOCACHE;
1140      } else if (!strcmp(mode, "directsync")) {
1141          *writethrough = true;
1142          *flags |= BDRV_O_NOCACHE;
1143      } else if (!strcmp(mode, "writeback")) {
1144          *writethrough = false;
1145      } else if (!strcmp(mode, "unsafe")) {
1146          *writethrough = false;
1147          *flags |= BDRV_O_NO_FLUSH;
1148      } else if (!strcmp(mode, "writethrough")) {
1149          *writethrough = true;
1150      } else {
1151          return -1;
1152      }
1153  
1154      return 0;
1155  }
1156  
1157  static char *bdrv_child_get_parent_desc(BdrvChild *c)
1158  {
1159      BlockDriverState *parent = c->opaque;
1160      return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
1161  }
1162  
1163  static void bdrv_child_cb_drained_begin(BdrvChild *child)
1164  {
1165      BlockDriverState *bs = child->opaque;
1166      bdrv_do_drained_begin_quiesce(bs, NULL, false);
1167  }
1168  
1169  static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1170  {
1171      BlockDriverState *bs = child->opaque;
1172      return bdrv_drain_poll(bs, false, NULL, false);
1173  }
1174  
1175  static void bdrv_child_cb_drained_end(BdrvChild *child,
1176                                        int *drained_end_counter)
1177  {
1178      BlockDriverState *bs = child->opaque;
1179      bdrv_drained_end_no_poll(bs, drained_end_counter);
1180  }
1181  
1182  static int bdrv_child_cb_inactivate(BdrvChild *child)
1183  {
1184      BlockDriverState *bs = child->opaque;
1185      assert(bs->open_flags & BDRV_O_INACTIVE);
1186      return 0;
1187  }
1188  
1189  static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1190                                            GSList **ignore, Error **errp)
1191  {
1192      BlockDriverState *bs = child->opaque;
1193      return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
1194  }
1195  
1196  static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1197                                        GSList **ignore)
1198  {
1199      BlockDriverState *bs = child->opaque;
1200      return bdrv_set_aio_context_ignore(bs, ctx, ignore);
1201  }
1202  
1203  /*
1204   * Returns the options and flags that a temporary snapshot should get, based on
1205   * the originally requested flags (the originally requested image will have
1206   * flags like a backing file)
1207   */
1208  static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1209                                         int parent_flags, QDict *parent_options)
1210  {
1211      *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1212  
1213      /* For temporary files, unconditional cache=unsafe is fine */
1214      qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1215      qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
1216  
1217      /* Copy the read-only and discard options from the parent */
1218      qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1219      qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
1220  
1221      /* aio=native doesn't work for cache.direct=off, so disable it for the
1222       * temporary snapshot */
1223      *child_flags &= ~BDRV_O_NATIVE_AIO;
1224  }
1225  
1226  static void bdrv_backing_attach(BdrvChild *c)
1227  {
1228      BlockDriverState *parent = c->opaque;
1229      BlockDriverState *backing_hd = c->bs;
1230  
1231      assert(!parent->backing_blocker);
1232      error_setg(&parent->backing_blocker,
1233                 "node is used as backing hd of '%s'",
1234                 bdrv_get_device_or_node_name(parent));
1235  
1236      bdrv_refresh_filename(backing_hd);
1237  
1238      parent->open_flags &= ~BDRV_O_NO_BACKING;
1239  
1240      bdrv_op_block_all(backing_hd, parent->backing_blocker);
1241      /* Otherwise we won't be able to commit or stream */
1242      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1243                      parent->backing_blocker);
1244      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1245                      parent->backing_blocker);
1246      /*
1247       * We do backup in 3 ways:
1248       * 1. drive backup
1249       *    The target bs is new opened, and the source is top BDS
1250       * 2. blockdev backup
1251       *    Both the source and the target are top BDSes.
1252       * 3. internal backup(used for block replication)
1253       *    Both the source and the target are backing file
1254       *
1255       * In case 1 and 2, neither the source nor the target is the backing file.
1256       * In case 3, we will block the top BDS, so there is only one block job
1257       * for the top BDS and its backing chain.
1258       */
1259      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1260                      parent->backing_blocker);
1261      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1262                      parent->backing_blocker);
1263  }
1264  
1265  static void bdrv_backing_detach(BdrvChild *c)
1266  {
1267      BlockDriverState *parent = c->opaque;
1268  
1269      assert(parent->backing_blocker);
1270      bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1271      error_free(parent->backing_blocker);
1272      parent->backing_blocker = NULL;
1273  }
1274  
1275  static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1276                                          const char *filename, Error **errp)
1277  {
1278      BlockDriverState *parent = c->opaque;
1279      bool read_only = bdrv_is_read_only(parent);
1280      int ret;
1281  
1282      if (read_only) {
1283          ret = bdrv_reopen_set_read_only(parent, false, errp);
1284          if (ret < 0) {
1285              return ret;
1286          }
1287      }
1288  
1289      ret = bdrv_change_backing_file(parent, filename,
1290                                     base->drv ? base->drv->format_name : "",
1291                                     false);
1292      if (ret < 0) {
1293          error_setg_errno(errp, -ret, "Could not update backing file link");
1294      }
1295  
1296      if (read_only) {
1297          bdrv_reopen_set_read_only(parent, true, NULL);
1298      }
1299  
1300      return ret;
1301  }
1302  
1303  /*
1304   * Returns the options and flags that a generic child of a BDS should
1305   * get, based on the given options and flags for the parent BDS.
1306   */
1307  static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1308                                     int *child_flags, QDict *child_options,
1309                                     int parent_flags, QDict *parent_options)
1310  {
1311      int flags = parent_flags;
1312  
1313      /*
1314       * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1315       * Generally, the question to answer is: Should this child be
1316       * format-probed by default?
1317       */
1318  
1319      /*
1320       * Pure and non-filtered data children of non-format nodes should
1321       * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1322       * set).  This only affects a very limited set of drivers (namely
1323       * quorum and blkverify when this comment was written).
1324       * Force-clear BDRV_O_PROTOCOL then.
1325       */
1326      if (!parent_is_format &&
1327          (role & BDRV_CHILD_DATA) &&
1328          !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1329      {
1330          flags &= ~BDRV_O_PROTOCOL;
1331      }
1332  
1333      /*
1334       * All children of format nodes (except for COW children) and all
1335       * metadata children in general should never be format-probed.
1336       * Force-set BDRV_O_PROTOCOL then.
1337       */
1338      if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1339          (role & BDRV_CHILD_METADATA))
1340      {
1341          flags |= BDRV_O_PROTOCOL;
1342      }
1343  
1344      /*
1345       * If the cache mode isn't explicitly set, inherit direct and no-flush from
1346       * the parent.
1347       */
1348      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1349      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1350      qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1351  
1352      if (role & BDRV_CHILD_COW) {
1353          /* backing files are opened read-only by default */
1354          qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1355          qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1356      } else {
1357          /* Inherit the read-only option from the parent if it's not set */
1358          qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1359          qdict_copy_default(child_options, parent_options,
1360                             BDRV_OPT_AUTO_READ_ONLY);
1361      }
1362  
1363      /*
1364       * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1365       * can default to enable it on lower layers regardless of the
1366       * parent option.
1367       */
1368      qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1369  
1370      /* Clear flags that only apply to the top layer */
1371      flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1372  
1373      if (role & BDRV_CHILD_METADATA) {
1374          flags &= ~BDRV_O_NO_IO;
1375      }
1376      if (role & BDRV_CHILD_COW) {
1377          flags &= ~BDRV_O_TEMPORARY;
1378      }
1379  
1380      *child_flags = flags;
1381  }
1382  
1383  static void bdrv_child_cb_attach(BdrvChild *child)
1384  {
1385      BlockDriverState *bs = child->opaque;
1386  
1387      if (child->role & BDRV_CHILD_COW) {
1388          bdrv_backing_attach(child);
1389      }
1390  
1391      bdrv_apply_subtree_drain(child, bs);
1392  }
1393  
1394  static void bdrv_child_cb_detach(BdrvChild *child)
1395  {
1396      BlockDriverState *bs = child->opaque;
1397  
1398      if (child->role & BDRV_CHILD_COW) {
1399          bdrv_backing_detach(child);
1400      }
1401  
1402      bdrv_unapply_subtree_drain(child, bs);
1403  }
1404  
1405  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1406                                           const char *filename, Error **errp)
1407  {
1408      if (c->role & BDRV_CHILD_COW) {
1409          return bdrv_backing_update_filename(c, base, filename, errp);
1410      }
1411      return 0;
1412  }
1413  
1414  AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
1415  {
1416      BlockDriverState *bs = c->opaque;
1417  
1418      return bdrv_get_aio_context(bs);
1419  }
1420  
1421  const BdrvChildClass child_of_bds = {
1422      .parent_is_bds   = true,
1423      .get_parent_desc = bdrv_child_get_parent_desc,
1424      .inherit_options = bdrv_inherited_options,
1425      .drained_begin   = bdrv_child_cb_drained_begin,
1426      .drained_poll    = bdrv_child_cb_drained_poll,
1427      .drained_end     = bdrv_child_cb_drained_end,
1428      .attach          = bdrv_child_cb_attach,
1429      .detach          = bdrv_child_cb_detach,
1430      .inactivate      = bdrv_child_cb_inactivate,
1431      .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
1432      .set_aio_ctx     = bdrv_child_cb_set_aio_ctx,
1433      .update_filename = bdrv_child_cb_update_filename,
1434      .get_parent_aio_context = child_of_bds_get_parent_aio_context,
1435  };
1436  
1437  AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
1438  {
1439      return c->klass->get_parent_aio_context(c);
1440  }
1441  
1442  static int bdrv_open_flags(BlockDriverState *bs, int flags)
1443  {
1444      int open_flags = flags;
1445  
1446      /*
1447       * Clear flags that are internal to the block layer before opening the
1448       * image.
1449       */
1450      open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1451  
1452      return open_flags;
1453  }
1454  
1455  static void update_flags_from_options(int *flags, QemuOpts *opts)
1456  {
1457      *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
1458  
1459      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1460          *flags |= BDRV_O_NO_FLUSH;
1461      }
1462  
1463      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1464          *flags |= BDRV_O_NOCACHE;
1465      }
1466  
1467      if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
1468          *flags |= BDRV_O_RDWR;
1469      }
1470  
1471      if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1472          *flags |= BDRV_O_AUTO_RDONLY;
1473      }
1474  }
1475  
1476  static void update_options_from_flags(QDict *options, int flags)
1477  {
1478      if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1479          qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1480      }
1481      if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1482          qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1483                         flags & BDRV_O_NO_FLUSH);
1484      }
1485      if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1486          qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1487      }
1488      if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1489          qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1490                         flags & BDRV_O_AUTO_RDONLY);
1491      }
1492  }
1493  
1494  static void bdrv_assign_node_name(BlockDriverState *bs,
1495                                    const char *node_name,
1496                                    Error **errp)
1497  {
1498      char *gen_node_name = NULL;
1499  
1500      if (!node_name) {
1501          node_name = gen_node_name = id_generate(ID_BLOCK);
1502      } else if (!id_wellformed(node_name)) {
1503          /*
1504           * Check for empty string or invalid characters, but not if it is
1505           * generated (generated names use characters not available to the user)
1506           */
1507          error_setg(errp, "Invalid node-name: '%s'", node_name);
1508          return;
1509      }
1510  
1511      /* takes care of avoiding namespaces collisions */
1512      if (blk_by_name(node_name)) {
1513          error_setg(errp, "node-name=%s is conflicting with a device id",
1514                     node_name);
1515          goto out;
1516      }
1517  
1518      /* takes care of avoiding duplicates node names */
1519      if (bdrv_find_node(node_name)) {
1520          error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
1521          goto out;
1522      }
1523  
1524      /* Make sure that the node name isn't truncated */
1525      if (strlen(node_name) >= sizeof(bs->node_name)) {
1526          error_setg(errp, "Node name too long");
1527          goto out;
1528      }
1529  
1530      /* copy node name into the bs and insert it into the graph list */
1531      pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1532      QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1533  out:
1534      g_free(gen_node_name);
1535  }
1536  
1537  static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1538                              const char *node_name, QDict *options,
1539                              int open_flags, Error **errp)
1540  {
1541      Error *local_err = NULL;
1542      int i, ret;
1543  
1544      bdrv_assign_node_name(bs, node_name, &local_err);
1545      if (local_err) {
1546          error_propagate(errp, local_err);
1547          return -EINVAL;
1548      }
1549  
1550      bs->drv = drv;
1551      bs->opaque = g_malloc0(drv->instance_size);
1552  
1553      if (drv->bdrv_file_open) {
1554          assert(!drv->bdrv_needs_filename || bs->filename[0]);
1555          ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1556      } else if (drv->bdrv_open) {
1557          ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1558      } else {
1559          ret = 0;
1560      }
1561  
1562      if (ret < 0) {
1563          if (local_err) {
1564              error_propagate(errp, local_err);
1565          } else if (bs->filename[0]) {
1566              error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1567          } else {
1568              error_setg_errno(errp, -ret, "Could not open image");
1569          }
1570          goto open_failed;
1571      }
1572  
1573      ret = refresh_total_sectors(bs, bs->total_sectors);
1574      if (ret < 0) {
1575          error_setg_errno(errp, -ret, "Could not refresh total sector count");
1576          return ret;
1577      }
1578  
1579      bdrv_refresh_limits(bs, NULL, &local_err);
1580      if (local_err) {
1581          error_propagate(errp, local_err);
1582          return -EINVAL;
1583      }
1584  
1585      assert(bdrv_opt_mem_align(bs) != 0);
1586      assert(bdrv_min_mem_align(bs) != 0);
1587      assert(is_power_of_2(bs->bl.request_alignment));
1588  
1589      for (i = 0; i < bs->quiesce_counter; i++) {
1590          if (drv->bdrv_co_drain_begin) {
1591              drv->bdrv_co_drain_begin(bs);
1592          }
1593      }
1594  
1595      return 0;
1596  open_failed:
1597      bs->drv = NULL;
1598      if (bs->file != NULL) {
1599          bdrv_unref_child(bs, bs->file);
1600          bs->file = NULL;
1601      }
1602      g_free(bs->opaque);
1603      bs->opaque = NULL;
1604      return ret;
1605  }
1606  
1607  /*
1608   * Create and open a block node.
1609   *
1610   * @options is a QDict of options to pass to the block drivers, or NULL for an
1611   * empty set of options. The reference to the QDict belongs to the block layer
1612   * after the call (even on failure), so if the caller intends to reuse the
1613   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
1614   */
1615  BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
1616                                              const char *node_name,
1617                                              QDict *options, int flags,
1618                                              Error **errp)
1619  {
1620      BlockDriverState *bs;
1621      int ret;
1622  
1623      bs = bdrv_new();
1624      bs->open_flags = flags;
1625      bs->options = options ?: qdict_new();
1626      bs->explicit_options = qdict_clone_shallow(bs->options);
1627      bs->opaque = NULL;
1628  
1629      update_options_from_flags(bs->options, flags);
1630  
1631      ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1632      if (ret < 0) {
1633          qobject_unref(bs->explicit_options);
1634          bs->explicit_options = NULL;
1635          qobject_unref(bs->options);
1636          bs->options = NULL;
1637          bdrv_unref(bs);
1638          return NULL;
1639      }
1640  
1641      return bs;
1642  }
1643  
1644  /* Create and open a block node. */
1645  BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1646                                         int flags, Error **errp)
1647  {
1648      return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
1649  }
1650  
/*
 * Options that are handled by the generic block layer rather than by an
 * individual driver.  bdrv_open_common() absorbs them from the options
 * QDict into a QemuOpts created from this list.
 */
QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        {
            .name = BDRV_OPT_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "Node is opened in read-only mode",
        },
        {
            .name = BDRV_OPT_AUTO_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "Node can become read-only if opening read-write fails",
        },
        {
            /* Parsed by bdrv_parse_detect_zeroes() */
            .name = "detect-zeroes",
            .type = QEMU_OPT_STRING,
            .help = "try to optimize zero writes (off, on, unmap)",
        },
        {
            /* Parsed by bdrv_parse_discard_flags() */
            .name = BDRV_OPT_DISCARD,
            .type = QEMU_OPT_STRING,
            .help = "discard operation (ignore/off, unmap/on)",
        },
        {
            .name = BDRV_OPT_FORCE_SHARE,
            .type = QEMU_OPT_BOOL,
            .help = "always accept other writers (default: off)",
        },
        { /* end of list */ }
    },
};
1703  
/*
 * Minimal set of image creation options: a size and a preallocation mode.
 * NOTE(review): presumably used for drivers without their own creation
 * options; the users of this list are outside this part of the file.
 */
QemuOptsList bdrv_create_opts_simple = {
    .name = "simple-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        {
            .name = BLOCK_OPT_PREALLOC,
            .type = QEMU_OPT_STRING,
            .help = "Preallocation mode (allowed values: off)"
        },
        { /* end of list */ }
    }
};
1721  
1722  /*
1723   * Common part for opening disk images and files
1724   *
1725   * Removes all processed options from *options.
1726   */
1727  static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1728                              QDict *options, Error **errp)
1729  {
1730      int ret, open_flags;
1731      const char *filename;
1732      const char *driver_name = NULL;
1733      const char *node_name = NULL;
1734      const char *discard;
1735      QemuOpts *opts;
1736      BlockDriver *drv;
1737      Error *local_err = NULL;
1738      bool ro;
1739  
1740      assert(bs->file == NULL);
1741      assert(options != NULL && bs->options != options);
1742  
1743      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1744      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1745          ret = -EINVAL;
1746          goto fail_opts;
1747      }
1748  
1749      update_flags_from_options(&bs->open_flags, opts);
1750  
1751      driver_name = qemu_opt_get(opts, "driver");
1752      drv = bdrv_find_format(driver_name);
1753      assert(drv != NULL);
1754  
1755      bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1756  
1757      if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1758          error_setg(errp,
1759                     BDRV_OPT_FORCE_SHARE
1760                     "=on can only be used with read-only images");
1761          ret = -EINVAL;
1762          goto fail_opts;
1763      }
1764  
1765      if (file != NULL) {
1766          bdrv_refresh_filename(blk_bs(file));
1767          filename = blk_bs(file)->filename;
1768      } else {
1769          /*
1770           * Caution: while qdict_get_try_str() is fine, getting
1771           * non-string types would require more care.  When @options
1772           * come from -blockdev or blockdev_add, its members are typed
1773           * according to the QAPI schema, but when they come from
1774           * -drive, they're all QString.
1775           */
1776          filename = qdict_get_try_str(options, "filename");
1777      }
1778  
1779      if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1780          error_setg(errp, "The '%s' block driver requires a file name",
1781                     drv->format_name);
1782          ret = -EINVAL;
1783          goto fail_opts;
1784      }
1785  
1786      trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1787                             drv->format_name);
1788  
1789      ro = bdrv_is_read_only(bs);
1790  
1791      if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
1792          if (!ro && bdrv_is_whitelisted(drv, true)) {
1793              ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
1794          } else {
1795              ret = -ENOTSUP;
1796          }
1797          if (ret < 0) {
1798              error_setg(errp,
1799                         !ro && bdrv_is_whitelisted(drv, true)
1800                         ? "Driver '%s' can only be used for read-only devices"
1801                         : "Driver '%s' is not whitelisted",
1802                         drv->format_name);
1803              goto fail_opts;
1804          }
1805      }
1806  
1807      /* bdrv_new() and bdrv_close() make it so */
1808      assert(qatomic_read(&bs->copy_on_read) == 0);
1809  
1810      if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1811          if (!ro) {
1812              bdrv_enable_copy_on_read(bs);
1813          } else {
1814              error_setg(errp, "Can't use copy-on-read on read-only device");
1815              ret = -EINVAL;
1816              goto fail_opts;
1817          }
1818      }
1819  
1820      discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
1821      if (discard != NULL) {
1822          if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1823              error_setg(errp, "Invalid discard option");
1824              ret = -EINVAL;
1825              goto fail_opts;
1826          }
1827      }
1828  
1829      bs->detect_zeroes =
1830          bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1831      if (local_err) {
1832          error_propagate(errp, local_err);
1833          ret = -EINVAL;
1834          goto fail_opts;
1835      }
1836  
1837      if (filename != NULL) {
1838          pstrcpy(bs->filename, sizeof(bs->filename), filename);
1839      } else {
1840          bs->filename[0] = '\0';
1841      }
1842      pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1843  
1844      /* Open the image, either directly or using a protocol */
1845      open_flags = bdrv_open_flags(bs, bs->open_flags);
1846      node_name = qemu_opt_get(opts, "node-name");
1847  
1848      assert(!drv->bdrv_file_open || file == NULL);
1849      ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1850      if (ret < 0) {
1851          goto fail_opts;
1852      }
1853  
1854      qemu_opts_del(opts);
1855      return 0;
1856  
1857  fail_opts:
1858      qemu_opts_del(opts);
1859      return ret;
1860  }
1861  
1862  static QDict *parse_json_filename(const char *filename, Error **errp)
1863  {
1864      QObject *options_obj;
1865      QDict *options;
1866      int ret;
1867  
1868      ret = strstart(filename, "json:", &filename);
1869      assert(ret);
1870  
1871      options_obj = qobject_from_json(filename, errp);
1872      if (!options_obj) {
1873          error_prepend(errp, "Could not parse the JSON options: ");
1874          return NULL;
1875      }
1876  
1877      options = qobject_to(QDict, options_obj);
1878      if (!options) {
1879          qobject_unref(options_obj);
1880          error_setg(errp, "Invalid JSON object given");
1881          return NULL;
1882      }
1883  
1884      qdict_flatten(options);
1885  
1886      return options;
1887  }
1888  
1889  static void parse_json_protocol(QDict *options, const char **pfilename,
1890                                  Error **errp)
1891  {
1892      QDict *json_options;
1893      Error *local_err = NULL;
1894  
1895      /* Parse json: pseudo-protocol */
1896      if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1897          return;
1898      }
1899  
1900      json_options = parse_json_filename(*pfilename, &local_err);
1901      if (local_err) {
1902          error_propagate(errp, local_err);
1903          return;
1904      }
1905  
1906      /* Options given in the filename have lower priority than options
1907       * specified directly */
1908      qdict_join(options, json_options, false);
1909      qobject_unref(json_options);
1910      *pfilename = NULL;
1911  }
1912  
1913  /*
1914   * Fills in default options for opening images and converts the legacy
1915   * filename/flags pair to option QDict entries.
1916   * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1917   * block driver has been specified explicitly.
1918   */
1919  static int bdrv_fill_options(QDict **options, const char *filename,
1920                               int *flags, Error **errp)
1921  {
1922      const char *drvname;
1923      bool protocol = *flags & BDRV_O_PROTOCOL;
1924      bool parse_filename = false;
1925      BlockDriver *drv = NULL;
1926      Error *local_err = NULL;
1927  
1928      /*
1929       * Caution: while qdict_get_try_str() is fine, getting non-string
1930       * types would require more care.  When @options come from
1931       * -blockdev or blockdev_add, its members are typed according to
1932       * the QAPI schema, but when they come from -drive, they're all
1933       * QString.
1934       */
1935      drvname = qdict_get_try_str(*options, "driver");
1936      if (drvname) {
1937          drv = bdrv_find_format(drvname);
1938          if (!drv) {
1939              error_setg(errp, "Unknown driver '%s'", drvname);
1940              return -ENOENT;
1941          }
1942          /* If the user has explicitly specified the driver, this choice should
1943           * override the BDRV_O_PROTOCOL flag */
1944          protocol = drv->bdrv_file_open;
1945      }
1946  
1947      if (protocol) {
1948          *flags |= BDRV_O_PROTOCOL;
1949      } else {
1950          *flags &= ~BDRV_O_PROTOCOL;
1951      }
1952  
1953      /* Translate cache options from flags into options */
1954      update_options_from_flags(*options, *flags);
1955  
1956      /* Fetch the file name from the options QDict if necessary */
1957      if (protocol && filename) {
1958          if (!qdict_haskey(*options, "filename")) {
1959              qdict_put_str(*options, "filename", filename);
1960              parse_filename = true;
1961          } else {
1962              error_setg(errp, "Can't specify 'file' and 'filename' options at "
1963                               "the same time");
1964              return -EINVAL;
1965          }
1966      }
1967  
1968      /* Find the right block driver */
1969      /* See cautionary note on accessing @options above */
1970      filename = qdict_get_try_str(*options, "filename");
1971  
1972      if (!drvname && protocol) {
1973          if (filename) {
1974              drv = bdrv_find_protocol(filename, parse_filename, errp);
1975              if (!drv) {
1976                  return -EINVAL;
1977              }
1978  
1979              drvname = drv->format_name;
1980              qdict_put_str(*options, "driver", drvname);
1981          } else {
1982              error_setg(errp, "Must specify either driver or file");
1983              return -EINVAL;
1984          }
1985      }
1986  
1987      assert(drv || !protocol);
1988  
1989      /* Driver-specific filename parsing */
1990      if (drv && drv->bdrv_parse_filename && parse_filename) {
1991          drv->bdrv_parse_filename(filename, *options, &local_err);
1992          if (local_err) {
1993              error_propagate(errp, local_err);
1994              return -EINVAL;
1995          }
1996  
1997          if (!drv->bdrv_needs_filename) {
1998              qdict_del(*options, "filename");
1999          }
2000      }
2001  
2002      return 0;
2003  }
2004  
/*
 * One element of a BlockReopenQueue: the reopen state of a single node
 * together with per-entry bookkeeping flags.
 */
typedef struct BlockReopenQueueEntry {
     /* NOTE(review): presumably set once the prepare step succeeded - confirm */
     bool prepared;
     /* NOTE(review): presumably set once permissions were checked - confirm */
     bool perms_checked;
     /* state.bs and state.flags are what bdrv_reopen_get_flags() looks at */
     BDRVReopenState state;
     /* Linkage used when iterating the queue with QTAILQ_FOREACH() */
     QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
2011  
2012  /*
2013   * Return the flags that @bs will have after the reopens in @q have
2014   * successfully completed. If @q is NULL (or @bs is not contained in @q),
2015   * return the current flags.
2016   */
2017  static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
2018  {
2019      BlockReopenQueueEntry *entry;
2020  
2021      if (q != NULL) {
2022          QTAILQ_FOREACH(entry, q, entry) {
2023              if (entry->state.bs == bs) {
2024                  return entry->state.flags;
2025              }
2026          }
2027      }
2028  
2029      return bs->open_flags;
2030  }
2031  
2032  /* Returns whether the image file can be written to after the reopen queue @q
2033   * has been successfully applied, or right now if @q is NULL. */
2034  static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
2035                                            BlockReopenQueue *q)
2036  {
2037      int flags = bdrv_reopen_get_flags(q, bs);
2038  
2039      return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
2040  }
2041  
2042  /*
2043   * Return whether the BDS can be written to.  This is not necessarily
2044   * the same as !bdrv_is_read_only(bs), as inactivated images may not
2045   * be written to but do not count as read-only images.
2046   */
2047  bool bdrv_is_writable(BlockDriverState *bs)
2048  {
2049      return bdrv_is_writable_after_reopen(bs, NULL);
2050  }
2051  
2052  static char *bdrv_child_user_desc(BdrvChild *c)
2053  {
2054      return c->klass->get_parent_desc(c);
2055  }
2056  
2057  /*
2058   * Check that @a allows everything that @b needs. @a and @b must reference same
2059   * child node.
2060   */
2061  static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
2062  {
2063      const char *child_bs_name;
2064      g_autofree char *a_user = NULL;
2065      g_autofree char *b_user = NULL;
2066      g_autofree char *perms = NULL;
2067  
2068      assert(a->bs);
2069      assert(a->bs == b->bs);
2070  
2071      if ((b->perm & a->shared_perm) == b->perm) {
2072          return true;
2073      }
2074  
2075      child_bs_name = bdrv_get_node_name(b->bs);
2076      a_user = bdrv_child_user_desc(a);
2077      b_user = bdrv_child_user_desc(b);
2078      perms = bdrv_perm_names(b->perm & ~a->shared_perm);
2079  
2080      error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
2081                 "both required by %s (uses node '%s' as '%s' child) and "
2082                 "unshared by %s (uses node '%s' as '%s' child).",
2083                 child_bs_name, perms,
2084                 b_user, child_bs_name, b->name,
2085                 a_user, child_bs_name, a->name);
2086  
2087      return false;
2088  }
2089  
2090  static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
2091  {
2092      BdrvChild *a, *b;
2093  
2094      /*
2095       * During the loop we'll look at each pair twice. That's correct because
2096       * bdrv_a_allow_b() is asymmetric and we should check each pair in both
2097       * directions.
2098       */
2099      QLIST_FOREACH(a, &bs->parents, next_parent) {
2100          QLIST_FOREACH(b, &bs->parents, next_parent) {
2101              if (a == b) {
2102                  continue;
2103              }
2104  
2105              if (!bdrv_a_allow_b(a, b, errp)) {
2106                  return true;
2107              }
2108          }
2109      }
2110  
2111      return false;
2112  }
2113  
2114  static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
2115                              BdrvChild *c, BdrvChildRole role,
2116                              BlockReopenQueue *reopen_queue,
2117                              uint64_t parent_perm, uint64_t parent_shared,
2118                              uint64_t *nperm, uint64_t *nshared)
2119  {
2120      assert(bs->drv && bs->drv->bdrv_child_perm);
2121      bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
2122                               parent_perm, parent_shared,
2123                               nperm, nshared);
2124      /* TODO Take force_share from reopen_queue */
2125      if (child_bs && child_bs->force_share) {
2126          *nshared = BLK_PERM_ALL;
2127      }
2128  }
2129  
2130  /*
2131   * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
2132   * nodes that are already in the @list, of course) so that final list is
2133   * topologically sorted. Return the result (GSList @list object is updated, so
2134   * don't use old reference after function call).
2135   *
2136   * On function start @list must be already topologically sorted and for any node
2137   * in the @list the whole subtree of the node must be in the @list as well. The
2138   * simplest way to satisfy this criteria: use only result of
2139   * bdrv_topological_dfs() or NULL as @list parameter.
2140   */
2141  static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
2142                                      BlockDriverState *bs)
2143  {
2144      BdrvChild *child;
2145      g_autoptr(GHashTable) local_found = NULL;
2146  
2147      if (!found) {
2148          assert(!list);
2149          found = local_found = g_hash_table_new(NULL, NULL);
2150      }
2151  
2152      if (g_hash_table_contains(found, bs)) {
2153          return list;
2154      }
2155      g_hash_table_add(found, bs);
2156  
2157      QLIST_FOREACH(child, &bs->children, next) {
2158          list = bdrv_topological_dfs(list, found, child->bs);
2159      }
2160  
2161      return g_slist_prepend(list, bs);
2162  }
2163  
/*
 * Undo record created by bdrv_child_set_perm(): remembers the permissions
 * a child had before they were overwritten, so that
 * bdrv_child_set_perm_abort() can restore them.
 */
typedef struct BdrvChildSetPermState {
    BdrvChild *child;           /* the child whose permissions were changed */
    uint64_t old_perm;          /* child->perm before the change */
    uint64_t old_shared_perm;   /* child->shared_perm before the change */
} BdrvChildSetPermState;
2169  
2170  static void bdrv_child_set_perm_abort(void *opaque)
2171  {
2172      BdrvChildSetPermState *s = opaque;
2173  
2174      s->child->perm = s->old_perm;
2175      s->child->shared_perm = s->old_shared_perm;
2176  }
2177  
2178  static TransactionActionDrv bdrv_child_set_pem_drv = {
2179      .abort = bdrv_child_set_perm_abort,
2180      .clean = g_free,
2181  };
2182  
2183  static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
2184                                  uint64_t shared, Transaction *tran)
2185  {
2186      BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
2187  
2188      *s = (BdrvChildSetPermState) {
2189          .child = c,
2190          .old_perm = c->perm,
2191          .old_shared_perm = c->shared_perm,
2192      };
2193  
2194      c->perm = perm;
2195      c->shared_perm = shared;
2196  
2197      tran_add(tran, &bdrv_child_set_pem_drv, s);
2198  }
2199  
2200  static void bdrv_drv_set_perm_commit(void *opaque)
2201  {
2202      BlockDriverState *bs = opaque;
2203      uint64_t cumulative_perms, cumulative_shared_perms;
2204  
2205      if (bs->drv->bdrv_set_perm) {
2206          bdrv_get_cumulative_perm(bs, &cumulative_perms,
2207                                   &cumulative_shared_perms);
2208          bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2209      }
2210  }
2211  
2212  static void bdrv_drv_set_perm_abort(void *opaque)
2213  {
2214      BlockDriverState *bs = opaque;
2215  
2216      if (bs->drv->bdrv_abort_perm_update) {
2217          bs->drv->bdrv_abort_perm_update(bs);
2218      }
2219  }
2220  
2221  TransactionActionDrv bdrv_drv_set_perm_drv = {
2222      .abort = bdrv_drv_set_perm_abort,
2223      .commit = bdrv_drv_set_perm_commit,
2224  };
2225  
2226  static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
2227                               uint64_t shared_perm, Transaction *tran,
2228                               Error **errp)
2229  {
2230      if (!bs->drv) {
2231          return 0;
2232      }
2233  
2234      if (bs->drv->bdrv_check_perm) {
2235          int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
2236          if (ret < 0) {
2237              return ret;
2238          }
2239      }
2240  
2241      if (tran) {
2242          tran_add(tran, &bdrv_drv_set_perm_drv, bs);
2243      }
2244  
2245      return 0;
2246  }
2247  
/*
 * Undo record created by bdrv_replace_child_tran(): remembers which node
 * a child pointed to before it was replaced.
 */
typedef struct BdrvReplaceChildState {
    BdrvChild *child;           /* the child whose node was replaced */
    BlockDriverState *old_bs;   /* child->bs before the replacement */
} BdrvReplaceChildState;
2252  
2253  static void bdrv_replace_child_commit(void *opaque)
2254  {
2255      BdrvReplaceChildState *s = opaque;
2256  
2257      bdrv_unref(s->old_bs);
2258  }
2259  
2260  static void bdrv_replace_child_abort(void *opaque)
2261  {
2262      BdrvReplaceChildState *s = opaque;
2263      BlockDriverState *new_bs = s->child->bs;
2264  
2265      /* old_bs reference is transparently moved from @s to @s->child */
2266      bdrv_replace_child_noperm(s->child, s->old_bs);
2267      bdrv_unref(new_bs);
2268  }
2269  
2270  static TransactionActionDrv bdrv_replace_child_drv = {
2271      .commit = bdrv_replace_child_commit,
2272      .abort = bdrv_replace_child_abort,
2273      .clean = g_free,
2274  };
2275  
2276  /*
2277   * bdrv_replace_child_tran
2278   *
2279   * Note: real unref of old_bs is done only on commit.
2280   *
2281   * The function doesn't update permissions, caller is responsible for this.
2282   */
2283  static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
2284                                      Transaction *tran)
2285  {
2286      BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
2287      *s = (BdrvReplaceChildState) {
2288          .child = child,
2289          .old_bs = child->bs,
2290      };
2291      tran_add(tran, &bdrv_replace_child_drv, s);
2292  
2293      if (new_bs) {
2294          bdrv_ref(new_bs);
2295      }
2296      bdrv_replace_child_noperm(child, new_bs);
2297      /* old_bs reference is transparently moved from @child to @s */
2298  }
2299  
2300  /*
2301   * Refresh permissions in @bs subtree. The function is intended to be called
2302   * after some graph modification that was done without permission update.
2303   */
2304  static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
2305                                    Transaction *tran, Error **errp)
2306  {
2307      BlockDriver *drv = bs->drv;
2308      BdrvChild *c;
2309      int ret;
2310      uint64_t cumulative_perms, cumulative_shared_perms;
2311  
2312      bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
2313  
2314      /* Write permissions never work with read-only images */
2315      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2316          !bdrv_is_writable_after_reopen(bs, q))
2317      {
2318          if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2319              error_setg(errp, "Block node is read-only");
2320          } else {
2321              error_setg(errp, "Read-only block node '%s' cannot support "
2322                         "read-write users", bdrv_get_node_name(bs));
2323          }
2324  
2325          return -EPERM;
2326      }
2327  
2328      /*
2329       * Unaligned requests will automatically be aligned to bl.request_alignment
2330       * and without RESIZE we can't extend requests to write to space beyond the
2331       * end of the image, so it's required that the image size is aligned.
2332       */
2333      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2334          !(cumulative_perms & BLK_PERM_RESIZE))
2335      {
2336          if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2337              error_setg(errp, "Cannot get 'write' permission without 'resize': "
2338                               "Image size is not a multiple of request "
2339                               "alignment");
2340              return -EPERM;
2341          }
2342      }
2343  
2344      /* Check this node */
2345      if (!drv) {
2346          return 0;
2347      }
2348  
2349      ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
2350                              errp);
2351      if (ret < 0) {
2352          return ret;
2353      }
2354  
2355      /* Drivers that never have children can omit .bdrv_child_perm() */
2356      if (!drv->bdrv_child_perm) {
2357          assert(QLIST_EMPTY(&bs->children));
2358          return 0;
2359      }
2360  
2361      /* Check all children */
2362      QLIST_FOREACH(c, &bs->children, next) {
2363          uint64_t cur_perm, cur_shared;
2364  
2365          bdrv_child_perm(bs, c->bs, c, c->role, q,
2366                          cumulative_perms, cumulative_shared_perms,
2367                          &cur_perm, &cur_shared);
2368          bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
2369      }
2370  
2371      return 0;
2372  }
2373  
2374  static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
2375                                     Transaction *tran, Error **errp)
2376  {
2377      int ret;
2378      BlockDriverState *bs;
2379  
2380      for ( ; list; list = list->next) {
2381          bs = list->data;
2382  
2383          if (bdrv_parent_perms_conflict(bs, errp)) {
2384              return -EINVAL;
2385          }
2386  
2387          ret = bdrv_node_refresh_perm(bs, q, tran, errp);
2388          if (ret < 0) {
2389              return ret;
2390          }
2391      }
2392  
2393      return 0;
2394  }
2395  
2396  void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2397                                uint64_t *shared_perm)
2398  {
2399      BdrvChild *c;
2400      uint64_t cumulative_perms = 0;
2401      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2402  
2403      QLIST_FOREACH(c, &bs->parents, next_parent) {
2404          cumulative_perms |= c->perm;
2405          cumulative_shared_perms &= c->shared_perm;
2406      }
2407  
2408      *perm = cumulative_perms;
2409      *shared_perm = cumulative_shared_perms;
2410  }
2411  
2412  char *bdrv_perm_names(uint64_t perm)
2413  {
2414      struct perm_name {
2415          uint64_t perm;
2416          const char *name;
2417      } permissions[] = {
2418          { BLK_PERM_CONSISTENT_READ, "consistent read" },
2419          { BLK_PERM_WRITE,           "write" },
2420          { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2421          { BLK_PERM_RESIZE,          "resize" },
2422          { BLK_PERM_GRAPH_MOD,       "change children" },
2423          { 0, NULL }
2424      };
2425  
2426      GString *result = g_string_sized_new(30);
2427      struct perm_name *p;
2428  
2429      for (p = permissions; p->name; p++) {
2430          if (perm & p->perm) {
2431              if (result->len > 0) {
2432                  g_string_append(result, ", ");
2433              }
2434              g_string_append(result, p->name);
2435          }
2436      }
2437  
2438      return g_string_free(result, FALSE);
2439  }
2440  
2441  
2442  static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
2443  {
2444      int ret;
2445      Transaction *tran = tran_new();
2446      g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
2447  
2448      ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
2449      tran_finalize(tran, ret);
2450  
2451      return ret;
2452  }
2453  
2454  int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2455                              Error **errp)
2456  {
2457      Error *local_err = NULL;
2458      Transaction *tran = tran_new();
2459      int ret;
2460  
2461      bdrv_child_set_perm(c, perm, shared, tran);
2462  
2463      ret = bdrv_refresh_perms(c->bs, &local_err);
2464  
2465      tran_finalize(tran, ret);
2466  
2467      if (ret < 0) {
2468          if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2469              /* tighten permissions */
2470              error_propagate(errp, local_err);
2471          } else {
2472              /*
2473               * Our caller may intend to only loosen restrictions and
2474               * does not expect this function to fail.  Errors are not
2475               * fatal in such a case, so we can just hide them from our
2476               * caller.
2477               */
2478              error_free(local_err);
2479              ret = 0;
2480          }
2481      }
2482  
2483      return ret;
2484  }
2485  
2486  int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2487  {
2488      uint64_t parent_perms, parent_shared;
2489      uint64_t perms, shared;
2490  
2491      bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
2492      bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2493                      parent_perms, parent_shared, &perms, &shared);
2494  
2495      return bdrv_child_try_set_perm(c, perms, shared, errp);
2496  }
2497  
2498  /*
2499   * Default implementation for .bdrv_child_perm() for block filters:
2500   * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2501   * filtered child.
2502   */
2503  static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
2504                                        BdrvChildRole role,
2505                                        BlockReopenQueue *reopen_queue,
2506                                        uint64_t perm, uint64_t shared,
2507                                        uint64_t *nperm, uint64_t *nshared)
2508  {
2509      *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2510      *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
2511  }
2512  
2513  static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
2514                                         BdrvChildRole role,
2515                                         BlockReopenQueue *reopen_queue,
2516                                         uint64_t perm, uint64_t shared,
2517                                         uint64_t *nperm, uint64_t *nshared)
2518  {
2519      assert(role & BDRV_CHILD_COW);
2520  
2521      /*
2522       * We want consistent read from backing files if the parent needs it.
2523       * No other operations are performed on backing files.
2524       */
2525      perm &= BLK_PERM_CONSISTENT_READ;
2526  
2527      /*
2528       * If the parent can deal with changing data, we're okay with a
2529       * writable and resizable backing file.
2530       * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2531       */
2532      if (shared & BLK_PERM_WRITE) {
2533          shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2534      } else {
2535          shared = 0;
2536      }
2537  
2538      shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
2539                BLK_PERM_WRITE_UNCHANGED;
2540  
2541      if (bs->open_flags & BDRV_O_INACTIVE) {
2542          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2543      }
2544  
2545      *nperm = perm;
2546      *nshared = shared;
2547  }
2548  
2549  static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
2550                                             BdrvChildRole role,
2551                                             BlockReopenQueue *reopen_queue,
2552                                             uint64_t perm, uint64_t shared,
2553                                             uint64_t *nperm, uint64_t *nshared)
2554  {
2555      int flags;
2556  
2557      assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
2558  
2559      flags = bdrv_reopen_get_flags(reopen_queue, bs);
2560  
2561      /*
2562       * Apart from the modifications below, the same permissions are
2563       * forwarded and left alone as for filters
2564       */
2565      bdrv_filter_default_perms(bs, c, role, reopen_queue,
2566                                perm, shared, &perm, &shared);
2567  
2568      if (role & BDRV_CHILD_METADATA) {
2569          /* Format drivers may touch metadata even if the guest doesn't write */
2570          if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2571              perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2572          }
2573  
2574          /*
2575           * bs->file always needs to be consistent because of the
2576           * metadata. We can never allow other users to resize or write
2577           * to it.
2578           */
2579          if (!(flags & BDRV_O_NO_IO)) {
2580              perm |= BLK_PERM_CONSISTENT_READ;
2581          }
2582          shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
2583      }
2584  
2585      if (role & BDRV_CHILD_DATA) {
2586          /*
2587           * Technically, everything in this block is a subset of the
2588           * BDRV_CHILD_METADATA path taken above, and so this could
2589           * be an "else if" branch.  However, that is not obvious, and
2590           * this function is not performance critical, therefore we let
2591           * this be an independent "if".
2592           */
2593  
2594          /*
2595           * We cannot allow other users to resize the file because the
2596           * format driver might have some assumptions about the size
2597           * (e.g. because it is stored in metadata, or because the file
2598           * is split into fixed-size data files).
2599           */
2600          shared &= ~BLK_PERM_RESIZE;
2601  
2602          /*
2603           * WRITE_UNCHANGED often cannot be performed as such on the
2604           * data file.  For example, the qcow2 driver may still need to
2605           * write copied clusters on copy-on-read.
2606           */
2607          if (perm & BLK_PERM_WRITE_UNCHANGED) {
2608              perm |= BLK_PERM_WRITE;
2609          }
2610  
2611          /*
2612           * If the data file is written to, the format driver may
2613           * expect to be able to resize it by writing beyond the EOF.
2614           */
2615          if (perm & BLK_PERM_WRITE) {
2616              perm |= BLK_PERM_RESIZE;
2617          }
2618      }
2619  
2620      if (bs->open_flags & BDRV_O_INACTIVE) {
2621          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2622      }
2623  
2624      *nperm = perm;
2625      *nshared = shared;
2626  }
2627  
2628  void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
2629                          BdrvChildRole role, BlockReopenQueue *reopen_queue,
2630                          uint64_t perm, uint64_t shared,
2631                          uint64_t *nperm, uint64_t *nshared)
2632  {
2633      if (role & BDRV_CHILD_FILTERED) {
2634          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2635                           BDRV_CHILD_COW)));
2636          bdrv_filter_default_perms(bs, c, role, reopen_queue,
2637                                    perm, shared, nperm, nshared);
2638      } else if (role & BDRV_CHILD_COW) {
2639          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
2640          bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2641                                     perm, shared, nperm, nshared);
2642      } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
2643          bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2644                                         perm, shared, nperm, nshared);
2645      } else {
2646          g_assert_not_reached();
2647      }
2648  }
2649  
2650  uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2651  {
2652      static const uint64_t permissions[] = {
2653          [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
2654          [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
2655          [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
2656          [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
2657          [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
2658      };
2659  
2660      QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2661      QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2662  
2663      assert(qapi_perm < BLOCK_PERMISSION__MAX);
2664  
2665      return permissions[qapi_perm];
2666  }
2667  
2668  static void bdrv_replace_child_noperm(BdrvChild *child,
2669                                        BlockDriverState *new_bs)
2670  {
2671      BlockDriverState *old_bs = child->bs;
2672      int new_bs_quiesce_counter;
2673      int drain_saldo;
2674  
2675      assert(!child->frozen);
2676  
2677      if (old_bs && new_bs) {
2678          assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
2679      }
2680  
2681      new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
2682      drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
2683  
2684      /*
2685       * If the new child node is drained but the old one was not, flush
2686       * all outstanding requests to the old child node.
2687       */
2688      while (drain_saldo > 0 && child->klass->drained_begin) {
2689          bdrv_parent_drained_begin_single(child, true);
2690          drain_saldo--;
2691      }
2692  
2693      if (old_bs) {
2694          /* Detach first so that the recursive drain sections coming from @child
2695           * are already gone and we only end the drain sections that came from
2696           * elsewhere. */
2697          if (child->klass->detach) {
2698              child->klass->detach(child);
2699          }
2700          QLIST_REMOVE(child, next_parent);
2701      }
2702  
2703      child->bs = new_bs;
2704  
2705      if (new_bs) {
2706          QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
2707  
2708          /*
2709           * Detaching the old node may have led to the new node's
2710           * quiesce_counter having been decreased.  Not a problem, we
2711           * just need to recognize this here and then invoke
2712           * drained_end appropriately more often.
2713           */
2714          assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
2715          drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
2716  
2717          /* Attach only after starting new drained sections, so that recursive
2718           * drain sections coming from @child don't get an extra .drained_begin
2719           * callback. */
2720          if (child->klass->attach) {
2721              child->klass->attach(child);
2722          }
2723      }
2724  
2725      /*
2726       * If the old child node was drained but the new one is not, allow
2727       * requests to come in only after the new node has been attached.
2728       */
2729      while (drain_saldo < 0 && child->klass->drained_end) {
2730          bdrv_parent_drained_end_single(child);
2731          drain_saldo++;
2732      }
2733  }
2734  
2735  static void bdrv_child_free(void *opaque)
2736  {
2737      BdrvChild *c = opaque;
2738  
2739      g_free(c->name);
2740      g_free(c);
2741  }
2742  
2743  static void bdrv_remove_empty_child(BdrvChild *child)
2744  {
2745      assert(!child->bs);
2746      QLIST_SAFE_REMOVE(child, next);
2747      bdrv_child_free(child);
2748  }
2749  
2750  typedef struct BdrvAttachChildCommonState {
2751      BdrvChild **child;
2752      AioContext *old_parent_ctx;
2753      AioContext *old_child_ctx;
2754  } BdrvAttachChildCommonState;
2755  
2756  static void bdrv_attach_child_common_abort(void *opaque)
2757  {
2758      BdrvAttachChildCommonState *s = opaque;
2759      BdrvChild *child = *s->child;
2760      BlockDriverState *bs = child->bs;
2761  
2762      bdrv_replace_child_noperm(child, NULL);
2763  
2764      if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
2765          bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
2766      }
2767  
2768      if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
2769          GSList *ignore = g_slist_prepend(NULL, child);
2770  
2771          child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
2772                                        &error_abort);
2773          g_slist_free(ignore);
2774          ignore = g_slist_prepend(NULL, child);
2775          child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
2776  
2777          g_slist_free(ignore);
2778      }
2779  
2780      bdrv_unref(bs);
2781      bdrv_remove_empty_child(child);
2782      *s->child = NULL;
2783  }
2784  
2785  static TransactionActionDrv bdrv_attach_child_common_drv = {
2786      .abort = bdrv_attach_child_common_abort,
2787      .clean = g_free,
2788  };
2789  
2790  /*
2791   * Common part of attaching bdrv child to bs or to blk or to job
2792   *
2793   * Resulting new child is returned through @child.
2794   * At start *@child must be NULL.
2795   * @child is saved to a new entry of @tran, so that *@child could be reverted to
2796   * NULL on abort(). So referenced variable must live at least until transaction
2797   * end.
2798   *
2799   * Function doesn't update permissions, caller is responsible for this.
2800   */
2801  static int bdrv_attach_child_common(BlockDriverState *child_bs,
2802                                      const char *child_name,
2803                                      const BdrvChildClass *child_class,
2804                                      BdrvChildRole child_role,
2805                                      uint64_t perm, uint64_t shared_perm,
2806                                      void *opaque, BdrvChild **child,
2807                                      Transaction *tran, Error **errp)
2808  {
2809      BdrvChild *new_child;
2810      AioContext *parent_ctx;
2811      AioContext *child_ctx = bdrv_get_aio_context(child_bs);
2812  
2813      assert(child);
2814      assert(*child == NULL);
2815      assert(child_class->get_parent_desc);
2816  
2817      new_child = g_new(BdrvChild, 1);
2818      *new_child = (BdrvChild) {
2819          .bs             = NULL,
2820          .name           = g_strdup(child_name),
2821          .klass          = child_class,
2822          .role           = child_role,
2823          .perm           = perm,
2824          .shared_perm    = shared_perm,
2825          .opaque         = opaque,
2826      };
2827  
2828      /*
2829       * If the AioContexts don't match, first try to move the subtree of
2830       * child_bs into the AioContext of the new parent. If this doesn't work,
2831       * try moving the parent into the AioContext of child_bs instead.
2832       */
2833      parent_ctx = bdrv_child_get_parent_aio_context(new_child);
2834      if (child_ctx != parent_ctx) {
2835          Error *local_err = NULL;
2836          int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);
2837  
2838          if (ret < 0 && child_class->can_set_aio_ctx) {
2839              GSList *ignore = g_slist_prepend(NULL, new_child);
2840              if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
2841                                               NULL))
2842              {
2843                  error_free(local_err);
2844                  ret = 0;
2845                  g_slist_free(ignore);
2846                  ignore = g_slist_prepend(NULL, new_child);
2847                  child_class->set_aio_ctx(new_child, child_ctx, &ignore);
2848              }
2849              g_slist_free(ignore);
2850          }
2851  
2852          if (ret < 0) {
2853              error_propagate(errp, local_err);
2854              bdrv_remove_empty_child(new_child);
2855              return ret;
2856          }
2857      }
2858  
2859      bdrv_ref(child_bs);
2860      bdrv_replace_child_noperm(new_child, child_bs);
2861  
2862      *child = new_child;
2863  
2864      BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
2865      *s = (BdrvAttachChildCommonState) {
2866          .child = child,
2867          .old_parent_ctx = parent_ctx,
2868          .old_child_ctx = child_ctx,
2869      };
2870      tran_add(tran, &bdrv_attach_child_common_drv, s);
2871  
2872      return 0;
2873  }
2874  
2875  /*
2876   * Variable referenced by @child must live at least until transaction end.
2877   * (see bdrv_attach_child_common() doc for details)
2878   *
2879   * Function doesn't update permissions, caller is responsible for this.
2880   */
2881  static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
2882                                      BlockDriverState *child_bs,
2883                                      const char *child_name,
2884                                      const BdrvChildClass *child_class,
2885                                      BdrvChildRole child_role,
2886                                      BdrvChild **child,
2887                                      Transaction *tran,
2888                                      Error **errp)
2889  {
2890      int ret;
2891      uint64_t perm, shared_perm;
2892  
2893      assert(parent_bs->drv);
2894  
2895      bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
2896      bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
2897                      perm, shared_perm, &perm, &shared_perm);
2898  
2899      ret = bdrv_attach_child_common(child_bs, child_name, child_class,
2900                                     child_role, perm, shared_perm, parent_bs,
2901                                     child, tran, errp);
2902      if (ret < 0) {
2903          return ret;
2904      }
2905  
2906      QLIST_INSERT_HEAD(&parent_bs->children, *child, next);
2907      /*
2908       * child is removed in bdrv_attach_child_common_abort(), so don't care to
2909       * abort this change separately.
2910       */
2911  
2912      return 0;
2913  }
2914  
2915  static void bdrv_detach_child(BdrvChild *child)
2916  {
2917      BlockDriverState *old_bs = child->bs;
2918  
2919      bdrv_replace_child_noperm(child, NULL);
2920      bdrv_remove_empty_child(child);
2921  
2922      if (old_bs) {
2923          /*
2924           * Update permissions for old node. We're just taking a parent away, so
2925           * we're loosening restrictions. Errors of permission update are not
2926           * fatal in this case, ignore them.
2927           */
2928          bdrv_refresh_perms(old_bs, NULL);
2929  
2930          /*
2931           * When the parent requiring a non-default AioContext is removed, the
2932           * node moves back to the main AioContext
2933           */
2934          bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
2935      }
2936  }
2937  
2938  /*
2939   * This function steals the reference to child_bs from the caller.
2940   * That reference is later dropped by bdrv_root_unref_child().
2941   *
2942   * On failure NULL is returned, errp is set and the reference to
2943   * child_bs is also dropped.
2944   *
2945   * The caller must hold the AioContext lock @child_bs, but not that of @ctx
2946   * (unless @child_bs is already in @ctx).
2947   */
2948  BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
2949                                    const char *child_name,
2950                                    const BdrvChildClass *child_class,
2951                                    BdrvChildRole child_role,
2952                                    uint64_t perm, uint64_t shared_perm,
2953                                    void *opaque, Error **errp)
2954  {
2955      int ret;
2956      BdrvChild *child = NULL;
2957      Transaction *tran = tran_new();
2958  
2959      ret = bdrv_attach_child_common(child_bs, child_name, child_class,
2960                                     child_role, perm, shared_perm, opaque,
2961                                     &child, tran, errp);
2962      if (ret < 0) {
2963          goto out;
2964      }
2965  
2966      ret = bdrv_refresh_perms(child_bs, errp);
2967  
2968  out:
2969      tran_finalize(tran, ret);
2970      /* child is unset on failure by bdrv_attach_child_common_abort() */
2971      assert((ret < 0) == !child);
2972  
2973      bdrv_unref(child_bs);
2974      return child;
2975  }
2976  
2977  /*
2978   * This function transfers the reference to child_bs from the caller
2979   * to parent_bs. That reference is later dropped by parent_bs on
2980   * bdrv_close() or if someone calls bdrv_unref_child().
2981   *
2982   * On failure NULL is returned, errp is set and the reference to
2983   * child_bs is also dropped.
2984   *
2985   * If @parent_bs and @child_bs are in different AioContexts, the caller must
2986   * hold the AioContext lock for @child_bs, but not for @parent_bs.
2987   */
2988  BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
2989                               BlockDriverState *child_bs,
2990                               const char *child_name,
2991                               const BdrvChildClass *child_class,
2992                               BdrvChildRole child_role,
2993                               Error **errp)
2994  {
2995      int ret;
2996      BdrvChild *child = NULL;
2997      Transaction *tran = tran_new();
2998  
2999      ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
3000                                     child_role, &child, tran, errp);
3001      if (ret < 0) {
3002          goto out;
3003      }
3004  
3005      ret = bdrv_refresh_perms(parent_bs, errp);
3006      if (ret < 0) {
3007          goto out;
3008      }
3009  
3010  out:
3011      tran_finalize(tran, ret);
3012      /* child is unset on failure by bdrv_attach_child_common_abort() */
3013      assert((ret < 0) == !child);
3014  
3015      bdrv_unref(child_bs);
3016  
3017      return child;
3018  }
3019  
3020  /* Callers must ensure that child->frozen is false. */
3021  void bdrv_root_unref_child(BdrvChild *child)
3022  {
3023      BlockDriverState *child_bs;
3024  
3025      child_bs = child->bs;
3026      bdrv_detach_child(child);
3027      bdrv_unref(child_bs);
3028  }
3029  
3030  typedef struct BdrvSetInheritsFrom {
3031      BlockDriverState *bs;
3032      BlockDriverState *old_inherits_from;
3033  } BdrvSetInheritsFrom;
3034  
3035  static void bdrv_set_inherits_from_abort(void *opaque)
3036  {
3037      BdrvSetInheritsFrom *s = opaque;
3038  
3039      s->bs->inherits_from = s->old_inherits_from;
3040  }
3041  
3042  static TransactionActionDrv bdrv_set_inherits_from_drv = {
3043      .abort = bdrv_set_inherits_from_abort,
3044      .clean = g_free,
3045  };
3046  
3047  /* @tran is allowed to be NULL. In this case no rollback is possible */
3048  static void bdrv_set_inherits_from(BlockDriverState *bs,
3049                                     BlockDriverState *new_inherits_from,
3050                                     Transaction *tran)
3051  {
3052      if (tran) {
3053          BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
3054  
3055          *s = (BdrvSetInheritsFrom) {
3056              .bs = bs,
3057              .old_inherits_from = bs->inherits_from,
3058          };
3059  
3060          tran_add(tran, &bdrv_set_inherits_from_drv, s);
3061      }
3062  
3063      bs->inherits_from = new_inherits_from;
3064  }
3065  
3066  /**
3067   * Clear all inherits_from pointers from children and grandchildren of
3068   * @root that point to @root, where necessary.
3069   * @tran is allowed to be NULL. In this case no rollback is possible
3070   */
3071  static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
3072                                       Transaction *tran)
3073  {
3074      BdrvChild *c;
3075  
3076      if (child->bs->inherits_from == root) {
3077          /*
3078           * Remove inherits_from only when the last reference between root and
3079           * child->bs goes away.
3080           */
3081          QLIST_FOREACH(c, &root->children, next) {
3082              if (c != child && c->bs == child->bs) {
3083                  break;
3084              }
3085          }
3086          if (c == NULL) {
3087              bdrv_set_inherits_from(child->bs, NULL, tran);
3088          }
3089      }
3090  
3091      QLIST_FOREACH(c, &child->bs->children, next) {
3092          bdrv_unset_inherits_from(root, c, tran);
3093      }
3094  }
3095  
3096  /* Callers must ensure that child->frozen is false. */
3097  void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
3098  {
3099      if (child == NULL) {
3100          return;
3101      }
3102  
3103      bdrv_unset_inherits_from(parent, child, NULL);
3104      bdrv_root_unref_child(child);
3105  }
3106  
3107  
3108  static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
3109  {
3110      BdrvChild *c;
3111      QLIST_FOREACH(c, &bs->parents, next_parent) {
3112          if (c->klass->change_media) {
3113              c->klass->change_media(c, load);
3114          }
3115      }
3116  }
3117  
3118  /* Return true if you can reach parent going through child->inherits_from
3119   * recursively. If parent or child are NULL, return false */
3120  static bool bdrv_inherits_from_recursive(BlockDriverState *child,
3121                                           BlockDriverState *parent)
3122  {
3123      while (child && child != parent) {
3124          child = child->inherits_from;
3125      }
3126  
3127      return child != NULL;
3128  }
3129  
3130  /*
3131   * Return the BdrvChildRole for @bs's backing child.  bs->backing is
3132   * mostly used for COW backing children (role = COW), but also for
3133   * filtered children (role = FILTERED | PRIMARY).
3134   */
3135  static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
3136  {
3137      if (bs->drv && bs->drv->is_filter) {
3138          return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3139      } else {
3140          return BDRV_CHILD_COW;
3141      }
3142  }
3143  
3144  /*
3145   * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
3146   * callers which don't need their own reference any more must call bdrv_unref().
3147   *
3148   * Function doesn't update permissions, caller is responsible for this.
3149   */
3150  static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
3151                                             BlockDriverState *child_bs,
3152                                             bool is_backing,
3153                                             Transaction *tran, Error **errp)
3154  {
3155      int ret = 0;
3156      bool update_inherits_from =
3157          bdrv_inherits_from_recursive(child_bs, parent_bs);
3158      BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
3159      BdrvChildRole role;
3160  
3161      if (!parent_bs->drv) {
3162          /*
3163           * Node without drv is an object without a class :/. TODO: finally fix
3164           * qcow2 driver to never clear bs->drv and implement format corruption
3165           * handling in other way.
3166           */
3167          error_setg(errp, "Node corrupted");
3168          return -EINVAL;
3169      }
3170  
3171      if (child && child->frozen) {
3172          error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
3173                     child->name, parent_bs->node_name, child->bs->node_name);
3174          return -EPERM;
3175      }
3176  
3177      if (is_backing && !parent_bs->drv->is_filter &&
3178          !parent_bs->drv->supports_backing)
3179      {
3180          error_setg(errp, "Driver '%s' of node '%s' does not support backing "
3181                     "files", parent_bs->drv->format_name, parent_bs->node_name);
3182          return -EINVAL;
3183      }
3184  
3185      if (parent_bs->drv->is_filter) {
3186          role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3187      } else if (is_backing) {
3188          role = BDRV_CHILD_COW;
3189      } else {
3190          /*
3191           * We only can use same role as it is in existing child. We don't have
3192           * infrastructure to determine role of file child in generic way
3193           */
3194          if (!child) {
3195              error_setg(errp, "Cannot set file child to format node without "
3196                         "file child");
3197              return -EINVAL;
3198          }
3199          role = child->role;
3200      }
3201  
3202      if (child) {
3203          bdrv_unset_inherits_from(parent_bs, child, tran);
3204          bdrv_remove_file_or_backing_child(parent_bs, child, tran);
3205      }
3206  
3207      if (!child_bs) {
3208          goto out;
3209      }
3210  
3211      ret = bdrv_attach_child_noperm(parent_bs, child_bs,
3212                                     is_backing ? "backing" : "file",
3213                                     &child_of_bds, role,
3214                                     is_backing ? &parent_bs->backing :
3215                                                  &parent_bs->file,
3216                                     tran, errp);
3217      if (ret < 0) {
3218          return ret;
3219      }
3220  
3221  
3222      /*
3223       * If inherits_from pointed recursively to bs then let's update it to
3224       * point directly to bs (else it will become NULL).
3225       */
3226      if (update_inherits_from) {
3227          bdrv_set_inherits_from(child_bs, parent_bs, tran);
3228      }
3229  
3230  out:
3231      bdrv_refresh_limits(parent_bs, tran, NULL);
3232  
3233      return 0;
3234  }
3235  
3236  static int bdrv_set_backing_noperm(BlockDriverState *bs,
3237                                     BlockDriverState *backing_hd,
3238                                     Transaction *tran, Error **errp)
3239  {
3240      return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
3241  }
3242  
3243  int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
3244                          Error **errp)
3245  {
3246      int ret;
3247      Transaction *tran = tran_new();
3248  
3249      ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
3250      if (ret < 0) {
3251          goto out;
3252      }
3253  
3254      ret = bdrv_refresh_perms(bs, errp);
3255  out:
3256      tran_finalize(tran, ret);
3257  
3258      return ret;
3259  }
3260  
3261  /*
3262   * Opens the backing file for a BlockDriverState if not yet open
3263   *
3264   * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
3265   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3266   * itself, all options starting with "${bdref_key}." are considered part of the
3267   * BlockdevRef.
3268   *
3269   * TODO Can this be unified with bdrv_open_image()?
3270   */
3271  int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
3272                             const char *bdref_key, Error **errp)
3273  {
3274      char *backing_filename = NULL;
3275      char *bdref_key_dot;
3276      const char *reference = NULL;
3277      int ret = 0;
3278      bool implicit_backing = false;
3279      BlockDriverState *backing_hd;
3280      QDict *options;
3281      QDict *tmp_parent_options = NULL;
3282      Error *local_err = NULL;
3283  
3284      if (bs->backing != NULL) {
3285          goto free_exit;
3286      }
3287  
3288      /* NULL means an empty set of options */
3289      if (parent_options == NULL) {
3290          tmp_parent_options = qdict_new();
3291          parent_options = tmp_parent_options;
3292      }
3293  
3294      bs->open_flags &= ~BDRV_O_NO_BACKING;
3295  
3296      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3297      qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
3298      g_free(bdref_key_dot);
3299  
3300      /*
3301       * Caution: while qdict_get_try_str() is fine, getting non-string
3302       * types would require more care.  When @parent_options come from
3303       * -blockdev or blockdev_add, its members are typed according to
3304       * the QAPI schema, but when they come from -drive, they're all
3305       * QString.
3306       */
3307      reference = qdict_get_try_str(parent_options, bdref_key);
3308      if (reference || qdict_haskey(options, "file.filename")) {
3309          /* keep backing_filename NULL */
3310      } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
3311          qobject_unref(options);
3312          goto free_exit;
3313      } else {
3314          if (qdict_size(options) == 0) {
3315              /* If the user specifies options that do not modify the
3316               * backing file's behavior, we might still consider it the
3317               * implicit backing file.  But it's easier this way, and
3318               * just specifying some of the backing BDS's options is
3319               * only possible with -drive anyway (otherwise the QAPI
3320               * schema forces the user to specify everything). */
3321              implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
3322          }
3323  
3324          backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
3325          if (local_err) {
3326              ret = -EINVAL;
3327              error_propagate(errp, local_err);
3328              qobject_unref(options);
3329              goto free_exit;
3330          }
3331      }
3332  
3333      if (!bs->drv || !bs->drv->supports_backing) {
3334          ret = -EINVAL;
3335          error_setg(errp, "Driver doesn't support backing files");
3336          qobject_unref(options);
3337          goto free_exit;
3338      }
3339  
3340      if (!reference &&
3341          bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
3342          qdict_put_str(options, "driver", bs->backing_format);
3343      }
3344  
3345      backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
3346                                     &child_of_bds, bdrv_backing_role(bs), errp);
3347      if (!backing_hd) {
3348          bs->open_flags |= BDRV_O_NO_BACKING;
3349          error_prepend(errp, "Could not open backing file: ");
3350          ret = -EINVAL;
3351          goto free_exit;
3352      }
3353  
3354      if (implicit_backing) {
3355          bdrv_refresh_filename(backing_hd);
3356          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
3357                  backing_hd->filename);
3358      }
3359  
3360      /* Hook up the backing file link; drop our reference, bs owns the
3361       * backing_hd reference now */
3362      ret = bdrv_set_backing_hd(bs, backing_hd, errp);
3363      bdrv_unref(backing_hd);
3364      if (ret < 0) {
3365          goto free_exit;
3366      }
3367  
3368      qdict_del(parent_options, bdref_key);
3369  
3370  free_exit:
3371      g_free(backing_filename);
3372      qobject_unref(tmp_parent_options);
3373      return ret;
3374  }
3375  
3376  static BlockDriverState *
3377  bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
3378                     BlockDriverState *parent, const BdrvChildClass *child_class,
3379                     BdrvChildRole child_role, bool allow_none, Error **errp)
3380  {
3381      BlockDriverState *bs = NULL;
3382      QDict *image_options;
3383      char *bdref_key_dot;
3384      const char *reference;
3385  
3386      assert(child_class != NULL);
3387  
3388      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3389      qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3390      g_free(bdref_key_dot);
3391  
3392      /*
3393       * Caution: while qdict_get_try_str() is fine, getting non-string
3394       * types would require more care.  When @options come from
3395       * -blockdev or blockdev_add, its members are typed according to
3396       * the QAPI schema, but when they come from -drive, they're all
3397       * QString.
3398       */
3399      reference = qdict_get_try_str(options, bdref_key);
3400      if (!filename && !reference && !qdict_size(image_options)) {
3401          if (!allow_none) {
3402              error_setg(errp, "A block device must be specified for \"%s\"",
3403                         bdref_key);
3404          }
3405          qobject_unref(image_options);
3406          goto done;
3407      }
3408  
3409      bs = bdrv_open_inherit(filename, reference, image_options, 0,
3410                             parent, child_class, child_role, errp);
3411      if (!bs) {
3412          goto done;
3413      }
3414  
3415  done:
3416      qdict_del(options, bdref_key);
3417      return bs;
3418  }
3419  
3420  /*
3421   * Opens a disk image whose options are given as BlockdevRef in another block
3422   * device's options.
3423   *
3424   * If allow_none is true, no image will be opened if filename is false and no
3425   * BlockdevRef is given. NULL will be returned, but errp remains unset.
3426   *
3427   * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3428   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3429   * itself, all options starting with "${bdref_key}." are considered part of the
3430   * BlockdevRef.
3431   *
3432   * The BlockdevRef will be removed from the options QDict.
3433   */
3434  BdrvChild *bdrv_open_child(const char *filename,
3435                             QDict *options, const char *bdref_key,
3436                             BlockDriverState *parent,
3437                             const BdrvChildClass *child_class,
3438                             BdrvChildRole child_role,
3439                             bool allow_none, Error **errp)
3440  {
3441      BlockDriverState *bs;
3442  
3443      bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3444                              child_role, allow_none, errp);
3445      if (bs == NULL) {
3446          return NULL;
3447      }
3448  
3449      return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3450                               errp);
3451  }
3452  
3453  /*
3454   * TODO Future callers may need to specify parent/child_class in order for
3455   * option inheritance to work. Existing callers use it for the root node.
3456   */
3457  BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3458  {
3459      BlockDriverState *bs = NULL;
3460      QObject *obj = NULL;
3461      QDict *qdict = NULL;
3462      const char *reference = NULL;
3463      Visitor *v = NULL;
3464  
3465      if (ref->type == QTYPE_QSTRING) {
3466          reference = ref->u.reference;
3467      } else {
3468          BlockdevOptions *options = &ref->u.definition;
3469          assert(ref->type == QTYPE_QDICT);
3470  
3471          v = qobject_output_visitor_new(&obj);
3472          visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3473          visit_complete(v, &obj);
3474  
3475          qdict = qobject_to(QDict, obj);
3476          qdict_flatten(qdict);
3477  
3478          /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3479           * compatibility with other callers) rather than what we want as the
3480           * real defaults. Apply the defaults here instead. */
3481          qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3482          qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3483          qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
3484          qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3485  
3486      }
3487  
3488      bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
3489      obj = NULL;
3490      qobject_unref(obj);
3491      visit_free(v);
3492      return bs;
3493  }
3494  
3495  static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3496                                                     int flags,
3497                                                     QDict *snapshot_options,
3498                                                     Error **errp)
3499  {
3500      /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
3501      char *tmp_filename = g_malloc0(PATH_MAX + 1);
3502      int64_t total_size;
3503      QemuOpts *opts = NULL;
3504      BlockDriverState *bs_snapshot = NULL;
3505      int ret;
3506  
3507      /* if snapshot, we create a temporary backing file and open it
3508         instead of opening 'filename' directly */
3509  
3510      /* Get the required size from the image */
3511      total_size = bdrv_getlength(bs);
3512      if (total_size < 0) {
3513          error_setg_errno(errp, -total_size, "Could not get image size");
3514          goto out;
3515      }
3516  
3517      /* Create the temporary image */
3518      ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
3519      if (ret < 0) {
3520          error_setg_errno(errp, -ret, "Could not get temporary filename");
3521          goto out;
3522      }
3523  
3524      opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
3525                              &error_abort);
3526      qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
3527      ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
3528      qemu_opts_del(opts);
3529      if (ret < 0) {
3530          error_prepend(errp, "Could not create temporary overlay '%s': ",
3531                        tmp_filename);
3532          goto out;
3533      }
3534  
3535      /* Prepare options QDict for the temporary file */
3536      qdict_put_str(snapshot_options, "file.driver", "file");
3537      qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3538      qdict_put_str(snapshot_options, "driver", "qcow2");
3539  
3540      bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
3541      snapshot_options = NULL;
3542      if (!bs_snapshot) {
3543          goto out;
3544      }
3545  
3546      ret = bdrv_append(bs_snapshot, bs, errp);
3547      if (ret < 0) {
3548          bs_snapshot = NULL;
3549          goto out;
3550      }
3551  
3552  out:
3553      qobject_unref(snapshot_options);
3554      g_free(tmp_filename);
3555      return bs_snapshot;
3556  }
3557  
3558  /*
3559   * Opens a disk image (raw, qcow2, vmdk, ...)
3560   *
3561   * options is a QDict of options to pass to the block drivers, or NULL for an
3562   * empty set of options. The reference to the QDict belongs to the block layer
3563   * after the call (even on failure), so if the caller intends to reuse the
3564   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
3565   *
3566   * On success the opened BDS is returned and the caller receives a reference
3567   * to it. On failure NULL is returned and @errp is set.
3568   *
3569   * The reference parameter may be used to specify an existing block device which
3570   * should be opened. If specified, neither options nor a filename may be given.
3571   * No new BDS is created in that case: the existing node is looked up, an
       * additional reference is taken on it, and it is returned.
       *
       * parent and child_class must either both be given or both be NULL
       * (asserted). When they are given, flags must be 0: the effective flags and
       * inherited options are derived from the parent by calling
       * child_class->inherit_options() with child_role.
       *
       * If BDRV_O_SNAPSHOT ends up set in the effective flags, options for a
       * temporary overlay are prepared; when the resulting snapshot flags are
       * non-zero, the overlay is appended on top of the opened node and the
       * overlay is returned instead of the node itself.
3572   */
3573  static BlockDriverState *bdrv_open_inherit(const char *filename,
3574                                             const char *reference,
3575                                             QDict *options, int flags,
3576                                             BlockDriverState *parent,
3577                                             const BdrvChildClass *child_class,
3578                                             BdrvChildRole child_role,
3579                                             Error **errp)
3580  {
3581      int ret;
3582      BlockBackend *file = NULL;
3583      BlockDriverState *bs;
3584      BlockDriver *drv = NULL;
3585      BdrvChild *child;
3586      const char *drvname;
3587      const char *backing;
3588      Error *local_err = NULL;
3589      QDict *snapshot_options = NULL;
3590      int snapshot_flags = 0;
3591  
          /*
           * A child opened through a child class takes its flags from the
           * parent, so none may be passed in; parent and child_class always
           * come as a pair.
           */
3592      assert(!child_class || !flags);
3593      assert(!child_class == !parent);
3594  
3595      if (reference) {
              /* Remember whether options were given before dropping our reference */
3596          bool options_non_empty = options ? qdict_size(options) : false;
3597          qobject_unref(options);
3598  
3599          if (filename || options_non_empty) {
3600              error_setg(errp, "Cannot reference an existing block device with "
3601                         "additional options or a new filename");
3602              return NULL;
3603          }
3604  
3605          bs = bdrv_lookup_bs(reference, reference, errp);
3606          if (!bs) {
3607              return NULL;
3608          }
3609  
              /* The caller gets its own reference to the existing node */
3610          bdrv_ref(bs);
3611          return bs;
3612      }
3613  
3614      bs = bdrv_new();
3615  
3616      /* NULL means an empty set of options */
3617      if (options == NULL) {
3618          options = qdict_new();
3619      }
3620  
3621      /* json: syntax counts as explicit options, as if in the QDict */
3622      parse_json_protocol(options, &filename, &local_err);
3623      if (local_err) {
3624          goto fail;
3625      }
3626  
          /*
           * Copy of the options as passed in, taken before inherited values and
           * the results of bdrv_fill_options() are merged into @options below.
           */
3627      bs->explicit_options = qdict_clone_shallow(options);
3628  
3629      if (child_class) {
3630          bool parent_is_format;
3631  
3632          if (parent->drv) {
3633              parent_is_format = parent->drv->is_format;
3634          } else {
3635              /*
3636               * parent->drv is not set yet because this node is opened for
3637               * (potential) format probing.  That means that @parent is going
3638               * to be a format node.
3639               */
3640              parent_is_format = true;
3641          }
3642  
3643          bs->inherits_from = parent;
3644          child_class->inherit_options(child_role, parent_is_format,
3645                                       &flags, options,
3646                                       parent->open_flags, parent->options);
3647      }
3648  
3649      ret = bdrv_fill_options(&options, filename, &flags, &local_err);
3650      if (ret < 0) {
3651          goto fail;
3652      }
3653  
3654      /*
3655       * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
3656       * Caution: getting a boolean member of @options requires care.
3657       * When @options come from -blockdev or blockdev_add, members are
3658       * typed according to the QAPI schema, but when they come from
3659       * -drive, they're all QString.
3660       */
3661      if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
3662          !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
3663          flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
3664      } else {
3665          flags &= ~BDRV_O_RDWR;
3666      }
3667  
3668      if (flags & BDRV_O_SNAPSHOT) {
3669          snapshot_options = qdict_new();
3670          bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
3671                                     flags, options);
3672          /* Let bdrv_backing_options() override "read-only" */
3673          qdict_del(options, BDRV_OPT_READ_ONLY);
3674          bdrv_inherited_options(BDRV_CHILD_COW, true,
3675                                 &flags, options, flags, options);
3676      }
3677  
          /*
           * bs->options keeps the full effective option set. The local @options
           * becomes a separate shallow copy that is handed to the open code
           * below; whatever is still left in it at the end is reported as an
           * unknown option.
           */
3678      bs->open_flags = flags;
3679      bs->options = options;
3680      options = qdict_clone_shallow(options);
3681  
3682      /* Find the right image format driver */
3683      /* See cautionary note on accessing @options above */
3684      drvname = qdict_get_try_str(options, "driver");
3685      if (drvname) {
3686          drv = bdrv_find_format(drvname);
3687          if (!drv) {
3688              error_setg(errp, "Unknown driver: '%s'", drvname);
3689              goto fail;
3690          }
3691      }
3692  
3693      assert(drvname || !(flags & BDRV_O_PROTOCOL));
3694  
3695      /* See cautionary note on accessing @options above */
3696      backing = qdict_get_try_str(options, "backing");
3697      if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
3698          (backing && *backing == '\0'))
3699      {
              /* Only the empty-string spelling is deprecated; null is the new form */
3700          if (backing) {
3701              warn_report("Use of \"backing\": \"\" is deprecated; "
3702                          "use \"backing\": null instead");
3703          }
3704          flags |= BDRV_O_NO_BACKING;
3705          qdict_del(bs->explicit_options, "backing");
3706          qdict_del(bs->options, "backing");
3707          qdict_del(options, "backing");
3708      }
3709  
3710      /* Open image file without format layer. This BlockBackend is only used for
3711       * probing, the block drivers will do their own bdrv_open_child() for the
3712       * same BDS, which is why we put the node name back into options. */
3713      if ((flags & BDRV_O_PROTOCOL) == 0) {
3714          BlockDriverState *file_bs;
3715  
3716          file_bs = bdrv_open_child_bs(filename, options, "file", bs,
3717                                       &child_of_bds, BDRV_CHILD_IMAGE,
3718                                       true, &local_err);
3719          if (local_err) {
3720              goto fail;
3721          }
3722          if (file_bs != NULL) {
3723              /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
3724               * looking at the header to guess the image format. This works even
3725               * in cases where a guest would not see a consistent state. */
3726              file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
3727              blk_insert_bs(file, file_bs, &local_err);
3728              bdrv_unref(file_bs);
3729              if (local_err) {
3730                  goto fail;
3731              }
3732  
3733              qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
3734          }
3735      }
3736  
3737      /* Image format probing */
3738      bs->probed = !drv;
3739      if (!drv && file) {
3740          ret = find_image_format(file, filename, &drv, &local_err);
3741          if (ret < 0) {
3742              goto fail;
3743          }
3744          /*
3745           * This option update would logically belong in bdrv_fill_options(),
3746           * but we first need to open bs->file for the probing to work, while
3747           * opening bs->file already requires the (mostly) final set of options
3748           * so that cache mode etc. can be inherited.
3749           *
3750           * Adding the driver later is somewhat ugly, but it's not an option
3751           * that would ever be inherited, so it's correct. We just need to make
3752           * sure to update both bs->options (which has the full effective
3753           * options for bs) and options (which has file.* already removed).
3754           */
3755          qdict_put_str(bs->options, "driver", drv->format_name);
3756          qdict_put_str(options, "driver", drv->format_name);
3757      } else if (!drv) {
3758          error_setg(errp, "Must specify either driver or file");
3759          goto fail;
3760      }
3761  
3762      /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
3763      assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
3764      /* file must be NULL if a protocol BDS is about to be created
3765       * (the inverse results in an error message from bdrv_open_common()) */
3766      assert(!(flags & BDRV_O_PROTOCOL) || !file);
3767  
3768      /* Open the image */
3769      ret = bdrv_open_common(bs, file, options, &local_err);
3770      if (ret < 0) {
3771          goto fail;
3772      }
3773  
          /* The probing BlockBackend is no longer needed once the image is open */
3774      if (file) {
3775          blk_unref(file);
3776          file = NULL;
3777      }
3778  
3779      /* If there is a backing file, use it */
3780      if ((flags & BDRV_O_NO_BACKING) == 0) {
3781          ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
3782          if (ret < 0) {
3783              goto close_and_fail;
3784          }
3785      }
3786  
3787      /* Remove all children options and references
3788       * from bs->options and bs->explicit_options */
3789      QLIST_FOREACH(child, &bs->children, next) {
3790          char *child_key_dot;
3791          child_key_dot = g_strdup_printf("%s.", child->name);
3792          qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
3793          qdict_extract_subqdict(bs->options, NULL, child_key_dot);
3794          qdict_del(bs->explicit_options, child->name);
3795          qdict_del(bs->options, child->name);
3796          g_free(child_key_dot);
3797      }
3798  
3799      /* Check if any unknown options were used */
3800      if (qdict_size(options) != 0) {
              /* Only the first leftover key is named in the error message */
3801          const QDictEntry *entry = qdict_first(options);
3802          if (flags & BDRV_O_PROTOCOL) {
3803              error_setg(errp, "Block protocol '%s' doesn't support the option "
3804                         "'%s'", drv->format_name, entry->key);
3805          } else {
3806              error_setg(errp,
3807                         "Block format '%s' does not support the option '%s'",
3808                         drv->format_name, entry->key);
3809          }
3810  
3811          goto close_and_fail;
3812      }
3813  
3814      bdrv_parent_cb_change_media(bs, true);
3815  
3816      qobject_unref(options);
3817      options = NULL;
3818  
3819      /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
3820       * temporary snapshot afterwards. */
3821      if (snapshot_flags) {
3822          BlockDriverState *snapshot_bs;
3823          snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
3824                                                  snapshot_options, &local_err);
              /* Cleared so that the error path below does not unref it again */
3825          snapshot_options = NULL;
3826          if (local_err) {
3827              goto close_and_fail;
3828          }
3829          /* We are not going to return bs but the overlay on top of it
3830           * (snapshot_bs); thus, we have to drop the strong reference to bs
3831           * (which we obtained by calling bdrv_new()). bs will not be deleted,
3832           * though, because the overlay still has a reference to it. */
3833          bdrv_unref(bs);
3834          bs = snapshot_bs;
3835      }
3836  
3837      return bs;
3838  
      /*
       * Error path for failures up to and including bdrv_open_common(): the
       * option dicts attached to bs are released and cleared here explicitly
       * before the reference to bs itself is dropped.
       */
3839  fail:
3840      blk_unref(file);
3841      qobject_unref(snapshot_options);
3842      qobject_unref(bs->explicit_options);
3843      qobject_unref(bs->options);
3844      qobject_unref(options);
3845      bs->options = NULL;
3846      bs->explicit_options = NULL;
3847      bdrv_unref(bs);
3848      error_propagate(errp, local_err);
3849      return NULL;
3850  
      /*
       * Error path for failures after bdrv_open_common() succeeded: only the
       * reference to bs and the local dicts are dropped here.
       * NOTE(review): presumably tearing down an opened bs releases bs->options
       * and bs->explicit_options itself - confirm against bdrv_close().
       */
3851  close_and_fail:
3852      bdrv_unref(bs);
3853      qobject_unref(snapshot_options);
3854      qobject_unref(options);
3855      error_propagate(errp, local_err);
3856      return NULL;
3857  }
3858  
3859  BlockDriverState *bdrv_open(const char *filename, const char *reference,
3860                              QDict *options, int flags, Error **errp)
3861  {
3862      return bdrv_open_inherit(filename, reference, options, flags, NULL,
3863                               NULL, 0, errp);
3864  }
3865  
/*
 * Report whether @str occurs in @list, an array of C strings whose end is
 * marked by a NULL entry.  A NULL @str or a NULL @list never matches.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *cur;

    if (!str || !list) {
        return false;
    }

    for (cur = list; *cur != NULL; cur++) {
        if (strcmp(str, *cur) == 0) {
            return true;
        }
    }

    return false;
}
3879  
3880  /*
3881   * Check that every option set in @bs->options is also set in
3882   * @new_opts.
3883   *
3884   * Options listed in the common_options list and in
3885   * @bs->drv->mutable_opts are skipped.
3886   *
3887   * Return 0 on success, otherwise return -EINVAL and set @errp.
3888   */
3889  static int bdrv_reset_options_allowed(BlockDriverState *bs,
3890                                        const QDict *new_opts, Error **errp)
3891  {
3892      const QDictEntry *e;
3893      /* These options are common to all block drivers and are handled
3894       * in bdrv_reopen_prepare() so they can be left out of @new_opts */
3895      const char *const common_options[] = {
3896          "node-name", "discard", "cache.direct", "cache.no-flush",
3897          "read-only", "auto-read-only", "detect-zeroes", NULL
3898      };
3899  
3900      for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
3901          if (!qdict_haskey(new_opts, e->key) &&
3902              !is_str_in_list(e->key, common_options) &&
3903              !is_str_in_list(e->key, bs->drv->mutable_opts)) {
3904              error_setg(errp, "Option '%s' cannot be reset "
3905                         "to its default value", e->key);
3906              return -EINVAL;
3907          }
3908      }
3909  
3910      return 0;
3911  }
3912  
3913  /*
3914   * Returns true if @child can be reached recursively from @bs
3915   */
3916  static bool bdrv_recurse_has_child(BlockDriverState *bs,
3917                                     BlockDriverState *child)
3918  {
3919      BdrvChild *c;
3920  
3921      if (bs == child) {
3922          return true;
3923      }
3924  
3925      QLIST_FOREACH(c, &bs->children, next) {
3926          if (bdrv_recurse_has_child(c->bs, child)) {
3927              return true;
3928          }
3929      }
3930  
3931      return false;
3932  }
3933  
3934  /*
3935   * Adds a BlockDriverState to a simple queue for an atomic, transactional
3936   * reopen of multiple devices.
3937   *
3938   * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
3939   * already performed, or alternatively may be NULL, in which case a new
3940   * BlockReopenQueue will be created and initialized. This newly created
3941   * BlockReopenQueue should be passed back in for subsequent calls that are
3942   * intended to be of the same atomic 'set'.
3943   *
3944   * bs is the BlockDriverState to add to the reopen queue. If bs already has
3945   * an entry in bs_queue, that entry is updated instead of adding a second one.
3946   *
3947   * options contains the changed options for the associated bs
3948   * (the BlockReopenQueue takes ownership). NULL means an empty set.
3949   *
3950   * klass, role, parent_is_format, parent_options and parent_flags describe the
3951   * parent this node is queued on behalf of. If parent_options is non-NULL, the
3952   * new flags start at 0 and klass->inherit_options() derives flags and options
3953   * from the parent; otherwise the current flags of bs are the starting point.
       *
       * keep_old_opts selects whether options that are not given in @options
       * keep their current value (true) or are left unset (false). In the latter
       * case the entry records whether a 'backing' option is missing.
       *
       * The function recurses into every child of bs that inherits from bs.
       *
3954   * returns a pointer to bs_queue, which is either the newly allocated
3955   * bs_queue, or the existing bs_queue being used.
       *
       * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
       */
3956  static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
3957                                                   BlockDriverState *bs,
3958                                                   QDict *options,
3959                                                   const BdrvChildClass *klass,
3960                                                   BdrvChildRole role,
3961                                                   bool parent_is_format,
3962                                                   QDict *parent_options,
3963                                                   int parent_flags,
3964                                                   bool keep_old_opts)
3965  {
3966      assert(bs != NULL);
3967  
3968      BlockReopenQueueEntry *bs_entry;
3969      BdrvChild *child;
3970      QDict *old_options, *explicit_options, *options_copy;
3971      int flags;
3972      QemuOpts *opts;
3973  
3974      /* Make sure that the caller remembered to use a drained section. This is
3975       * important to avoid graph changes between the recursive queuing here and
3976       * bdrv_reopen_multiple(). */
3977      assert(bs->quiesce_counter > 0);
3978  
3979      if (bs_queue == NULL) {
3980          bs_queue = g_new0(BlockReopenQueue, 1);
3981          QTAILQ_INIT(bs_queue);
3982      }
3983  
3984      if (!options) {
3985          options = qdict_new();
3986      }
3987  
3988      /* Check if this BlockDriverState is already in the queue */
          /* bs_entry is left NULL when the loop finishes without a match */
3989      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3990          if (bs == bs_entry->state.bs) {
3991              break;
3992          }
3993      }
3994  
3995      /*
3996       * Precedence of options:
3997       * 1. Explicitly passed in options (highest)
3998       * 2. Retained from explicitly set options of bs
3999       * 3. Inherited from parent node
4000       * 4. Retained from effective options of bs
4001       */
4002  
4003      /* Old explicitly set values (don't overwrite by inherited value) */
          /* An already queued entry takes precedence over the state of bs itself */
4004      if (bs_entry || keep_old_opts) {
4005          old_options = qdict_clone_shallow(bs_entry ?
4006                                            bs_entry->state.explicit_options :
4007                                            bs->explicit_options);
4008          bdrv_join_options(bs, options, old_options);
4009          qobject_unref(old_options);
4010      }
4011  
          /* Snapshot of steps 1 and 2, taken before inherited values are added */
4012      explicit_options = qdict_clone_shallow(options);
4013  
4014      /* Inherit from parent node */
4015      if (parent_options) {
4016          flags = 0;
4017          klass->inherit_options(role, parent_is_format, &flags, options,
4018                                 parent_flags, parent_options);
4019      } else {
4020          flags = bdrv_get_flags(bs);
4021      }
4022  
4023      if (keep_old_opts) {
4024          /* Old values are used for options that aren't set yet */
4025          old_options = qdict_clone_shallow(bs->options);
4026          bdrv_join_options(bs, options, old_options);
4027          qobject_unref(old_options);
4028      }
4029  
4030      /* We have the final set of options so let's update the flags */
          /* Work on a copy: absorbing into opts must not modify @options itself */
4031      options_copy = qdict_clone_shallow(options);
4032      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4033      qemu_opts_absorb_qdict(opts, options_copy, NULL);
4034      update_flags_from_options(&flags, opts);
4035      qemu_opts_del(opts);
4036      qobject_unref(options_copy);
4037  
4038      /* bdrv_open_inherit() sets and clears some additional flags internally */
4039      flags &= ~BDRV_O_PROTOCOL;
4040      if (flags & BDRV_O_RDWR) {
4041          flags |= BDRV_O_ALLOW_RDWR;
4042      }
4043  
4044      if (!bs_entry) {
4045          bs_entry = g_new0(BlockReopenQueueEntry, 1);
4046          QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
4047      } else {
              /* Reusing the entry: drop the dicts it held before replacing them */
4048          qobject_unref(bs_entry->state.options);
4049          qobject_unref(bs_entry->state.explicit_options);
4050      }
4051  
4052      bs_entry->state.bs = bs;
4053      bs_entry->state.options = options;
4054      bs_entry->state.explicit_options = explicit_options;
4055      bs_entry->state.flags = flags;
4056  
4057      /*
4058       * If keep_old_opts is false then it means that unspecified
4059       * options must be reset to their original value. We don't allow
4060       * resetting 'backing' but we need to know if the option is
4061       * missing in order to decide if we have to return an error.
4062       */
4063      if (!keep_old_opts) {
4064          bs_entry->state.backing_missing =
4065              !qdict_haskey(options, "backing") &&
4066              !qdict_haskey(options, "backing.driver");
4067      }
4068  
4069      QLIST_FOREACH(child, &bs->children, next) {
4070          QDict *new_child_options = NULL;
4071          bool child_keep_old = keep_old_opts;
4072  
4073          /* reopen can only change the options of block devices that were
4074           * implicitly created and inherited options. For other (referenced)
4075           * block devices, a syntax like "backing.foo" results in an error. */
4076          if (child->bs->inherits_from != bs) {
4077              continue;
4078          }
4079  
4080          /* Check if the options contain a child reference */
4081          if (qdict_haskey(options, child->name)) {
4082              const char *childref = qdict_get_try_str(options, child->name);
4083              /*
4084               * The current child must not be reopened if the child
4085               * reference is null or points to a different node.
4086               */
4087              if (g_strcmp0(childref, child->bs->node_name)) {
4088                  continue;
4089              }
4090              /*
4091               * If the child reference points to the current child then
4092               * reopen it with its existing set of options (note that
4093               * it can still inherit new options from the parent).
4094               */
4095              child_keep_old = true;
4096          } else {
4097              /* Extract child options ("child-name.*") */
4098              char *child_key_dot = g_strdup_printf("%s.", child->name);
4099              qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
4100              qdict_extract_subqdict(options, &new_child_options, child_key_dot);
4101              g_free(child_key_dot);
4102          }
4103  
              /*
               * Queue the child with this node acting as its parent: our final
               * options and flags are what the child inherits from.
               */
4104          bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
4105                                  child->klass, child->role, bs->drv->is_format,
4106                                  options, flags, child_keep_old);
4107      }
4108  
4109      return bs_queue;
4110  }
4111  
4112  BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
4113                                      BlockDriverState *bs,
4114                                      QDict *options, bool keep_old_opts)
4115  {
4116      return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
4117                                     NULL, 0, keep_old_opts);
4118  }
4119  
4120  void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
4121  {
4122      if (bs_queue) {
4123          BlockReopenQueueEntry *bs_entry, *next;
4124          QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4125              qobject_unref(bs_entry->state.explicit_options);
4126              qobject_unref(bs_entry->state.options);
4127              g_free(bs_entry);
4128          }
4129          g_free(bs_queue);
4130      }
4131  }
4132  
4133  /*
4134   * Reopen multiple BlockDriverStates atomically & transactionally.
4135   *
4136   * The queue passed in (bs_queue) must have been built up previously
4137   * via bdrv_reopen_queue(). It is freed before this function returns,
       * on success as well as on failure.
4138   *
4139   * Reopens all BDS specified in the queue, with the appropriate
4140   * flags.  All devices are prepared for reopen, and failure of any
4141   * device will cause all device changes to be abandoned, and intermediate
4142   * data cleaned up.
4143   *
4144   * If all devices prepare successfully, then the changes are committed
4145   * to all devices.
4146   *
4147   * All affected nodes must be drained between bdrv_reopen_queue() and
4148   * bdrv_reopen_multiple().
4149   *
4150   * To be called from the main thread, with all other AioContexts unlocked.
       *
       * Returns 0 on success. On failure, returns the negative value reported
       * by the step that failed and sets @errp.
4151   */
4152  int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
4153  {
4154      int ret = -1;
4155      BlockReopenQueueEntry *bs_entry, *next;
4156      AioContext *ctx;
4157      Transaction *tran = tran_new();
4158      g_autoptr(GHashTable) found = NULL;
4159      g_autoptr(GSList) refresh_list = NULL;
4160  
4161      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
4162      assert(bs_queue != NULL);
4163  
          /* Step 1: flush every queued node, each under its own AioContext */
4164      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
4165          ctx = bdrv_get_aio_context(bs_entry->state.bs);
4166          aio_context_acquire(ctx);
4167          ret = bdrv_flush(bs_entry->state.bs);
4168          aio_context_release(ctx);
4169          if (ret < 0) {
4170              error_setg_errno(errp, -ret, "Error flushing drive");
4171              goto abort;
4172          }
4173      }
4174  
          /*
           * Step 2: prepare every node. Entries are marked as prepared so that
           * the abort path only undoes those that got this far.
           */
4175      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
4176          assert(bs_entry->state.bs->quiesce_counter > 0);
4177          ctx = bdrv_get_aio_context(bs_entry->state.bs);
4178          aio_context_acquire(ctx);
4179          ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
4180          aio_context_release(ctx);
4181          if (ret < 0) {
4182              goto abort;
4183          }
4184          bs_entry->prepared = true;
4185      }
4186  
          /*
           * Step 3: collect the nodes whose permissions need refreshing: each
           * reopened node plus any backing/file child that was replaced.
           */
4187      found = g_hash_table_new(NULL, NULL);
4188      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
4189          BDRVReopenState *state = &bs_entry->state;
4190  
4191          refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
4192          if (state->old_backing_bs) {
4193              refresh_list = bdrv_topological_dfs(refresh_list, found,
4194                                                  state->old_backing_bs);
4195          }
4196          if (state->old_file_bs) {
4197              refresh_list = bdrv_topological_dfs(refresh_list, found,
4198                                                  state->old_file_bs);
4199          }
4200      }
4201  
4202      /*
4203       * Note that the file-posix driver relies on the permission update done
4204       * during reopen (even if no permission changed), because it wants "new"
4205       * permissions for reconfiguring the fd and that's why it does it in
4206       * raw_check_perm(), not in raw_reopen_prepare() which is called with "old"
           * permissions.
4207       */
4208      ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
4209      if (ret < 0) {
4210          goto abort;
4211      }
4212  
4213      /*
4214       * If we reach this point, we have success and just need to apply the
4215       * changes.
4216       *
4217       * Reverse order is used to accommodate the qcow2 driver: on commit it
4218       * needs to write the IN_USE flag to the image, to mark bitmaps in the
4219       * image as invalid. But children usually come after parents in the
4220       * reopen queue, so go from the last to the first element.
4221       */
4222      QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
4223          ctx = bdrv_get_aio_context(bs_entry->state.bs);
4224          aio_context_acquire(ctx);
4225          bdrv_reopen_commit(&bs_entry->state);
4226          aio_context_release(ctx);
4227      }
4228  
4229      tran_commit(tran);
4230  
          /* Optional per-driver hook, run only after the transaction is committed */
4231      QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
4232          BlockDriverState *bs = bs_entry->state.bs;
4233  
4234          if (bs->drv->bdrv_reopen_commit_post) {
4235              ctx = bdrv_get_aio_context(bs);
4236              aio_context_acquire(ctx);
4237              bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
4238              aio_context_release(ctx);
4239          }
4240      }
4241  
4242      ret = 0;
4243      goto cleanup;
4244  
      /* Failure: roll back the transaction, then abort every prepared entry */
4245  abort:
4246      tran_abort(tran);
4247      QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4248          if (bs_entry->prepared) {
4249              ctx = bdrv_get_aio_context(bs_entry->state.bs);
4250              aio_context_acquire(ctx);
4251              bdrv_reopen_abort(&bs_entry->state);
4252              aio_context_release(ctx);
4253          }
4254      }
4255  
      /* Reached on success and on failure: the queue is always consumed */
4256  cleanup:
4257      bdrv_reopen_queue_free(bs_queue);
4258  
4259      return ret;
4260  }
4261  
4262  int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
4263                  Error **errp)
4264  {
4265      AioContext *ctx = bdrv_get_aio_context(bs);
4266      BlockReopenQueue *queue;
4267      int ret;
4268  
4269      bdrv_subtree_drained_begin(bs);
4270      if (ctx != qemu_get_aio_context()) {
4271          aio_context_release(ctx);
4272      }
4273  
4274      queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
4275      ret = bdrv_reopen_multiple(queue, errp);
4276  
4277      if (ctx != qemu_get_aio_context()) {
4278          aio_context_acquire(ctx);
4279      }
4280      bdrv_subtree_drained_end(bs);
4281  
4282      return ret;
4283  }
4284  
4285  int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
4286                                Error **errp)
4287  {
4288      QDict *opts = qdict_new();
4289  
4290      qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
4291  
4292      return bdrv_reopen(bs, opts, true, errp);
4293  }
4294  
4295  /*
4296   * Take a BDRVReopenState and check if the value of 'backing' (if
4297   * @is_backing is true) or 'file' (otherwise) in the
4298   * reopen_state->options QDict is valid or not.
4299   *
4300   * If the option is missing from the QDict then return 0.
4301   *
4302   * If the option contains the node name of the current backing/file child
4303   * of reopen_state->bs then return 0. The same applies if it names the node
4304   * found below the current child after skipping implicit filters.
4305   *
4306   * If the option contains a different node name (or, for 'backing' only,
4307   * is null) then check whether the current child can be replaced with the
4308   * new one. If that's the case then the old child is remembered in
4309   * reopen_state->old_backing_bs or reopen_state->old_file_bs and the
       * replacement is carried out through bdrv_set_file_or_backing_noperm()
       * as part of @tran.
       *
       * Errors: -EINVAL if the node does not exist, if the change would create
       * a cycle, or if a filter node has no such child; -EPERM if the current
       * child is an implicit node.
       *
4310   * Return 0 on success, otherwise return < 0 and set @errp.
4311   */
4312  static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
4313                                               bool is_backing, Transaction *tran,
4314                                               Error **errp)
4315  {
4316      BlockDriverState *bs = reopen_state->bs;
4317      BlockDriverState *new_child_bs;
4318      BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) :
4319                                                    child_bs(bs->file);
4320      const char *child_name = is_backing ? "backing" : "file";
4321      QObject *value;
4322      const char *str;
4323  
4324      value = qdict_get(reopen_state->options, child_name);
4325      if (value == NULL) {
4326          return 0;
4327      }
4328  
4329      switch (qobject_type(value)) {
4330      case QTYPE_QNULL:
4331          assert(is_backing); /* The 'file' option does not allow a null value */
4332          new_child_bs = NULL;
4333          break;
4334      case QTYPE_QSTRING:
4335          str = qstring_get_str(qobject_to(QString, value));
4336          new_child_bs = bdrv_lookup_bs(NULL, str, errp);
4337          if (new_child_bs == NULL) {
4338              return -EINVAL;
4339          } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
                  /* bs is reachable from the new child: attaching it would loop */
4340              error_setg(errp, "Making '%s' a %s child of '%s' would create a "
4341                         "cycle", str, child_name, bs->node_name);
4342              return -EINVAL;
4343          }
4344          break;
4345      default:
4346          /*
4347           * The options QDict has been flattened, so 'backing' and 'file'
4348           * do not allow any other data type here.
4349           */
4350          g_assert_not_reached();
4351      }
4352  
          /* Nothing to do if the option names the child that is already attached */
4353      if (old_child_bs == new_child_bs) {
4354          return 0;
4355      }
4356  
4357      if (old_child_bs) {
              /* Naming the node below implicit filters also counts as "no change" */
4358          if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
4359              return 0;
4360          }
4361  
4362          if (old_child_bs->implicit) {
4363              error_setg(errp, "Cannot replace implicit %s child of %s",
4364                         child_name, bs->node_name);
4365              return -EPERM;
4366          }
4367      }
4368  
4369      if (bs->drv->is_filter && !old_child_bs) {
4370          /*
4371           * Filters always have a file or a backing child, so we are trying to
4372           * change wrong child
4373           */
4374          error_setg(errp, "'%s' is a %s filter node that does not support a "
4375                     "%s child", bs->node_name, bs->drv->format_name, child_name);
4376          return -EINVAL;
4377      }
4378  
          /* Remember the child being replaced in the reopen state */
4379      if (is_backing) {
4380          reopen_state->old_backing_bs = old_child_bs;
4381      } else {
4382          reopen_state->old_file_bs = old_child_bs;
4383      }
4384  
4385      return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
4386                                             tran, errp);
4387  }
4388  
4389  /*
4390   * Prepares a BlockDriverState for reopen. All changes are staged in the
4391   * 'opaque' field of the BDRVReopenState, which is used and allocated by
4392   * the block driver layer .bdrv_reopen_prepare()
4393   *
4394   * bs is the BlockDriverState to reopen
4395   * flags are the new open flags
4396   * queue is the reopen queue
4397   *
4398   * Returns 0 on success, non-zero on error.  On error errp will be set
4399   * as well.
4400   *
4401   * On failure, bdrv_reopen_abort() will be called to clean up any data.
4402   * It is the responsibility of the caller to then call the abort() or
4403   * commit() for any other BDS that have been left in a prepare() state
4404   *
4405   */
4406  static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
4407                                 BlockReopenQueue *queue,
4408                                 Transaction *change_child_tran, Error **errp)
4409  {
4410      int ret = -1;
4411      int old_flags;
4412      Error *local_err = NULL;
4413      BlockDriver *drv;
4414      QemuOpts *opts;
4415      QDict *orig_reopen_opts;
4416      char *discard = NULL;
4417      bool read_only;
4418      bool drv_prepared = false;
4419  
4420      assert(reopen_state != NULL);
4421      assert(reopen_state->bs->drv != NULL);
4422      drv = reopen_state->bs->drv;
4423  
4424      /* This function and each driver's bdrv_reopen_prepare() remove
4425       * entries from reopen_state->options as they are processed, so
4426       * we need to make a copy of the original QDict. */
4427      orig_reopen_opts = qdict_clone_shallow(reopen_state->options);
4428  
4429      /* Process generic block layer options */
4430      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4431      if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
4432          ret = -EINVAL;
4433          goto error;
4434      }
4435  
4436      /* This was already called in bdrv_reopen_queue_child() so the flags
4437       * are up-to-date. This time we simply want to remove the options from
4438       * QemuOpts in order to indicate that they have been processed. */
4439      old_flags = reopen_state->flags;
4440      update_flags_from_options(&reopen_state->flags, opts);
4441      assert(old_flags == reopen_state->flags);
4442  
4443      discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
4444      if (discard != NULL) {
4445          if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
4446              error_setg(errp, "Invalid discard option");
4447              ret = -EINVAL;
4448              goto error;
4449          }
4450      }
4451  
4452      reopen_state->detect_zeroes =
4453          bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
4454      if (local_err) {
4455          error_propagate(errp, local_err);
4456          ret = -EINVAL;
4457          goto error;
4458      }
4459  
4460      /* All other options (including node-name and driver) must be unchanged.
4461       * Put them back into the QDict, so that they are checked at the end
4462       * of this function. */
4463      qemu_opts_to_qdict(opts, reopen_state->options);
4464  
4465      /* If we are to stay read-only, do not allow permission change
4466       * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
4467       * not set, or if the BDS still has copy_on_read enabled */
4468      read_only = !(reopen_state->flags & BDRV_O_RDWR);
4469      ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
4470      if (local_err) {
4471          error_propagate(errp, local_err);
4472          goto error;
4473      }
4474  
4475      if (drv->bdrv_reopen_prepare) {
4476          /*
4477           * If a driver-specific option is missing, it means that we
4478           * should reset it to its default value.
4479           * But not all options allow that, so we need to check it first.
4480           */
4481          ret = bdrv_reset_options_allowed(reopen_state->bs,
4482                                           reopen_state->options, errp);
4483          if (ret) {
4484              goto error;
4485          }
4486  
4487          ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
4488          if (ret) {
4489              if (local_err != NULL) {
4490                  error_propagate(errp, local_err);
4491              } else {
4492                  bdrv_refresh_filename(reopen_state->bs);
4493                  error_setg(errp, "failed while preparing to reopen image '%s'",
4494                             reopen_state->bs->filename);
4495              }
4496              goto error;
4497          }
4498      } else {
4499          /* It is currently mandatory to have a bdrv_reopen_prepare()
4500           * handler for each supported drv. */
4501          error_setg(errp, "Block format '%s' used by node '%s' "
4502                     "does not support reopening files", drv->format_name,
4503                     bdrv_get_device_or_node_name(reopen_state->bs));
4504          ret = -1;
4505          goto error;
4506      }
4507  
4508      drv_prepared = true;
4509  
4510      /*
4511       * We must provide the 'backing' option if the BDS has a backing
4512       * file or if the image file has a backing file name as part of
4513       * its metadata. Otherwise the 'backing' option can be omitted.
4514       */
4515      if (drv->supports_backing && reopen_state->backing_missing &&
4516          (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
4517          error_setg(errp, "backing is missing for '%s'",
4518                     reopen_state->bs->node_name);
4519          ret = -EINVAL;
4520          goto error;
4521      }
4522  
4523      /*
4524       * Allow changing the 'backing' option. The new value can be
4525       * either a reference to an existing node (using its node name)
4526       * or NULL to simply detach the current backing file.
4527       */
4528      ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
4529                                              change_child_tran, errp);
4530      if (ret < 0) {
4531          goto error;
4532      }
4533      qdict_del(reopen_state->options, "backing");
4534  
4535      /* Allow changing the 'file' option. In this case NULL is not allowed */
4536      ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
4537                                              change_child_tran, errp);
4538      if (ret < 0) {
4539          goto error;
4540      }
4541      qdict_del(reopen_state->options, "file");
4542  
4543      /* Options that are not handled are only okay if they are unchanged
4544       * compared to the old state. It is expected that some options are only
4545       * used for the initial open, but not reopen (e.g. filename) */
4546      if (qdict_size(reopen_state->options)) {
4547          const QDictEntry *entry = qdict_first(reopen_state->options);
4548  
4549          do {
4550              QObject *new = entry->value;
4551              QObject *old = qdict_get(reopen_state->bs->options, entry->key);
4552  
4553              /* Allow child references (child_name=node_name) as long as they
4554               * point to the current child (i.e. everything stays the same). */
4555              if (qobject_type(new) == QTYPE_QSTRING) {
4556                  BdrvChild *child;
4557                  QLIST_FOREACH(child, &reopen_state->bs->children, next) {
4558                      if (!strcmp(child->name, entry->key)) {
4559                          break;
4560                      }
4561                  }
4562  
4563                  if (child) {
4564                      if (!strcmp(child->bs->node_name,
4565                                  qstring_get_str(qobject_to(QString, new)))) {
4566                          continue; /* Found child with this name, skip option */
4567                      }
4568                  }
4569              }
4570  
4571              /*
4572               * TODO: When using -drive to specify blockdev options, all values
4573               * will be strings; however, when using -blockdev, blockdev-add or
4574               * filenames using the json:{} pseudo-protocol, they will be
4575               * correctly typed.
4576               * In contrast, reopening options are (currently) always strings
4577               * (because you can only specify them through qemu-io; all other
4578               * callers do not specify any options).
4579               * Therefore, when using anything other than -drive to create a BDS,
4580               * this cannot detect non-string options as unchanged, because
4581               * qobject_is_equal() always returns false for objects of different
4582               * type.  In the future, this should be remedied by correctly typing
4583               * all options.  For now, this is not too big of an issue because
4584               * the user can simply omit options which cannot be changed anyway,
4585               * so they will stay unchanged.
4586               */
4587              if (!qobject_is_equal(new, old)) {
4588                  error_setg(errp, "Cannot change the option '%s'", entry->key);
4589                  ret = -EINVAL;
4590                  goto error;
4591              }
4592          } while ((entry = qdict_next(reopen_state->options, entry)));
4593      }
4594  
4595      ret = 0;
4596  
4597      /* Restore the original reopen_state->options QDict */
4598      qobject_unref(reopen_state->options);
4599      reopen_state->options = qobject_ref(orig_reopen_opts);
4600  
4601  error:
4602      if (ret < 0 && drv_prepared) {
4603          /* drv->bdrv_reopen_prepare() has succeeded, so we need to
4604           * call drv->bdrv_reopen_abort() before signaling an error
4605           * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
4606           * when the respective bdrv_reopen_prepare() has failed) */
4607          if (drv->bdrv_reopen_abort) {
4608              drv->bdrv_reopen_abort(reopen_state);
4609          }
4610      }
4611      qemu_opts_del(opts);
4612      qobject_unref(orig_reopen_opts);
4613      g_free(discard);
4614      return ret;
4615  }
4616  
4617  /*
4618   * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
4619   * makes them final by swapping the staging BlockDriverState contents into
4620   * the active BlockDriverState contents.
4621   */
4622  static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
4623  {
4624      BlockDriver *drv;
4625      BlockDriverState *bs;
4626      BdrvChild *child;
4627  
4628      assert(reopen_state != NULL);
4629      bs = reopen_state->bs;
4630      drv = bs->drv;
4631      assert(drv != NULL);
4632  
4633      /* If there are any driver level actions to take */
4634      if (drv->bdrv_reopen_commit) {
4635          drv->bdrv_reopen_commit(reopen_state);
4636      }
4637  
4638      /* set BDS specific flags now */
4639      qobject_unref(bs->explicit_options);
4640      qobject_unref(bs->options);
4641      qobject_ref(reopen_state->explicit_options);
4642      qobject_ref(reopen_state->options);
4643  
4644      bs->explicit_options   = reopen_state->explicit_options;
4645      bs->options            = reopen_state->options;
4646      bs->open_flags         = reopen_state->flags;
4647      bs->detect_zeroes      = reopen_state->detect_zeroes;
4648  
4649      /* Remove child references from bs->options and bs->explicit_options.
4650       * Child options were already removed in bdrv_reopen_queue_child() */
4651      QLIST_FOREACH(child, &bs->children, next) {
4652          qdict_del(bs->explicit_options, child->name);
4653          qdict_del(bs->options, child->name);
4654      }
4655      /* backing is probably removed, so it's not handled by previous loop */
4656      qdict_del(bs->explicit_options, "backing");
4657      qdict_del(bs->options, "backing");
4658  
4659      bdrv_refresh_limits(bs, NULL, NULL);
4660  }
4661  
4662  /*
4663   * Abort the reopen, and delete and free the staged changes in
4664   * reopen_state
4665   */
4666  static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
4667  {
4668      BlockDriver *drv;
4669  
4670      assert(reopen_state != NULL);
4671      drv = reopen_state->bs->drv;
4672      assert(drv != NULL);
4673  
4674      if (drv->bdrv_reopen_abort) {
4675          drv->bdrv_reopen_abort(reopen_state);
4676      }
4677  }
4678  
4679  
4680  static void bdrv_close(BlockDriverState *bs)
4681  {
4682      BdrvAioNotifier *ban, *ban_next;
4683      BdrvChild *child, *next;
4684  
4685      assert(!bs->refcnt);
4686  
4687      bdrv_drained_begin(bs); /* complete I/O */
4688      bdrv_flush(bs);
4689      bdrv_drain(bs); /* in case flush left pending I/O */
4690  
4691      if (bs->drv) {
4692          if (bs->drv->bdrv_close) {
4693              /* Must unfreeze all children, so bdrv_unref_child() works */
4694              bs->drv->bdrv_close(bs);
4695          }
4696          bs->drv = NULL;
4697      }
4698  
4699      QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
4700          bdrv_unref_child(bs, child);
4701      }
4702  
4703      bs->backing = NULL;
4704      bs->file = NULL;
4705      g_free(bs->opaque);
4706      bs->opaque = NULL;
4707      qatomic_set(&bs->copy_on_read, 0);
4708      bs->backing_file[0] = '\0';
4709      bs->backing_format[0] = '\0';
4710      bs->total_sectors = 0;
4711      bs->encrypted = false;
4712      bs->sg = false;
4713      qobject_unref(bs->options);
4714      qobject_unref(bs->explicit_options);
4715      bs->options = NULL;
4716      bs->explicit_options = NULL;
4717      qobject_unref(bs->full_open_options);
4718      bs->full_open_options = NULL;
4719      g_free(bs->block_status_cache);
4720      bs->block_status_cache = NULL;
4721  
4722      bdrv_release_named_dirty_bitmaps(bs);
4723      assert(QLIST_EMPTY(&bs->dirty_bitmaps));
4724  
4725      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4726          g_free(ban);
4727      }
4728      QLIST_INIT(&bs->aio_notifiers);
4729      bdrv_drained_end(bs);
4730  
4731      /*
4732       * If we're still inside some bdrv_drain_all_begin()/end() sections, end
4733       * them now since this BDS won't exist anymore when bdrv_drain_all_end()
4734       * gets called.
4735       */
4736      if (bs->quiesce_counter) {
4737          bdrv_drain_all_end_quiesce(bs);
4738      }
4739  }
4740  
4741  void bdrv_close_all(void)
4742  {
4743      assert(job_next(NULL) == NULL);
4744  
4745      /* Drop references from requests still in flight, such as canceled block
4746       * jobs whose AIO context has not been polled yet */
4747      bdrv_drain_all();
4748  
4749      blk_remove_all_bs();
4750      blockdev_close_all_bdrv_states();
4751  
4752      assert(QTAILQ_EMPTY(&all_bdrv_states));
4753  }
4754  
4755  static bool should_update_child(BdrvChild *c, BlockDriverState *to)
4756  {
4757      GQueue *queue;
4758      GHashTable *found;
4759      bool ret;
4760  
4761      if (c->klass->stay_at_node) {
4762          return false;
4763      }
4764  
4765      /* If the child @c belongs to the BDS @to, replacing the current
4766       * c->bs by @to would mean to create a loop.
4767       *
4768       * Such a case occurs when appending a BDS to a backing chain.
4769       * For instance, imagine the following chain:
4770       *
4771       *   guest device -> node A -> further backing chain...
4772       *
4773       * Now we create a new BDS B which we want to put on top of this
4774       * chain, so we first attach A as its backing node:
4775       *
4776       *                   node B
4777       *                     |
4778       *                     v
4779       *   guest device -> node A -> further backing chain...
4780       *
4781       * Finally we want to replace A by B.  When doing that, we want to
4782       * replace all pointers to A by pointers to B -- except for the
4783       * pointer from B because (1) that would create a loop, and (2)
4784       * that pointer should simply stay intact:
4785       *
4786       *   guest device -> node B
4787       *                     |
4788       *                     v
4789       *                   node A -> further backing chain...
4790       *
4791       * In general, when replacing a node A (c->bs) by a node B (@to),
4792       * if A is a child of B, that means we cannot replace A by B there
4793       * because that would create a loop.  Silently detaching A from B
4794       * is also not really an option.  So overall just leaving A in
4795       * place there is the most sensible choice.
4796       *
4797       * We would also create a loop in any cases where @c is only
4798       * indirectly referenced by @to. Prevent this by returning false
4799       * if @c is found (by breadth-first search) anywhere in the whole
4800       * subtree of @to.
4801       */
4802  
4803      ret = true;
4804      found = g_hash_table_new(NULL, NULL);
4805      g_hash_table_add(found, to);
4806      queue = g_queue_new();
4807      g_queue_push_tail(queue, to);
4808  
4809      while (!g_queue_is_empty(queue)) {
4810          BlockDriverState *v = g_queue_pop_head(queue);
4811          BdrvChild *c2;
4812  
4813          QLIST_FOREACH(c2, &v->children, next) {
4814              if (c2 == c) {
4815                  ret = false;
4816                  break;
4817              }
4818  
4819              if (g_hash_table_contains(found, c2->bs)) {
4820                  continue;
4821              }
4822  
4823              g_queue_push_tail(queue, c2->bs);
4824              g_hash_table_add(found, c2->bs);
4825          }
4826      }
4827  
4828      g_queue_free(queue);
4829      g_hash_table_destroy(found);
4830  
4831      return ret;
4832  }
4833  
4834  typedef struct BdrvRemoveFilterOrCowChild {
4835      BdrvChild *child;
4836      bool is_backing;
4837  } BdrvRemoveFilterOrCowChild;
4838  
4839  static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
4840  {
4841      BdrvRemoveFilterOrCowChild *s = opaque;
4842      BlockDriverState *parent_bs = s->child->opaque;
4843  
4844      QLIST_INSERT_HEAD(&parent_bs->children, s->child, next);
4845      if (s->is_backing) {
4846          parent_bs->backing = s->child;
4847      } else {
4848          parent_bs->file = s->child;
4849      }
4850  
4851      /*
4852       * We don't have to restore child->bs here to undo bdrv_replace_child_tran()
4853       * because that function is transactionable and it registered own completion
4854       * entries in @tran, so .abort() for bdrv_replace_child_safe() will be
4855       * called automatically.
4856       */
4857  }
4858  
4859  static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
4860  {
4861      BdrvRemoveFilterOrCowChild *s = opaque;
4862  
4863      bdrv_child_free(s->child);
4864  }
4865  
4866  static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
4867      .abort = bdrv_remove_filter_or_cow_child_abort,
4868      .commit = bdrv_remove_filter_or_cow_child_commit,
4869      .clean = g_free,
4870  };
4871  
4872  /*
4873   * A function to remove backing or file child of @bs.
4874   * Function doesn't update permissions, caller is responsible for this.
4875   */
4876  static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
4877                                                BdrvChild *child,
4878                                                Transaction *tran)
4879  {
4880      BdrvRemoveFilterOrCowChild *s;
4881  
4882      assert(child == bs->backing || child == bs->file);
4883  
4884      if (!child) {
4885          return;
4886      }
4887  
4888      if (child->bs) {
4889          bdrv_replace_child_tran(child, NULL, tran);
4890      }
4891  
4892      s = g_new(BdrvRemoveFilterOrCowChild, 1);
4893      *s = (BdrvRemoveFilterOrCowChild) {
4894          .child = child,
4895          .is_backing = (child == bs->backing),
4896      };
4897      tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
4898  
4899      QLIST_SAFE_REMOVE(child, next);
4900      if (s->is_backing) {
4901          bs->backing = NULL;
4902      } else {
4903          bs->file = NULL;
4904      }
4905  }
4906  
4907  /*
4908   * A function to remove backing-chain child of @bs if exists: cow child for
4909   * format nodes (always .backing) and filter child for filters (may be .file or
4910   * .backing)
4911   */
4912  static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
4913                                              Transaction *tran)
4914  {
4915      bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran);
4916  }
4917  
4918  static int bdrv_replace_node_noperm(BlockDriverState *from,
4919                                      BlockDriverState *to,
4920                                      bool auto_skip, Transaction *tran,
4921                                      Error **errp)
4922  {
4923      BdrvChild *c, *next;
4924  
4925      QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
4926          assert(c->bs == from);
4927          if (!should_update_child(c, to)) {
4928              if (auto_skip) {
4929                  continue;
4930              }
4931              error_setg(errp, "Should not change '%s' link to '%s'",
4932                         c->name, from->node_name);
4933              return -EINVAL;
4934          }
4935          if (c->frozen) {
4936              error_setg(errp, "Cannot change '%s' link to '%s'",
4937                         c->name, from->node_name);
4938              return -EPERM;
4939          }
4940          bdrv_replace_child_tran(c, to, tran);
4941      }
4942  
4943      return 0;
4944  }
4945  
4946  /*
4947   * With auto_skip=true bdrv_replace_node_common skips updating from parents
4948   * if it creates a parent-child relation loop or if parent is block-job.
4949   *
4950   * With auto_skip=false the error is returned if from has a parent which should
4951   * not be updated.
4952   *
4953   * With @detach_subchain=true @to must be in a backing chain of @from. In this
4954   * case backing link of the cow-parent of @to is removed.
4955   */
4956  static int bdrv_replace_node_common(BlockDriverState *from,
4957                                      BlockDriverState *to,
4958                                      bool auto_skip, bool detach_subchain,
4959                                      Error **errp)
4960  {
4961      Transaction *tran = tran_new();
4962      g_autoptr(GHashTable) found = NULL;
4963      g_autoptr(GSList) refresh_list = NULL;
4964      BlockDriverState *to_cow_parent = NULL;
4965      int ret;
4966  
4967      if (detach_subchain) {
4968          assert(bdrv_chain_contains(from, to));
4969          assert(from != to);
4970          for (to_cow_parent = from;
4971               bdrv_filter_or_cow_bs(to_cow_parent) != to;
4972               to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
4973          {
4974              ;
4975          }
4976      }
4977  
4978      /* Make sure that @from doesn't go away until we have successfully attached
4979       * all of its parents to @to. */
4980      bdrv_ref(from);
4981  
4982      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
4983      assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
4984      bdrv_drained_begin(from);
4985  
4986      /*
4987       * Do the replacement without permission update.
4988       * Replacement may influence the permissions, we should calculate new
4989       * permissions based on new graph. If we fail, we'll roll-back the
4990       * replacement.
4991       */
4992      ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
4993      if (ret < 0) {
4994          goto out;
4995      }
4996  
4997      if (detach_subchain) {
4998          bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
4999      }
5000  
5001      found = g_hash_table_new(NULL, NULL);
5002  
5003      refresh_list = bdrv_topological_dfs(refresh_list, found, to);
5004      refresh_list = bdrv_topological_dfs(refresh_list, found, from);
5005  
5006      ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5007      if (ret < 0) {
5008          goto out;
5009      }
5010  
5011      ret = 0;
5012  
5013  out:
5014      tran_finalize(tran, ret);
5015  
5016      bdrv_drained_end(from);
5017      bdrv_unref(from);
5018  
5019      return ret;
5020  }
5021  
5022  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
5023                        Error **errp)
5024  {
5025      return bdrv_replace_node_common(from, to, true, false, errp);
5026  }
5027  
5028  int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
5029  {
5030      return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
5031                                      errp);
5032  }
5033  
5034  /*
5035   * Add new bs contents at the top of an image chain while the chain is
5036   * live, while keeping required fields on the top layer.
5037   *
5038   * This will modify the BlockDriverState fields, and swap contents
5039   * between bs_new and bs_top. Both bs_new and bs_top are modified.
5040   *
5041   * bs_new must not be attached to a BlockBackend and must not have backing
5042   * child.
5043   *
5044   * This function does not create any image files.
5045   */
5046  int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
5047                  Error **errp)
5048  {
5049      int ret;
5050      Transaction *tran = tran_new();
5051  
5052      assert(!bs_new->backing);
5053  
5054      ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
5055                                     &child_of_bds, bdrv_backing_role(bs_new),
5056                                     &bs_new->backing, tran, errp);
5057      if (ret < 0) {
5058          goto out;
5059      }
5060  
5061      ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
5062      if (ret < 0) {
5063          goto out;
5064      }
5065  
5066      ret = bdrv_refresh_perms(bs_new, errp);
5067  out:
5068      tran_finalize(tran, ret);
5069  
5070      bdrv_refresh_limits(bs_top, NULL, NULL);
5071  
5072      return ret;
5073  }
5074  
5075  /* Not for empty child */
5076  int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
5077                            Error **errp)
5078  {
5079      int ret;
5080      Transaction *tran = tran_new();
5081      g_autoptr(GHashTable) found = NULL;
5082      g_autoptr(GSList) refresh_list = NULL;
5083      BlockDriverState *old_bs = child->bs;
5084  
5085      bdrv_ref(old_bs);
5086      bdrv_drained_begin(old_bs);
5087      bdrv_drained_begin(new_bs);
5088  
5089      bdrv_replace_child_tran(child, new_bs, tran);
5090  
5091      found = g_hash_table_new(NULL, NULL);
5092      refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
5093      refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
5094  
5095      ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5096  
5097      tran_finalize(tran, ret);
5098  
5099      bdrv_drained_end(old_bs);
5100      bdrv_drained_end(new_bs);
5101      bdrv_unref(old_bs);
5102  
5103      return ret;
5104  }
5105  
5106  static void bdrv_delete(BlockDriverState *bs)
5107  {
5108      assert(bdrv_op_blocker_is_empty(bs));
5109      assert(!bs->refcnt);
5110  
5111      /* remove from list, if necessary */
5112      if (bs->node_name[0] != '\0') {
5113          QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
5114      }
5115      QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
5116  
5117      bdrv_close(bs);
5118  
5119      g_free(bs);
5120  }
5121  
5122  
5123  /*
5124   * Replace @bs by newly created block node.
5125   *
5126   * @options is a QDict of options to pass to the block drivers, or NULL for an
5127   * empty set of options. The reference to the QDict belongs to the block layer
5128   * after the call (even on failure), so if the caller intends to reuse the
5129   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
5130   */
5131  BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
5132                                     int flags, Error **errp)
5133  {
5134      ERRP_GUARD();
5135      int ret;
5136      BlockDriverState *new_node_bs = NULL;
5137      const char *drvname, *node_name;
5138      BlockDriver *drv;
5139  
5140      drvname = qdict_get_try_str(options, "driver");
5141      if (!drvname) {
5142          error_setg(errp, "driver is not specified");
5143          goto fail;
5144      }
5145  
5146      drv = bdrv_find_format(drvname);
5147      if (!drv) {
5148          error_setg(errp, "Unknown driver: '%s'", drvname);
5149          goto fail;
5150      }
5151  
5152      node_name = qdict_get_try_str(options, "node-name");
5153  
5154      new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
5155                                              errp);
5156      options = NULL; /* bdrv_new_open_driver() eats options */
5157      if (!new_node_bs) {
5158          error_prepend(errp, "Could not create node: ");
5159          goto fail;
5160      }
5161  
5162      bdrv_drained_begin(bs);
5163      ret = bdrv_replace_node(bs, new_node_bs, errp);
5164      bdrv_drained_end(bs);
5165  
5166      if (ret < 0) {
5167          error_prepend(errp, "Could not replace node: ");
5168          goto fail;
5169      }
5170  
5171      return new_node_bs;
5172  
5173  fail:
5174      qobject_unref(options);
5175      bdrv_unref(new_node_bs);
5176      return NULL;
5177  }
5178  
5179  /*
5180   * Run consistency checks on an image
5181   *
5182   * Returns 0 if the check could be completed (it doesn't mean that the image is
5183   * free of errors) or -errno when an internal error occurred. The results of the
5184   * check are stored in res.
5185   */
5186  int coroutine_fn bdrv_co_check(BlockDriverState *bs,
5187                                 BdrvCheckResult *res, BdrvCheckMode fix)
5188  {
5189      if (bs->drv == NULL) {
5190          return -ENOMEDIUM;
5191      }
5192      if (bs->drv->bdrv_co_check == NULL) {
5193          return -ENOTSUP;
5194      }
5195  
5196      memset(res, 0, sizeof(*res));
5197      return bs->drv->bdrv_co_check(bs, res, fix);
5198  }
5199  
5200  /*
5201   * Return values:
5202   * 0        - success
5203   * -EINVAL  - backing format specified, but no file
5204   * -ENOSPC  - can't update the backing file because no space is left in the
5205   *            image file header
5206   * -ENOTSUP - format driver doesn't support changing the backing file
5207   */
5208  int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
5209                               const char *backing_fmt, bool require)
5210  {
5211      BlockDriver *drv = bs->drv;
5212      int ret;
5213  
5214      if (!drv) {
5215          return -ENOMEDIUM;
5216      }
5217  
5218      /* Backing file format doesn't make sense without a backing file */
5219      if (backing_fmt && !backing_file) {
5220          return -EINVAL;
5221      }
5222  
5223      if (require && backing_file && !backing_fmt) {
5224          return -EINVAL;
5225      }
5226  
5227      if (drv->bdrv_change_backing_file != NULL) {
5228          ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
5229      } else {
5230          ret = -ENOTSUP;
5231      }
5232  
5233      if (ret == 0) {
5234          pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
5235          pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
5236          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
5237                  backing_file ?: "");
5238      }
5239      return ret;
5240  }
5241  
5242  /*
5243   * Finds the first non-filter node above bs in the chain between
5244   * active and bs.  The returned node is either an immediate parent of
5245   * bs, or there are only filter nodes between the two.
5246   *
5247   * Returns NULL if bs is not found in active's image chain,
5248   * or if active == bs.
5249   *
5250   * Returns the bottommost base image if bs == NULL.
5251   */
5252  BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
5253                                      BlockDriverState *bs)
5254  {
5255      bs = bdrv_skip_filters(bs);
5256      active = bdrv_skip_filters(active);
5257  
5258      while (active) {
5259          BlockDriverState *next = bdrv_backing_chain_next(active);
5260          if (bs == next) {
5261              return active;
5262          }
5263          active = next;
5264      }
5265  
5266      return NULL;
5267  }
5268  
5269  /* Given a BDS, searches for the base layer. */
5270  BlockDriverState *bdrv_find_base(BlockDriverState *bs)
5271  {
5272      return bdrv_find_overlay(bs, NULL);
5273  }
5274  
5275  /*
5276   * Return true if at least one of the COW (backing) and filter links
5277   * between @bs and @base is frozen. @errp is set if that's the case.
5278   * @base must be reachable from @bs, or NULL.
5279   */
5280  bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
5281                                    Error **errp)
5282  {
5283      BlockDriverState *i;
5284      BdrvChild *child;
5285  
5286      for (i = bs; i != base; i = child_bs(child)) {
5287          child = bdrv_filter_or_cow_child(i);
5288  
5289          if (child && child->frozen) {
5290              error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
5291                         child->name, i->node_name, child->bs->node_name);
5292              return true;
5293          }
5294      }
5295  
5296      return false;
5297  }
5298  
5299  /*
5300   * Freeze all COW (backing) and filter links between @bs and @base.
5301   * If any of the links is already frozen the operation is aborted and
5302   * none of the links are modified.
5303   * @base must be reachable from @bs, or NULL.
5304   * Returns 0 on success. On failure returns < 0 and sets @errp.
5305   */
5306  int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
5307                                Error **errp)
5308  {
5309      BlockDriverState *i;
5310      BdrvChild *child;
5311  
5312      if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
5313          return -EPERM;
5314      }
5315  
5316      for (i = bs; i != base; i = child_bs(child)) {
5317          child = bdrv_filter_or_cow_child(i);
5318          if (child && child->bs->never_freeze) {
5319              error_setg(errp, "Cannot freeze '%s' link to '%s'",
5320                         child->name, child->bs->node_name);
5321              return -EPERM;
5322          }
5323      }
5324  
5325      for (i = bs; i != base; i = child_bs(child)) {
5326          child = bdrv_filter_or_cow_child(i);
5327          if (child) {
5328              child->frozen = true;
5329          }
5330      }
5331  
5332      return 0;
5333  }
5334  
5335  /*
5336   * Unfreeze all COW (backing) and filter links between @bs and @base.
5337   * The caller must ensure that all links are frozen before using this
5338   * function.
5339   * @base must be reachable from @bs, or NULL.
5340   */
5341  void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
5342  {
5343      BlockDriverState *i;
5344      BdrvChild *child;
5345  
5346      for (i = bs; i != base; i = child_bs(child)) {
5347          child = bdrv_filter_or_cow_child(i);
5348          if (child) {
5349              assert(child->frozen);
5350              child->frozen = false;
5351          }
5352      }
5353  }
5354  
5355  /*
5356   * Drops images above 'base' up to and including 'top', and sets the image
5357   * above 'top' to have base as its backing file.
5358   *
5359   * Requires that the overlay to 'top' is opened r/w, so that the backing file
5360   * information in 'bs' can be properly updated.
5361   *
5362   * E.g., this will convert the following chain:
5363   * bottom <- base <- intermediate <- top <- active
5364   *
5365   * to
5366   *
5367   * bottom <- base <- active
5368   *
5369   * It is allowed for bottom==base, in which case it converts:
5370   *
5371   * base <- intermediate <- top <- active
5372   *
5373   * to
5374   *
5375   * base <- active
5376   *
5377   * If backing_file_str is non-NULL, it will be used when modifying top's
5378   * overlay image metadata.
5379   *
5380   * Error conditions:
5381   *  if active == top, that is considered an error
5382   *
5383   */
5384  int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
5385                             const char *backing_file_str)
5386  {
5387      BlockDriverState *explicit_top = top;
5388      bool update_inherits_from;
5389      BdrvChild *c;
5390      Error *local_err = NULL;
5391      int ret = -EIO;
5392      g_autoptr(GSList) updated_children = NULL;
5393      GSList *p;
5394  
5395      bdrv_ref(top);
5396      bdrv_subtree_drained_begin(top);
5397  
5398      if (!top->drv || !base->drv) {
5399          goto exit;
5400      }
5401  
5402      /* Make sure that base is in the backing chain of top */
5403      if (!bdrv_chain_contains(top, base)) {
5404          goto exit;
5405      }
5406  
5407      /* If 'base' recursively inherits from 'top' then we should set
5408       * base->inherits_from to top->inherits_from after 'top' and all
5409       * other intermediate nodes have been dropped.
5410       * If 'top' is an implicit node (e.g. "commit_top") we should skip
5411       * it because no one inherits from it. We use explicit_top for that. */
5412      explicit_top = bdrv_skip_implicit_filters(explicit_top);
5413      update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
5414  
5415      /* success - we can delete the intermediate states, and link top->base */
5416      /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
5417       * we've figured out how they should work. */
5418      if (!backing_file_str) {
5419          bdrv_refresh_filename(base);
5420          backing_file_str = base->filename;
5421      }
5422  
5423      QLIST_FOREACH(c, &top->parents, next_parent) {
5424          updated_children = g_slist_prepend(updated_children, c);
5425      }
5426  
5427      /*
5428       * It seems correct to pass detach_subchain=true here, but it triggers
5429       * one more yet not fixed bug, when due to nested aio_poll loop we switch to
5430       * another drained section, which modify the graph (for example, removing
5431       * the child, which we keep in updated_children list). So, it's a TODO.
5432       *
5433       * Note, bug triggered if pass detach_subchain=true here and run
5434       * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
5435       * That's a FIXME.
5436       */
5437      bdrv_replace_node_common(top, base, false, false, &local_err);
5438      if (local_err) {
5439          error_report_err(local_err);
5440          goto exit;
5441      }
5442  
5443      for (p = updated_children; p; p = p->next) {
5444          c = p->data;
5445  
5446          if (c->klass->update_filename) {
5447              ret = c->klass->update_filename(c, base, backing_file_str,
5448                                              &local_err);
5449              if (ret < 0) {
5450                  /*
5451                   * TODO: Actually, we want to rollback all previous iterations
5452                   * of this loop, and (which is almost impossible) previous
5453                   * bdrv_replace_node()...
5454                   *
5455                   * Note, that c->klass->update_filename may lead to permission
5456                   * update, so it's a bad idea to call it inside permission
5457                   * update transaction of bdrv_replace_node.
5458                   */
5459                  error_report_err(local_err);
5460                  goto exit;
5461              }
5462          }
5463      }
5464  
5465      if (update_inherits_from) {
5466          base->inherits_from = explicit_top->inherits_from;
5467      }
5468  
5469      ret = 0;
5470  exit:
5471      bdrv_subtree_drained_end(top);
5472      bdrv_unref(top);
5473      return ret;
5474  }
5475  
5476  /**
5477   * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
5478   * sums the size of all data-bearing children.  (This excludes backing
5479   * children.)
5480   */
5481  static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
5482  {
5483      BdrvChild *child;
5484      int64_t child_size, sum = 0;
5485  
5486      QLIST_FOREACH(child, &bs->children, next) {
5487          if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5488                             BDRV_CHILD_FILTERED))
5489          {
5490              child_size = bdrv_get_allocated_file_size(child->bs);
5491              if (child_size < 0) {
5492                  return child_size;
5493              }
5494              sum += child_size;
5495          }
5496      }
5497  
5498      return sum;
5499  }
5500  
5501  /**
5502   * Length of a allocated file in bytes. Sparse files are counted by actual
5503   * allocated space. Return < 0 if error or unknown.
5504   */
5505  int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
5506  {
5507      BlockDriver *drv = bs->drv;
5508      if (!drv) {
5509          return -ENOMEDIUM;
5510      }
5511      if (drv->bdrv_get_allocated_file_size) {
5512          return drv->bdrv_get_allocated_file_size(bs);
5513      }
5514  
5515      if (drv->bdrv_file_open) {
5516          /*
5517           * Protocol drivers default to -ENOTSUP (most of their data is
5518           * not stored in any of their children (if they even have any),
5519           * so there is no generic way to figure it out).
5520           */
5521          return -ENOTSUP;
5522      } else if (drv->is_filter) {
5523          /* Filter drivers default to the size of their filtered child */
5524          return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
5525      } else {
5526          /* Other drivers default to summing their children's sizes */
5527          return bdrv_sum_allocated_file_size(bs);
5528      }
5529  }
5530  
5531  /*
5532   * bdrv_measure:
5533   * @drv: Format driver
5534   * @opts: Creation options for new image
5535   * @in_bs: Existing image containing data for new image (may be NULL)
5536   * @errp: Error object
5537   * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
5538   *          or NULL on error
5539   *
5540   * Calculate file size required to create a new image.
5541   *
5542   * If @in_bs is given then space for allocated clusters and zero clusters
5543   * from that image are included in the calculation.  If @opts contains a
5544   * backing file that is shared by @in_bs then backing clusters may be omitted
5545   * from the calculation.
5546   *
5547   * If @in_bs is NULL then the calculation includes no allocated clusters
5548   * unless a preallocation option is given in @opts.
5549   *
5550   * Note that @in_bs may use a different BlockDriver from @drv.
5551   *
5552   * If an error occurs the @errp pointer is set.
5553   */
5554  BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
5555                                 BlockDriverState *in_bs, Error **errp)
5556  {
5557      if (!drv->bdrv_measure) {
5558          error_setg(errp, "Block driver '%s' does not support size measurement",
5559                     drv->format_name);
5560          return NULL;
5561      }
5562  
5563      return drv->bdrv_measure(opts, in_bs, errp);
5564  }
5565  
5566  /**
5567   * Return number of sectors on success, -errno on error.
5568   */
5569  int64_t bdrv_nb_sectors(BlockDriverState *bs)
5570  {
5571      BlockDriver *drv = bs->drv;
5572  
5573      if (!drv)
5574          return -ENOMEDIUM;
5575  
5576      if (drv->has_variable_length) {
5577          int ret = refresh_total_sectors(bs, bs->total_sectors);
5578          if (ret < 0) {
5579              return ret;
5580          }
5581      }
5582      return bs->total_sectors;
5583  }
5584  
5585  /**
5586   * Return length in bytes on success, -errno on error.
5587   * The length is always a multiple of BDRV_SECTOR_SIZE.
5588   */
5589  int64_t bdrv_getlength(BlockDriverState *bs)
5590  {
5591      int64_t ret = bdrv_nb_sectors(bs);
5592  
5593      if (ret < 0) {
5594          return ret;
5595      }
5596      if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
5597          return -EFBIG;
5598      }
5599      return ret * BDRV_SECTOR_SIZE;
5600  }
5601  
5602  /* return 0 as number of sectors if no device present or error */
5603  void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
5604  {
5605      int64_t nb_sectors = bdrv_nb_sectors(bs);
5606  
5607      *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
5608  }
5609  
5610  bool bdrv_is_sg(BlockDriverState *bs)
5611  {
5612      return bs->sg;
5613  }
5614  
5615  /**
5616   * Return whether the given node supports compressed writes.
5617   */
5618  bool bdrv_supports_compressed_writes(BlockDriverState *bs)
5619  {
5620      BlockDriverState *filtered;
5621  
5622      if (!bs->drv || !block_driver_can_compress(bs->drv)) {
5623          return false;
5624      }
5625  
5626      filtered = bdrv_filter_bs(bs);
5627      if (filtered) {
5628          /*
5629           * Filters can only forward compressed writes, so we have to
5630           * check the child.
5631           */
5632          return bdrv_supports_compressed_writes(filtered);
5633      }
5634  
5635      return true;
5636  }
5637  
5638  const char *bdrv_get_format_name(BlockDriverState *bs)
5639  {
5640      return bs->drv ? bs->drv->format_name : NULL;
5641  }
5642  
/*
 * qsort() comparator for arrays of C strings: @a and @b each point to a
 * string pointer, and the pointed-to strings are compared with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(char *const *)a;
    const char *rhs = *(char *const *)b;

    return strcmp(lhs, rhs);
}
5647  
5648  void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
5649                           void *opaque, bool read_only)
5650  {
5651      BlockDriver *drv;
5652      int count = 0;
5653      int i;
5654      const char **formats = NULL;
5655  
5656      QLIST_FOREACH(drv, &bdrv_drivers, list) {
5657          if (drv->format_name) {
5658              bool found = false;
5659              int i = count;
5660  
5661              if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
5662                  continue;
5663              }
5664  
5665              while (formats && i && !found) {
5666                  found = !strcmp(formats[--i], drv->format_name);
5667              }
5668  
5669              if (!found) {
5670                  formats = g_renew(const char *, formats, count + 1);
5671                  formats[count++] = drv->format_name;
5672              }
5673          }
5674      }
5675  
5676      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
5677          const char *format_name = block_driver_modules[i].format_name;
5678  
5679          if (format_name) {
5680              bool found = false;
5681              int j = count;
5682  
5683              if (use_bdrv_whitelist &&
5684                  !bdrv_format_is_whitelisted(format_name, read_only)) {
5685                  continue;
5686              }
5687  
5688              while (formats && j && !found) {
5689                  found = !strcmp(formats[--j], format_name);
5690              }
5691  
5692              if (!found) {
5693                  formats = g_renew(const char *, formats, count + 1);
5694                  formats[count++] = format_name;
5695              }
5696          }
5697      }
5698  
5699      qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
5700  
5701      for (i = 0; i < count; i++) {
5702          it(opaque, formats[i]);
5703      }
5704  
5705      g_free(formats);
5706  }
5707  
5708  /* This function is to find a node in the bs graph */
5709  BlockDriverState *bdrv_find_node(const char *node_name)
5710  {
5711      BlockDriverState *bs;
5712  
5713      assert(node_name);
5714  
5715      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5716          if (!strcmp(node_name, bs->node_name)) {
5717              return bs;
5718          }
5719      }
5720      return NULL;
5721  }
5722  
5723  /* Put this QMP function here so it can access the static graph_bdrv_states. */
5724  BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
5725                                             Error **errp)
5726  {
5727      BlockDeviceInfoList *list;
5728      BlockDriverState *bs;
5729  
5730      list = NULL;
5731      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5732          BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
5733          if (!info) {
5734              qapi_free_BlockDeviceInfoList(list);
5735              return NULL;
5736          }
5737          QAPI_LIST_PREPEND(list, info);
5738      }
5739  
5740      return list;
5741  }
5742  
5743  typedef struct XDbgBlockGraphConstructor {
5744      XDbgBlockGraph *graph;
5745      GHashTable *graph_nodes;
5746  } XDbgBlockGraphConstructor;
5747  
5748  static XDbgBlockGraphConstructor *xdbg_graph_new(void)
5749  {
5750      XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
5751  
5752      gr->graph = g_new0(XDbgBlockGraph, 1);
5753      gr->graph_nodes = g_hash_table_new(NULL, NULL);
5754  
5755      return gr;
5756  }
5757  
5758  static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
5759  {
5760      XDbgBlockGraph *graph = gr->graph;
5761  
5762      g_hash_table_destroy(gr->graph_nodes);
5763      g_free(gr);
5764  
5765      return graph;
5766  }
5767  
5768  static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
5769  {
5770      uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
5771  
5772      if (ret != 0) {
5773          return ret;
5774      }
5775  
5776      /*
5777       * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
5778       * answer of g_hash_table_lookup.
5779       */
5780      ret = g_hash_table_size(gr->graph_nodes) + 1;
5781      g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
5782  
5783      return ret;
5784  }
5785  
5786  static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
5787                                  XDbgBlockGraphNodeType type, const char *name)
5788  {
5789      XDbgBlockGraphNode *n;
5790  
5791      n = g_new0(XDbgBlockGraphNode, 1);
5792  
5793      n->id = xdbg_graph_node_num(gr, node);
5794      n->type = type;
5795      n->name = g_strdup(name);
5796  
5797      QAPI_LIST_PREPEND(gr->graph->nodes, n);
5798  }
5799  
5800  static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
5801                                  const BdrvChild *child)
5802  {
5803      BlockPermission qapi_perm;
5804      XDbgBlockGraphEdge *edge;
5805  
5806      edge = g_new0(XDbgBlockGraphEdge, 1);
5807  
5808      edge->parent = xdbg_graph_node_num(gr, parent);
5809      edge->child = xdbg_graph_node_num(gr, child->bs);
5810      edge->name = g_strdup(child->name);
5811  
5812      for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
5813          uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
5814  
5815          if (flag & child->perm) {
5816              QAPI_LIST_PREPEND(edge->perm, qapi_perm);
5817          }
5818          if (flag & child->shared_perm) {
5819              QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
5820          }
5821      }
5822  
5823      QAPI_LIST_PREPEND(gr->graph->edges, edge);
5824  }
5825  
5826  
5827  XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
5828  {
5829      BlockBackend *blk;
5830      BlockJob *job;
5831      BlockDriverState *bs;
5832      BdrvChild *child;
5833      XDbgBlockGraphConstructor *gr = xdbg_graph_new();
5834  
5835      for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
5836          char *allocated_name = NULL;
5837          const char *name = blk_name(blk);
5838  
5839          if (!*name) {
5840              name = allocated_name = blk_get_attached_dev_id(blk);
5841          }
5842          xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
5843                             name);
5844          g_free(allocated_name);
5845          if (blk_root(blk)) {
5846              xdbg_graph_add_edge(gr, blk, blk_root(blk));
5847          }
5848      }
5849  
5850      for (job = block_job_next(NULL); job; job = block_job_next(job)) {
5851          GSList *el;
5852  
5853          xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
5854                             job->job.id);
5855          for (el = job->nodes; el; el = el->next) {
5856              xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
5857          }
5858      }
5859  
5860      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5861          xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
5862                             bs->node_name);
5863          QLIST_FOREACH(child, &bs->children, next) {
5864              xdbg_graph_add_edge(gr, bs, child);
5865          }
5866      }
5867  
5868      return xdbg_graph_finalize(gr);
5869  }
5870  
5871  BlockDriverState *bdrv_lookup_bs(const char *device,
5872                                   const char *node_name,
5873                                   Error **errp)
5874  {
5875      BlockBackend *blk;
5876      BlockDriverState *bs;
5877  
5878      if (device) {
5879          blk = blk_by_name(device);
5880  
5881          if (blk) {
5882              bs = blk_bs(blk);
5883              if (!bs) {
5884                  error_setg(errp, "Device '%s' has no medium", device);
5885              }
5886  
5887              return bs;
5888          }
5889      }
5890  
5891      if (node_name) {
5892          bs = bdrv_find_node(node_name);
5893  
5894          if (bs) {
5895              return bs;
5896          }
5897      }
5898  
5899      error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
5900                       device ? device : "",
5901                       node_name ? node_name : "");
5902      return NULL;
5903  }
5904  
5905  /* If 'base' is in the same chain as 'top', return true. Otherwise,
5906   * return false.  If either argument is NULL, return false. */
5907  bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
5908  {
5909      while (top && top != base) {
5910          top = bdrv_filter_or_cow_bs(top);
5911      }
5912  
5913      return top != NULL;
5914  }
5915  
5916  BlockDriverState *bdrv_next_node(BlockDriverState *bs)
5917  {
5918      if (!bs) {
5919          return QTAILQ_FIRST(&graph_bdrv_states);
5920      }
5921      return QTAILQ_NEXT(bs, node_list);
5922  }
5923  
5924  BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
5925  {
5926      if (!bs) {
5927          return QTAILQ_FIRST(&all_bdrv_states);
5928      }
5929      return QTAILQ_NEXT(bs, bs_list);
5930  }
5931  
5932  const char *bdrv_get_node_name(const BlockDriverState *bs)
5933  {
5934      return bs->node_name;
5935  }
5936  
5937  const char *bdrv_get_parent_name(const BlockDriverState *bs)
5938  {
5939      BdrvChild *c;
5940      const char *name;
5941  
5942      /* If multiple parents have a name, just pick the first one. */
5943      QLIST_FOREACH(c, &bs->parents, next_parent) {
5944          if (c->klass->get_name) {
5945              name = c->klass->get_name(c);
5946              if (name && *name) {
5947                  return name;
5948              }
5949          }
5950      }
5951  
5952      return NULL;
5953  }
5954  
5955  /* TODO check what callers really want: bs->node_name or blk_name() */
5956  const char *bdrv_get_device_name(const BlockDriverState *bs)
5957  {
5958      return bdrv_get_parent_name(bs) ?: "";
5959  }
5960  
5961  /* This can be used to identify nodes that might not have a device
5962   * name associated. Since node and device names live in the same
5963   * namespace, the result is unambiguous. The exception is if both are
5964   * absent, then this returns an empty (non-null) string. */
5965  const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
5966  {
5967      return bdrv_get_parent_name(bs) ?: bs->node_name;
5968  }
5969  
5970  int bdrv_get_flags(BlockDriverState *bs)
5971  {
5972      return bs->open_flags;
5973  }
5974  
5975  int bdrv_has_zero_init_1(BlockDriverState *bs)
5976  {
5977      return 1;
5978  }
5979  
5980  int bdrv_has_zero_init(BlockDriverState *bs)
5981  {
5982      BlockDriverState *filtered;
5983  
5984      if (!bs->drv) {
5985          return 0;
5986      }
5987  
5988      /* If BS is a copy on write image, it is initialized to
5989         the contents of the base image, which may not be zeroes.  */
5990      if (bdrv_cow_child(bs)) {
5991          return 0;
5992      }
5993      if (bs->drv->bdrv_has_zero_init) {
5994          return bs->drv->bdrv_has_zero_init(bs);
5995      }
5996  
5997      filtered = bdrv_filter_bs(bs);
5998      if (filtered) {
5999          return bdrv_has_zero_init(filtered);
6000      }
6001  
6002      /* safe default */
6003      return 0;
6004  }
6005  
6006  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
6007  {
6008      if (!(bs->open_flags & BDRV_O_UNMAP)) {
6009          return false;
6010      }
6011  
6012      return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
6013  }
6014  
6015  void bdrv_get_backing_filename(BlockDriverState *bs,
6016                                 char *filename, int filename_size)
6017  {
6018      pstrcpy(filename, filename_size, bs->backing_file);
6019  }
6020  
6021  int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
6022  {
6023      int ret;
6024      BlockDriver *drv = bs->drv;
6025      /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
6026      if (!drv) {
6027          return -ENOMEDIUM;
6028      }
6029      if (!drv->bdrv_get_info) {
6030          BlockDriverState *filtered = bdrv_filter_bs(bs);
6031          if (filtered) {
6032              return bdrv_get_info(filtered, bdi);
6033          }
6034          return -ENOTSUP;
6035      }
6036      memset(bdi, 0, sizeof(*bdi));
6037      ret = drv->bdrv_get_info(bs, bdi);
6038      if (ret < 0) {
6039          return ret;
6040      }
6041  
6042      if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
6043          return -EINVAL;
6044      }
6045  
6046      return 0;
6047  }
6048  
6049  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
6050                                            Error **errp)
6051  {
6052      BlockDriver *drv = bs->drv;
6053      if (drv && drv->bdrv_get_specific_info) {
6054          return drv->bdrv_get_specific_info(bs, errp);
6055      }
6056      return NULL;
6057  }
6058  
6059  BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
6060  {
6061      BlockDriver *drv = bs->drv;
6062      if (!drv || !drv->bdrv_get_specific_stats) {
6063          return NULL;
6064      }
6065      return drv->bdrv_get_specific_stats(bs);
6066  }
6067  
6068  void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
6069  {
6070      if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
6071          return;
6072      }
6073  
6074      bs->drv->bdrv_debug_event(bs, event);
6075  }
6076  
6077  static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
6078  {
6079      while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
6080          bs = bdrv_primary_bs(bs);
6081      }
6082  
6083      if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
6084          assert(bs->drv->bdrv_debug_remove_breakpoint);
6085          return bs;
6086      }
6087  
6088      return NULL;
6089  }
6090  
6091  int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
6092                            const char *tag)
6093  {
6094      bs = bdrv_find_debug_node(bs);
6095      if (bs) {
6096          return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
6097      }
6098  
6099      return -ENOTSUP;
6100  }
6101  
6102  int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
6103  {
6104      bs = bdrv_find_debug_node(bs);
6105      if (bs) {
6106          return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
6107      }
6108  
6109      return -ENOTSUP;
6110  }
6111  
6112  int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
6113  {
6114      while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
6115          bs = bdrv_primary_bs(bs);
6116      }
6117  
6118      if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
6119          return bs->drv->bdrv_debug_resume(bs, tag);
6120      }
6121  
6122      return -ENOTSUP;
6123  }
6124  
6125  bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
6126  {
6127      while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
6128          bs = bdrv_primary_bs(bs);
6129      }
6130  
6131      if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
6132          return bs->drv->bdrv_debug_is_suspended(bs, tag);
6133      }
6134  
6135      return false;
6136  }
6137  
6138  /* backing_file can either be relative, or absolute, or a protocol.  If it is
6139   * relative, it must be relative to the chain.  So, passing in bs->filename
6140   * from a BDS as backing_file should not be done, as that may be relative to
6141   * the CWD rather than the chain. */
6142  BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
6143          const char *backing_file)
6144  {
6145      char *filename_full = NULL;
6146      char *backing_file_full = NULL;
6147      char *filename_tmp = NULL;
6148      int is_protocol = 0;
6149      bool filenames_refreshed = false;
6150      BlockDriverState *curr_bs = NULL;
6151      BlockDriverState *retval = NULL;
6152      BlockDriverState *bs_below;
6153  
6154      if (!bs || !bs->drv || !backing_file) {
6155          return NULL;
6156      }
6157  
6158      filename_full     = g_malloc(PATH_MAX);
6159      backing_file_full = g_malloc(PATH_MAX);
6160  
6161      is_protocol = path_has_protocol(backing_file);
6162  
6163      /*
6164       * Being largely a legacy function, skip any filters here
6165       * (because filters do not have normal filenames, so they cannot
6166       * match anyway; and allowing json:{} filenames is a bit out of
6167       * scope).
6168       */
6169      for (curr_bs = bdrv_skip_filters(bs);
6170           bdrv_cow_child(curr_bs) != NULL;
6171           curr_bs = bs_below)
6172      {
6173          bs_below = bdrv_backing_chain_next(curr_bs);
6174  
6175          if (bdrv_backing_overridden(curr_bs)) {
6176              /*
6177               * If the backing file was overridden, we can only compare
6178               * directly against the backing node's filename.
6179               */
6180  
6181              if (!filenames_refreshed) {
6182                  /*
6183                   * This will automatically refresh all of the
6184                   * filenames in the rest of the backing chain, so we
6185                   * only need to do this once.
6186                   */
6187                  bdrv_refresh_filename(bs_below);
6188                  filenames_refreshed = true;
6189              }
6190  
6191              if (strcmp(backing_file, bs_below->filename) == 0) {
6192                  retval = bs_below;
6193                  break;
6194              }
6195          } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
6196              /*
6197               * If either of the filename paths is actually a protocol, then
6198               * compare unmodified paths; otherwise make paths relative.
6199               */
6200              char *backing_file_full_ret;
6201  
6202              if (strcmp(backing_file, curr_bs->backing_file) == 0) {
6203                  retval = bs_below;
6204                  break;
6205              }
6206              /* Also check against the full backing filename for the image */
6207              backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
6208                                                                     NULL);
6209              if (backing_file_full_ret) {
6210                  bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
6211                  g_free(backing_file_full_ret);
6212                  if (equal) {
6213                      retval = bs_below;
6214                      break;
6215                  }
6216              }
6217          } else {
6218              /* If not an absolute filename path, make it relative to the current
6219               * image's filename path */
6220              filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
6221                                                         NULL);
6222              /* We are going to compare canonicalized absolute pathnames */
6223              if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
6224                  g_free(filename_tmp);
6225                  continue;
6226              }
6227              g_free(filename_tmp);
6228  
6229              /* We need to make sure the backing filename we are comparing against
6230               * is relative to the current image filename (or absolute) */
6231              filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
6232              if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
6233                  g_free(filename_tmp);
6234                  continue;
6235              }
6236              g_free(filename_tmp);
6237  
6238              if (strcmp(backing_file_full, filename_full) == 0) {
6239                  retval = bs_below;
6240                  break;
6241              }
6242          }
6243      }
6244  
6245      g_free(filename_full);
6246      g_free(backing_file_full);
6247      return retval;
6248  }
6249  
6250  void bdrv_init(void)
6251  {
6252  #ifdef CONFIG_BDRV_WHITELIST_TOOLS
6253      use_bdrv_whitelist = 1;
6254  #endif
6255      module_call_init(MODULE_INIT_BLOCK);
6256  }
6257  
6258  void bdrv_init_with_whitelist(void)
6259  {
6260      use_bdrv_whitelist = 1;
6261      bdrv_init();
6262  }
6263  
6264  int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
6265  {
6266      BdrvChild *child, *parent;
6267      Error *local_err = NULL;
6268      int ret;
6269      BdrvDirtyBitmap *bm;
6270  
6271      if (!bs->drv)  {
6272          return -ENOMEDIUM;
6273      }
6274  
6275      QLIST_FOREACH(child, &bs->children, next) {
6276          bdrv_co_invalidate_cache(child->bs, &local_err);
6277          if (local_err) {
6278              error_propagate(errp, local_err);
6279              return -EINVAL;
6280          }
6281      }
6282  
6283      /*
6284       * Update permissions, they may differ for inactive nodes.
6285       *
6286       * Note that the required permissions of inactive images are always a
6287       * subset of the permissions required after activating the image. This
6288       * allows us to just get the permissions upfront without restricting
6289       * drv->bdrv_invalidate_cache().
6290       *
6291       * It also means that in error cases, we don't have to try and revert to
6292       * the old permissions (which is an operation that could fail, too). We can
6293       * just keep the extended permissions for the next time that an activation
6294       * of the image is tried.
6295       */
6296      if (bs->open_flags & BDRV_O_INACTIVE) {
6297          bs->open_flags &= ~BDRV_O_INACTIVE;
6298          ret = bdrv_refresh_perms(bs, errp);
6299          if (ret < 0) {
6300              bs->open_flags |= BDRV_O_INACTIVE;
6301              return ret;
6302          }
6303  
6304          if (bs->drv->bdrv_co_invalidate_cache) {
6305              bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
6306              if (local_err) {
6307                  bs->open_flags |= BDRV_O_INACTIVE;
6308                  error_propagate(errp, local_err);
6309                  return -EINVAL;
6310              }
6311          }
6312  
6313          FOR_EACH_DIRTY_BITMAP(bs, bm) {
6314              bdrv_dirty_bitmap_skip_store(bm, false);
6315          }
6316  
6317          ret = refresh_total_sectors(bs, bs->total_sectors);
6318          if (ret < 0) {
6319              bs->open_flags |= BDRV_O_INACTIVE;
6320              error_setg_errno(errp, -ret, "Could not refresh total sector count");
6321              return ret;
6322          }
6323      }
6324  
6325      QLIST_FOREACH(parent, &bs->parents, next_parent) {
6326          if (parent->klass->activate) {
6327              parent->klass->activate(parent, &local_err);
6328              if (local_err) {
6329                  bs->open_flags |= BDRV_O_INACTIVE;
6330                  error_propagate(errp, local_err);
6331                  return -EINVAL;
6332              }
6333          }
6334      }
6335  
6336      return 0;
6337  }
6338  
6339  void bdrv_invalidate_cache_all(Error **errp)
6340  {
6341      BlockDriverState *bs;
6342      BdrvNextIterator it;
6343  
6344      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6345          AioContext *aio_context = bdrv_get_aio_context(bs);
6346          int ret;
6347  
6348          aio_context_acquire(aio_context);
6349          ret = bdrv_invalidate_cache(bs, errp);
6350          aio_context_release(aio_context);
6351          if (ret < 0) {
6352              bdrv_next_cleanup(&it);
6353              return;
6354          }
6355      }
6356  }
6357  
6358  static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
6359  {
6360      BdrvChild *parent;
6361  
6362      QLIST_FOREACH(parent, &bs->parents, next_parent) {
6363          if (parent->klass->parent_is_bds) {
6364              BlockDriverState *parent_bs = parent->opaque;
6365              if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
6366                  return true;
6367              }
6368          }
6369      }
6370  
6371      return false;
6372  }
6373  
6374  static int bdrv_inactivate_recurse(BlockDriverState *bs)
6375  {
6376      BdrvChild *child, *parent;
6377      int ret;
6378      uint64_t cumulative_perms, cumulative_shared_perms;
6379  
6380      if (!bs->drv) {
6381          return -ENOMEDIUM;
6382      }
6383  
6384      /* Make sure that we don't inactivate a child before its parent.
6385       * It will be covered by recursion from the yet active parent. */
6386      if (bdrv_has_bds_parent(bs, true)) {
6387          return 0;
6388      }
6389  
6390      assert(!(bs->open_flags & BDRV_O_INACTIVE));
6391  
6392      /* Inactivate this node */
6393      if (bs->drv->bdrv_inactivate) {
6394          ret = bs->drv->bdrv_inactivate(bs);
6395          if (ret < 0) {
6396              return ret;
6397          }
6398      }
6399  
6400      QLIST_FOREACH(parent, &bs->parents, next_parent) {
6401          if (parent->klass->inactivate) {
6402              ret = parent->klass->inactivate(parent);
6403              if (ret < 0) {
6404                  return ret;
6405              }
6406          }
6407      }
6408  
6409      bdrv_get_cumulative_perm(bs, &cumulative_perms,
6410                               &cumulative_shared_perms);
6411      if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
6412          /* Our inactive parents still need write access. Inactivation failed. */
6413          return -EPERM;
6414      }
6415  
6416      bs->open_flags |= BDRV_O_INACTIVE;
6417  
6418      /*
6419       * Update permissions, they may differ for inactive nodes.
6420       * We only tried to loosen restrictions, so errors are not fatal, ignore
6421       * them.
6422       */
6423      bdrv_refresh_perms(bs, NULL);
6424  
6425      /* Recursively inactivate children */
6426      QLIST_FOREACH(child, &bs->children, next) {
6427          ret = bdrv_inactivate_recurse(child->bs);
6428          if (ret < 0) {
6429              return ret;
6430          }
6431      }
6432  
6433      return 0;
6434  }
6435  
6436  int bdrv_inactivate_all(void)
6437  {
6438      BlockDriverState *bs = NULL;
6439      BdrvNextIterator it;
6440      int ret = 0;
6441      GSList *aio_ctxs = NULL, *ctx;
6442  
6443      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6444          AioContext *aio_context = bdrv_get_aio_context(bs);
6445  
6446          if (!g_slist_find(aio_ctxs, aio_context)) {
6447              aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
6448              aio_context_acquire(aio_context);
6449          }
6450      }
6451  
6452      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6453          /* Nodes with BDS parents are covered by recursion from the last
6454           * parent that gets inactivated. Don't inactivate them a second
6455           * time if that has already happened. */
6456          if (bdrv_has_bds_parent(bs, false)) {
6457              continue;
6458          }
6459          ret = bdrv_inactivate_recurse(bs);
6460          if (ret < 0) {
6461              bdrv_next_cleanup(&it);
6462              goto out;
6463          }
6464      }
6465  
6466  out:
6467      for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
6468          AioContext *aio_context = ctx->data;
6469          aio_context_release(aio_context);
6470      }
6471      g_slist_free(aio_ctxs);
6472  
6473      return ret;
6474  }
6475  
6476  /**************************************************************/
6477  /* removable device support */
6478  
6479  /**
6480   * Return TRUE if the media is present
6481   */
6482  bool bdrv_is_inserted(BlockDriverState *bs)
6483  {
6484      BlockDriver *drv = bs->drv;
6485      BdrvChild *child;
6486  
6487      if (!drv) {
6488          return false;
6489      }
6490      if (drv->bdrv_is_inserted) {
6491          return drv->bdrv_is_inserted(bs);
6492      }
6493      QLIST_FOREACH(child, &bs->children, next) {
6494          if (!bdrv_is_inserted(child->bs)) {
6495              return false;
6496          }
6497      }
6498      return true;
6499  }
6500  
6501  /**
6502   * If eject_flag is TRUE, eject the media. Otherwise, close the tray
6503   */
6504  void bdrv_eject(BlockDriverState *bs, bool eject_flag)
6505  {
6506      BlockDriver *drv = bs->drv;
6507  
6508      if (drv && drv->bdrv_eject) {
6509          drv->bdrv_eject(bs, eject_flag);
6510      }
6511  }
6512  
6513  /**
6514   * Lock or unlock the media (if it is locked, the user won't be able
6515   * to eject it manually).
6516   */
6517  void bdrv_lock_medium(BlockDriverState *bs, bool locked)
6518  {
6519      BlockDriver *drv = bs->drv;
6520  
6521      trace_bdrv_lock_medium(bs, locked);
6522  
6523      if (drv && drv->bdrv_lock_medium) {
6524          drv->bdrv_lock_medium(bs, locked);
6525      }
6526  }
6527  
6528  /* Get a reference to bs */
6529  void bdrv_ref(BlockDriverState *bs)
6530  {
6531      bs->refcnt++;
6532  }
6533  
6534  /* Release a previously grabbed reference to bs.
6535   * If after releasing, reference count is zero, the BlockDriverState is
6536   * deleted. */
6537  void bdrv_unref(BlockDriverState *bs)
6538  {
6539      if (!bs) {
6540          return;
6541      }
6542      assert(bs->refcnt > 0);
6543      if (--bs->refcnt == 0) {
6544          bdrv_delete(bs);
6545      }
6546  }
6547  
/*
 * One entry in a node's per-operation blocker list (bs->op_blockers[op]).
 */
struct BdrvOpBlocker {
    /*
     * Why the operation is blocked.  bdrv_op_unblock() matches entries by
     * comparing this pointer; bdrv_op_is_blocked() reports a copy of it.
     */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
6552  
6553  bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
6554  {
6555      BdrvOpBlocker *blocker;
6556      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6557      if (!QLIST_EMPTY(&bs->op_blockers[op])) {
6558          blocker = QLIST_FIRST(&bs->op_blockers[op]);
6559          error_propagate_prepend(errp, error_copy(blocker->reason),
6560                                  "Node '%s' is busy: ",
6561                                  bdrv_get_device_or_node_name(bs));
6562          return true;
6563      }
6564      return false;
6565  }
6566  
6567  void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
6568  {
6569      BdrvOpBlocker *blocker;
6570      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6571  
6572      blocker = g_new0(BdrvOpBlocker, 1);
6573      blocker->reason = reason;
6574      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
6575  }
6576  
6577  void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
6578  {
6579      BdrvOpBlocker *blocker, *next;
6580      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6581      QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
6582          if (blocker->reason == reason) {
6583              QLIST_REMOVE(blocker, list);
6584              g_free(blocker);
6585          }
6586      }
6587  }
6588  
6589  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
6590  {
6591      int i;
6592      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6593          bdrv_op_block(bs, i, reason);
6594      }
6595  }
6596  
6597  void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
6598  {
6599      int i;
6600      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6601          bdrv_op_unblock(bs, i, reason);
6602      }
6603  }
6604  
6605  bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
6606  {
6607      int i;
6608  
6609      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6610          if (!QLIST_EMPTY(&bs->op_blockers[i])) {
6611              return false;
6612          }
6613      }
6614      return true;
6615  }
6616  
6617  void bdrv_img_create(const char *filename, const char *fmt,
6618                       const char *base_filename, const char *base_fmt,
6619                       char *options, uint64_t img_size, int flags, bool quiet,
6620                       Error **errp)
6621  {
6622      QemuOptsList *create_opts = NULL;
6623      QemuOpts *opts = NULL;
6624      const char *backing_fmt, *backing_file;
6625      int64_t size;
6626      BlockDriver *drv, *proto_drv;
6627      Error *local_err = NULL;
6628      int ret = 0;
6629  
6630      /* Find driver and parse its options */
6631      drv = bdrv_find_format(fmt);
6632      if (!drv) {
6633          error_setg(errp, "Unknown file format '%s'", fmt);
6634          return;
6635      }
6636  
6637      proto_drv = bdrv_find_protocol(filename, true, errp);
6638      if (!proto_drv) {
6639          return;
6640      }
6641  
6642      if (!drv->create_opts) {
6643          error_setg(errp, "Format driver '%s' does not support image creation",
6644                     drv->format_name);
6645          return;
6646      }
6647  
6648      if (!proto_drv->create_opts) {
6649          error_setg(errp, "Protocol driver '%s' does not support image creation",
6650                     proto_drv->format_name);
6651          return;
6652      }
6653  
6654      /* Create parameter list */
6655      create_opts = qemu_opts_append(create_opts, drv->create_opts);
6656      create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
6657  
6658      opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
6659  
6660      /* Parse -o options */
6661      if (options) {
6662          if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
6663              goto out;
6664          }
6665      }
6666  
6667      if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
6668          qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
6669      } else if (img_size != UINT64_C(-1)) {
6670          error_setg(errp, "The image size must be specified only once");
6671          goto out;
6672      }
6673  
6674      if (base_filename) {
6675          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
6676                            NULL)) {
6677              error_setg(errp, "Backing file not supported for file format '%s'",
6678                         fmt);
6679              goto out;
6680          }
6681      }
6682  
6683      if (base_fmt) {
6684          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
6685              error_setg(errp, "Backing file format not supported for file "
6686                               "format '%s'", fmt);
6687              goto out;
6688          }
6689      }
6690  
6691      backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
6692      if (backing_file) {
6693          if (!strcmp(filename, backing_file)) {
6694              error_setg(errp, "Error: Trying to create an image with the "
6695                               "same filename as the backing file");
6696              goto out;
6697          }
6698          if (backing_file[0] == '\0') {
6699              error_setg(errp, "Expected backing file name, got empty string");
6700              goto out;
6701          }
6702      }
6703  
6704      backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
6705  
6706      /* The size for the image must always be specified, unless we have a backing
6707       * file and we have not been forbidden from opening it. */
6708      size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
6709      if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
6710          BlockDriverState *bs;
6711          char *full_backing;
6712          int back_flags;
6713          QDict *backing_options = NULL;
6714  
6715          full_backing =
6716              bdrv_get_full_backing_filename_from_filename(filename, backing_file,
6717                                                           &local_err);
6718          if (local_err) {
6719              goto out;
6720          }
6721          assert(full_backing);
6722  
6723          /*
6724           * No need to do I/O here, which allows us to open encrypted
6725           * backing images without needing the secret
6726           */
6727          back_flags = flags;
6728          back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
6729          back_flags |= BDRV_O_NO_IO;
6730  
6731          backing_options = qdict_new();
6732          if (backing_fmt) {
6733              qdict_put_str(backing_options, "driver", backing_fmt);
6734          }
6735          qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
6736  
6737          bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
6738                         &local_err);
6739          g_free(full_backing);
6740          if (!bs) {
6741              error_append_hint(&local_err, "Could not open backing image.\n");
6742              goto out;
6743          } else {
6744              if (!backing_fmt) {
6745                  error_setg(&local_err,
6746                             "Backing file specified without backing format");
6747                  error_append_hint(&local_err, "Detected format of %s.",
6748                                    bs->drv->format_name);
6749                  goto out;
6750              }
6751              if (size == -1) {
6752                  /* Opened BS, have no size */
6753                  size = bdrv_getlength(bs);
6754                  if (size < 0) {
6755                      error_setg_errno(errp, -size, "Could not get size of '%s'",
6756                                       backing_file);
6757                      bdrv_unref(bs);
6758                      goto out;
6759                  }
6760                  qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
6761              }
6762              bdrv_unref(bs);
6763          }
6764          /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
6765      } else if (backing_file && !backing_fmt) {
6766          error_setg(&local_err,
6767                     "Backing file specified without backing format");
6768          goto out;
6769      }
6770  
6771      if (size == -1) {
6772          error_setg(errp, "Image creation needs a size parameter");
6773          goto out;
6774      }
6775  
6776      if (!quiet) {
6777          printf("Formatting '%s', fmt=%s ", filename, fmt);
6778          qemu_opts_print(opts, " ");
6779          puts("");
6780          fflush(stdout);
6781      }
6782  
6783      ret = bdrv_create(drv, filename, opts, &local_err);
6784  
6785      if (ret == -EFBIG) {
6786          /* This is generally a better message than whatever the driver would
6787           * deliver (especially because of the cluster_size_hint), since that
6788           * is most probably not much different from "image too large". */
6789          const char *cluster_size_hint = "";
6790          if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
6791              cluster_size_hint = " (try using a larger cluster size)";
6792          }
6793          error_setg(errp, "The image size is too large for file format '%s'"
6794                     "%s", fmt, cluster_size_hint);
6795          error_free(local_err);
6796          local_err = NULL;
6797      }
6798  
6799  out:
6800      qemu_opts_del(opts);
6801      qemu_opts_free(create_opts);
6802      error_propagate(errp, local_err);
6803  }
6804  
6805  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
6806  {
6807      return bs ? bs->aio_context : qemu_get_aio_context();
6808  }
6809  
6810  AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
6811  {
6812      Coroutine *self = qemu_coroutine_self();
6813      AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
6814      AioContext *new_ctx;
6815  
6816      /*
6817       * Increase bs->in_flight to ensure that this operation is completed before
6818       * moving the node to a different AioContext. Read new_ctx only afterwards.
6819       */
6820      bdrv_inc_in_flight(bs);
6821  
6822      new_ctx = bdrv_get_aio_context(bs);
6823      aio_co_reschedule_self(new_ctx);
6824      return old_ctx;
6825  }
6826  
6827  void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
6828  {
6829      aio_co_reschedule_self(old_ctx);
6830      bdrv_dec_in_flight(bs);
6831  }
6832  
6833  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
6834  {
6835      AioContext *ctx = bdrv_get_aio_context(bs);
6836  
6837      /* In the main thread, bs->aio_context won't change concurrently */
6838      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
6839  
6840      /*
6841       * We're in coroutine context, so we already hold the lock of the main
6842       * loop AioContext. Don't lock it twice to avoid deadlocks.
6843       */
6844      assert(qemu_in_coroutine());
6845      if (ctx != qemu_get_aio_context()) {
6846          aio_context_acquire(ctx);
6847      }
6848  }
6849  
6850  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
6851  {
6852      AioContext *ctx = bdrv_get_aio_context(bs);
6853  
6854      assert(qemu_in_coroutine());
6855      if (ctx != qemu_get_aio_context()) {
6856          aio_context_release(ctx);
6857      }
6858  }
6859  
6860  void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
6861  {
6862      aio_co_enter(bdrv_get_aio_context(bs), co);
6863  }
6864  
6865  static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
6866  {
6867      QLIST_REMOVE(ban, list);
6868      g_free(ban);
6869  }
6870  
6871  static void bdrv_detach_aio_context(BlockDriverState *bs)
6872  {
6873      BdrvAioNotifier *baf, *baf_tmp;
6874  
6875      assert(!bs->walking_aio_notifiers);
6876      bs->walking_aio_notifiers = true;
6877      QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
6878          if (baf->deleted) {
6879              bdrv_do_remove_aio_context_notifier(baf);
6880          } else {
6881              baf->detach_aio_context(baf->opaque);
6882          }
6883      }
6884      /* Never mind iterating again to check for ->deleted.  bdrv_close() will
6885       * remove remaining aio notifiers if we aren't called again.
6886       */
6887      bs->walking_aio_notifiers = false;
6888  
6889      if (bs->drv && bs->drv->bdrv_detach_aio_context) {
6890          bs->drv->bdrv_detach_aio_context(bs);
6891      }
6892  
6893      if (bs->quiesce_counter) {
6894          aio_enable_external(bs->aio_context);
6895      }
6896      bs->aio_context = NULL;
6897  }
6898  
6899  static void bdrv_attach_aio_context(BlockDriverState *bs,
6900                                      AioContext *new_context)
6901  {
6902      BdrvAioNotifier *ban, *ban_tmp;
6903  
6904      if (bs->quiesce_counter) {
6905          aio_disable_external(new_context);
6906      }
6907  
6908      bs->aio_context = new_context;
6909  
6910      if (bs->drv && bs->drv->bdrv_attach_aio_context) {
6911          bs->drv->bdrv_attach_aio_context(bs, new_context);
6912      }
6913  
6914      assert(!bs->walking_aio_notifiers);
6915      bs->walking_aio_notifiers = true;
6916      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
6917          if (ban->deleted) {
6918              bdrv_do_remove_aio_context_notifier(ban);
6919          } else {
6920              ban->attached_aio_context(new_context, ban->opaque);
6921          }
6922      }
6923      bs->walking_aio_notifiers = false;
6924  }
6925  
6926  /*
6927   * Changes the AioContext used for fd handlers, timers, and BHs by this
6928   * BlockDriverState and all its children and parents.
6929   *
6930   * Must be called from the main AioContext.
6931   *
6932   * The caller must own the AioContext lock for the old AioContext of bs, but it
6933   * must not own the AioContext lock for new_context (unless new_context is the
6934   * same as the current context of bs).
6935   *
6936   * @ignore will accumulate all visited BdrvChild object. The caller is
6937   * responsible for freeing the list afterwards.
6938   */
6939  void bdrv_set_aio_context_ignore(BlockDriverState *bs,
6940                                   AioContext *new_context, GSList **ignore)
6941  {
6942      AioContext *old_context = bdrv_get_aio_context(bs);
6943      GSList *children_to_process = NULL;
6944      GSList *parents_to_process = NULL;
6945      GSList *entry;
6946      BdrvChild *child, *parent;
6947  
6948      g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
6949  
6950      if (old_context == new_context) {
6951          return;
6952      }
6953  
6954      bdrv_drained_begin(bs);
6955  
6956      QLIST_FOREACH(child, &bs->children, next) {
6957          if (g_slist_find(*ignore, child)) {
6958              continue;
6959          }
6960          *ignore = g_slist_prepend(*ignore, child);
6961          children_to_process = g_slist_prepend(children_to_process, child);
6962      }
6963  
6964      QLIST_FOREACH(parent, &bs->parents, next_parent) {
6965          if (g_slist_find(*ignore, parent)) {
6966              continue;
6967          }
6968          *ignore = g_slist_prepend(*ignore, parent);
6969          parents_to_process = g_slist_prepend(parents_to_process, parent);
6970      }
6971  
6972      for (entry = children_to_process;
6973           entry != NULL;
6974           entry = g_slist_next(entry)) {
6975          child = entry->data;
6976          bdrv_set_aio_context_ignore(child->bs, new_context, ignore);
6977      }
6978      g_slist_free(children_to_process);
6979  
6980      for (entry = parents_to_process;
6981           entry != NULL;
6982           entry = g_slist_next(entry)) {
6983          parent = entry->data;
6984          assert(parent->klass->set_aio_ctx);
6985          parent->klass->set_aio_ctx(parent, new_context, ignore);
6986      }
6987      g_slist_free(parents_to_process);
6988  
6989      bdrv_detach_aio_context(bs);
6990  
6991      /* Acquire the new context, if necessary */
6992      if (qemu_get_aio_context() != new_context) {
6993          aio_context_acquire(new_context);
6994      }
6995  
6996      bdrv_attach_aio_context(bs, new_context);
6997  
6998      /*
6999       * If this function was recursively called from
7000       * bdrv_set_aio_context_ignore(), there may be nodes in the
7001       * subtree that have not yet been moved to the new AioContext.
7002       * Release the old one so bdrv_drained_end() can poll them.
7003       */
7004      if (qemu_get_aio_context() != old_context) {
7005          aio_context_release(old_context);
7006      }
7007  
7008      bdrv_drained_end(bs);
7009  
7010      if (qemu_get_aio_context() != old_context) {
7011          aio_context_acquire(old_context);
7012      }
7013      if (qemu_get_aio_context() != new_context) {
7014          aio_context_release(new_context);
7015      }
7016  }
7017  
7018  static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
7019                                              GSList **ignore, Error **errp)
7020  {
7021      if (g_slist_find(*ignore, c)) {
7022          return true;
7023      }
7024      *ignore = g_slist_prepend(*ignore, c);
7025  
7026      /*
7027       * A BdrvChildClass that doesn't handle AioContext changes cannot
7028       * tolerate any AioContext changes
7029       */
7030      if (!c->klass->can_set_aio_ctx) {
7031          char *user = bdrv_child_user_desc(c);
7032          error_setg(errp, "Changing iothreads is not supported by %s", user);
7033          g_free(user);
7034          return false;
7035      }
7036      if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) {
7037          assert(!errp || *errp);
7038          return false;
7039      }
7040      return true;
7041  }
7042  
7043  bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
7044                                      GSList **ignore, Error **errp)
7045  {
7046      if (g_slist_find(*ignore, c)) {
7047          return true;
7048      }
7049      *ignore = g_slist_prepend(*ignore, c);
7050      return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp);
7051  }
7052  
7053  /* @ignore will accumulate all visited BdrvChild object. The caller is
7054   * responsible for freeing the list afterwards. */
7055  bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7056                                GSList **ignore, Error **errp)
7057  {
7058      BdrvChild *c;
7059  
7060      if (bdrv_get_aio_context(bs) == ctx) {
7061          return true;
7062      }
7063  
7064      QLIST_FOREACH(c, &bs->parents, next_parent) {
7065          if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
7066              return false;
7067          }
7068      }
7069      QLIST_FOREACH(c, &bs->children, next) {
7070          if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) {
7071              return false;
7072          }
7073      }
7074  
7075      return true;
7076  }
7077  
7078  int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7079                                     BdrvChild *ignore_child, Error **errp)
7080  {
7081      GSList *ignore;
7082      bool ret;
7083  
7084      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
7085      ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
7086      g_slist_free(ignore);
7087  
7088      if (!ret) {
7089          return -EPERM;
7090      }
7091  
7092      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
7093      bdrv_set_aio_context_ignore(bs, ctx, &ignore);
7094      g_slist_free(ignore);
7095  
7096      return 0;
7097  }
7098  
7099  int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7100                               Error **errp)
7101  {
7102      return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
7103  }
7104  
7105  void bdrv_add_aio_context_notifier(BlockDriverState *bs,
7106          void (*attached_aio_context)(AioContext *new_context, void *opaque),
7107          void (*detach_aio_context)(void *opaque), void *opaque)
7108  {
7109      BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
7110      *ban = (BdrvAioNotifier){
7111          .attached_aio_context = attached_aio_context,
7112          .detach_aio_context   = detach_aio_context,
7113          .opaque               = opaque
7114      };
7115  
7116      QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
7117  }
7118  
7119  void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
7120                                        void (*attached_aio_context)(AioContext *,
7121                                                                     void *),
7122                                        void (*detach_aio_context)(void *),
7123                                        void *opaque)
7124  {
7125      BdrvAioNotifier *ban, *ban_next;
7126  
7127      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
7128          if (ban->attached_aio_context == attached_aio_context &&
7129              ban->detach_aio_context   == detach_aio_context   &&
7130              ban->opaque               == opaque               &&
7131              ban->deleted              == false)
7132          {
7133              if (bs->walking_aio_notifiers) {
7134                  ban->deleted = true;
7135              } else {
7136                  bdrv_do_remove_aio_context_notifier(ban);
7137              }
7138              return;
7139          }
7140      }
7141  
7142      abort();
7143  }
7144  
7145  int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
7146                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
7147                         bool force,
7148                         Error **errp)
7149  {
7150      if (!bs->drv) {
7151          error_setg(errp, "Node is ejected");
7152          return -ENOMEDIUM;
7153      }
7154      if (!bs->drv->bdrv_amend_options) {
7155          error_setg(errp, "Block driver '%s' does not support option amendment",
7156                     bs->drv->format_name);
7157          return -ENOTSUP;
7158      }
7159      return bs->drv->bdrv_amend_options(bs, opts, status_cb,
7160                                         cb_opaque, force, errp);
7161  }
7162  
7163  /*
7164   * This function checks whether the given @to_replace is allowed to be
7165   * replaced by a node that always shows the same data as @bs.  This is
7166   * used for example to verify whether the mirror job can replace
7167   * @to_replace by the target mirrored from @bs.
7168   * To be replaceable, @bs and @to_replace may either be guaranteed to
7169   * always show the same data (because they are only connected through
7170   * filters), or some driver may allow replacing one of its children
7171   * because it can guarantee that this child's data is not visible at
7172   * all (for example, for dissenting quorum children that have no other
7173   * parents).
7174   */
7175  bool bdrv_recurse_can_replace(BlockDriverState *bs,
7176                                BlockDriverState *to_replace)
7177  {
7178      BlockDriverState *filtered;
7179  
7180      if (!bs || !bs->drv) {
7181          return false;
7182      }
7183  
7184      if (bs == to_replace) {
7185          return true;
7186      }
7187  
7188      /* See what the driver can do */
7189      if (bs->drv->bdrv_recurse_can_replace) {
7190          return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
7191      }
7192  
7193      /* For filters without an own implementation, we can recurse on our own */
7194      filtered = bdrv_filter_bs(bs);
7195      if (filtered) {
7196          return bdrv_recurse_can_replace(filtered, to_replace);
7197      }
7198  
7199      /* Safe default */
7200      return false;
7201  }
7202  
7203  /*
7204   * Check whether the given @node_name can be replaced by a node that
7205   * has the same data as @parent_bs.  If so, return @node_name's BDS;
7206   * NULL otherwise.
7207   *
7208   * @node_name must be a (recursive) *child of @parent_bs (or this
7209   * function will return NULL).
7210   *
7211   * The result (whether the node can be replaced or not) is only valid
7212   * for as long as no graph or permission changes occur.
7213   */
7214  BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
7215                                          const char *node_name, Error **errp)
7216  {
7217      BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
7218      AioContext *aio_context;
7219  
7220      if (!to_replace_bs) {
7221          error_setg(errp, "Failed to find node with node-name='%s'", node_name);
7222          return NULL;
7223      }
7224  
7225      aio_context = bdrv_get_aio_context(to_replace_bs);
7226      aio_context_acquire(aio_context);
7227  
7228      if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
7229          to_replace_bs = NULL;
7230          goto out;
7231      }
7232  
7233      /* We don't want arbitrary node of the BDS chain to be replaced only the top
7234       * most non filter in order to prevent data corruption.
7235       * Another benefit is that this tests exclude backing files which are
7236       * blocked by the backing blockers.
7237       */
7238      if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
7239          error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
7240                     "because it cannot be guaranteed that doing so would not "
7241                     "lead to an abrupt change of visible data",
7242                     node_name, parent_bs->node_name);
7243          to_replace_bs = NULL;
7244          goto out;
7245      }
7246  
7247  out:
7248      aio_context_release(aio_context);
7249      return to_replace_bs;
7250  }
7251  
7252  /**
7253   * Iterates through the list of runtime option keys that are said to
7254   * be "strong" for a BDS.  An option is called "strong" if it changes
7255   * a BDS's data.  For example, the null block driver's "size" and
7256   * "read-zeroes" options are strong, but its "latency-ns" option is
7257   * not.
7258   *
7259   * If a key returned by this function ends with a dot, all options
7260   * starting with that prefix are strong.
7261   */
7262  static const char *const *strong_options(BlockDriverState *bs,
7263                                           const char *const *curopt)
7264  {
7265      static const char *const global_options[] = {
7266          "driver", "filename", NULL
7267      };
7268  
7269      if (!curopt) {
7270          return &global_options[0];
7271      }
7272  
7273      curopt++;
7274      if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
7275          curopt = bs->drv->strong_runtime_opts;
7276      }
7277  
7278      return (curopt && *curopt) ? curopt : NULL;
7279  }
7280  
7281  /**
7282   * Copies all strong runtime options from bs->options to the given
7283   * QDict.  The set of strong option keys is determined by invoking
7284   * strong_options().
7285   *
7286   * Returns true iff any strong option was present in bs->options (and
7287   * thus copied to the target QDict) with the exception of "filename"
7288   * and "driver".  The caller is expected to use this value to decide
7289   * whether the existence of strong options prevents the generation of
7290   * a plain filename.
7291   */
7292  static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
7293  {
7294      bool found_any = false;
7295      const char *const *option_name = NULL;
7296  
7297      if (!bs->drv) {
7298          return false;
7299      }
7300  
7301      while ((option_name = strong_options(bs, option_name))) {
7302          bool option_given = false;
7303  
7304          assert(strlen(*option_name) > 0);
7305          if ((*option_name)[strlen(*option_name) - 1] != '.') {
7306              QObject *entry = qdict_get(bs->options, *option_name);
7307              if (!entry) {
7308                  continue;
7309              }
7310  
7311              qdict_put_obj(d, *option_name, qobject_ref(entry));
7312              option_given = true;
7313          } else {
7314              const QDictEntry *entry;
7315              for (entry = qdict_first(bs->options); entry;
7316                   entry = qdict_next(bs->options, entry))
7317              {
7318                  if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
7319                      qdict_put_obj(d, qdict_entry_key(entry),
7320                                    qobject_ref(qdict_entry_value(entry)));
7321                      option_given = true;
7322                  }
7323              }
7324          }
7325  
7326          /* While "driver" and "filename" need to be included in a JSON filename,
7327           * their existence does not prohibit generation of a plain filename. */
7328          if (!found_any && option_given &&
7329              strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
7330          {
7331              found_any = true;
7332          }
7333      }
7334  
7335      if (!qdict_haskey(d, "driver")) {
7336          /* Drivers created with bdrv_new_open_driver() may not have a
7337           * @driver option.  Add it here. */
7338          qdict_put_str(d, "driver", bs->drv->format_name);
7339      }
7340  
7341      return found_any;
7342  }
7343  
7344  /* Note: This function may return false positives; it may return true
7345   * even if opening the backing file specified by bs's image header
7346   * would result in exactly bs->backing. */
7347  bool bdrv_backing_overridden(BlockDriverState *bs)
7348  {
7349      if (bs->backing) {
7350          return strcmp(bs->auto_backing_file,
7351                        bs->backing->bs->filename);
7352      } else {
7353          /* No backing BDS, so if the image header reports any backing
7354           * file, it must have been suppressed */
7355          return bs->auto_backing_file[0] != '\0';
7356      }
7357  }
7358  
7359  /* Updates the following BDS fields:
7360   *  - exact_filename: A filename which may be used for opening a block device
7361   *                    which (mostly) equals the given BDS (even without any
7362   *                    other options; so reading and writing must return the same
7363   *                    results, but caching etc. may be different)
7364   *  - full_open_options: Options which, when given when opening a block device
7365   *                       (without a filename), result in a BDS (mostly)
7366   *                       equalling the given one
7367   *  - filename: If exact_filename is set, it is copied here. Otherwise,
7368   *              full_open_options is converted to a JSON object, prefixed with
7369   *              "json:" (for use through the JSON pseudo protocol) and put here.
7370   */
7371  void bdrv_refresh_filename(BlockDriverState *bs)
7372  {
7373      BlockDriver *drv = bs->drv;
7374      BdrvChild *child;
7375      BlockDriverState *primary_child_bs;
7376      QDict *opts;
7377      bool backing_overridden;
7378      bool generate_json_filename; /* Whether our default implementation should
7379                                      fill exact_filename (false) or not (true) */
7380  
7381      if (!drv) {
7382          return;
7383      }
7384  
7385      /* This BDS's file name may depend on any of its children's file names, so
7386       * refresh those first */
7387      QLIST_FOREACH(child, &bs->children, next) {
7388          bdrv_refresh_filename(child->bs);
7389      }
7390  
7391      if (bs->implicit) {
7392          /* For implicit nodes, just copy everything from the single child */
7393          child = QLIST_FIRST(&bs->children);
7394          assert(QLIST_NEXT(child, next) == NULL);
7395  
7396          pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
7397                  child->bs->exact_filename);
7398          pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
7399  
7400          qobject_unref(bs->full_open_options);
7401          bs->full_open_options = qobject_ref(child->bs->full_open_options);
7402  
7403          return;
7404      }
7405  
7406      backing_overridden = bdrv_backing_overridden(bs);
7407  
7408      if (bs->open_flags & BDRV_O_NO_IO) {
7409          /* Without I/O, the backing file does not change anything.
7410           * Therefore, in such a case (primarily qemu-img), we can
7411           * pretend the backing file has not been overridden even if
7412           * it technically has been. */
7413          backing_overridden = false;
7414      }
7415  
7416      /* Gather the options QDict */
7417      opts = qdict_new();
7418      generate_json_filename = append_strong_runtime_options(opts, bs);
7419      generate_json_filename |= backing_overridden;
7420  
7421      if (drv->bdrv_gather_child_options) {
7422          /* Some block drivers may not want to present all of their children's
7423           * options, or name them differently from BdrvChild.name */
7424          drv->bdrv_gather_child_options(bs, opts, backing_overridden);
7425      } else {
7426          QLIST_FOREACH(child, &bs->children, next) {
7427              if (child == bs->backing && !backing_overridden) {
7428                  /* We can skip the backing BDS if it has not been overridden */
7429                  continue;
7430              }
7431  
7432              qdict_put(opts, child->name,
7433                        qobject_ref(child->bs->full_open_options));
7434          }
7435  
7436          if (backing_overridden && !bs->backing) {
7437              /* Force no backing file */
7438              qdict_put_null(opts, "backing");
7439          }
7440      }
7441  
7442      qobject_unref(bs->full_open_options);
7443      bs->full_open_options = opts;
7444  
7445      primary_child_bs = bdrv_primary_bs(bs);
7446  
7447      if (drv->bdrv_refresh_filename) {
7448          /* Obsolete information is of no use here, so drop the old file name
7449           * information before refreshing it */
7450          bs->exact_filename[0] = '\0';
7451  
7452          drv->bdrv_refresh_filename(bs);
7453      } else if (primary_child_bs) {
7454          /*
7455           * Try to reconstruct valid information from the underlying
7456           * file -- this only works for format nodes (filter nodes
7457           * cannot be probed and as such must be selected by the user
7458           * either through an options dict, or through a special
7459           * filename which the filter driver must construct in its
7460           * .bdrv_refresh_filename() implementation).
7461           */
7462  
7463          bs->exact_filename[0] = '\0';
7464  
7465          /*
7466           * We can use the underlying file's filename if:
7467           * - it has a filename,
7468           * - the current BDS is not a filter,
7469           * - the file is a protocol BDS, and
7470           * - opening that file (as this BDS's format) will automatically create
7471           *   the BDS tree we have right now, that is:
7472           *   - the user did not significantly change this BDS's behavior with
7473           *     some explicit (strong) options
7474           *   - no non-file child of this BDS has been overridden by the user
7475           *   Both of these conditions are represented by generate_json_filename.
7476           */
7477          if (primary_child_bs->exact_filename[0] &&
7478              primary_child_bs->drv->bdrv_file_open &&
7479              !drv->is_filter && !generate_json_filename)
7480          {
7481              strcpy(bs->exact_filename, primary_child_bs->exact_filename);
7482          }
7483      }
7484  
7485      if (bs->exact_filename[0]) {
7486          pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
7487      } else {
7488          GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
7489          if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
7490                       json->str) >= sizeof(bs->filename)) {
7491              /* Give user a hint if we truncated things. */
7492              strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
7493          }
7494          g_string_free(json, true);
7495      }
7496  }
7497  
7498  char *bdrv_dirname(BlockDriverState *bs, Error **errp)
7499  {
7500      BlockDriver *drv = bs->drv;
7501      BlockDriverState *child_bs;
7502  
7503      if (!drv) {
7504          error_setg(errp, "Node '%s' is ejected", bs->node_name);
7505          return NULL;
7506      }
7507  
7508      if (drv->bdrv_dirname) {
7509          return drv->bdrv_dirname(bs, errp);
7510      }
7511  
7512      child_bs = bdrv_primary_bs(bs);
7513      if (child_bs) {
7514          return bdrv_dirname(child_bs, errp);
7515      }
7516  
7517      bdrv_refresh_filename(bs);
7518      if (bs->exact_filename[0] != '\0') {
7519          return path_combine(bs->exact_filename, "");
7520      }
7521  
7522      error_setg(errp, "Cannot generate a base directory for %s nodes",
7523                 drv->format_name);
7524      return NULL;
7525  }
7526  
7527  /*
7528   * Hot add/remove a BDS's child. So the user can take a child offline when
7529   * it is broken and take a new child online
7530   */
7531  void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
7532                      Error **errp)
7533  {
7534  
7535      if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
7536          error_setg(errp, "The node %s does not support adding a child",
7537                     bdrv_get_device_or_node_name(parent_bs));
7538          return;
7539      }
7540  
7541      if (!QLIST_EMPTY(&child_bs->parents)) {
7542          error_setg(errp, "The node %s already has a parent",
7543                     child_bs->node_name);
7544          return;
7545      }
7546  
7547      parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
7548  }
7549  
7550  void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
7551  {
7552      BdrvChild *tmp;
7553  
7554      if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
7555          error_setg(errp, "The node %s does not support removing a child",
7556                     bdrv_get_device_or_node_name(parent_bs));
7557          return;
7558      }
7559  
7560      QLIST_FOREACH(tmp, &parent_bs->children, next) {
7561          if (tmp == child) {
7562              break;
7563          }
7564      }
7565  
7566      if (!tmp) {
7567          error_setg(errp, "The node %s does not have a child named %s",
7568                     bdrv_get_device_or_node_name(parent_bs),
7569                     bdrv_get_device_or_node_name(child->bs));
7570          return;
7571      }
7572  
7573      parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
7574  }
7575  
7576  int bdrv_make_empty(BdrvChild *c, Error **errp)
7577  {
7578      BlockDriver *drv = c->bs->drv;
7579      int ret;
7580  
7581      assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
7582  
7583      if (!drv->bdrv_make_empty) {
7584          error_setg(errp, "%s does not support emptying nodes",
7585                     drv->format_name);
7586          return -ENOTSUP;
7587      }
7588  
7589      ret = drv->bdrv_make_empty(c->bs);
7590      if (ret < 0) {
7591          error_setg_errno(errp, -ret, "Failed to empty %s",
7592                           c->bs->filename);
7593          return ret;
7594      }
7595  
7596      return 0;
7597  }
7598  
7599  /*
7600   * Return the child that @bs acts as an overlay for, and from which data may be
7601   * copied in COW or COR operations.  Usually this is the backing file.
7602   */
7603  BdrvChild *bdrv_cow_child(BlockDriverState *bs)
7604  {
7605      if (!bs || !bs->drv) {
7606          return NULL;
7607      }
7608  
7609      if (bs->drv->is_filter) {
7610          return NULL;
7611      }
7612  
7613      if (!bs->backing) {
7614          return NULL;
7615      }
7616  
7617      assert(bs->backing->role & BDRV_CHILD_COW);
7618      return bs->backing;
7619  }
7620  
7621  /*
7622   * If @bs acts as a filter for exactly one of its children, return
7623   * that child.
7624   */
7625  BdrvChild *bdrv_filter_child(BlockDriverState *bs)
7626  {
7627      BdrvChild *c;
7628  
7629      if (!bs || !bs->drv) {
7630          return NULL;
7631      }
7632  
7633      if (!bs->drv->is_filter) {
7634          return NULL;
7635      }
7636  
7637      /* Only one of @backing or @file may be used */
7638      assert(!(bs->backing && bs->file));
7639  
7640      c = bs->backing ?: bs->file;
7641      if (!c) {
7642          return NULL;
7643      }
7644  
7645      assert(c->role & BDRV_CHILD_FILTERED);
7646      return c;
7647  }
7648  
7649  /*
7650   * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
7651   * whichever is non-NULL.
7652   *
7653   * Return NULL if both are NULL.
7654   */
7655  BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
7656  {
7657      BdrvChild *cow_child = bdrv_cow_child(bs);
7658      BdrvChild *filter_child = bdrv_filter_child(bs);
7659  
7660      /* Filter nodes cannot have COW backing files */
7661      assert(!(cow_child && filter_child));
7662  
7663      return cow_child ?: filter_child;
7664  }
7665  
7666  /*
7667   * Return the primary child of this node: For filters, that is the
7668   * filtered child.  For other nodes, that is usually the child storing
7669   * metadata.
7670   * (A generally more helpful description is that this is (usually) the
7671   * child that has the same filename as @bs.)
7672   *
7673   * Drivers do not necessarily have a primary child; for example quorum
7674   * does not.
7675   */
7676  BdrvChild *bdrv_primary_child(BlockDriverState *bs)
7677  {
7678      BdrvChild *c, *found = NULL;
7679  
7680      QLIST_FOREACH(c, &bs->children, next) {
7681          if (c->role & BDRV_CHILD_PRIMARY) {
7682              assert(!found);
7683              found = c;
7684          }
7685      }
7686  
7687      return found;
7688  }
7689  
7690  static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
7691                                                bool stop_on_explicit_filter)
7692  {
7693      BdrvChild *c;
7694  
7695      if (!bs) {
7696          return NULL;
7697      }
7698  
7699      while (!(stop_on_explicit_filter && !bs->implicit)) {
7700          c = bdrv_filter_child(bs);
7701          if (!c) {
7702              /*
7703               * A filter that is embedded in a working block graph must
7704               * have a child.  Assert this here so this function does
7705               * not return a filter node that is not expected by the
7706               * caller.
7707               */
7708              assert(!bs->drv || !bs->drv->is_filter);
7709              break;
7710          }
7711          bs = c->bs;
7712      }
7713      /*
7714       * Note that this treats nodes with bs->drv == NULL as not being
7715       * filters (bs->drv == NULL should be replaced by something else
7716       * anyway).
7717       * The advantage of this behavior is that this function will thus
7718       * always return a non-NULL value (given a non-NULL @bs).
7719       */
7720  
7721      return bs;
7722  }
7723  
7724  /*
7725   * Return the first BDS that has not been added implicitly or that
7726   * does not have a filtered child down the chain starting from @bs
7727   * (including @bs itself).
7728   */
7729  BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
7730  {
7731      return bdrv_do_skip_filters(bs, true);
7732  }
7733  
7734  /*
7735   * Return the first BDS that does not have a filtered child down the
7736   * chain starting from @bs (including @bs itself).
7737   */
7738  BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
7739  {
7740      return bdrv_do_skip_filters(bs, false);
7741  }
7742  
7743  /*
7744   * For a backing chain, return the first non-filter backing image of
7745   * the first non-filter image.
7746   */
7747  BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
7748  {
7749      return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
7750  }
7751  
7752  /**
7753   * Check whether [offset, offset + bytes) overlaps with the cached
7754   * block-status data region.
7755   *
7756   * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
7757   * which is what bdrv_bsc_is_data()'s interface needs.
7758   * Otherwise, *pnum is not touched.
7759   */
7760  static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
7761                                             int64_t offset, int64_t bytes,
7762                                             int64_t *pnum)
7763  {
7764      BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
7765      bool overlaps;
7766  
7767      overlaps =
7768          qatomic_read(&bsc->valid) &&
7769          ranges_overlap(offset, bytes, bsc->data_start,
7770                         bsc->data_end - bsc->data_start);
7771  
7772      if (overlaps && pnum) {
7773          *pnum = bsc->data_end - offset;
7774      }
7775  
7776      return overlaps;
7777  }
7778  
7779  /**
7780   * See block_int.h for this function's documentation.
7781   */
7782  bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
7783  {
7784      RCU_READ_LOCK_GUARD();
7785  
7786      return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
7787  }
7788  
7789  /**
7790   * See block_int.h for this function's documentation.
7791   */
7792  void bdrv_bsc_invalidate_range(BlockDriverState *bs,
7793                                 int64_t offset, int64_t bytes)
7794  {
7795      RCU_READ_LOCK_GUARD();
7796  
7797      if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
7798          qatomic_set(&bs->block_status_cache->valid, false);
7799      }
7800  }
7801  
7802  /**
7803   * See block_int.h for this function's documentation.
7804   */
7805  void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
7806  {
7807      BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
7808      BdrvBlockStatusCache *old_bsc;
7809  
7810      *new_bsc = (BdrvBlockStatusCache) {
7811          .valid = true,
7812          .data_start = offset,
7813          .data_end = offset + bytes,
7814      };
7815  
7816      QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
7817  
7818      old_bsc = qatomic_rcu_read(&bs->block_status_cache);
7819      qatomic_rcu_set(&bs->block_status_cache, new_bsc);
7820      if (old_bsc) {
7821          g_free_rcu(old_bsc, rcu);
7822      }
7823  }
7824