xref: /openbmc/qemu/block.c (revision 96291f13434e3f179744fec549ada90a9411fef0)
1  /*
2   * QEMU System Emulator block driver
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  #include "block/trace.h"
27  #include "block/block_int.h"
28  #include "block/blockjob.h"
29  #include "block/fuse.h"
30  #include "block/nbd.h"
31  #include "block/qdict.h"
32  #include "qemu/error-report.h"
33  #include "block/module_block.h"
34  #include "qemu/main-loop.h"
35  #include "qemu/module.h"
36  #include "qapi/error.h"
37  #include "qapi/qmp/qdict.h"
38  #include "qapi/qmp/qjson.h"
39  #include "qapi/qmp/qnull.h"
40  #include "qapi/qmp/qstring.h"
41  #include "qapi/qobject-output-visitor.h"
42  #include "qapi/qapi-visit-block-core.h"
43  #include "sysemu/block-backend.h"
44  #include "sysemu/sysemu.h"
45  #include "qemu/notify.h"
46  #include "qemu/option.h"
47  #include "qemu/coroutine.h"
48  #include "block/qapi.h"
49  #include "qemu/timer.h"
50  #include "qemu/cutils.h"
51  #include "qemu/id.h"
52  #include "block/coroutines.h"
53  
54  #ifdef CONFIG_BSD
55  #include <sys/ioctl.h>
56  #include <sys/queue.h>
57  #ifndef __DragonFly__
58  #include <sys/disk.h>
59  #endif
60  #endif
61  
62  #ifdef _WIN32
63  #include <windows.h>
64  #endif
65  
/*
 * Sentinel result value: used while emulated sync operation in progress.
 * bdrv_create() stores it in CreateCo.ret and polls until the coroutine
 * replaces it with the real return code.
 */
#define NOT_DONE 0x7fffffff

/*
 * List head of BlockDriverState entries. Nothing in the visible part of this
 * file inserts into it; presumably it tracks nodes that belong to the block
 * graph -- confirm against the users further down.
 */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new() is appended to this list. */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers registered through bdrv_register(). */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition appears later in the file. */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
87  
88  #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed by
 * ':' (for example "c:" or "D:\dir"), and 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_ascii_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first byte is known to be a letter */
    return is_ascii_letter && filename[1] == ':';
}
95  
/*
 * Return 1 if @filename is exactly a drive specification ("d:") or starts
 * with one of the device prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* A bare drive: letter, colon, end of string */
    if (is_windows_drive_prefix(filename) && !filename[2]) {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
106  #endif
107  
108  size_t bdrv_opt_mem_align(BlockDriverState *bs)
109  {
110      if (!bs || !bs->drv) {
111          /* page size or 4k (hdd sector size) should be on the safe side */
112          return MAX(4096, qemu_real_host_page_size);
113      }
114  
115      return bs->bl.opt_mem_alignment;
116  }
117  
118  size_t bdrv_min_mem_align(BlockDriverState *bs)
119  {
120      if (!bs || !bs->drv) {
121          /* page size or 4k (hdd sector size) should be on the safe side */
122          return MAX(4096, qemu_real_host_page_size);
123      }
124  
125      return bs->bl.min_mem_alignment;
126  }
127  
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * delimiter found in it is a ':' rather than a path separator.
 *
 * On Windows, drive specifications and device names are never treated as
 * protocols, and '\\' counts as a path separator as well.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
    size_t first_delim;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    first_delim = strcspn(path, ":/\\");
#else
    first_delim = strcspn(path, ":/");
#endif

    /* If no delimiter exists, this indexes the terminating NUL */
    return path[first_delim] == ':';
}
145  
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it begins with '/'. On Windows a leading '\\', a
 * drive prefix, or a device name also makes it absolute.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
158  
/*
 * Build a path for @filename relative to the directory part of @base_path.
 *
 * If @filename is absolute, a duplicate of it is returned. Otherwise the
 * result is the prefix of @base_path up to and including its last path
 * separator -- or, if that is longer, up to and including the ':' of a
 * protocol prefix, so URLs are supported -- followed by @filename.
 *
 * The returned string is allocated with g_strdup()/g_malloc().
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *prefix_end = base_path; /* first byte NOT taken from base_path */
    const char *last_sep;
    char *combined;
    int prefix_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* Never cut into the "<protocol>:" part of the base path */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif
    /* Keep the directory part when it extends beyond the protocol prefix */
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }
    prefix_len = prefix_end - base_path;

    combined = g_malloc(prefix_len + strlen(filename) + 1);
    memcpy(combined, base_path, prefix_len);
    strcpy(combined + prefix_len, filename);

    return combined;
}
207  
208  /*
209   * Helper function for bdrv_parse_filename() implementations to remove optional
210   * protocol prefixes (especially "file:") from a filename and for putting the
211   * stripped filename into the options QDict if there is such a prefix.
212   */
213  void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
214                                        QDict *options)
215  {
216      if (strstart(filename, prefix, &filename)) {
217          /* Stripping the explicit protocol prefix may result in a protocol
218           * prefix being (wrongly) detected (if the filename contains a colon) */
219          if (path_has_protocol(filename)) {
220              GString *fat_filename;
221  
222              /* This means there is some colon before the first slash; therefore,
223               * this cannot be an absolute path */
224              assert(!path_is_absolute(filename));
225  
226              /* And we can thus fix the protocol detection issue by prefixing it
227               * by "./" */
228              fat_filename = g_string_new("./");
229              g_string_append(fat_filename, filename);
230  
231              assert(!path_has_protocol(fat_filename->str));
232  
233              qdict_put(options, "filename",
234                        qstring_from_gstring(fat_filename));
235          } else {
236              /* If no protocol prefix was detected, we can use the shortened
237               * filename as-is */
238              qdict_put_str(options, "filename", filename);
239          }
240      }
241  }
242  
243  
244  /* Returns whether the image file is opened as read-only. Note that this can
245   * return false and writing to the image file is still not possible because the
246   * image is inactivated. */
247  bool bdrv_is_read_only(BlockDriverState *bs)
248  {
249      return bs->read_only;
250  }
251  
252  int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
253                             bool ignore_allow_rdw, Error **errp)
254  {
255      /* Do not set read_only if copy_on_read is enabled */
256      if (bs->copy_on_read && read_only) {
257          error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
258                     bdrv_get_device_or_node_name(bs));
259          return -EINVAL;
260      }
261  
262      /* Do not clear read_only if it is prohibited */
263      if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
264          !ignore_allow_rdw)
265      {
266          error_setg(errp, "Node '%s' is read only",
267                     bdrv_get_device_or_node_name(bs));
268          return -EPERM;
269      }
270  
271      return 0;
272  }
273  
274  /*
275   * Called by a driver that can only provide a read-only image.
276   *
277   * Returns 0 if the node is already read-only or it could switch the node to
278   * read-only because BDRV_O_AUTO_RDONLY is set.
279   *
280   * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
281   * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
282   * is not NULL, it is used as the error message for the Error object.
283   */
284  int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
285                                Error **errp)
286  {
287      int ret = 0;
288  
289      if (!(bs->open_flags & BDRV_O_RDWR)) {
290          return 0;
291      }
292      if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
293          goto fail;
294      }
295  
296      ret = bdrv_can_set_read_only(bs, true, false, NULL);
297      if (ret < 0) {
298          goto fail;
299      }
300  
301      bs->read_only = true;
302      bs->open_flags &= ~BDRV_O_RDWR;
303  
304      return 0;
305  
306  fail:
307      error_setg(errp, "%s", errmsg ?: "Image is read-only");
308      return -EACCES;
309  }
310  
311  /*
312   * If @backing is empty, this function returns NULL without setting
313   * @errp.  In all other cases, NULL will only be returned with @errp
314   * set.
315   *
316   * Therefore, a return value of NULL without @errp set means that
317   * there is no backing file; if @errp is set, there is one but its
318   * absolute filename cannot be generated.
319   */
320  char *bdrv_get_full_backing_filename_from_filename(const char *backed,
321                                                     const char *backing,
322                                                     Error **errp)
323  {
324      if (backing[0] == '\0') {
325          return NULL;
326      } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
327          return g_strdup(backing);
328      } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
329          error_setg(errp, "Cannot use relative backing file names for '%s'",
330                     backed);
331          return NULL;
332      } else {
333          return path_combine(backed, backing);
334      }
335  }
336  
337  /*
338   * If @filename is empty or NULL, this function returns NULL without
339   * setting @errp.  In all other cases, NULL will only be returned with
340   * @errp set.
341   */
342  static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
343                                           const char *filename, Error **errp)
344  {
345      char *dir, *full_name;
346  
347      if (!filename || filename[0] == '\0') {
348          return NULL;
349      } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
350          return g_strdup(filename);
351      }
352  
353      dir = bdrv_dirname(relative_to, errp);
354      if (!dir) {
355          return NULL;
356      }
357  
358      full_name = g_strconcat(dir, filename, NULL);
359      g_free(dir);
360      return full_name;
361  }
362  
363  char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
364  {
365      return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
366  }
367  
368  void bdrv_register(BlockDriver *bdrv)
369  {
370      assert(bdrv->format_name);
371      QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
372  }
373  
374  BlockDriverState *bdrv_new(void)
375  {
376      BlockDriverState *bs;
377      int i;
378  
379      bs = g_new0(BlockDriverState, 1);
380      QLIST_INIT(&bs->dirty_bitmaps);
381      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
382          QLIST_INIT(&bs->op_blockers[i]);
383      }
384      notifier_with_return_list_init(&bs->before_write_notifiers);
385      qemu_co_mutex_init(&bs->reqs_lock);
386      qemu_mutex_init(&bs->dirty_bitmap_mutex);
387      bs->refcnt = 1;
388      bs->aio_context = qemu_get_aio_context();
389  
390      qemu_co_queue_init(&bs->flush_queue);
391  
392      for (i = 0; i < bdrv_drain_all_count; i++) {
393          bdrv_drained_begin(bs);
394      }
395  
396      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
397  
398      return bs;
399  }
400  
401  static BlockDriver *bdrv_do_find_format(const char *format_name)
402  {
403      BlockDriver *drv1;
404  
405      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
406          if (!strcmp(drv1->format_name, format_name)) {
407              return drv1;
408          }
409      }
410  
411      return NULL;
412  }
413  
414  BlockDriver *bdrv_find_format(const char *format_name)
415  {
416      BlockDriver *drv1;
417      int i;
418  
419      drv1 = bdrv_do_find_format(format_name);
420      if (drv1) {
421          return drv1;
422      }
423  
424      /* The driver isn't registered, maybe we need to load a module */
425      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
426          if (!strcmp(block_driver_modules[i].format_name, format_name)) {
427              block_module_load_one(block_driver_modules[i].library_name);
428              break;
429          }
430      }
431  
432      return bdrv_do_find_format(format_name);
433  }
434  
435  static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
436  {
437      static const char *whitelist_rw[] = {
438          CONFIG_BDRV_RW_WHITELIST
439          NULL
440      };
441      static const char *whitelist_ro[] = {
442          CONFIG_BDRV_RO_WHITELIST
443          NULL
444      };
445      const char **p;
446  
447      if (!whitelist_rw[0] && !whitelist_ro[0]) {
448          return 1;               /* no whitelist, anything goes */
449      }
450  
451      for (p = whitelist_rw; *p; p++) {
452          if (!strcmp(format_name, *p)) {
453              return 1;
454          }
455      }
456      if (read_only) {
457          for (p = whitelist_ro; *p; p++) {
458              if (!strcmp(format_name, *p)) {
459                  return 1;
460              }
461          }
462      }
463      return 0;
464  }
465  
466  int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
467  {
468      return bdrv_format_is_whitelisted(drv->format_name, read_only);
469  }
470  
471  bool bdrv_uses_whitelist(void)
472  {
473      return use_bdrv_whitelist;
474  }
475  
/* Arguments and results exchanged between bdrv_create() and its coroutine */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_co_create_opts() is called */
    char *filename;     /* copy of the filename; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options handed to the driver */
    int ret;            /* NOT_DONE until the coroutine stores its result */
    Error *err;         /* error propagated from the driver, if any */
} CreateCo;
483  
484  static void coroutine_fn bdrv_create_co_entry(void *opaque)
485  {
486      Error *local_err = NULL;
487      int ret;
488  
489      CreateCo *cco = opaque;
490      assert(cco->drv);
491  
492      ret = cco->drv->bdrv_co_create_opts(cco->drv,
493                                          cco->filename, cco->opts, &local_err);
494      error_propagate(&cco->err, local_err);
495      cco->ret = ret;
496  }
497  
498  int bdrv_create(BlockDriver *drv, const char* filename,
499                  QemuOpts *opts, Error **errp)
500  {
501      int ret;
502  
503      Coroutine *co;
504      CreateCo cco = {
505          .drv = drv,
506          .filename = g_strdup(filename),
507          .opts = opts,
508          .ret = NOT_DONE,
509          .err = NULL,
510      };
511  
512      if (!drv->bdrv_co_create_opts) {
513          error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
514          ret = -ENOTSUP;
515          goto out;
516      }
517  
518      if (qemu_in_coroutine()) {
519          /* Fast-path if already in coroutine context */
520          bdrv_create_co_entry(&cco);
521      } else {
522          co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
523          qemu_coroutine_enter(co);
524          while (cco.ret == NOT_DONE) {
525              aio_poll(qemu_get_aio_context(), true);
526          }
527      }
528  
529      ret = cco.ret;
530      if (ret < 0) {
531          if (cco.err) {
532              error_propagate(errp, cco.err);
533          } else {
534              error_setg_errno(errp, -ret, "Could not create image");
535          }
536      }
537  
538  out:
539      g_free(cco.filename);
540      return ret;
541  }
542  
543  /**
544   * Helper function for bdrv_create_file_fallback(): Resize @blk to at
545   * least the given @minimum_size.
546   *
547   * On success, return @blk's actual length.
548   * Otherwise, return -errno.
549   */
550  static int64_t create_file_fallback_truncate(BlockBackend *blk,
551                                               int64_t minimum_size, Error **errp)
552  {
553      Error *local_err = NULL;
554      int64_t size;
555      int ret;
556  
557      ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
558                         &local_err);
559      if (ret < 0 && ret != -ENOTSUP) {
560          error_propagate(errp, local_err);
561          return ret;
562      }
563  
564      size = blk_getlength(blk);
565      if (size < 0) {
566          error_free(local_err);
567          error_setg_errno(errp, -size,
568                           "Failed to inquire the new image file's length");
569          return size;
570      }
571  
572      if (size < minimum_size) {
573          /* Need to grow the image, but we failed to do that */
574          error_propagate(errp, local_err);
575          return -ENOTSUP;
576      }
577  
578      error_free(local_err);
579      local_err = NULL;
580  
581      return size;
582  }
583  
584  /**
585   * Helper function for bdrv_create_file_fallback(): Zero the first
586   * sector to remove any potentially pre-existing image header.
587   */
588  static int create_file_fallback_zero_first_sector(BlockBackend *blk,
589                                                    int64_t current_size,
590                                                    Error **errp)
591  {
592      int64_t bytes_to_clear;
593      int ret;
594  
595      bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
596      if (bytes_to_clear) {
597          ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
598          if (ret < 0) {
599              error_setg_errno(errp, -ret,
600                               "Failed to clear the new image's first sector");
601              return ret;
602          }
603      }
604  
605      return 0;
606  }
607  
608  /**
609   * Simple implementation of bdrv_co_create_opts for protocol drivers
610   * which only support creation via opening a file
611   * (usually existing raw storage device)
612   */
613  int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
614                                              const char *filename,
615                                              QemuOpts *opts,
616                                              Error **errp)
617  {
618      BlockBackend *blk;
619      QDict *options;
620      int64_t size = 0;
621      char *buf = NULL;
622      PreallocMode prealloc;
623      Error *local_err = NULL;
624      int ret;
625  
626      size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
627      buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
628      prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
629                                 PREALLOC_MODE_OFF, &local_err);
630      g_free(buf);
631      if (local_err) {
632          error_propagate(errp, local_err);
633          return -EINVAL;
634      }
635  
636      if (prealloc != PREALLOC_MODE_OFF) {
637          error_setg(errp, "Unsupported preallocation mode '%s'",
638                     PreallocMode_str(prealloc));
639          return -ENOTSUP;
640      }
641  
642      options = qdict_new();
643      qdict_put_str(options, "driver", drv->format_name);
644  
645      blk = blk_new_open(filename, NULL, options,
646                         BDRV_O_RDWR | BDRV_O_RESIZE, errp);
647      if (!blk) {
648          error_prepend(errp, "Protocol driver '%s' does not support image "
649                        "creation, and opening the image failed: ",
650                        drv->format_name);
651          return -EINVAL;
652      }
653  
654      size = create_file_fallback_truncate(blk, size, errp);
655      if (size < 0) {
656          ret = size;
657          goto out;
658      }
659  
660      ret = create_file_fallback_zero_first_sector(blk, size, errp);
661      if (ret < 0) {
662          goto out;
663      }
664  
665      ret = 0;
666  out:
667      blk_unref(blk);
668      return ret;
669  }
670  
671  int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
672  {
673      BlockDriver *drv;
674  
675      drv = bdrv_find_protocol(filename, true, errp);
676      if (drv == NULL) {
677          return -ENOENT;
678      }
679  
680      return bdrv_create(drv, filename, opts, errp);
681  }
682  
683  int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
684  {
685      Error *local_err = NULL;
686      int ret;
687  
688      assert(bs != NULL);
689  
690      if (!bs->drv) {
691          error_setg(errp, "Block node '%s' is not opened", bs->filename);
692          return -ENOMEDIUM;
693      }
694  
695      if (!bs->drv->bdrv_co_delete_file) {
696          error_setg(errp, "Driver '%s' does not support image deletion",
697                     bs->drv->format_name);
698          return -ENOTSUP;
699      }
700  
701      ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
702      if (ret < 0) {
703          error_propagate(errp, local_err);
704      }
705  
706      return ret;
707  }
708  
709  void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
710  {
711      Error *local_err = NULL;
712      int ret;
713  
714      if (!bs) {
715          return;
716      }
717  
718      ret = bdrv_co_delete_file(bs, &local_err);
719      /*
720       * ENOTSUP will happen if the block driver doesn't support
721       * the 'bdrv_co_delete_file' interface. This is a predictable
722       * scenario and shouldn't be reported back to the user.
723       */
724      if (ret == -ENOTSUP) {
725          error_free(local_err);
726      } else if (ret < 0) {
727          error_report_err(local_err);
728      }
729  }
730  
731  /**
732   * Try to get @bs's logical and physical block size.
733   * On success, store them in @bsz struct and return 0.
734   * On failure return -errno.
735   * @bs must not be empty.
736   */
737  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
738  {
739      BlockDriver *drv = bs->drv;
740      BlockDriverState *filtered = bdrv_filter_bs(bs);
741  
742      if (drv && drv->bdrv_probe_blocksizes) {
743          return drv->bdrv_probe_blocksizes(bs, bsz);
744      } else if (filtered) {
745          return bdrv_probe_blocksizes(filtered, bsz);
746      }
747  
748      return -ENOTSUP;
749  }
750  
751  /**
752   * Try to get @bs's geometry (cyls, heads, sectors).
753   * On success, store them in @geo struct and return 0.
754   * On failure return -errno.
755   * @bs must not be empty.
756   */
757  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
758  {
759      BlockDriver *drv = bs->drv;
760      BlockDriverState *filtered = bdrv_filter_bs(bs);
761  
762      if (drv && drv->bdrv_probe_geometry) {
763          return drv->bdrv_probe_geometry(bs, geo);
764      } else if (filtered) {
765          return bdrv_probe_geometry(filtered, geo);
766      }
767  
768      return -ENOTSUP;
769  }
770  
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer that receives the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created as "vl.XXXXXX" in $TMPDIR, or in
 * /var/tmp if TMPDIR is not set. On Windows the buffer must be at least
 * MAX_PATH bytes large.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if the
 * name does not fit into @filename). On Windows the failure value is the
 * negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Save the close() error first: unlink() may overwrite errno, and
         * the caller must see why the file could not be finalized.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
806  
807  /*
808   * Detect host devices. By convention, /dev/cdrom[N] is always
809   * recognized as a host CDROM.
810   */
811  static BlockDriver *find_hdev_driver(const char *filename)
812  {
813      int score_max = 0, score;
814      BlockDriver *drv = NULL, *d;
815  
816      QLIST_FOREACH(d, &bdrv_drivers, list) {
817          if (d->bdrv_probe_device) {
818              score = d->bdrv_probe_device(filename);
819              if (score > score_max) {
820                  score_max = score;
821                  drv = d;
822              }
823          }
824      }
825  
826      return drv;
827  }
828  
829  static BlockDriver *bdrv_do_find_protocol(const char *protocol)
830  {
831      BlockDriver *drv1;
832  
833      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
834          if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
835              return drv1;
836          }
837      }
838  
839      return NULL;
840  }
841  
842  BlockDriver *bdrv_find_protocol(const char *filename,
843                                  bool allow_protocol_prefix,
844                                  Error **errp)
845  {
846      BlockDriver *drv1;
847      char protocol[128];
848      int len;
849      const char *p;
850      int i;
851  
852      /* TODO Drivers without bdrv_file_open must be specified explicitly */
853  
854      /*
855       * XXX(hch): we really should not let host device detection
856       * override an explicit protocol specification, but moving this
857       * later breaks access to device names with colons in them.
858       * Thanks to the brain-dead persistent naming schemes on udev-
859       * based Linux systems those actually are quite common.
860       */
861      drv1 = find_hdev_driver(filename);
862      if (drv1) {
863          return drv1;
864      }
865  
866      if (!path_has_protocol(filename) || !allow_protocol_prefix) {
867          return &bdrv_file;
868      }
869  
870      p = strchr(filename, ':');
871      assert(p != NULL);
872      len = p - filename;
873      if (len > sizeof(protocol) - 1)
874          len = sizeof(protocol) - 1;
875      memcpy(protocol, filename, len);
876      protocol[len] = '\0';
877  
878      drv1 = bdrv_do_find_protocol(protocol);
879      if (drv1) {
880          return drv1;
881      }
882  
883      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
884          if (block_driver_modules[i].protocol_name &&
885              !strcmp(block_driver_modules[i].protocol_name, protocol)) {
886              block_module_load_one(block_driver_modules[i].library_name);
887              break;
888          }
889      }
890  
891      drv1 = bdrv_do_find_protocol(protocol);
892      if (!drv1) {
893          error_setg(errp, "Unknown protocol '%s'", protocol);
894      }
895      return drv1;
896  }
897  
898  /*
899   * Guess image format by probing its contents.
900   * This is not a good idea when your image is raw (CVE-2008-2004), but
901   * we do it anyway for backward compatibility.
902   *
903   * @buf         contains the image's first @buf_size bytes.
904   * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
905   *              but can be smaller if the image file is smaller)
906   * @filename    is its filename.
907   *
908   * For all block drivers, call the bdrv_probe() method to get its
909   * probing score.
910   * Return the first block driver with the highest probing score.
911   */
912  BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
913                              const char *filename)
914  {
915      int score_max = 0, score;
916      BlockDriver *drv = NULL, *d;
917  
918      QLIST_FOREACH(d, &bdrv_drivers, list) {
919          if (d->bdrv_probe) {
920              score = d->bdrv_probe(buf, buf_size, filename);
921              if (score > score_max) {
922                  score_max = score;
923                  drv = d;
924              }
925          }
926      }
927  
928      return drv;
929  }
930  
931  static int find_image_format(BlockBackend *file, const char *filename,
932                               BlockDriver **pdrv, Error **errp)
933  {
934      BlockDriver *drv;
935      uint8_t buf[BLOCK_PROBE_BUF_SIZE];
936      int ret = 0;
937  
938      /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
939      if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
940          *pdrv = &bdrv_raw;
941          return ret;
942      }
943  
944      ret = blk_pread(file, 0, buf, sizeof(buf));
945      if (ret < 0) {
946          error_setg_errno(errp, -ret, "Could not read image for determining its "
947                           "format");
948          *pdrv = NULL;
949          return ret;
950      }
951  
952      drv = bdrv_probe_all(buf, ret, filename);
953      if (!drv) {
954          error_setg(errp, "Could not determine image format: No compatible "
955                     "driver found");
956          ret = -ENOENT;
957      }
958      *pdrv = drv;
959      return ret;
960  }
961  
962  /**
963   * Set the current 'total_sectors' value
964   * Return 0 on success, -errno on error.
965   */
966  int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
967  {
968      BlockDriver *drv = bs->drv;
969  
970      if (!drv) {
971          return -ENOMEDIUM;
972      }
973  
974      /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
975      if (bdrv_is_sg(bs))
976          return 0;
977  
978      /* query actual device if possible, otherwise just trust the hint */
979      if (drv->bdrv_getlength) {
980          int64_t length = drv->bdrv_getlength(bs);
981          if (length < 0) {
982              return length;
983          }
984          hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
985      }
986  
987      bs->total_sectors = hint;
988  
989      if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
990          return -EFBIG;
991      }
992  
993      return 0;
994  }
995  
996  /**
997   * Combines a QDict of new block driver @options with any missing options taken
998   * from @old_options, so that leaving out an option defaults to its old value.
999   */
1000  static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1001                                QDict *old_options)
1002  {
1003      if (bs->drv && bs->drv->bdrv_join_options) {
1004          bs->drv->bdrv_join_options(options, old_options);
1005      } else {
1006          qdict_join(options, old_options, false);
1007      }
1008  }
1009  
1010  static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1011                                                              int open_flags,
1012                                                              Error **errp)
1013  {
1014      Error *local_err = NULL;
1015      char *value = qemu_opt_get_del(opts, "detect-zeroes");
1016      BlockdevDetectZeroesOptions detect_zeroes =
1017          qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1018                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
1019      g_free(value);
1020      if (local_err) {
1021          error_propagate(errp, local_err);
1022          return detect_zeroes;
1023      }
1024  
1025      if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1026          !(open_flags & BDRV_O_UNMAP))
1027      {
1028          error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1029                     "without setting discard operation to unmap");
1030      }
1031  
1032      return detect_zeroes;
1033  }
1034  
1035  /**
1036   * Set open flags for aio engine
1037   *
1038   * Return 0 on success, -1 if the engine specified is invalid
1039   */
1040  int bdrv_parse_aio(const char *mode, int *flags)
1041  {
1042      if (!strcmp(mode, "threads")) {
1043          /* do nothing, default */
1044      } else if (!strcmp(mode, "native")) {
1045          *flags |= BDRV_O_NATIVE_AIO;
1046  #ifdef CONFIG_LINUX_IO_URING
1047      } else if (!strcmp(mode, "io_uring")) {
1048          *flags |= BDRV_O_IO_URING;
1049  #endif
1050      } else {
1051          return -1;
1052      }
1053  
1054      return 0;
1055  }
1056  
1057  /**
1058   * Set open flags for a given discard mode
1059   *
1060   * Return 0 on success, -1 if the discard mode was invalid.
1061   */
1062  int bdrv_parse_discard_flags(const char *mode, int *flags)
1063  {
1064      *flags &= ~BDRV_O_UNMAP;
1065  
1066      if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1067          /* do nothing */
1068      } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1069          *flags |= BDRV_O_UNMAP;
1070      } else {
1071          return -1;
1072      }
1073  
1074      return 0;
1075  }
1076  
1077  /**
1078   * Set open flags for a given cache mode
1079   *
1080   * Return 0 on success, -1 if the cache mode was invalid.
1081   */
1082  int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
1083  {
1084      *flags &= ~BDRV_O_CACHE_MASK;
1085  
1086      if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
1087          *writethrough = false;
1088          *flags |= BDRV_O_NOCACHE;
1089      } else if (!strcmp(mode, "directsync")) {
1090          *writethrough = true;
1091          *flags |= BDRV_O_NOCACHE;
1092      } else if (!strcmp(mode, "writeback")) {
1093          *writethrough = false;
1094      } else if (!strcmp(mode, "unsafe")) {
1095          *writethrough = false;
1096          *flags |= BDRV_O_NO_FLUSH;
1097      } else if (!strcmp(mode, "writethrough")) {
1098          *writethrough = true;
1099      } else {
1100          return -1;
1101      }
1102  
1103      return 0;
1104  }
1105  
1106  static char *bdrv_child_get_parent_desc(BdrvChild *c)
1107  {
1108      BlockDriverState *parent = c->opaque;
1109      return g_strdup(bdrv_get_device_or_node_name(parent));
1110  }
1111  
1112  static void bdrv_child_cb_drained_begin(BdrvChild *child)
1113  {
1114      BlockDriverState *bs = child->opaque;
1115      bdrv_do_drained_begin_quiesce(bs, NULL, false);
1116  }
1117  
1118  static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1119  {
1120      BlockDriverState *bs = child->opaque;
1121      return bdrv_drain_poll(bs, false, NULL, false);
1122  }
1123  
1124  static void bdrv_child_cb_drained_end(BdrvChild *child,
1125                                        int *drained_end_counter)
1126  {
1127      BlockDriverState *bs = child->opaque;
1128      bdrv_drained_end_no_poll(bs, drained_end_counter);
1129  }
1130  
1131  static int bdrv_child_cb_inactivate(BdrvChild *child)
1132  {
1133      BlockDriverState *bs = child->opaque;
1134      assert(bs->open_flags & BDRV_O_INACTIVE);
1135      return 0;
1136  }
1137  
1138  static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1139                                            GSList **ignore, Error **errp)
1140  {
1141      BlockDriverState *bs = child->opaque;
1142      return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
1143  }
1144  
1145  static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1146                                        GSList **ignore)
1147  {
1148      BlockDriverState *bs = child->opaque;
1149      return bdrv_set_aio_context_ignore(bs, ctx, ignore);
1150  }
1151  
1152  /*
1153   * Returns the options and flags that a temporary snapshot should get, based on
1154   * the originally requested flags (the originally requested image will have
1155   * flags like a backing file)
1156   */
1157  static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1158                                         int parent_flags, QDict *parent_options)
1159  {
1160      *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1161  
1162      /* For temporary files, unconditional cache=unsafe is fine */
1163      qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1164      qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
1165  
1166      /* Copy the read-only and discard options from the parent */
1167      qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1168      qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
1169  
1170      /* aio=native doesn't work for cache.direct=off, so disable it for the
1171       * temporary snapshot */
1172      *child_flags &= ~BDRV_O_NATIVE_AIO;
1173  }
1174  
1175  static void bdrv_backing_attach(BdrvChild *c)
1176  {
1177      BlockDriverState *parent = c->opaque;
1178      BlockDriverState *backing_hd = c->bs;
1179  
1180      assert(!parent->backing_blocker);
1181      error_setg(&parent->backing_blocker,
1182                 "node is used as backing hd of '%s'",
1183                 bdrv_get_device_or_node_name(parent));
1184  
1185      bdrv_refresh_filename(backing_hd);
1186  
1187      parent->open_flags &= ~BDRV_O_NO_BACKING;
1188  
1189      bdrv_op_block_all(backing_hd, parent->backing_blocker);
1190      /* Otherwise we won't be able to commit or stream */
1191      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1192                      parent->backing_blocker);
1193      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1194                      parent->backing_blocker);
1195      /*
1196       * We do backup in 3 ways:
1197       * 1. drive backup
1198       *    The target bs is new opened, and the source is top BDS
1199       * 2. blockdev backup
1200       *    Both the source and the target are top BDSes.
1201       * 3. internal backup(used for block replication)
1202       *    Both the source and the target are backing file
1203       *
1204       * In case 1 and 2, neither the source nor the target is the backing file.
1205       * In case 3, we will block the top BDS, so there is only one block job
1206       * for the top BDS and its backing chain.
1207       */
1208      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1209                      parent->backing_blocker);
1210      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1211                      parent->backing_blocker);
1212  }
1213  
1214  static void bdrv_backing_detach(BdrvChild *c)
1215  {
1216      BlockDriverState *parent = c->opaque;
1217  
1218      assert(parent->backing_blocker);
1219      bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1220      error_free(parent->backing_blocker);
1221      parent->backing_blocker = NULL;
1222  }
1223  
1224  static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1225                                          const char *filename, Error **errp)
1226  {
1227      BlockDriverState *parent = c->opaque;
1228      bool read_only = bdrv_is_read_only(parent);
1229      int ret;
1230  
1231      if (read_only) {
1232          ret = bdrv_reopen_set_read_only(parent, false, errp);
1233          if (ret < 0) {
1234              return ret;
1235          }
1236      }
1237  
1238      ret = bdrv_change_backing_file(parent, filename,
1239                                     base->drv ? base->drv->format_name : "",
1240                                     false);
1241      if (ret < 0) {
1242          error_setg_errno(errp, -ret, "Could not update backing file link");
1243      }
1244  
1245      if (read_only) {
1246          bdrv_reopen_set_read_only(parent, true, NULL);
1247      }
1248  
1249      return ret;
1250  }
1251  
1252  /*
1253   * Returns the options and flags that a generic child of a BDS should
1254   * get, based on the given options and flags for the parent BDS.
1255   */
1256  static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1257                                     int *child_flags, QDict *child_options,
1258                                     int parent_flags, QDict *parent_options)
1259  {
1260      int flags = parent_flags;
1261  
1262      /*
1263       * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1264       * Generally, the question to answer is: Should this child be
1265       * format-probed by default?
1266       */
1267  
1268      /*
1269       * Pure and non-filtered data children of non-format nodes should
1270       * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1271       * set).  This only affects a very limited set of drivers (namely
1272       * quorum and blkverify when this comment was written).
1273       * Force-clear BDRV_O_PROTOCOL then.
1274       */
1275      if (!parent_is_format &&
1276          (role & BDRV_CHILD_DATA) &&
1277          !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1278      {
1279          flags &= ~BDRV_O_PROTOCOL;
1280      }
1281  
1282      /*
1283       * All children of format nodes (except for COW children) and all
1284       * metadata children in general should never be format-probed.
1285       * Force-set BDRV_O_PROTOCOL then.
1286       */
1287      if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1288          (role & BDRV_CHILD_METADATA))
1289      {
1290          flags |= BDRV_O_PROTOCOL;
1291      }
1292  
1293      /*
1294       * If the cache mode isn't explicitly set, inherit direct and no-flush from
1295       * the parent.
1296       */
1297      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1298      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1299      qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1300  
1301      if (role & BDRV_CHILD_COW) {
1302          /* backing files are opened read-only by default */
1303          qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1304          qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1305      } else {
1306          /* Inherit the read-only option from the parent if it's not set */
1307          qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1308          qdict_copy_default(child_options, parent_options,
1309                             BDRV_OPT_AUTO_READ_ONLY);
1310      }
1311  
1312      /*
1313       * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1314       * can default to enable it on lower layers regardless of the
1315       * parent option.
1316       */
1317      qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1318  
1319      /* Clear flags that only apply to the top layer */
1320      flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1321  
1322      if (role & BDRV_CHILD_METADATA) {
1323          flags &= ~BDRV_O_NO_IO;
1324      }
1325      if (role & BDRV_CHILD_COW) {
1326          flags &= ~BDRV_O_TEMPORARY;
1327      }
1328  
1329      *child_flags = flags;
1330  }
1331  
1332  static void bdrv_child_cb_attach(BdrvChild *child)
1333  {
1334      BlockDriverState *bs = child->opaque;
1335  
1336      if (child->role & BDRV_CHILD_COW) {
1337          bdrv_backing_attach(child);
1338      }
1339  
1340      bdrv_apply_subtree_drain(child, bs);
1341  }
1342  
1343  static void bdrv_child_cb_detach(BdrvChild *child)
1344  {
1345      BlockDriverState *bs = child->opaque;
1346  
1347      if (child->role & BDRV_CHILD_COW) {
1348          bdrv_backing_detach(child);
1349      }
1350  
1351      bdrv_unapply_subtree_drain(child, bs);
1352  }
1353  
1354  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1355                                           const char *filename, Error **errp)
1356  {
1357      if (c->role & BDRV_CHILD_COW) {
1358          return bdrv_backing_update_filename(c, base, filename, errp);
1359      }
1360      return 0;
1361  }
1362  
1363  const BdrvChildClass child_of_bds = {
1364      .parent_is_bds   = true,
1365      .get_parent_desc = bdrv_child_get_parent_desc,
1366      .inherit_options = bdrv_inherited_options,
1367      .drained_begin   = bdrv_child_cb_drained_begin,
1368      .drained_poll    = bdrv_child_cb_drained_poll,
1369      .drained_end     = bdrv_child_cb_drained_end,
1370      .attach          = bdrv_child_cb_attach,
1371      .detach          = bdrv_child_cb_detach,
1372      .inactivate      = bdrv_child_cb_inactivate,
1373      .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
1374      .set_aio_ctx     = bdrv_child_cb_set_aio_ctx,
1375      .update_filename = bdrv_child_cb_update_filename,
1376  };
1377  
1378  static int bdrv_open_flags(BlockDriverState *bs, int flags)
1379  {
1380      int open_flags = flags;
1381  
1382      /*
1383       * Clear flags that are internal to the block layer before opening the
1384       * image.
1385       */
1386      open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1387  
1388      return open_flags;
1389  }
1390  
1391  static void update_flags_from_options(int *flags, QemuOpts *opts)
1392  {
1393      *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
1394  
1395      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1396          *flags |= BDRV_O_NO_FLUSH;
1397      }
1398  
1399      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1400          *flags |= BDRV_O_NOCACHE;
1401      }
1402  
1403      if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
1404          *flags |= BDRV_O_RDWR;
1405      }
1406  
1407      if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1408          *flags |= BDRV_O_AUTO_RDONLY;
1409      }
1410  }
1411  
1412  static void update_options_from_flags(QDict *options, int flags)
1413  {
1414      if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1415          qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1416      }
1417      if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1418          qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1419                         flags & BDRV_O_NO_FLUSH);
1420      }
1421      if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1422          qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1423      }
1424      if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1425          qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1426                         flags & BDRV_O_AUTO_RDONLY);
1427      }
1428  }
1429  
1430  static void bdrv_assign_node_name(BlockDriverState *bs,
1431                                    const char *node_name,
1432                                    Error **errp)
1433  {
1434      char *gen_node_name = NULL;
1435  
1436      if (!node_name) {
1437          node_name = gen_node_name = id_generate(ID_BLOCK);
1438      } else if (!id_wellformed(node_name)) {
1439          /*
1440           * Check for empty string or invalid characters, but not if it is
1441           * generated (generated names use characters not available to the user)
1442           */
1443          error_setg(errp, "Invalid node name");
1444          return;
1445      }
1446  
1447      /* takes care of avoiding namespaces collisions */
1448      if (blk_by_name(node_name)) {
1449          error_setg(errp, "node-name=%s is conflicting with a device id",
1450                     node_name);
1451          goto out;
1452      }
1453  
1454      /* takes care of avoiding duplicates node names */
1455      if (bdrv_find_node(node_name)) {
1456          error_setg(errp, "Duplicate node name");
1457          goto out;
1458      }
1459  
1460      /* Make sure that the node name isn't truncated */
1461      if (strlen(node_name) >= sizeof(bs->node_name)) {
1462          error_setg(errp, "Node name too long");
1463          goto out;
1464      }
1465  
1466      /* copy node name into the bs and insert it into the graph list */
1467      pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1468      QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1469  out:
1470      g_free(gen_node_name);
1471  }
1472  
1473  static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1474                              const char *node_name, QDict *options,
1475                              int open_flags, Error **errp)
1476  {
1477      Error *local_err = NULL;
1478      int i, ret;
1479  
1480      bdrv_assign_node_name(bs, node_name, &local_err);
1481      if (local_err) {
1482          error_propagate(errp, local_err);
1483          return -EINVAL;
1484      }
1485  
1486      bs->drv = drv;
1487      bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1488      bs->opaque = g_malloc0(drv->instance_size);
1489  
1490      if (drv->bdrv_file_open) {
1491          assert(!drv->bdrv_needs_filename || bs->filename[0]);
1492          ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1493      } else if (drv->bdrv_open) {
1494          ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1495      } else {
1496          ret = 0;
1497      }
1498  
1499      if (ret < 0) {
1500          if (local_err) {
1501              error_propagate(errp, local_err);
1502          } else if (bs->filename[0]) {
1503              error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1504          } else {
1505              error_setg_errno(errp, -ret, "Could not open image");
1506          }
1507          goto open_failed;
1508      }
1509  
1510      ret = refresh_total_sectors(bs, bs->total_sectors);
1511      if (ret < 0) {
1512          error_setg_errno(errp, -ret, "Could not refresh total sector count");
1513          return ret;
1514      }
1515  
1516      bdrv_refresh_limits(bs, &local_err);
1517      if (local_err) {
1518          error_propagate(errp, local_err);
1519          return -EINVAL;
1520      }
1521  
1522      assert(bdrv_opt_mem_align(bs) != 0);
1523      assert(bdrv_min_mem_align(bs) != 0);
1524      assert(is_power_of_2(bs->bl.request_alignment));
1525  
1526      for (i = 0; i < bs->quiesce_counter; i++) {
1527          if (drv->bdrv_co_drain_begin) {
1528              drv->bdrv_co_drain_begin(bs);
1529          }
1530      }
1531  
1532      return 0;
1533  open_failed:
1534      bs->drv = NULL;
1535      if (bs->file != NULL) {
1536          bdrv_unref_child(bs, bs->file);
1537          bs->file = NULL;
1538      }
1539      g_free(bs->opaque);
1540      bs->opaque = NULL;
1541      return ret;
1542  }
1543  
1544  BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1545                                         int flags, Error **errp)
1546  {
1547      BlockDriverState *bs;
1548      int ret;
1549  
1550      bs = bdrv_new();
1551      bs->open_flags = flags;
1552      bs->explicit_options = qdict_new();
1553      bs->options = qdict_new();
1554      bs->opaque = NULL;
1555  
1556      update_options_from_flags(bs->options, flags);
1557  
1558      ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1559      if (ret < 0) {
1560          qobject_unref(bs->explicit_options);
1561          bs->explicit_options = NULL;
1562          qobject_unref(bs->options);
1563          bs->options = NULL;
1564          bdrv_unref(bs);
1565          return NULL;
1566      }
1567  
1568      return bs;
1569  }
1570  
1571  QemuOptsList bdrv_runtime_opts = {
1572      .name = "bdrv_common",
1573      .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1574      .desc = {
1575          {
1576              .name = "node-name",
1577              .type = QEMU_OPT_STRING,
1578              .help = "Node name of the block device node",
1579          },
1580          {
1581              .name = "driver",
1582              .type = QEMU_OPT_STRING,
1583              .help = "Block driver to use for the node",
1584          },
1585          {
1586              .name = BDRV_OPT_CACHE_DIRECT,
1587              .type = QEMU_OPT_BOOL,
1588              .help = "Bypass software writeback cache on the host",
1589          },
1590          {
1591              .name = BDRV_OPT_CACHE_NO_FLUSH,
1592              .type = QEMU_OPT_BOOL,
1593              .help = "Ignore flush requests",
1594          },
1595          {
1596              .name = BDRV_OPT_READ_ONLY,
1597              .type = QEMU_OPT_BOOL,
1598              .help = "Node is opened in read-only mode",
1599          },
1600          {
1601              .name = BDRV_OPT_AUTO_READ_ONLY,
1602              .type = QEMU_OPT_BOOL,
1603              .help = "Node can become read-only if opening read-write fails",
1604          },
1605          {
1606              .name = "detect-zeroes",
1607              .type = QEMU_OPT_STRING,
1608              .help = "try to optimize zero writes (off, on, unmap)",
1609          },
1610          {
1611              .name = BDRV_OPT_DISCARD,
1612              .type = QEMU_OPT_STRING,
1613              .help = "discard operation (ignore/off, unmap/on)",
1614          },
1615          {
1616              .name = BDRV_OPT_FORCE_SHARE,
1617              .type = QEMU_OPT_BOOL,
1618              .help = "always accept other writers (default: off)",
1619          },
1620          { /* end of list */ }
1621      },
1622  };
1623  
1624  QemuOptsList bdrv_create_opts_simple = {
1625      .name = "simple-create-opts",
1626      .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
1627      .desc = {
1628          {
1629              .name = BLOCK_OPT_SIZE,
1630              .type = QEMU_OPT_SIZE,
1631              .help = "Virtual disk size"
1632          },
1633          {
1634              .name = BLOCK_OPT_PREALLOC,
1635              .type = QEMU_OPT_STRING,
1636              .help = "Preallocation mode (allowed values: off)"
1637          },
1638          { /* end of list */ }
1639      }
1640  };
1641  
1642  /*
1643   * Common part for opening disk images and files
1644   *
1645   * Removes all processed options from *options.
1646   */
1647  static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1648                              QDict *options, Error **errp)
1649  {
1650      int ret, open_flags;
1651      const char *filename;
1652      const char *driver_name = NULL;
1653      const char *node_name = NULL;
1654      const char *discard;
1655      QemuOpts *opts;
1656      BlockDriver *drv;
1657      Error *local_err = NULL;
1658  
1659      assert(bs->file == NULL);
1660      assert(options != NULL && bs->options != options);
1661  
1662      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1663      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1664          ret = -EINVAL;
1665          goto fail_opts;
1666      }
1667  
1668      update_flags_from_options(&bs->open_flags, opts);
1669  
1670      driver_name = qemu_opt_get(opts, "driver");
1671      drv = bdrv_find_format(driver_name);
1672      assert(drv != NULL);
1673  
1674      bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1675  
1676      if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1677          error_setg(errp,
1678                     BDRV_OPT_FORCE_SHARE
1679                     "=on can only be used with read-only images");
1680          ret = -EINVAL;
1681          goto fail_opts;
1682      }
1683  
1684      if (file != NULL) {
1685          bdrv_refresh_filename(blk_bs(file));
1686          filename = blk_bs(file)->filename;
1687      } else {
1688          /*
1689           * Caution: while qdict_get_try_str() is fine, getting
1690           * non-string types would require more care.  When @options
1691           * come from -blockdev or blockdev_add, its members are typed
1692           * according to the QAPI schema, but when they come from
1693           * -drive, they're all QString.
1694           */
1695          filename = qdict_get_try_str(options, "filename");
1696      }
1697  
1698      if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1699          error_setg(errp, "The '%s' block driver requires a file name",
1700                     drv->format_name);
1701          ret = -EINVAL;
1702          goto fail_opts;
1703      }
1704  
1705      trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1706                             drv->format_name);
1707  
1708      bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1709  
1710      if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
1711          if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
1712              ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
1713          } else {
1714              ret = -ENOTSUP;
1715          }
1716          if (ret < 0) {
1717              error_setg(errp,
1718                         !bs->read_only && bdrv_is_whitelisted(drv, true)
1719                         ? "Driver '%s' can only be used for read-only devices"
1720                         : "Driver '%s' is not whitelisted",
1721                         drv->format_name);
1722              goto fail_opts;
1723          }
1724      }
1725  
1726      /* bdrv_new() and bdrv_close() make it so */
1727      assert(qatomic_read(&bs->copy_on_read) == 0);
1728  
1729      if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1730          if (!bs->read_only) {
1731              bdrv_enable_copy_on_read(bs);
1732          } else {
1733              error_setg(errp, "Can't use copy-on-read on read-only device");
1734              ret = -EINVAL;
1735              goto fail_opts;
1736          }
1737      }
1738  
1739      discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
1740      if (discard != NULL) {
1741          if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1742              error_setg(errp, "Invalid discard option");
1743              ret = -EINVAL;
1744              goto fail_opts;
1745          }
1746      }
1747  
1748      bs->detect_zeroes =
1749          bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1750      if (local_err) {
1751          error_propagate(errp, local_err);
1752          ret = -EINVAL;
1753          goto fail_opts;
1754      }
1755  
1756      if (filename != NULL) {
1757          pstrcpy(bs->filename, sizeof(bs->filename), filename);
1758      } else {
1759          bs->filename[0] = '\0';
1760      }
1761      pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1762  
1763      /* Open the image, either directly or using a protocol */
1764      open_flags = bdrv_open_flags(bs, bs->open_flags);
1765      node_name = qemu_opt_get(opts, "node-name");
1766  
1767      assert(!drv->bdrv_file_open || file == NULL);
1768      ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1769      if (ret < 0) {
1770          goto fail_opts;
1771      }
1772  
1773      qemu_opts_del(opts);
1774      return 0;
1775  
1776  fail_opts:
1777      qemu_opts_del(opts);
1778      return ret;
1779  }
1780  
1781  static QDict *parse_json_filename(const char *filename, Error **errp)
1782  {
1783      QObject *options_obj;
1784      QDict *options;
1785      int ret;
1786  
1787      ret = strstart(filename, "json:", &filename);
1788      assert(ret);
1789  
1790      options_obj = qobject_from_json(filename, errp);
1791      if (!options_obj) {
1792          error_prepend(errp, "Could not parse the JSON options: ");
1793          return NULL;
1794      }
1795  
1796      options = qobject_to(QDict, options_obj);
1797      if (!options) {
1798          qobject_unref(options_obj);
1799          error_setg(errp, "Invalid JSON object given");
1800          return NULL;
1801      }
1802  
1803      qdict_flatten(options);
1804  
1805      return options;
1806  }
1807  
1808  static void parse_json_protocol(QDict *options, const char **pfilename,
1809                                  Error **errp)
1810  {
1811      QDict *json_options;
1812      Error *local_err = NULL;
1813  
1814      /* Parse json: pseudo-protocol */
1815      if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1816          return;
1817      }
1818  
1819      json_options = parse_json_filename(*pfilename, &local_err);
1820      if (local_err) {
1821          error_propagate(errp, local_err);
1822          return;
1823      }
1824  
1825      /* Options given in the filename have lower priority than options
1826       * specified directly */
1827      qdict_join(options, json_options, false);
1828      qobject_unref(json_options);
1829      *pfilename = NULL;
1830  }
1831  
1832  /*
1833   * Fills in default options for opening images and converts the legacy
1834   * filename/flags pair to option QDict entries.
1835   * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1836   * block driver has been specified explicitly.
1837   */
1838  static int bdrv_fill_options(QDict **options, const char *filename,
1839                               int *flags, Error **errp)
1840  {
1841      const char *drvname;
1842      bool protocol = *flags & BDRV_O_PROTOCOL;
1843      bool parse_filename = false;
1844      BlockDriver *drv = NULL;
1845      Error *local_err = NULL;
1846  
1847      /*
1848       * Caution: while qdict_get_try_str() is fine, getting non-string
1849       * types would require more care.  When @options come from
1850       * -blockdev or blockdev_add, its members are typed according to
1851       * the QAPI schema, but when they come from -drive, they're all
1852       * QString.
1853       */
1854      drvname = qdict_get_try_str(*options, "driver");
1855      if (drvname) {
1856          drv = bdrv_find_format(drvname);
1857          if (!drv) {
1858              error_setg(errp, "Unknown driver '%s'", drvname);
1859              return -ENOENT;
1860          }
1861          /* If the user has explicitly specified the driver, this choice should
1862           * override the BDRV_O_PROTOCOL flag */
1863          protocol = drv->bdrv_file_open;
1864      }
1865  
1866      if (protocol) {
1867          *flags |= BDRV_O_PROTOCOL;
1868      } else {
1869          *flags &= ~BDRV_O_PROTOCOL;
1870      }
1871  
1872      /* Translate cache options from flags into options */
1873      update_options_from_flags(*options, *flags);
1874  
1875      /* Fetch the file name from the options QDict if necessary */
1876      if (protocol && filename) {
1877          if (!qdict_haskey(*options, "filename")) {
1878              qdict_put_str(*options, "filename", filename);
1879              parse_filename = true;
1880          } else {
1881              error_setg(errp, "Can't specify 'file' and 'filename' options at "
1882                               "the same time");
1883              return -EINVAL;
1884          }
1885      }
1886  
1887      /* Find the right block driver */
1888      /* See cautionary note on accessing @options above */
1889      filename = qdict_get_try_str(*options, "filename");
1890  
1891      if (!drvname && protocol) {
1892          if (filename) {
1893              drv = bdrv_find_protocol(filename, parse_filename, errp);
1894              if (!drv) {
1895                  return -EINVAL;
1896              }
1897  
1898              drvname = drv->format_name;
1899              qdict_put_str(*options, "driver", drvname);
1900          } else {
1901              error_setg(errp, "Must specify either driver or file");
1902              return -EINVAL;
1903          }
1904      }
1905  
1906      assert(drv || !protocol);
1907  
1908      /* Driver-specific filename parsing */
1909      if (drv && drv->bdrv_parse_filename && parse_filename) {
1910          drv->bdrv_parse_filename(filename, *options, &local_err);
1911          if (local_err) {
1912              error_propagate(errp, local_err);
1913              return -EINVAL;
1914          }
1915  
1916          if (!drv->bdrv_needs_filename) {
1917              qdict_del(*options, "filename");
1918          }
1919      }
1920  
1921      return 0;
1922  }
1923  
/*
 * Forward declarations of the per-child permission helpers.  They are defined
 * further down in this file but are already called by the node-level helpers
 * bdrv_check_perm(), bdrv_abort_perm_update() and bdrv_set_perm().
 */
static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
                                 uint64_t perm, uint64_t shared,
                                 GSList *ignore_children, Error **errp);
static void bdrv_child_abort_perm_update(BdrvChild *c);
static void bdrv_child_set_perm(BdrvChild *c);
1929  
/*
 * One element of a BlockReopenQueue.
 *
 * @state describes the pending reopen of a single node: bdrv_reopen_get_flags()
 * matches state.bs against a node and reads state.flags as the flags that node
 * will have once the reopen has completed.
 *
 * NOTE(review): @prepared and @perms_checked are not used in this part of the
 * file; presumably they track progress of the reopen transaction -- confirm
 * against the reopen code.
 */
typedef struct BlockReopenQueueEntry {
     bool prepared;
     bool perms_checked;
     BDRVReopenState state;
     /* Linkage used by QTAILQ_FOREACH() over the queue */
     QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1936  
1937  /*
1938   * Return the flags that @bs will have after the reopens in @q have
1939   * successfully completed. If @q is NULL (or @bs is not contained in @q),
1940   * return the current flags.
1941   */
1942  static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
1943  {
1944      BlockReopenQueueEntry *entry;
1945  
1946      if (q != NULL) {
1947          QTAILQ_FOREACH(entry, q, entry) {
1948              if (entry->state.bs == bs) {
1949                  return entry->state.flags;
1950              }
1951          }
1952      }
1953  
1954      return bs->open_flags;
1955  }
1956  
1957  /* Returns whether the image file can be written to after the reopen queue @q
1958   * has been successfully applied, or right now if @q is NULL. */
1959  static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
1960                                            BlockReopenQueue *q)
1961  {
1962      int flags = bdrv_reopen_get_flags(q, bs);
1963  
1964      return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
1965  }
1966  
1967  /*
1968   * Return whether the BDS can be written to.  This is not necessarily
1969   * the same as !bdrv_is_read_only(bs), as inactivated images may not
1970   * be written to but do not count as read-only images.
1971   */
1972  bool bdrv_is_writable(BlockDriverState *bs)
1973  {
1974      return bdrv_is_writable_after_reopen(bs, NULL);
1975  }
1976  
1977  static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
1978                              BdrvChild *c, BdrvChildRole role,
1979                              BlockReopenQueue *reopen_queue,
1980                              uint64_t parent_perm, uint64_t parent_shared,
1981                              uint64_t *nperm, uint64_t *nshared)
1982  {
1983      assert(bs->drv && bs->drv->bdrv_child_perm);
1984      bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
1985                               parent_perm, parent_shared,
1986                               nperm, nshared);
1987      /* TODO Take force_share from reopen_queue */
1988      if (child_bs && child_bs->force_share) {
1989          *nshared = BLK_PERM_ALL;
1990      }
1991  }
1992  
1993  /*
1994   * Check whether permissions on this node can be changed in a way that
1995   * @cumulative_perms and @cumulative_shared_perms are the new cumulative
1996   * permissions of all its parents. This involves checking whether all necessary
1997   * permission changes to child nodes can be performed.
1998   *
1999   * A call to this function must always be followed by a call to bdrv_set_perm()
2000   * or bdrv_abort_perm_update().
2001   */
2002  static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
2003                             uint64_t cumulative_perms,
2004                             uint64_t cumulative_shared_perms,
2005                             GSList *ignore_children, Error **errp)
2006  {
2007      BlockDriver *drv = bs->drv;
2008      BdrvChild *c;
2009      int ret;
2010  
2011      /* Write permissions never work with read-only images */
2012      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2013          !bdrv_is_writable_after_reopen(bs, q))
2014      {
2015          if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2016              error_setg(errp, "Block node is read-only");
2017          } else {
2018              uint64_t current_perms, current_shared;
2019              bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
2020              if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
2021                  error_setg(errp, "Cannot make block node read-only, there is "
2022                             "a writer on it");
2023              } else {
2024                  error_setg(errp, "Cannot make block node read-only and create "
2025                             "a writer on it");
2026              }
2027          }
2028  
2029          return -EPERM;
2030      }
2031  
2032      /*
2033       * Unaligned requests will automatically be aligned to bl.request_alignment
2034       * and without RESIZE we can't extend requests to write to space beyond the
2035       * end of the image, so it's required that the image size is aligned.
2036       */
2037      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2038          !(cumulative_perms & BLK_PERM_RESIZE))
2039      {
2040          if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2041              error_setg(errp, "Cannot get 'write' permission without 'resize': "
2042                               "Image size is not a multiple of request "
2043                               "alignment");
2044              return -EPERM;
2045          }
2046      }
2047  
2048      /* Check this node */
2049      if (!drv) {
2050          return 0;
2051      }
2052  
2053      if (drv->bdrv_check_perm) {
2054          ret = drv->bdrv_check_perm(bs, cumulative_perms,
2055                                     cumulative_shared_perms, errp);
2056          if (ret < 0) {
2057              return ret;
2058          }
2059      }
2060  
2061      /* Drivers that never have children can omit .bdrv_child_perm() */
2062      if (!drv->bdrv_child_perm) {
2063          assert(QLIST_EMPTY(&bs->children));
2064          return 0;
2065      }
2066  
2067      /* Check all children */
2068      QLIST_FOREACH(c, &bs->children, next) {
2069          uint64_t cur_perm, cur_shared;
2070  
2071          bdrv_child_perm(bs, c->bs, c, c->role, q,
2072                          cumulative_perms, cumulative_shared_perms,
2073                          &cur_perm, &cur_shared);
2074          ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children,
2075                                      errp);
2076          if (ret < 0) {
2077              return ret;
2078          }
2079      }
2080  
2081      return 0;
2082  }
2083  
2084  /*
2085   * Notifies drivers that after a previous bdrv_check_perm() call, the
2086   * permission update is not performed and any preparations made for it (e.g.
2087   * taken file locks) need to be undone.
2088   *
2089   * This function recursively notifies all child nodes.
2090   */
2091  static void bdrv_abort_perm_update(BlockDriverState *bs)
2092  {
2093      BlockDriver *drv = bs->drv;
2094      BdrvChild *c;
2095  
2096      if (!drv) {
2097          return;
2098      }
2099  
2100      if (drv->bdrv_abort_perm_update) {
2101          drv->bdrv_abort_perm_update(bs);
2102      }
2103  
2104      QLIST_FOREACH(c, &bs->children, next) {
2105          bdrv_child_abort_perm_update(c);
2106      }
2107  }
2108  
2109  static void bdrv_set_perm(BlockDriverState *bs)
2110  {
2111      uint64_t cumulative_perms, cumulative_shared_perms;
2112      BlockDriver *drv = bs->drv;
2113      BdrvChild *c;
2114  
2115      if (!drv) {
2116          return;
2117      }
2118  
2119      bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
2120  
2121      /* Update this node */
2122      if (drv->bdrv_set_perm) {
2123          drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2124      }
2125  
2126      /* Drivers that never have children can omit .bdrv_child_perm() */
2127      if (!drv->bdrv_child_perm) {
2128          assert(QLIST_EMPTY(&bs->children));
2129          return;
2130      }
2131  
2132      /* Update all children */
2133      QLIST_FOREACH(c, &bs->children, next) {
2134          bdrv_child_set_perm(c);
2135      }
2136  }
2137  
2138  void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2139                                uint64_t *shared_perm)
2140  {
2141      BdrvChild *c;
2142      uint64_t cumulative_perms = 0;
2143      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2144  
2145      QLIST_FOREACH(c, &bs->parents, next_parent) {
2146          cumulative_perms |= c->perm;
2147          cumulative_shared_perms &= c->shared_perm;
2148      }
2149  
2150      *perm = cumulative_perms;
2151      *shared_perm = cumulative_shared_perms;
2152  }
2153  
2154  static char *bdrv_child_user_desc(BdrvChild *c)
2155  {
2156      if (c->klass->get_parent_desc) {
2157          return c->klass->get_parent_desc(c);
2158      }
2159  
2160      return g_strdup("another user");
2161  }
2162  
2163  char *bdrv_perm_names(uint64_t perm)
2164  {
2165      struct perm_name {
2166          uint64_t perm;
2167          const char *name;
2168      } permissions[] = {
2169          { BLK_PERM_CONSISTENT_READ, "consistent read" },
2170          { BLK_PERM_WRITE,           "write" },
2171          { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2172          { BLK_PERM_RESIZE,          "resize" },
2173          { BLK_PERM_GRAPH_MOD,       "change children" },
2174          { 0, NULL }
2175      };
2176  
2177      GString *result = g_string_sized_new(30);
2178      struct perm_name *p;
2179  
2180      for (p = permissions; p->name; p++) {
2181          if (perm & p->perm) {
2182              if (result->len > 0) {
2183                  g_string_append(result, ", ");
2184              }
2185              g_string_append(result, p->name);
2186          }
2187      }
2188  
2189      return g_string_free(result, FALSE);
2190  }
2191  
2192  /*
2193   * Checks whether a new reference to @bs can be added if the new user requires
2194   * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
2195   * set, the BdrvChild objects in this list are ignored in the calculations;
2196   * this allows checking permission updates for an existing reference.
2197   *
2198   * Needs to be followed by a call to either bdrv_set_perm() or
2199   * bdrv_abort_perm_update(). */
2200  static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
2201                                    uint64_t new_used_perm,
2202                                    uint64_t new_shared_perm,
2203                                    GSList *ignore_children,
2204                                    Error **errp)
2205  {
2206      BdrvChild *c;
2207      uint64_t cumulative_perms = new_used_perm;
2208      uint64_t cumulative_shared_perms = new_shared_perm;
2209  
2210  
2211      /* There is no reason why anyone couldn't tolerate write_unchanged */
2212      assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
2213  
2214      QLIST_FOREACH(c, &bs->parents, next_parent) {
2215          if (g_slist_find(ignore_children, c)) {
2216              continue;
2217          }
2218  
2219          if ((new_used_perm & c->shared_perm) != new_used_perm) {
2220              char *user = bdrv_child_user_desc(c);
2221              char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
2222  
2223              error_setg(errp, "Conflicts with use by %s as '%s', which does not "
2224                               "allow '%s' on %s",
2225                         user, c->name, perm_names, bdrv_get_node_name(c->bs));
2226              g_free(user);
2227              g_free(perm_names);
2228              return -EPERM;
2229          }
2230  
2231          if ((c->perm & new_shared_perm) != c->perm) {
2232              char *user = bdrv_child_user_desc(c);
2233              char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
2234  
2235              error_setg(errp, "Conflicts with use by %s as '%s', which uses "
2236                               "'%s' on %s",
2237                         user, c->name, perm_names, bdrv_get_node_name(c->bs));
2238              g_free(user);
2239              g_free(perm_names);
2240              return -EPERM;
2241          }
2242  
2243          cumulative_perms |= c->perm;
2244          cumulative_shared_perms &= c->shared_perm;
2245      }
2246  
2247      return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
2248                             ignore_children, errp);
2249  }
2250  
2251  /* Needs to be followed by a call to either bdrv_child_set_perm() or
2252   * bdrv_child_abort_perm_update(). */
2253  static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
2254                                   uint64_t perm, uint64_t shared,
2255                                   GSList *ignore_children, Error **errp)
2256  {
2257      int ret;
2258  
2259      ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
2260      ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp);
2261      g_slist_free(ignore_children);
2262  
2263      if (ret < 0) {
2264          return ret;
2265      }
2266  
2267      if (!c->has_backup_perm) {
2268          c->has_backup_perm = true;
2269          c->backup_perm = c->perm;
2270          c->backup_shared_perm = c->shared_perm;
2271      }
2272      /*
2273       * Note: it's OK if c->has_backup_perm was already set, as we can find the
2274       * same child twice during check_perm procedure
2275       */
2276  
2277      c->perm = perm;
2278      c->shared_perm = shared;
2279  
2280      return 0;
2281  }
2282  
2283  static void bdrv_child_set_perm(BdrvChild *c)
2284  {
2285      c->has_backup_perm = false;
2286  
2287      bdrv_set_perm(c->bs);
2288  }
2289  
2290  static void bdrv_child_abort_perm_update(BdrvChild *c)
2291  {
2292      if (c->has_backup_perm) {
2293          c->perm = c->backup_perm;
2294          c->shared_perm = c->backup_shared_perm;
2295          c->has_backup_perm = false;
2296      }
2297  
2298      bdrv_abort_perm_update(c->bs);
2299  }
2300  
2301  static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
2302  {
2303      int ret;
2304      uint64_t perm, shared_perm;
2305  
2306      bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
2307      ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, errp);
2308      if (ret < 0) {
2309          bdrv_abort_perm_update(bs);
2310          return ret;
2311      }
2312      bdrv_set_perm(bs);
2313  
2314      return 0;
2315  }
2316  
2317  int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2318                              Error **errp)
2319  {
2320      Error *local_err = NULL;
2321      int ret;
2322  
2323      ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, &local_err);
2324      if (ret < 0) {
2325          bdrv_child_abort_perm_update(c);
2326          if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2327              /* tighten permissions */
2328              error_propagate(errp, local_err);
2329          } else {
2330              /*
2331               * Our caller may intend to only loosen restrictions and
2332               * does not expect this function to fail.  Errors are not
2333               * fatal in such a case, so we can just hide them from our
2334               * caller.
2335               */
2336              error_free(local_err);
2337              ret = 0;
2338          }
2339          return ret;
2340      }
2341  
2342      bdrv_child_set_perm(c);
2343  
2344      return 0;
2345  }
2346  
2347  int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2348  {
2349      uint64_t parent_perms, parent_shared;
2350      uint64_t perms, shared;
2351  
2352      bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
2353      bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2354                      parent_perms, parent_shared, &perms, &shared);
2355  
2356      return bdrv_child_try_set_perm(c, perms, shared, errp);
2357  }
2358  
2359  /*
2360   * Default implementation for .bdrv_child_perm() for block filters:
2361   * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2362   * filtered child.
2363   */
2364  static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
2365                                        BdrvChildRole role,
2366                                        BlockReopenQueue *reopen_queue,
2367                                        uint64_t perm, uint64_t shared,
2368                                        uint64_t *nperm, uint64_t *nshared)
2369  {
2370      *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2371      *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
2372  }
2373  
2374  static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
2375                                         BdrvChildRole role,
2376                                         BlockReopenQueue *reopen_queue,
2377                                         uint64_t perm, uint64_t shared,
2378                                         uint64_t *nperm, uint64_t *nshared)
2379  {
2380      assert(role & BDRV_CHILD_COW);
2381  
2382      /*
2383       * We want consistent read from backing files if the parent needs it.
2384       * No other operations are performed on backing files.
2385       */
2386      perm &= BLK_PERM_CONSISTENT_READ;
2387  
2388      /*
2389       * If the parent can deal with changing data, we're okay with a
2390       * writable and resizable backing file.
2391       * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2392       */
2393      if (shared & BLK_PERM_WRITE) {
2394          shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2395      } else {
2396          shared = 0;
2397      }
2398  
2399      shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
2400                BLK_PERM_WRITE_UNCHANGED;
2401  
2402      if (bs->open_flags & BDRV_O_INACTIVE) {
2403          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2404      }
2405  
2406      *nperm = perm;
2407      *nshared = shared;
2408  }
2409  
2410  static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
2411                                             BdrvChildRole role,
2412                                             BlockReopenQueue *reopen_queue,
2413                                             uint64_t perm, uint64_t shared,
2414                                             uint64_t *nperm, uint64_t *nshared)
2415  {
2416      int flags;
2417  
2418      assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
2419  
2420      flags = bdrv_reopen_get_flags(reopen_queue, bs);
2421  
2422      /*
2423       * Apart from the modifications below, the same permissions are
2424       * forwarded and left alone as for filters
2425       */
2426      bdrv_filter_default_perms(bs, c, role, reopen_queue,
2427                                perm, shared, &perm, &shared);
2428  
2429      if (role & BDRV_CHILD_METADATA) {
2430          /* Format drivers may touch metadata even if the guest doesn't write */
2431          if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2432              perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2433          }
2434  
2435          /*
2436           * bs->file always needs to be consistent because of the
2437           * metadata. We can never allow other users to resize or write
2438           * to it.
2439           */
2440          if (!(flags & BDRV_O_NO_IO)) {
2441              perm |= BLK_PERM_CONSISTENT_READ;
2442          }
2443          shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
2444      }
2445  
2446      if (role & BDRV_CHILD_DATA) {
2447          /*
2448           * Technically, everything in this block is a subset of the
2449           * BDRV_CHILD_METADATA path taken above, and so this could
2450           * be an "else if" branch.  However, that is not obvious, and
2451           * this function is not performance critical, therefore we let
2452           * this be an independent "if".
2453           */
2454  
2455          /*
2456           * We cannot allow other users to resize the file because the
2457           * format driver might have some assumptions about the size
2458           * (e.g. because it is stored in metadata, or because the file
2459           * is split into fixed-size data files).
2460           */
2461          shared &= ~BLK_PERM_RESIZE;
2462  
2463          /*
2464           * WRITE_UNCHANGED often cannot be performed as such on the
2465           * data file.  For example, the qcow2 driver may still need to
2466           * write copied clusters on copy-on-read.
2467           */
2468          if (perm & BLK_PERM_WRITE_UNCHANGED) {
2469              perm |= BLK_PERM_WRITE;
2470          }
2471  
2472          /*
2473           * If the data file is written to, the format driver may
2474           * expect to be able to resize it by writing beyond the EOF.
2475           */
2476          if (perm & BLK_PERM_WRITE) {
2477              perm |= BLK_PERM_RESIZE;
2478          }
2479      }
2480  
2481      if (bs->open_flags & BDRV_O_INACTIVE) {
2482          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2483      }
2484  
2485      *nperm = perm;
2486      *nshared = shared;
2487  }
2488  
2489  void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
2490                          BdrvChildRole role, BlockReopenQueue *reopen_queue,
2491                          uint64_t perm, uint64_t shared,
2492                          uint64_t *nperm, uint64_t *nshared)
2493  {
2494      if (role & BDRV_CHILD_FILTERED) {
2495          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2496                           BDRV_CHILD_COW)));
2497          bdrv_filter_default_perms(bs, c, role, reopen_queue,
2498                                    perm, shared, nperm, nshared);
2499      } else if (role & BDRV_CHILD_COW) {
2500          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
2501          bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2502                                     perm, shared, nperm, nshared);
2503      } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
2504          bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2505                                         perm, shared, nperm, nshared);
2506      } else {
2507          g_assert_not_reached();
2508      }
2509  }
2510  
2511  uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2512  {
2513      static const uint64_t permissions[] = {
2514          [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
2515          [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
2516          [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
2517          [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
2518          [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
2519      };
2520  
2521      QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2522      QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2523  
2524      assert(qapi_perm < BLOCK_PERMISSION__MAX);
2525  
2526      return permissions[qapi_perm];
2527  }
2528  
/*
 * Makes @child point to @new_bs instead of its current node, without checking
 * or updating any permissions.  Either the current node (child->bs) or
 * @new_bs may be NULL.
 *
 * @child is removed from the old node's parent list and inserted into the new
 * node's one, with the child class's .detach()/.attach() callbacks invoked
 * around that.  In addition, .drained_begin/.drained_end sections are started
 * or ended on the parent so that they match the quiesce counter of @new_bs.
 *
 * @child must not be frozen.  If both nodes are non-NULL they must be in the
 * same AioContext.
 */
static void bdrv_replace_child_noperm(BdrvChild *child,
                                      BlockDriverState *new_bs)
{
    BlockDriverState *old_bs = child->bs;
    int new_bs_quiesce_counter;
    /*
     * Number of drained sections still to be started on the parent (if
     * positive) or to be ended on it (if negative).
     */
    int drain_saldo;

    assert(!child->frozen);

    if (old_bs && new_bs) {
        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
    }

    /* Snapshot the counter now; detaching below may still change it */
    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;

    /*
     * If the new child node is drained but the old one was not, flush
     * all outstanding requests to the old child node.
     */
    while (drain_saldo > 0 && child->klass->drained_begin) {
        bdrv_parent_drained_begin_single(child, true);
        drain_saldo--;
    }

    if (old_bs) {
        /* Detach first so that the recursive drain sections coming from @child
         * are already gone and we only end the drain sections that came from
         * elsewhere. */
        if (child->klass->detach) {
            child->klass->detach(child);
        }
        QLIST_REMOVE(child, next_parent);
    }

    child->bs = new_bs;

    if (new_bs) {
        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);

        /*
         * Detaching the old node may have led to the new node's
         * quiesce_counter having been decreased.  Not a problem, we
         * just need to recognize this here and then invoke
         * drained_end appropriately more often.
         */
        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;

        /* Attach only after starting new drained sections, so that recursive
         * drain sections coming from @child don't get an extra .drained_begin
         * callback. */
        if (child->klass->attach) {
            child->klass->attach(child);
        }
    }

    /*
     * If the old child node was drained but the new one is not, allow
     * requests to come in only after the new node has been attached.
     */
    while (drain_saldo < 0 && child->klass->drained_end) {
        bdrv_parent_drained_end_single(child);
        drain_saldo++;
    }
}
2595  
2596  /*
2597   * Updates @child to change its reference to point to @new_bs, including
2598   * checking and applying the necessary permission updates both to the old node
2599   * and to @new_bs.
2600   *
2601   * NULL is passed as @new_bs for removing the reference before freeing @child.
2602   *
2603   * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
2604   * function uses bdrv_set_perm() to update the permissions according to the new
2605   * reference that @new_bs gets.
2606   *
2607   * Callers must ensure that child->frozen is false.
2608   */
2609  static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
2610  {
2611      BlockDriverState *old_bs = child->bs;
2612  
2613      /* Asserts that child->frozen == false */
2614      bdrv_replace_child_noperm(child, new_bs);
2615  
2616      /*
2617       * Start with the new node's permissions.  If @new_bs is a (direct
2618       * or indirect) child of @old_bs, we must complete the permission
2619       * update on @new_bs before we loosen the restrictions on @old_bs.
2620       * Otherwise, bdrv_check_perm() on @old_bs would re-initiate
2621       * updating the permissions of @new_bs, and thus not purely loosen
2622       * restrictions.
2623       */
2624      if (new_bs) {
2625          bdrv_set_perm(new_bs);
2626      }
2627  
2628      if (old_bs) {
2629          /*
2630           * Update permissions for old node. We're just taking a parent away, so
2631           * we're loosening restrictions. Errors of permission update are not
2632           * fatal in this case, ignore them.
2633           */
2634          bdrv_refresh_perms(old_bs, NULL);
2635  
2636          /* When the parent requiring a non-default AioContext is removed, the
2637           * node moves back to the main AioContext */
2638          bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
2639      }
2640  }
2641  
2642  /*
2643   * This function steals the reference to child_bs from the caller.
2644   * That reference is later dropped by bdrv_root_unref_child().
2645   *
2646   * On failure NULL is returned, errp is set and the reference to
2647   * child_bs is also dropped.
2648   *
2649   * The caller must hold the AioContext lock @child_bs, but not that of @ctx
2650   * (unless @child_bs is already in @ctx).
2651   */
2652  BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
2653                                    const char *child_name,
2654                                    const BdrvChildClass *child_class,
2655                                    BdrvChildRole child_role,
2656                                    AioContext *ctx,
2657                                    uint64_t perm, uint64_t shared_perm,
2658                                    void *opaque, Error **errp)
2659  {
2660      BdrvChild *child;
2661      Error *local_err = NULL;
2662      int ret;
2663  
2664      ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
2665      if (ret < 0) {
2666          bdrv_abort_perm_update(child_bs);
2667          bdrv_unref(child_bs);
2668          return NULL;
2669      }
2670  
2671      child = g_new(BdrvChild, 1);
2672      *child = (BdrvChild) {
2673          .bs             = NULL,
2674          .name           = g_strdup(child_name),
2675          .klass          = child_class,
2676          .role           = child_role,
2677          .perm           = perm,
2678          .shared_perm    = shared_perm,
2679          .opaque         = opaque,
2680      };
2681  
2682      /* If the AioContexts don't match, first try to move the subtree of
2683       * child_bs into the AioContext of the new parent. If this doesn't work,
2684       * try moving the parent into the AioContext of child_bs instead. */
2685      if (bdrv_get_aio_context(child_bs) != ctx) {
2686          ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
2687          if (ret < 0 && child_class->can_set_aio_ctx) {
2688              GSList *ignore = g_slist_prepend(NULL, child);
2689              ctx = bdrv_get_aio_context(child_bs);
2690              if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
2691                  error_free(local_err);
2692                  ret = 0;
2693                  g_slist_free(ignore);
2694                  ignore = g_slist_prepend(NULL, child);
2695                  child_class->set_aio_ctx(child, ctx, &ignore);
2696              }
2697              g_slist_free(ignore);
2698          }
2699          if (ret < 0) {
2700              error_propagate(errp, local_err);
2701              g_free(child);
2702              bdrv_abort_perm_update(child_bs);
2703              bdrv_unref(child_bs);
2704              return NULL;
2705          }
2706      }
2707  
2708      /* This performs the matching bdrv_set_perm() for the above check. */
2709      bdrv_replace_child(child, child_bs);
2710  
2711      return child;
2712  }
2713  
2714  /*
2715   * This function transfers the reference to child_bs from the caller
2716   * to parent_bs. That reference is later dropped by parent_bs on
2717   * bdrv_close() or if someone calls bdrv_unref_child().
2718   *
2719   * On failure NULL is returned, errp is set and the reference to
2720   * child_bs is also dropped.
2721   *
2722   * If @parent_bs and @child_bs are in different AioContexts, the caller must
2723   * hold the AioContext lock for @child_bs, but not for @parent_bs.
2724   */
2725  BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
2726                               BlockDriverState *child_bs,
2727                               const char *child_name,
2728                               const BdrvChildClass *child_class,
2729                               BdrvChildRole child_role,
2730                               Error **errp)
2731  {
2732      BdrvChild *child;
2733      uint64_t perm, shared_perm;
2734  
2735      bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
2736  
2737      assert(parent_bs->drv);
2738      bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
2739                      perm, shared_perm, &perm, &shared_perm);
2740  
2741      child = bdrv_root_attach_child(child_bs, child_name, child_class,
2742                                     child_role, bdrv_get_aio_context(parent_bs),
2743                                     perm, shared_perm, parent_bs, errp);
2744      if (child == NULL) {
2745          return NULL;
2746      }
2747  
2748      QLIST_INSERT_HEAD(&parent_bs->children, child, next);
2749      return child;
2750  }
2751  
2752  static void bdrv_detach_child(BdrvChild *child)
2753  {
2754      QLIST_SAFE_REMOVE(child, next);
2755  
2756      bdrv_replace_child(child, NULL);
2757  
2758      g_free(child->name);
2759      g_free(child);
2760  }
2761  
2762  /* Callers must ensure that child->frozen is false. */
2763  void bdrv_root_unref_child(BdrvChild *child)
2764  {
2765      BlockDriverState *child_bs;
2766  
2767      child_bs = child->bs;
2768      bdrv_detach_child(child);
2769      bdrv_unref(child_bs);
2770  }
2771  
2772  /**
2773   * Clear all inherits_from pointers from children and grandchildren of
2774   * @root that point to @root, where necessary.
2775   */
2776  static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
2777  {
2778      BdrvChild *c;
2779  
2780      if (child->bs->inherits_from == root) {
2781          /*
2782           * Remove inherits_from only when the last reference between root and
2783           * child->bs goes away.
2784           */
2785          QLIST_FOREACH(c, &root->children, next) {
2786              if (c != child && c->bs == child->bs) {
2787                  break;
2788              }
2789          }
2790          if (c == NULL) {
2791              child->bs->inherits_from = NULL;
2792          }
2793      }
2794  
2795      QLIST_FOREACH(c, &child->bs->children, next) {
2796          bdrv_unset_inherits_from(root, c);
2797      }
2798  }
2799  
2800  /* Callers must ensure that child->frozen is false. */
2801  void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
2802  {
2803      if (child == NULL) {
2804          return;
2805      }
2806  
2807      bdrv_unset_inherits_from(parent, child);
2808      bdrv_root_unref_child(child);
2809  }
2810  
2811  
2812  static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
2813  {
2814      BdrvChild *c;
2815      QLIST_FOREACH(c, &bs->parents, next_parent) {
2816          if (c->klass->change_media) {
2817              c->klass->change_media(c, load);
2818          }
2819      }
2820  }
2821  
2822  /* Return true if you can reach parent going through child->inherits_from
2823   * recursively. If parent or child are NULL, return false */
2824  static bool bdrv_inherits_from_recursive(BlockDriverState *child,
2825                                           BlockDriverState *parent)
2826  {
2827      while (child && child != parent) {
2828          child = child->inherits_from;
2829      }
2830  
2831      return child != NULL;
2832  }
2833  
2834  /*
2835   * Return the BdrvChildRole for @bs's backing child.  bs->backing is
2836   * mostly used for COW backing children (role = COW), but also for
2837   * filtered children (role = FILTERED | PRIMARY).
2838   */
2839  static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
2840  {
2841      if (bs->drv && bs->drv->is_filter) {
2842          return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
2843      } else {
2844          return BDRV_CHILD_COW;
2845      }
2846  }
2847  
2848  /*
2849   * Sets the bs->backing link of a BDS. A new reference is created; callers
2850   * which don't need their own reference any more must call bdrv_unref().
2851   */
2852  int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
2853                          Error **errp)
2854  {
2855      int ret = 0;
2856      bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
2857          bdrv_inherits_from_recursive(backing_hd, bs);
2858  
2859      if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
2860          return -EPERM;
2861      }
2862  
2863      if (backing_hd) {
2864          bdrv_ref(backing_hd);
2865      }
2866  
2867      if (bs->backing) {
2868          /* Cannot be frozen, we checked that above */
2869          bdrv_unref_child(bs, bs->backing);
2870          bs->backing = NULL;
2871      }
2872  
2873      if (!backing_hd) {
2874          goto out;
2875      }
2876  
2877      bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds,
2878                                      bdrv_backing_role(bs), errp);
2879      if (!bs->backing) {
2880          ret = -EPERM;
2881          goto out;
2882      }
2883  
2884      /* If backing_hd was already part of bs's backing chain, and
2885       * inherits_from pointed recursively to bs then let's update it to
2886       * point directly to bs (else it will become NULL). */
2887      if (update_inherits_from) {
2888          backing_hd->inherits_from = bs;
2889      }
2890  
2891  out:
2892      bdrv_refresh_limits(bs, NULL);
2893  
2894      return ret;
2895  }
2896  
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * parent_options may be NULL, which is treated as an empty set of options.
 * Unless bs already has a backing child, the "${bdref_key}." entries are
 * extracted from parent_options; the bdref_key entry itself is only deleted
 * once a backing node has been opened and attached.
 *
 * Returns 0 on success (this includes the cases where bs already has a
 * backing child or where no backing file is configured) and -EINVAL on
 * failure; every failure path reports its error through errp.
 *
 * TODO Can this be unified with bdrv_open_image()?
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                           const char *bdref_key, Error **errp)
{
    char *backing_filename = NULL;
    char *bdref_key_dot;
    const char *reference = NULL;
    int ret = 0;
    bool implicit_backing = false;
    BlockDriverState *backing_hd;
    QDict *options;
    QDict *tmp_parent_options = NULL;
    Error *local_err = NULL;

    /* Nothing to do if a backing child is already attached (ret stays 0) */
    if (bs->backing != NULL) {
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (parent_options == NULL) {
        tmp_parent_options = qdict_new();
        parent_options = tmp_parent_options;
    }

    /* Cleared here; set again below if opening the backing node fails */
    bs->open_flags &= ~BDRV_O_NO_BACKING;

    /* Move all "${bdref_key}." entries into the new QDict @options */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @parent_options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        /* keep backing_filename NULL */
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file configured at all: succeed without doing anything */
        qobject_unref(options);
        goto free_exit;
    } else {
        if (qdict_size(options) == 0) {
            /* If the user specifies options that do not modify the
             * backing file's behavior, we might still consider it the
             * implicit backing file.  But it's easier this way, and
             * just specifying some of the backing BDS's options is
             * only possible with -drive anyway (otherwise the QAPI
             * schema forces the user to specify everything). */
            implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
        }

        backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            qobject_unref(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        qobject_unref(options);
        goto free_exit;
    }

    /* Use the recorded backing format unless a driver was given explicitly */
    if (!reference &&
        bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put_str(options, "driver", bs->backing_format);
    }

    /*
     * From here on @options is handed to bdrv_open_inherit(), so the paths
     * below no longer unref it themselves.
     */
    backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
                                   &child_of_bds, bdrv_backing_role(bs), errp);
    if (!backing_hd) {
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_prepend(errp, "Could not open backing file: ");
        ret = -EINVAL;
        goto free_exit;
    }

    if (implicit_backing) {
        bdrv_refresh_filename(backing_hd);
        pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
                backing_hd->filename);
    }

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
    bdrv_set_backing_hd(bs, backing_hd, &local_err);
    bdrv_unref(backing_hd);
    /* Failure is detected through local_err, not through the return value */
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_exit;
    }

    qdict_del(parent_options, bdref_key);

free_exit:
    g_free(backing_filename);
    /* Only non-NULL if the caller passed parent_options == NULL */
    qobject_unref(tmp_parent_options);
    return ret;
}
3013  
3014  static BlockDriverState *
3015  bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
3016                     BlockDriverState *parent, const BdrvChildClass *child_class,
3017                     BdrvChildRole child_role, bool allow_none, Error **errp)
3018  {
3019      BlockDriverState *bs = NULL;
3020      QDict *image_options;
3021      char *bdref_key_dot;
3022      const char *reference;
3023  
3024      assert(child_class != NULL);
3025  
3026      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3027      qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3028      g_free(bdref_key_dot);
3029  
3030      /*
3031       * Caution: while qdict_get_try_str() is fine, getting non-string
3032       * types would require more care.  When @options come from
3033       * -blockdev or blockdev_add, its members are typed according to
3034       * the QAPI schema, but when they come from -drive, they're all
3035       * QString.
3036       */
3037      reference = qdict_get_try_str(options, bdref_key);
3038      if (!filename && !reference && !qdict_size(image_options)) {
3039          if (!allow_none) {
3040              error_setg(errp, "A block device must be specified for \"%s\"",
3041                         bdref_key);
3042          }
3043          qobject_unref(image_options);
3044          goto done;
3045      }
3046  
3047      bs = bdrv_open_inherit(filename, reference, image_options, 0,
3048                             parent, child_class, child_role, errp);
3049      if (!bs) {
3050          goto done;
3051      }
3052  
3053  done:
3054      qdict_del(options, bdref_key);
3055      return bs;
3056  }
3057  
3058  /*
3059   * Opens a disk image whose options are given as BlockdevRef in another block
3060   * device's options.
3061   *
3062   * If allow_none is true, no image will be opened if filename is false and no
3063   * BlockdevRef is given. NULL will be returned, but errp remains unset.
3064   *
3065   * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3066   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3067   * itself, all options starting with "${bdref_key}." are considered part of the
3068   * BlockdevRef.
3069   *
3070   * The BlockdevRef will be removed from the options QDict.
3071   */
3072  BdrvChild *bdrv_open_child(const char *filename,
3073                             QDict *options, const char *bdref_key,
3074                             BlockDriverState *parent,
3075                             const BdrvChildClass *child_class,
3076                             BdrvChildRole child_role,
3077                             bool allow_none, Error **errp)
3078  {
3079      BlockDriverState *bs;
3080  
3081      bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3082                              child_role, allow_none, errp);
3083      if (bs == NULL) {
3084          return NULL;
3085      }
3086  
3087      return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3088                               errp);
3089  }
3090  
3091  /*
3092   * TODO Future callers may need to specify parent/child_class in order for
3093   * option inheritance to work. Existing callers use it for the root node.
3094   */
3095  BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3096  {
3097      BlockDriverState *bs = NULL;
3098      QObject *obj = NULL;
3099      QDict *qdict = NULL;
3100      const char *reference = NULL;
3101      Visitor *v = NULL;
3102  
3103      if (ref->type == QTYPE_QSTRING) {
3104          reference = ref->u.reference;
3105      } else {
3106          BlockdevOptions *options = &ref->u.definition;
3107          assert(ref->type == QTYPE_QDICT);
3108  
3109          v = qobject_output_visitor_new(&obj);
3110          visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3111          visit_complete(v, &obj);
3112  
3113          qdict = qobject_to(QDict, obj);
3114          qdict_flatten(qdict);
3115  
3116          /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3117           * compatibility with other callers) rather than what we want as the
3118           * real defaults. Apply the defaults here instead. */
3119          qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3120          qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3121          qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
3122          qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3123  
3124      }
3125  
3126      bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
3127      obj = NULL;
3128      qobject_unref(obj);
3129      visit_free(v);
3130      return bs;
3131  }
3132  
/*
 * Creates a temporary qcow2 image with the same length as @bs, opens it with
 * @flags and @snapshot_options and appends it on top of @bs with
 * bdrv_append().
 *
 * The reference to @snapshot_options is consumed in every case: it is either
 * handed to bdrv_open() or dropped at the out label.
 *
 * Returns the overlay node on success. On failure NULL is returned and the
 * error is reported through @errp.
 */
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
                                                   int flags,
                                                   QDict *snapshot_options,
                                                   Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    QemuOpts *opts = NULL;
    BlockDriverState *bs_snapshot = NULL;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
                            &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
    ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
    qemu_opts_del(opts);
    if (ret < 0) {
        error_prepend(errp, "Could not create temporary overlay '%s': ",
                      tmp_filename);
        goto out;
    }

    /* Prepare options QDict for the temporary file */
    qdict_put_str(snapshot_options, "file.driver", "file");
    qdict_put_str(snapshot_options, "file.filename", tmp_filename);
    qdict_put_str(snapshot_options, "driver", "qcow2");

    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
    /* bdrv_open() owns the QDict now; make the unref at out a no-op */
    snapshot_options = NULL;
    if (!bs_snapshot) {
        goto out;
    }

    /* bdrv_append() consumes a strong reference to bs_snapshot
     * (i.e. it will call bdrv_unref() on it) even on error, so in
     * order to be able to return one, we have to increase
     * bs_snapshot's refcount here */
    bdrv_ref(bs_snapshot);
    ret = bdrv_append(bs_snapshot, bs, errp);
    if (ret < 0) {
        /*
         * NOTE(review): two references exist at this point (bdrv_open() and
         * bdrv_ref() above). If bdrv_append() drops exactly one of them on
         * error, as the comment above says, the other one looks leaked when
         * the pointer is cleared here -- confirm against bdrv_append().
         */
        bs_snapshot = NULL;
        goto out;
    }

out:
    qobject_unref(snapshot_options);
    g_free(tmp_filename);
    return bs_snapshot;
}
3200  
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given;
 * the existing node is looked up and returned with an additional reference.
 *
 * parent and child_class must either both be set or both be NULL. When they
 * are set, flags must be 0: flags and options are then derived from the
 * parent through child_class->inherit_options() using child_role.
 *
 * Returns the opened node (with BDRV_O_SNAPSHOT, the temporary overlay on top
 * of it), or NULL on failure, in which case errp is set.
 */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp)
{
    int ret;
    BlockBackend *file = NULL;
    BlockDriverState *bs;
    BlockDriver *drv = NULL;
    BdrvChild *child;
    const char *drvname;
    const char *backing;
    Error *local_err = NULL;
    QDict *snapshot_options = NULL;
    int snapshot_flags = 0;

    /* Children inherit their flags, so none may be passed for them */
    assert(!child_class || !flags);
    /* parent and child_class are only meaningful together */
    assert(!child_class == !parent);

    if (reference) {
        /* options is consumed right away; only its emptiness matters */
        bool options_non_empty = options ? qdict_size(options) : false;
        qobject_unref(options);

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return NULL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return NULL;
        }

        bdrv_ref(bs);
        return bs;
    }

    bs = bdrv_new();

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    /* json: syntax counts as explicit options, as if in the QDict */
    parse_json_protocol(options, &filename, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Snapshot of what was given explicitly, before anything is inherited */
    bs->explicit_options = qdict_clone_shallow(options);

    if (child_class) {
        bool parent_is_format;

        if (parent->drv) {
            parent_is_format = parent->drv->is_format;
        } else {
            /*
             * parent->drv is not set yet because this node is opened for
             * (potential) format probing.  That means that @parent is going
             * to be a format node.
             */
            parent_is_format = true;
        }

        bs->inherits_from = parent;
        child_class->inherit_options(child_role, parent_is_format,
                                     &flags, options,
                                     parent->open_flags, parent->options);
    }

    ret = bdrv_fill_options(&options, filename, &flags, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
     * Caution: getting a boolean member of @options requires care.
     * When @options come from -blockdev or blockdev_add, members are
     * typed according to the QAPI schema, but when they come from
     * -drive, they're all QString.
     */
    if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
        !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
        flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
    } else {
        flags &= ~BDRV_O_RDWR;
    }

    if (flags & BDRV_O_SNAPSHOT) {
        snapshot_options = qdict_new();
        bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
                                   flags, options);
        /* Let bdrv_backing_options() override "read-only" */
        qdict_del(options, BDRV_OPT_READ_ONLY);
        bdrv_inherited_options(BDRV_CHILD_COW, true,
                               &flags, options, flags, options);
    }

    /*
     * bs->options keeps the full set of options. The local shallow clone is
     * the working copy: entries are removed from it as they are handled, and
     * whatever is left at the end is reported as an unknown option.
     */
    bs->open_flags = flags;
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Find the right image format driver */
    /* See cautionary note on accessing @options above */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));

    /* See cautionary note on accessing @options above */
    backing = qdict_get_try_str(options, "backing");
    if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
        (backing && *backing == '\0'))
    {
        if (backing) {
            warn_report("Use of \"backing\": \"\" is deprecated; "
                        "use \"backing\": null instead");
        }
        /* An explicit null/empty "backing" disables opening a backing file */
        flags |= BDRV_O_NO_BACKING;
        qdict_del(bs->explicit_options, "backing");
        qdict_del(bs->options, "backing");
        qdict_del(options, "backing");
    }

    /* Open image file without format layer. This BlockBackend is only used for
     * probing, the block drivers will do their own bdrv_open_child() for the
     * same BDS, which is why we put the node name back into options. */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        BlockDriverState *file_bs;

        file_bs = bdrv_open_child_bs(filename, options, "file", bs,
                                     &child_of_bds, BDRV_CHILD_IMAGE,
                                     true, &local_err);
        if (local_err) {
            goto fail;
        }
        if (file_bs != NULL) {
            /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
             * looking at the header to guess the image format. This works even
             * in cases where a guest would not see a consistent state. */
            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
            blk_insert_bs(file, file_bs, &local_err);
            bdrv_unref(file_bs);
            if (local_err) {
                goto fail;
            }

            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
        /*
         * This option update would logically belong in bdrv_fill_options(),
         * but we first need to open bs->file for the probing to work, while
         * opening bs->file already requires the (mostly) final set of options
         * so that cache mode etc. can be inherited.
         *
         * Adding the driver later is somewhat ugly, but it's not an option
         * that would ever be inherited, so it's correct. We just need to make
         * sure to update both bs->options (which has the full effective
         * options for bs) and options (which has file.* already removed).
         */
        qdict_put_str(bs->options, "driver", drv->format_name);
        qdict_put_str(options, "driver", drv->format_name);
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        goto fail;
    }

    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
    /* file must be NULL if a protocol BDS is about to be created
     * (the inverse results in an error message from bdrv_open_common()) */
    assert(!(flags & BDRV_O_PROTOCOL) || !file);

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* The probing BlockBackend is not needed any more */
    if (file) {
        blk_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    /* Remove all children options and references
     * from bs->options and bs->explicit_options */
    QLIST_FOREACH(child, &bs->children, next) {
        char *child_key_dot;
        child_key_dot = g_strdup_printf("%s.", child->name);
        qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
        qdict_extract_subqdict(bs->options, NULL, child_key_dot);
        qdict_del(bs->explicit_options, child->name);
        qdict_del(bs->options, child->name);
        g_free(child_key_dot);
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp,
                       "Block format '%s' does not support the option '%s'",
                       drv->format_name, entry->key);
        }

        goto close_and_fail;
    }

    bdrv_parent_cb_change_media(bs, true);

    qobject_unref(options);
    options = NULL;

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        BlockDriverState *snapshot_bs;
        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
                                                snapshot_options, &local_err);
        /* bdrv_append_temp_snapshot() consumed the QDict in any case */
        snapshot_options = NULL;
        if (local_err) {
            goto close_and_fail;
        }
        /* We are not going to return bs but the overlay on top of it
         * (snapshot_bs); thus, we have to drop the strong reference to bs
         * (which we obtained by calling bdrv_new()). bs will not be deleted,
         * though, because the overlay still has a reference to it. */
        bdrv_unref(bs);
        bs = snapshot_bs;
    }

    return bs;

    /*
     * Error path for failures up to and including bdrv_open_common(): the
     * option QDicts stored in bs are released by hand before bs is dropped.
     */
fail:
    blk_unref(file);
    qobject_unref(snapshot_options);
    qobject_unref(bs->explicit_options);
    qobject_unref(bs->options);
    qobject_unref(options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    bdrv_unref(bs);
    error_propagate(errp, local_err);
    return NULL;

    /*
     * Error path once bdrv_open_common() has succeeded. bs->options and
     * bs->explicit_options are not released by hand here -- presumably
     * dropping the last reference to an opened node takes care of them
     * (TODO confirm).
     */
close_and_fail:
    bdrv_unref(bs);
    qobject_unref(snapshot_options);
    qobject_unref(options);
    error_propagate(errp, local_err);
    return NULL;
}
3501  
3502  BlockDriverState *bdrv_open(const char *filename, const char *reference,
3503                              QDict *options, int flags, Error **errp)
3504  {
3505      return bdrv_open_inherit(filename, reference, options, flags, NULL,
3506                               NULL, 0, errp);
3507  }
3508  
/*
 * Return true if the NULL-terminated @list contains @str.
 * A NULL @str or a NULL @list yields false.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *entry;

    if (!str || !list) {
        return false;
    }

    for (entry = list; *entry != NULL; entry++) {
        if (strcmp(str, *entry) == 0) {
            return true;
        }
    }

    return false;
}
3522  
3523  /*
3524   * Check that every option set in @bs->options is also set in
3525   * @new_opts.
3526   *
3527   * Options listed in the common_options list and in
3528   * @bs->drv->mutable_opts are skipped.
3529   *
3530   * Return 0 on success, otherwise return -EINVAL and set @errp.
3531   */
3532  static int bdrv_reset_options_allowed(BlockDriverState *bs,
3533                                        const QDict *new_opts, Error **errp)
3534  {
3535      const QDictEntry *e;
3536      /* These options are common to all block drivers and are handled
3537       * in bdrv_reopen_prepare() so they can be left out of @new_opts */
3538      const char *const common_options[] = {
3539          "node-name", "discard", "cache.direct", "cache.no-flush",
3540          "read-only", "auto-read-only", "detect-zeroes", NULL
3541      };
3542  
3543      for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
3544          if (!qdict_haskey(new_opts, e->key) &&
3545              !is_str_in_list(e->key, common_options) &&
3546              !is_str_in_list(e->key, bs->drv->mutable_opts)) {
3547              error_setg(errp, "Option '%s' cannot be reset "
3548                         "to its default value", e->key);
3549              return -EINVAL;
3550          }
3551      }
3552  
3553      return 0;
3554  }
3555  
3556  /*
3557   * Returns true if @child can be reached recursively from @bs
3558   */
3559  static bool bdrv_recurse_has_child(BlockDriverState *bs,
3560                                     BlockDriverState *child)
3561  {
3562      BdrvChild *c;
3563  
3564      if (bs == child) {
3565          return true;
3566      }
3567  
3568      QLIST_FOREACH(c, &bs->children, next) {
3569          if (bdrv_recurse_has_child(c->bs, child)) {
3570              return true;
3571          }
3572      }
3573  
3574      return false;
3575  }
3576  
3577  /*
3578   * Adds a BlockDriverState to a simple queue for an atomic, transactional
3579   * reopen of multiple devices.
3580   *
3581   * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
3582   * already performed, or alternatively may be NULL a new BlockReopenQueue will
3583   * be created and initialized. This newly created BlockReopenQueue should be
3584   * passed back in for subsequent calls that are intended to be of the same
3585   * atomic 'set'.
3586   *
3587   * bs is the BlockDriverState to add to the reopen queue.
3588   *
3589   * options contains the changed options for the associated bs
3590   * (the BlockReopenQueue takes ownership)
3591   *
3592   * flags contains the open flags for the associated bs
3593   *
3594   * returns a pointer to bs_queue, which is either the newly allocated
3595   * bs_queue, or the existing bs_queue being used.
3596   *
3597   * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
3598   */
3599  static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
3600                                                   BlockDriverState *bs,
3601                                                   QDict *options,
3602                                                   const BdrvChildClass *klass,
3603                                                   BdrvChildRole role,
3604                                                   bool parent_is_format,
3605                                                   QDict *parent_options,
3606                                                   int parent_flags,
3607                                                   bool keep_old_opts)
3608  {
3609      assert(bs != NULL);
3610  
3611      BlockReopenQueueEntry *bs_entry;
3612      BdrvChild *child;
3613      QDict *old_options, *explicit_options, *options_copy;
3614      int flags;
3615      QemuOpts *opts;
3616  
3617      /* Make sure that the caller remembered to use a drained section. This is
3618       * important to avoid graph changes between the recursive queuing here and
3619       * bdrv_reopen_multiple(). */
3620      assert(bs->quiesce_counter > 0);
3621  
3622      if (bs_queue == NULL) {
3623          bs_queue = g_new0(BlockReopenQueue, 1);
3624          QTAILQ_INIT(bs_queue);
3625      }
3626  
3627      if (!options) {
3628          options = qdict_new();
3629      }
3630  
3631      /* Check if this BlockDriverState is already in the queue */
3632      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3633          if (bs == bs_entry->state.bs) {
3634              break;
3635          }
3636      }
3637  
3638      /*
3639       * Precedence of options:
3640       * 1. Explicitly passed in options (highest)
3641       * 2. Retained from explicitly set options of bs
3642       * 3. Inherited from parent node
3643       * 4. Retained from effective options of bs
3644       */
3645  
3646      /* Old explicitly set values (don't overwrite by inherited value) */
3647      if (bs_entry || keep_old_opts) {
3648          old_options = qdict_clone_shallow(bs_entry ?
3649                                            bs_entry->state.explicit_options :
3650                                            bs->explicit_options);
3651          bdrv_join_options(bs, options, old_options);
3652          qobject_unref(old_options);
3653      }
3654  
3655      explicit_options = qdict_clone_shallow(options);
3656  
3657      /* Inherit from parent node */
3658      if (parent_options) {
3659          flags = 0;
3660          klass->inherit_options(role, parent_is_format, &flags, options,
3661                                 parent_flags, parent_options);
3662      } else {
3663          flags = bdrv_get_flags(bs);
3664      }
3665  
3666      if (keep_old_opts) {
3667          /* Old values are used for options that aren't set yet */
3668          old_options = qdict_clone_shallow(bs->options);
3669          bdrv_join_options(bs, options, old_options);
3670          qobject_unref(old_options);
3671      }
3672  
3673      /* We have the final set of options so let's update the flags */
3674      options_copy = qdict_clone_shallow(options);
3675      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
3676      qemu_opts_absorb_qdict(opts, options_copy, NULL);
3677      update_flags_from_options(&flags, opts);
3678      qemu_opts_del(opts);
3679      qobject_unref(options_copy);
3680  
3681      /* bdrv_open_inherit() sets and clears some additional flags internally */
3682      flags &= ~BDRV_O_PROTOCOL;
3683      if (flags & BDRV_O_RDWR) {
3684          flags |= BDRV_O_ALLOW_RDWR;
3685      }
3686  
3687      if (!bs_entry) {
3688          bs_entry = g_new0(BlockReopenQueueEntry, 1);
3689          QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
3690      } else {
3691          qobject_unref(bs_entry->state.options);
3692          qobject_unref(bs_entry->state.explicit_options);
3693      }
3694  
3695      bs_entry->state.bs = bs;
3696      bs_entry->state.options = options;
3697      bs_entry->state.explicit_options = explicit_options;
3698      bs_entry->state.flags = flags;
3699  
3700      /* This needs to be overwritten in bdrv_reopen_prepare() */
3701      bs_entry->state.perm = UINT64_MAX;
3702      bs_entry->state.shared_perm = 0;
3703  
3704      /*
3705       * If keep_old_opts is false then it means that unspecified
3706       * options must be reset to their original value. We don't allow
3707       * resetting 'backing' but we need to know if the option is
3708       * missing in order to decide if we have to return an error.
3709       */
3710      if (!keep_old_opts) {
3711          bs_entry->state.backing_missing =
3712              !qdict_haskey(options, "backing") &&
3713              !qdict_haskey(options, "backing.driver");
3714      }
3715  
3716      QLIST_FOREACH(child, &bs->children, next) {
3717          QDict *new_child_options = NULL;
3718          bool child_keep_old = keep_old_opts;
3719  
3720          /* reopen can only change the options of block devices that were
3721           * implicitly created and inherited options. For other (referenced)
3722           * block devices, a syntax like "backing.foo" results in an error. */
3723          if (child->bs->inherits_from != bs) {
3724              continue;
3725          }
3726  
3727          /* Check if the options contain a child reference */
3728          if (qdict_haskey(options, child->name)) {
3729              const char *childref = qdict_get_try_str(options, child->name);
3730              /*
3731               * The current child must not be reopened if the child
3732               * reference is null or points to a different node.
3733               */
3734              if (g_strcmp0(childref, child->bs->node_name)) {
3735                  continue;
3736              }
3737              /*
3738               * If the child reference points to the current child then
3739               * reopen it with its existing set of options (note that
3740               * it can still inherit new options from the parent).
3741               */
3742              child_keep_old = true;
3743          } else {
3744              /* Extract child options ("child-name.*") */
3745              char *child_key_dot = g_strdup_printf("%s.", child->name);
3746              qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
3747              qdict_extract_subqdict(options, &new_child_options, child_key_dot);
3748              g_free(child_key_dot);
3749          }
3750  
3751          bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
3752                                  child->klass, child->role, bs->drv->is_format,
3753                                  options, flags, child_keep_old);
3754      }
3755  
3756      return bs_queue;
3757  }
3758  
3759  BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
3760                                      BlockDriverState *bs,
3761                                      QDict *options, bool keep_old_opts)
3762  {
3763      return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
3764                                     NULL, 0, keep_old_opts);
3765  }
3766  
3767  /*
3768   * Reopen multiple BlockDriverStates atomically & transactionally.
3769   *
3770   * The queue passed in (bs_queue) must have been built up previous
3771   * via bdrv_reopen_queue().
3772   *
3773   * Reopens all BDS specified in the queue, with the appropriate
3774   * flags.  All devices are prepared for reopen, and failure of any
3775   * device will cause all device changes to be abandoned, and intermediate
3776   * data cleaned up.
3777   *
3778   * If all devices prepare successfully, then the changes are committed
3779   * to all devices.
3780   *
3781   * All affected nodes must be drained between bdrv_reopen_queue() and
3782   * bdrv_reopen_multiple().
3783   */
3784  int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
3785  {
3786      int ret = -1;
3787      BlockReopenQueueEntry *bs_entry, *next;
3788  
3789      assert(bs_queue != NULL);
3790  
3791      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3792          assert(bs_entry->state.bs->quiesce_counter > 0);
3793          if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
3794              goto cleanup;
3795          }
3796          bs_entry->prepared = true;
3797      }
3798  
3799      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3800          BDRVReopenState *state = &bs_entry->state;
3801          ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
3802                                state->shared_perm, NULL, errp);
3803          if (ret < 0) {
3804              goto cleanup_perm;
3805          }
3806          /* Check if new_backing_bs would accept the new permissions */
3807          if (state->replace_backing_bs && state->new_backing_bs) {
3808              uint64_t nperm, nshared;
3809              bdrv_child_perm(state->bs, state->new_backing_bs,
3810                              NULL, bdrv_backing_role(state->bs),
3811                              bs_queue, state->perm, state->shared_perm,
3812                              &nperm, &nshared);
3813              ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
3814                                           nperm, nshared, NULL, errp);
3815              if (ret < 0) {
3816                  goto cleanup_perm;
3817              }
3818          }
3819          bs_entry->perms_checked = true;
3820      }
3821  
3822      /*
3823       * If we reach this point, we have success and just need to apply the
3824       * changes.
3825       *
3826       * Reverse order is used to comfort qcow2 driver: on commit it need to write
3827       * IN_USE flag to the image, to mark bitmaps in the image as invalid. But
3828       * children are usually goes after parents in reopen-queue, so go from last
3829       * to first element.
3830       */
3831      QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
3832          bdrv_reopen_commit(&bs_entry->state);
3833      }
3834  
3835      ret = 0;
3836  cleanup_perm:
3837      QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
3838          BDRVReopenState *state = &bs_entry->state;
3839  
3840          if (!bs_entry->perms_checked) {
3841              continue;
3842          }
3843  
3844          if (ret == 0) {
3845              uint64_t perm, shared;
3846  
3847              bdrv_get_cumulative_perm(state->bs, &perm, &shared);
3848              assert(perm == state->perm);
3849              assert(shared == state->shared_perm);
3850  
3851              bdrv_set_perm(state->bs);
3852          } else {
3853              bdrv_abort_perm_update(state->bs);
3854              if (state->replace_backing_bs && state->new_backing_bs) {
3855                  bdrv_abort_perm_update(state->new_backing_bs);
3856              }
3857          }
3858      }
3859  
3860      if (ret == 0) {
3861          QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
3862              BlockDriverState *bs = bs_entry->state.bs;
3863  
3864              if (bs->drv->bdrv_reopen_commit_post)
3865                  bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
3866          }
3867      }
3868  cleanup:
3869      QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
3870          if (ret) {
3871              if (bs_entry->prepared) {
3872                  bdrv_reopen_abort(&bs_entry->state);
3873              }
3874              qobject_unref(bs_entry->state.explicit_options);
3875              qobject_unref(bs_entry->state.options);
3876          }
3877          if (bs_entry->state.new_backing_bs) {
3878              bdrv_unref(bs_entry->state.new_backing_bs);
3879          }
3880          g_free(bs_entry);
3881      }
3882      g_free(bs_queue);
3883  
3884      return ret;
3885  }
3886  
3887  int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
3888                                Error **errp)
3889  {
3890      int ret;
3891      BlockReopenQueue *queue;
3892      QDict *opts = qdict_new();
3893  
3894      qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
3895  
3896      bdrv_subtree_drained_begin(bs);
3897      queue = bdrv_reopen_queue(NULL, bs, opts, true);
3898      ret = bdrv_reopen_multiple(queue, errp);
3899      bdrv_subtree_drained_end(bs);
3900  
3901      return ret;
3902  }
3903  
3904  static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
3905                                                            BdrvChild *c)
3906  {
3907      BlockReopenQueueEntry *entry;
3908  
3909      QTAILQ_FOREACH(entry, q, entry) {
3910          BlockDriverState *bs = entry->state.bs;
3911          BdrvChild *child;
3912  
3913          QLIST_FOREACH(child, &bs->children, next) {
3914              if (child == c) {
3915                  return entry;
3916              }
3917          }
3918      }
3919  
3920      return NULL;
3921  }
3922  
3923  static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
3924                               uint64_t *perm, uint64_t *shared)
3925  {
3926      BdrvChild *c;
3927      BlockReopenQueueEntry *parent;
3928      uint64_t cumulative_perms = 0;
3929      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
3930  
3931      QLIST_FOREACH(c, &bs->parents, next_parent) {
3932          parent = find_parent_in_reopen_queue(q, c);
3933          if (!parent) {
3934              cumulative_perms |= c->perm;
3935              cumulative_shared_perms &= c->shared_perm;
3936          } else {
3937              uint64_t nperm, nshared;
3938  
3939              bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
3940                              parent->state.perm, parent->state.shared_perm,
3941                              &nperm, &nshared);
3942  
3943              cumulative_perms |= nperm;
3944              cumulative_shared_perms &= nshared;
3945          }
3946      }
3947      *perm = cumulative_perms;
3948      *shared = cumulative_shared_perms;
3949  }
3950  
3951  static bool bdrv_reopen_can_attach(BlockDriverState *parent,
3952                                     BdrvChild *child,
3953                                     BlockDriverState *new_child,
3954                                     Error **errp)
3955  {
3956      AioContext *parent_ctx = bdrv_get_aio_context(parent);
3957      AioContext *child_ctx = bdrv_get_aio_context(new_child);
3958      GSList *ignore;
3959      bool ret;
3960  
3961      ignore = g_slist_prepend(NULL, child);
3962      ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL);
3963      g_slist_free(ignore);
3964      if (ret) {
3965          return ret;
3966      }
3967  
3968      ignore = g_slist_prepend(NULL, child);
3969      ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp);
3970      g_slist_free(ignore);
3971      return ret;
3972  }
3973  
3974  /*
3975   * Take a BDRVReopenState and check if the value of 'backing' in the
3976   * reopen_state->options QDict is valid or not.
3977   *
3978   * If 'backing' is missing from the QDict then return 0.
3979   *
3980   * If 'backing' contains the node name of the backing file of
3981   * reopen_state->bs then return 0.
3982   *
3983   * If 'backing' contains a different node name (or is null) then check
3984   * whether the current backing file can be replaced with the new one.
3985   * If that's the case then reopen_state->replace_backing_bs is set to
3986   * true and reopen_state->new_backing_bs contains a pointer to the new
3987   * backing BlockDriverState (or NULL).
3988   *
3989   * Return 0 on success, otherwise return < 0 and set @errp.
3990   */
3991  static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
3992                                       Error **errp)
3993  {
3994      BlockDriverState *bs = reopen_state->bs;
3995      BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
3996      QObject *value;
3997      const char *str;
3998  
3999      value = qdict_get(reopen_state->options, "backing");
4000      if (value == NULL) {
4001          return 0;
4002      }
4003  
4004      switch (qobject_type(value)) {
4005      case QTYPE_QNULL:
4006          new_backing_bs = NULL;
4007          break;
4008      case QTYPE_QSTRING:
4009          str = qstring_get_str(qobject_to(QString, value));
4010          new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
4011          if (new_backing_bs == NULL) {
4012              return -EINVAL;
4013          } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
4014              error_setg(errp, "Making '%s' a backing file of '%s' "
4015                         "would create a cycle", str, bs->node_name);
4016              return -EINVAL;
4017          }
4018          break;
4019      default:
4020          /* 'backing' does not allow any other data type */
4021          g_assert_not_reached();
4022      }
4023  
4024      /*
4025       * Check AioContext compatibility so that the bdrv_set_backing_hd() call in
4026       * bdrv_reopen_commit() won't fail.
4027       */
4028      if (new_backing_bs) {
4029          if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) {
4030              return -EINVAL;
4031          }
4032      }
4033  
4034      /*
4035       * Ensure that @bs can really handle backing files, because we are
4036       * about to give it one (or swap the existing one)
4037       */
4038      if (bs->drv->is_filter) {
4039          /* Filters always have a file or a backing child */
4040          if (!bs->backing) {
4041              error_setg(errp, "'%s' is a %s filter node that does not support a "
4042                         "backing child", bs->node_name, bs->drv->format_name);
4043              return -EINVAL;
4044          }
4045      } else if (!bs->drv->supports_backing) {
4046          error_setg(errp, "Driver '%s' of node '%s' does not support backing "
4047                     "files", bs->drv->format_name, bs->node_name);
4048          return -EINVAL;
4049      }
4050  
4051      /*
4052       * Find the "actual" backing file by skipping all links that point
4053       * to an implicit node, if any (e.g. a commit filter node).
4054       * We cannot use any of the bdrv_skip_*() functions here because
4055       * those return the first explicit node, while we are looking for
4056       * its overlay here.
4057       */
4058      overlay_bs = bs;
4059      for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
4060           below_bs && below_bs->implicit;
4061           below_bs = bdrv_filter_or_cow_bs(overlay_bs))
4062      {
4063          overlay_bs = below_bs;
4064      }
4065  
4066      /* If we want to replace the backing file we need some extra checks */
4067      if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
4068          /* Check for implicit nodes between bs and its backing file */
4069          if (bs != overlay_bs) {
4070              error_setg(errp, "Cannot change backing link if '%s' has "
4071                         "an implicit backing file", bs->node_name);
4072              return -EPERM;
4073          }
4074          /*
4075           * Check if the backing link that we want to replace is frozen.
4076           * Note that
4077           * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
4078           * because we know that overlay_bs == bs, and that @bs
4079           * either is a filter that uses ->backing or a COW format BDS
4080           * with bs->drv->supports_backing == true.
4081           */
4082          if (bdrv_is_backing_chain_frozen(overlay_bs,
4083                                           child_bs(overlay_bs->backing), errp))
4084          {
4085              return -EPERM;
4086          }
4087          reopen_state->replace_backing_bs = true;
4088          if (new_backing_bs) {
4089              bdrv_ref(new_backing_bs);
4090              reopen_state->new_backing_bs = new_backing_bs;
4091          }
4092      }
4093  
4094      return 0;
4095  }
4096  
4097  /*
4098   * Prepares a BlockDriverState for reopen. All changes are staged in the
4099   * 'opaque' field of the BDRVReopenState, which is used and allocated by
4100   * the block driver layer .bdrv_reopen_prepare()
4101   *
4102   * bs is the BlockDriverState to reopen
4103   * flags are the new open flags
4104   * queue is the reopen queue
4105   *
4106   * Returns 0 on success, non-zero on error.  On error errp will be set
4107   * as well.
4108   *
4109   * On failure, bdrv_reopen_abort() will be called to clean up any data.
4110   * It is the responsibility of the caller to then call the abort() or
4111   * commit() for any other BDS that have been left in a prepare() state
4112   *
4113   */
4114  int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
4115                          Error **errp)
4116  {
4117      int ret = -1;
4118      int old_flags;
4119      Error *local_err = NULL;
4120      BlockDriver *drv;
4121      QemuOpts *opts;
4122      QDict *orig_reopen_opts;
4123      char *discard = NULL;
4124      bool read_only;
4125      bool drv_prepared = false;
4126  
4127      assert(reopen_state != NULL);
4128      assert(reopen_state->bs->drv != NULL);
4129      drv = reopen_state->bs->drv;
4130  
4131      /* This function and each driver's bdrv_reopen_prepare() remove
4132       * entries from reopen_state->options as they are processed, so
4133       * we need to make a copy of the original QDict. */
4134      orig_reopen_opts = qdict_clone_shallow(reopen_state->options);
4135  
4136      /* Process generic block layer options */
4137      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4138      if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
4139          ret = -EINVAL;
4140          goto error;
4141      }
4142  
4143      /* This was already called in bdrv_reopen_queue_child() so the flags
4144       * are up-to-date. This time we simply want to remove the options from
4145       * QemuOpts in order to indicate that they have been processed. */
4146      old_flags = reopen_state->flags;
4147      update_flags_from_options(&reopen_state->flags, opts);
4148      assert(old_flags == reopen_state->flags);
4149  
4150      discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
4151      if (discard != NULL) {
4152          if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
4153              error_setg(errp, "Invalid discard option");
4154              ret = -EINVAL;
4155              goto error;
4156          }
4157      }
4158  
4159      reopen_state->detect_zeroes =
4160          bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
4161      if (local_err) {
4162          error_propagate(errp, local_err);
4163          ret = -EINVAL;
4164          goto error;
4165      }
4166  
4167      /* All other options (including node-name and driver) must be unchanged.
4168       * Put them back into the QDict, so that they are checked at the end
4169       * of this function. */
4170      qemu_opts_to_qdict(opts, reopen_state->options);
4171  
4172      /* If we are to stay read-only, do not allow permission change
4173       * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
4174       * not set, or if the BDS still has copy_on_read enabled */
4175      read_only = !(reopen_state->flags & BDRV_O_RDWR);
4176      ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
4177      if (local_err) {
4178          error_propagate(errp, local_err);
4179          goto error;
4180      }
4181  
4182      /* Calculate required permissions after reopening */
4183      bdrv_reopen_perm(queue, reopen_state->bs,
4184                       &reopen_state->perm, &reopen_state->shared_perm);
4185  
4186      ret = bdrv_flush(reopen_state->bs);
4187      if (ret) {
4188          error_setg_errno(errp, -ret, "Error flushing drive");
4189          goto error;
4190      }
4191  
4192      if (drv->bdrv_reopen_prepare) {
4193          /*
4194           * If a driver-specific option is missing, it means that we
4195           * should reset it to its default value.
4196           * But not all options allow that, so we need to check it first.
4197           */
4198          ret = bdrv_reset_options_allowed(reopen_state->bs,
4199                                           reopen_state->options, errp);
4200          if (ret) {
4201              goto error;
4202          }
4203  
4204          ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
4205          if (ret) {
4206              if (local_err != NULL) {
4207                  error_propagate(errp, local_err);
4208              } else {
4209                  bdrv_refresh_filename(reopen_state->bs);
4210                  error_setg(errp, "failed while preparing to reopen image '%s'",
4211                             reopen_state->bs->filename);
4212              }
4213              goto error;
4214          }
4215      } else {
4216          /* It is currently mandatory to have a bdrv_reopen_prepare()
4217           * handler for each supported drv. */
4218          error_setg(errp, "Block format '%s' used by node '%s' "
4219                     "does not support reopening files", drv->format_name,
4220                     bdrv_get_device_or_node_name(reopen_state->bs));
4221          ret = -1;
4222          goto error;
4223      }
4224  
4225      drv_prepared = true;
4226  
4227      /*
4228       * We must provide the 'backing' option if the BDS has a backing
4229       * file or if the image file has a backing file name as part of
4230       * its metadata. Otherwise the 'backing' option can be omitted.
4231       */
4232      if (drv->supports_backing && reopen_state->backing_missing &&
4233          (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
4234          error_setg(errp, "backing is missing for '%s'",
4235                     reopen_state->bs->node_name);
4236          ret = -EINVAL;
4237          goto error;
4238      }
4239  
4240      /*
4241       * Allow changing the 'backing' option. The new value can be
4242       * either a reference to an existing node (using its node name)
4243       * or NULL to simply detach the current backing file.
4244       */
4245      ret = bdrv_reopen_parse_backing(reopen_state, errp);
4246      if (ret < 0) {
4247          goto error;
4248      }
4249      qdict_del(reopen_state->options, "backing");
4250  
4251      /* Options that are not handled are only okay if they are unchanged
4252       * compared to the old state. It is expected that some options are only
4253       * used for the initial open, but not reopen (e.g. filename) */
4254      if (qdict_size(reopen_state->options)) {
4255          const QDictEntry *entry = qdict_first(reopen_state->options);
4256  
4257          do {
4258              QObject *new = entry->value;
4259              QObject *old = qdict_get(reopen_state->bs->options, entry->key);
4260  
4261              /* Allow child references (child_name=node_name) as long as they
4262               * point to the current child (i.e. everything stays the same). */
4263              if (qobject_type(new) == QTYPE_QSTRING) {
4264                  BdrvChild *child;
4265                  QLIST_FOREACH(child, &reopen_state->bs->children, next) {
4266                      if (!strcmp(child->name, entry->key)) {
4267                          break;
4268                      }
4269                  }
4270  
4271                  if (child) {
4272                      if (!strcmp(child->bs->node_name,
4273                                  qstring_get_str(qobject_to(QString, new)))) {
4274                          continue; /* Found child with this name, skip option */
4275                      }
4276                  }
4277              }
4278  
4279              /*
4280               * TODO: When using -drive to specify blockdev options, all values
4281               * will be strings; however, when using -blockdev, blockdev-add or
4282               * filenames using the json:{} pseudo-protocol, they will be
4283               * correctly typed.
4284               * In contrast, reopening options are (currently) always strings
4285               * (because you can only specify them through qemu-io; all other
4286               * callers do not specify any options).
4287               * Therefore, when using anything other than -drive to create a BDS,
4288               * this cannot detect non-string options as unchanged, because
4289               * qobject_is_equal() always returns false for objects of different
4290               * type.  In the future, this should be remedied by correctly typing
4291               * all options.  For now, this is not too big of an issue because
4292               * the user can simply omit options which cannot be changed anyway,
4293               * so they will stay unchanged.
4294               */
4295              if (!qobject_is_equal(new, old)) {
4296                  error_setg(errp, "Cannot change the option '%s'", entry->key);
4297                  ret = -EINVAL;
4298                  goto error;
4299              }
4300          } while ((entry = qdict_next(reopen_state->options, entry)));
4301      }
4302  
4303      ret = 0;
4304  
4305      /* Restore the original reopen_state->options QDict */
4306      qobject_unref(reopen_state->options);
4307      reopen_state->options = qobject_ref(orig_reopen_opts);
4308  
4309  error:
4310      if (ret < 0 && drv_prepared) {
4311          /* drv->bdrv_reopen_prepare() has succeeded, so we need to
4312           * call drv->bdrv_reopen_abort() before signaling an error
4313           * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
4314           * when the respective bdrv_reopen_prepare() has failed) */
4315          if (drv->bdrv_reopen_abort) {
4316              drv->bdrv_reopen_abort(reopen_state);
4317          }
4318      }
4319      qemu_opts_del(opts);
4320      qobject_unref(orig_reopen_opts);
4321      g_free(discard);
4322      return ret;
4323  }
4324  
4325  /*
4326   * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
4327   * makes them final by swapping the staging BlockDriverState contents into
4328   * the active BlockDriverState contents.
4329   */
4330  void bdrv_reopen_commit(BDRVReopenState *reopen_state)
4331  {
4332      BlockDriver *drv;
4333      BlockDriverState *bs;
4334      BdrvChild *child;
4335  
4336      assert(reopen_state != NULL);
4337      bs = reopen_state->bs;
4338      drv = bs->drv;
4339      assert(drv != NULL);
4340  
4341      /* If there are any driver level actions to take */
4342      if (drv->bdrv_reopen_commit) {
4343          drv->bdrv_reopen_commit(reopen_state);
4344      }
4345  
4346      /* set BDS specific flags now */
4347      qobject_unref(bs->explicit_options);
4348      qobject_unref(bs->options);
4349  
4350      bs->explicit_options   = reopen_state->explicit_options;
4351      bs->options            = reopen_state->options;
4352      bs->open_flags         = reopen_state->flags;
4353      bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
4354      bs->detect_zeroes      = reopen_state->detect_zeroes;
4355  
4356      if (reopen_state->replace_backing_bs) {
4357          qdict_del(bs->explicit_options, "backing");
4358          qdict_del(bs->options, "backing");
4359      }
4360  
4361      /* Remove child references from bs->options and bs->explicit_options.
4362       * Child options were already removed in bdrv_reopen_queue_child() */
4363      QLIST_FOREACH(child, &bs->children, next) {
4364          qdict_del(bs->explicit_options, child->name);
4365          qdict_del(bs->options, child->name);
4366      }
4367  
4368      /*
4369       * Change the backing file if a new one was specified. We do this
4370       * after updating bs->options, so bdrv_refresh_filename() (called
4371       * from bdrv_set_backing_hd()) has the new values.
4372       */
4373      if (reopen_state->replace_backing_bs) {
4374          BlockDriverState *old_backing_bs = child_bs(bs->backing);
4375          assert(!old_backing_bs || !old_backing_bs->implicit);
4376          /* Abort the permission update on the backing bs we're detaching */
4377          if (old_backing_bs) {
4378              bdrv_abort_perm_update(old_backing_bs);
4379          }
4380          bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
4381      }
4382  
4383      bdrv_refresh_limits(bs, NULL);
4384  }
4385  
4386  /*
4387   * Abort the reopen, and delete and free the staged changes in
4388   * reopen_state
4389   */
4390  void bdrv_reopen_abort(BDRVReopenState *reopen_state)
4391  {
4392      BlockDriver *drv;
4393  
4394      assert(reopen_state != NULL);
4395      drv = reopen_state->bs->drv;
4396      assert(drv != NULL);
4397  
4398      if (drv->bdrv_reopen_abort) {
4399          drv->bdrv_reopen_abort(reopen_state);
4400      }
4401  }
4402  
4403  
4404  static void bdrv_close(BlockDriverState *bs)
4405  {
4406      BdrvAioNotifier *ban, *ban_next;
4407      BdrvChild *child, *next;
4408  
4409      assert(!bs->refcnt);
4410  
4411      bdrv_drained_begin(bs); /* complete I/O */
4412      bdrv_flush(bs);
4413      bdrv_drain(bs); /* in case flush left pending I/O */
4414  
4415      if (bs->drv) {
4416          if (bs->drv->bdrv_close) {
4417              /* Must unfreeze all children, so bdrv_unref_child() works */
4418              bs->drv->bdrv_close(bs);
4419          }
4420          bs->drv = NULL;
4421      }
4422  
4423      QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
4424          bdrv_unref_child(bs, child);
4425      }
4426  
4427      bs->backing = NULL;
4428      bs->file = NULL;
4429      g_free(bs->opaque);
4430      bs->opaque = NULL;
4431      qatomic_set(&bs->copy_on_read, 0);
4432      bs->backing_file[0] = '\0';
4433      bs->backing_format[0] = '\0';
4434      bs->total_sectors = 0;
4435      bs->encrypted = false;
4436      bs->sg = false;
4437      qobject_unref(bs->options);
4438      qobject_unref(bs->explicit_options);
4439      bs->options = NULL;
4440      bs->explicit_options = NULL;
4441      qobject_unref(bs->full_open_options);
4442      bs->full_open_options = NULL;
4443  
4444      bdrv_release_named_dirty_bitmaps(bs);
4445      assert(QLIST_EMPTY(&bs->dirty_bitmaps));
4446  
4447      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4448          g_free(ban);
4449      }
4450      QLIST_INIT(&bs->aio_notifiers);
4451      bdrv_drained_end(bs);
4452  
4453      /*
4454       * If we're still inside some bdrv_drain_all_begin()/end() sections, end
4455       * them now since this BDS won't exist anymore when bdrv_drain_all_end()
4456       * gets called.
4457       */
4458      if (bs->quiesce_counter) {
4459          bdrv_drain_all_end_quiesce(bs);
4460      }
4461  }
4462  
4463  void bdrv_close_all(void)
4464  {
4465      assert(job_next(NULL) == NULL);
4466  
4467      /* Drop references from requests still in flight, such as canceled block
4468       * jobs whose AIO context has not been polled yet */
4469      bdrv_drain_all();
4470  
4471      blk_remove_all_bs();
4472      blockdev_close_all_bdrv_states();
4473  
4474      assert(QTAILQ_EMPTY(&all_bdrv_states));
4475  }
4476  
4477  static bool should_update_child(BdrvChild *c, BlockDriverState *to)
4478  {
4479      GQueue *queue;
4480      GHashTable *found;
4481      bool ret;
4482  
4483      if (c->klass->stay_at_node) {
4484          return false;
4485      }
4486  
4487      /* If the child @c belongs to the BDS @to, replacing the current
4488       * c->bs by @to would mean to create a loop.
4489       *
4490       * Such a case occurs when appending a BDS to a backing chain.
4491       * For instance, imagine the following chain:
4492       *
4493       *   guest device -> node A -> further backing chain...
4494       *
4495       * Now we create a new BDS B which we want to put on top of this
4496       * chain, so we first attach A as its backing node:
4497       *
4498       *                   node B
4499       *                     |
4500       *                     v
4501       *   guest device -> node A -> further backing chain...
4502       *
4503       * Finally we want to replace A by B.  When doing that, we want to
4504       * replace all pointers to A by pointers to B -- except for the
4505       * pointer from B because (1) that would create a loop, and (2)
4506       * that pointer should simply stay intact:
4507       *
4508       *   guest device -> node B
4509       *                     |
4510       *                     v
4511       *                   node A -> further backing chain...
4512       *
4513       * In general, when replacing a node A (c->bs) by a node B (@to),
4514       * if A is a child of B, that means we cannot replace A by B there
4515       * because that would create a loop.  Silently detaching A from B
4516       * is also not really an option.  So overall just leaving A in
4517       * place there is the most sensible choice.
4518       *
4519       * We would also create a loop in any cases where @c is only
4520       * indirectly referenced by @to. Prevent this by returning false
4521       * if @c is found (by breadth-first search) anywhere in the whole
4522       * subtree of @to.
4523       */
4524  
4525      ret = true;
4526      found = g_hash_table_new(NULL, NULL);
4527      g_hash_table_add(found, to);
4528      queue = g_queue_new();
4529      g_queue_push_tail(queue, to);
4530  
4531      while (!g_queue_is_empty(queue)) {
4532          BlockDriverState *v = g_queue_pop_head(queue);
4533          BdrvChild *c2;
4534  
4535          QLIST_FOREACH(c2, &v->children, next) {
4536              if (c2 == c) {
4537                  ret = false;
4538                  break;
4539              }
4540  
4541              if (g_hash_table_contains(found, c2->bs)) {
4542                  continue;
4543              }
4544  
4545              g_queue_push_tail(queue, c2->bs);
4546              g_hash_table_add(found, c2->bs);
4547          }
4548      }
4549  
4550      g_queue_free(queue);
4551      g_hash_table_destroy(found);
4552  
4553      return ret;
4554  }
4555  
4556  /*
4557   * With auto_skip=true bdrv_replace_node_common skips updating from parents
4558   * if it creates a parent-child relation loop or if parent is block-job.
4559   *
4560   * With auto_skip=false the error is returned if from has a parent which should
4561   * not be updated.
4562   */
4563  static int bdrv_replace_node_common(BlockDriverState *from,
4564                                      BlockDriverState *to,
4565                                      bool auto_skip, Error **errp)
4566  {
4567      BdrvChild *c, *next;
4568      GSList *list = NULL, *p;
4569      uint64_t perm = 0, shared = BLK_PERM_ALL;
4570      int ret;
4571  
4572      /* Make sure that @from doesn't go away until we have successfully attached
4573       * all of its parents to @to. */
4574      bdrv_ref(from);
4575  
4576      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
4577      assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
4578      bdrv_drained_begin(from);
4579  
4580      /* Put all parents into @list and calculate their cumulative permissions */
4581      QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
4582          assert(c->bs == from);
4583          if (!should_update_child(c, to)) {
4584              if (auto_skip) {
4585                  continue;
4586              }
4587              ret = -EINVAL;
4588              error_setg(errp, "Should not change '%s' link to '%s'",
4589                         c->name, from->node_name);
4590              goto out;
4591          }
4592          if (c->frozen) {
4593              ret = -EPERM;
4594              error_setg(errp, "Cannot change '%s' link to '%s'",
4595                         c->name, from->node_name);
4596              goto out;
4597          }
4598          list = g_slist_prepend(list, c);
4599          perm |= c->perm;
4600          shared &= c->shared_perm;
4601      }
4602  
4603      /* Check whether the required permissions can be granted on @to, ignoring
4604       * all BdrvChild in @list so that they can't block themselves. */
4605      ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp);
4606      if (ret < 0) {
4607          bdrv_abort_perm_update(to);
4608          goto out;
4609      }
4610  
4611      /* Now actually perform the change. We performed the permission check for
4612       * all elements of @list at once, so set the permissions all at once at the
4613       * very end. */
4614      for (p = list; p != NULL; p = p->next) {
4615          c = p->data;
4616  
4617          bdrv_ref(to);
4618          bdrv_replace_child_noperm(c, to);
4619          bdrv_unref(from);
4620      }
4621  
4622      bdrv_set_perm(to);
4623  
4624      ret = 0;
4625  
4626  out:
4627      g_slist_free(list);
4628      bdrv_drained_end(from);
4629      bdrv_unref(from);
4630  
4631      return ret;
4632  }
4633  
4634  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
4635                        Error **errp)
4636  {
4637      return bdrv_replace_node_common(from, to, true, errp);
4638  }
4639  
4640  /*
4641   * Add new bs contents at the top of an image chain while the chain is
4642   * live, while keeping required fields on the top layer.
4643   *
4644   * This will modify the BlockDriverState fields, and swap contents
4645   * between bs_new and bs_top. Both bs_new and bs_top are modified.
4646   *
4647   * bs_new must not be attached to a BlockBackend.
4648   *
4649   * This function does not create any image files.
4650   *
4651   * bdrv_append() takes ownership of a bs_new reference and unrefs it because
4652   * that's what the callers commonly need. bs_new will be referenced by the old
4653   * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
4654   * reference of its own, it must call bdrv_ref().
4655   */
4656  int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
4657                  Error **errp)
4658  {
4659      int ret = bdrv_set_backing_hd(bs_new, bs_top, errp);
4660      if (ret < 0) {
4661          goto out;
4662      }
4663  
4664      ret = bdrv_replace_node(bs_top, bs_new, errp);
4665      if (ret < 0) {
4666          bdrv_set_backing_hd(bs_new, NULL, &error_abort);
4667          goto out;
4668      }
4669  
4670      ret = 0;
4671  
4672  out:
4673      /*
4674       * bs_new is now referenced by its new parents, we don't need the
4675       * additional reference any more.
4676       */
4677      bdrv_unref(bs_new);
4678  
4679      return ret;
4680  }
4681  
4682  static void bdrv_delete(BlockDriverState *bs)
4683  {
4684      assert(bdrv_op_blocker_is_empty(bs));
4685      assert(!bs->refcnt);
4686  
4687      /* remove from list, if necessary */
4688      if (bs->node_name[0] != '\0') {
4689          QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
4690      }
4691      QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
4692  
4693      bdrv_close(bs);
4694  
4695      g_free(bs);
4696  }
4697  
4698  BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
4699                                     int flags, Error **errp)
4700  {
4701      BlockDriverState *new_node_bs;
4702      Error *local_err = NULL;
4703  
4704      new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
4705      if (new_node_bs == NULL) {
4706          error_prepend(errp, "Could not create node: ");
4707          return NULL;
4708      }
4709  
4710      bdrv_drained_begin(bs);
4711      bdrv_replace_node(bs, new_node_bs, &local_err);
4712      bdrv_drained_end(bs);
4713  
4714      if (local_err) {
4715          bdrv_unref(new_node_bs);
4716          error_propagate(errp, local_err);
4717          return NULL;
4718      }
4719  
4720      return new_node_bs;
4721  }
4722  
4723  /*
4724   * Run consistency checks on an image
4725   *
4726   * Returns 0 if the check could be completed (it doesn't mean that the image is
4727   * free of errors) or -errno when an internal error occurred. The results of the
4728   * check are stored in res.
4729   */
4730  int coroutine_fn bdrv_co_check(BlockDriverState *bs,
4731                                 BdrvCheckResult *res, BdrvCheckMode fix)
4732  {
4733      if (bs->drv == NULL) {
4734          return -ENOMEDIUM;
4735      }
4736      if (bs->drv->bdrv_co_check == NULL) {
4737          return -ENOTSUP;
4738      }
4739  
4740      memset(res, 0, sizeof(*res));
4741      return bs->drv->bdrv_co_check(bs, res, fix);
4742  }
4743  
4744  /*
4745   * Return values:
4746   * 0        - success
4747   * -EINVAL  - backing format specified, but no file
4748   * -ENOSPC  - can't update the backing file because no space is left in the
4749   *            image file header
4750   * -ENOTSUP - format driver doesn't support changing the backing file
4751   */
4752  int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
4753                               const char *backing_fmt, bool warn)
4754  {
4755      BlockDriver *drv = bs->drv;
4756      int ret;
4757  
4758      if (!drv) {
4759          return -ENOMEDIUM;
4760      }
4761  
4762      /* Backing file format doesn't make sense without a backing file */
4763      if (backing_fmt && !backing_file) {
4764          return -EINVAL;
4765      }
4766  
4767      if (warn && backing_file && !backing_fmt) {
4768          warn_report("Deprecated use of backing file without explicit "
4769                      "backing format, use of this image requires "
4770                      "potentially unsafe format probing");
4771      }
4772  
4773      if (drv->bdrv_change_backing_file != NULL) {
4774          ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
4775      } else {
4776          ret = -ENOTSUP;
4777      }
4778  
4779      if (ret == 0) {
4780          pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
4781          pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
4782          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
4783                  backing_file ?: "");
4784      }
4785      return ret;
4786  }
4787  
4788  /*
4789   * Finds the first non-filter node above bs in the chain between
4790   * active and bs.  The returned node is either an immediate parent of
4791   * bs, or there are only filter nodes between the two.
4792   *
4793   * Returns NULL if bs is not found in active's image chain,
4794   * or if active == bs.
4795   *
4796   * Returns the bottommost base image if bs == NULL.
4797   */
4798  BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
4799                                      BlockDriverState *bs)
4800  {
4801      bs = bdrv_skip_filters(bs);
4802      active = bdrv_skip_filters(active);
4803  
4804      while (active) {
4805          BlockDriverState *next = bdrv_backing_chain_next(active);
4806          if (bs == next) {
4807              return active;
4808          }
4809          active = next;
4810      }
4811  
4812      return NULL;
4813  }
4814  
4815  /* Given a BDS, searches for the base layer. */
4816  BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4817  {
4818      return bdrv_find_overlay(bs, NULL);
4819  }
4820  
4821  /*
4822   * Return true if at least one of the COW (backing) and filter links
4823   * between @bs and @base is frozen. @errp is set if that's the case.
4824   * @base must be reachable from @bs, or NULL.
4825   */
4826  bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
4827                                    Error **errp)
4828  {
4829      BlockDriverState *i;
4830      BdrvChild *child;
4831  
4832      for (i = bs; i != base; i = child_bs(child)) {
4833          child = bdrv_filter_or_cow_child(i);
4834  
4835          if (child && child->frozen) {
4836              error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
4837                         child->name, i->node_name, child->bs->node_name);
4838              return true;
4839          }
4840      }
4841  
4842      return false;
4843  }
4844  
4845  /*
4846   * Freeze all COW (backing) and filter links between @bs and @base.
4847   * If any of the links is already frozen the operation is aborted and
4848   * none of the links are modified.
4849   * @base must be reachable from @bs, or NULL.
4850   * Returns 0 on success. On failure returns < 0 and sets @errp.
4851   */
4852  int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
4853                                Error **errp)
4854  {
4855      BlockDriverState *i;
4856      BdrvChild *child;
4857  
4858      if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
4859          return -EPERM;
4860      }
4861  
4862      for (i = bs; i != base; i = child_bs(child)) {
4863          child = bdrv_filter_or_cow_child(i);
4864          if (child && child->bs->never_freeze) {
4865              error_setg(errp, "Cannot freeze '%s' link to '%s'",
4866                         child->name, child->bs->node_name);
4867              return -EPERM;
4868          }
4869      }
4870  
4871      for (i = bs; i != base; i = child_bs(child)) {
4872          child = bdrv_filter_or_cow_child(i);
4873          if (child) {
4874              child->frozen = true;
4875          }
4876      }
4877  
4878      return 0;
4879  }
4880  
4881  /*
4882   * Unfreeze all COW (backing) and filter links between @bs and @base.
4883   * The caller must ensure that all links are frozen before using this
4884   * function.
4885   * @base must be reachable from @bs, or NULL.
4886   */
4887  void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
4888  {
4889      BlockDriverState *i;
4890      BdrvChild *child;
4891  
4892      for (i = bs; i != base; i = child_bs(child)) {
4893          child = bdrv_filter_or_cow_child(i);
4894          if (child) {
4895              assert(child->frozen);
4896              child->frozen = false;
4897          }
4898      }
4899  }
4900  
4901  /*
4902   * Drops images above 'base' up to and including 'top', and sets the image
4903   * above 'top' to have base as its backing file.
4904   *
4905   * Requires that the overlay to 'top' is opened r/w, so that the backing file
4906   * information in 'bs' can be properly updated.
4907   *
4908   * E.g., this will convert the following chain:
4909   * bottom <- base <- intermediate <- top <- active
4910   *
4911   * to
4912   *
4913   * bottom <- base <- active
4914   *
4915   * It is allowed for bottom==base, in which case it converts:
4916   *
4917   * base <- intermediate <- top <- active
4918   *
4919   * to
4920   *
4921   * base <- active
4922   *
4923   * If backing_file_str is non-NULL, it will be used when modifying top's
4924   * overlay image metadata.
4925   *
4926   * Error conditions:
4927   *  if active == top, that is considered an error
4928   *
4929   */
4930  int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
4931                             const char *backing_file_str)
4932  {
4933      BlockDriverState *explicit_top = top;
4934      bool update_inherits_from;
4935      BdrvChild *c;
4936      Error *local_err = NULL;
4937      int ret = -EIO;
4938      g_autoptr(GSList) updated_children = NULL;
4939      GSList *p;
4940  
4941      bdrv_ref(top);
4942      bdrv_subtree_drained_begin(top);
4943  
4944      if (!top->drv || !base->drv) {
4945          goto exit;
4946      }
4947  
4948      /* Make sure that base is in the backing chain of top */
4949      if (!bdrv_chain_contains(top, base)) {
4950          goto exit;
4951      }
4952  
4953      /* If 'base' recursively inherits from 'top' then we should set
4954       * base->inherits_from to top->inherits_from after 'top' and all
4955       * other intermediate nodes have been dropped.
4956       * If 'top' is an implicit node (e.g. "commit_top") we should skip
4957       * it because no one inherits from it. We use explicit_top for that. */
4958      explicit_top = bdrv_skip_implicit_filters(explicit_top);
4959      update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
4960  
4961      /* success - we can delete the intermediate states, and link top->base */
4962      /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
4963       * we've figured out how they should work. */
4964      if (!backing_file_str) {
4965          bdrv_refresh_filename(base);
4966          backing_file_str = base->filename;
4967      }
4968  
4969      QLIST_FOREACH(c, &top->parents, next_parent) {
4970          updated_children = g_slist_prepend(updated_children, c);
4971      }
4972  
4973      bdrv_replace_node_common(top, base, false, &local_err);
4974      if (local_err) {
4975          error_report_err(local_err);
4976          goto exit;
4977      }
4978  
4979      for (p = updated_children; p; p = p->next) {
4980          c = p->data;
4981  
4982          if (c->klass->update_filename) {
4983              ret = c->klass->update_filename(c, base, backing_file_str,
4984                                              &local_err);
4985              if (ret < 0) {
4986                  /*
4987                   * TODO: Actually, we want to rollback all previous iterations
4988                   * of this loop, and (which is almost impossible) previous
4989                   * bdrv_replace_node()...
4990                   *
4991                   * Note, that c->klass->update_filename may lead to permission
4992                   * update, so it's a bad idea to call it inside permission
4993                   * update transaction of bdrv_replace_node.
4994                   */
4995                  error_report_err(local_err);
4996                  goto exit;
4997              }
4998          }
4999      }
5000  
5001      if (update_inherits_from) {
5002          base->inherits_from = explicit_top->inherits_from;
5003      }
5004  
5005      ret = 0;
5006  exit:
5007      bdrv_subtree_drained_end(top);
5008      bdrv_unref(top);
5009      return ret;
5010  }
5011  
5012  /**
5013   * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
5014   * sums the size of all data-bearing children.  (This excludes backing
5015   * children.)
5016   */
5017  static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
5018  {
5019      BdrvChild *child;
5020      int64_t child_size, sum = 0;
5021  
5022      QLIST_FOREACH(child, &bs->children, next) {
5023          if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5024                             BDRV_CHILD_FILTERED))
5025          {
5026              child_size = bdrv_get_allocated_file_size(child->bs);
5027              if (child_size < 0) {
5028                  return child_size;
5029              }
5030              sum += child_size;
5031          }
5032      }
5033  
5034      return sum;
5035  }
5036  
5037  /**
5038   * Length of a allocated file in bytes. Sparse files are counted by actual
5039   * allocated space. Return < 0 if error or unknown.
5040   */
5041  int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
5042  {
5043      BlockDriver *drv = bs->drv;
5044      if (!drv) {
5045          return -ENOMEDIUM;
5046      }
5047      if (drv->bdrv_get_allocated_file_size) {
5048          return drv->bdrv_get_allocated_file_size(bs);
5049      }
5050  
5051      if (drv->bdrv_file_open) {
5052          /*
5053           * Protocol drivers default to -ENOTSUP (most of their data is
5054           * not stored in any of their children (if they even have any),
5055           * so there is no generic way to figure it out).
5056           */
5057          return -ENOTSUP;
5058      } else if (drv->is_filter) {
5059          /* Filter drivers default to the size of their filtered child */
5060          return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
5061      } else {
5062          /* Other drivers default to summing their children's sizes */
5063          return bdrv_sum_allocated_file_size(bs);
5064      }
5065  }
5066  
5067  /*
5068   * bdrv_measure:
5069   * @drv: Format driver
5070   * @opts: Creation options for new image
5071   * @in_bs: Existing image containing data for new image (may be NULL)
5072   * @errp: Error object
5073   * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
5074   *          or NULL on error
5075   *
5076   * Calculate file size required to create a new image.
5077   *
5078   * If @in_bs is given then space for allocated clusters and zero clusters
5079   * from that image are included in the calculation.  If @opts contains a
5080   * backing file that is shared by @in_bs then backing clusters may be omitted
5081   * from the calculation.
5082   *
5083   * If @in_bs is NULL then the calculation includes no allocated clusters
5084   * unless a preallocation option is given in @opts.
5085   *
5086   * Note that @in_bs may use a different BlockDriver from @drv.
5087   *
5088   * If an error occurs the @errp pointer is set.
5089   */
5090  BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
5091                                 BlockDriverState *in_bs, Error **errp)
5092  {
5093      if (!drv->bdrv_measure) {
5094          error_setg(errp, "Block driver '%s' does not support size measurement",
5095                     drv->format_name);
5096          return NULL;
5097      }
5098  
5099      return drv->bdrv_measure(opts, in_bs, errp);
5100  }
5101  
5102  /**
5103   * Return number of sectors on success, -errno on error.
5104   */
5105  int64_t bdrv_nb_sectors(BlockDriverState *bs)
5106  {
5107      BlockDriver *drv = bs->drv;
5108  
5109      if (!drv)
5110          return -ENOMEDIUM;
5111  
5112      if (drv->has_variable_length) {
5113          int ret = refresh_total_sectors(bs, bs->total_sectors);
5114          if (ret < 0) {
5115              return ret;
5116          }
5117      }
5118      return bs->total_sectors;
5119  }
5120  
5121  /**
5122   * Return length in bytes on success, -errno on error.
5123   * The length is always a multiple of BDRV_SECTOR_SIZE.
5124   */
5125  int64_t bdrv_getlength(BlockDriverState *bs)
5126  {
5127      int64_t ret = bdrv_nb_sectors(bs);
5128  
5129      if (ret < 0) {
5130          return ret;
5131      }
5132      if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
5133          return -EFBIG;
5134      }
5135      return ret * BDRV_SECTOR_SIZE;
5136  }
5137  
5138  /* return 0 as number of sectors if no device present or error */
5139  void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
5140  {
5141      int64_t nb_sectors = bdrv_nb_sectors(bs);
5142  
5143      *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
5144  }
5145  
5146  bool bdrv_is_sg(BlockDriverState *bs)
5147  {
5148      return bs->sg;
5149  }
5150  
5151  /**
5152   * Return whether the given node supports compressed writes.
5153   */
5154  bool bdrv_supports_compressed_writes(BlockDriverState *bs)
5155  {
5156      BlockDriverState *filtered;
5157  
5158      if (!bs->drv || !block_driver_can_compress(bs->drv)) {
5159          return false;
5160      }
5161  
5162      filtered = bdrv_filter_bs(bs);
5163      if (filtered) {
5164          /*
5165           * Filters can only forward compressed writes, so we have to
5166           * check the child.
5167           */
5168          return bdrv_supports_compressed_writes(filtered);
5169      }
5170  
5171      return true;
5172  }
5173  
5174  const char *bdrv_get_format_name(BlockDriverState *bs)
5175  {
5176      return bs->drv ? bs->drv->format_name : NULL;
5177  }
5178  
/*
 * qsort() comparison callback for arrays of C strings: @a and @b point to
 * the array elements (i.e. to string pointers), which are compared with
 * strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    char *const *lhs = a;
    char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
5183  
5184  void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
5185                           void *opaque, bool read_only)
5186  {
5187      BlockDriver *drv;
5188      int count = 0;
5189      int i;
5190      const char **formats = NULL;
5191  
5192      QLIST_FOREACH(drv, &bdrv_drivers, list) {
5193          if (drv->format_name) {
5194              bool found = false;
5195              int i = count;
5196  
5197              if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
5198                  continue;
5199              }
5200  
5201              while (formats && i && !found) {
5202                  found = !strcmp(formats[--i], drv->format_name);
5203              }
5204  
5205              if (!found) {
5206                  formats = g_renew(const char *, formats, count + 1);
5207                  formats[count++] = drv->format_name;
5208              }
5209          }
5210      }
5211  
5212      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
5213          const char *format_name = block_driver_modules[i].format_name;
5214  
5215          if (format_name) {
5216              bool found = false;
5217              int j = count;
5218  
5219              if (use_bdrv_whitelist &&
5220                  !bdrv_format_is_whitelisted(format_name, read_only)) {
5221                  continue;
5222              }
5223  
5224              while (formats && j && !found) {
5225                  found = !strcmp(formats[--j], format_name);
5226              }
5227  
5228              if (!found) {
5229                  formats = g_renew(const char *, formats, count + 1);
5230                  formats[count++] = format_name;
5231              }
5232          }
5233      }
5234  
5235      qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
5236  
5237      for (i = 0; i < count; i++) {
5238          it(opaque, formats[i]);
5239      }
5240  
5241      g_free(formats);
5242  }
5243  
5244  /* This function is to find a node in the bs graph */
5245  BlockDriverState *bdrv_find_node(const char *node_name)
5246  {
5247      BlockDriverState *bs;
5248  
5249      assert(node_name);
5250  
5251      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5252          if (!strcmp(node_name, bs->node_name)) {
5253              return bs;
5254          }
5255      }
5256      return NULL;
5257  }
5258  
5259  /* Put this QMP function here so it can access the static graph_bdrv_states. */
5260  BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
5261                                             Error **errp)
5262  {
5263      BlockDeviceInfoList *list;
5264      BlockDriverState *bs;
5265  
5266      list = NULL;
5267      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5268          BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
5269          if (!info) {
5270              qapi_free_BlockDeviceInfoList(list);
5271              return NULL;
5272          }
5273          QAPI_LIST_PREPEND(list, info);
5274      }
5275  
5276      return list;
5277  }
5278  
5279  typedef struct XDbgBlockGraphConstructor {
5280      XDbgBlockGraph *graph;
5281      GHashTable *graph_nodes;
5282  } XDbgBlockGraphConstructor;
5283  
5284  static XDbgBlockGraphConstructor *xdbg_graph_new(void)
5285  {
5286      XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
5287  
5288      gr->graph = g_new0(XDbgBlockGraph, 1);
5289      gr->graph_nodes = g_hash_table_new(NULL, NULL);
5290  
5291      return gr;
5292  }
5293  
5294  static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
5295  {
5296      XDbgBlockGraph *graph = gr->graph;
5297  
5298      g_hash_table_destroy(gr->graph_nodes);
5299      g_free(gr);
5300  
5301      return graph;
5302  }
5303  
5304  static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
5305  {
5306      uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
5307  
5308      if (ret != 0) {
5309          return ret;
5310      }
5311  
5312      /*
5313       * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
5314       * answer of g_hash_table_lookup.
5315       */
5316      ret = g_hash_table_size(gr->graph_nodes) + 1;
5317      g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
5318  
5319      return ret;
5320  }
5321  
5322  static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
5323                                  XDbgBlockGraphNodeType type, const char *name)
5324  {
5325      XDbgBlockGraphNode *n;
5326  
5327      n = g_new0(XDbgBlockGraphNode, 1);
5328  
5329      n->id = xdbg_graph_node_num(gr, node);
5330      n->type = type;
5331      n->name = g_strdup(name);
5332  
5333      QAPI_LIST_PREPEND(gr->graph->nodes, n);
5334  }
5335  
5336  static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
5337                                  const BdrvChild *child)
5338  {
5339      BlockPermission qapi_perm;
5340      XDbgBlockGraphEdge *edge;
5341  
5342      edge = g_new0(XDbgBlockGraphEdge, 1);
5343  
5344      edge->parent = xdbg_graph_node_num(gr, parent);
5345      edge->child = xdbg_graph_node_num(gr, child->bs);
5346      edge->name = g_strdup(child->name);
5347  
5348      for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
5349          uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
5350  
5351          if (flag & child->perm) {
5352              QAPI_LIST_PREPEND(edge->perm, qapi_perm);
5353          }
5354          if (flag & child->shared_perm) {
5355              QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
5356          }
5357      }
5358  
5359      QAPI_LIST_PREPEND(gr->graph->edges, edge);
5360  }
5361  
5362  
5363  XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
5364  {
5365      BlockBackend *blk;
5366      BlockJob *job;
5367      BlockDriverState *bs;
5368      BdrvChild *child;
5369      XDbgBlockGraphConstructor *gr = xdbg_graph_new();
5370  
5371      for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
5372          char *allocated_name = NULL;
5373          const char *name = blk_name(blk);
5374  
5375          if (!*name) {
5376              name = allocated_name = blk_get_attached_dev_id(blk);
5377          }
5378          xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
5379                             name);
5380          g_free(allocated_name);
5381          if (blk_root(blk)) {
5382              xdbg_graph_add_edge(gr, blk, blk_root(blk));
5383          }
5384      }
5385  
5386      for (job = block_job_next(NULL); job; job = block_job_next(job)) {
5387          GSList *el;
5388  
5389          xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
5390                             job->job.id);
5391          for (el = job->nodes; el; el = el->next) {
5392              xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
5393          }
5394      }
5395  
5396      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5397          xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
5398                             bs->node_name);
5399          QLIST_FOREACH(child, &bs->children, next) {
5400              xdbg_graph_add_edge(gr, bs, child);
5401          }
5402      }
5403  
5404      return xdbg_graph_finalize(gr);
5405  }
5406  
5407  BlockDriverState *bdrv_lookup_bs(const char *device,
5408                                   const char *node_name,
5409                                   Error **errp)
5410  {
5411      BlockBackend *blk;
5412      BlockDriverState *bs;
5413  
5414      if (device) {
5415          blk = blk_by_name(device);
5416  
5417          if (blk) {
5418              bs = blk_bs(blk);
5419              if (!bs) {
5420                  error_setg(errp, "Device '%s' has no medium", device);
5421              }
5422  
5423              return bs;
5424          }
5425      }
5426  
5427      if (node_name) {
5428          bs = bdrv_find_node(node_name);
5429  
5430          if (bs) {
5431              return bs;
5432          }
5433      }
5434  
5435      error_setg(errp, "Cannot find device=%s nor node_name=%s",
5436                       device ? device : "",
5437                       node_name ? node_name : "");
5438      return NULL;
5439  }
5440  
5441  /* If 'base' is in the same chain as 'top', return true. Otherwise,
5442   * return false.  If either argument is NULL, return false. */
5443  bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
5444  {
5445      while (top && top != base) {
5446          top = bdrv_filter_or_cow_bs(top);
5447      }
5448  
5449      return top != NULL;
5450  }
5451  
5452  BlockDriverState *bdrv_next_node(BlockDriverState *bs)
5453  {
5454      if (!bs) {
5455          return QTAILQ_FIRST(&graph_bdrv_states);
5456      }
5457      return QTAILQ_NEXT(bs, node_list);
5458  }
5459  
5460  BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
5461  {
5462      if (!bs) {
5463          return QTAILQ_FIRST(&all_bdrv_states);
5464      }
5465      return QTAILQ_NEXT(bs, bs_list);
5466  }
5467  
5468  const char *bdrv_get_node_name(const BlockDriverState *bs)
5469  {
5470      return bs->node_name;
5471  }
5472  
5473  const char *bdrv_get_parent_name(const BlockDriverState *bs)
5474  {
5475      BdrvChild *c;
5476      const char *name;
5477  
5478      /* If multiple parents have a name, just pick the first one. */
5479      QLIST_FOREACH(c, &bs->parents, next_parent) {
5480          if (c->klass->get_name) {
5481              name = c->klass->get_name(c);
5482              if (name && *name) {
5483                  return name;
5484              }
5485          }
5486      }
5487  
5488      return NULL;
5489  }
5490  
5491  /* TODO check what callers really want: bs->node_name or blk_name() */
5492  const char *bdrv_get_device_name(const BlockDriverState *bs)
5493  {
5494      return bdrv_get_parent_name(bs) ?: "";
5495  }
5496  
5497  /* This can be used to identify nodes that might not have a device
5498   * name associated. Since node and device names live in the same
5499   * namespace, the result is unambiguous. The exception is if both are
5500   * absent, then this returns an empty (non-null) string. */
5501  const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
5502  {
5503      return bdrv_get_parent_name(bs) ?: bs->node_name;
5504  }
5505  
5506  int bdrv_get_flags(BlockDriverState *bs)
5507  {
5508      return bs->open_flags;
5509  }
5510  
5511  int bdrv_has_zero_init_1(BlockDriverState *bs)
5512  {
5513      return 1;
5514  }
5515  
5516  int bdrv_has_zero_init(BlockDriverState *bs)
5517  {
5518      BlockDriverState *filtered;
5519  
5520      if (!bs->drv) {
5521          return 0;
5522      }
5523  
5524      /* If BS is a copy on write image, it is initialized to
5525         the contents of the base image, which may not be zeroes.  */
5526      if (bdrv_cow_child(bs)) {
5527          return 0;
5528      }
5529      if (bs->drv->bdrv_has_zero_init) {
5530          return bs->drv->bdrv_has_zero_init(bs);
5531      }
5532  
5533      filtered = bdrv_filter_bs(bs);
5534      if (filtered) {
5535          return bdrv_has_zero_init(filtered);
5536      }
5537  
5538      /* safe default */
5539      return 0;
5540  }
5541  
5542  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
5543  {
5544      if (!(bs->open_flags & BDRV_O_UNMAP)) {
5545          return false;
5546      }
5547  
5548      return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
5549  }
5550  
5551  void bdrv_get_backing_filename(BlockDriverState *bs,
5552                                 char *filename, int filename_size)
5553  {
5554      pstrcpy(filename, filename_size, bs->backing_file);
5555  }
5556  
5557  int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
5558  {
5559      int ret;
5560      BlockDriver *drv = bs->drv;
5561      /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
5562      if (!drv) {
5563          return -ENOMEDIUM;
5564      }
5565      if (!drv->bdrv_get_info) {
5566          BlockDriverState *filtered = bdrv_filter_bs(bs);
5567          if (filtered) {
5568              return bdrv_get_info(filtered, bdi);
5569          }
5570          return -ENOTSUP;
5571      }
5572      memset(bdi, 0, sizeof(*bdi));
5573      ret = drv->bdrv_get_info(bs, bdi);
5574      if (ret < 0) {
5575          return ret;
5576      }
5577  
5578      if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
5579          return -EINVAL;
5580      }
5581  
5582      return 0;
5583  }
5584  
5585  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
5586                                            Error **errp)
5587  {
5588      BlockDriver *drv = bs->drv;
5589      if (drv && drv->bdrv_get_specific_info) {
5590          return drv->bdrv_get_specific_info(bs, errp);
5591      }
5592      return NULL;
5593  }
5594  
5595  BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
5596  {
5597      BlockDriver *drv = bs->drv;
5598      if (!drv || !drv->bdrv_get_specific_stats) {
5599          return NULL;
5600      }
5601      return drv->bdrv_get_specific_stats(bs);
5602  }
5603  
5604  void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
5605  {
5606      if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
5607          return;
5608      }
5609  
5610      bs->drv->bdrv_debug_event(bs, event);
5611  }
5612  
5613  static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
5614  {
5615      while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
5616          bs = bdrv_primary_bs(bs);
5617      }
5618  
5619      if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
5620          assert(bs->drv->bdrv_debug_remove_breakpoint);
5621          return bs;
5622      }
5623  
5624      return NULL;
5625  }
5626  
5627  int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
5628                            const char *tag)
5629  {
5630      bs = bdrv_find_debug_node(bs);
5631      if (bs) {
5632          return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
5633      }
5634  
5635      return -ENOTSUP;
5636  }
5637  
5638  int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
5639  {
5640      bs = bdrv_find_debug_node(bs);
5641      if (bs) {
5642          return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
5643      }
5644  
5645      return -ENOTSUP;
5646  }
5647  
5648  int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
5649  {
5650      while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
5651          bs = bdrv_primary_bs(bs);
5652      }
5653  
5654      if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
5655          return bs->drv->bdrv_debug_resume(bs, tag);
5656      }
5657  
5658      return -ENOTSUP;
5659  }
5660  
5661  bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
5662  {
5663      while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
5664          bs = bdrv_primary_bs(bs);
5665      }
5666  
5667      if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
5668          return bs->drv->bdrv_debug_is_suspended(bs, tag);
5669      }
5670  
5671      return false;
5672  }
5673  
5674  /* backing_file can either be relative, or absolute, or a protocol.  If it is
5675   * relative, it must be relative to the chain.  So, passing in bs->filename
5676   * from a BDS as backing_file should not be done, as that may be relative to
5677   * the CWD rather than the chain. */
5678  BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
5679          const char *backing_file)
5680  {
5681      char *filename_full = NULL;
5682      char *backing_file_full = NULL;
5683      char *filename_tmp = NULL;
5684      int is_protocol = 0;
5685      bool filenames_refreshed = false;
5686      BlockDriverState *curr_bs = NULL;
5687      BlockDriverState *retval = NULL;
5688      BlockDriverState *bs_below;
5689  
5690      if (!bs || !bs->drv || !backing_file) {
5691          return NULL;
5692      }
5693  
5694      filename_full     = g_malloc(PATH_MAX);
5695      backing_file_full = g_malloc(PATH_MAX);
5696  
5697      is_protocol = path_has_protocol(backing_file);
5698  
5699      /*
5700       * Being largely a legacy function, skip any filters here
5701       * (because filters do not have normal filenames, so they cannot
5702       * match anyway; and allowing json:{} filenames is a bit out of
5703       * scope).
5704       */
5705      for (curr_bs = bdrv_skip_filters(bs);
5706           bdrv_cow_child(curr_bs) != NULL;
5707           curr_bs = bs_below)
5708      {
5709          bs_below = bdrv_backing_chain_next(curr_bs);
5710  
5711          if (bdrv_backing_overridden(curr_bs)) {
5712              /*
5713               * If the backing file was overridden, we can only compare
5714               * directly against the backing node's filename.
5715               */
5716  
5717              if (!filenames_refreshed) {
5718                  /*
5719                   * This will automatically refresh all of the
5720                   * filenames in the rest of the backing chain, so we
5721                   * only need to do this once.
5722                   */
5723                  bdrv_refresh_filename(bs_below);
5724                  filenames_refreshed = true;
5725              }
5726  
5727              if (strcmp(backing_file, bs_below->filename) == 0) {
5728                  retval = bs_below;
5729                  break;
5730              }
5731          } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
5732              /*
5733               * If either of the filename paths is actually a protocol, then
5734               * compare unmodified paths; otherwise make paths relative.
5735               */
5736              char *backing_file_full_ret;
5737  
5738              if (strcmp(backing_file, curr_bs->backing_file) == 0) {
5739                  retval = bs_below;
5740                  break;
5741              }
5742              /* Also check against the full backing filename for the image */
5743              backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
5744                                                                     NULL);
5745              if (backing_file_full_ret) {
5746                  bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
5747                  g_free(backing_file_full_ret);
5748                  if (equal) {
5749                      retval = bs_below;
5750                      break;
5751                  }
5752              }
5753          } else {
5754              /* If not an absolute filename path, make it relative to the current
5755               * image's filename path */
5756              filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
5757                                                         NULL);
5758              /* We are going to compare canonicalized absolute pathnames */
5759              if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
5760                  g_free(filename_tmp);
5761                  continue;
5762              }
5763              g_free(filename_tmp);
5764  
5765              /* We need to make sure the backing filename we are comparing against
5766               * is relative to the current image filename (or absolute) */
5767              filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
5768              if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
5769                  g_free(filename_tmp);
5770                  continue;
5771              }
5772              g_free(filename_tmp);
5773  
5774              if (strcmp(backing_file_full, filename_full) == 0) {
5775                  retval = bs_below;
5776                  break;
5777              }
5778          }
5779      }
5780  
5781      g_free(filename_full);
5782      g_free(backing_file_full);
5783      return retval;
5784  }
5785  
5786  void bdrv_init(void)
5787  {
5788      module_call_init(MODULE_INIT_BLOCK);
5789  }
5790  
5791  void bdrv_init_with_whitelist(void)
5792  {
5793      use_bdrv_whitelist = 1;
5794      bdrv_init();
5795  }
5796  
5797  int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
5798  {
5799      BdrvChild *child, *parent;
5800      Error *local_err = NULL;
5801      int ret;
5802      BdrvDirtyBitmap *bm;
5803  
5804      if (!bs->drv)  {
5805          return -ENOMEDIUM;
5806      }
5807  
5808      QLIST_FOREACH(child, &bs->children, next) {
5809          bdrv_co_invalidate_cache(child->bs, &local_err);
5810          if (local_err) {
5811              error_propagate(errp, local_err);
5812              return -EINVAL;
5813          }
5814      }
5815  
5816      /*
5817       * Update permissions, they may differ for inactive nodes.
5818       *
5819       * Note that the required permissions of inactive images are always a
5820       * subset of the permissions required after activating the image. This
5821       * allows us to just get the permissions upfront without restricting
5822       * drv->bdrv_invalidate_cache().
5823       *
5824       * It also means that in error cases, we don't have to try and revert to
5825       * the old permissions (which is an operation that could fail, too). We can
5826       * just keep the extended permissions for the next time that an activation
5827       * of the image is tried.
5828       */
5829      if (bs->open_flags & BDRV_O_INACTIVE) {
5830          bs->open_flags &= ~BDRV_O_INACTIVE;
5831          ret = bdrv_refresh_perms(bs, errp);
5832          if (ret < 0) {
5833              bs->open_flags |= BDRV_O_INACTIVE;
5834              return ret;
5835          }
5836  
5837          if (bs->drv->bdrv_co_invalidate_cache) {
5838              bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
5839              if (local_err) {
5840                  bs->open_flags |= BDRV_O_INACTIVE;
5841                  error_propagate(errp, local_err);
5842                  return -EINVAL;
5843              }
5844          }
5845  
5846          FOR_EACH_DIRTY_BITMAP(bs, bm) {
5847              bdrv_dirty_bitmap_skip_store(bm, false);
5848          }
5849  
5850          ret = refresh_total_sectors(bs, bs->total_sectors);
5851          if (ret < 0) {
5852              bs->open_flags |= BDRV_O_INACTIVE;
5853              error_setg_errno(errp, -ret, "Could not refresh total sector count");
5854              return ret;
5855          }
5856      }
5857  
5858      QLIST_FOREACH(parent, &bs->parents, next_parent) {
5859          if (parent->klass->activate) {
5860              parent->klass->activate(parent, &local_err);
5861              if (local_err) {
5862                  bs->open_flags |= BDRV_O_INACTIVE;
5863                  error_propagate(errp, local_err);
5864                  return -EINVAL;
5865              }
5866          }
5867      }
5868  
5869      return 0;
5870  }
5871  
5872  void bdrv_invalidate_cache_all(Error **errp)
5873  {
5874      BlockDriverState *bs;
5875      BdrvNextIterator it;
5876  
5877      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5878          AioContext *aio_context = bdrv_get_aio_context(bs);
5879          int ret;
5880  
5881          aio_context_acquire(aio_context);
5882          ret = bdrv_invalidate_cache(bs, errp);
5883          aio_context_release(aio_context);
5884          if (ret < 0) {
5885              bdrv_next_cleanup(&it);
5886              return;
5887          }
5888      }
5889  }
5890  
5891  static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
5892  {
5893      BdrvChild *parent;
5894  
5895      QLIST_FOREACH(parent, &bs->parents, next_parent) {
5896          if (parent->klass->parent_is_bds) {
5897              BlockDriverState *parent_bs = parent->opaque;
5898              if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
5899                  return true;
5900              }
5901          }
5902      }
5903  
5904      return false;
5905  }
5906  
5907  static int bdrv_inactivate_recurse(BlockDriverState *bs)
5908  {
5909      BdrvChild *child, *parent;
5910      int ret;
5911  
5912      if (!bs->drv) {
5913          return -ENOMEDIUM;
5914      }
5915  
5916      /* Make sure that we don't inactivate a child before its parent.
5917       * It will be covered by recursion from the yet active parent. */
5918      if (bdrv_has_bds_parent(bs, true)) {
5919          return 0;
5920      }
5921  
5922      assert(!(bs->open_flags & BDRV_O_INACTIVE));
5923  
5924      /* Inactivate this node */
5925      if (bs->drv->bdrv_inactivate) {
5926          ret = bs->drv->bdrv_inactivate(bs);
5927          if (ret < 0) {
5928              return ret;
5929          }
5930      }
5931  
5932      QLIST_FOREACH(parent, &bs->parents, next_parent) {
5933          if (parent->klass->inactivate) {
5934              ret = parent->klass->inactivate(parent);
5935              if (ret < 0) {
5936                  return ret;
5937              }
5938          }
5939      }
5940  
5941      bs->open_flags |= BDRV_O_INACTIVE;
5942  
5943      /*
5944       * Update permissions, they may differ for inactive nodes.
5945       * We only tried to loosen restrictions, so errors are not fatal, ignore
5946       * them.
5947       */
5948      bdrv_refresh_perms(bs, NULL);
5949  
5950      /* Recursively inactivate children */
5951      QLIST_FOREACH(child, &bs->children, next) {
5952          ret = bdrv_inactivate_recurse(child->bs);
5953          if (ret < 0) {
5954              return ret;
5955          }
5956      }
5957  
5958      return 0;
5959  }
5960  
5961  int bdrv_inactivate_all(void)
5962  {
5963      BlockDriverState *bs = NULL;
5964      BdrvNextIterator it;
5965      int ret = 0;
5966      GSList *aio_ctxs = NULL, *ctx;
5967  
5968      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5969          AioContext *aio_context = bdrv_get_aio_context(bs);
5970  
5971          if (!g_slist_find(aio_ctxs, aio_context)) {
5972              aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
5973              aio_context_acquire(aio_context);
5974          }
5975      }
5976  
5977      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5978          /* Nodes with BDS parents are covered by recursion from the last
5979           * parent that gets inactivated. Don't inactivate them a second
5980           * time if that has already happened. */
5981          if (bdrv_has_bds_parent(bs, false)) {
5982              continue;
5983          }
5984          ret = bdrv_inactivate_recurse(bs);
5985          if (ret < 0) {
5986              bdrv_next_cleanup(&it);
5987              goto out;
5988          }
5989      }
5990  
5991  out:
5992      for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
5993          AioContext *aio_context = ctx->data;
5994          aio_context_release(aio_context);
5995      }
5996      g_slist_free(aio_ctxs);
5997  
5998      return ret;
5999  }
6000  
6001  /**************************************************************/
6002  /* removable device support */
6003  
6004  /**
6005   * Return TRUE if the media is present
6006   */
6007  bool bdrv_is_inserted(BlockDriverState *bs)
6008  {
6009      BlockDriver *drv = bs->drv;
6010      BdrvChild *child;
6011  
6012      if (!drv) {
6013          return false;
6014      }
6015      if (drv->bdrv_is_inserted) {
6016          return drv->bdrv_is_inserted(bs);
6017      }
6018      QLIST_FOREACH(child, &bs->children, next) {
6019          if (!bdrv_is_inserted(child->bs)) {
6020              return false;
6021          }
6022      }
6023      return true;
6024  }
6025  
6026  /**
6027   * If eject_flag is TRUE, eject the media. Otherwise, close the tray
6028   */
6029  void bdrv_eject(BlockDriverState *bs, bool eject_flag)
6030  {
6031      BlockDriver *drv = bs->drv;
6032  
6033      if (drv && drv->bdrv_eject) {
6034          drv->bdrv_eject(bs, eject_flag);
6035      }
6036  }
6037  
6038  /**
6039   * Lock or unlock the media (if it is locked, the user won't be able
6040   * to eject it manually).
6041   */
6042  void bdrv_lock_medium(BlockDriverState *bs, bool locked)
6043  {
6044      BlockDriver *drv = bs->drv;
6045  
6046      trace_bdrv_lock_medium(bs, locked);
6047  
6048      if (drv && drv->bdrv_lock_medium) {
6049          drv->bdrv_lock_medium(bs, locked);
6050      }
6051  }
6052  
6053  /* Get a reference to bs */
6054  void bdrv_ref(BlockDriverState *bs)
6055  {
6056      bs->refcnt++;
6057  }
6058  
6059  /* Release a previously grabbed reference to bs.
6060   * If after releasing, reference count is zero, the BlockDriverState is
6061   * deleted. */
6062  void bdrv_unref(BlockDriverState *bs)
6063  {
6064      if (!bs) {
6065          return;
6066      }
6067      assert(bs->refcnt > 0);
6068      if (--bs->refcnt == 0) {
6069          bdrv_delete(bs);
6070      }
6071  }
6072  
6073  struct BdrvOpBlocker {
6074      Error *reason;
6075      QLIST_ENTRY(BdrvOpBlocker) list;
6076  };
6077  
6078  bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
6079  {
6080      BdrvOpBlocker *blocker;
6081      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6082      if (!QLIST_EMPTY(&bs->op_blockers[op])) {
6083          blocker = QLIST_FIRST(&bs->op_blockers[op]);
6084          error_propagate_prepend(errp, error_copy(blocker->reason),
6085                                  "Node '%s' is busy: ",
6086                                  bdrv_get_device_or_node_name(bs));
6087          return true;
6088      }
6089      return false;
6090  }
6091  
6092  void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
6093  {
6094      BdrvOpBlocker *blocker;
6095      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6096  
6097      blocker = g_new0(BdrvOpBlocker, 1);
6098      blocker->reason = reason;
6099      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
6100  }
6101  
6102  void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
6103  {
6104      BdrvOpBlocker *blocker, *next;
6105      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6106      QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
6107          if (blocker->reason == reason) {
6108              QLIST_REMOVE(blocker, list);
6109              g_free(blocker);
6110          }
6111      }
6112  }
6113  
6114  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
6115  {
6116      int i;
6117      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6118          bdrv_op_block(bs, i, reason);
6119      }
6120  }
6121  
6122  void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
6123  {
6124      int i;
6125      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6126          bdrv_op_unblock(bs, i, reason);
6127      }
6128  }
6129  
6130  bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
6131  {
6132      int i;
6133  
6134      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6135          if (!QLIST_EMPTY(&bs->op_blockers[i])) {
6136              return false;
6137          }
6138      }
6139      return true;
6140  }
6141  
/*
 * Create a new image @filename using the format driver named @fmt.
 *
 * @filename:      image to create; also used to look up the protocol driver
 * @fmt:           name of the format driver (see bdrv_find_format())
 * @base_filename: if non-NULL, stored as the BLOCK_OPT_BACKING_FILE option
 * @base_fmt:      if non-NULL, stored as the BLOCK_OPT_BACKING_FMT option
 * @options:       optional creation option string (may be NULL); parsed on
 *                 top of the combined format and protocol create options
 * @img_size:      image size to use when @options does not set one;
 *                 UINT64_C(-1) means "no size given"
 * @flags:         open flags; BDRV_O_NO_BACKING keeps the backing file from
 *                 being opened, otherwise the flags (minus BDRV_O_RDWR,
 *                 BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING) are used to open it
 * @quiet:         if true, do not print the "Formatting ..." line to stdout
 * @errp:          receives the error on failure
 *
 * When a backing file is given and may be opened, it is opened to supply the
 * image size if none was specified and, if no backing format was specified,
 * to warn about (and possibly record) the probed format.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags, bool quiet,
                     Error **errp)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    /* Both the format and the protocol layer must know how to create images */
    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Create parameter list */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);

    /* Parse -o options */
    if (options) {
        if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
            goto out;
        }
    }

    /*
     * The size may come either from the options or from @img_size, but not
     * from both at once.
     */
    if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
    } else if (img_size != UINT64_C(-1)) {
        error_setg(errp, "The image size must be specified only once");
        goto out;
    }

    if (base_filename) {
        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
                          NULL)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Sanity-check the backing file name, whichever way it was supplied */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
        if (backing_file[0] == '\0') {
            error_setg(errp, "Expected backing file name, got empty string");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, unless we have a backing
     * file and we have not been forbidden from opening it. */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
    if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
        BlockDriverState *bs;
        char *full_backing;
        int back_flags;
        QDict *backing_options = NULL;

        full_backing =
            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         &local_err);
        if (local_err) {
            goto out;
        }
        assert(full_backing);

        /* backing files always opened read-only */
        back_flags = flags;
        back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        backing_options = qdict_new();
        if (backing_fmt) {
            qdict_put_str(backing_options, "driver", backing_fmt);
        }
        qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

        bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                       &local_err);
        g_free(full_backing);
        if (!bs) {
            error_append_hint(&local_err, "Could not open backing image.\n");
            goto out;
        } else {
            if (!backing_fmt) {
                warn_report("Deprecated use of backing file without explicit "
                            "backing format (detected format of %s)",
                            bs->drv->format_name);
                if (bs->drv != &bdrv_raw) {
                    /*
                     * A probe of raw deserves the most attention:
                     * leaving the backing format out of the image
                     * will ensure bs->probed is set (ensuring we
                     * don't accidentally commit into the backing
                     * file), and allow more spots to warn the users
                     * to fix their toolchain when opening this image
                     * later.  For other images, we can safely record
                     * the format that we probed.
                     */
                    backing_fmt = bs->drv->format_name;
                    qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt,
                                 NULL);
                }
            }
            if (size == -1) {
                /* Opened BS, have no size */
                size = bdrv_getlength(bs);
                if (size < 0) {
                    error_setg_errno(errp, -size, "Could not get size of '%s'",
                                     backing_file);
                    bdrv_unref(bs);
                    goto out;
                }
                qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
            }
            bdrv_unref(bs);
        }
        /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
    } else if (backing_file && !backing_fmt) {
        warn_report("Deprecated use of unopened backing file without "
                    "explicit backing format, use of this image requires "
                    "potentially unsafe format probing");
    }

    /* Neither the caller nor the backing file provided a size */
    if (size == -1) {
        error_setg(errp, "Image creation needs a size parameter");
        goto out;
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
        fflush(stdout);
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error has been replaced; do not propagate it */
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; hands any error still held in local_err to @errp */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    error_propagate(errp, local_err);
}
6338  
6339  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
6340  {
6341      return bs ? bs->aio_context : qemu_get_aio_context();
6342  }
6343  
6344  AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
6345  {
6346      Coroutine *self = qemu_coroutine_self();
6347      AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
6348      AioContext *new_ctx;
6349  
6350      /*
6351       * Increase bs->in_flight to ensure that this operation is completed before
6352       * moving the node to a different AioContext. Read new_ctx only afterwards.
6353       */
6354      bdrv_inc_in_flight(bs);
6355  
6356      new_ctx = bdrv_get_aio_context(bs);
6357      aio_co_reschedule_self(new_ctx);
6358      return old_ctx;
6359  }
6360  
/*
 * Counterpart of bdrv_co_enter(): reschedule the calling coroutine into
 * @old_ctx and then decrement the in-flight counter of @bs.
 */
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
    aio_co_reschedule_self(old_ctx);
    bdrv_dec_in_flight(bs);
}
6366  
6367  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
6368  {
6369      AioContext *ctx = bdrv_get_aio_context(bs);
6370  
6371      /* In the main thread, bs->aio_context won't change concurrently */
6372      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
6373  
6374      /*
6375       * We're in coroutine context, so we already hold the lock of the main
6376       * loop AioContext. Don't lock it twice to avoid deadlocks.
6377       */
6378      assert(qemu_in_coroutine());
6379      if (ctx != qemu_get_aio_context()) {
6380          aio_context_acquire(ctx);
6381      }
6382  }
6383  
6384  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
6385  {
6386      AioContext *ctx = bdrv_get_aio_context(bs);
6387  
6388      assert(qemu_in_coroutine());
6389      if (ctx != qemu_get_aio_context()) {
6390          aio_context_release(ctx);
6391      }
6392  }
6393  
6394  void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
6395  {
6396      aio_co_enter(bdrv_get_aio_context(bs), co);
6397  }
6398  
/* Unlink the notifier @ban from the list it is on and free it. */
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
    QLIST_REMOVE(ban, list);
    g_free(ban);
}
6404  
6405  static void bdrv_detach_aio_context(BlockDriverState *bs)
6406  {
6407      BdrvAioNotifier *baf, *baf_tmp;
6408  
6409      assert(!bs->walking_aio_notifiers);
6410      bs->walking_aio_notifiers = true;
6411      QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
6412          if (baf->deleted) {
6413              bdrv_do_remove_aio_context_notifier(baf);
6414          } else {
6415              baf->detach_aio_context(baf->opaque);
6416          }
6417      }
6418      /* Never mind iterating again to check for ->deleted.  bdrv_close() will
6419       * remove remaining aio notifiers if we aren't called again.
6420       */
6421      bs->walking_aio_notifiers = false;
6422  
6423      if (bs->drv && bs->drv->bdrv_detach_aio_context) {
6424          bs->drv->bdrv_detach_aio_context(bs);
6425      }
6426  
6427      if (bs->quiesce_counter) {
6428          aio_enable_external(bs->aio_context);
6429      }
6430      bs->aio_context = NULL;
6431  }
6432  
6433  static void bdrv_attach_aio_context(BlockDriverState *bs,
6434                                      AioContext *new_context)
6435  {
6436      BdrvAioNotifier *ban, *ban_tmp;
6437  
6438      if (bs->quiesce_counter) {
6439          aio_disable_external(new_context);
6440      }
6441  
6442      bs->aio_context = new_context;
6443  
6444      if (bs->drv && bs->drv->bdrv_attach_aio_context) {
6445          bs->drv->bdrv_attach_aio_context(bs, new_context);
6446      }
6447  
6448      assert(!bs->walking_aio_notifiers);
6449      bs->walking_aio_notifiers = true;
6450      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
6451          if (ban->deleted) {
6452              bdrv_do_remove_aio_context_notifier(ban);
6453          } else {
6454              ban->attached_aio_context(new_context, ban->opaque);
6455          }
6456      }
6457      bs->walking_aio_notifiers = false;
6458  }
6459  
/*
 * Changes the AioContext used for fd handlers, timers, and BHs by this
 * BlockDriverState and all its children and parents.
 *
 * Must be called from the main AioContext.
 *
 * The caller must own the AioContext lock for the old AioContext of bs, but it
 * must not own the AioContext lock for new_context (unless new_context is the
 * same as the current context of bs).
 *
 * @ignore will accumulate all visited BdrvChild objects. The caller is
 * responsible for freeing the list afterwards.
 */
void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                 AioContext *new_context, GSList **ignore)
{
    AioContext *old_context = bdrv_get_aio_context(bs);
    GSList *children_to_process = NULL;
    GSList *parents_to_process = NULL;
    GSList *entry;
    BdrvChild *child, *parent;

    g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());

    /* Nothing to do if the node already lives in the requested context */
    if (old_context == new_context) {
        return;
    }

    /* Keep the node drained until the matching bdrv_drained_end() below */
    bdrv_drained_begin(bs);

    /*
     * First collect the child and parent links that have not been visited
     * yet, recording each of them in @ignore, and only then recurse into
     * them from the private lists.
     */
    QLIST_FOREACH(child, &bs->children, next) {
        if (g_slist_find(*ignore, child)) {
            continue;
        }
        *ignore = g_slist_prepend(*ignore, child);
        children_to_process = g_slist_prepend(children_to_process, child);
    }

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (g_slist_find(*ignore, parent)) {
            continue;
        }
        *ignore = g_slist_prepend(*ignore, parent);
        parents_to_process = g_slist_prepend(parents_to_process, parent);
    }

    for (entry = children_to_process;
         entry != NULL;
         entry = g_slist_next(entry)) {
        child = entry->data;
        bdrv_set_aio_context_ignore(child->bs, new_context, ignore);
    }
    g_slist_free(children_to_process);

    /* Parents are moved through the set_aio_ctx callback of their class */
    for (entry = parents_to_process;
         entry != NULL;
         entry = g_slist_next(entry)) {
        parent = entry->data;
        assert(parent->klass->set_aio_ctx);
        parent->klass->set_aio_ctx(parent, new_context, ignore);
    }
    g_slist_free(parents_to_process);

    bdrv_detach_aio_context(bs);

    /* Acquire the new context, if necessary */
    if (qemu_get_aio_context() != new_context) {
        aio_context_acquire(new_context);
    }

    bdrv_attach_aio_context(bs, new_context);

    /*
     * If this function was recursively called from
     * bdrv_set_aio_context_ignore(), there may be nodes in the
     * subtree that have not yet been moved to the new AioContext.
     * Release the old one so bdrv_drained_end() can poll them.
     */
    if (qemu_get_aio_context() != old_context) {
        aio_context_release(old_context);
    }

    bdrv_drained_end(bs);

    /*
     * Put the locks back the way they were on entry: the old context is
     * held again and the new context is no longer held (the main loop
     * context is skipped in both cases).
     */
    if (qemu_get_aio_context() != old_context) {
        aio_context_acquire(old_context);
    }
    if (qemu_get_aio_context() != new_context) {
        aio_context_release(new_context);
    }
}
6551  
6552  static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
6553                                              GSList **ignore, Error **errp)
6554  {
6555      if (g_slist_find(*ignore, c)) {
6556          return true;
6557      }
6558      *ignore = g_slist_prepend(*ignore, c);
6559  
6560      /*
6561       * A BdrvChildClass that doesn't handle AioContext changes cannot
6562       * tolerate any AioContext changes
6563       */
6564      if (!c->klass->can_set_aio_ctx) {
6565          char *user = bdrv_child_user_desc(c);
6566          error_setg(errp, "Changing iothreads is not supported by %s", user);
6567          g_free(user);
6568          return false;
6569      }
6570      if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) {
6571          assert(!errp || *errp);
6572          return false;
6573      }
6574      return true;
6575  }
6576  
6577  bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
6578                                      GSList **ignore, Error **errp)
6579  {
6580      if (g_slist_find(*ignore, c)) {
6581          return true;
6582      }
6583      *ignore = g_slist_prepend(*ignore, c);
6584      return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp);
6585  }
6586  
6587  /* @ignore will accumulate all visited BdrvChild object. The caller is
6588   * responsible for freeing the list afterwards. */
6589  bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
6590                                GSList **ignore, Error **errp)
6591  {
6592      BdrvChild *c;
6593  
6594      if (bdrv_get_aio_context(bs) == ctx) {
6595          return true;
6596      }
6597  
6598      QLIST_FOREACH(c, &bs->parents, next_parent) {
6599          if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
6600              return false;
6601          }
6602      }
6603      QLIST_FOREACH(c, &bs->children, next) {
6604          if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) {
6605              return false;
6606          }
6607      }
6608  
6609      return true;
6610  }
6611  
6612  int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
6613                                     BdrvChild *ignore_child, Error **errp)
6614  {
6615      GSList *ignore;
6616      bool ret;
6617  
6618      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
6619      ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
6620      g_slist_free(ignore);
6621  
6622      if (!ret) {
6623          return -EPERM;
6624      }
6625  
6626      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
6627      bdrv_set_aio_context_ignore(bs, ctx, &ignore);
6628      g_slist_free(ignore);
6629  
6630      return 0;
6631  }
6632  
/*
 * Convenience wrapper around bdrv_child_try_set_aio_context() that passes
 * no child to ignore.  Returns whatever that function returns.
 */
int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
                             Error **errp)
{
    return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
}
6638  
6639  void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6640          void (*attached_aio_context)(AioContext *new_context, void *opaque),
6641          void (*detach_aio_context)(void *opaque), void *opaque)
6642  {
6643      BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6644      *ban = (BdrvAioNotifier){
6645          .attached_aio_context = attached_aio_context,
6646          .detach_aio_context   = detach_aio_context,
6647          .opaque               = opaque
6648      };
6649  
6650      QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6651  }
6652  
6653  void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6654                                        void (*attached_aio_context)(AioContext *,
6655                                                                     void *),
6656                                        void (*detach_aio_context)(void *),
6657                                        void *opaque)
6658  {
6659      BdrvAioNotifier *ban, *ban_next;
6660  
6661      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6662          if (ban->attached_aio_context == attached_aio_context &&
6663              ban->detach_aio_context   == detach_aio_context   &&
6664              ban->opaque               == opaque               &&
6665              ban->deleted              == false)
6666          {
6667              if (bs->walking_aio_notifiers) {
6668                  ban->deleted = true;
6669              } else {
6670                  bdrv_do_remove_aio_context_notifier(ban);
6671              }
6672              return;
6673          }
6674      }
6675  
6676      abort();
6677  }
6678  
6679  int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6680                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
6681                         bool force,
6682                         Error **errp)
6683  {
6684      if (!bs->drv) {
6685          error_setg(errp, "Node is ejected");
6686          return -ENOMEDIUM;
6687      }
6688      if (!bs->drv->bdrv_amend_options) {
6689          error_setg(errp, "Block driver '%s' does not support option amendment",
6690                     bs->drv->format_name);
6691          return -ENOTSUP;
6692      }
6693      return bs->drv->bdrv_amend_options(bs, opts, status_cb,
6694                                         cb_opaque, force, errp);
6695  }
6696  
6697  /*
6698   * This function checks whether the given @to_replace is allowed to be
6699   * replaced by a node that always shows the same data as @bs.  This is
6700   * used for example to verify whether the mirror job can replace
6701   * @to_replace by the target mirrored from @bs.
6702   * To be replaceable, @bs and @to_replace may either be guaranteed to
6703   * always show the same data (because they are only connected through
6704   * filters), or some driver may allow replacing one of its children
6705   * because it can guarantee that this child's data is not visible at
6706   * all (for example, for dissenting quorum children that have no other
6707   * parents).
6708   */
6709  bool bdrv_recurse_can_replace(BlockDriverState *bs,
6710                                BlockDriverState *to_replace)
6711  {
6712      BlockDriverState *filtered;
6713  
6714      if (!bs || !bs->drv) {
6715          return false;
6716      }
6717  
6718      if (bs == to_replace) {
6719          return true;
6720      }
6721  
6722      /* See what the driver can do */
6723      if (bs->drv->bdrv_recurse_can_replace) {
6724          return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
6725      }
6726  
6727      /* For filters without an own implementation, we can recurse on our own */
6728      filtered = bdrv_filter_bs(bs);
6729      if (filtered) {
6730          return bdrv_recurse_can_replace(filtered, to_replace);
6731      }
6732  
6733      /* Safe default */
6734      return false;
6735  }
6736  
6737  /*
6738   * Check whether the given @node_name can be replaced by a node that
6739   * has the same data as @parent_bs.  If so, return @node_name's BDS;
6740   * NULL otherwise.
6741   *
6742   * @node_name must be a (recursive) *child of @parent_bs (or this
6743   * function will return NULL).
6744   *
6745   * The result (whether the node can be replaced or not) is only valid
6746   * for as long as no graph or permission changes occur.
6747   */
6748  BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
6749                                          const char *node_name, Error **errp)
6750  {
6751      BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
6752      AioContext *aio_context;
6753  
6754      if (!to_replace_bs) {
6755          error_setg(errp, "Node name '%s' not found", node_name);
6756          return NULL;
6757      }
6758  
6759      aio_context = bdrv_get_aio_context(to_replace_bs);
6760      aio_context_acquire(aio_context);
6761  
6762      if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
6763          to_replace_bs = NULL;
6764          goto out;
6765      }
6766  
6767      /* We don't want arbitrary node of the BDS chain to be replaced only the top
6768       * most non filter in order to prevent data corruption.
6769       * Another benefit is that this tests exclude backing files which are
6770       * blocked by the backing blockers.
6771       */
6772      if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
6773          error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
6774                     "because it cannot be guaranteed that doing so would not "
6775                     "lead to an abrupt change of visible data",
6776                     node_name, parent_bs->node_name);
6777          to_replace_bs = NULL;
6778          goto out;
6779      }
6780  
6781  out:
6782      aio_context_release(aio_context);
6783      return to_replace_bs;
6784  }
6785  
6786  /**
6787   * Iterates through the list of runtime option keys that are said to
6788   * be "strong" for a BDS.  An option is called "strong" if it changes
6789   * a BDS's data.  For example, the null block driver's "size" and
6790   * "read-zeroes" options are strong, but its "latency-ns" option is
6791   * not.
6792   *
6793   * If a key returned by this function ends with a dot, all options
6794   * starting with that prefix are strong.
6795   */
6796  static const char *const *strong_options(BlockDriverState *bs,
6797                                           const char *const *curopt)
6798  {
6799      static const char *const global_options[] = {
6800          "driver", "filename", NULL
6801      };
6802  
6803      if (!curopt) {
6804          return &global_options[0];
6805      }
6806  
6807      curopt++;
6808      if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
6809          curopt = bs->drv->strong_runtime_opts;
6810      }
6811  
6812      return (curopt && *curopt) ? curopt : NULL;
6813  }
6814  
6815  /**
6816   * Copies all strong runtime options from bs->options to the given
6817   * QDict.  The set of strong option keys is determined by invoking
6818   * strong_options().
6819   *
6820   * Returns true iff any strong option was present in bs->options (and
6821   * thus copied to the target QDict) with the exception of "filename"
6822   * and "driver".  The caller is expected to use this value to decide
6823   * whether the existence of strong options prevents the generation of
6824   * a plain filename.
6825   */
6826  static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
6827  {
6828      bool found_any = false;
6829      const char *const *option_name = NULL;
6830  
6831      if (!bs->drv) {
6832          return false;
6833      }
6834  
6835      while ((option_name = strong_options(bs, option_name))) {
6836          bool option_given = false;
6837  
6838          assert(strlen(*option_name) > 0);
6839          if ((*option_name)[strlen(*option_name) - 1] != '.') {
6840              QObject *entry = qdict_get(bs->options, *option_name);
6841              if (!entry) {
6842                  continue;
6843              }
6844  
6845              qdict_put_obj(d, *option_name, qobject_ref(entry));
6846              option_given = true;
6847          } else {
6848              const QDictEntry *entry;
6849              for (entry = qdict_first(bs->options); entry;
6850                   entry = qdict_next(bs->options, entry))
6851              {
6852                  if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
6853                      qdict_put_obj(d, qdict_entry_key(entry),
6854                                    qobject_ref(qdict_entry_value(entry)));
6855                      option_given = true;
6856                  }
6857              }
6858          }
6859  
6860          /* While "driver" and "filename" need to be included in a JSON filename,
6861           * their existence does not prohibit generation of a plain filename. */
6862          if (!found_any && option_given &&
6863              strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
6864          {
6865              found_any = true;
6866          }
6867      }
6868  
6869      if (!qdict_haskey(d, "driver")) {
6870          /* Drivers created with bdrv_new_open_driver() may not have a
6871           * @driver option.  Add it here. */
6872          qdict_put_str(d, "driver", bs->drv->format_name);
6873      }
6874  
6875      return found_any;
6876  }
6877  
6878  /* Note: This function may return false positives; it may return true
6879   * even if opening the backing file specified by bs's image header
6880   * would result in exactly bs->backing. */
6881  bool bdrv_backing_overridden(BlockDriverState *bs)
6882  {
6883      if (bs->backing) {
6884          return strcmp(bs->auto_backing_file,
6885                        bs->backing->bs->filename);
6886      } else {
6887          /* No backing BDS, so if the image header reports any backing
6888           * file, it must have been suppressed */
6889          return bs->auto_backing_file[0] != '\0';
6890      }
6891  }
6892  
6893  /* Updates the following BDS fields:
6894   *  - exact_filename: A filename which may be used for opening a block device
6895   *                    which (mostly) equals the given BDS (even without any
6896   *                    other options; so reading and writing must return the same
6897   *                    results, but caching etc. may be different)
6898   *  - full_open_options: Options which, when given when opening a block device
6899   *                       (without a filename), result in a BDS (mostly)
6900   *                       equalling the given one
6901   *  - filename: If exact_filename is set, it is copied here. Otherwise,
6902   *              full_open_options is converted to a JSON object, prefixed with
6903   *              "json:" (for use through the JSON pseudo protocol) and put here.
6904   */
6905  void bdrv_refresh_filename(BlockDriverState *bs)
6906  {
6907      BlockDriver *drv = bs->drv;
6908      BdrvChild *child;
6909      BlockDriverState *primary_child_bs;
6910      QDict *opts;
6911      bool backing_overridden;
6912      bool generate_json_filename; /* Whether our default implementation should
6913                                      fill exact_filename (false) or not (true) */
6914  
6915      if (!drv) {
6916          return;
6917      }
6918  
6919      /* This BDS's file name may depend on any of its children's file names, so
6920       * refresh those first */
6921      QLIST_FOREACH(child, &bs->children, next) {
6922          bdrv_refresh_filename(child->bs);
6923      }
6924  
6925      if (bs->implicit) {
6926          /* For implicit nodes, just copy everything from the single child */
6927          child = QLIST_FIRST(&bs->children);
6928          assert(QLIST_NEXT(child, next) == NULL);
6929  
6930          pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
6931                  child->bs->exact_filename);
6932          pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
6933  
6934          qobject_unref(bs->full_open_options);
6935          bs->full_open_options = qobject_ref(child->bs->full_open_options);
6936  
6937          return;
6938      }
6939  
6940      backing_overridden = bdrv_backing_overridden(bs);
6941  
6942      if (bs->open_flags & BDRV_O_NO_IO) {
6943          /* Without I/O, the backing file does not change anything.
6944           * Therefore, in such a case (primarily qemu-img), we can
6945           * pretend the backing file has not been overridden even if
6946           * it technically has been. */
6947          backing_overridden = false;
6948      }
6949  
6950      /* Gather the options QDict */
6951      opts = qdict_new();
6952      generate_json_filename = append_strong_runtime_options(opts, bs);
6953      generate_json_filename |= backing_overridden;
6954  
6955      if (drv->bdrv_gather_child_options) {
6956          /* Some block drivers may not want to present all of their children's
6957           * options, or name them differently from BdrvChild.name */
6958          drv->bdrv_gather_child_options(bs, opts, backing_overridden);
6959      } else {
6960          QLIST_FOREACH(child, &bs->children, next) {
6961              if (child == bs->backing && !backing_overridden) {
6962                  /* We can skip the backing BDS if it has not been overridden */
6963                  continue;
6964              }
6965  
6966              qdict_put(opts, child->name,
6967                        qobject_ref(child->bs->full_open_options));
6968          }
6969  
6970          if (backing_overridden && !bs->backing) {
6971              /* Force no backing file */
6972              qdict_put_null(opts, "backing");
6973          }
6974      }
6975  
6976      qobject_unref(bs->full_open_options);
6977      bs->full_open_options = opts;
6978  
6979      primary_child_bs = bdrv_primary_bs(bs);
6980  
6981      if (drv->bdrv_refresh_filename) {
6982          /* Obsolete information is of no use here, so drop the old file name
6983           * information before refreshing it */
6984          bs->exact_filename[0] = '\0';
6985  
6986          drv->bdrv_refresh_filename(bs);
6987      } else if (primary_child_bs) {
6988          /*
6989           * Try to reconstruct valid information from the underlying
6990           * file -- this only works for format nodes (filter nodes
6991           * cannot be probed and as such must be selected by the user
6992           * either through an options dict, or through a special
6993           * filename which the filter driver must construct in its
6994           * .bdrv_refresh_filename() implementation).
6995           */
6996  
6997          bs->exact_filename[0] = '\0';
6998  
6999          /*
7000           * We can use the underlying file's filename if:
7001           * - it has a filename,
7002           * - the current BDS is not a filter,
7003           * - the file is a protocol BDS, and
7004           * - opening that file (as this BDS's format) will automatically create
7005           *   the BDS tree we have right now, that is:
7006           *   - the user did not significantly change this BDS's behavior with
7007           *     some explicit (strong) options
7008           *   - no non-file child of this BDS has been overridden by the user
7009           *   Both of these conditions are represented by generate_json_filename.
7010           */
7011          if (primary_child_bs->exact_filename[0] &&
7012              primary_child_bs->drv->bdrv_file_open &&
7013              !drv->is_filter && !generate_json_filename)
7014          {
7015              strcpy(bs->exact_filename, primary_child_bs->exact_filename);
7016          }
7017      }
7018  
7019      if (bs->exact_filename[0]) {
7020          pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
7021      } else {
7022          GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
7023          if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
7024                       json->str) >= sizeof(bs->filename)) {
7025              /* Give user a hint if we truncated things. */
7026              strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
7027          }
7028          g_string_free(json, true);
7029      }
7030  }
7031  
7032  char *bdrv_dirname(BlockDriverState *bs, Error **errp)
7033  {
7034      BlockDriver *drv = bs->drv;
7035      BlockDriverState *child_bs;
7036  
7037      if (!drv) {
7038          error_setg(errp, "Node '%s' is ejected", bs->node_name);
7039          return NULL;
7040      }
7041  
7042      if (drv->bdrv_dirname) {
7043          return drv->bdrv_dirname(bs, errp);
7044      }
7045  
7046      child_bs = bdrv_primary_bs(bs);
7047      if (child_bs) {
7048          return bdrv_dirname(child_bs, errp);
7049      }
7050  
7051      bdrv_refresh_filename(bs);
7052      if (bs->exact_filename[0] != '\0') {
7053          return path_combine(bs->exact_filename, "");
7054      }
7055  
7056      error_setg(errp, "Cannot generate a base directory for %s nodes",
7057                 drv->format_name);
7058      return NULL;
7059  }
7060  
7061  /*
7062   * Hot add/remove a BDS's child. So the user can take a child offline when
7063   * it is broken and take a new child online
7064   */
7065  void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
7066                      Error **errp)
7067  {
7068  
7069      if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
7070          error_setg(errp, "The node %s does not support adding a child",
7071                     bdrv_get_device_or_node_name(parent_bs));
7072          return;
7073      }
7074  
7075      if (!QLIST_EMPTY(&child_bs->parents)) {
7076          error_setg(errp, "The node %s already has a parent",
7077                     child_bs->node_name);
7078          return;
7079      }
7080  
7081      parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
7082  }
7083  
7084  void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
7085  {
7086      BdrvChild *tmp;
7087  
7088      if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
7089          error_setg(errp, "The node %s does not support removing a child",
7090                     bdrv_get_device_or_node_name(parent_bs));
7091          return;
7092      }
7093  
7094      QLIST_FOREACH(tmp, &parent_bs->children, next) {
7095          if (tmp == child) {
7096              break;
7097          }
7098      }
7099  
7100      if (!tmp) {
7101          error_setg(errp, "The node %s does not have a child named %s",
7102                     bdrv_get_device_or_node_name(parent_bs),
7103                     bdrv_get_device_or_node_name(child->bs));
7104          return;
7105      }
7106  
7107      parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
7108  }
7109  
7110  int bdrv_make_empty(BdrvChild *c, Error **errp)
7111  {
7112      BlockDriver *drv = c->bs->drv;
7113      int ret;
7114  
7115      assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
7116  
7117      if (!drv->bdrv_make_empty) {
7118          error_setg(errp, "%s does not support emptying nodes",
7119                     drv->format_name);
7120          return -ENOTSUP;
7121      }
7122  
7123      ret = drv->bdrv_make_empty(c->bs);
7124      if (ret < 0) {
7125          error_setg_errno(errp, -ret, "Failed to empty %s",
7126                           c->bs->filename);
7127          return ret;
7128      }
7129  
7130      return 0;
7131  }
7132  
7133  /*
7134   * Return the child that @bs acts as an overlay for, and from which data may be
7135   * copied in COW or COR operations.  Usually this is the backing file.
7136   */
7137  BdrvChild *bdrv_cow_child(BlockDriverState *bs)
7138  {
7139      if (!bs || !bs->drv) {
7140          return NULL;
7141      }
7142  
7143      if (bs->drv->is_filter) {
7144          return NULL;
7145      }
7146  
7147      if (!bs->backing) {
7148          return NULL;
7149      }
7150  
7151      assert(bs->backing->role & BDRV_CHILD_COW);
7152      return bs->backing;
7153  }
7154  
7155  /*
7156   * If @bs acts as a filter for exactly one of its children, return
7157   * that child.
7158   */
7159  BdrvChild *bdrv_filter_child(BlockDriverState *bs)
7160  {
7161      BdrvChild *c;
7162  
7163      if (!bs || !bs->drv) {
7164          return NULL;
7165      }
7166  
7167      if (!bs->drv->is_filter) {
7168          return NULL;
7169      }
7170  
7171      /* Only one of @backing or @file may be used */
7172      assert(!(bs->backing && bs->file));
7173  
7174      c = bs->backing ?: bs->file;
7175      if (!c) {
7176          return NULL;
7177      }
7178  
7179      assert(c->role & BDRV_CHILD_FILTERED);
7180      return c;
7181  }
7182  
7183  /*
7184   * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
7185   * whichever is non-NULL.
7186   *
7187   * Return NULL if both are NULL.
7188   */
7189  BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
7190  {
7191      BdrvChild *cow_child = bdrv_cow_child(bs);
7192      BdrvChild *filter_child = bdrv_filter_child(bs);
7193  
7194      /* Filter nodes cannot have COW backing files */
7195      assert(!(cow_child && filter_child));
7196  
7197      return cow_child ?: filter_child;
7198  }
7199  
7200  /*
7201   * Return the primary child of this node: For filters, that is the
7202   * filtered child.  For other nodes, that is usually the child storing
7203   * metadata.
7204   * (A generally more helpful description is that this is (usually) the
7205   * child that has the same filename as @bs.)
7206   *
7207   * Drivers do not necessarily have a primary child; for example quorum
7208   * does not.
7209   */
7210  BdrvChild *bdrv_primary_child(BlockDriverState *bs)
7211  {
7212      BdrvChild *c, *found = NULL;
7213  
7214      QLIST_FOREACH(c, &bs->children, next) {
7215          if (c->role & BDRV_CHILD_PRIMARY) {
7216              assert(!found);
7217              found = c;
7218          }
7219      }
7220  
7221      return found;
7222  }
7223  
7224  static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
7225                                                bool stop_on_explicit_filter)
7226  {
7227      BdrvChild *c;
7228  
7229      if (!bs) {
7230          return NULL;
7231      }
7232  
7233      while (!(stop_on_explicit_filter && !bs->implicit)) {
7234          c = bdrv_filter_child(bs);
7235          if (!c) {
7236              /*
7237               * A filter that is embedded in a working block graph must
7238               * have a child.  Assert this here so this function does
7239               * not return a filter node that is not expected by the
7240               * caller.
7241               */
7242              assert(!bs->drv || !bs->drv->is_filter);
7243              break;
7244          }
7245          bs = c->bs;
7246      }
7247      /*
7248       * Note that this treats nodes with bs->drv == NULL as not being
7249       * filters (bs->drv == NULL should be replaced by something else
7250       * anyway).
7251       * The advantage of this behavior is that this function will thus
7252       * always return a non-NULL value (given a non-NULL @bs).
7253       */
7254  
7255      return bs;
7256  }
7257  
7258  /*
7259   * Return the first BDS that has not been added implicitly or that
7260   * does not have a filtered child down the chain starting from @bs
7261   * (including @bs itself).
7262   */
7263  BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
7264  {
7265      return bdrv_do_skip_filters(bs, true);
7266  }
7267  
7268  /*
7269   * Return the first BDS that does not have a filtered child down the
7270   * chain starting from @bs (including @bs itself).
7271   */
7272  BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
7273  {
7274      return bdrv_do_skip_filters(bs, false);
7275  }
7276  
7277  /*
7278   * For a backing chain, return the first non-filter backing image of
7279   * the first non-filter image.
7280   */
7281  BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
7282  {
7283      return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
7284  }
7285