xref: /openbmc/qemu/block.c (revision 54aa3de72ea2aaa2e903e7e879a4f3dda515a00e)
1  /*
2   * QEMU System Emulator block driver
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  #include "block/trace.h"
27  #include "block/block_int.h"
28  #include "block/blockjob.h"
29  #include "block/fuse.h"
30  #include "block/nbd.h"
31  #include "block/qdict.h"
32  #include "qemu/error-report.h"
33  #include "block/module_block.h"
34  #include "qemu/main-loop.h"
35  #include "qemu/module.h"
36  #include "qapi/error.h"
37  #include "qapi/qmp/qdict.h"
38  #include "qapi/qmp/qjson.h"
39  #include "qapi/qmp/qnull.h"
40  #include "qapi/qmp/qstring.h"
41  #include "qapi/qobject-output-visitor.h"
42  #include "qapi/qapi-visit-block-core.h"
43  #include "sysemu/block-backend.h"
44  #include "sysemu/sysemu.h"
45  #include "qemu/notify.h"
46  #include "qemu/option.h"
47  #include "qemu/coroutine.h"
48  #include "block/qapi.h"
49  #include "qemu/timer.h"
50  #include "qemu/cutils.h"
51  #include "qemu/id.h"
52  #include "block/coroutines.h"
53  
54  #ifdef CONFIG_BSD
55  #include <sys/ioctl.h>
56  #include <sys/queue.h>
57  #ifndef __DragonFly__
58  #include <sys/disk.h>
59  #endif
60  #endif
61  
62  #ifdef _WIN32
63  #include <windows.h>
64  #endif
65  
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/*
 * NOTE(review): presumably the nodes that belong to the named block graph;
 * nothing in this part of the file touches the list -- confirm against its
 * users.
 */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* bdrv_new() appends every BlockDriverState it creates to this list. */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Known block drivers; bdrv_register() inserts new entries at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration of a file-local function. */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
87  
88  #ifdef _WIN32
/*
 * Return non-zero if @filename starts with an ASCII letter immediately
 * followed by a colon (for example "c:"), and zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int is_letter = (drive >= 'a' && drive <= 'z') ||
                          (drive >= 'A' && drive <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!is_letter) {
        return 0;
    }

    return filename[1] == ':';
}
95  
96  int is_windows_drive(const char *filename)
97  {
98      if (is_windows_drive_prefix(filename) &&
99          filename[2] == '\0')
100          return 1;
101      if (strstart(filename, "\\\\.\\", NULL) ||
102          strstart(filename, "//./", NULL))
103          return 1;
104      return 0;
105  }
106  #endif
107  
108  size_t bdrv_opt_mem_align(BlockDriverState *bs)
109  {
110      if (!bs || !bs->drv) {
111          /* page size or 4k (hdd sector size) should be on the safe side */
112          return MAX(4096, qemu_real_host_page_size);
113      }
114  
115      return bs->bl.opt_mem_alignment;
116  }
117  
118  size_t bdrv_min_mem_align(BlockDriverState *bs)
119  {
120      if (!bs || !bs->drv) {
121          /* page size or 4k (hdd sector size) should be on the safe side */
122          return MAX(4096, qemu_real_host_page_size);
123      }
124  
125      return bs->bl.min_mem_alignment;
126  }
127  
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a colon
 * appears before the first path separator.  On Windows, drive
 * specifications are never treated as protocols and a backslash counts as
 * a separator as well.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Inspect the first colon or separator (or the terminating NUL) */
    return path[strcspn(path, stop_chars)] == ':';
}
145  
/*
 * Return non-zero if @path is absolute: it begins with '/', or, on Windows,
 * it is a drive specification (including names like "\\.\d:") or begins
 * with a backslash.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
158  
/*
 * Build the name of @filename relative to @base_path.
 *
 * An absolute @filename is simply duplicated.  Otherwise the result is the
 * leading part of @base_path -- up to and including its last path
 * separator, or, if that lies further right, its "<protocol>:" prefix --
 * followed by @filename.  URLs are supported this way.
 *
 * The returned string is newly allocated.
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *prefix_end = base_path;
    const char *last_sep;
    char *combined;
    int prefix_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* Keep at least the protocol prefix, if there is one */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif

    /* Extend the prefix to the directory part when that reaches further */
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }
    prefix_len = prefix_end - base_path;

    combined = g_malloc(prefix_len + strlen(filename) + 1);
    memcpy(combined, base_path, prefix_len);
    strcpy(combined + prefix_len, filename);

    return combined;
}
207  
208  /*
209   * Helper function for bdrv_parse_filename() implementations to remove optional
210   * protocol prefixes (especially "file:") from a filename and for putting the
211   * stripped filename into the options QDict if there is such a prefix.
212   */
213  void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
214                                        QDict *options)
215  {
216      if (strstart(filename, prefix, &filename)) {
217          /* Stripping the explicit protocol prefix may result in a protocol
218           * prefix being (wrongly) detected (if the filename contains a colon) */
219          if (path_has_protocol(filename)) {
220              QString *fat_filename;
221  
222              /* This means there is some colon before the first slash; therefore,
223               * this cannot be an absolute path */
224              assert(!path_is_absolute(filename));
225  
226              /* And we can thus fix the protocol detection issue by prefixing it
227               * by "./" */
228              fat_filename = qstring_from_str("./");
229              qstring_append(fat_filename, filename);
230  
231              assert(!path_has_protocol(qstring_get_str(fat_filename)));
232  
233              qdict_put(options, "filename", fat_filename);
234          } else {
235              /* If no protocol prefix was detected, we can use the shortened
236               * filename as-is */
237              qdict_put_str(options, "filename", filename);
238          }
239      }
240  }
241  
242  
243  /* Returns whether the image file is opened as read-only. Note that this can
244   * return false and writing to the image file is still not possible because the
245   * image is inactivated. */
246  bool bdrv_is_read_only(BlockDriverState *bs)
247  {
248      return bs->read_only;
249  }
250  
251  int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
252                             bool ignore_allow_rdw, Error **errp)
253  {
254      /* Do not set read_only if copy_on_read is enabled */
255      if (bs->copy_on_read && read_only) {
256          error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
257                     bdrv_get_device_or_node_name(bs));
258          return -EINVAL;
259      }
260  
261      /* Do not clear read_only if it is prohibited */
262      if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
263          !ignore_allow_rdw)
264      {
265          error_setg(errp, "Node '%s' is read only",
266                     bdrv_get_device_or_node_name(bs));
267          return -EPERM;
268      }
269  
270      return 0;
271  }
272  
273  /*
274   * Called by a driver that can only provide a read-only image.
275   *
276   * Returns 0 if the node is already read-only or it could switch the node to
277   * read-only because BDRV_O_AUTO_RDONLY is set.
278   *
279   * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
280   * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
281   * is not NULL, it is used as the error message for the Error object.
282   */
283  int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
284                                Error **errp)
285  {
286      int ret = 0;
287  
288      if (!(bs->open_flags & BDRV_O_RDWR)) {
289          return 0;
290      }
291      if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
292          goto fail;
293      }
294  
295      ret = bdrv_can_set_read_only(bs, true, false, NULL);
296      if (ret < 0) {
297          goto fail;
298      }
299  
300      bs->read_only = true;
301      bs->open_flags &= ~BDRV_O_RDWR;
302  
303      return 0;
304  
305  fail:
306      error_setg(errp, "%s", errmsg ?: "Image is read-only");
307      return -EACCES;
308  }
309  
310  /*
311   * If @backing is empty, this function returns NULL without setting
312   * @errp.  In all other cases, NULL will only be returned with @errp
313   * set.
314   *
315   * Therefore, a return value of NULL without @errp set means that
316   * there is no backing file; if @errp is set, there is one but its
317   * absolute filename cannot be generated.
318   */
319  char *bdrv_get_full_backing_filename_from_filename(const char *backed,
320                                                     const char *backing,
321                                                     Error **errp)
322  {
323      if (backing[0] == '\0') {
324          return NULL;
325      } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
326          return g_strdup(backing);
327      } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
328          error_setg(errp, "Cannot use relative backing file names for '%s'",
329                     backed);
330          return NULL;
331      } else {
332          return path_combine(backed, backing);
333      }
334  }
335  
336  /*
337   * If @filename is empty or NULL, this function returns NULL without
338   * setting @errp.  In all other cases, NULL will only be returned with
339   * @errp set.
340   */
341  static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
342                                           const char *filename, Error **errp)
343  {
344      char *dir, *full_name;
345  
346      if (!filename || filename[0] == '\0') {
347          return NULL;
348      } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
349          return g_strdup(filename);
350      }
351  
352      dir = bdrv_dirname(relative_to, errp);
353      if (!dir) {
354          return NULL;
355      }
356  
357      full_name = g_strconcat(dir, filename, NULL);
358      g_free(dir);
359      return full_name;
360  }
361  
362  char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
363  {
364      return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
365  }
366  
367  void bdrv_register(BlockDriver *bdrv)
368  {
369      assert(bdrv->format_name);
370      QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
371  }
372  
373  BlockDriverState *bdrv_new(void)
374  {
375      BlockDriverState *bs;
376      int i;
377  
378      bs = g_new0(BlockDriverState, 1);
379      QLIST_INIT(&bs->dirty_bitmaps);
380      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
381          QLIST_INIT(&bs->op_blockers[i]);
382      }
383      notifier_with_return_list_init(&bs->before_write_notifiers);
384      qemu_co_mutex_init(&bs->reqs_lock);
385      qemu_mutex_init(&bs->dirty_bitmap_mutex);
386      bs->refcnt = 1;
387      bs->aio_context = qemu_get_aio_context();
388  
389      qemu_co_queue_init(&bs->flush_queue);
390  
391      for (i = 0; i < bdrv_drain_all_count; i++) {
392          bdrv_drained_begin(bs);
393      }
394  
395      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
396  
397      return bs;
398  }
399  
400  static BlockDriver *bdrv_do_find_format(const char *format_name)
401  {
402      BlockDriver *drv1;
403  
404      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
405          if (!strcmp(drv1->format_name, format_name)) {
406              return drv1;
407          }
408      }
409  
410      return NULL;
411  }
412  
413  BlockDriver *bdrv_find_format(const char *format_name)
414  {
415      BlockDriver *drv1;
416      int i;
417  
418      drv1 = bdrv_do_find_format(format_name);
419      if (drv1) {
420          return drv1;
421      }
422  
423      /* The driver isn't registered, maybe we need to load a module */
424      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
425          if (!strcmp(block_driver_modules[i].format_name, format_name)) {
426              block_module_load_one(block_driver_modules[i].library_name);
427              break;
428          }
429      }
430  
431      return bdrv_do_find_format(format_name);
432  }
433  
434  static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
435  {
436      static const char *whitelist_rw[] = {
437          CONFIG_BDRV_RW_WHITELIST
438          NULL
439      };
440      static const char *whitelist_ro[] = {
441          CONFIG_BDRV_RO_WHITELIST
442          NULL
443      };
444      const char **p;
445  
446      if (!whitelist_rw[0] && !whitelist_ro[0]) {
447          return 1;               /* no whitelist, anything goes */
448      }
449  
450      for (p = whitelist_rw; *p; p++) {
451          if (!strcmp(format_name, *p)) {
452              return 1;
453          }
454      }
455      if (read_only) {
456          for (p = whitelist_ro; *p; p++) {
457              if (!strcmp(format_name, *p)) {
458                  return 1;
459              }
460          }
461      }
462      return 0;
463  }
464  
465  int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
466  {
467      return bdrv_format_is_whitelisted(drv->format_name, read_only);
468  }
469  
470  bool bdrv_uses_whitelist(void)
471  {
472      return use_bdrv_whitelist;
473  }
474  
/*
 * Arguments and results exchanged between bdrv_create() and its coroutine
 * entry point bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_co_create_opts() is called */
    char *filename;     /* copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* creation options passed through to the driver */
    int ret;            /* NOT_DONE until the coroutine stores its result */
    Error *err;         /* error reported by the driver, if any */
} CreateCo;
482  
483  static void coroutine_fn bdrv_create_co_entry(void *opaque)
484  {
485      Error *local_err = NULL;
486      int ret;
487  
488      CreateCo *cco = opaque;
489      assert(cco->drv);
490  
491      ret = cco->drv->bdrv_co_create_opts(cco->drv,
492                                          cco->filename, cco->opts, &local_err);
493      error_propagate(&cco->err, local_err);
494      cco->ret = ret;
495  }
496  
497  int bdrv_create(BlockDriver *drv, const char* filename,
498                  QemuOpts *opts, Error **errp)
499  {
500      int ret;
501  
502      Coroutine *co;
503      CreateCo cco = {
504          .drv = drv,
505          .filename = g_strdup(filename),
506          .opts = opts,
507          .ret = NOT_DONE,
508          .err = NULL,
509      };
510  
511      if (!drv->bdrv_co_create_opts) {
512          error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
513          ret = -ENOTSUP;
514          goto out;
515      }
516  
517      if (qemu_in_coroutine()) {
518          /* Fast-path if already in coroutine context */
519          bdrv_create_co_entry(&cco);
520      } else {
521          co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
522          qemu_coroutine_enter(co);
523          while (cco.ret == NOT_DONE) {
524              aio_poll(qemu_get_aio_context(), true);
525          }
526      }
527  
528      ret = cco.ret;
529      if (ret < 0) {
530          if (cco.err) {
531              error_propagate(errp, cco.err);
532          } else {
533              error_setg_errno(errp, -ret, "Could not create image");
534          }
535      }
536  
537  out:
538      g_free(cco.filename);
539      return ret;
540  }
541  
542  /**
543   * Helper function for bdrv_create_file_fallback(): Resize @blk to at
544   * least the given @minimum_size.
545   *
546   * On success, return @blk's actual length.
547   * Otherwise, return -errno.
548   */
549  static int64_t create_file_fallback_truncate(BlockBackend *blk,
550                                               int64_t minimum_size, Error **errp)
551  {
552      Error *local_err = NULL;
553      int64_t size;
554      int ret;
555  
556      ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
557                         &local_err);
558      if (ret < 0 && ret != -ENOTSUP) {
559          error_propagate(errp, local_err);
560          return ret;
561      }
562  
563      size = blk_getlength(blk);
564      if (size < 0) {
565          error_free(local_err);
566          error_setg_errno(errp, -size,
567                           "Failed to inquire the new image file's length");
568          return size;
569      }
570  
571      if (size < minimum_size) {
572          /* Need to grow the image, but we failed to do that */
573          error_propagate(errp, local_err);
574          return -ENOTSUP;
575      }
576  
577      error_free(local_err);
578      local_err = NULL;
579  
580      return size;
581  }
582  
583  /**
584   * Helper function for bdrv_create_file_fallback(): Zero the first
585   * sector to remove any potentially pre-existing image header.
586   */
587  static int create_file_fallback_zero_first_sector(BlockBackend *blk,
588                                                    int64_t current_size,
589                                                    Error **errp)
590  {
591      int64_t bytes_to_clear;
592      int ret;
593  
594      bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
595      if (bytes_to_clear) {
596          ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
597          if (ret < 0) {
598              error_setg_errno(errp, -ret,
599                               "Failed to clear the new image's first sector");
600              return ret;
601          }
602      }
603  
604      return 0;
605  }
606  
607  /**
608   * Simple implementation of bdrv_co_create_opts for protocol drivers
609   * which only support creation via opening a file
610   * (usually existing raw storage device)
611   */
612  int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
613                                              const char *filename,
614                                              QemuOpts *opts,
615                                              Error **errp)
616  {
617      BlockBackend *blk;
618      QDict *options;
619      int64_t size = 0;
620      char *buf = NULL;
621      PreallocMode prealloc;
622      Error *local_err = NULL;
623      int ret;
624  
625      size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
626      buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
627      prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
628                                 PREALLOC_MODE_OFF, &local_err);
629      g_free(buf);
630      if (local_err) {
631          error_propagate(errp, local_err);
632          return -EINVAL;
633      }
634  
635      if (prealloc != PREALLOC_MODE_OFF) {
636          error_setg(errp, "Unsupported preallocation mode '%s'",
637                     PreallocMode_str(prealloc));
638          return -ENOTSUP;
639      }
640  
641      options = qdict_new();
642      qdict_put_str(options, "driver", drv->format_name);
643  
644      blk = blk_new_open(filename, NULL, options,
645                         BDRV_O_RDWR | BDRV_O_RESIZE, errp);
646      if (!blk) {
647          error_prepend(errp, "Protocol driver '%s' does not support image "
648                        "creation, and opening the image failed: ",
649                        drv->format_name);
650          return -EINVAL;
651      }
652  
653      size = create_file_fallback_truncate(blk, size, errp);
654      if (size < 0) {
655          ret = size;
656          goto out;
657      }
658  
659      ret = create_file_fallback_zero_first_sector(blk, size, errp);
660      if (ret < 0) {
661          goto out;
662      }
663  
664      ret = 0;
665  out:
666      blk_unref(blk);
667      return ret;
668  }
669  
670  int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
671  {
672      BlockDriver *drv;
673  
674      drv = bdrv_find_protocol(filename, true, errp);
675      if (drv == NULL) {
676          return -ENOENT;
677      }
678  
679      return bdrv_create(drv, filename, opts, errp);
680  }
681  
682  int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
683  {
684      Error *local_err = NULL;
685      int ret;
686  
687      assert(bs != NULL);
688  
689      if (!bs->drv) {
690          error_setg(errp, "Block node '%s' is not opened", bs->filename);
691          return -ENOMEDIUM;
692      }
693  
694      if (!bs->drv->bdrv_co_delete_file) {
695          error_setg(errp, "Driver '%s' does not support image deletion",
696                     bs->drv->format_name);
697          return -ENOTSUP;
698      }
699  
700      ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
701      if (ret < 0) {
702          error_propagate(errp, local_err);
703      }
704  
705      return ret;
706  }
707  
708  /**
709   * Try to get @bs's logical and physical block size.
710   * On success, store them in @bsz struct and return 0.
711   * On failure return -errno.
712   * @bs must not be empty.
713   */
714  int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
715  {
716      BlockDriver *drv = bs->drv;
717      BlockDriverState *filtered = bdrv_filter_bs(bs);
718  
719      if (drv && drv->bdrv_probe_blocksizes) {
720          return drv->bdrv_probe_blocksizes(bs, bsz);
721      } else if (filtered) {
722          return bdrv_probe_blocksizes(filtered, bsz);
723      }
724  
725      return -ENOTSUP;
726  }
727  
728  /**
729   * Try to get @bs's geometry (cyls, heads, sectors).
730   * On success, store them in @geo struct and return 0.
731   * On failure return -errno.
732   * @bs must not be empty.
733   */
734  int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
735  {
736      BlockDriver *drv = bs->drv;
737      BlockDriverState *filtered = bdrv_filter_bs(bs);
738  
739      if (drv && drv->bdrv_probe_geometry) {
740          return drv->bdrv_probe_geometry(bs, geo);
741      } else if (filtered) {
742          return bdrv_probe_geometry(filtered, geo);
743      }
744  
745      return -ENOTSUP;
746  }
747  
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the capacity of
 * that buffer in bytes.  On POSIX hosts the file is created in $TMPDIR, or
 * in /var/tmp if TMPDIR is not set.  On Windows @size must be at least
 * MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * overwrite errno, and the caller must see the close() error.
         */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
783  
784  /*
785   * Detect host devices. By convention, /dev/cdrom[N] is always
786   * recognized as a host CDROM.
787   */
788  static BlockDriver *find_hdev_driver(const char *filename)
789  {
790      int score_max = 0, score;
791      BlockDriver *drv = NULL, *d;
792  
793      QLIST_FOREACH(d, &bdrv_drivers, list) {
794          if (d->bdrv_probe_device) {
795              score = d->bdrv_probe_device(filename);
796              if (score > score_max) {
797                  score_max = score;
798                  drv = d;
799              }
800          }
801      }
802  
803      return drv;
804  }
805  
806  static BlockDriver *bdrv_do_find_protocol(const char *protocol)
807  {
808      BlockDriver *drv1;
809  
810      QLIST_FOREACH(drv1, &bdrv_drivers, list) {
811          if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
812              return drv1;
813          }
814      }
815  
816      return NULL;
817  }
818  
819  BlockDriver *bdrv_find_protocol(const char *filename,
820                                  bool allow_protocol_prefix,
821                                  Error **errp)
822  {
823      BlockDriver *drv1;
824      char protocol[128];
825      int len;
826      const char *p;
827      int i;
828  
829      /* TODO Drivers without bdrv_file_open must be specified explicitly */
830  
831      /*
832       * XXX(hch): we really should not let host device detection
833       * override an explicit protocol specification, but moving this
834       * later breaks access to device names with colons in them.
835       * Thanks to the brain-dead persistent naming schemes on udev-
836       * based Linux systems those actually are quite common.
837       */
838      drv1 = find_hdev_driver(filename);
839      if (drv1) {
840          return drv1;
841      }
842  
843      if (!path_has_protocol(filename) || !allow_protocol_prefix) {
844          return &bdrv_file;
845      }
846  
847      p = strchr(filename, ':');
848      assert(p != NULL);
849      len = p - filename;
850      if (len > sizeof(protocol) - 1)
851          len = sizeof(protocol) - 1;
852      memcpy(protocol, filename, len);
853      protocol[len] = '\0';
854  
855      drv1 = bdrv_do_find_protocol(protocol);
856      if (drv1) {
857          return drv1;
858      }
859  
860      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
861          if (block_driver_modules[i].protocol_name &&
862              !strcmp(block_driver_modules[i].protocol_name, protocol)) {
863              block_module_load_one(block_driver_modules[i].library_name);
864              break;
865          }
866      }
867  
868      drv1 = bdrv_do_find_protocol(protocol);
869      if (!drv1) {
870          error_setg(errp, "Unknown protocol '%s'", protocol);
871      }
872      return drv1;
873  }
874  
875  /*
876   * Guess image format by probing its contents.
877   * This is not a good idea when your image is raw (CVE-2008-2004), but
878   * we do it anyway for backward compatibility.
879   *
880   * @buf         contains the image's first @buf_size bytes.
881   * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
882   *              but can be smaller if the image file is smaller)
883   * @filename    is its filename.
884   *
885   * For all block drivers, call the bdrv_probe() method to get its
886   * probing score.
887   * Return the first block driver with the highest probing score.
888   */
889  BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
890                              const char *filename)
891  {
892      int score_max = 0, score;
893      BlockDriver *drv = NULL, *d;
894  
895      QLIST_FOREACH(d, &bdrv_drivers, list) {
896          if (d->bdrv_probe) {
897              score = d->bdrv_probe(buf, buf_size, filename);
898              if (score > score_max) {
899                  score_max = score;
900                  drv = d;
901              }
902          }
903      }
904  
905      return drv;
906  }
907  
908  static int find_image_format(BlockBackend *file, const char *filename,
909                               BlockDriver **pdrv, Error **errp)
910  {
911      BlockDriver *drv;
912      uint8_t buf[BLOCK_PROBE_BUF_SIZE];
913      int ret = 0;
914  
915      /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
916      if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
917          *pdrv = &bdrv_raw;
918          return ret;
919      }
920  
921      ret = blk_pread(file, 0, buf, sizeof(buf));
922      if (ret < 0) {
923          error_setg_errno(errp, -ret, "Could not read image for determining its "
924                           "format");
925          *pdrv = NULL;
926          return ret;
927      }
928  
929      drv = bdrv_probe_all(buf, ret, filename);
930      if (!drv) {
931          error_setg(errp, "Could not determine image format: No compatible "
932                     "driver found");
933          ret = -ENOENT;
934      }
935      *pdrv = drv;
936      return ret;
937  }
938  
939  /**
940   * Set the current 'total_sectors' value
941   * Return 0 on success, -errno on error.
942   */
943  int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
944  {
945      BlockDriver *drv = bs->drv;
946  
947      if (!drv) {
948          return -ENOMEDIUM;
949      }
950  
951      /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
952      if (bdrv_is_sg(bs))
953          return 0;
954  
955      /* query actual device if possible, otherwise just trust the hint */
956      if (drv->bdrv_getlength) {
957          int64_t length = drv->bdrv_getlength(bs);
958          if (length < 0) {
959              return length;
960          }
961          hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
962      }
963  
964      bs->total_sectors = hint;
965  
966      if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
967          return -EFBIG;
968      }
969  
970      return 0;
971  }
972  
973  /**
974   * Combines a QDict of new block driver @options with any missing options taken
975   * from @old_options, so that leaving out an option defaults to its old value.
976   */
977  static void bdrv_join_options(BlockDriverState *bs, QDict *options,
978                                QDict *old_options)
979  {
980      if (bs->drv && bs->drv->bdrv_join_options) {
981          bs->drv->bdrv_join_options(options, old_options);
982      } else {
983          qdict_join(options, old_options, false);
984      }
985  }
986  
987  static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
988                                                              int open_flags,
989                                                              Error **errp)
990  {
991      Error *local_err = NULL;
992      char *value = qemu_opt_get_del(opts, "detect-zeroes");
993      BlockdevDetectZeroesOptions detect_zeroes =
994          qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
995                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
996      g_free(value);
997      if (local_err) {
998          error_propagate(errp, local_err);
999          return detect_zeroes;
1000      }
1001  
1002      if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1003          !(open_flags & BDRV_O_UNMAP))
1004      {
1005          error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1006                     "without setting discard operation to unmap");
1007      }
1008  
1009      return detect_zeroes;
1010  }
1011  
1012  /**
1013   * Set open flags for aio engine
1014   *
1015   * Return 0 on success, -1 if the engine specified is invalid
1016   */
1017  int bdrv_parse_aio(const char *mode, int *flags)
1018  {
1019      if (!strcmp(mode, "threads")) {
1020          /* do nothing, default */
1021      } else if (!strcmp(mode, "native")) {
1022          *flags |= BDRV_O_NATIVE_AIO;
1023  #ifdef CONFIG_LINUX_IO_URING
1024      } else if (!strcmp(mode, "io_uring")) {
1025          *flags |= BDRV_O_IO_URING;
1026  #endif
1027      } else {
1028          return -1;
1029      }
1030  
1031      return 0;
1032  }
1033  
1034  /**
1035   * Set open flags for a given discard mode
1036   *
1037   * Return 0 on success, -1 if the discard mode was invalid.
1038   */
1039  int bdrv_parse_discard_flags(const char *mode, int *flags)
1040  {
1041      *flags &= ~BDRV_O_UNMAP;
1042  
1043      if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1044          /* do nothing */
1045      } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1046          *flags |= BDRV_O_UNMAP;
1047      } else {
1048          return -1;
1049      }
1050  
1051      return 0;
1052  }
1053  
1054  /**
1055   * Set open flags for a given cache mode
1056   *
1057   * Return 0 on success, -1 if the cache mode was invalid.
1058   */
1059  int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
1060  {
1061      *flags &= ~BDRV_O_CACHE_MASK;
1062  
1063      if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
1064          *writethrough = false;
1065          *flags |= BDRV_O_NOCACHE;
1066      } else if (!strcmp(mode, "directsync")) {
1067          *writethrough = true;
1068          *flags |= BDRV_O_NOCACHE;
1069      } else if (!strcmp(mode, "writeback")) {
1070          *writethrough = false;
1071      } else if (!strcmp(mode, "unsafe")) {
1072          *writethrough = false;
1073          *flags |= BDRV_O_NO_FLUSH;
1074      } else if (!strcmp(mode, "writethrough")) {
1075          *writethrough = true;
1076      } else {
1077          return -1;
1078      }
1079  
1080      return 0;
1081  }
1082  
1083  static char *bdrv_child_get_parent_desc(BdrvChild *c)
1084  {
1085      BlockDriverState *parent = c->opaque;
1086      return g_strdup(bdrv_get_device_or_node_name(parent));
1087  }
1088  
1089  static void bdrv_child_cb_drained_begin(BdrvChild *child)
1090  {
1091      BlockDriverState *bs = child->opaque;
1092      bdrv_do_drained_begin_quiesce(bs, NULL, false);
1093  }
1094  
1095  static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1096  {
1097      BlockDriverState *bs = child->opaque;
1098      return bdrv_drain_poll(bs, false, NULL, false);
1099  }
1100  
1101  static void bdrv_child_cb_drained_end(BdrvChild *child,
1102                                        int *drained_end_counter)
1103  {
1104      BlockDriverState *bs = child->opaque;
1105      bdrv_drained_end_no_poll(bs, drained_end_counter);
1106  }
1107  
1108  static int bdrv_child_cb_inactivate(BdrvChild *child)
1109  {
1110      BlockDriverState *bs = child->opaque;
1111      assert(bs->open_flags & BDRV_O_INACTIVE);
1112      return 0;
1113  }
1114  
1115  static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1116                                            GSList **ignore, Error **errp)
1117  {
1118      BlockDriverState *bs = child->opaque;
1119      return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
1120  }
1121  
1122  static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1123                                        GSList **ignore)
1124  {
1125      BlockDriverState *bs = child->opaque;
1126      return bdrv_set_aio_context_ignore(bs, ctx, ignore);
1127  }
1128  
1129  /*
1130   * Returns the options and flags that a temporary snapshot should get, based on
1131   * the originally requested flags (the originally requested image will have
1132   * flags like a backing file)
1133   */
1134  static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1135                                         int parent_flags, QDict *parent_options)
1136  {
1137      *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1138  
1139      /* For temporary files, unconditional cache=unsafe is fine */
1140      qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1141      qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
1142  
1143      /* Copy the read-only and discard options from the parent */
1144      qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1145      qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
1146  
1147      /* aio=native doesn't work for cache.direct=off, so disable it for the
1148       * temporary snapshot */
1149      *child_flags &= ~BDRV_O_NATIVE_AIO;
1150  }
1151  
1152  static void bdrv_backing_attach(BdrvChild *c)
1153  {
1154      BlockDriverState *parent = c->opaque;
1155      BlockDriverState *backing_hd = c->bs;
1156  
1157      assert(!parent->backing_blocker);
1158      error_setg(&parent->backing_blocker,
1159                 "node is used as backing hd of '%s'",
1160                 bdrv_get_device_or_node_name(parent));
1161  
1162      bdrv_refresh_filename(backing_hd);
1163  
1164      parent->open_flags &= ~BDRV_O_NO_BACKING;
1165  
1166      bdrv_op_block_all(backing_hd, parent->backing_blocker);
1167      /* Otherwise we won't be able to commit or stream */
1168      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1169                      parent->backing_blocker);
1170      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1171                      parent->backing_blocker);
1172      /*
1173       * We do backup in 3 ways:
1174       * 1. drive backup
1175       *    The target bs is new opened, and the source is top BDS
1176       * 2. blockdev backup
1177       *    Both the source and the target are top BDSes.
1178       * 3. internal backup(used for block replication)
1179       *    Both the source and the target are backing file
1180       *
1181       * In case 1 and 2, neither the source nor the target is the backing file.
1182       * In case 3, we will block the top BDS, so there is only one block job
1183       * for the top BDS and its backing chain.
1184       */
1185      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1186                      parent->backing_blocker);
1187      bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1188                      parent->backing_blocker);
1189  }
1190  
1191  static void bdrv_backing_detach(BdrvChild *c)
1192  {
1193      BlockDriverState *parent = c->opaque;
1194  
1195      assert(parent->backing_blocker);
1196      bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1197      error_free(parent->backing_blocker);
1198      parent->backing_blocker = NULL;
1199  }
1200  
1201  static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1202                                          const char *filename, Error **errp)
1203  {
1204      BlockDriverState *parent = c->opaque;
1205      bool read_only = bdrv_is_read_only(parent);
1206      int ret;
1207  
1208      if (read_only) {
1209          ret = bdrv_reopen_set_read_only(parent, false, errp);
1210          if (ret < 0) {
1211              return ret;
1212          }
1213      }
1214  
1215      ret = bdrv_change_backing_file(parent, filename,
1216                                     base->drv ? base->drv->format_name : "",
1217                                     false);
1218      if (ret < 0) {
1219          error_setg_errno(errp, -ret, "Could not update backing file link");
1220      }
1221  
1222      if (read_only) {
1223          bdrv_reopen_set_read_only(parent, true, NULL);
1224      }
1225  
1226      return ret;
1227  }
1228  
1229  /*
1230   * Returns the options and flags that a generic child of a BDS should
1231   * get, based on the given options and flags for the parent BDS.
1232   */
1233  static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1234                                     int *child_flags, QDict *child_options,
1235                                     int parent_flags, QDict *parent_options)
1236  {
1237      int flags = parent_flags;
1238  
1239      /*
1240       * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1241       * Generally, the question to answer is: Should this child be
1242       * format-probed by default?
1243       */
1244  
1245      /*
1246       * Pure and non-filtered data children of non-format nodes should
1247       * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1248       * set).  This only affects a very limited set of drivers (namely
1249       * quorum and blkverify when this comment was written).
1250       * Force-clear BDRV_O_PROTOCOL then.
1251       */
1252      if (!parent_is_format &&
1253          (role & BDRV_CHILD_DATA) &&
1254          !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1255      {
1256          flags &= ~BDRV_O_PROTOCOL;
1257      }
1258  
1259      /*
1260       * All children of format nodes (except for COW children) and all
1261       * metadata children in general should never be format-probed.
1262       * Force-set BDRV_O_PROTOCOL then.
1263       */
1264      if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1265          (role & BDRV_CHILD_METADATA))
1266      {
1267          flags |= BDRV_O_PROTOCOL;
1268      }
1269  
1270      /*
1271       * If the cache mode isn't explicitly set, inherit direct and no-flush from
1272       * the parent.
1273       */
1274      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1275      qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1276      qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1277  
1278      if (role & BDRV_CHILD_COW) {
1279          /* backing files are opened read-only by default */
1280          qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1281          qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1282      } else {
1283          /* Inherit the read-only option from the parent if it's not set */
1284          qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1285          qdict_copy_default(child_options, parent_options,
1286                             BDRV_OPT_AUTO_READ_ONLY);
1287      }
1288  
1289      /*
1290       * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1291       * can default to enable it on lower layers regardless of the
1292       * parent option.
1293       */
1294      qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1295  
1296      /* Clear flags that only apply to the top layer */
1297      flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1298  
1299      if (role & BDRV_CHILD_METADATA) {
1300          flags &= ~BDRV_O_NO_IO;
1301      }
1302      if (role & BDRV_CHILD_COW) {
1303          flags &= ~BDRV_O_TEMPORARY;
1304      }
1305  
1306      *child_flags = flags;
1307  }
1308  
1309  static void bdrv_child_cb_attach(BdrvChild *child)
1310  {
1311      BlockDriverState *bs = child->opaque;
1312  
1313      if (child->role & BDRV_CHILD_COW) {
1314          bdrv_backing_attach(child);
1315      }
1316  
1317      bdrv_apply_subtree_drain(child, bs);
1318  }
1319  
1320  static void bdrv_child_cb_detach(BdrvChild *child)
1321  {
1322      BlockDriverState *bs = child->opaque;
1323  
1324      if (child->role & BDRV_CHILD_COW) {
1325          bdrv_backing_detach(child);
1326      }
1327  
1328      bdrv_unapply_subtree_drain(child, bs);
1329  }
1330  
1331  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1332                                           const char *filename, Error **errp)
1333  {
1334      if (c->role & BDRV_CHILD_COW) {
1335          return bdrv_backing_update_filename(c, base, filename, errp);
1336      }
1337      return 0;
1338  }
1339  
1340  const BdrvChildClass child_of_bds = {
1341      .parent_is_bds   = true,
1342      .get_parent_desc = bdrv_child_get_parent_desc,
1343      .inherit_options = bdrv_inherited_options,
1344      .drained_begin   = bdrv_child_cb_drained_begin,
1345      .drained_poll    = bdrv_child_cb_drained_poll,
1346      .drained_end     = bdrv_child_cb_drained_end,
1347      .attach          = bdrv_child_cb_attach,
1348      .detach          = bdrv_child_cb_detach,
1349      .inactivate      = bdrv_child_cb_inactivate,
1350      .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
1351      .set_aio_ctx     = bdrv_child_cb_set_aio_ctx,
1352      .update_filename = bdrv_child_cb_update_filename,
1353  };
1354  
1355  static int bdrv_open_flags(BlockDriverState *bs, int flags)
1356  {
1357      int open_flags = flags;
1358  
1359      /*
1360       * Clear flags that are internal to the block layer before opening the
1361       * image.
1362       */
1363      open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1364  
1365      return open_flags;
1366  }
1367  
1368  static void update_flags_from_options(int *flags, QemuOpts *opts)
1369  {
1370      *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
1371  
1372      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1373          *flags |= BDRV_O_NO_FLUSH;
1374      }
1375  
1376      if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1377          *flags |= BDRV_O_NOCACHE;
1378      }
1379  
1380      if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
1381          *flags |= BDRV_O_RDWR;
1382      }
1383  
1384      if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1385          *flags |= BDRV_O_AUTO_RDONLY;
1386      }
1387  }
1388  
1389  static void update_options_from_flags(QDict *options, int flags)
1390  {
1391      if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1392          qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1393      }
1394      if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1395          qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1396                         flags & BDRV_O_NO_FLUSH);
1397      }
1398      if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1399          qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1400      }
1401      if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1402          qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1403                         flags & BDRV_O_AUTO_RDONLY);
1404      }
1405  }
1406  
1407  static void bdrv_assign_node_name(BlockDriverState *bs,
1408                                    const char *node_name,
1409                                    Error **errp)
1410  {
1411      char *gen_node_name = NULL;
1412  
1413      if (!node_name) {
1414          node_name = gen_node_name = id_generate(ID_BLOCK);
1415      } else if (!id_wellformed(node_name)) {
1416          /*
1417           * Check for empty string or invalid characters, but not if it is
1418           * generated (generated names use characters not available to the user)
1419           */
1420          error_setg(errp, "Invalid node name");
1421          return;
1422      }
1423  
1424      /* takes care of avoiding namespaces collisions */
1425      if (blk_by_name(node_name)) {
1426          error_setg(errp, "node-name=%s is conflicting with a device id",
1427                     node_name);
1428          goto out;
1429      }
1430  
1431      /* takes care of avoiding duplicates node names */
1432      if (bdrv_find_node(node_name)) {
1433          error_setg(errp, "Duplicate node name");
1434          goto out;
1435      }
1436  
1437      /* Make sure that the node name isn't truncated */
1438      if (strlen(node_name) >= sizeof(bs->node_name)) {
1439          error_setg(errp, "Node name too long");
1440          goto out;
1441      }
1442  
1443      /* copy node name into the bs and insert it into the graph list */
1444      pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1445      QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1446  out:
1447      g_free(gen_node_name);
1448  }
1449  
1450  static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1451                              const char *node_name, QDict *options,
1452                              int open_flags, Error **errp)
1453  {
1454      Error *local_err = NULL;
1455      int i, ret;
1456  
1457      bdrv_assign_node_name(bs, node_name, &local_err);
1458      if (local_err) {
1459          error_propagate(errp, local_err);
1460          return -EINVAL;
1461      }
1462  
1463      bs->drv = drv;
1464      bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1465      bs->opaque = g_malloc0(drv->instance_size);
1466  
1467      if (drv->bdrv_file_open) {
1468          assert(!drv->bdrv_needs_filename || bs->filename[0]);
1469          ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1470      } else if (drv->bdrv_open) {
1471          ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1472      } else {
1473          ret = 0;
1474      }
1475  
1476      if (ret < 0) {
1477          if (local_err) {
1478              error_propagate(errp, local_err);
1479          } else if (bs->filename[0]) {
1480              error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1481          } else {
1482              error_setg_errno(errp, -ret, "Could not open image");
1483          }
1484          goto open_failed;
1485      }
1486  
1487      ret = refresh_total_sectors(bs, bs->total_sectors);
1488      if (ret < 0) {
1489          error_setg_errno(errp, -ret, "Could not refresh total sector count");
1490          return ret;
1491      }
1492  
1493      bdrv_refresh_limits(bs, &local_err);
1494      if (local_err) {
1495          error_propagate(errp, local_err);
1496          return -EINVAL;
1497      }
1498  
1499      assert(bdrv_opt_mem_align(bs) != 0);
1500      assert(bdrv_min_mem_align(bs) != 0);
1501      assert(is_power_of_2(bs->bl.request_alignment));
1502  
1503      for (i = 0; i < bs->quiesce_counter; i++) {
1504          if (drv->bdrv_co_drain_begin) {
1505              drv->bdrv_co_drain_begin(bs);
1506          }
1507      }
1508  
1509      return 0;
1510  open_failed:
1511      bs->drv = NULL;
1512      if (bs->file != NULL) {
1513          bdrv_unref_child(bs, bs->file);
1514          bs->file = NULL;
1515      }
1516      g_free(bs->opaque);
1517      bs->opaque = NULL;
1518      return ret;
1519  }
1520  
1521  BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1522                                         int flags, Error **errp)
1523  {
1524      BlockDriverState *bs;
1525      int ret;
1526  
1527      bs = bdrv_new();
1528      bs->open_flags = flags;
1529      bs->explicit_options = qdict_new();
1530      bs->options = qdict_new();
1531      bs->opaque = NULL;
1532  
1533      update_options_from_flags(bs->options, flags);
1534  
1535      ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1536      if (ret < 0) {
1537          qobject_unref(bs->explicit_options);
1538          bs->explicit_options = NULL;
1539          qobject_unref(bs->options);
1540          bs->options = NULL;
1541          bdrv_unref(bs);
1542          return NULL;
1543      }
1544  
1545      return bs;
1546  }
1547  
1548  QemuOptsList bdrv_runtime_opts = {
1549      .name = "bdrv_common",
1550      .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1551      .desc = {
1552          {
1553              .name = "node-name",
1554              .type = QEMU_OPT_STRING,
1555              .help = "Node name of the block device node",
1556          },
1557          {
1558              .name = "driver",
1559              .type = QEMU_OPT_STRING,
1560              .help = "Block driver to use for the node",
1561          },
1562          {
1563              .name = BDRV_OPT_CACHE_DIRECT,
1564              .type = QEMU_OPT_BOOL,
1565              .help = "Bypass software writeback cache on the host",
1566          },
1567          {
1568              .name = BDRV_OPT_CACHE_NO_FLUSH,
1569              .type = QEMU_OPT_BOOL,
1570              .help = "Ignore flush requests",
1571          },
1572          {
1573              .name = BDRV_OPT_READ_ONLY,
1574              .type = QEMU_OPT_BOOL,
1575              .help = "Node is opened in read-only mode",
1576          },
1577          {
1578              .name = BDRV_OPT_AUTO_READ_ONLY,
1579              .type = QEMU_OPT_BOOL,
1580              .help = "Node can become read-only if opening read-write fails",
1581          },
1582          {
1583              .name = "detect-zeroes",
1584              .type = QEMU_OPT_STRING,
1585              .help = "try to optimize zero writes (off, on, unmap)",
1586          },
1587          {
1588              .name = BDRV_OPT_DISCARD,
1589              .type = QEMU_OPT_STRING,
1590              .help = "discard operation (ignore/off, unmap/on)",
1591          },
1592          {
1593              .name = BDRV_OPT_FORCE_SHARE,
1594              .type = QEMU_OPT_BOOL,
1595              .help = "always accept other writers (default: off)",
1596          },
1597          { /* end of list */ }
1598      },
1599  };
1600  
1601  QemuOptsList bdrv_create_opts_simple = {
1602      .name = "simple-create-opts",
1603      .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
1604      .desc = {
1605          {
1606              .name = BLOCK_OPT_SIZE,
1607              .type = QEMU_OPT_SIZE,
1608              .help = "Virtual disk size"
1609          },
1610          {
1611              .name = BLOCK_OPT_PREALLOC,
1612              .type = QEMU_OPT_STRING,
1613              .help = "Preallocation mode (allowed values: off)"
1614          },
1615          { /* end of list */ }
1616      }
1617  };
1618  
1619  /*
1620   * Common part for opening disk images and files
1621   *
1622   * Removes all processed options from *options.
1623   */
1624  static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1625                              QDict *options, Error **errp)
1626  {
1627      int ret, open_flags;
1628      const char *filename;
1629      const char *driver_name = NULL;
1630      const char *node_name = NULL;
1631      const char *discard;
1632      QemuOpts *opts;
1633      BlockDriver *drv;
1634      Error *local_err = NULL;
1635  
1636      assert(bs->file == NULL);
1637      assert(options != NULL && bs->options != options);
1638  
1639      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1640      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1641          ret = -EINVAL;
1642          goto fail_opts;
1643      }
1644  
1645      update_flags_from_options(&bs->open_flags, opts);
1646  
1647      driver_name = qemu_opt_get(opts, "driver");
1648      drv = bdrv_find_format(driver_name);
1649      assert(drv != NULL);
1650  
1651      bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1652  
1653      if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1654          error_setg(errp,
1655                     BDRV_OPT_FORCE_SHARE
1656                     "=on can only be used with read-only images");
1657          ret = -EINVAL;
1658          goto fail_opts;
1659      }
1660  
1661      if (file != NULL) {
1662          bdrv_refresh_filename(blk_bs(file));
1663          filename = blk_bs(file)->filename;
1664      } else {
1665          /*
1666           * Caution: while qdict_get_try_str() is fine, getting
1667           * non-string types would require more care.  When @options
1668           * come from -blockdev or blockdev_add, its members are typed
1669           * according to the QAPI schema, but when they come from
1670           * -drive, they're all QString.
1671           */
1672          filename = qdict_get_try_str(options, "filename");
1673      }
1674  
1675      if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1676          error_setg(errp, "The '%s' block driver requires a file name",
1677                     drv->format_name);
1678          ret = -EINVAL;
1679          goto fail_opts;
1680      }
1681  
1682      trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1683                             drv->format_name);
1684  
1685      bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1686  
1687      if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
1688          if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
1689              ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
1690          } else {
1691              ret = -ENOTSUP;
1692          }
1693          if (ret < 0) {
1694              error_setg(errp,
1695                         !bs->read_only && bdrv_is_whitelisted(drv, true)
1696                         ? "Driver '%s' can only be used for read-only devices"
1697                         : "Driver '%s' is not whitelisted",
1698                         drv->format_name);
1699              goto fail_opts;
1700          }
1701      }
1702  
1703      /* bdrv_new() and bdrv_close() make it so */
1704      assert(qatomic_read(&bs->copy_on_read) == 0);
1705  
1706      if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1707          if (!bs->read_only) {
1708              bdrv_enable_copy_on_read(bs);
1709          } else {
1710              error_setg(errp, "Can't use copy-on-read on read-only device");
1711              ret = -EINVAL;
1712              goto fail_opts;
1713          }
1714      }
1715  
1716      discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
1717      if (discard != NULL) {
1718          if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1719              error_setg(errp, "Invalid discard option");
1720              ret = -EINVAL;
1721              goto fail_opts;
1722          }
1723      }
1724  
1725      bs->detect_zeroes =
1726          bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1727      if (local_err) {
1728          error_propagate(errp, local_err);
1729          ret = -EINVAL;
1730          goto fail_opts;
1731      }
1732  
1733      if (filename != NULL) {
1734          pstrcpy(bs->filename, sizeof(bs->filename), filename);
1735      } else {
1736          bs->filename[0] = '\0';
1737      }
1738      pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1739  
1740      /* Open the image, either directly or using a protocol */
1741      open_flags = bdrv_open_flags(bs, bs->open_flags);
1742      node_name = qemu_opt_get(opts, "node-name");
1743  
1744      assert(!drv->bdrv_file_open || file == NULL);
1745      ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1746      if (ret < 0) {
1747          goto fail_opts;
1748      }
1749  
1750      qemu_opts_del(opts);
1751      return 0;
1752  
1753  fail_opts:
1754      qemu_opts_del(opts);
1755      return ret;
1756  }
1757  
1758  static QDict *parse_json_filename(const char *filename, Error **errp)
1759  {
1760      QObject *options_obj;
1761      QDict *options;
1762      int ret;
1763  
1764      ret = strstart(filename, "json:", &filename);
1765      assert(ret);
1766  
1767      options_obj = qobject_from_json(filename, errp);
1768      if (!options_obj) {
1769          error_prepend(errp, "Could not parse the JSON options: ");
1770          return NULL;
1771      }
1772  
1773      options = qobject_to(QDict, options_obj);
1774      if (!options) {
1775          qobject_unref(options_obj);
1776          error_setg(errp, "Invalid JSON object given");
1777          return NULL;
1778      }
1779  
1780      qdict_flatten(options);
1781  
1782      return options;
1783  }
1784  
1785  static void parse_json_protocol(QDict *options, const char **pfilename,
1786                                  Error **errp)
1787  {
1788      QDict *json_options;
1789      Error *local_err = NULL;
1790  
1791      /* Parse json: pseudo-protocol */
1792      if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1793          return;
1794      }
1795  
1796      json_options = parse_json_filename(*pfilename, &local_err);
1797      if (local_err) {
1798          error_propagate(errp, local_err);
1799          return;
1800      }
1801  
1802      /* Options given in the filename have lower priority than options
1803       * specified directly */
1804      qdict_join(options, json_options, false);
1805      qobject_unref(json_options);
1806      *pfilename = NULL;
1807  }
1808  
1809  /*
1810   * Fills in default options for opening images and converts the legacy
1811   * filename/flags pair to option QDict entries.
1812   * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1813   * block driver has been specified explicitly.
1814   */
1815  static int bdrv_fill_options(QDict **options, const char *filename,
1816                               int *flags, Error **errp)
1817  {
1818      const char *drvname;
1819      bool protocol = *flags & BDRV_O_PROTOCOL;
1820      bool parse_filename = false;
1821      BlockDriver *drv = NULL;
1822      Error *local_err = NULL;
1823  
1824      /*
1825       * Caution: while qdict_get_try_str() is fine, getting non-string
1826       * types would require more care.  When @options come from
1827       * -blockdev or blockdev_add, its members are typed according to
1828       * the QAPI schema, but when they come from -drive, they're all
1829       * QString.
1830       */
1831      drvname = qdict_get_try_str(*options, "driver");
1832      if (drvname) {
1833          drv = bdrv_find_format(drvname);
1834          if (!drv) {
1835              error_setg(errp, "Unknown driver '%s'", drvname);
1836              return -ENOENT;
1837          }
1838          /* If the user has explicitly specified the driver, this choice should
1839           * override the BDRV_O_PROTOCOL flag */
1840          protocol = drv->bdrv_file_open;
1841      }
1842  
1843      if (protocol) {
1844          *flags |= BDRV_O_PROTOCOL;
1845      } else {
1846          *flags &= ~BDRV_O_PROTOCOL;
1847      }
1848  
1849      /* Translate cache options from flags into options */
1850      update_options_from_flags(*options, *flags);
1851  
1852      /* Fetch the file name from the options QDict if necessary */
1853      if (protocol && filename) {
1854          if (!qdict_haskey(*options, "filename")) {
1855              qdict_put_str(*options, "filename", filename);
1856              parse_filename = true;
1857          } else {
1858              error_setg(errp, "Can't specify 'file' and 'filename' options at "
1859                               "the same time");
1860              return -EINVAL;
1861          }
1862      }
1863  
1864      /* Find the right block driver */
1865      /* See cautionary note on accessing @options above */
1866      filename = qdict_get_try_str(*options, "filename");
1867  
1868      if (!drvname && protocol) {
1869          if (filename) {
1870              drv = bdrv_find_protocol(filename, parse_filename, errp);
1871              if (!drv) {
1872                  return -EINVAL;
1873              }
1874  
1875              drvname = drv->format_name;
1876              qdict_put_str(*options, "driver", drvname);
1877          } else {
1878              error_setg(errp, "Must specify either driver or file");
1879              return -EINVAL;
1880          }
1881      }
1882  
1883      assert(drv || !protocol);
1884  
1885      /* Driver-specific filename parsing */
1886      if (drv && drv->bdrv_parse_filename && parse_filename) {
1887          drv->bdrv_parse_filename(filename, *options, &local_err);
1888          if (local_err) {
1889              error_propagate(errp, local_err);
1890              return -EINVAL;
1891          }
1892  
1893          if (!drv->bdrv_needs_filename) {
1894              qdict_del(*options, "filename");
1895          }
1896      }
1897  
1898      return 0;
1899  }
1900  
1901  static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
1902                                   uint64_t perm, uint64_t shared,
1903                                   GSList *ignore_children,
1904                                   bool *tighten_restrictions, Error **errp);
1905  static void bdrv_child_abort_perm_update(BdrvChild *c);
1906  static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
1907  
1908  typedef struct BlockReopenQueueEntry {
1909       bool prepared;
1910       bool perms_checked;
1911       BDRVReopenState state;
1912       QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
1913  } BlockReopenQueueEntry;
1914  
1915  /*
1916   * Return the flags that @bs will have after the reopens in @q have
1917   * successfully completed. If @q is NULL (or @bs is not contained in @q),
1918   * return the current flags.
1919   */
1920  static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
1921  {
1922      BlockReopenQueueEntry *entry;
1923  
1924      if (q != NULL) {
1925          QTAILQ_FOREACH(entry, q, entry) {
1926              if (entry->state.bs == bs) {
1927                  return entry->state.flags;
1928              }
1929          }
1930      }
1931  
1932      return bs->open_flags;
1933  }
1934  
1935  /* Returns whether the image file can be written to after the reopen queue @q
1936   * has been successfully applied, or right now if @q is NULL. */
1937  static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
1938                                            BlockReopenQueue *q)
1939  {
1940      int flags = bdrv_reopen_get_flags(q, bs);
1941  
1942      return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
1943  }
1944  
1945  /*
1946   * Return whether the BDS can be written to.  This is not necessarily
1947   * the same as !bdrv_is_read_only(bs), as inactivated images may not
1948   * be written to but do not count as read-only images.
1949   */
1950  bool bdrv_is_writable(BlockDriverState *bs)
1951  {
1952      return bdrv_is_writable_after_reopen(bs, NULL);
1953  }
1954  
1955  static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
1956                              BdrvChild *c, BdrvChildRole role,
1957                              BlockReopenQueue *reopen_queue,
1958                              uint64_t parent_perm, uint64_t parent_shared,
1959                              uint64_t *nperm, uint64_t *nshared)
1960  {
1961      assert(bs->drv && bs->drv->bdrv_child_perm);
1962      bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
1963                               parent_perm, parent_shared,
1964                               nperm, nshared);
1965      /* TODO Take force_share from reopen_queue */
1966      if (child_bs && child_bs->force_share) {
1967          *nshared = BLK_PERM_ALL;
1968      }
1969  }
1970  
1971  /*
1972   * Check whether permissions on this node can be changed in a way that
1973   * @cumulative_perms and @cumulative_shared_perms are the new cumulative
1974   * permissions of all its parents. This involves checking whether all necessary
1975   * permission changes to child nodes can be performed.
1976   *
1977   * Will set *tighten_restrictions to true if and only if new permissions have to
1978   * be taken or currently shared permissions are to be unshared.  Otherwise,
1979   * errors are not fatal as long as the caller accepts that the restrictions
1980   * remain tighter than they need to be.  The caller still has to abort the
1981   * transaction.
1982   * @tighten_restrictions cannot be used together with @q: When reopening, we may
1983   * encounter fatal errors even though no restrictions are to be tightened.  For
1984   * example, changing a node from RW to RO will fail if the WRITE permission is
1985   * to be kept.
1986   *
1987   * A call to this function must always be followed by a call to bdrv_set_perm()
1988   * or bdrv_abort_perm_update().
1989   */
1990  static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
1991                             uint64_t cumulative_perms,
1992                             uint64_t cumulative_shared_perms,
1993                             GSList *ignore_children,
1994                             bool *tighten_restrictions, Error **errp)
1995  {
1996      BlockDriver *drv = bs->drv;
1997      BdrvChild *c;
1998      int ret;
1999  
2000      assert(!q || !tighten_restrictions);
2001  
2002      if (tighten_restrictions) {
2003          uint64_t current_perms, current_shared;
2004          uint64_t added_perms, removed_shared_perms;
2005  
2006          bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
2007  
2008          added_perms = cumulative_perms & ~current_perms;
2009          removed_shared_perms = current_shared & ~cumulative_shared_perms;
2010  
2011          *tighten_restrictions = added_perms || removed_shared_perms;
2012      }
2013  
2014      /* Write permissions never work with read-only images */
2015      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2016          !bdrv_is_writable_after_reopen(bs, q))
2017      {
2018          if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2019              error_setg(errp, "Block node is read-only");
2020          } else {
2021              uint64_t current_perms, current_shared;
2022              bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
2023              if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
2024                  error_setg(errp, "Cannot make block node read-only, there is "
2025                             "a writer on it");
2026              } else {
2027                  error_setg(errp, "Cannot make block node read-only and create "
2028                             "a writer on it");
2029              }
2030          }
2031  
2032          return -EPERM;
2033      }
2034  
2035      /*
2036       * Unaligned requests will automatically be aligned to bl.request_alignment
2037       * and without RESIZE we can't extend requests to write to space beyond the
2038       * end of the image, so it's required that the image size is aligned.
2039       */
2040      if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2041          !(cumulative_perms & BLK_PERM_RESIZE))
2042      {
2043          if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2044              error_setg(errp, "Cannot get 'write' permission without 'resize': "
2045                               "Image size is not a multiple of request "
2046                               "alignment");
2047              return -EPERM;
2048          }
2049      }
2050  
2051      /* Check this node */
2052      if (!drv) {
2053          return 0;
2054      }
2055  
2056      if (drv->bdrv_check_perm) {
2057          return drv->bdrv_check_perm(bs, cumulative_perms,
2058                                      cumulative_shared_perms, errp);
2059      }
2060  
2061      /* Drivers that never have children can omit .bdrv_child_perm() */
2062      if (!drv->bdrv_child_perm) {
2063          assert(QLIST_EMPTY(&bs->children));
2064          return 0;
2065      }
2066  
2067      /* Check all children */
2068      QLIST_FOREACH(c, &bs->children, next) {
2069          uint64_t cur_perm, cur_shared;
2070          bool child_tighten_restr;
2071  
2072          bdrv_child_perm(bs, c->bs, c, c->role, q,
2073                          cumulative_perms, cumulative_shared_perms,
2074                          &cur_perm, &cur_shared);
2075          ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children,
2076                                      tighten_restrictions ? &child_tighten_restr
2077                                                           : NULL,
2078                                      errp);
2079          if (tighten_restrictions) {
2080              *tighten_restrictions |= child_tighten_restr;
2081          }
2082          if (ret < 0) {
2083              return ret;
2084          }
2085      }
2086  
2087      return 0;
2088  }
2089  
2090  /*
2091   * Notifies drivers that after a previous bdrv_check_perm() call, the
2092   * permission update is not performed and any preparations made for it (e.g.
2093   * taken file locks) need to be undone.
2094   *
2095   * This function recursively notifies all child nodes.
2096   */
2097  static void bdrv_abort_perm_update(BlockDriverState *bs)
2098  {
2099      BlockDriver *drv = bs->drv;
2100      BdrvChild *c;
2101  
2102      if (!drv) {
2103          return;
2104      }
2105  
2106      if (drv->bdrv_abort_perm_update) {
2107          drv->bdrv_abort_perm_update(bs);
2108      }
2109  
2110      QLIST_FOREACH(c, &bs->children, next) {
2111          bdrv_child_abort_perm_update(c);
2112      }
2113  }
2114  
2115  static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
2116                            uint64_t cumulative_shared_perms)
2117  {
2118      BlockDriver *drv = bs->drv;
2119      BdrvChild *c;
2120  
2121      if (!drv) {
2122          return;
2123      }
2124  
2125      /* Update this node */
2126      if (drv->bdrv_set_perm) {
2127          drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2128      }
2129  
2130      /* Drivers that never have children can omit .bdrv_child_perm() */
2131      if (!drv->bdrv_child_perm) {
2132          assert(QLIST_EMPTY(&bs->children));
2133          return;
2134      }
2135  
2136      /* Update all children */
2137      QLIST_FOREACH(c, &bs->children, next) {
2138          uint64_t cur_perm, cur_shared;
2139          bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2140                          cumulative_perms, cumulative_shared_perms,
2141                          &cur_perm, &cur_shared);
2142          bdrv_child_set_perm(c, cur_perm, cur_shared);
2143      }
2144  }
2145  
2146  void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2147                                uint64_t *shared_perm)
2148  {
2149      BdrvChild *c;
2150      uint64_t cumulative_perms = 0;
2151      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2152  
2153      QLIST_FOREACH(c, &bs->parents, next_parent) {
2154          cumulative_perms |= c->perm;
2155          cumulative_shared_perms &= c->shared_perm;
2156      }
2157  
2158      *perm = cumulative_perms;
2159      *shared_perm = cumulative_shared_perms;
2160  }
2161  
2162  static char *bdrv_child_user_desc(BdrvChild *c)
2163  {
2164      if (c->klass->get_parent_desc) {
2165          return c->klass->get_parent_desc(c);
2166      }
2167  
2168      return g_strdup("another user");
2169  }
2170  
2171  char *bdrv_perm_names(uint64_t perm)
2172  {
2173      struct perm_name {
2174          uint64_t perm;
2175          const char *name;
2176      } permissions[] = {
2177          { BLK_PERM_CONSISTENT_READ, "consistent read" },
2178          { BLK_PERM_WRITE,           "write" },
2179          { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2180          { BLK_PERM_RESIZE,          "resize" },
2181          { BLK_PERM_GRAPH_MOD,       "change children" },
2182          { 0, NULL }
2183      };
2184  
2185      GString *result = g_string_sized_new(30);
2186      struct perm_name *p;
2187  
2188      for (p = permissions; p->name; p++) {
2189          if (perm & p->perm) {
2190              if (result->len > 0) {
2191                  g_string_append(result, ", ");
2192              }
2193              g_string_append(result, p->name);
2194          }
2195      }
2196  
2197      return g_string_free(result, FALSE);
2198  }
2199  
2200  /*
2201   * Checks whether a new reference to @bs can be added if the new user requires
2202   * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
2203   * set, the BdrvChild objects in this list are ignored in the calculations;
2204   * this allows checking permission updates for an existing reference.
2205   *
2206   * See bdrv_check_perm() for the semantics of @tighten_restrictions.
2207   *
2208   * Needs to be followed by a call to either bdrv_set_perm() or
2209   * bdrv_abort_perm_update(). */
2210  static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
2211                                    uint64_t new_used_perm,
2212                                    uint64_t new_shared_perm,
2213                                    GSList *ignore_children,
2214                                    bool *tighten_restrictions,
2215                                    Error **errp)
2216  {
2217      BdrvChild *c;
2218      uint64_t cumulative_perms = new_used_perm;
2219      uint64_t cumulative_shared_perms = new_shared_perm;
2220  
2221      assert(!q || !tighten_restrictions);
2222  
2223      /* There is no reason why anyone couldn't tolerate write_unchanged */
2224      assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
2225  
2226      QLIST_FOREACH(c, &bs->parents, next_parent) {
2227          if (g_slist_find(ignore_children, c)) {
2228              continue;
2229          }
2230  
2231          if ((new_used_perm & c->shared_perm) != new_used_perm) {
2232              char *user = bdrv_child_user_desc(c);
2233              char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
2234  
2235              if (tighten_restrictions) {
2236                  *tighten_restrictions = true;
2237              }
2238  
2239              error_setg(errp, "Conflicts with use by %s as '%s', which does not "
2240                               "allow '%s' on %s",
2241                         user, c->name, perm_names, bdrv_get_node_name(c->bs));
2242              g_free(user);
2243              g_free(perm_names);
2244              return -EPERM;
2245          }
2246  
2247          if ((c->perm & new_shared_perm) != c->perm) {
2248              char *user = bdrv_child_user_desc(c);
2249              char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
2250  
2251              if (tighten_restrictions) {
2252                  *tighten_restrictions = true;
2253              }
2254  
2255              error_setg(errp, "Conflicts with use by %s as '%s', which uses "
2256                               "'%s' on %s",
2257                         user, c->name, perm_names, bdrv_get_node_name(c->bs));
2258              g_free(user);
2259              g_free(perm_names);
2260              return -EPERM;
2261          }
2262  
2263          cumulative_perms |= c->perm;
2264          cumulative_shared_perms &= c->shared_perm;
2265      }
2266  
2267      return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
2268                             ignore_children, tighten_restrictions, errp);
2269  }
2270  
2271  /* Needs to be followed by a call to either bdrv_child_set_perm() or
2272   * bdrv_child_abort_perm_update(). */
2273  static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
2274                                   uint64_t perm, uint64_t shared,
2275                                   GSList *ignore_children,
2276                                   bool *tighten_restrictions, Error **errp)
2277  {
2278      int ret;
2279  
2280      ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
2281      ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children,
2282                                   tighten_restrictions, errp);
2283      g_slist_free(ignore_children);
2284  
2285      if (ret < 0) {
2286          return ret;
2287      }
2288  
2289      if (!c->has_backup_perm) {
2290          c->has_backup_perm = true;
2291          c->backup_perm = c->perm;
2292          c->backup_shared_perm = c->shared_perm;
2293      }
2294      /*
2295       * Note: it's OK if c->has_backup_perm was already set, as we can find the
2296       * same child twice during check_perm procedure
2297       */
2298  
2299      c->perm = perm;
2300      c->shared_perm = shared;
2301  
2302      return 0;
2303  }
2304  
2305  static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
2306  {
2307      uint64_t cumulative_perms, cumulative_shared_perms;
2308  
2309      c->has_backup_perm = false;
2310  
2311      c->perm = perm;
2312      c->shared_perm = shared;
2313  
2314      bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
2315                               &cumulative_shared_perms);
2316      bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
2317  }
2318  
2319  static void bdrv_child_abort_perm_update(BdrvChild *c)
2320  {
2321      if (c->has_backup_perm) {
2322          c->perm = c->backup_perm;
2323          c->shared_perm = c->backup_shared_perm;
2324          c->has_backup_perm = false;
2325      }
2326  
2327      bdrv_abort_perm_update(c->bs);
2328  }
2329  
2330  int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2331                              Error **errp)
2332  {
2333      Error *local_err = NULL;
2334      int ret;
2335      bool tighten_restrictions;
2336  
2337      ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL,
2338                                  &tighten_restrictions, &local_err);
2339      if (ret < 0) {
2340          bdrv_child_abort_perm_update(c);
2341          if (tighten_restrictions) {
2342              error_propagate(errp, local_err);
2343          } else {
2344              /*
2345               * Our caller may intend to only loosen restrictions and
2346               * does not expect this function to fail.  Errors are not
2347               * fatal in such a case, so we can just hide them from our
2348               * caller.
2349               */
2350              error_free(local_err);
2351              ret = 0;
2352          }
2353          return ret;
2354      }
2355  
2356      bdrv_child_set_perm(c, perm, shared);
2357  
2358      return 0;
2359  }
2360  
2361  int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2362  {
2363      uint64_t parent_perms, parent_shared;
2364      uint64_t perms, shared;
2365  
2366      bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
2367      bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2368                      parent_perms, parent_shared, &perms, &shared);
2369  
2370      return bdrv_child_try_set_perm(c, perms, shared, errp);
2371  }
2372  
2373  /*
2374   * Default implementation for .bdrv_child_perm() for block filters:
2375   * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2376   * filtered child.
2377   */
2378  static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
2379                                        BdrvChildRole role,
2380                                        BlockReopenQueue *reopen_queue,
2381                                        uint64_t perm, uint64_t shared,
2382                                        uint64_t *nperm, uint64_t *nshared)
2383  {
2384      *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2385      *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
2386  }
2387  
2388  static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
2389                                         BdrvChildRole role,
2390                                         BlockReopenQueue *reopen_queue,
2391                                         uint64_t perm, uint64_t shared,
2392                                         uint64_t *nperm, uint64_t *nshared)
2393  {
2394      assert(role & BDRV_CHILD_COW);
2395  
2396      /*
2397       * We want consistent read from backing files if the parent needs it.
2398       * No other operations are performed on backing files.
2399       */
2400      perm &= BLK_PERM_CONSISTENT_READ;
2401  
2402      /*
2403       * If the parent can deal with changing data, we're okay with a
2404       * writable and resizable backing file.
2405       * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2406       */
2407      if (shared & BLK_PERM_WRITE) {
2408          shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2409      } else {
2410          shared = 0;
2411      }
2412  
2413      shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
2414                BLK_PERM_WRITE_UNCHANGED;
2415  
2416      if (bs->open_flags & BDRV_O_INACTIVE) {
2417          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2418      }
2419  
2420      *nperm = perm;
2421      *nshared = shared;
2422  }
2423  
2424  static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
2425                                             BdrvChildRole role,
2426                                             BlockReopenQueue *reopen_queue,
2427                                             uint64_t perm, uint64_t shared,
2428                                             uint64_t *nperm, uint64_t *nshared)
2429  {
2430      int flags;
2431  
2432      assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
2433  
2434      flags = bdrv_reopen_get_flags(reopen_queue, bs);
2435  
2436      /*
2437       * Apart from the modifications below, the same permissions are
2438       * forwarded and left alone as for filters
2439       */
2440      bdrv_filter_default_perms(bs, c, role, reopen_queue,
2441                                perm, shared, &perm, &shared);
2442  
2443      if (role & BDRV_CHILD_METADATA) {
2444          /* Format drivers may touch metadata even if the guest doesn't write */
2445          if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2446              perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2447          }
2448  
2449          /*
2450           * bs->file always needs to be consistent because of the
2451           * metadata. We can never allow other users to resize or write
2452           * to it.
2453           */
2454          if (!(flags & BDRV_O_NO_IO)) {
2455              perm |= BLK_PERM_CONSISTENT_READ;
2456          }
2457          shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
2458      }
2459  
2460      if (role & BDRV_CHILD_DATA) {
2461          /*
2462           * Technically, everything in this block is a subset of the
2463           * BDRV_CHILD_METADATA path taken above, and so this could
2464           * be an "else if" branch.  However, that is not obvious, and
2465           * this function is not performance critical, therefore we let
2466           * this be an independent "if".
2467           */
2468  
2469          /*
2470           * We cannot allow other users to resize the file because the
2471           * format driver might have some assumptions about the size
2472           * (e.g. because it is stored in metadata, or because the file
2473           * is split into fixed-size data files).
2474           */
2475          shared &= ~BLK_PERM_RESIZE;
2476  
2477          /*
2478           * WRITE_UNCHANGED often cannot be performed as such on the
2479           * data file.  For example, the qcow2 driver may still need to
2480           * write copied clusters on copy-on-read.
2481           */
2482          if (perm & BLK_PERM_WRITE_UNCHANGED) {
2483              perm |= BLK_PERM_WRITE;
2484          }
2485  
2486          /*
2487           * If the data file is written to, the format driver may
2488           * expect to be able to resize it by writing beyond the EOF.
2489           */
2490          if (perm & BLK_PERM_WRITE) {
2491              perm |= BLK_PERM_RESIZE;
2492          }
2493      }
2494  
2495      if (bs->open_flags & BDRV_O_INACTIVE) {
2496          shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2497      }
2498  
2499      *nperm = perm;
2500      *nshared = shared;
2501  }
2502  
2503  void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
2504                          BdrvChildRole role, BlockReopenQueue *reopen_queue,
2505                          uint64_t perm, uint64_t shared,
2506                          uint64_t *nperm, uint64_t *nshared)
2507  {
2508      if (role & BDRV_CHILD_FILTERED) {
2509          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2510                           BDRV_CHILD_COW)));
2511          bdrv_filter_default_perms(bs, c, role, reopen_queue,
2512                                    perm, shared, nperm, nshared);
2513      } else if (role & BDRV_CHILD_COW) {
2514          assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
2515          bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2516                                     perm, shared, nperm, nshared);
2517      } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
2518          bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2519                                         perm, shared, nperm, nshared);
2520      } else {
2521          g_assert_not_reached();
2522      }
2523  }
2524  
2525  uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2526  {
2527      static const uint64_t permissions[] = {
2528          [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
2529          [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
2530          [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
2531          [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
2532          [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
2533      };
2534  
2535      QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2536      QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2537  
2538      assert(qapi_perm < BLOCK_PERMISSION__MAX);
2539  
2540      return permissions[qapi_perm];
2541  }
2542  
2543  static void bdrv_replace_child_noperm(BdrvChild *child,
2544                                        BlockDriverState *new_bs)
2545  {
2546      BlockDriverState *old_bs = child->bs;
2547      int new_bs_quiesce_counter;
2548      int drain_saldo;
2549  
2550      assert(!child->frozen);
2551  
2552      if (old_bs && new_bs) {
2553          assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
2554      }
2555  
2556      new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
2557      drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
2558  
2559      /*
2560       * If the new child node is drained but the old one was not, flush
2561       * all outstanding requests to the old child node.
2562       */
2563      while (drain_saldo > 0 && child->klass->drained_begin) {
2564          bdrv_parent_drained_begin_single(child, true);
2565          drain_saldo--;
2566      }
2567  
2568      if (old_bs) {
2569          /* Detach first so that the recursive drain sections coming from @child
2570           * are already gone and we only end the drain sections that came from
2571           * elsewhere. */
2572          if (child->klass->detach) {
2573              child->klass->detach(child);
2574          }
2575          QLIST_REMOVE(child, next_parent);
2576      }
2577  
2578      child->bs = new_bs;
2579  
2580      if (new_bs) {
2581          QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
2582  
2583          /*
2584           * Detaching the old node may have led to the new node's
2585           * quiesce_counter having been decreased.  Not a problem, we
2586           * just need to recognize this here and then invoke
2587           * drained_end appropriately more often.
2588           */
2589          assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
2590          drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
2591  
2592          /* Attach only after starting new drained sections, so that recursive
2593           * drain sections coming from @child don't get an extra .drained_begin
2594           * callback. */
2595          if (child->klass->attach) {
2596              child->klass->attach(child);
2597          }
2598      }
2599  
2600      /*
2601       * If the old child node was drained but the new one is not, allow
2602       * requests to come in only after the new node has been attached.
2603       */
2604      while (drain_saldo < 0 && child->klass->drained_end) {
2605          bdrv_parent_drained_end_single(child);
2606          drain_saldo++;
2607      }
2608  }
2609  
2610  /*
2611   * Updates @child to change its reference to point to @new_bs, including
2612   * checking and applying the necessary permission updates both to the old node
2613   * and to @new_bs.
2614   *
2615   * NULL is passed as @new_bs for removing the reference before freeing @child.
2616   *
2617   * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
2618   * function uses bdrv_set_perm() to update the permissions according to the new
2619   * reference that @new_bs gets.
2620   *
2621   * Callers must ensure that child->frozen is false.
2622   */
2623  static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
2624  {
2625      BlockDriverState *old_bs = child->bs;
2626      uint64_t perm, shared_perm;
2627  
2628      /* Asserts that child->frozen == false */
2629      bdrv_replace_child_noperm(child, new_bs);
2630  
2631      /*
2632       * Start with the new node's permissions.  If @new_bs is a (direct
2633       * or indirect) child of @old_bs, we must complete the permission
2634       * update on @new_bs before we loosen the restrictions on @old_bs.
2635       * Otherwise, bdrv_check_perm() on @old_bs would re-initiate
2636       * updating the permissions of @new_bs, and thus not purely loosen
2637       * restrictions.
2638       */
2639      if (new_bs) {
2640          bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
2641          bdrv_set_perm(new_bs, perm, shared_perm);
2642      }
2643  
2644      if (old_bs) {
2645          /* Update permissions for old node. This is guaranteed to succeed
2646           * because we're just taking a parent away, so we're loosening
2647           * restrictions. */
2648          bool tighten_restrictions;
2649          int ret;
2650  
2651          bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
2652          ret = bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL,
2653                                &tighten_restrictions, NULL);
2654          assert(tighten_restrictions == false);
2655          if (ret < 0) {
2656              /* We only tried to loosen restrictions, so errors are not fatal */
2657              bdrv_abort_perm_update(old_bs);
2658          } else {
2659              bdrv_set_perm(old_bs, perm, shared_perm);
2660          }
2661  
2662          /* When the parent requiring a non-default AioContext is removed, the
2663           * node moves back to the main AioContext */
2664          bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
2665      }
2666  }
2667  
2668  /*
2669   * This function steals the reference to child_bs from the caller.
2670   * That reference is later dropped by bdrv_root_unref_child().
2671   *
2672   * On failure NULL is returned, errp is set and the reference to
2673   * child_bs is also dropped.
2674   *
2675   * The caller must hold the AioContext lock @child_bs, but not that of @ctx
2676   * (unless @child_bs is already in @ctx).
2677   */
2678  BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
2679                                    const char *child_name,
2680                                    const BdrvChildClass *child_class,
2681                                    BdrvChildRole child_role,
2682                                    AioContext *ctx,
2683                                    uint64_t perm, uint64_t shared_perm,
2684                                    void *opaque, Error **errp)
2685  {
2686      BdrvChild *child;
2687      Error *local_err = NULL;
2688      int ret;
2689  
2690      ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, NULL,
2691                                   errp);
2692      if (ret < 0) {
2693          bdrv_abort_perm_update(child_bs);
2694          bdrv_unref(child_bs);
2695          return NULL;
2696      }
2697  
2698      child = g_new(BdrvChild, 1);
2699      *child = (BdrvChild) {
2700          .bs             = NULL,
2701          .name           = g_strdup(child_name),
2702          .klass          = child_class,
2703          .role           = child_role,
2704          .perm           = perm,
2705          .shared_perm    = shared_perm,
2706          .opaque         = opaque,
2707      };
2708  
2709      /* If the AioContexts don't match, first try to move the subtree of
2710       * child_bs into the AioContext of the new parent. If this doesn't work,
2711       * try moving the parent into the AioContext of child_bs instead. */
2712      if (bdrv_get_aio_context(child_bs) != ctx) {
2713          ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
2714          if (ret < 0 && child_class->can_set_aio_ctx) {
2715              GSList *ignore = g_slist_prepend(NULL, child);
2716              ctx = bdrv_get_aio_context(child_bs);
2717              if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
2718                  error_free(local_err);
2719                  ret = 0;
2720                  g_slist_free(ignore);
2721                  ignore = g_slist_prepend(NULL, child);
2722                  child_class->set_aio_ctx(child, ctx, &ignore);
2723              }
2724              g_slist_free(ignore);
2725          }
2726          if (ret < 0) {
2727              error_propagate(errp, local_err);
2728              g_free(child);
2729              bdrv_abort_perm_update(child_bs);
2730              bdrv_unref(child_bs);
2731              return NULL;
2732          }
2733      }
2734  
2735      /* This performs the matching bdrv_set_perm() for the above check. */
2736      bdrv_replace_child(child, child_bs);
2737  
2738      return child;
2739  }
2740  
2741  /*
2742   * This function transfers the reference to child_bs from the caller
2743   * to parent_bs. That reference is later dropped by parent_bs on
2744   * bdrv_close() or if someone calls bdrv_unref_child().
2745   *
2746   * On failure NULL is returned, errp is set and the reference to
2747   * child_bs is also dropped.
2748   *
2749   * If @parent_bs and @child_bs are in different AioContexts, the caller must
2750   * hold the AioContext lock for @child_bs, but not for @parent_bs.
2751   */
2752  BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
2753                               BlockDriverState *child_bs,
2754                               const char *child_name,
2755                               const BdrvChildClass *child_class,
2756                               BdrvChildRole child_role,
2757                               Error **errp)
2758  {
2759      BdrvChild *child;
2760      uint64_t perm, shared_perm;
2761  
2762      bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
2763  
2764      assert(parent_bs->drv);
2765      bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
2766                      perm, shared_perm, &perm, &shared_perm);
2767  
2768      child = bdrv_root_attach_child(child_bs, child_name, child_class,
2769                                     child_role, bdrv_get_aio_context(parent_bs),
2770                                     perm, shared_perm, parent_bs, errp);
2771      if (child == NULL) {
2772          return NULL;
2773      }
2774  
2775      QLIST_INSERT_HEAD(&parent_bs->children, child, next);
2776      return child;
2777  }
2778  
2779  static void bdrv_detach_child(BdrvChild *child)
2780  {
2781      QLIST_SAFE_REMOVE(child, next);
2782  
2783      bdrv_replace_child(child, NULL);
2784  
2785      g_free(child->name);
2786      g_free(child);
2787  }
2788  
2789  /* Callers must ensure that child->frozen is false. */
2790  void bdrv_root_unref_child(BdrvChild *child)
2791  {
2792      BlockDriverState *child_bs;
2793  
2794      child_bs = child->bs;
2795      bdrv_detach_child(child);
2796      bdrv_unref(child_bs);
2797  }
2798  
2799  /**
2800   * Clear all inherits_from pointers from children and grandchildren of
2801   * @root that point to @root, where necessary.
2802   */
2803  static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
2804  {
2805      BdrvChild *c;
2806  
2807      if (child->bs->inherits_from == root) {
2808          /*
2809           * Remove inherits_from only when the last reference between root and
2810           * child->bs goes away.
2811           */
2812          QLIST_FOREACH(c, &root->children, next) {
2813              if (c != child && c->bs == child->bs) {
2814                  break;
2815              }
2816          }
2817          if (c == NULL) {
2818              child->bs->inherits_from = NULL;
2819          }
2820      }
2821  
2822      QLIST_FOREACH(c, &child->bs->children, next) {
2823          bdrv_unset_inherits_from(root, c);
2824      }
2825  }
2826  
2827  /* Callers must ensure that child->frozen is false. */
2828  void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
2829  {
2830      if (child == NULL) {
2831          return;
2832      }
2833  
2834      bdrv_unset_inherits_from(parent, child);
2835      bdrv_root_unref_child(child);
2836  }
2837  
2838  
2839  static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
2840  {
2841      BdrvChild *c;
2842      QLIST_FOREACH(c, &bs->parents, next_parent) {
2843          if (c->klass->change_media) {
2844              c->klass->change_media(c, load);
2845          }
2846      }
2847  }
2848  
2849  /* Return true if you can reach parent going through child->inherits_from
2850   * recursively. If parent or child are NULL, return false */
2851  static bool bdrv_inherits_from_recursive(BlockDriverState *child,
2852                                           BlockDriverState *parent)
2853  {
2854      while (child && child != parent) {
2855          child = child->inherits_from;
2856      }
2857  
2858      return child != NULL;
2859  }
2860  
2861  /*
2862   * Return the BdrvChildRole for @bs's backing child.  bs->backing is
2863   * mostly used for COW backing children (role = COW), but also for
2864   * filtered children (role = FILTERED | PRIMARY).
2865   */
2866  static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
2867  {
2868      if (bs->drv && bs->drv->is_filter) {
2869          return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
2870      } else {
2871          return BDRV_CHILD_COW;
2872      }
2873  }
2874  
2875  /*
2876   * Sets the bs->backing link of a BDS. A new reference is created; callers
2877   * which don't need their own reference any more must call bdrv_unref().
2878   */
2879  void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
2880                           Error **errp)
2881  {
2882      bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
2883          bdrv_inherits_from_recursive(backing_hd, bs);
2884  
2885      if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
2886          return;
2887      }
2888  
2889      if (backing_hd) {
2890          bdrv_ref(backing_hd);
2891      }
2892  
2893      if (bs->backing) {
2894          /* Cannot be frozen, we checked that above */
2895          bdrv_unref_child(bs, bs->backing);
2896          bs->backing = NULL;
2897      }
2898  
2899      if (!backing_hd) {
2900          goto out;
2901      }
2902  
2903      bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds,
2904                                      bdrv_backing_role(bs), errp);
2905      /* If backing_hd was already part of bs's backing chain, and
2906       * inherits_from pointed recursively to bs then let's update it to
2907       * point directly to bs (else it will become NULL). */
2908      if (bs->backing && update_inherits_from) {
2909          backing_hd->inherits_from = bs;
2910      }
2911  
2912  out:
2913      bdrv_refresh_limits(bs, NULL);
2914  }
2915  
2916  /*
2917   * Opens the backing file for a BlockDriverState if not yet open
2918   *
2919   * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
2920   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
2921   * itself, all options starting with "${bdref_key}." are considered part of the
2922   * BlockdevRef.
2923   *
2924   * TODO Can this be unified with bdrv_open_image()?
2925   */
2926  int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
2927                             const char *bdref_key, Error **errp)
2928  {
2929      char *backing_filename = NULL;
2930      char *bdref_key_dot;
2931      const char *reference = NULL;
2932      int ret = 0;
2933      bool implicit_backing = false;
2934      BlockDriverState *backing_hd;
2935      QDict *options;
2936      QDict *tmp_parent_options = NULL;
2937      Error *local_err = NULL;
2938  
2939      if (bs->backing != NULL) {
2940          goto free_exit;
2941      }
2942  
2943      /* NULL means an empty set of options */
2944      if (parent_options == NULL) {
2945          tmp_parent_options = qdict_new();
2946          parent_options = tmp_parent_options;
2947      }
2948  
2949      bs->open_flags &= ~BDRV_O_NO_BACKING;
2950  
2951      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
2952      qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
2953      g_free(bdref_key_dot);
2954  
2955      /*
2956       * Caution: while qdict_get_try_str() is fine, getting non-string
2957       * types would require more care.  When @parent_options come from
2958       * -blockdev or blockdev_add, its members are typed according to
2959       * the QAPI schema, but when they come from -drive, they're all
2960       * QString.
2961       */
2962      reference = qdict_get_try_str(parent_options, bdref_key);
2963      if (reference || qdict_haskey(options, "file.filename")) {
2964          /* keep backing_filename NULL */
2965      } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
2966          qobject_unref(options);
2967          goto free_exit;
2968      } else {
2969          if (qdict_size(options) == 0) {
2970              /* If the user specifies options that do not modify the
2971               * backing file's behavior, we might still consider it the
2972               * implicit backing file.  But it's easier this way, and
2973               * just specifying some of the backing BDS's options is
2974               * only possible with -drive anyway (otherwise the QAPI
2975               * schema forces the user to specify everything). */
2976              implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
2977          }
2978  
2979          backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
2980          if (local_err) {
2981              ret = -EINVAL;
2982              error_propagate(errp, local_err);
2983              qobject_unref(options);
2984              goto free_exit;
2985          }
2986      }
2987  
2988      if (!bs->drv || !bs->drv->supports_backing) {
2989          ret = -EINVAL;
2990          error_setg(errp, "Driver doesn't support backing files");
2991          qobject_unref(options);
2992          goto free_exit;
2993      }
2994  
2995      if (!reference &&
2996          bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
2997          qdict_put_str(options, "driver", bs->backing_format);
2998      }
2999  
3000      backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
3001                                     &child_of_bds, bdrv_backing_role(bs), errp);
3002      if (!backing_hd) {
3003          bs->open_flags |= BDRV_O_NO_BACKING;
3004          error_prepend(errp, "Could not open backing file: ");
3005          ret = -EINVAL;
3006          goto free_exit;
3007      }
3008  
3009      if (implicit_backing) {
3010          bdrv_refresh_filename(backing_hd);
3011          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
3012                  backing_hd->filename);
3013      }
3014  
3015      /* Hook up the backing file link; drop our reference, bs owns the
3016       * backing_hd reference now */
3017      bdrv_set_backing_hd(bs, backing_hd, &local_err);
3018      bdrv_unref(backing_hd);
3019      if (local_err) {
3020          error_propagate(errp, local_err);
3021          ret = -EINVAL;
3022          goto free_exit;
3023      }
3024  
3025      qdict_del(parent_options, bdref_key);
3026  
3027  free_exit:
3028      g_free(backing_filename);
3029      qobject_unref(tmp_parent_options);
3030      return ret;
3031  }
3032  
3033  static BlockDriverState *
3034  bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
3035                     BlockDriverState *parent, const BdrvChildClass *child_class,
3036                     BdrvChildRole child_role, bool allow_none, Error **errp)
3037  {
3038      BlockDriverState *bs = NULL;
3039      QDict *image_options;
3040      char *bdref_key_dot;
3041      const char *reference;
3042  
3043      assert(child_class != NULL);
3044  
3045      bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3046      qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3047      g_free(bdref_key_dot);
3048  
3049      /*
3050       * Caution: while qdict_get_try_str() is fine, getting non-string
3051       * types would require more care.  When @options come from
3052       * -blockdev or blockdev_add, its members are typed according to
3053       * the QAPI schema, but when they come from -drive, they're all
3054       * QString.
3055       */
3056      reference = qdict_get_try_str(options, bdref_key);
3057      if (!filename && !reference && !qdict_size(image_options)) {
3058          if (!allow_none) {
3059              error_setg(errp, "A block device must be specified for \"%s\"",
3060                         bdref_key);
3061          }
3062          qobject_unref(image_options);
3063          goto done;
3064      }
3065  
3066      bs = bdrv_open_inherit(filename, reference, image_options, 0,
3067                             parent, child_class, child_role, errp);
3068      if (!bs) {
3069          goto done;
3070      }
3071  
3072  done:
3073      qdict_del(options, bdref_key);
3074      return bs;
3075  }
3076  
3077  /*
3078   * Opens a disk image whose options are given as BlockdevRef in another block
3079   * device's options.
3080   *
3081   * If allow_none is true, no image will be opened if filename is false and no
3082   * BlockdevRef is given. NULL will be returned, but errp remains unset.
3083   *
3084   * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3085   * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3086   * itself, all options starting with "${bdref_key}." are considered part of the
3087   * BlockdevRef.
3088   *
3089   * The BlockdevRef will be removed from the options QDict.
3090   */
3091  BdrvChild *bdrv_open_child(const char *filename,
3092                             QDict *options, const char *bdref_key,
3093                             BlockDriverState *parent,
3094                             const BdrvChildClass *child_class,
3095                             BdrvChildRole child_role,
3096                             bool allow_none, Error **errp)
3097  {
3098      BlockDriverState *bs;
3099  
3100      bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3101                              child_role, allow_none, errp);
3102      if (bs == NULL) {
3103          return NULL;
3104      }
3105  
3106      return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3107                               errp);
3108  }
3109  
3110  /*
3111   * TODO Future callers may need to specify parent/child_class in order for
3112   * option inheritance to work. Existing callers use it for the root node.
3113   */
3114  BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3115  {
3116      BlockDriverState *bs = NULL;
3117      QObject *obj = NULL;
3118      QDict *qdict = NULL;
3119      const char *reference = NULL;
3120      Visitor *v = NULL;
3121  
3122      if (ref->type == QTYPE_QSTRING) {
3123          reference = ref->u.reference;
3124      } else {
3125          BlockdevOptions *options = &ref->u.definition;
3126          assert(ref->type == QTYPE_QDICT);
3127  
3128          v = qobject_output_visitor_new(&obj);
3129          visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3130          visit_complete(v, &obj);
3131  
3132          qdict = qobject_to(QDict, obj);
3133          qdict_flatten(qdict);
3134  
3135          /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3136           * compatibility with other callers) rather than what we want as the
3137           * real defaults. Apply the defaults here instead. */
3138          qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3139          qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3140          qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
3141          qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3142  
3143      }
3144  
3145      bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
3146      obj = NULL;
3147      qobject_unref(obj);
3148      visit_free(v);
3149      return bs;
3150  }
3151  
3152  static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3153                                                     int flags,
3154                                                     QDict *snapshot_options,
3155                                                     Error **errp)
3156  {
3157      /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
3158      char *tmp_filename = g_malloc0(PATH_MAX + 1);
3159      int64_t total_size;
3160      QemuOpts *opts = NULL;
3161      BlockDriverState *bs_snapshot = NULL;
3162      Error *local_err = NULL;
3163      int ret;
3164  
3165      /* if snapshot, we create a temporary backing file and open it
3166         instead of opening 'filename' directly */
3167  
3168      /* Get the required size from the image */
3169      total_size = bdrv_getlength(bs);
3170      if (total_size < 0) {
3171          error_setg_errno(errp, -total_size, "Could not get image size");
3172          goto out;
3173      }
3174  
3175      /* Create the temporary image */
3176      ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
3177      if (ret < 0) {
3178          error_setg_errno(errp, -ret, "Could not get temporary filename");
3179          goto out;
3180      }
3181  
3182      opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
3183                              &error_abort);
3184      qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
3185      ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
3186      qemu_opts_del(opts);
3187      if (ret < 0) {
3188          error_prepend(errp, "Could not create temporary overlay '%s': ",
3189                        tmp_filename);
3190          goto out;
3191      }
3192  
3193      /* Prepare options QDict for the temporary file */
3194      qdict_put_str(snapshot_options, "file.driver", "file");
3195      qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3196      qdict_put_str(snapshot_options, "driver", "qcow2");
3197  
3198      bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
3199      snapshot_options = NULL;
3200      if (!bs_snapshot) {
3201          goto out;
3202      }
3203  
3204      /* bdrv_append() consumes a strong reference to bs_snapshot
3205       * (i.e. it will call bdrv_unref() on it) even on error, so in
3206       * order to be able to return one, we have to increase
3207       * bs_snapshot's refcount here */
3208      bdrv_ref(bs_snapshot);
3209      bdrv_append(bs_snapshot, bs, &local_err);
3210      if (local_err) {
3211          error_propagate(errp, local_err);
3212          bs_snapshot = NULL;
3213          goto out;
3214      }
3215  
3216  out:
3217      qobject_unref(snapshot_options);
3218      g_free(tmp_filename);
3219      return bs_snapshot;
3220  }
3221  
3222  /*
3223   * Opens a disk image (raw, qcow2, vmdk, ...)
3224   *
3225   * options is a QDict of options to pass to the block drivers, or NULL for an
3226   * empty set of options. The reference to the QDict belongs to the block layer
3227   * after the call (even on failure), so if the caller intends to reuse the
3228   * dictionary, it needs to use qobject_ref() before calling bdrv_open.
3229   *
3230   * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
3231   * If it is not NULL, the referenced BDS will be reused.
3232   *
3233   * The reference parameter may be used to specify an existing block device which
3234   * should be opened. If specified, neither options nor a filename may be given,
3235   * nor can an existing BDS be reused (that is, *pbs has to be NULL).
3236   */
3237  static BlockDriverState *bdrv_open_inherit(const char *filename,
3238                                             const char *reference,
3239                                             QDict *options, int flags,
3240                                             BlockDriverState *parent,
3241                                             const BdrvChildClass *child_class,
3242                                             BdrvChildRole child_role,
3243                                             Error **errp)
3244  {
3245      int ret;
3246      BlockBackend *file = NULL;
3247      BlockDriverState *bs;
3248      BlockDriver *drv = NULL;
3249      BdrvChild *child;
3250      const char *drvname;
3251      const char *backing;
3252      Error *local_err = NULL;
3253      QDict *snapshot_options = NULL;
3254      int snapshot_flags = 0;
3255  
3256      assert(!child_class || !flags);
3257      assert(!child_class == !parent);
3258  
3259      if (reference) {
3260          bool options_non_empty = options ? qdict_size(options) : false;
3261          qobject_unref(options);
3262  
3263          if (filename || options_non_empty) {
3264              error_setg(errp, "Cannot reference an existing block device with "
3265                         "additional options or a new filename");
3266              return NULL;
3267          }
3268  
3269          bs = bdrv_lookup_bs(reference, reference, errp);
3270          if (!bs) {
3271              return NULL;
3272          }
3273  
3274          bdrv_ref(bs);
3275          return bs;
3276      }
3277  
3278      bs = bdrv_new();
3279  
3280      /* NULL means an empty set of options */
3281      if (options == NULL) {
3282          options = qdict_new();
3283      }
3284  
3285      /* json: syntax counts as explicit options, as if in the QDict */
3286      parse_json_protocol(options, &filename, &local_err);
3287      if (local_err) {
3288          goto fail;
3289      }
3290  
3291      bs->explicit_options = qdict_clone_shallow(options);
3292  
3293      if (child_class) {
3294          bool parent_is_format;
3295  
3296          if (parent->drv) {
3297              parent_is_format = parent->drv->is_format;
3298          } else {
3299              /*
3300               * parent->drv is not set yet because this node is opened for
3301               * (potential) format probing.  That means that @parent is going
3302               * to be a format node.
3303               */
3304              parent_is_format = true;
3305          }
3306  
3307          bs->inherits_from = parent;
3308          child_class->inherit_options(child_role, parent_is_format,
3309                                       &flags, options,
3310                                       parent->open_flags, parent->options);
3311      }
3312  
3313      ret = bdrv_fill_options(&options, filename, &flags, &local_err);
3314      if (ret < 0) {
3315          goto fail;
3316      }
3317  
3318      /*
3319       * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
3320       * Caution: getting a boolean member of @options requires care.
3321       * When @options come from -blockdev or blockdev_add, members are
3322       * typed according to the QAPI schema, but when they come from
3323       * -drive, they're all QString.
3324       */
3325      if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
3326          !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
3327          flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
3328      } else {
3329          flags &= ~BDRV_O_RDWR;
3330      }
3331  
3332      if (flags & BDRV_O_SNAPSHOT) {
3333          snapshot_options = qdict_new();
3334          bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
3335                                     flags, options);
3336          /* Let bdrv_backing_options() override "read-only" */
3337          qdict_del(options, BDRV_OPT_READ_ONLY);
3338          bdrv_inherited_options(BDRV_CHILD_COW, true,
3339                                 &flags, options, flags, options);
3340      }
3341  
3342      bs->open_flags = flags;
3343      bs->options = options;
3344      options = qdict_clone_shallow(options);
3345  
3346      /* Find the right image format driver */
3347      /* See cautionary note on accessing @options above */
3348      drvname = qdict_get_try_str(options, "driver");
3349      if (drvname) {
3350          drv = bdrv_find_format(drvname);
3351          if (!drv) {
3352              error_setg(errp, "Unknown driver: '%s'", drvname);
3353              goto fail;
3354          }
3355      }
3356  
3357      assert(drvname || !(flags & BDRV_O_PROTOCOL));
3358  
3359      /* See cautionary note on accessing @options above */
3360      backing = qdict_get_try_str(options, "backing");
3361      if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
3362          (backing && *backing == '\0'))
3363      {
3364          if (backing) {
3365              warn_report("Use of \"backing\": \"\" is deprecated; "
3366                          "use \"backing\": null instead");
3367          }
3368          flags |= BDRV_O_NO_BACKING;
3369          qdict_del(bs->explicit_options, "backing");
3370          qdict_del(bs->options, "backing");
3371          qdict_del(options, "backing");
3372      }
3373  
3374      /* Open image file without format layer. This BlockBackend is only used for
3375       * probing, the block drivers will do their own bdrv_open_child() for the
3376       * same BDS, which is why we put the node name back into options. */
3377      if ((flags & BDRV_O_PROTOCOL) == 0) {
3378          BlockDriverState *file_bs;
3379  
3380          file_bs = bdrv_open_child_bs(filename, options, "file", bs,
3381                                       &child_of_bds, BDRV_CHILD_IMAGE,
3382                                       true, &local_err);
3383          if (local_err) {
3384              goto fail;
3385          }
3386          if (file_bs != NULL) {
3387              /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
3388               * looking at the header to guess the image format. This works even
3389               * in cases where a guest would not see a consistent state. */
3390              file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
3391              blk_insert_bs(file, file_bs, &local_err);
3392              bdrv_unref(file_bs);
3393              if (local_err) {
3394                  goto fail;
3395              }
3396  
3397              qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
3398          }
3399      }
3400  
3401      /* Image format probing */
3402      bs->probed = !drv;
3403      if (!drv && file) {
3404          ret = find_image_format(file, filename, &drv, &local_err);
3405          if (ret < 0) {
3406              goto fail;
3407          }
3408          /*
3409           * This option update would logically belong in bdrv_fill_options(),
3410           * but we first need to open bs->file for the probing to work, while
3411           * opening bs->file already requires the (mostly) final set of options
3412           * so that cache mode etc. can be inherited.
3413           *
3414           * Adding the driver later is somewhat ugly, but it's not an option
3415           * that would ever be inherited, so it's correct. We just need to make
3416           * sure to update both bs->options (which has the full effective
3417           * options for bs) and options (which has file.* already removed).
3418           */
3419          qdict_put_str(bs->options, "driver", drv->format_name);
3420          qdict_put_str(options, "driver", drv->format_name);
3421      } else if (!drv) {
3422          error_setg(errp, "Must specify either driver or file");
3423          goto fail;
3424      }
3425  
3426      /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
3427      assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
3428      /* file must be NULL if a protocol BDS is about to be created
3429       * (the inverse results in an error message from bdrv_open_common()) */
3430      assert(!(flags & BDRV_O_PROTOCOL) || !file);
3431  
3432      /* Open the image */
3433      ret = bdrv_open_common(bs, file, options, &local_err);
3434      if (ret < 0) {
3435          goto fail;
3436      }
3437  
3438      if (file) {
3439          blk_unref(file);
3440          file = NULL;
3441      }
3442  
3443      /* If there is a backing file, use it */
3444      if ((flags & BDRV_O_NO_BACKING) == 0) {
3445          ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
3446          if (ret < 0) {
3447              goto close_and_fail;
3448          }
3449      }
3450  
3451      /* Remove all children options and references
3452       * from bs->options and bs->explicit_options */
3453      QLIST_FOREACH(child, &bs->children, next) {
3454          char *child_key_dot;
3455          child_key_dot = g_strdup_printf("%s.", child->name);
3456          qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
3457          qdict_extract_subqdict(bs->options, NULL, child_key_dot);
3458          qdict_del(bs->explicit_options, child->name);
3459          qdict_del(bs->options, child->name);
3460          g_free(child_key_dot);
3461      }
3462  
3463      /* Check if any unknown options were used */
3464      if (qdict_size(options) != 0) {
3465          const QDictEntry *entry = qdict_first(options);
3466          if (flags & BDRV_O_PROTOCOL) {
3467              error_setg(errp, "Block protocol '%s' doesn't support the option "
3468                         "'%s'", drv->format_name, entry->key);
3469          } else {
3470              error_setg(errp,
3471                         "Block format '%s' does not support the option '%s'",
3472                         drv->format_name, entry->key);
3473          }
3474  
3475          goto close_and_fail;
3476      }
3477  
3478      bdrv_parent_cb_change_media(bs, true);
3479  
3480      qobject_unref(options);
3481      options = NULL;
3482  
3483      /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
3484       * temporary snapshot afterwards. */
3485      if (snapshot_flags) {
3486          BlockDriverState *snapshot_bs;
3487          snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
3488                                                  snapshot_options, &local_err);
3489          snapshot_options = NULL;
3490          if (local_err) {
3491              goto close_and_fail;
3492          }
3493          /* We are not going to return bs but the overlay on top of it
3494           * (snapshot_bs); thus, we have to drop the strong reference to bs
3495           * (which we obtained by calling bdrv_new()). bs will not be deleted,
3496           * though, because the overlay still has a reference to it. */
3497          bdrv_unref(bs);
3498          bs = snapshot_bs;
3499      }
3500  
3501      return bs;
3502  
3503  fail:
3504      blk_unref(file);
3505      qobject_unref(snapshot_options);
3506      qobject_unref(bs->explicit_options);
3507      qobject_unref(bs->options);
3508      qobject_unref(options);
3509      bs->options = NULL;
3510      bs->explicit_options = NULL;
3511      bdrv_unref(bs);
3512      error_propagate(errp, local_err);
3513      return NULL;
3514  
3515  close_and_fail:
3516      bdrv_unref(bs);
3517      qobject_unref(snapshot_options);
3518      qobject_unref(options);
3519      error_propagate(errp, local_err);
3520      return NULL;
3521  }
3522  
3523  BlockDriverState *bdrv_open(const char *filename, const char *reference,
3524                              QDict *options, int flags, Error **errp)
3525  {
3526      return bdrv_open_inherit(filename, reference, options, flags, NULL,
3527                               NULL, 0, errp);
3528  }
3529  
/*
 * Look up @str in the NULL-terminated string array @list.
 *
 * Returns true if some entry of @list compares equal to @str.
 * Returns false if there is no such entry, or if @str or @list is NULL.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *item;

    if (!str || !list) {
        return false;
    }

    /* Walk the array until the terminating NULL entry */
    for (item = list; *item; item++) {
        if (strcmp(str, *item) == 0) {
            return true;
        }
    }

    return false;
}
3543  
3544  /*
3545   * Check that every option set in @bs->options is also set in
3546   * @new_opts.
3547   *
3548   * Options listed in the common_options list and in
3549   * @bs->drv->mutable_opts are skipped.
3550   *
3551   * Return 0 on success, otherwise return -EINVAL and set @errp.
3552   */
3553  static int bdrv_reset_options_allowed(BlockDriverState *bs,
3554                                        const QDict *new_opts, Error **errp)
3555  {
3556      const QDictEntry *e;
3557      /* These options are common to all block drivers and are handled
3558       * in bdrv_reopen_prepare() so they can be left out of @new_opts */
3559      const char *const common_options[] = {
3560          "node-name", "discard", "cache.direct", "cache.no-flush",
3561          "read-only", "auto-read-only", "detect-zeroes", NULL
3562      };
3563  
3564      for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
3565          if (!qdict_haskey(new_opts, e->key) &&
3566              !is_str_in_list(e->key, common_options) &&
3567              !is_str_in_list(e->key, bs->drv->mutable_opts)) {
3568              error_setg(errp, "Option '%s' cannot be reset "
3569                         "to its default value", e->key);
3570              return -EINVAL;
3571          }
3572      }
3573  
3574      return 0;
3575  }
3576  
3577  /*
3578   * Returns true if @child can be reached recursively from @bs
3579   */
3580  static bool bdrv_recurse_has_child(BlockDriverState *bs,
3581                                     BlockDriverState *child)
3582  {
3583      BdrvChild *c;
3584  
3585      if (bs == child) {
3586          return true;
3587      }
3588  
3589      QLIST_FOREACH(c, &bs->children, next) {
3590          if (bdrv_recurse_has_child(c->bs, child)) {
3591              return true;
3592          }
3593      }
3594  
3595      return false;
3596  }
3597  
/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
 * already performed, or alternatively may be NULL, in which case a new
 * BlockReopenQueue will be created and initialized. This newly created
 * BlockReopenQueue should be passed back in for subsequent calls that are
 * intended to be of the same atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue. It must not be
 * NULL. If bs already has an entry in bs_queue, that entry is updated
 * rather than a second one being added.
 *
 * options contains the changed options for the associated bs
 * (the BlockReopenQueue takes ownership). NULL is treated like an empty
 * QDict.
 *
 * klass, role, parent_is_format, parent_options and parent_flags describe
 * the parent when this function is called for a child node. If
 * parent_options is non-NULL, the new flags start out as 0 and
 * klass->inherit_options() is called to derive flags and options from the
 * parent. If parent_options is NULL, the other four parameters are not
 * used and the new flags start out as bdrv_get_flags(bs).
 *
 * keep_old_opts selects whether the old options of bs are used for
 * everything that is not set in options. If it is false, the queue entry
 * also records whether the 'backing' option was left out
 * (state.backing_missing).
 *
 * After queuing bs itself, the function recurses into the children of bs
 * that inherit from it (child->bs->inherits_from == bs).
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
 */
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
                                                 BlockDriverState *bs,
                                                 QDict *options,
                                                 const BdrvChildClass *klass,
                                                 BdrvChildRole role,
                                                 bool parent_is_format,
                                                 QDict *parent_options,
                                                 int parent_flags,
                                                 bool keep_old_opts)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    BdrvChild *child;
    QDict *old_options, *explicit_options, *options_copy;
    int flags;
    QemuOpts *opts;

    /* Make sure that the caller remembered to use a drained section. This is
     * important to avoid graph changes between the recursive queuing here and
     * bdrv_reopen_multiple(). */
    assert(bs->quiesce_counter > 0);

    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QTAILQ_INIT(bs_queue);
    }

    if (!options) {
        options = qdict_new();
    }

    /* Check if this BlockDriverState is already in the queue.
     * If the loop runs to completion without a match, bs_entry is NULL. */
    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bs == bs_entry->state.bs) {
            break;
        }
    }

    /*
     * Precedence of options:
     * 1. Explicitly passed in options (highest)
     * 2. Retained from explicitly set options of bs
     * 3. Inherited from parent node
     * 4. Retained from effective options of bs
     */

    /* Old explicitly set values (don't overwrite by inherited value).
     * An existing queue entry takes precedence over the state of bs. */
    if (bs_entry || keep_old_opts) {
        old_options = qdict_clone_shallow(bs_entry ?
                                          bs_entry->state.explicit_options :
                                          bs->explicit_options);
        bdrv_join_options(bs, options, old_options);
        qobject_unref(old_options);
    }

    /* Snapshot taken before anything inherited or effective is merged in */
    explicit_options = qdict_clone_shallow(options);

    /* Inherit from parent node */
    if (parent_options) {
        flags = 0;
        klass->inherit_options(role, parent_is_format, &flags, options,
                               parent_flags, parent_options);
    } else {
        flags = bdrv_get_flags(bs);
    }

    if (keep_old_opts) {
        /* Old values are used for options that aren't set yet */
        old_options = qdict_clone_shallow(bs->options);
        bdrv_join_options(bs, options, old_options);
        qobject_unref(old_options);
    }

    /* We have the final set of options so let's update the flags.
     * This works on a copy, so that options itself is left untouched. */
    options_copy = qdict_clone_shallow(options);
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options_copy, NULL);
    update_flags_from_options(&flags, opts);
    qemu_opts_del(opts);
    qobject_unref(options_copy);

    /* bdrv_open_inherit() sets and clears some additional flags internally */
    flags &= ~BDRV_O_PROTOCOL;
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

    if (!bs_entry) {
        bs_entry = g_new0(BlockReopenQueueEntry, 1);
        QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
    } else {
        /* Drop the QDicts of the previous request; they are replaced below */
        qobject_unref(bs_entry->state.options);
        qobject_unref(bs_entry->state.explicit_options);
    }

    bs_entry->state.bs = bs;
    bs_entry->state.options = options;
    bs_entry->state.explicit_options = explicit_options;
    bs_entry->state.flags = flags;

    /* This needs to be overwritten in bdrv_reopen_prepare() */
    bs_entry->state.perm = UINT64_MAX;
    bs_entry->state.shared_perm = 0;

    /*
     * If keep_old_opts is false then it means that unspecified
     * options must be reset to their original value. We don't allow
     * resetting 'backing' but we need to know if the option is
     * missing in order to decide if we have to return an error.
     */
    if (!keep_old_opts) {
        bs_entry->state.backing_missing =
            !qdict_haskey(options, "backing") &&
            !qdict_haskey(options, "backing.driver");
    }

    QLIST_FOREACH(child, &bs->children, next) {
        QDict *new_child_options = NULL;
        bool child_keep_old = keep_old_opts;

        /* reopen can only change the options of block devices that were
         * implicitly created and inherited options. For other (referenced)
         * block devices, a syntax like "backing.foo" results in an error. */
        if (child->bs->inherits_from != bs) {
            continue;
        }

        /* Check if the options contain a child reference */
        if (qdict_haskey(options, child->name)) {
            const char *childref = qdict_get_try_str(options, child->name);
            /*
             * The current child must not be reopened if the child
             * reference is null or points to a different node.
             */
            if (g_strcmp0(childref, child->bs->node_name)) {
                continue;
            }
            /*
             * If the child reference points to the current child then
             * reopen it with its existing set of options (note that
             * it can still inherit new options from the parent).
             */
            child_keep_old = true;
        } else {
            /* Extract child options ("child-name.*") */
            char *child_key_dot = g_strdup_printf("%s.", child->name);
            qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
            qdict_extract_subqdict(options, &new_child_options, child_key_dot);
            g_free(child_key_dot);
        }

        /* Queue the child with bs acting as its parent. The returned queue
         * pointer can be ignored because bs_queue is non-NULL here. */
        bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
                                child->klass, child->role, bs->drv->is_format,
                                options, flags, child_keep_old);
    }

    return bs_queue;
}
3779  
3780  BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
3781                                      BlockDriverState *bs,
3782                                      QDict *options, bool keep_old_opts)
3783  {
3784      return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
3785                                     NULL, 0, keep_old_opts);
3786  }
3787  
3788  /*
3789   * Reopen multiple BlockDriverStates atomically & transactionally.
3790   *
3791   * The queue passed in (bs_queue) must have been built up previous
3792   * via bdrv_reopen_queue().
3793   *
3794   * Reopens all BDS specified in the queue, with the appropriate
3795   * flags.  All devices are prepared for reopen, and failure of any
3796   * device will cause all device changes to be abandoned, and intermediate
3797   * data cleaned up.
3798   *
3799   * If all devices prepare successfully, then the changes are committed
3800   * to all devices.
3801   *
3802   * All affected nodes must be drained between bdrv_reopen_queue() and
3803   * bdrv_reopen_multiple().
3804   */
3805  int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
3806  {
3807      int ret = -1;
3808      BlockReopenQueueEntry *bs_entry, *next;
3809  
3810      assert(bs_queue != NULL);
3811  
3812      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3813          assert(bs_entry->state.bs->quiesce_counter > 0);
3814          if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
3815              goto cleanup;
3816          }
3817          bs_entry->prepared = true;
3818      }
3819  
3820      QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
3821          BDRVReopenState *state = &bs_entry->state;
3822          ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
3823                                state->shared_perm, NULL, NULL, errp);
3824          if (ret < 0) {
3825              goto cleanup_perm;
3826          }
3827          /* Check if new_backing_bs would accept the new permissions */
3828          if (state->replace_backing_bs && state->new_backing_bs) {
3829              uint64_t nperm, nshared;
3830              bdrv_child_perm(state->bs, state->new_backing_bs,
3831                              NULL, bdrv_backing_role(state->bs),
3832                              bs_queue, state->perm, state->shared_perm,
3833                              &nperm, &nshared);
3834              ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
3835                                           nperm, nshared, NULL, NULL, errp);
3836              if (ret < 0) {
3837                  goto cleanup_perm;
3838              }
3839          }
3840          bs_entry->perms_checked = true;
3841      }
3842  
3843      /*
3844       * If we reach this point, we have success and just need to apply the
3845       * changes.
3846       *
3847       * Reverse order is used to comfort qcow2 driver: on commit it need to write
3848       * IN_USE flag to the image, to mark bitmaps in the image as invalid. But
3849       * children are usually goes after parents in reopen-queue, so go from last
3850       * to first element.
3851       */
3852      QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
3853          bdrv_reopen_commit(&bs_entry->state);
3854      }
3855  
3856      ret = 0;
3857  cleanup_perm:
3858      QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
3859          BDRVReopenState *state = &bs_entry->state;
3860  
3861          if (!bs_entry->perms_checked) {
3862              continue;
3863          }
3864  
3865          if (ret == 0) {
3866              bdrv_set_perm(state->bs, state->perm, state->shared_perm);
3867          } else {
3868              bdrv_abort_perm_update(state->bs);
3869              if (state->replace_backing_bs && state->new_backing_bs) {
3870                  bdrv_abort_perm_update(state->new_backing_bs);
3871              }
3872          }
3873      }
3874  
3875      if (ret == 0) {
3876          QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
3877              BlockDriverState *bs = bs_entry->state.bs;
3878  
3879              if (bs->drv->bdrv_reopen_commit_post)
3880                  bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
3881          }
3882      }
3883  cleanup:
3884      QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
3885          if (ret) {
3886              if (bs_entry->prepared) {
3887                  bdrv_reopen_abort(&bs_entry->state);
3888              }
3889              qobject_unref(bs_entry->state.explicit_options);
3890              qobject_unref(bs_entry->state.options);
3891          }
3892          if (bs_entry->state.new_backing_bs) {
3893              bdrv_unref(bs_entry->state.new_backing_bs);
3894          }
3895          g_free(bs_entry);
3896      }
3897      g_free(bs_queue);
3898  
3899      return ret;
3900  }
3901  
3902  int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
3903                                Error **errp)
3904  {
3905      int ret;
3906      BlockReopenQueue *queue;
3907      QDict *opts = qdict_new();
3908  
3909      qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
3910  
3911      bdrv_subtree_drained_begin(bs);
3912      queue = bdrv_reopen_queue(NULL, bs, opts, true);
3913      ret = bdrv_reopen_multiple(queue, errp);
3914      bdrv_subtree_drained_end(bs);
3915  
3916      return ret;
3917  }
3918  
3919  static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
3920                                                            BdrvChild *c)
3921  {
3922      BlockReopenQueueEntry *entry;
3923  
3924      QTAILQ_FOREACH(entry, q, entry) {
3925          BlockDriverState *bs = entry->state.bs;
3926          BdrvChild *child;
3927  
3928          QLIST_FOREACH(child, &bs->children, next) {
3929              if (child == c) {
3930                  return entry;
3931              }
3932          }
3933      }
3934  
3935      return NULL;
3936  }
3937  
3938  static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
3939                               uint64_t *perm, uint64_t *shared)
3940  {
3941      BdrvChild *c;
3942      BlockReopenQueueEntry *parent;
3943      uint64_t cumulative_perms = 0;
3944      uint64_t cumulative_shared_perms = BLK_PERM_ALL;
3945  
3946      QLIST_FOREACH(c, &bs->parents, next_parent) {
3947          parent = find_parent_in_reopen_queue(q, c);
3948          if (!parent) {
3949              cumulative_perms |= c->perm;
3950              cumulative_shared_perms &= c->shared_perm;
3951          } else {
3952              uint64_t nperm, nshared;
3953  
3954              bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
3955                              parent->state.perm, parent->state.shared_perm,
3956                              &nperm, &nshared);
3957  
3958              cumulative_perms |= nperm;
3959              cumulative_shared_perms &= nshared;
3960          }
3961      }
3962      *perm = cumulative_perms;
3963      *shared = cumulative_shared_perms;
3964  }
3965  
3966  static bool bdrv_reopen_can_attach(BlockDriverState *parent,
3967                                     BdrvChild *child,
3968                                     BlockDriverState *new_child,
3969                                     Error **errp)
3970  {
3971      AioContext *parent_ctx = bdrv_get_aio_context(parent);
3972      AioContext *child_ctx = bdrv_get_aio_context(new_child);
3973      GSList *ignore;
3974      bool ret;
3975  
3976      ignore = g_slist_prepend(NULL, child);
3977      ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL);
3978      g_slist_free(ignore);
3979      if (ret) {
3980          return ret;
3981      }
3982  
3983      ignore = g_slist_prepend(NULL, child);
3984      ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp);
3985      g_slist_free(ignore);
3986      return ret;
3987  }
3988  
/*
 * Take a BDRVReopenState and check if the value of 'backing' in the
 * reopen_state->options QDict is valid or not.
 *
 * If 'backing' is missing from the QDict then return 0.
 *
 * If 'backing' contains the node name of the backing file of
 * reopen_state->bs then return 0.
 *
 * If 'backing' contains a different node name (or is null) then check
 * whether the current backing file can be replaced with the new one.
 * If that's the case then reopen_state->replace_backing_bs is set to
 * true and reopen_state->new_backing_bs contains a pointer to the new
 * backing BlockDriverState (or NULL). A non-NULL new_backing_bs is
 * stored with an additional reference taken via bdrv_ref().
 *
 * Return 0 on success, otherwise return < 0 and set @errp:
 * -EINVAL if the node cannot be found, would create a cycle, cannot be
 * attached because of its AioContext, or if the driver of
 * reopen_state->bs cannot have a backing child; -EPERM if the backing
 * link is hidden behind an implicit node or is frozen.
 */
static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
                                     Error **errp)
{
    BlockDriverState *bs = reopen_state->bs;
    BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
    QObject *value;
    const char *str;

    value = qdict_get(reopen_state->options, "backing");
    if (value == NULL) {
        /* Option not given: nothing to check */
        return 0;
    }

    switch (qobject_type(value)) {
    case QTYPE_QNULL:
        /* An explicit null asks for the backing file to be detached */
        new_backing_bs = NULL;
        break;
    case QTYPE_QSTRING:
        /* A string is looked up as a node name */
        str = qobject_get_try_str(value);
        new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
        if (new_backing_bs == NULL) {
            return -EINVAL;
        } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
            /* bs is reachable from the requested node */
            error_setg(errp, "Making '%s' a backing file of '%s' "
                       "would create a cycle", str, bs->node_name);
            return -EINVAL;
        }
        break;
    default:
        /* 'backing' does not allow any other data type */
        g_assert_not_reached();
    }

    /*
     * Check AioContext compatibility so that the bdrv_set_backing_hd() call in
     * bdrv_reopen_commit() won't fail.
     */
    if (new_backing_bs) {
        if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) {
            return -EINVAL;
        }
    }

    /*
     * Ensure that @bs can really handle backing files, because we are
     * about to give it one (or swap the existing one)
     */
    if (bs->drv->is_filter) {
        /* Filters always have a file or a backing child */
        if (!bs->backing) {
            error_setg(errp, "'%s' is a %s filter node that does not support a "
                       "backing child", bs->node_name, bs->drv->format_name);
            return -EINVAL;
        }
    } else if (!bs->drv->supports_backing) {
        error_setg(errp, "Driver '%s' of node '%s' does not support backing "
                   "files", bs->drv->format_name, bs->node_name);
        return -EINVAL;
    }

    /*
     * Find the "actual" backing file by skipping all links that point
     * to an implicit node, if any (e.g. a commit filter node).
     * We cannot use any of the bdrv_skip_*() functions here because
     * those return the first explicit node, while we are looking for
     * its overlay here.
     */
    overlay_bs = bs;
    for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
         below_bs && below_bs->implicit;
         below_bs = bdrv_filter_or_cow_bs(overlay_bs))
    {
        overlay_bs = below_bs;
    }

    /* If we want to replace the backing file we need some extra checks */
    if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
        /* Check for implicit nodes between bs and its backing file */
        if (bs != overlay_bs) {
            error_setg(errp, "Cannot change backing link if '%s' has "
                       "an implicit backing file", bs->node_name);
            return -EPERM;
        }
        /*
         * Check if the backing link that we want to replace is frozen.
         * Note that
         * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
         * because we know that overlay_bs == bs, and that @bs
         * either is a filter that uses ->backing or a COW format BDS
         * with bs->drv->supports_backing == true.
         */
        if (bdrv_is_backing_chain_frozen(overlay_bs,
                                         child_bs(overlay_bs->backing), errp))
        {
            return -EPERM;
        }
        reopen_state->replace_backing_bs = true;
        if (new_backing_bs) {
            /* Keep the new node alive while it is stored in the state */
            bdrv_ref(new_backing_bs);
            reopen_state->new_backing_bs = new_backing_bs;
        }
    }

    return 0;
}
4111  
4112  /*
4113   * Prepares a BlockDriverState for reopen. All changes are staged in the
4114   * 'opaque' field of the BDRVReopenState, which is used and allocated by
4115   * the block driver layer .bdrv_reopen_prepare()
4116   *
4117   * bs is the BlockDriverState to reopen
4118   * flags are the new open flags
4119   * queue is the reopen queue
4120   *
4121   * Returns 0 on success, non-zero on error.  On error errp will be set
4122   * as well.
4123   *
4124   * On failure, bdrv_reopen_abort() will be called to clean up any data.
4125   * It is the responsibility of the caller to then call the abort() or
4126   * commit() for any other BDS that have been left in a prepare() state
4127   *
4128   */
4129  int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
4130                          Error **errp)
4131  {
4132      int ret = -1;
4133      int old_flags;
4134      Error *local_err = NULL;
4135      BlockDriver *drv;
4136      QemuOpts *opts;
4137      QDict *orig_reopen_opts;
4138      char *discard = NULL;
4139      bool read_only;
4140      bool drv_prepared = false;
4141  
4142      assert(reopen_state != NULL);
4143      assert(reopen_state->bs->drv != NULL);
4144      drv = reopen_state->bs->drv;
4145  
4146      /* This function and each driver's bdrv_reopen_prepare() remove
4147       * entries from reopen_state->options as they are processed, so
4148       * we need to make a copy of the original QDict. */
4149      orig_reopen_opts = qdict_clone_shallow(reopen_state->options);
4150  
4151      /* Process generic block layer options */
4152      opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4153      if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
4154          ret = -EINVAL;
4155          goto error;
4156      }
4157  
4158      /* This was already called in bdrv_reopen_queue_child() so the flags
4159       * are up-to-date. This time we simply want to remove the options from
4160       * QemuOpts in order to indicate that they have been processed. */
4161      old_flags = reopen_state->flags;
4162      update_flags_from_options(&reopen_state->flags, opts);
4163      assert(old_flags == reopen_state->flags);
4164  
4165      discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
4166      if (discard != NULL) {
4167          if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
4168              error_setg(errp, "Invalid discard option");
4169              ret = -EINVAL;
4170              goto error;
4171          }
4172      }
4173  
4174      reopen_state->detect_zeroes =
4175          bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
4176      if (local_err) {
4177          error_propagate(errp, local_err);
4178          ret = -EINVAL;
4179          goto error;
4180      }
4181  
4182      /* All other options (including node-name and driver) must be unchanged.
4183       * Put them back into the QDict, so that they are checked at the end
4184       * of this function. */
4185      qemu_opts_to_qdict(opts, reopen_state->options);
4186  
4187      /* If we are to stay read-only, do not allow permission change
4188       * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
4189       * not set, or if the BDS still has copy_on_read enabled */
4190      read_only = !(reopen_state->flags & BDRV_O_RDWR);
4191      ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
4192      if (local_err) {
4193          error_propagate(errp, local_err);
4194          goto error;
4195      }
4196  
4197      /* Calculate required permissions after reopening */
4198      bdrv_reopen_perm(queue, reopen_state->bs,
4199                       &reopen_state->perm, &reopen_state->shared_perm);
4200  
4201      ret = bdrv_flush(reopen_state->bs);
4202      if (ret) {
4203          error_setg_errno(errp, -ret, "Error flushing drive");
4204          goto error;
4205      }
4206  
4207      if (drv->bdrv_reopen_prepare) {
4208          /*
4209           * If a driver-specific option is missing, it means that we
4210           * should reset it to its default value.
4211           * But not all options allow that, so we need to check it first.
4212           */
4213          ret = bdrv_reset_options_allowed(reopen_state->bs,
4214                                           reopen_state->options, errp);
4215          if (ret) {
4216              goto error;
4217          }
4218  
4219          ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
4220          if (ret) {
4221              if (local_err != NULL) {
4222                  error_propagate(errp, local_err);
4223              } else {
4224                  bdrv_refresh_filename(reopen_state->bs);
4225                  error_setg(errp, "failed while preparing to reopen image '%s'",
4226                             reopen_state->bs->filename);
4227              }
4228              goto error;
4229          }
4230      } else {
4231          /* It is currently mandatory to have a bdrv_reopen_prepare()
4232           * handler for each supported drv. */
4233          error_setg(errp, "Block format '%s' used by node '%s' "
4234                     "does not support reopening files", drv->format_name,
4235                     bdrv_get_device_or_node_name(reopen_state->bs));
4236          ret = -1;
4237          goto error;
4238      }
4239  
4240      drv_prepared = true;
4241  
4242      /*
4243       * We must provide the 'backing' option if the BDS has a backing
4244       * file or if the image file has a backing file name as part of
4245       * its metadata. Otherwise the 'backing' option can be omitted.
4246       */
4247      if (drv->supports_backing && reopen_state->backing_missing &&
4248          (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
4249          error_setg(errp, "backing is missing for '%s'",
4250                     reopen_state->bs->node_name);
4251          ret = -EINVAL;
4252          goto error;
4253      }
4254  
4255      /*
4256       * Allow changing the 'backing' option. The new value can be
4257       * either a reference to an existing node (using its node name)
4258       * or NULL to simply detach the current backing file.
4259       */
4260      ret = bdrv_reopen_parse_backing(reopen_state, errp);
4261      if (ret < 0) {
4262          goto error;
4263      }
4264      qdict_del(reopen_state->options, "backing");
4265  
4266      /* Options that are not handled are only okay if they are unchanged
4267       * compared to the old state. It is expected that some options are only
4268       * used for the initial open, but not reopen (e.g. filename) */
4269      if (qdict_size(reopen_state->options)) {
4270          const QDictEntry *entry = qdict_first(reopen_state->options);
4271  
4272          do {
4273              QObject *new = entry->value;
4274              QObject *old = qdict_get(reopen_state->bs->options, entry->key);
4275  
4276              /* Allow child references (child_name=node_name) as long as they
4277               * point to the current child (i.e. everything stays the same). */
4278              if (qobject_type(new) == QTYPE_QSTRING) {
4279                  BdrvChild *child;
4280                  QLIST_FOREACH(child, &reopen_state->bs->children, next) {
4281                      if (!strcmp(child->name, entry->key)) {
4282                          break;
4283                      }
4284                  }
4285  
4286                  if (child) {
4287                      const char *str = qobject_get_try_str(new);
4288                      if (!strcmp(child->bs->node_name, str)) {
4289                          continue; /* Found child with this name, skip option */
4290                      }
4291                  }
4292              }
4293  
4294              /*
4295               * TODO: When using -drive to specify blockdev options, all values
4296               * will be strings; however, when using -blockdev, blockdev-add or
4297               * filenames using the json:{} pseudo-protocol, they will be
4298               * correctly typed.
4299               * In contrast, reopening options are (currently) always strings
4300               * (because you can only specify them through qemu-io; all other
4301               * callers do not specify any options).
4302               * Therefore, when using anything other than -drive to create a BDS,
4303               * this cannot detect non-string options as unchanged, because
4304               * qobject_is_equal() always returns false for objects of different
4305               * type.  In the future, this should be remedied by correctly typing
4306               * all options.  For now, this is not too big of an issue because
4307               * the user can simply omit options which cannot be changed anyway,
4308               * so they will stay unchanged.
4309               */
4310              if (!qobject_is_equal(new, old)) {
4311                  error_setg(errp, "Cannot change the option '%s'", entry->key);
4312                  ret = -EINVAL;
4313                  goto error;
4314              }
4315          } while ((entry = qdict_next(reopen_state->options, entry)));
4316      }
4317  
4318      ret = 0;
4319  
4320      /* Restore the original reopen_state->options QDict */
4321      qobject_unref(reopen_state->options);
4322      reopen_state->options = qobject_ref(orig_reopen_opts);
4323  
4324  error:
4325      if (ret < 0 && drv_prepared) {
4326          /* drv->bdrv_reopen_prepare() has succeeded, so we need to
4327           * call drv->bdrv_reopen_abort() before signaling an error
4328           * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
4329           * when the respective bdrv_reopen_prepare() has failed) */
4330          if (drv->bdrv_reopen_abort) {
4331              drv->bdrv_reopen_abort(reopen_state);
4332          }
4333      }
4334      qemu_opts_del(opts);
4335      qobject_unref(orig_reopen_opts);
4336      g_free(discard);
4337      return ret;
4338  }
4339  
4340  /*
4341   * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
4342   * makes them final by swapping the staging BlockDriverState contents into
4343   * the active BlockDriverState contents.
4344   */
4345  void bdrv_reopen_commit(BDRVReopenState *reopen_state)
4346  {
4347      BlockDriver *drv;
4348      BlockDriverState *bs;
4349      BdrvChild *child;
4350  
4351      assert(reopen_state != NULL);
4352      bs = reopen_state->bs;
4353      drv = bs->drv;
4354      assert(drv != NULL);
4355  
4356      /* If there are any driver level actions to take */
4357      if (drv->bdrv_reopen_commit) {
4358          drv->bdrv_reopen_commit(reopen_state);
4359      }
4360  
4361      /* set BDS specific flags now */
4362      qobject_unref(bs->explicit_options);
4363      qobject_unref(bs->options);
4364  
4365      bs->explicit_options   = reopen_state->explicit_options;
4366      bs->options            = reopen_state->options;
4367      bs->open_flags         = reopen_state->flags;
4368      bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
4369      bs->detect_zeroes      = reopen_state->detect_zeroes;
4370  
4371      if (reopen_state->replace_backing_bs) {
4372          qdict_del(bs->explicit_options, "backing");
4373          qdict_del(bs->options, "backing");
4374      }
4375  
4376      /* Remove child references from bs->options and bs->explicit_options.
4377       * Child options were already removed in bdrv_reopen_queue_child() */
4378      QLIST_FOREACH(child, &bs->children, next) {
4379          qdict_del(bs->explicit_options, child->name);
4380          qdict_del(bs->options, child->name);
4381      }
4382  
4383      /*
4384       * Change the backing file if a new one was specified. We do this
4385       * after updating bs->options, so bdrv_refresh_filename() (called
4386       * from bdrv_set_backing_hd()) has the new values.
4387       */
4388      if (reopen_state->replace_backing_bs) {
4389          BlockDriverState *old_backing_bs = child_bs(bs->backing);
4390          assert(!old_backing_bs || !old_backing_bs->implicit);
4391          /* Abort the permission update on the backing bs we're detaching */
4392          if (old_backing_bs) {
4393              bdrv_abort_perm_update(old_backing_bs);
4394          }
4395          bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
4396      }
4397  
4398      bdrv_refresh_limits(bs, NULL);
4399  }
4400  
4401  /*
4402   * Abort the reopen, and delete and free the staged changes in
4403   * reopen_state
4404   */
4405  void bdrv_reopen_abort(BDRVReopenState *reopen_state)
4406  {
4407      BlockDriver *drv;
4408  
4409      assert(reopen_state != NULL);
4410      drv = reopen_state->bs->drv;
4411      assert(drv != NULL);
4412  
4413      if (drv->bdrv_reopen_abort) {
4414          drv->bdrv_reopen_abort(reopen_state);
4415      }
4416  }
4417  
4418  
4419  static void bdrv_close(BlockDriverState *bs)
4420  {
4421      BdrvAioNotifier *ban, *ban_next;
4422      BdrvChild *child, *next;
4423  
4424      assert(!bs->refcnt);
4425  
4426      bdrv_drained_begin(bs); /* complete I/O */
4427      bdrv_flush(bs);
4428      bdrv_drain(bs); /* in case flush left pending I/O */
4429  
4430      if (bs->drv) {
4431          if (bs->drv->bdrv_close) {
4432              /* Must unfreeze all children, so bdrv_unref_child() works */
4433              bs->drv->bdrv_close(bs);
4434          }
4435          bs->drv = NULL;
4436      }
4437  
4438      QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
4439          bdrv_unref_child(bs, child);
4440      }
4441  
4442      bs->backing = NULL;
4443      bs->file = NULL;
4444      g_free(bs->opaque);
4445      bs->opaque = NULL;
4446      qatomic_set(&bs->copy_on_read, 0);
4447      bs->backing_file[0] = '\0';
4448      bs->backing_format[0] = '\0';
4449      bs->total_sectors = 0;
4450      bs->encrypted = false;
4451      bs->sg = false;
4452      qobject_unref(bs->options);
4453      qobject_unref(bs->explicit_options);
4454      bs->options = NULL;
4455      bs->explicit_options = NULL;
4456      qobject_unref(bs->full_open_options);
4457      bs->full_open_options = NULL;
4458  
4459      bdrv_release_named_dirty_bitmaps(bs);
4460      assert(QLIST_EMPTY(&bs->dirty_bitmaps));
4461  
4462      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4463          g_free(ban);
4464      }
4465      QLIST_INIT(&bs->aio_notifiers);
4466      bdrv_drained_end(bs);
4467  
4468      /*
4469       * If we're still inside some bdrv_drain_all_begin()/end() sections, end
4470       * them now since this BDS won't exist anymore when bdrv_drain_all_end()
4471       * gets called.
4472       */
4473      if (bs->quiesce_counter) {
4474          bdrv_drain_all_end_quiesce(bs);
4475      }
4476  }
4477  
4478  void bdrv_close_all(void)
4479  {
4480      assert(job_next(NULL) == NULL);
4481      blk_exp_close_all();
4482  
4483      /* Drop references from requests still in flight, such as canceled block
4484       * jobs whose AIO context has not been polled yet */
4485      bdrv_drain_all();
4486  
4487      blk_remove_all_bs();
4488      blockdev_close_all_bdrv_states();
4489  
4490      assert(QTAILQ_EMPTY(&all_bdrv_states));
4491  }
4492  
4493  static bool should_update_child(BdrvChild *c, BlockDriverState *to)
4494  {
4495      GQueue *queue;
4496      GHashTable *found;
4497      bool ret;
4498  
4499      if (c->klass->stay_at_node) {
4500          return false;
4501      }
4502  
4503      /* If the child @c belongs to the BDS @to, replacing the current
4504       * c->bs by @to would mean to create a loop.
4505       *
4506       * Such a case occurs when appending a BDS to a backing chain.
4507       * For instance, imagine the following chain:
4508       *
4509       *   guest device -> node A -> further backing chain...
4510       *
4511       * Now we create a new BDS B which we want to put on top of this
4512       * chain, so we first attach A as its backing node:
4513       *
4514       *                   node B
4515       *                     |
4516       *                     v
4517       *   guest device -> node A -> further backing chain...
4518       *
4519       * Finally we want to replace A by B.  When doing that, we want to
4520       * replace all pointers to A by pointers to B -- except for the
4521       * pointer from B because (1) that would create a loop, and (2)
4522       * that pointer should simply stay intact:
4523       *
4524       *   guest device -> node B
4525       *                     |
4526       *                     v
4527       *                   node A -> further backing chain...
4528       *
4529       * In general, when replacing a node A (c->bs) by a node B (@to),
4530       * if A is a child of B, that means we cannot replace A by B there
4531       * because that would create a loop.  Silently detaching A from B
4532       * is also not really an option.  So overall just leaving A in
4533       * place there is the most sensible choice.
4534       *
4535       * We would also create a loop in any cases where @c is only
4536       * indirectly referenced by @to. Prevent this by returning false
4537       * if @c is found (by breadth-first search) anywhere in the whole
4538       * subtree of @to.
4539       */
4540  
4541      ret = true;
4542      found = g_hash_table_new(NULL, NULL);
4543      g_hash_table_add(found, to);
4544      queue = g_queue_new();
4545      g_queue_push_tail(queue, to);
4546  
4547      while (!g_queue_is_empty(queue)) {
4548          BlockDriverState *v = g_queue_pop_head(queue);
4549          BdrvChild *c2;
4550  
4551          QLIST_FOREACH(c2, &v->children, next) {
4552              if (c2 == c) {
4553                  ret = false;
4554                  break;
4555              }
4556  
4557              if (g_hash_table_contains(found, c2->bs)) {
4558                  continue;
4559              }
4560  
4561              g_queue_push_tail(queue, c2->bs);
4562              g_hash_table_add(found, c2->bs);
4563          }
4564      }
4565  
4566      g_queue_free(queue);
4567      g_hash_table_destroy(found);
4568  
4569      return ret;
4570  }
4571  
4572  /*
4573   * With auto_skip=true bdrv_replace_node_common skips updating from parents
4574   * if it creates a parent-child relation loop or if parent is block-job.
4575   *
4576   * With auto_skip=false the error is returned if from has a parent which should
4577   * not be updated.
4578   */
4579  static void bdrv_replace_node_common(BlockDriverState *from,
4580                                       BlockDriverState *to,
4581                                       bool auto_skip, Error **errp)
4582  {
4583      BdrvChild *c, *next;
4584      GSList *list = NULL, *p;
4585      uint64_t perm = 0, shared = BLK_PERM_ALL;
4586      int ret;
4587  
4588      /* Make sure that @from doesn't go away until we have successfully attached
4589       * all of its parents to @to. */
4590      bdrv_ref(from);
4591  
4592      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
4593      assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
4594      bdrv_drained_begin(from);
4595  
4596      /* Put all parents into @list and calculate their cumulative permissions */
4597      QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
4598          assert(c->bs == from);
4599          if (!should_update_child(c, to)) {
4600              if (auto_skip) {
4601                  continue;
4602              }
4603              error_setg(errp, "Should not change '%s' link to '%s'",
4604                         c->name, from->node_name);
4605              goto out;
4606          }
4607          if (c->frozen) {
4608              error_setg(errp, "Cannot change '%s' link to '%s'",
4609                         c->name, from->node_name);
4610              goto out;
4611          }
4612          list = g_slist_prepend(list, c);
4613          perm |= c->perm;
4614          shared &= c->shared_perm;
4615      }
4616  
4617      /* Check whether the required permissions can be granted on @to, ignoring
4618       * all BdrvChild in @list so that they can't block themselves. */
4619      ret = bdrv_check_update_perm(to, NULL, perm, shared, list, NULL, errp);
4620      if (ret < 0) {
4621          bdrv_abort_perm_update(to);
4622          goto out;
4623      }
4624  
4625      /* Now actually perform the change. We performed the permission check for
4626       * all elements of @list at once, so set the permissions all at once at the
4627       * very end. */
4628      for (p = list; p != NULL; p = p->next) {
4629          c = p->data;
4630  
4631          bdrv_ref(to);
4632          bdrv_replace_child_noperm(c, to);
4633          bdrv_unref(from);
4634      }
4635  
4636      bdrv_get_cumulative_perm(to, &perm, &shared);
4637      bdrv_set_perm(to, perm, shared);
4638  
4639  out:
4640      g_slist_free(list);
4641      bdrv_drained_end(from);
4642      bdrv_unref(from);
4643  }
4644  
4645  void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
4646                         Error **errp)
4647  {
4648      return bdrv_replace_node_common(from, to, true, errp);
4649  }
4650  
4651  /*
4652   * Add new bs contents at the top of an image chain while the chain is
4653   * live, while keeping required fields on the top layer.
4654   *
4655   * This will modify the BlockDriverState fields, and swap contents
4656   * between bs_new and bs_top. Both bs_new and bs_top are modified.
4657   *
4658   * bs_new must not be attached to a BlockBackend.
4659   *
4660   * This function does not create any image files.
4661   *
4662   * bdrv_append() takes ownership of a bs_new reference and unrefs it because
4663   * that's what the callers commonly need. bs_new will be referenced by the old
4664   * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
4665   * reference of its own, it must call bdrv_ref().
4666   */
4667  void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
4668                   Error **errp)
4669  {
4670      Error *local_err = NULL;
4671  
4672      bdrv_set_backing_hd(bs_new, bs_top, &local_err);
4673      if (local_err) {
4674          error_propagate(errp, local_err);
4675          goto out;
4676      }
4677  
4678      bdrv_replace_node(bs_top, bs_new, &local_err);
4679      if (local_err) {
4680          error_propagate(errp, local_err);
4681          bdrv_set_backing_hd(bs_new, NULL, &error_abort);
4682          goto out;
4683      }
4684  
4685      /* bs_new is now referenced by its new parents, we don't need the
4686       * additional reference any more. */
4687  out:
4688      bdrv_unref(bs_new);
4689  }
4690  
4691  static void bdrv_delete(BlockDriverState *bs)
4692  {
4693      assert(bdrv_op_blocker_is_empty(bs));
4694      assert(!bs->refcnt);
4695  
4696      /* remove from list, if necessary */
4697      if (bs->node_name[0] != '\0') {
4698          QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
4699      }
4700      QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
4701  
4702      bdrv_close(bs);
4703  
4704      g_free(bs);
4705  }
4706  
4707  /*
4708   * Run consistency checks on an image
4709   *
4710   * Returns 0 if the check could be completed (it doesn't mean that the image is
4711   * free of errors) or -errno when an internal error occurred. The results of the
4712   * check are stored in res.
4713   */
4714  int coroutine_fn bdrv_co_check(BlockDriverState *bs,
4715                                 BdrvCheckResult *res, BdrvCheckMode fix)
4716  {
4717      if (bs->drv == NULL) {
4718          return -ENOMEDIUM;
4719      }
4720      if (bs->drv->bdrv_co_check == NULL) {
4721          return -ENOTSUP;
4722      }
4723  
4724      memset(res, 0, sizeof(*res));
4725      return bs->drv->bdrv_co_check(bs, res, fix);
4726  }
4727  
4728  /*
4729   * Return values:
4730   * 0        - success
4731   * -EINVAL  - backing format specified, but no file
4732   * -ENOSPC  - can't update the backing file because no space is left in the
4733   *            image file header
4734   * -ENOTSUP - format driver doesn't support changing the backing file
4735   */
4736  int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
4737                               const char *backing_fmt, bool warn)
4738  {
4739      BlockDriver *drv = bs->drv;
4740      int ret;
4741  
4742      if (!drv) {
4743          return -ENOMEDIUM;
4744      }
4745  
4746      /* Backing file format doesn't make sense without a backing file */
4747      if (backing_fmt && !backing_file) {
4748          return -EINVAL;
4749      }
4750  
4751      if (warn && backing_file && !backing_fmt) {
4752          warn_report("Deprecated use of backing file without explicit "
4753                      "backing format, use of this image requires "
4754                      "potentially unsafe format probing");
4755      }
4756  
4757      if (drv->bdrv_change_backing_file != NULL) {
4758          ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
4759      } else {
4760          ret = -ENOTSUP;
4761      }
4762  
4763      if (ret == 0) {
4764          pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
4765          pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
4766          pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
4767                  backing_file ?: "");
4768      }
4769      return ret;
4770  }
4771  
4772  /*
4773   * Finds the first non-filter node above bs in the chain between
4774   * active and bs.  The returned node is either an immediate parent of
4775   * bs, or there are only filter nodes between the two.
4776   *
4777   * Returns NULL if bs is not found in active's image chain,
4778   * or if active == bs.
4779   *
4780   * Returns the bottommost base image if bs == NULL.
4781   */
4782  BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
4783                                      BlockDriverState *bs)
4784  {
4785      bs = bdrv_skip_filters(bs);
4786      active = bdrv_skip_filters(active);
4787  
4788      while (active) {
4789          BlockDriverState *next = bdrv_backing_chain_next(active);
4790          if (bs == next) {
4791              return active;
4792          }
4793          active = next;
4794      }
4795  
4796      return NULL;
4797  }
4798  
4799  /* Given a BDS, searches for the base layer. */
4800  BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4801  {
4802      return bdrv_find_overlay(bs, NULL);
4803  }
4804  
4805  /*
4806   * Return true if at least one of the COW (backing) and filter links
4807   * between @bs and @base is frozen. @errp is set if that's the case.
4808   * @base must be reachable from @bs, or NULL.
4809   */
4810  bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
4811                                    Error **errp)
4812  {
4813      BlockDriverState *i;
4814      BdrvChild *child;
4815  
4816      for (i = bs; i != base; i = child_bs(child)) {
4817          child = bdrv_filter_or_cow_child(i);
4818  
4819          if (child && child->frozen) {
4820              error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
4821                         child->name, i->node_name, child->bs->node_name);
4822              return true;
4823          }
4824      }
4825  
4826      return false;
4827  }
4828  
4829  /*
4830   * Freeze all COW (backing) and filter links between @bs and @base.
4831   * If any of the links is already frozen the operation is aborted and
4832   * none of the links are modified.
4833   * @base must be reachable from @bs, or NULL.
4834   * Returns 0 on success. On failure returns < 0 and sets @errp.
4835   */
4836  int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
4837                                Error **errp)
4838  {
4839      BlockDriverState *i;
4840      BdrvChild *child;
4841  
4842      if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
4843          return -EPERM;
4844      }
4845  
4846      for (i = bs; i != base; i = child_bs(child)) {
4847          child = bdrv_filter_or_cow_child(i);
4848          if (child && child->bs->never_freeze) {
4849              error_setg(errp, "Cannot freeze '%s' link to '%s'",
4850                         child->name, child->bs->node_name);
4851              return -EPERM;
4852          }
4853      }
4854  
4855      for (i = bs; i != base; i = child_bs(child)) {
4856          child = bdrv_filter_or_cow_child(i);
4857          if (child) {
4858              child->frozen = true;
4859          }
4860      }
4861  
4862      return 0;
4863  }
4864  
4865  /*
4866   * Unfreeze all COW (backing) and filter links between @bs and @base.
4867   * The caller must ensure that all links are frozen before using this
4868   * function.
4869   * @base must be reachable from @bs, or NULL.
4870   */
4871  void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
4872  {
4873      BlockDriverState *i;
4874      BdrvChild *child;
4875  
4876      for (i = bs; i != base; i = child_bs(child)) {
4877          child = bdrv_filter_or_cow_child(i);
4878          if (child) {
4879              assert(child->frozen);
4880              child->frozen = false;
4881          }
4882      }
4883  }
4884  
4885  /*
4886   * Drops images above 'base' up to and including 'top', and sets the image
4887   * above 'top' to have base as its backing file.
4888   *
4889   * Requires that the overlay to 'top' is opened r/w, so that the backing file
4890   * information in 'bs' can be properly updated.
4891   *
4892   * E.g., this will convert the following chain:
4893   * bottom <- base <- intermediate <- top <- active
4894   *
4895   * to
4896   *
4897   * bottom <- base <- active
4898   *
4899   * It is allowed for bottom==base, in which case it converts:
4900   *
4901   * base <- intermediate <- top <- active
4902   *
4903   * to
4904   *
4905   * base <- active
4906   *
4907   * If backing_file_str is non-NULL, it will be used when modifying top's
4908   * overlay image metadata.
4909   *
4910   * Error conditions:
4911   *  if active == top, that is considered an error
4912   *
4913   */
4914  int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
4915                             const char *backing_file_str)
4916  {
4917      BlockDriverState *explicit_top = top;
4918      bool update_inherits_from;
4919      BdrvChild *c;
4920      Error *local_err = NULL;
4921      int ret = -EIO;
4922      g_autoptr(GSList) updated_children = NULL;
4923      GSList *p;
4924  
4925      bdrv_ref(top);
4926      bdrv_subtree_drained_begin(top);
4927  
4928      if (!top->drv || !base->drv) {
4929          goto exit;
4930      }
4931  
4932      /* Make sure that base is in the backing chain of top */
4933      if (!bdrv_chain_contains(top, base)) {
4934          goto exit;
4935      }
4936  
4937      /* If 'base' recursively inherits from 'top' then we should set
4938       * base->inherits_from to top->inherits_from after 'top' and all
4939       * other intermediate nodes have been dropped.
4940       * If 'top' is an implicit node (e.g. "commit_top") we should skip
4941       * it because no one inherits from it. We use explicit_top for that. */
4942      explicit_top = bdrv_skip_implicit_filters(explicit_top);
4943      update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
4944  
4945      /* success - we can delete the intermediate states, and link top->base */
4946      /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
4947       * we've figured out how they should work. */
4948      if (!backing_file_str) {
4949          bdrv_refresh_filename(base);
4950          backing_file_str = base->filename;
4951      }
4952  
4953      QLIST_FOREACH(c, &top->parents, next_parent) {
4954          updated_children = g_slist_prepend(updated_children, c);
4955      }
4956  
4957      bdrv_replace_node_common(top, base, false, &local_err);
4958      if (local_err) {
4959          error_report_err(local_err);
4960          goto exit;
4961      }
4962  
4963      for (p = updated_children; p; p = p->next) {
4964          c = p->data;
4965  
4966          if (c->klass->update_filename) {
4967              ret = c->klass->update_filename(c, base, backing_file_str,
4968                                              &local_err);
4969              if (ret < 0) {
4970                  /*
4971                   * TODO: Actually, we want to rollback all previous iterations
4972                   * of this loop, and (which is almost impossible) previous
4973                   * bdrv_replace_node()...
4974                   *
4975                   * Note, that c->klass->update_filename may lead to permission
4976                   * update, so it's a bad idea to call it inside permission
4977                   * update transaction of bdrv_replace_node.
4978                   */
4979                  error_report_err(local_err);
4980                  goto exit;
4981              }
4982          }
4983      }
4984  
4985      if (update_inherits_from) {
4986          base->inherits_from = explicit_top->inherits_from;
4987      }
4988  
4989      ret = 0;
4990  exit:
4991      bdrv_subtree_drained_end(top);
4992      bdrv_unref(top);
4993      return ret;
4994  }
4995  
4996  /**
4997   * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
4998   * sums the size of all data-bearing children.  (This excludes backing
4999   * children.)
5000   */
5001  static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
5002  {
5003      BdrvChild *child;
5004      int64_t child_size, sum = 0;
5005  
5006      QLIST_FOREACH(child, &bs->children, next) {
5007          if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5008                             BDRV_CHILD_FILTERED))
5009          {
5010              child_size = bdrv_get_allocated_file_size(child->bs);
5011              if (child_size < 0) {
5012                  return child_size;
5013              }
5014              sum += child_size;
5015          }
5016      }
5017  
5018      return sum;
5019  }
5020  
5021  /**
5022   * Length of a allocated file in bytes. Sparse files are counted by actual
5023   * allocated space. Return < 0 if error or unknown.
5024   */
5025  int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
5026  {
5027      BlockDriver *drv = bs->drv;
5028      if (!drv) {
5029          return -ENOMEDIUM;
5030      }
5031      if (drv->bdrv_get_allocated_file_size) {
5032          return drv->bdrv_get_allocated_file_size(bs);
5033      }
5034  
5035      if (drv->bdrv_file_open) {
5036          /*
5037           * Protocol drivers default to -ENOTSUP (most of their data is
5038           * not stored in any of their children (if they even have any),
5039           * so there is no generic way to figure it out).
5040           */
5041          return -ENOTSUP;
5042      } else if (drv->is_filter) {
5043          /* Filter drivers default to the size of their filtered child */
5044          return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
5045      } else {
5046          /* Other drivers default to summing their children's sizes */
5047          return bdrv_sum_allocated_file_size(bs);
5048      }
5049  }
5050  
5051  /*
5052   * bdrv_measure:
5053   * @drv: Format driver
5054   * @opts: Creation options for new image
5055   * @in_bs: Existing image containing data for new image (may be NULL)
5056   * @errp: Error object
5057   * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
5058   *          or NULL on error
5059   *
5060   * Calculate file size required to create a new image.
5061   *
5062   * If @in_bs is given then space for allocated clusters and zero clusters
5063   * from that image are included in the calculation.  If @opts contains a
5064   * backing file that is shared by @in_bs then backing clusters may be omitted
5065   * from the calculation.
5066   *
5067   * If @in_bs is NULL then the calculation includes no allocated clusters
5068   * unless a preallocation option is given in @opts.
5069   *
5070   * Note that @in_bs may use a different BlockDriver from @drv.
5071   *
5072   * If an error occurs the @errp pointer is set.
5073   */
5074  BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
5075                                 BlockDriverState *in_bs, Error **errp)
5076  {
5077      if (!drv->bdrv_measure) {
5078          error_setg(errp, "Block driver '%s' does not support size measurement",
5079                     drv->format_name);
5080          return NULL;
5081      }
5082  
5083      return drv->bdrv_measure(opts, in_bs, errp);
5084  }
5085  
5086  /**
5087   * Return number of sectors on success, -errno on error.
5088   */
5089  int64_t bdrv_nb_sectors(BlockDriverState *bs)
5090  {
5091      BlockDriver *drv = bs->drv;
5092  
5093      if (!drv)
5094          return -ENOMEDIUM;
5095  
5096      if (drv->has_variable_length) {
5097          int ret = refresh_total_sectors(bs, bs->total_sectors);
5098          if (ret < 0) {
5099              return ret;
5100          }
5101      }
5102      return bs->total_sectors;
5103  }
5104  
5105  /**
5106   * Return length in bytes on success, -errno on error.
5107   * The length is always a multiple of BDRV_SECTOR_SIZE.
5108   */
5109  int64_t bdrv_getlength(BlockDriverState *bs)
5110  {
5111      int64_t ret = bdrv_nb_sectors(bs);
5112  
5113      if (ret < 0) {
5114          return ret;
5115      }
5116      if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
5117          return -EFBIG;
5118      }
5119      return ret * BDRV_SECTOR_SIZE;
5120  }
5121  
5122  /* return 0 as number of sectors if no device present or error */
5123  void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
5124  {
5125      int64_t nb_sectors = bdrv_nb_sectors(bs);
5126  
5127      *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
5128  }
5129  
5130  bool bdrv_is_sg(BlockDriverState *bs)
5131  {
5132      return bs->sg;
5133  }
5134  
5135  /**
5136   * Return whether the given node supports compressed writes.
5137   */
5138  bool bdrv_supports_compressed_writes(BlockDriverState *bs)
5139  {
5140      BlockDriverState *filtered;
5141  
5142      if (!bs->drv || !block_driver_can_compress(bs->drv)) {
5143          return false;
5144      }
5145  
5146      filtered = bdrv_filter_bs(bs);
5147      if (filtered) {
5148          /*
5149           * Filters can only forward compressed writes, so we have to
5150           * check the child.
5151           */
5152          return bdrv_supports_compressed_writes(filtered);
5153      }
5154  
5155      return true;
5156  }
5157  
5158  const char *bdrv_get_format_name(BlockDriverState *bs)
5159  {
5160      return bs->drv ? bs->drv->format_name : NULL;
5161  }
5162  
/*
 * qsort() comparator for arrays of C strings: @a and @b each point to an
 * array element (a string pointer); the strings are ordered with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
5167  
5168  void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
5169                           void *opaque, bool read_only)
5170  {
5171      BlockDriver *drv;
5172      int count = 0;
5173      int i;
5174      const char **formats = NULL;
5175  
5176      QLIST_FOREACH(drv, &bdrv_drivers, list) {
5177          if (drv->format_name) {
5178              bool found = false;
5179              int i = count;
5180  
5181              if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
5182                  continue;
5183              }
5184  
5185              while (formats && i && !found) {
5186                  found = !strcmp(formats[--i], drv->format_name);
5187              }
5188  
5189              if (!found) {
5190                  formats = g_renew(const char *, formats, count + 1);
5191                  formats[count++] = drv->format_name;
5192              }
5193          }
5194      }
5195  
5196      for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
5197          const char *format_name = block_driver_modules[i].format_name;
5198  
5199          if (format_name) {
5200              bool found = false;
5201              int j = count;
5202  
5203              if (use_bdrv_whitelist &&
5204                  !bdrv_format_is_whitelisted(format_name, read_only)) {
5205                  continue;
5206              }
5207  
5208              while (formats && j && !found) {
5209                  found = !strcmp(formats[--j], format_name);
5210              }
5211  
5212              if (!found) {
5213                  formats = g_renew(const char *, formats, count + 1);
5214                  formats[count++] = format_name;
5215              }
5216          }
5217      }
5218  
5219      qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
5220  
5221      for (i = 0; i < count; i++) {
5222          it(opaque, formats[i]);
5223      }
5224  
5225      g_free(formats);
5226  }
5227  
5228  /* This function is to find a node in the bs graph */
5229  BlockDriverState *bdrv_find_node(const char *node_name)
5230  {
5231      BlockDriverState *bs;
5232  
5233      assert(node_name);
5234  
5235      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5236          if (!strcmp(node_name, bs->node_name)) {
5237              return bs;
5238          }
5239      }
5240      return NULL;
5241  }
5242  
5243  /* Put this QMP function here so it can access the static graph_bdrv_states. */
5244  BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
5245                                             Error **errp)
5246  {
5247      BlockDeviceInfoList *list;
5248      BlockDriverState *bs;
5249  
5250      list = NULL;
5251      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5252          BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
5253          if (!info) {
5254              qapi_free_BlockDeviceInfoList(list);
5255              return NULL;
5256          }
5257          QAPI_LIST_PREPEND(list, info);
5258      }
5259  
5260      return list;
5261  }
5262  
/* Working state used while assembling an XDbgBlockGraph. */
typedef struct XDbgBlockGraphConstructor {
    /* The graph under construction; handed out by xdbg_graph_finalize() */
    XDbgBlockGraph *graph;
    /* Maps an object pointer to the numeric id assigned to it (ids start at 1) */
    GHashTable *graph_nodes;
} XDbgBlockGraphConstructor;
5267  
5268  static XDbgBlockGraphConstructor *xdbg_graph_new(void)
5269  {
5270      XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
5271  
5272      gr->graph = g_new0(XDbgBlockGraph, 1);
5273      gr->graph_nodes = g_hash_table_new(NULL, NULL);
5274  
5275      return gr;
5276  }
5277  
5278  static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
5279  {
5280      XDbgBlockGraph *graph = gr->graph;
5281  
5282      g_hash_table_destroy(gr->graph_nodes);
5283      g_free(gr);
5284  
5285      return graph;
5286  }
5287  
5288  static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
5289  {
5290      uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
5291  
5292      if (ret != 0) {
5293          return ret;
5294      }
5295  
5296      /*
5297       * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
5298       * answer of g_hash_table_lookup.
5299       */
5300      ret = g_hash_table_size(gr->graph_nodes) + 1;
5301      g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
5302  
5303      return ret;
5304  }
5305  
5306  static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
5307                                  XDbgBlockGraphNodeType type, const char *name)
5308  {
5309      XDbgBlockGraphNode *n;
5310  
5311      n = g_new0(XDbgBlockGraphNode, 1);
5312  
5313      n->id = xdbg_graph_node_num(gr, node);
5314      n->type = type;
5315      n->name = g_strdup(name);
5316  
5317      QAPI_LIST_PREPEND(gr->graph->nodes, n);
5318  }
5319  
5320  static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
5321                                  const BdrvChild *child)
5322  {
5323      BlockPermission qapi_perm;
5324      XDbgBlockGraphEdge *edge;
5325  
5326      edge = g_new0(XDbgBlockGraphEdge, 1);
5327  
5328      edge->parent = xdbg_graph_node_num(gr, parent);
5329      edge->child = xdbg_graph_node_num(gr, child->bs);
5330      edge->name = g_strdup(child->name);
5331  
5332      for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
5333          uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
5334  
5335          if (flag & child->perm) {
5336              QAPI_LIST_PREPEND(edge->perm, qapi_perm);
5337          }
5338          if (flag & child->shared_perm) {
5339              QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
5340          }
5341      }
5342  
5343      QAPI_LIST_PREPEND(gr->graph->edges, edge);
5344  }
5345  
5346  
5347  XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
5348  {
5349      BlockBackend *blk;
5350      BlockJob *job;
5351      BlockDriverState *bs;
5352      BdrvChild *child;
5353      XDbgBlockGraphConstructor *gr = xdbg_graph_new();
5354  
5355      for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
5356          char *allocated_name = NULL;
5357          const char *name = blk_name(blk);
5358  
5359          if (!*name) {
5360              name = allocated_name = blk_get_attached_dev_id(blk);
5361          }
5362          xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
5363                             name);
5364          g_free(allocated_name);
5365          if (blk_root(blk)) {
5366              xdbg_graph_add_edge(gr, blk, blk_root(blk));
5367          }
5368      }
5369  
5370      for (job = block_job_next(NULL); job; job = block_job_next(job)) {
5371          GSList *el;
5372  
5373          xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
5374                             job->job.id);
5375          for (el = job->nodes; el; el = el->next) {
5376              xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
5377          }
5378      }
5379  
5380      QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5381          xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
5382                             bs->node_name);
5383          QLIST_FOREACH(child, &bs->children, next) {
5384              xdbg_graph_add_edge(gr, bs, child);
5385          }
5386      }
5387  
5388      return xdbg_graph_finalize(gr);
5389  }
5390  
5391  BlockDriverState *bdrv_lookup_bs(const char *device,
5392                                   const char *node_name,
5393                                   Error **errp)
5394  {
5395      BlockBackend *blk;
5396      BlockDriverState *bs;
5397  
5398      if (device) {
5399          blk = blk_by_name(device);
5400  
5401          if (blk) {
5402              bs = blk_bs(blk);
5403              if (!bs) {
5404                  error_setg(errp, "Device '%s' has no medium", device);
5405              }
5406  
5407              return bs;
5408          }
5409      }
5410  
5411      if (node_name) {
5412          bs = bdrv_find_node(node_name);
5413  
5414          if (bs) {
5415              return bs;
5416          }
5417      }
5418  
5419      error_setg(errp, "Cannot find device=%s nor node_name=%s",
5420                       device ? device : "",
5421                       node_name ? node_name : "");
5422      return NULL;
5423  }
5424  
5425  /* If 'base' is in the same chain as 'top', return true. Otherwise,
5426   * return false.  If either argument is NULL, return false. */
5427  bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
5428  {
5429      while (top && top != base) {
5430          top = bdrv_filter_or_cow_bs(top);
5431      }
5432  
5433      return top != NULL;
5434  }
5435  
5436  BlockDriverState *bdrv_next_node(BlockDriverState *bs)
5437  {
5438      if (!bs) {
5439          return QTAILQ_FIRST(&graph_bdrv_states);
5440      }
5441      return QTAILQ_NEXT(bs, node_list);
5442  }
5443  
5444  BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
5445  {
5446      if (!bs) {
5447          return QTAILQ_FIRST(&all_bdrv_states);
5448      }
5449      return QTAILQ_NEXT(bs, bs_list);
5450  }
5451  
5452  const char *bdrv_get_node_name(const BlockDriverState *bs)
5453  {
5454      return bs->node_name;
5455  }
5456  
5457  const char *bdrv_get_parent_name(const BlockDriverState *bs)
5458  {
5459      BdrvChild *c;
5460      const char *name;
5461  
5462      /* If multiple parents have a name, just pick the first one. */
5463      QLIST_FOREACH(c, &bs->parents, next_parent) {
5464          if (c->klass->get_name) {
5465              name = c->klass->get_name(c);
5466              if (name && *name) {
5467                  return name;
5468              }
5469          }
5470      }
5471  
5472      return NULL;
5473  }
5474  
5475  /* TODO check what callers really want: bs->node_name or blk_name() */
5476  const char *bdrv_get_device_name(const BlockDriverState *bs)
5477  {
5478      return bdrv_get_parent_name(bs) ?: "";
5479  }
5480  
5481  /* This can be used to identify nodes that might not have a device
5482   * name associated. Since node and device names live in the same
5483   * namespace, the result is unambiguous. The exception is if both are
5484   * absent, then this returns an empty (non-null) string. */
5485  const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
5486  {
5487      return bdrv_get_parent_name(bs) ?: bs->node_name;
5488  }
5489  
5490  int bdrv_get_flags(BlockDriverState *bs)
5491  {
5492      return bs->open_flags;
5493  }
5494  
5495  int bdrv_has_zero_init_1(BlockDriverState *bs)
5496  {
5497      return 1;
5498  }
5499  
5500  int bdrv_has_zero_init(BlockDriverState *bs)
5501  {
5502      BlockDriverState *filtered;
5503  
5504      if (!bs->drv) {
5505          return 0;
5506      }
5507  
5508      /* If BS is a copy on write image, it is initialized to
5509         the contents of the base image, which may not be zeroes.  */
5510      if (bdrv_cow_child(bs)) {
5511          return 0;
5512      }
5513      if (bs->drv->bdrv_has_zero_init) {
5514          return bs->drv->bdrv_has_zero_init(bs);
5515      }
5516  
5517      filtered = bdrv_filter_bs(bs);
5518      if (filtered) {
5519          return bdrv_has_zero_init(filtered);
5520      }
5521  
5522      /* safe default */
5523      return 0;
5524  }
5525  
5526  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
5527  {
5528      if (!(bs->open_flags & BDRV_O_UNMAP)) {
5529          return false;
5530      }
5531  
5532      return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
5533  }
5534  
5535  void bdrv_get_backing_filename(BlockDriverState *bs,
5536                                 char *filename, int filename_size)
5537  {
5538      pstrcpy(filename, filename_size, bs->backing_file);
5539  }
5540  
5541  int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
5542  {
5543      int ret;
5544      BlockDriver *drv = bs->drv;
5545      /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
5546      if (!drv) {
5547          return -ENOMEDIUM;
5548      }
5549      if (!drv->bdrv_get_info) {
5550          BlockDriverState *filtered = bdrv_filter_bs(bs);
5551          if (filtered) {
5552              return bdrv_get_info(filtered, bdi);
5553          }
5554          return -ENOTSUP;
5555      }
5556      memset(bdi, 0, sizeof(*bdi));
5557      ret = drv->bdrv_get_info(bs, bdi);
5558      if (ret < 0) {
5559          return ret;
5560      }
5561  
5562      if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
5563          return -EINVAL;
5564      }
5565  
5566      return 0;
5567  }
5568  
5569  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
5570                                            Error **errp)
5571  {
5572      BlockDriver *drv = bs->drv;
5573      if (drv && drv->bdrv_get_specific_info) {
5574          return drv->bdrv_get_specific_info(bs, errp);
5575      }
5576      return NULL;
5577  }
5578  
5579  BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
5580  {
5581      BlockDriver *drv = bs->drv;
5582      if (!drv || !drv->bdrv_get_specific_stats) {
5583          return NULL;
5584      }
5585      return drv->bdrv_get_specific_stats(bs);
5586  }
5587  
5588  void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
5589  {
5590      if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
5591          return;
5592      }
5593  
5594      bs->drv->bdrv_debug_event(bs, event);
5595  }
5596  
5597  static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
5598  {
5599      while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
5600          bs = bdrv_primary_bs(bs);
5601      }
5602  
5603      if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
5604          assert(bs->drv->bdrv_debug_remove_breakpoint);
5605          return bs;
5606      }
5607  
5608      return NULL;
5609  }
5610  
5611  int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
5612                            const char *tag)
5613  {
5614      bs = bdrv_find_debug_node(bs);
5615      if (bs) {
5616          return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
5617      }
5618  
5619      return -ENOTSUP;
5620  }
5621  
5622  int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
5623  {
5624      bs = bdrv_find_debug_node(bs);
5625      if (bs) {
5626          return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
5627      }
5628  
5629      return -ENOTSUP;
5630  }
5631  
5632  int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
5633  {
5634      while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
5635          bs = bdrv_primary_bs(bs);
5636      }
5637  
5638      if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
5639          return bs->drv->bdrv_debug_resume(bs, tag);
5640      }
5641  
5642      return -ENOTSUP;
5643  }
5644  
5645  bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
5646  {
5647      while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
5648          bs = bdrv_primary_bs(bs);
5649      }
5650  
5651      if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
5652          return bs->drv->bdrv_debug_is_suspended(bs, tag);
5653      }
5654  
5655      return false;
5656  }
5657  
/*
 * Search the backing chain of @bs for the node that is referenced as
 * @backing_file and return it, or NULL if no match is found (or if @bs is
 * NULL, has no driver, or @backing_file is NULL).
 *
 * backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * For each overlay in the chain, one of three comparisons is used:
 *  - backing file overridden: compare against the backing node's filename;
 *  - either name has a protocol prefix: compare the strings unmodified,
 *    then against the overlay's full backing filename;
 *  - otherwise: compare the realpath()-canonicalized absolute paths.
 */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    bool filenames_refreshed = false;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;
    BlockDriverState *bs_below;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Output buffers for realpath(); released before returning */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /*
     * Being largely a legacy function, skip any filters here
     * (because filters do not have normal filenames, so they cannot
     * match anyway; and allowing json:{} filenames is a bit out of
     * scope).
     */
    for (curr_bs = bdrv_skip_filters(bs);
         bdrv_cow_child(curr_bs) != NULL;
         curr_bs = bs_below)
    {
        /* Set first so that 'continue' below still advances the loop */
        bs_below = bdrv_backing_chain_next(curr_bs);

        if (bdrv_backing_overridden(curr_bs)) {
            /*
             * If the backing file was overridden, we can only compare
             * directly against the backing node's filename.
             */

            if (!filenames_refreshed) {
                /*
                 * This will automatically refresh all of the
                 * filenames in the rest of the backing chain, so we
                 * only need to do this once.
                 */
                bdrv_refresh_filename(bs_below);
                filenames_refreshed = true;
            }

            if (strcmp(backing_file, bs_below->filename) == 0) {
                retval = bs_below;
                break;
            }
        } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            /*
             * If either of the filename paths is actually a protocol, then
             * compare unmodified paths; otherwise make paths relative.
             */
            char *backing_file_full_ret;

            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = bs_below;
                break;
            }
            /* Also check against the full backing filename for the image */
            backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
                                                                   NULL);
            if (backing_file_full_ret) {
                /* Compare before freeing the temporary string */
                bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
                g_free(backing_file_full_ret);
                if (equal) {
                    retval = bs_below;
                    break;
                }
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
                                                       NULL);
            /* We are going to compare canonicalized absolute pathnames */
            if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
                /* Cannot resolve the requested name here; try the next layer */
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
            if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
                /* Cannot resolve this layer's backing name; try the next layer */
                g_free(filename_tmp);
                continue;
            }
            g_free(filename_tmp);

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = bs_below;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    return retval;
}
5769  
5770  void bdrv_init(void)
5771  {
5772      module_call_init(MODULE_INIT_BLOCK);
5773  }
5774  
5775  void bdrv_init_with_whitelist(void)
5776  {
5777      use_bdrv_whitelist = 1;
5778      bdrv_init();
5779  }
5780  
/*
 * Activate @bs: recurse into all children first, then, if @bs is currently
 * flagged BDRV_O_INACTIVE, clear the flag, update the permissions, call the
 * driver's bdrv_co_invalidate_cache() callback (if any), clear the
 * skip-store state of every dirty bitmap and refresh the total sector
 * count.  Finally, every parent's klass->activate() callback is invoked.
 *
 * On failure in any step after the flag was cleared, BDRV_O_INACTIVE is set
 * again before returning.
 *
 * Returns 0 on success, -ENOMEDIUM if @bs has no driver, or another
 * negative errno value on failure.
 */
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BdrvChild *child, *parent;
    uint64_t perm, shared_perm;
    Error *local_err = NULL;
    int ret;
    BdrvDirtyBitmap *bm;

    if (!bs->drv)  {
        return -ENOMEDIUM;
    }

    /* Children first; failure is detected through local_err, not the
     * return value of the recursive call */
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_co_invalidate_cache(child->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }
    }

    /*
     * Update permissions, they may differ for inactive nodes.
     *
     * Note that the required permissions of inactive images are always a
     * subset of the permissions required after activating the image. This
     * allows us to just get the permissions upfront without restricting
     * drv->bdrv_invalidate_cache().
     *
     * It also means that in error cases, we don't have to try and revert to
     * the old permissions (which is an operation that could fail, too). We can
     * just keep the extended permissions for the next time that an activation
     * of the image is tried.
     */
    if (bs->open_flags & BDRV_O_INACTIVE) {
        /* Clear the flag before computing the cumulative permissions */
        bs->open_flags &= ~BDRV_O_INACTIVE;
        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
        ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
        if (ret < 0) {
            bdrv_abort_perm_update(bs);
            bs->open_flags |= BDRV_O_INACTIVE;
            return ret;
        }
        bdrv_set_perm(bs, perm, shared_perm);

        if (bs->drv->bdrv_co_invalidate_cache) {
            bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }

        FOR_EACH_DIRTY_BITMAP(bs, bm) {
            bdrv_dirty_bitmap_skip_store(bm, false);
        }

        ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            bs->open_flags |= BDRV_O_INACTIVE;
            error_setg_errno(errp, -ret, "Could not refresh total sector count");
            return ret;
        }
    }

    /* Notify parents; runs even if @bs was already active on entry */
    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        if (parent->klass->activate) {
            parent->klass->activate(parent, &local_err);
            if (local_err) {
                bs->open_flags |= BDRV_O_INACTIVE;
                error_propagate(errp, local_err);
                return -EINVAL;
            }
        }
    }

    return 0;
}
5859  
5860  void bdrv_invalidate_cache_all(Error **errp)
5861  {
5862      BlockDriverState *bs;
5863      BdrvNextIterator it;
5864  
5865      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5866          AioContext *aio_context = bdrv_get_aio_context(bs);
5867          int ret;
5868  
5869          aio_context_acquire(aio_context);
5870          ret = bdrv_invalidate_cache(bs, errp);
5871          aio_context_release(aio_context);
5872          if (ret < 0) {
5873              bdrv_next_cleanup(&it);
5874              return;
5875          }
5876      }
5877  }
5878  
5879  static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
5880  {
5881      BdrvChild *parent;
5882  
5883      QLIST_FOREACH(parent, &bs->parents, next_parent) {
5884          if (parent->klass->parent_is_bds) {
5885              BlockDriverState *parent_bs = parent->opaque;
5886              if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
5887                  return true;
5888              }
5889          }
5890      }
5891  
5892      return false;
5893  }
5894  
5895  static int bdrv_inactivate_recurse(BlockDriverState *bs)
5896  {
5897      BdrvChild *child, *parent;
5898      bool tighten_restrictions;
5899      uint64_t perm, shared_perm;
5900      int ret;
5901  
5902      if (!bs->drv) {
5903          return -ENOMEDIUM;
5904      }
5905  
5906      /* Make sure that we don't inactivate a child before its parent.
5907       * It will be covered by recursion from the yet active parent. */
5908      if (bdrv_has_bds_parent(bs, true)) {
5909          return 0;
5910      }
5911  
5912      assert(!(bs->open_flags & BDRV_O_INACTIVE));
5913  
5914      /* Inactivate this node */
5915      if (bs->drv->bdrv_inactivate) {
5916          ret = bs->drv->bdrv_inactivate(bs);
5917          if (ret < 0) {
5918              return ret;
5919          }
5920      }
5921  
5922      QLIST_FOREACH(parent, &bs->parents, next_parent) {
5923          if (parent->klass->inactivate) {
5924              ret = parent->klass->inactivate(parent);
5925              if (ret < 0) {
5926                  return ret;
5927              }
5928          }
5929      }
5930  
5931      bs->open_flags |= BDRV_O_INACTIVE;
5932  
5933      /* Update permissions, they may differ for inactive nodes */
5934      bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
5935      ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL,
5936                            &tighten_restrictions, NULL);
5937      assert(tighten_restrictions == false);
5938      if (ret < 0) {
5939          /* We only tried to loosen restrictions, so errors are not fatal */
5940          bdrv_abort_perm_update(bs);
5941      } else {
5942          bdrv_set_perm(bs, perm, shared_perm);
5943      }
5944  
5945  
5946      /* Recursively inactivate children */
5947      QLIST_FOREACH(child, &bs->children, next) {
5948          ret = bdrv_inactivate_recurse(child->bs);
5949          if (ret < 0) {
5950              return ret;
5951          }
5952      }
5953  
5954      return 0;
5955  }
5956  
5957  int bdrv_inactivate_all(void)
5958  {
5959      BlockDriverState *bs = NULL;
5960      BdrvNextIterator it;
5961      int ret = 0;
5962      GSList *aio_ctxs = NULL, *ctx;
5963  
5964      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5965          AioContext *aio_context = bdrv_get_aio_context(bs);
5966  
5967          if (!g_slist_find(aio_ctxs, aio_context)) {
5968              aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
5969              aio_context_acquire(aio_context);
5970          }
5971      }
5972  
5973      for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5974          /* Nodes with BDS parents are covered by recursion from the last
5975           * parent that gets inactivated. Don't inactivate them a second
5976           * time if that has already happened. */
5977          if (bdrv_has_bds_parent(bs, false)) {
5978              continue;
5979          }
5980          ret = bdrv_inactivate_recurse(bs);
5981          if (ret < 0) {
5982              bdrv_next_cleanup(&it);
5983              goto out;
5984          }
5985      }
5986  
5987  out:
5988      for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
5989          AioContext *aio_context = ctx->data;
5990          aio_context_release(aio_context);
5991      }
5992      g_slist_free(aio_ctxs);
5993  
5994      return ret;
5995  }
5996  
5997  /**************************************************************/
5998  /* removable device support */
5999  
6000  /**
6001   * Return TRUE if the media is present
6002   */
6003  bool bdrv_is_inserted(BlockDriverState *bs)
6004  {
6005      BlockDriver *drv = bs->drv;
6006      BdrvChild *child;
6007  
6008      if (!drv) {
6009          return false;
6010      }
6011      if (drv->bdrv_is_inserted) {
6012          return drv->bdrv_is_inserted(bs);
6013      }
6014      QLIST_FOREACH(child, &bs->children, next) {
6015          if (!bdrv_is_inserted(child->bs)) {
6016              return false;
6017          }
6018      }
6019      return true;
6020  }
6021  
6022  /**
6023   * If eject_flag is TRUE, eject the media. Otherwise, close the tray
6024   */
6025  void bdrv_eject(BlockDriverState *bs, bool eject_flag)
6026  {
6027      BlockDriver *drv = bs->drv;
6028  
6029      if (drv && drv->bdrv_eject) {
6030          drv->bdrv_eject(bs, eject_flag);
6031      }
6032  }
6033  
6034  /**
6035   * Lock or unlock the media (if it is locked, the user won't be able
6036   * to eject it manually).
6037   */
6038  void bdrv_lock_medium(BlockDriverState *bs, bool locked)
6039  {
6040      BlockDriver *drv = bs->drv;
6041  
6042      trace_bdrv_lock_medium(bs, locked);
6043  
6044      if (drv && drv->bdrv_lock_medium) {
6045          drv->bdrv_lock_medium(bs, locked);
6046      }
6047  }
6048  
6049  /* Get a reference to bs */
6050  void bdrv_ref(BlockDriverState *bs)
6051  {
6052      bs->refcnt++;
6053  }
6054  
6055  /* Release a previously grabbed reference to bs.
6056   * If after releasing, reference count is zero, the BlockDriverState is
6057   * deleted. */
6058  void bdrv_unref(BlockDriverState *bs)
6059  {
6060      if (!bs) {
6061          return;
6062      }
6063      assert(bs->refcnt > 0);
6064      if (--bs->refcnt == 0) {
6065          bdrv_delete(bs);
6066      }
6067  }
6068  
/* One entry of a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error describing why the operation is blocked; also used as the key
     * when removing the blocker in bdrv_op_unblock() */
    Error *reason;
    /* Linkage within the op_blockers list */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
6073  
6074  bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
6075  {
6076      BdrvOpBlocker *blocker;
6077      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6078      if (!QLIST_EMPTY(&bs->op_blockers[op])) {
6079          blocker = QLIST_FIRST(&bs->op_blockers[op]);
6080          error_propagate_prepend(errp, error_copy(blocker->reason),
6081                                  "Node '%s' is busy: ",
6082                                  bdrv_get_device_or_node_name(bs));
6083          return true;
6084      }
6085      return false;
6086  }
6087  
6088  void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
6089  {
6090      BdrvOpBlocker *blocker;
6091      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6092  
6093      blocker = g_new0(BdrvOpBlocker, 1);
6094      blocker->reason = reason;
6095      QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
6096  }
6097  
6098  void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
6099  {
6100      BdrvOpBlocker *blocker, *next;
6101      assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6102      QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
6103          if (blocker->reason == reason) {
6104              QLIST_REMOVE(blocker, list);
6105              g_free(blocker);
6106          }
6107      }
6108  }
6109  
6110  void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
6111  {
6112      int i;
6113      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6114          bdrv_op_block(bs, i, reason);
6115      }
6116  }
6117  
6118  void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
6119  {
6120      int i;
6121      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6122          bdrv_op_unblock(bs, i, reason);
6123      }
6124  }
6125  
6126  bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
6127  {
6128      int i;
6129  
6130      for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6131          if (!QLIST_EMPTY(&bs->op_blockers[i])) {
6132              return false;
6133          }
6134      }
6135      return true;
6136  }
6137  
6138  void bdrv_img_create(const char *filename, const char *fmt,
6139                       const char *base_filename, const char *base_fmt,
6140                       char *options, uint64_t img_size, int flags, bool quiet,
6141                       Error **errp)
6142  {
6143      QemuOptsList *create_opts = NULL;
6144      QemuOpts *opts = NULL;
6145      const char *backing_fmt, *backing_file;
6146      int64_t size;
6147      BlockDriver *drv, *proto_drv;
6148      Error *local_err = NULL;
6149      int ret = 0;
6150  
6151      /* Find driver and parse its options */
6152      drv = bdrv_find_format(fmt);
6153      if (!drv) {
6154          error_setg(errp, "Unknown file format '%s'", fmt);
6155          return;
6156      }
6157  
6158      proto_drv = bdrv_find_protocol(filename, true, errp);
6159      if (!proto_drv) {
6160          return;
6161      }
6162  
6163      if (!drv->create_opts) {
6164          error_setg(errp, "Format driver '%s' does not support image creation",
6165                     drv->format_name);
6166          return;
6167      }
6168  
6169      if (!proto_drv->create_opts) {
6170          error_setg(errp, "Protocol driver '%s' does not support image creation",
6171                     proto_drv->format_name);
6172          return;
6173      }
6174  
6175      /* Create parameter list */
6176      create_opts = qemu_opts_append(create_opts, drv->create_opts);
6177      create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
6178  
6179      opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
6180  
6181      /* Parse -o options */
6182      if (options) {
6183          if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
6184              goto out;
6185          }
6186      }
6187  
6188      if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
6189          qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
6190      } else if (img_size != UINT64_C(-1)) {
6191          error_setg(errp, "The image size must be specified only once");
6192          goto out;
6193      }
6194  
6195      if (base_filename) {
6196          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
6197                            NULL)) {
6198              error_setg(errp, "Backing file not supported for file format '%s'",
6199                         fmt);
6200              goto out;
6201          }
6202      }
6203  
6204      if (base_fmt) {
6205          if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
6206              error_setg(errp, "Backing file format not supported for file "
6207                               "format '%s'", fmt);
6208              goto out;
6209          }
6210      }
6211  
6212      backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
6213      if (backing_file) {
6214          if (!strcmp(filename, backing_file)) {
6215              error_setg(errp, "Error: Trying to create an image with the "
6216                               "same filename as the backing file");
6217              goto out;
6218          }
6219          if (backing_file[0] == '\0') {
6220              error_setg(errp, "Expected backing file name, got empty string");
6221              goto out;
6222          }
6223      }
6224  
6225      backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
6226  
6227      /* The size for the image must always be specified, unless we have a backing
6228       * file and we have not been forbidden from opening it. */
6229      size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
6230      if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
6231          BlockDriverState *bs;
6232          char *full_backing;
6233          int back_flags;
6234          QDict *backing_options = NULL;
6235  
6236          full_backing =
6237              bdrv_get_full_backing_filename_from_filename(filename, backing_file,
6238                                                           &local_err);
6239          if (local_err) {
6240              goto out;
6241          }
6242          assert(full_backing);
6243  
6244          /* backing files always opened read-only */
6245          back_flags = flags;
6246          back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
6247  
6248          backing_options = qdict_new();
6249          if (backing_fmt) {
6250              qdict_put_str(backing_options, "driver", backing_fmt);
6251          }
6252          qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
6253  
6254          bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
6255                         &local_err);
6256          g_free(full_backing);
6257          if (!bs) {
6258              error_append_hint(&local_err, "Could not open backing image.\n");
6259              goto out;
6260          } else {
6261              if (!backing_fmt) {
6262                  warn_report("Deprecated use of backing file without explicit "
6263                              "backing format (detected format of %s)",
6264                              bs->drv->format_name);
6265                  if (bs->drv != &bdrv_raw) {
6266                      /*
6267                       * A probe of raw deserves the most attention:
6268                       * leaving the backing format out of the image
6269                       * will ensure bs->probed is set (ensuring we
6270                       * don't accidentally commit into the backing
6271                       * file), and allow more spots to warn the users
6272                       * to fix their toolchain when opening this image
6273                       * later.  For other images, we can safely record
6274                       * the format that we probed.
6275                       */
6276                      backing_fmt = bs->drv->format_name;
6277                      qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt,
6278                                   NULL);
6279                  }
6280              }
6281              if (size == -1) {
6282                  /* Opened BS, have no size */
6283                  size = bdrv_getlength(bs);
6284                  if (size < 0) {
6285                      error_setg_errno(errp, -size, "Could not get size of '%s'",
6286                                       backing_file);
6287                      bdrv_unref(bs);
6288                      goto out;
6289                  }
6290                  qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
6291              }
6292              bdrv_unref(bs);
6293          }
6294          /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
6295      } else if (backing_file && !backing_fmt) {
6296          warn_report("Deprecated use of unopened backing file without "
6297                      "explicit backing format, use of this image requires "
6298                      "potentially unsafe format probing");
6299      }
6300  
6301      if (size == -1) {
6302          error_setg(errp, "Image creation needs a size parameter");
6303          goto out;
6304      }
6305  
6306      if (!quiet) {
6307          printf("Formatting '%s', fmt=%s ", filename, fmt);
6308          qemu_opts_print(opts, " ");
6309          puts("");
6310          fflush(stdout);
6311      }
6312  
6313      ret = bdrv_create(drv, filename, opts, &local_err);
6314  
6315      if (ret == -EFBIG) {
6316          /* This is generally a better message than whatever the driver would
6317           * deliver (especially because of the cluster_size_hint), since that
6318           * is most probably not much different from "image too large". */
6319          const char *cluster_size_hint = "";
6320          if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
6321              cluster_size_hint = " (try using a larger cluster size)";
6322          }
6323          error_setg(errp, "The image size is too large for file format '%s'"
6324                     "%s", fmt, cluster_size_hint);
6325          error_free(local_err);
6326          local_err = NULL;
6327      }
6328  
6329  out:
6330      qemu_opts_del(opts);
6331      qemu_opts_free(create_opts);
6332      error_propagate(errp, local_err);
6333  }
6334  
6335  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
6336  {
6337      return bs ? bs->aio_context : qemu_get_aio_context();
6338  }
6339  
6340  AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
6341  {
6342      Coroutine *self = qemu_coroutine_self();
6343      AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
6344      AioContext *new_ctx;
6345  
6346      /*
6347       * Increase bs->in_flight to ensure that this operation is completed before
6348       * moving the node to a different AioContext. Read new_ctx only afterwards.
6349       */
6350      bdrv_inc_in_flight(bs);
6351  
6352      new_ctx = bdrv_get_aio_context(bs);
6353      aio_co_reschedule_self(new_ctx);
6354      return old_ctx;
6355  }
6356  
6357  void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
6358  {
6359      aio_co_reschedule_self(old_ctx);
6360      bdrv_dec_in_flight(bs);
6361  }
6362  
6363  void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
6364  {
6365      AioContext *ctx = bdrv_get_aio_context(bs);
6366  
6367      /* In the main thread, bs->aio_context won't change concurrently */
6368      assert(qemu_get_current_aio_context() == qemu_get_aio_context());
6369  
6370      /*
6371       * We're in coroutine context, so we already hold the lock of the main
6372       * loop AioContext. Don't lock it twice to avoid deadlocks.
6373       */
6374      assert(qemu_in_coroutine());
6375      if (ctx != qemu_get_aio_context()) {
6376          aio_context_acquire(ctx);
6377      }
6378  }
6379  
6380  void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
6381  {
6382      AioContext *ctx = bdrv_get_aio_context(bs);
6383  
6384      assert(qemu_in_coroutine());
6385      if (ctx != qemu_get_aio_context()) {
6386          aio_context_release(ctx);
6387      }
6388  }
6389  
6390  void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
6391  {
6392      aio_co_enter(bdrv_get_aio_context(bs), co);
6393  }
6394  
6395  static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
6396  {
6397      QLIST_REMOVE(ban, list);
6398      g_free(ban);
6399  }
6400  
6401  static void bdrv_detach_aio_context(BlockDriverState *bs)
6402  {
6403      BdrvAioNotifier *baf, *baf_tmp;
6404  
6405      assert(!bs->walking_aio_notifiers);
6406      bs->walking_aio_notifiers = true;
6407      QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
6408          if (baf->deleted) {
6409              bdrv_do_remove_aio_context_notifier(baf);
6410          } else {
6411              baf->detach_aio_context(baf->opaque);
6412          }
6413      }
6414      /* Never mind iterating again to check for ->deleted.  bdrv_close() will
6415       * remove remaining aio notifiers if we aren't called again.
6416       */
6417      bs->walking_aio_notifiers = false;
6418  
6419      if (bs->drv && bs->drv->bdrv_detach_aio_context) {
6420          bs->drv->bdrv_detach_aio_context(bs);
6421      }
6422  
6423      if (bs->quiesce_counter) {
6424          aio_enable_external(bs->aio_context);
6425      }
6426      bs->aio_context = NULL;
6427  }
6428  
6429  static void bdrv_attach_aio_context(BlockDriverState *bs,
6430                                      AioContext *new_context)
6431  {
6432      BdrvAioNotifier *ban, *ban_tmp;
6433  
6434      if (bs->quiesce_counter) {
6435          aio_disable_external(new_context);
6436      }
6437  
6438      bs->aio_context = new_context;
6439  
6440      if (bs->drv && bs->drv->bdrv_attach_aio_context) {
6441          bs->drv->bdrv_attach_aio_context(bs, new_context);
6442      }
6443  
6444      assert(!bs->walking_aio_notifiers);
6445      bs->walking_aio_notifiers = true;
6446      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
6447          if (ban->deleted) {
6448              bdrv_do_remove_aio_context_notifier(ban);
6449          } else {
6450              ban->attached_aio_context(new_context, ban->opaque);
6451          }
6452      }
6453      bs->walking_aio_notifiers = false;
6454  }
6455  
6456  /*
6457   * Changes the AioContext used for fd handlers, timers, and BHs by this
6458   * BlockDriverState and all its children and parents.
6459   *
6460   * Must be called from the main AioContext.
6461   *
6462   * The caller must own the AioContext lock for the old AioContext of bs, but it
6463   * must not own the AioContext lock for new_context (unless new_context is the
6464   * same as the current context of bs).
6465   *
6466   * @ignore will accumulate all visited BdrvChild object. The caller is
6467   * responsible for freeing the list afterwards.
6468   */
6469  void bdrv_set_aio_context_ignore(BlockDriverState *bs,
6470                                   AioContext *new_context, GSList **ignore)
6471  {
6472      AioContext *old_context = bdrv_get_aio_context(bs);
6473      BdrvChild *child;
6474  
6475      g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
6476  
6477      if (old_context == new_context) {
6478          return;
6479      }
6480  
6481      bdrv_drained_begin(bs);
6482  
6483      QLIST_FOREACH(child, &bs->children, next) {
6484          if (g_slist_find(*ignore, child)) {
6485              continue;
6486          }
6487          *ignore = g_slist_prepend(*ignore, child);
6488          bdrv_set_aio_context_ignore(child->bs, new_context, ignore);
6489      }
6490      QLIST_FOREACH(child, &bs->parents, next_parent) {
6491          if (g_slist_find(*ignore, child)) {
6492              continue;
6493          }
6494          assert(child->klass->set_aio_ctx);
6495          *ignore = g_slist_prepend(*ignore, child);
6496          child->klass->set_aio_ctx(child, new_context, ignore);
6497      }
6498  
6499      bdrv_detach_aio_context(bs);
6500  
6501      /* Acquire the new context, if necessary */
6502      if (qemu_get_aio_context() != new_context) {
6503          aio_context_acquire(new_context);
6504      }
6505  
6506      bdrv_attach_aio_context(bs, new_context);
6507  
6508      /*
6509       * If this function was recursively called from
6510       * bdrv_set_aio_context_ignore(), there may be nodes in the
6511       * subtree that have not yet been moved to the new AioContext.
6512       * Release the old one so bdrv_drained_end() can poll them.
6513       */
6514      if (qemu_get_aio_context() != old_context) {
6515          aio_context_release(old_context);
6516      }
6517  
6518      bdrv_drained_end(bs);
6519  
6520      if (qemu_get_aio_context() != old_context) {
6521          aio_context_acquire(old_context);
6522      }
6523      if (qemu_get_aio_context() != new_context) {
6524          aio_context_release(new_context);
6525      }
6526  }
6527  
6528  static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
6529                                              GSList **ignore, Error **errp)
6530  {
6531      if (g_slist_find(*ignore, c)) {
6532          return true;
6533      }
6534      *ignore = g_slist_prepend(*ignore, c);
6535  
6536      /*
6537       * A BdrvChildClass that doesn't handle AioContext changes cannot
6538       * tolerate any AioContext changes
6539       */
6540      if (!c->klass->can_set_aio_ctx) {
6541          char *user = bdrv_child_user_desc(c);
6542          error_setg(errp, "Changing iothreads is not supported by %s", user);
6543          g_free(user);
6544          return false;
6545      }
6546      if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) {
6547          assert(!errp || *errp);
6548          return false;
6549      }
6550      return true;
6551  }
6552  
6553  bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
6554                                      GSList **ignore, Error **errp)
6555  {
6556      if (g_slist_find(*ignore, c)) {
6557          return true;
6558      }
6559      *ignore = g_slist_prepend(*ignore, c);
6560      return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp);
6561  }
6562  
6563  /* @ignore will accumulate all visited BdrvChild object. The caller is
6564   * responsible for freeing the list afterwards. */
6565  bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
6566                                GSList **ignore, Error **errp)
6567  {
6568      BdrvChild *c;
6569  
6570      if (bdrv_get_aio_context(bs) == ctx) {
6571          return true;
6572      }
6573  
6574      QLIST_FOREACH(c, &bs->parents, next_parent) {
6575          if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
6576              return false;
6577          }
6578      }
6579      QLIST_FOREACH(c, &bs->children, next) {
6580          if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) {
6581              return false;
6582          }
6583      }
6584  
6585      return true;
6586  }
6587  
6588  int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
6589                                     BdrvChild *ignore_child, Error **errp)
6590  {
6591      GSList *ignore;
6592      bool ret;
6593  
6594      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
6595      ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
6596      g_slist_free(ignore);
6597  
6598      if (!ret) {
6599          return -EPERM;
6600      }
6601  
6602      ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
6603      bdrv_set_aio_context_ignore(bs, ctx, &ignore);
6604      g_slist_free(ignore);
6605  
6606      return 0;
6607  }
6608  
6609  int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
6610                               Error **errp)
6611  {
6612      return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
6613  }
6614  
6615  void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6616          void (*attached_aio_context)(AioContext *new_context, void *opaque),
6617          void (*detach_aio_context)(void *opaque), void *opaque)
6618  {
6619      BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6620      *ban = (BdrvAioNotifier){
6621          .attached_aio_context = attached_aio_context,
6622          .detach_aio_context   = detach_aio_context,
6623          .opaque               = opaque
6624      };
6625  
6626      QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6627  }
6628  
6629  void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6630                                        void (*attached_aio_context)(AioContext *,
6631                                                                     void *),
6632                                        void (*detach_aio_context)(void *),
6633                                        void *opaque)
6634  {
6635      BdrvAioNotifier *ban, *ban_next;
6636  
6637      QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6638          if (ban->attached_aio_context == attached_aio_context &&
6639              ban->detach_aio_context   == detach_aio_context   &&
6640              ban->opaque               == opaque               &&
6641              ban->deleted              == false)
6642          {
6643              if (bs->walking_aio_notifiers) {
6644                  ban->deleted = true;
6645              } else {
6646                  bdrv_do_remove_aio_context_notifier(ban);
6647              }
6648              return;
6649          }
6650      }
6651  
6652      abort();
6653  }
6654  
6655  int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6656                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
6657                         bool force,
6658                         Error **errp)
6659  {
6660      if (!bs->drv) {
6661          error_setg(errp, "Node is ejected");
6662          return -ENOMEDIUM;
6663      }
6664      if (!bs->drv->bdrv_amend_options) {
6665          error_setg(errp, "Block driver '%s' does not support option amendment",
6666                     bs->drv->format_name);
6667          return -ENOTSUP;
6668      }
6669      return bs->drv->bdrv_amend_options(bs, opts, status_cb,
6670                                         cb_opaque, force, errp);
6671  }
6672  
6673  /*
6674   * This function checks whether the given @to_replace is allowed to be
6675   * replaced by a node that always shows the same data as @bs.  This is
6676   * used for example to verify whether the mirror job can replace
6677   * @to_replace by the target mirrored from @bs.
6678   * To be replaceable, @bs and @to_replace may either be guaranteed to
6679   * always show the same data (because they are only connected through
6680   * filters), or some driver may allow replacing one of its children
6681   * because it can guarantee that this child's data is not visible at
6682   * all (for example, for dissenting quorum children that have no other
6683   * parents).
6684   */
6685  bool bdrv_recurse_can_replace(BlockDriverState *bs,
6686                                BlockDriverState *to_replace)
6687  {
6688      BlockDriverState *filtered;
6689  
6690      if (!bs || !bs->drv) {
6691          return false;
6692      }
6693  
6694      if (bs == to_replace) {
6695          return true;
6696      }
6697  
6698      /* See what the driver can do */
6699      if (bs->drv->bdrv_recurse_can_replace) {
6700          return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
6701      }
6702  
6703      /* For filters without an own implementation, we can recurse on our own */
6704      filtered = bdrv_filter_bs(bs);
6705      if (filtered) {
6706          return bdrv_recurse_can_replace(filtered, to_replace);
6707      }
6708  
6709      /* Safe default */
6710      return false;
6711  }
6712  
6713  /*
6714   * Check whether the given @node_name can be replaced by a node that
6715   * has the same data as @parent_bs.  If so, return @node_name's BDS;
6716   * NULL otherwise.
6717   *
6718   * @node_name must be a (recursive) *child of @parent_bs (or this
6719   * function will return NULL).
6720   *
6721   * The result (whether the node can be replaced or not) is only valid
6722   * for as long as no graph or permission changes occur.
6723   */
6724  BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
6725                                          const char *node_name, Error **errp)
6726  {
6727      BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
6728      AioContext *aio_context;
6729  
6730      if (!to_replace_bs) {
6731          error_setg(errp, "Node name '%s' not found", node_name);
6732          return NULL;
6733      }
6734  
6735      aio_context = bdrv_get_aio_context(to_replace_bs);
6736      aio_context_acquire(aio_context);
6737  
6738      if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
6739          to_replace_bs = NULL;
6740          goto out;
6741      }
6742  
6743      /* We don't want arbitrary node of the BDS chain to be replaced only the top
6744       * most non filter in order to prevent data corruption.
6745       * Another benefit is that this tests exclude backing files which are
6746       * blocked by the backing blockers.
6747       */
6748      if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
6749          error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
6750                     "because it cannot be guaranteed that doing so would not "
6751                     "lead to an abrupt change of visible data",
6752                     node_name, parent_bs->node_name);
6753          to_replace_bs = NULL;
6754          goto out;
6755      }
6756  
6757  out:
6758      aio_context_release(aio_context);
6759      return to_replace_bs;
6760  }
6761  
6762  /**
6763   * Iterates through the list of runtime option keys that are said to
6764   * be "strong" for a BDS.  An option is called "strong" if it changes
6765   * a BDS's data.  For example, the null block driver's "size" and
6766   * "read-zeroes" options are strong, but its "latency-ns" option is
6767   * not.
6768   *
6769   * If a key returned by this function ends with a dot, all options
6770   * starting with that prefix are strong.
6771   */
6772  static const char *const *strong_options(BlockDriverState *bs,
6773                                           const char *const *curopt)
6774  {
6775      static const char *const global_options[] = {
6776          "driver", "filename", NULL
6777      };
6778  
6779      if (!curopt) {
6780          return &global_options[0];
6781      }
6782  
6783      curopt++;
6784      if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
6785          curopt = bs->drv->strong_runtime_opts;
6786      }
6787  
6788      return (curopt && *curopt) ? curopt : NULL;
6789  }
6790  
6791  /**
6792   * Copies all strong runtime options from bs->options to the given
6793   * QDict.  The set of strong option keys is determined by invoking
6794   * strong_options().
6795   *
6796   * Returns true iff any strong option was present in bs->options (and
6797   * thus copied to the target QDict) with the exception of "filename"
6798   * and "driver".  The caller is expected to use this value to decide
6799   * whether the existence of strong options prevents the generation of
6800   * a plain filename.
6801   */
6802  static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
6803  {
6804      bool found_any = false;
6805      const char *const *option_name = NULL;
6806  
6807      if (!bs->drv) {
6808          return false;
6809      }
6810  
6811      while ((option_name = strong_options(bs, option_name))) {
6812          bool option_given = false;
6813  
6814          assert(strlen(*option_name) > 0);
6815          if ((*option_name)[strlen(*option_name) - 1] != '.') {
6816              QObject *entry = qdict_get(bs->options, *option_name);
6817              if (!entry) {
6818                  continue;
6819              }
6820  
6821              qdict_put_obj(d, *option_name, qobject_ref(entry));
6822              option_given = true;
6823          } else {
6824              const QDictEntry *entry;
6825              for (entry = qdict_first(bs->options); entry;
6826                   entry = qdict_next(bs->options, entry))
6827              {
6828                  if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
6829                      qdict_put_obj(d, qdict_entry_key(entry),
6830                                    qobject_ref(qdict_entry_value(entry)));
6831                      option_given = true;
6832                  }
6833              }
6834          }
6835  
6836          /* While "driver" and "filename" need to be included in a JSON filename,
6837           * their existence does not prohibit generation of a plain filename. */
6838          if (!found_any && option_given &&
6839              strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
6840          {
6841              found_any = true;
6842          }
6843      }
6844  
6845      if (!qdict_haskey(d, "driver")) {
6846          /* Drivers created with bdrv_new_open_driver() may not have a
6847           * @driver option.  Add it here. */
6848          qdict_put_str(d, "driver", bs->drv->format_name);
6849      }
6850  
6851      return found_any;
6852  }
6853  
6854  /* Note: This function may return false positives; it may return true
6855   * even if opening the backing file specified by bs's image header
6856   * would result in exactly bs->backing. */
6857  bool bdrv_backing_overridden(BlockDriverState *bs)
6858  {
6859      if (bs->backing) {
6860          return strcmp(bs->auto_backing_file,
6861                        bs->backing->bs->filename);
6862      } else {
6863          /* No backing BDS, so if the image header reports any backing
6864           * file, it must have been suppressed */
6865          return bs->auto_backing_file[0] != '\0';
6866      }
6867  }
6868  
6869  /* Updates the following BDS fields:
6870   *  - exact_filename: A filename which may be used for opening a block device
6871   *                    which (mostly) equals the given BDS (even without any
6872   *                    other options; so reading and writing must return the same
6873   *                    results, but caching etc. may be different)
6874   *  - full_open_options: Options which, when given when opening a block device
6875   *                       (without a filename), result in a BDS (mostly)
6876   *                       equalling the given one
6877   *  - filename: If exact_filename is set, it is copied here. Otherwise,
6878   *              full_open_options is converted to a JSON object, prefixed with
6879   *              "json:" (for use through the JSON pseudo protocol) and put here.
6880   */
6881  void bdrv_refresh_filename(BlockDriverState *bs)
6882  {
6883      BlockDriver *drv = bs->drv;
6884      BdrvChild *child;
6885      BlockDriverState *primary_child_bs;
6886      QDict *opts;
6887      bool backing_overridden;
6888      bool generate_json_filename; /* Whether our default implementation should
6889                                      fill exact_filename (false) or not (true) */
6890  
6891      if (!drv) {
6892          return;
6893      }
6894  
6895      /* This BDS's file name may depend on any of its children's file names, so
6896       * refresh those first */
6897      QLIST_FOREACH(child, &bs->children, next) {
6898          bdrv_refresh_filename(child->bs);
6899      }
6900  
6901      if (bs->implicit) {
6902          /* For implicit nodes, just copy everything from the single child */
6903          child = QLIST_FIRST(&bs->children);
6904          assert(QLIST_NEXT(child, next) == NULL);
6905  
6906          pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
6907                  child->bs->exact_filename);
6908          pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
6909  
6910          qobject_unref(bs->full_open_options);
6911          bs->full_open_options = qobject_ref(child->bs->full_open_options);
6912  
6913          return;
6914      }
6915  
6916      backing_overridden = bdrv_backing_overridden(bs);
6917  
6918      if (bs->open_flags & BDRV_O_NO_IO) {
6919          /* Without I/O, the backing file does not change anything.
6920           * Therefore, in such a case (primarily qemu-img), we can
6921           * pretend the backing file has not been overridden even if
6922           * it technically has been. */
6923          backing_overridden = false;
6924      }
6925  
6926      /* Gather the options QDict */
6927      opts = qdict_new();
6928      generate_json_filename = append_strong_runtime_options(opts, bs);
6929      generate_json_filename |= backing_overridden;
6930  
6931      if (drv->bdrv_gather_child_options) {
6932          /* Some block drivers may not want to present all of their children's
6933           * options, or name them differently from BdrvChild.name */
6934          drv->bdrv_gather_child_options(bs, opts, backing_overridden);
6935      } else {
6936          QLIST_FOREACH(child, &bs->children, next) {
6937              if (child == bs->backing && !backing_overridden) {
6938                  /* We can skip the backing BDS if it has not been overridden */
6939                  continue;
6940              }
6941  
6942              qdict_put(opts, child->name,
6943                        qobject_ref(child->bs->full_open_options));
6944          }
6945  
6946          if (backing_overridden && !bs->backing) {
6947              /* Force no backing file */
6948              qdict_put_null(opts, "backing");
6949          }
6950      }
6951  
6952      qobject_unref(bs->full_open_options);
6953      bs->full_open_options = opts;
6954  
6955      primary_child_bs = bdrv_primary_bs(bs);
6956  
6957      if (drv->bdrv_refresh_filename) {
6958          /* Obsolete information is of no use here, so drop the old file name
6959           * information before refreshing it */
6960          bs->exact_filename[0] = '\0';
6961  
6962          drv->bdrv_refresh_filename(bs);
6963      } else if (primary_child_bs) {
6964          /*
6965           * Try to reconstruct valid information from the underlying
6966           * file -- this only works for format nodes (filter nodes
6967           * cannot be probed and as such must be selected by the user
6968           * either through an options dict, or through a special
6969           * filename which the filter driver must construct in its
6970           * .bdrv_refresh_filename() implementation).
6971           */
6972  
6973          bs->exact_filename[0] = '\0';
6974  
6975          /*
6976           * We can use the underlying file's filename if:
6977           * - it has a filename,
6978           * - the current BDS is not a filter,
6979           * - the file is a protocol BDS, and
6980           * - opening that file (as this BDS's format) will automatically create
6981           *   the BDS tree we have right now, that is:
6982           *   - the user did not significantly change this BDS's behavior with
6983           *     some explicit (strong) options
6984           *   - no non-file child of this BDS has been overridden by the user
6985           *   Both of these conditions are represented by generate_json_filename.
6986           */
6987          if (primary_child_bs->exact_filename[0] &&
6988              primary_child_bs->drv->bdrv_file_open &&
6989              !drv->is_filter && !generate_json_filename)
6990          {
6991              strcpy(bs->exact_filename, primary_child_bs->exact_filename);
6992          }
6993      }
6994  
6995      if (bs->exact_filename[0]) {
6996          pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6997      } else {
6998          QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6999          if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
7000                       qstring_get_str(json)) >= sizeof(bs->filename)) {
7001              /* Give user a hint if we truncated things. */
7002              strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
7003          }
7004          qobject_unref(json);
7005      }
7006  }
7007  
7008  char *bdrv_dirname(BlockDriverState *bs, Error **errp)
7009  {
7010      BlockDriver *drv = bs->drv;
7011      BlockDriverState *child_bs;
7012  
7013      if (!drv) {
7014          error_setg(errp, "Node '%s' is ejected", bs->node_name);
7015          return NULL;
7016      }
7017  
7018      if (drv->bdrv_dirname) {
7019          return drv->bdrv_dirname(bs, errp);
7020      }
7021  
7022      child_bs = bdrv_primary_bs(bs);
7023      if (child_bs) {
7024          return bdrv_dirname(child_bs, errp);
7025      }
7026  
7027      bdrv_refresh_filename(bs);
7028      if (bs->exact_filename[0] != '\0') {
7029          return path_combine(bs->exact_filename, "");
7030      }
7031  
7032      error_setg(errp, "Cannot generate a base directory for %s nodes",
7033                 drv->format_name);
7034      return NULL;
7035  }
7036  
7037  /*
7038   * Hot add/remove a BDS's child. So the user can take a child offline when
7039   * it is broken and take a new child online
7040   */
7041  void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
7042                      Error **errp)
7043  {
7044  
7045      if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
7046          error_setg(errp, "The node %s does not support adding a child",
7047                     bdrv_get_device_or_node_name(parent_bs));
7048          return;
7049      }
7050  
7051      if (!QLIST_EMPTY(&child_bs->parents)) {
7052          error_setg(errp, "The node %s already has a parent",
7053                     child_bs->node_name);
7054          return;
7055      }
7056  
7057      parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
7058  }
7059  
7060  void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
7061  {
7062      BdrvChild *tmp;
7063  
7064      if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
7065          error_setg(errp, "The node %s does not support removing a child",
7066                     bdrv_get_device_or_node_name(parent_bs));
7067          return;
7068      }
7069  
7070      QLIST_FOREACH(tmp, &parent_bs->children, next) {
7071          if (tmp == child) {
7072              break;
7073          }
7074      }
7075  
7076      if (!tmp) {
7077          error_setg(errp, "The node %s does not have a child named %s",
7078                     bdrv_get_device_or_node_name(parent_bs),
7079                     bdrv_get_device_or_node_name(child->bs));
7080          return;
7081      }
7082  
7083      parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
7084  }
7085  
7086  int bdrv_make_empty(BdrvChild *c, Error **errp)
7087  {
7088      BlockDriver *drv = c->bs->drv;
7089      int ret;
7090  
7091      assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
7092  
7093      if (!drv->bdrv_make_empty) {
7094          error_setg(errp, "%s does not support emptying nodes",
7095                     drv->format_name);
7096          return -ENOTSUP;
7097      }
7098  
7099      ret = drv->bdrv_make_empty(c->bs);
7100      if (ret < 0) {
7101          error_setg_errno(errp, -ret, "Failed to empty %s",
7102                           c->bs->filename);
7103          return ret;
7104      }
7105  
7106      return 0;
7107  }
7108  
7109  /*
7110   * Return the child that @bs acts as an overlay for, and from which data may be
7111   * copied in COW or COR operations.  Usually this is the backing file.
7112   */
7113  BdrvChild *bdrv_cow_child(BlockDriverState *bs)
7114  {
7115      if (!bs || !bs->drv) {
7116          return NULL;
7117      }
7118  
7119      if (bs->drv->is_filter) {
7120          return NULL;
7121      }
7122  
7123      if (!bs->backing) {
7124          return NULL;
7125      }
7126  
7127      assert(bs->backing->role & BDRV_CHILD_COW);
7128      return bs->backing;
7129  }
7130  
7131  /*
7132   * If @bs acts as a filter for exactly one of its children, return
7133   * that child.
7134   */
7135  BdrvChild *bdrv_filter_child(BlockDriverState *bs)
7136  {
7137      BdrvChild *c;
7138  
7139      if (!bs || !bs->drv) {
7140          return NULL;
7141      }
7142  
7143      if (!bs->drv->is_filter) {
7144          return NULL;
7145      }
7146  
7147      /* Only one of @backing or @file may be used */
7148      assert(!(bs->backing && bs->file));
7149  
7150      c = bs->backing ?: bs->file;
7151      if (!c) {
7152          return NULL;
7153      }
7154  
7155      assert(c->role & BDRV_CHILD_FILTERED);
7156      return c;
7157  }
7158  
7159  /*
7160   * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
7161   * whichever is non-NULL.
7162   *
7163   * Return NULL if both are NULL.
7164   */
7165  BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
7166  {
7167      BdrvChild *cow_child = bdrv_cow_child(bs);
7168      BdrvChild *filter_child = bdrv_filter_child(bs);
7169  
7170      /* Filter nodes cannot have COW backing files */
7171      assert(!(cow_child && filter_child));
7172  
7173      return cow_child ?: filter_child;
7174  }
7175  
7176  /*
7177   * Return the primary child of this node: For filters, that is the
7178   * filtered child.  For other nodes, that is usually the child storing
7179   * metadata.
7180   * (A generally more helpful description is that this is (usually) the
7181   * child that has the same filename as @bs.)
7182   *
7183   * Drivers do not necessarily have a primary child; for example quorum
7184   * does not.
7185   */
7186  BdrvChild *bdrv_primary_child(BlockDriverState *bs)
7187  {
7188      BdrvChild *c, *found = NULL;
7189  
7190      QLIST_FOREACH(c, &bs->children, next) {
7191          if (c->role & BDRV_CHILD_PRIMARY) {
7192              assert(!found);
7193              found = c;
7194          }
7195      }
7196  
7197      return found;
7198  }
7199  
7200  static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
7201                                                bool stop_on_explicit_filter)
7202  {
7203      BdrvChild *c;
7204  
7205      if (!bs) {
7206          return NULL;
7207      }
7208  
7209      while (!(stop_on_explicit_filter && !bs->implicit)) {
7210          c = bdrv_filter_child(bs);
7211          if (!c) {
7212              /*
7213               * A filter that is embedded in a working block graph must
7214               * have a child.  Assert this here so this function does
7215               * not return a filter node that is not expected by the
7216               * caller.
7217               */
7218              assert(!bs->drv || !bs->drv->is_filter);
7219              break;
7220          }
7221          bs = c->bs;
7222      }
7223      /*
7224       * Note that this treats nodes with bs->drv == NULL as not being
7225       * filters (bs->drv == NULL should be replaced by something else
7226       * anyway).
7227       * The advantage of this behavior is that this function will thus
7228       * always return a non-NULL value (given a non-NULL @bs).
7229       */
7230  
7231      return bs;
7232  }
7233  
7234  /*
7235   * Return the first BDS that has not been added implicitly or that
7236   * does not have a filtered child down the chain starting from @bs
7237   * (including @bs itself).
7238   */
7239  BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
7240  {
7241      return bdrv_do_skip_filters(bs, true);
7242  }
7243  
7244  /*
7245   * Return the first BDS that does not have a filtered child down the
7246   * chain starting from @bs (including @bs itself).
7247   */
7248  BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
7249  {
7250      return bdrv_do_skip_filters(bs, false);
7251  }
7252  
7253  /*
7254   * For a backing chain, return the first non-filter backing image of
7255   * the first non-filter image.
7256   */
7257  BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
7258  {
7259      return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
7260  }
7261