xref: /openbmc/qemu/block.c (revision 9c4218e9)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
43 
44 #ifdef CONFIG_BSD
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <sys/ioctl.h>
48 #include <sys/queue.h>
49 #ifndef __DragonFly__
50 #include <sys/disk.h>
51 #endif
52 #endif
53 
54 #ifdef _WIN32
55 #include <windows.h>
56 #endif
57 
58 /**
59  * A BdrvDirtyBitmap can be in three possible states:
60  * (1) successor is NULL and disabled is false: full r/w mode
61  * (2) successor is NULL and disabled is true: read only mode ("disabled")
62  * (3) successor is set: frozen mode.
63  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
64  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
65  */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into its owner's
     * dirty_bitmaps list -- TODO confirm against the insertion site */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
74 
/* Initial value of CreateCo.ret; bdrv_create() polls until it is replaced */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* bdrv_new_root() appends every state it creates to this list */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* States that received a node name; appended by bdrv_assign_node_name() */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every state allocated by bdrv_new() is appended to this list */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Registered block drivers; bdrv_register() inserts new ones at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations for functions defined further down in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

static void bdrv_close(BlockDriverState *bs);
100 
101 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (a drive specification such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        /* Do not look at filename[1]: the string may already have ended */
        return 0;
    }
    return filename[1] == ':';
}
108 
109 int is_windows_drive(const char *filename)
110 {
111     if (is_windows_drive_prefix(filename) &&
112         filename[2] == '\0')
113         return 1;
114     if (strstart(filename, "\\\\.\\", NULL) ||
115         strstart(filename, "//./", NULL))
116         return 1;
117     return 0;
118 }
119 #endif
120 
121 size_t bdrv_opt_mem_align(BlockDriverState *bs)
122 {
123     if (!bs || !bs->drv) {
124         /* page size or 4k (hdd sector size) should be on the safe side */
125         return MAX(4096, getpagesize());
126     }
127 
128     return bs->bl.opt_mem_alignment;
129 }
130 
131 size_t bdrv_min_mem_align(BlockDriverState *bs)
132 {
133     if (!bs || !bs->drv) {
134         /* page size or 4k (hdd sector size) should be on the safe side */
135         return MAX(4096, getpagesize());
136     }
137 
138     return bs->bl.min_mem_alignment;
139 }
140 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any directory separator.  On Windows, drive specifications are
 * never treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* The first separator found decides: only ':' denotes a protocol */
    return path[strcspn(path, separators)] == ':';
}
158 
/*
 * Return 1 if @path is absolute, 0 otherwise.  A leading '/' always counts;
 * on Windows a leading backslash and any drive specification count as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* Drive specifications and device paths are absolute by definition */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
171 
/*
 * Store in @dest (a buffer of @dest_size bytes) the location of @filename as
 * seen from @base_path.  Nothing is written when @dest_size <= 0; otherwise
 * the result is truncated as needed and always NUL-terminated.
 *
 * If @filename is absolute it is copied unchanged.  Otherwise it is appended
 * to the directory part of @base_path, which is everything up to and
 * including the last directory separator.  A leading "<protocol>:" prefix of
 * @base_path is preserved, so URLs are supported.
 *
 * A ':' is only taken as the end of a protocol prefix when no directory
 * separator precedes it.  A colon inside a later path component (for example
 * "dir/a:b") is part of a file name and must not leak into the result.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *dir_end;    /* one past the part of base_path that is kept */
    const char *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Find the first ':' or separator; only a ':' marks a protocol prefix */
#ifdef _WIN32
    dir_end = base_path + strcspn(base_path, ":/\\");
#else
    dir_end = base_path + strcspn(base_path, ":/");
#endif
    dir_end = (*dir_end == ':') ? dir_end + 1 : base_path;

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif
    /* Keep the directory part when it extends beyond the protocol prefix */
    if (last_sep && last_sep + 1 > dir_end) {
        dir_end = last_sep + 1;
    }

    len = dir_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
215 
216 void bdrv_get_full_backing_filename_from_filename(const char *backed,
217                                                   const char *backing,
218                                                   char *dest, size_t sz,
219                                                   Error **errp)
220 {
221     if (backing[0] == '\0' || path_has_protocol(backing) ||
222         path_is_absolute(backing))
223     {
224         pstrcpy(dest, sz, backing);
225     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
226         error_setg(errp, "Cannot use relative backing file names for '%s'",
227                    backed);
228     } else {
229         path_combine(dest, sz, backed, backing);
230     }
231 }
232 
233 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
234                                     Error **errp)
235 {
236     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
237 
238     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
239                                                  dest, sz, errp);
240 }
241 
242 void bdrv_register(BlockDriver *bdrv)
243 {
244     bdrv_setup_io_funcs(bdrv);
245 
246     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
247 }
248 
249 BlockDriverState *bdrv_new_root(void)
250 {
251     BlockDriverState *bs = bdrv_new();
252 
253     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
254     return bs;
255 }
256 
257 BlockDriverState *bdrv_new(void)
258 {
259     BlockDriverState *bs;
260     int i;
261 
262     bs = g_new0(BlockDriverState, 1);
263     QLIST_INIT(&bs->dirty_bitmaps);
264     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
265         QLIST_INIT(&bs->op_blockers[i]);
266     }
267     notifier_with_return_list_init(&bs->before_write_notifiers);
268     qemu_co_queue_init(&bs->throttled_reqs[0]);
269     qemu_co_queue_init(&bs->throttled_reqs[1]);
270     bs->refcnt = 1;
271     bs->aio_context = qemu_get_aio_context();
272 
273     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
274 
275     return bs;
276 }
277 
278 BlockDriver *bdrv_find_format(const char *format_name)
279 {
280     BlockDriver *drv1;
281     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
282         if (!strcmp(drv1->format_name, format_name)) {
283             return drv1;
284         }
285     }
286     return NULL;
287 }
288 
289 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
290 {
291     static const char *whitelist_rw[] = {
292         CONFIG_BDRV_RW_WHITELIST
293     };
294     static const char *whitelist_ro[] = {
295         CONFIG_BDRV_RO_WHITELIST
296     };
297     const char **p;
298 
299     if (!whitelist_rw[0] && !whitelist_ro[0]) {
300         return 1;               /* no whitelist, anything goes */
301     }
302 
303     for (p = whitelist_rw; *p; p++) {
304         if (!strcmp(drv->format_name, *p)) {
305             return 1;
306         }
307     }
308     if (read_only) {
309         for (p = whitelist_ro; *p; p++) {
310             if (!strcmp(drv->format_name, *p)) {
311                 return 1;
312             }
313         }
314     }
315     return 0;
316 }
317 
/* Arguments and results passed between bdrv_create() and its coroutine */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* creation options handed to the driver */
    int ret;            /* NOT_DONE until the coroutine stores the result */
    Error *err;         /* error reported by the driver callback, if any */
} CreateCo;
325 
326 static void coroutine_fn bdrv_create_co_entry(void *opaque)
327 {
328     Error *local_err = NULL;
329     int ret;
330 
331     CreateCo *cco = opaque;
332     assert(cco->drv);
333 
334     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
335     if (local_err) {
336         error_propagate(&cco->err, local_err);
337     }
338     cco->ret = ret;
339 }
340 
341 int bdrv_create(BlockDriver *drv, const char* filename,
342                 QemuOpts *opts, Error **errp)
343 {
344     int ret;
345 
346     Coroutine *co;
347     CreateCo cco = {
348         .drv = drv,
349         .filename = g_strdup(filename),
350         .opts = opts,
351         .ret = NOT_DONE,
352         .err = NULL,
353     };
354 
355     if (!drv->bdrv_create) {
356         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
357         ret = -ENOTSUP;
358         goto out;
359     }
360 
361     if (qemu_in_coroutine()) {
362         /* Fast-path if already in coroutine context */
363         bdrv_create_co_entry(&cco);
364     } else {
365         co = qemu_coroutine_create(bdrv_create_co_entry);
366         qemu_coroutine_enter(co, &cco);
367         while (cco.ret == NOT_DONE) {
368             aio_poll(qemu_get_aio_context(), true);
369         }
370     }
371 
372     ret = cco.ret;
373     if (ret < 0) {
374         if (cco.err) {
375             error_propagate(errp, cco.err);
376         } else {
377             error_setg_errno(errp, -ret, "Could not create image");
378         }
379     }
380 
381 out:
382     g_free(cco.filename);
383     return ret;
384 }
385 
386 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
387 {
388     BlockDriver *drv;
389     Error *local_err = NULL;
390     int ret;
391 
392     drv = bdrv_find_protocol(filename, true, errp);
393     if (drv == NULL) {
394         return -ENOENT;
395     }
396 
397     ret = bdrv_create(drv, filename, opts, &local_err);
398     if (local_err) {
399         error_propagate(errp, local_err);
400     }
401     return ret;
402 }
403 
404 /**
405  * Try to get @bs's logical and physical block size.
406  * On success, store them in @bsz struct and return 0.
407  * On failure return -errno.
408  * @bs must not be empty.
409  */
410 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
411 {
412     BlockDriver *drv = bs->drv;
413 
414     if (drv && drv->bdrv_probe_blocksizes) {
415         return drv->bdrv_probe_blocksizes(bs, bsz);
416     }
417 
418     return -ENOTSUP;
419 }
420 
421 /**
422  * Try to get @bs's geometry (cyls, heads, sectors).
423  * On success, store them in @geo struct and return 0.
424  * On failure return -errno.
425  * @bs must not be empty.
426  */
427 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
428 {
429     BlockDriver *drv = bs->drv;
430 
431     if (drv && drv->bdrv_probe_geometry) {
432         return drv->bdrv_probe_geometry(bs, geo);
433     }
434 
435     return -ENOTSUP;
436 }
437 
/*
 * Create a uniquely-named empty temporary file and store its name in
 * @filename, a buffer of @size bytes.
 *
 * Return 0 upon success.  On POSIX hosts a failure returns a negative errno
 * value (-EOVERFLOW when the name does not fit into @filename).  On Windows
 * a failure returns the negated GetLastError() code, and @size must be at
 * least MAX_PATH.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so capture close()'s error first */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
473 
474 /*
475  * Detect host devices. By convention, /dev/cdrom[N] is always
476  * recognized as a host CDROM.
477  */
478 static BlockDriver *find_hdev_driver(const char *filename)
479 {
480     int score_max = 0, score;
481     BlockDriver *drv = NULL, *d;
482 
483     QLIST_FOREACH(d, &bdrv_drivers, list) {
484         if (d->bdrv_probe_device) {
485             score = d->bdrv_probe_device(filename);
486             if (score > score_max) {
487                 score_max = score;
488                 drv = d;
489             }
490         }
491     }
492 
493     return drv;
494 }
495 
496 BlockDriver *bdrv_find_protocol(const char *filename,
497                                 bool allow_protocol_prefix,
498                                 Error **errp)
499 {
500     BlockDriver *drv1;
501     char protocol[128];
502     int len;
503     const char *p;
504 
505     /* TODO Drivers without bdrv_file_open must be specified explicitly */
506 
507     /*
508      * XXX(hch): we really should not let host device detection
509      * override an explicit protocol specification, but moving this
510      * later breaks access to device names with colons in them.
511      * Thanks to the brain-dead persistent naming schemes on udev-
512      * based Linux systems those actually are quite common.
513      */
514     drv1 = find_hdev_driver(filename);
515     if (drv1) {
516         return drv1;
517     }
518 
519     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
520         return &bdrv_file;
521     }
522 
523     p = strchr(filename, ':');
524     assert(p != NULL);
525     len = p - filename;
526     if (len > sizeof(protocol) - 1)
527         len = sizeof(protocol) - 1;
528     memcpy(protocol, filename, len);
529     protocol[len] = '\0';
530     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
531         if (drv1->protocol_name &&
532             !strcmp(drv1->protocol_name, protocol)) {
533             return drv1;
534         }
535     }
536 
537     error_setg(errp, "Unknown protocol '%s'", protocol);
538     return NULL;
539 }
540 
541 /*
542  * Guess image format by probing its contents.
543  * This is not a good idea when your image is raw (CVE-2008-2004), but
544  * we do it anyway for backward compatibility.
545  *
546  * @buf         contains the image's first @buf_size bytes.
547  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
548  *              but can be smaller if the image file is smaller)
549  * @filename    is its filename.
550  *
551  * For all block drivers, call the bdrv_probe() method to get its
552  * probing score.
553  * Return the first block driver with the highest probing score.
554  */
555 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
556                             const char *filename)
557 {
558     int score_max = 0, score;
559     BlockDriver *drv = NULL, *d;
560 
561     QLIST_FOREACH(d, &bdrv_drivers, list) {
562         if (d->bdrv_probe) {
563             score = d->bdrv_probe(buf, buf_size, filename);
564             if (score > score_max) {
565                 score_max = score;
566                 drv = d;
567             }
568         }
569     }
570 
571     return drv;
572 }
573 
574 static int find_image_format(BlockDriverState *bs, const char *filename,
575                              BlockDriver **pdrv, Error **errp)
576 {
577     BlockDriver *drv;
578     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
579     int ret = 0;
580 
581     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
582     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
583         *pdrv = &bdrv_raw;
584         return ret;
585     }
586 
587     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
588     if (ret < 0) {
589         error_setg_errno(errp, -ret, "Could not read image for determining its "
590                          "format");
591         *pdrv = NULL;
592         return ret;
593     }
594 
595     drv = bdrv_probe_all(buf, ret, filename);
596     if (!drv) {
597         error_setg(errp, "Could not determine image format: No compatible "
598                    "driver found");
599         ret = -ENOENT;
600     }
601     *pdrv = drv;
602     return ret;
603 }
604 
605 /**
606  * Set the current 'total_sectors' value
607  * Return 0 on success, -errno on error.
608  */
609 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
610 {
611     BlockDriver *drv = bs->drv;
612 
613     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
614     if (bdrv_is_sg(bs))
615         return 0;
616 
617     /* query actual device if possible, otherwise just trust the hint */
618     if (drv->bdrv_getlength) {
619         int64_t length = drv->bdrv_getlength(bs);
620         if (length < 0) {
621             return length;
622         }
623         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
624     }
625 
626     bs->total_sectors = hint;
627     return 0;
628 }
629 
630 /**
631  * Combines a QDict of new block driver @options with any missing options taken
632  * from @old_options, so that leaving out an option defaults to its old value.
633  */
634 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
635                               QDict *old_options)
636 {
637     if (bs->drv && bs->drv->bdrv_join_options) {
638         bs->drv->bdrv_join_options(options, old_options);
639     } else {
640         qdict_join(options, old_options, false);
641     }
642 }
643 
644 /**
645  * Set open flags for a given discard mode
646  *
647  * Return 0 on success, -1 if the discard mode was invalid.
648  */
649 int bdrv_parse_discard_flags(const char *mode, int *flags)
650 {
651     *flags &= ~BDRV_O_UNMAP;
652 
653     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
654         /* do nothing */
655     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
656         *flags |= BDRV_O_UNMAP;
657     } else {
658         return -1;
659     }
660 
661     return 0;
662 }
663 
664 /**
665  * Set open flags for a given cache mode
666  *
667  * Return 0 on success, -1 if the cache mode was invalid.
668  */
669 int bdrv_parse_cache_flags(const char *mode, int *flags)
670 {
671     *flags &= ~BDRV_O_CACHE_MASK;
672 
673     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
674         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
675     } else if (!strcmp(mode, "directsync")) {
676         *flags |= BDRV_O_NOCACHE;
677     } else if (!strcmp(mode, "writeback")) {
678         *flags |= BDRV_O_CACHE_WB;
679     } else if (!strcmp(mode, "unsafe")) {
680         *flags |= BDRV_O_CACHE_WB;
681         *flags |= BDRV_O_NO_FLUSH;
682     } else if (!strcmp(mode, "writethrough")) {
683         /* this is the default */
684     } else {
685         return -1;
686     }
687 
688     return 0;
689 }
690 
691 /*
692  * Returns the flags that a temporary snapshot should get, based on the
693  * originally requested flags (the originally requested image will have flags
694  * like a backing file)
695  */
696 static int bdrv_temp_snapshot_flags(int flags)
697 {
698     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
699 }
700 
701 /*
702  * Returns the options and flags that bs->file should get if a protocol driver
703  * is expected, based on the given options and flags for the parent BDS
704  */
705 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
706                                    int parent_flags, QDict *parent_options)
707 {
708     int flags = parent_flags;
709 
710     /* Enable protocol handling, disable format probing for bs->file */
711     flags |= BDRV_O_PROTOCOL;
712 
713     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
714      * the parent. */
715     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
716     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
717 
718     /* Our block drivers take care to send flushes and respect unmap policy,
719      * so we can default to enable both on lower layers regardless of the
720      * corresponding parent options. */
721     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
722     flags |= BDRV_O_UNMAP;
723 
724     /* Clear flags that only apply to the top layer */
725     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
726 
727     *child_flags = flags;
728 }
729 
/* Child role whose options and flags come from bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
733 
734 /*
735  * Returns the options and flags that bs->file should get if the use of formats
736  * (and not only protocols) is permitted for it, based on the given options and
737  * flags for the parent BDS
738  */
739 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
740                                        int parent_flags, QDict *parent_options)
741 {
742     child_file.inherit_options(child_flags, child_options,
743                                parent_flags, parent_options);
744 
745     *child_flags &= ~BDRV_O_PROTOCOL;
746 }
747 
/* Child role whose options and flags come from bdrv_inherited_fmt_options() */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
751 
752 /*
753  * Returns the options and flags that bs->backing should get, based on the
754  * given options and flags for the parent BDS
755  */
756 static void bdrv_backing_options(int *child_flags, QDict *child_options,
757                                  int parent_flags, QDict *parent_options)
758 {
759     int flags = parent_flags;
760 
761     /* The cache mode is inherited unmodified for backing files */
762     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
763     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
764     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
765 
766     /* backing files always opened read-only */
767     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
768 
769     /* snapshot=on is handled on the top layer */
770     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
771 
772     *child_flags = flags;
773 }
774 
/* Child role whose options and flags come from bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
778 
779 static int bdrv_open_flags(BlockDriverState *bs, int flags)
780 {
781     int open_flags = flags | BDRV_O_CACHE_WB;
782 
783     /*
784      * Clear flags that are internal to the block layer before opening the
785      * image.
786      */
787     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
788 
789     /*
790      * Snapshots should be writable.
791      */
792     if (flags & BDRV_O_TEMPORARY) {
793         open_flags |= BDRV_O_RDWR;
794     }
795 
796     return open_flags;
797 }
798 
799 static void update_flags_from_options(int *flags, QemuOpts *opts)
800 {
801     *flags &= ~BDRV_O_CACHE_MASK;
802 
803     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
804     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
805         *flags |= BDRV_O_CACHE_WB;
806     }
807 
808     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
809     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
810         *flags |= BDRV_O_NO_FLUSH;
811     }
812 
813     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
814     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
815         *flags |= BDRV_O_NOCACHE;
816     }
817 }
818 
819 static void update_options_from_flags(QDict *options, int flags)
820 {
821     if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
822         qdict_put(options, BDRV_OPT_CACHE_WB,
823                   qbool_from_bool(flags & BDRV_O_CACHE_WB));
824     }
825     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
826         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
827                   qbool_from_bool(flags & BDRV_O_NOCACHE));
828     }
829     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
830         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
831                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
832     }
833 }
834 
835 static void bdrv_assign_node_name(BlockDriverState *bs,
836                                   const char *node_name,
837                                   Error **errp)
838 {
839     char *gen_node_name = NULL;
840 
841     if (!node_name) {
842         node_name = gen_node_name = id_generate(ID_BLOCK);
843     } else if (!id_wellformed(node_name)) {
844         /*
845          * Check for empty string or invalid characters, but not if it is
846          * generated (generated names use characters not available to the user)
847          */
848         error_setg(errp, "Invalid node name");
849         return;
850     }
851 
852     /* takes care of avoiding namespaces collisions */
853     if (blk_by_name(node_name)) {
854         error_setg(errp, "node-name=%s is conflicting with a device id",
855                    node_name);
856         goto out;
857     }
858 
859     /* takes care of avoiding duplicates node names */
860     if (bdrv_find_node(node_name)) {
861         error_setg(errp, "Duplicate node name");
862         goto out;
863     }
864 
865     /* copy node name into the bs and insert it into the graph list */
866     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
867     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
868 out:
869     g_free(gen_node_name);
870 }
871 
/*
 * Options common to all block drivers.  bdrv_open_common() absorbs them from
 * the options QDict before the driver's open function is called.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        /* The three cache options below are read back by
         * update_flags_from_options(), which asserts they are present */
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
904 
905 /*
906  * Common part for opening disk images and files
907  *
908  * Removes all processed options from *options.
909  */
910 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
911                             QDict *options, Error **errp)
912 {
913     int ret, open_flags;
914     const char *filename;
915     const char *driver_name = NULL;
916     const char *node_name = NULL;
917     QemuOpts *opts;
918     BlockDriver *drv;
919     Error *local_err = NULL;
920 
921     assert(bs->file == NULL);
922     assert(options != NULL && bs->options != options);
923 
924     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
925     qemu_opts_absorb_qdict(opts, options, &local_err);
926     if (local_err) {
927         error_propagate(errp, local_err);
928         ret = -EINVAL;
929         goto fail_opts;
930     }
931 
932     driver_name = qemu_opt_get(opts, "driver");
933     drv = bdrv_find_format(driver_name);
934     assert(drv != NULL);
935 
936     if (file != NULL) {
937         filename = file->bs->filename;
938     } else {
939         filename = qdict_get_try_str(options, "filename");
940     }
941 
942     if (drv->bdrv_needs_filename && !filename) {
943         error_setg(errp, "The '%s' block driver requires a file name",
944                    drv->format_name);
945         ret = -EINVAL;
946         goto fail_opts;
947     }
948 
949     trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
950                            drv->format_name);
951 
952     node_name = qemu_opt_get(opts, "node-name");
953     bdrv_assign_node_name(bs, node_name, &local_err);
954     if (local_err) {
955         error_propagate(errp, local_err);
956         ret = -EINVAL;
957         goto fail_opts;
958     }
959 
960     bs->request_alignment = 512;
961     bs->zero_beyond_eof = true;
962     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
963 
964     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
965         error_setg(errp,
966                    !bs->read_only && bdrv_is_whitelisted(drv, true)
967                         ? "Driver '%s' can only be used for read-only devices"
968                         : "Driver '%s' is not whitelisted",
969                    drv->format_name);
970         ret = -ENOTSUP;
971         goto fail_opts;
972     }
973 
974     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
975     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
976         if (!bs->read_only) {
977             bdrv_enable_copy_on_read(bs);
978         } else {
979             error_setg(errp, "Can't use copy-on-read on read-only device");
980             ret = -EINVAL;
981             goto fail_opts;
982         }
983     }
984 
985     if (filename != NULL) {
986         pstrcpy(bs->filename, sizeof(bs->filename), filename);
987     } else {
988         bs->filename[0] = '\0';
989     }
990     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
991 
992     bs->drv = drv;
993     bs->opaque = g_malloc0(drv->instance_size);
994 
995     /* Apply cache mode options */
996     update_flags_from_options(&bs->open_flags, opts);
997     bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
998 
999     /* Open the image, either directly or using a protocol */
1000     open_flags = bdrv_open_flags(bs, bs->open_flags);
1001     if (drv->bdrv_file_open) {
1002         assert(file == NULL);
1003         assert(!drv->bdrv_needs_filename || filename != NULL);
1004         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1005     } else {
1006         if (file == NULL) {
1007             error_setg(errp, "Can't use '%s' as a block driver for the "
1008                        "protocol level", drv->format_name);
1009             ret = -EINVAL;
1010             goto free_and_fail;
1011         }
1012         bs->file = file;
1013         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1014     }
1015 
1016     if (ret < 0) {
1017         if (local_err) {
1018             error_propagate(errp, local_err);
1019         } else if (bs->filename[0]) {
1020             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1021         } else {
1022             error_setg_errno(errp, -ret, "Could not open image");
1023         }
1024         goto free_and_fail;
1025     }
1026 
1027     if (bs->encrypted) {
1028         error_report("Encrypted images are deprecated");
1029         error_printf("Support for them will be removed in a future release.\n"
1030                      "You can use 'qemu-img convert' to convert your image"
1031                      " to an unencrypted one.\n");
1032     }
1033 
1034     ret = refresh_total_sectors(bs, bs->total_sectors);
1035     if (ret < 0) {
1036         error_setg_errno(errp, -ret, "Could not refresh total sector count");
1037         goto free_and_fail;
1038     }
1039 
1040     bdrv_refresh_limits(bs, &local_err);
1041     if (local_err) {
1042         error_propagate(errp, local_err);
1043         ret = -EINVAL;
1044         goto free_and_fail;
1045     }
1046 
1047     assert(bdrv_opt_mem_align(bs) != 0);
1048     assert(bdrv_min_mem_align(bs) != 0);
1049     assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1050 
1051     qemu_opts_del(opts);
1052     return 0;
1053 
1054 free_and_fail:
1055     bs->file = NULL;
1056     g_free(bs->opaque);
1057     bs->opaque = NULL;
1058     bs->drv = NULL;
1059 fail_opts:
1060     qemu_opts_del(opts);
1061     return ret;
1062 }
1063 
1064 static QDict *parse_json_filename(const char *filename, Error **errp)
1065 {
1066     QObject *options_obj;
1067     QDict *options;
1068     int ret;
1069 
1070     ret = strstart(filename, "json:", &filename);
1071     assert(ret);
1072 
1073     options_obj = qobject_from_json(filename);
1074     if (!options_obj) {
1075         error_setg(errp, "Could not parse the JSON options");
1076         return NULL;
1077     }
1078 
1079     if (qobject_type(options_obj) != QTYPE_QDICT) {
1080         qobject_decref(options_obj);
1081         error_setg(errp, "Invalid JSON object given");
1082         return NULL;
1083     }
1084 
1085     options = qobject_to_qdict(options_obj);
1086     qdict_flatten(options);
1087 
1088     return options;
1089 }
1090 
1091 static void parse_json_protocol(QDict *options, const char **pfilename,
1092                                 Error **errp)
1093 {
1094     QDict *json_options;
1095     Error *local_err = NULL;
1096 
1097     /* Parse json: pseudo-protocol */
1098     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1099         return;
1100     }
1101 
1102     json_options = parse_json_filename(*pfilename, &local_err);
1103     if (local_err) {
1104         error_propagate(errp, local_err);
1105         return;
1106     }
1107 
1108     /* Options given in the filename have lower priority than options
1109      * specified directly */
1110     qdict_join(options, json_options, false);
1111     QDECREF(json_options);
1112     *pfilename = NULL;
1113 }
1114 
1115 /*
1116  * Fills in default options for opening images and converts the legacy
1117  * filename/flags pair to option QDict entries.
1118  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1119  * block driver has been specified explicitly.
1120  */
1121 static int bdrv_fill_options(QDict **options, const char *filename,
1122                              int *flags, Error **errp)
1123 {
1124     const char *drvname;
1125     bool protocol = *flags & BDRV_O_PROTOCOL;
1126     bool parse_filename = false;
1127     BlockDriver *drv = NULL;
1128     Error *local_err = NULL;
1129 
1130     drvname = qdict_get_try_str(*options, "driver");
1131     if (drvname) {
1132         drv = bdrv_find_format(drvname);
1133         if (!drv) {
1134             error_setg(errp, "Unknown driver '%s'", drvname);
1135             return -ENOENT;
1136         }
1137         /* If the user has explicitly specified the driver, this choice should
1138          * override the BDRV_O_PROTOCOL flag */
1139         protocol = drv->bdrv_file_open;
1140     }
1141 
1142     if (protocol) {
1143         *flags |= BDRV_O_PROTOCOL;
1144     } else {
1145         *flags &= ~BDRV_O_PROTOCOL;
1146     }
1147 
1148     /* Translate cache options from flags into options */
1149     update_options_from_flags(*options, *flags);
1150 
1151     /* Fetch the file name from the options QDict if necessary */
1152     if (protocol && filename) {
1153         if (!qdict_haskey(*options, "filename")) {
1154             qdict_put(*options, "filename", qstring_from_str(filename));
1155             parse_filename = true;
1156         } else {
1157             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1158                              "the same time");
1159             return -EINVAL;
1160         }
1161     }
1162 
1163     /* Find the right block driver */
1164     filename = qdict_get_try_str(*options, "filename");
1165 
1166     if (!drvname && protocol) {
1167         if (filename) {
1168             drv = bdrv_find_protocol(filename, parse_filename, errp);
1169             if (!drv) {
1170                 return -EINVAL;
1171             }
1172 
1173             drvname = drv->format_name;
1174             qdict_put(*options, "driver", qstring_from_str(drvname));
1175         } else {
1176             error_setg(errp, "Must specify either driver or file");
1177             return -EINVAL;
1178         }
1179     }
1180 
1181     assert(drv || !protocol);
1182 
1183     /* Driver-specific filename parsing */
1184     if (drv && drv->bdrv_parse_filename && parse_filename) {
1185         drv->bdrv_parse_filename(filename, *options, &local_err);
1186         if (local_err) {
1187             error_propagate(errp, local_err);
1188             return -EINVAL;
1189         }
1190 
1191         if (!drv->bdrv_needs_filename) {
1192             qdict_del(*options, "filename");
1193         }
1194     }
1195 
1196     if (runstate_check(RUN_STATE_INMIGRATE)) {
1197         *flags |= BDRV_O_INACTIVE;
1198     }
1199 
1200     return 0;
1201 }
1202 
1203 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1204                                     BlockDriverState *child_bs,
1205                                     const char *child_name,
1206                                     const BdrvChildRole *child_role)
1207 {
1208     BdrvChild *child = g_new(BdrvChild, 1);
1209     *child = (BdrvChild) {
1210         .bs     = child_bs,
1211         .name   = g_strdup(child_name),
1212         .role   = child_role,
1213     };
1214 
1215     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1216     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1217 
1218     return child;
1219 }
1220 
1221 static void bdrv_detach_child(BdrvChild *child)
1222 {
1223     QLIST_REMOVE(child, next);
1224     QLIST_REMOVE(child, next_parent);
1225     g_free(child->name);
1226     g_free(child);
1227 }
1228 
1229 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1230 {
1231     BlockDriverState *child_bs;
1232 
1233     if (child == NULL) {
1234         return;
1235     }
1236 
1237     if (child->bs->inherits_from == parent) {
1238         child->bs->inherits_from = NULL;
1239     }
1240 
1241     child_bs = child->bs;
1242     bdrv_detach_child(child);
1243     bdrv_unref(child_bs);
1244 }
1245 
1246 /*
1247  * Sets the backing file link of a BDS. A new reference is created; callers
1248  * which don't need their own reference any more must call bdrv_unref().
1249  */
1250 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1251 {
1252     if (backing_hd) {
1253         bdrv_ref(backing_hd);
1254     }
1255 
1256     if (bs->backing) {
1257         assert(bs->backing_blocker);
1258         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1259         bdrv_unref_child(bs, bs->backing);
1260     } else if (backing_hd) {
1261         error_setg(&bs->backing_blocker,
1262                    "node is used as backing hd of '%s'",
1263                    bdrv_get_device_or_node_name(bs));
1264     }
1265 
1266     if (!backing_hd) {
1267         error_free(bs->backing_blocker);
1268         bs->backing_blocker = NULL;
1269         bs->backing = NULL;
1270         goto out;
1271     }
1272     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1273     bs->open_flags &= ~BDRV_O_NO_BACKING;
1274     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1275     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1276             backing_hd->drv ? backing_hd->drv->format_name : "");
1277 
1278     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1279     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1280     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1281                     bs->backing_blocker);
1282 out:
1283     bdrv_refresh_limits(bs, NULL);
1284 }
1285 
1286 /*
1287  * Opens the backing file for a BlockDriverState if not yet open
1288  *
1289  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1290  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1291  * itself, all options starting with "${bdref_key}." are considered part of the
1292  * BlockdevRef.
1293  *
1294  * TODO Can this be unified with bdrv_open_image()?
1295  */
1296 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1297                            const char *bdref_key, Error **errp)
1298 {
1299     char *backing_filename = g_malloc0(PATH_MAX);
1300     char *bdref_key_dot;
1301     const char *reference = NULL;
1302     int ret = 0;
1303     BlockDriverState *backing_hd;
1304     QDict *options;
1305     QDict *tmp_parent_options = NULL;
1306     Error *local_err = NULL;
1307 
1308     if (bs->backing != NULL) {
1309         goto free_exit;
1310     }
1311 
1312     /* NULL means an empty set of options */
1313     if (parent_options == NULL) {
1314         tmp_parent_options = qdict_new();
1315         parent_options = tmp_parent_options;
1316     }
1317 
1318     bs->open_flags &= ~BDRV_O_NO_BACKING;
1319 
1320     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1321     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1322     g_free(bdref_key_dot);
1323 
1324     reference = qdict_get_try_str(parent_options, bdref_key);
1325     if (reference || qdict_haskey(options, "file.filename")) {
1326         backing_filename[0] = '\0';
1327     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1328         QDECREF(options);
1329         goto free_exit;
1330     } else {
1331         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1332                                        &local_err);
1333         if (local_err) {
1334             ret = -EINVAL;
1335             error_propagate(errp, local_err);
1336             QDECREF(options);
1337             goto free_exit;
1338         }
1339     }
1340 
1341     if (!bs->drv || !bs->drv->supports_backing) {
1342         ret = -EINVAL;
1343         error_setg(errp, "Driver doesn't support backing files");
1344         QDECREF(options);
1345         goto free_exit;
1346     }
1347 
1348     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1349         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1350     }
1351 
1352     backing_hd = NULL;
1353     ret = bdrv_open_inherit(&backing_hd,
1354                             *backing_filename ? backing_filename : NULL,
1355                             reference, options, 0, bs, &child_backing,
1356                             errp);
1357     if (ret < 0) {
1358         bs->open_flags |= BDRV_O_NO_BACKING;
1359         error_prepend(errp, "Could not open backing file: ");
1360         goto free_exit;
1361     }
1362 
1363     /* Hook up the backing file link; drop our reference, bs owns the
1364      * backing_hd reference now */
1365     bdrv_set_backing_hd(bs, backing_hd);
1366     bdrv_unref(backing_hd);
1367 
1368     qdict_del(parent_options, bdref_key);
1369 
1370 free_exit:
1371     g_free(backing_filename);
1372     QDECREF(tmp_parent_options);
1373     return ret;
1374 }
1375 
1376 /*
1377  * Opens a disk image whose options are given as BlockdevRef in another block
1378  * device's options.
1379  *
1380  * If allow_none is true, no image will be opened if filename is false and no
1381  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1382  *
1383  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1384  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1385  * itself, all options starting with "${bdref_key}." are considered part of the
1386  * BlockdevRef.
1387  *
1388  * The BlockdevRef will be removed from the options QDict.
1389  */
1390 BdrvChild *bdrv_open_child(const char *filename,
1391                            QDict *options, const char *bdref_key,
1392                            BlockDriverState* parent,
1393                            const BdrvChildRole *child_role,
1394                            bool allow_none, Error **errp)
1395 {
1396     BdrvChild *c = NULL;
1397     BlockDriverState *bs;
1398     QDict *image_options;
1399     int ret;
1400     char *bdref_key_dot;
1401     const char *reference;
1402 
1403     assert(child_role != NULL);
1404 
1405     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1406     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1407     g_free(bdref_key_dot);
1408 
1409     reference = qdict_get_try_str(options, bdref_key);
1410     if (!filename && !reference && !qdict_size(image_options)) {
1411         if (!allow_none) {
1412             error_setg(errp, "A block device must be specified for \"%s\"",
1413                        bdref_key);
1414         }
1415         QDECREF(image_options);
1416         goto done;
1417     }
1418 
1419     bs = NULL;
1420     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1421                             parent, child_role, errp);
1422     if (ret < 0) {
1423         goto done;
1424     }
1425 
1426     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1427 
1428 done:
1429     qdict_del(options, bdref_key);
1430     return c;
1431 }
1432 
1433 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1434 {
1435     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1436     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1437     int64_t total_size;
1438     QemuOpts *opts = NULL;
1439     QDict *snapshot_options;
1440     BlockDriverState *bs_snapshot;
1441     Error *local_err = NULL;
1442     int ret;
1443 
1444     /* if snapshot, we create a temporary backing file and open it
1445        instead of opening 'filename' directly */
1446 
1447     /* Get the required size from the image */
1448     total_size = bdrv_getlength(bs);
1449     if (total_size < 0) {
1450         ret = total_size;
1451         error_setg_errno(errp, -total_size, "Could not get image size");
1452         goto out;
1453     }
1454 
1455     /* Create the temporary image */
1456     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1457     if (ret < 0) {
1458         error_setg_errno(errp, -ret, "Could not get temporary filename");
1459         goto out;
1460     }
1461 
1462     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1463                             &error_abort);
1464     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1465     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1466     qemu_opts_del(opts);
1467     if (ret < 0) {
1468         error_prepend(errp, "Could not create temporary overlay '%s': ",
1469                       tmp_filename);
1470         goto out;
1471     }
1472 
1473     /* Prepare a new options QDict for the temporary file */
1474     snapshot_options = qdict_new();
1475     qdict_put(snapshot_options, "file.driver",
1476               qstring_from_str("file"));
1477     qdict_put(snapshot_options, "file.filename",
1478               qstring_from_str(tmp_filename));
1479     qdict_put(snapshot_options, "driver",
1480               qstring_from_str("qcow2"));
1481 
1482     bs_snapshot = bdrv_new();
1483 
1484     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1485                     flags, &local_err);
1486     if (ret < 0) {
1487         error_propagate(errp, local_err);
1488         goto out;
1489     }
1490 
1491     bdrv_append(bs_snapshot, bs);
1492 
1493 out:
1494     g_free(tmp_filename);
1495     return ret;
1496 }
1497 
1498 /*
1499  * Opens a disk image (raw, qcow2, vmdk, ...)
1500  *
1501  * options is a QDict of options to pass to the block drivers, or NULL for an
1502  * empty set of options. The reference to the QDict belongs to the block layer
1503  * after the call (even on failure), so if the caller intends to reuse the
1504  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1505  *
1506  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1507  * If it is not NULL, the referenced BDS will be reused.
1508  *
1509  * The reference parameter may be used to specify an existing block device which
1510  * should be opened. If specified, neither options nor a filename may be given,
1511  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1512  */
1513 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1514                              const char *reference, QDict *options, int flags,
1515                              BlockDriverState *parent,
1516                              const BdrvChildRole *child_role, Error **errp)
1517 {
1518     int ret;
1519     BdrvChild *file = NULL;
1520     BlockDriverState *bs;
1521     BlockDriver *drv = NULL;
1522     const char *drvname;
1523     const char *backing;
1524     Error *local_err = NULL;
1525     int snapshot_flags = 0;
1526 
1527     assert(pbs);
1528     assert(!child_role || !flags);
1529     assert(!child_role == !parent);
1530 
1531     if (reference) {
1532         bool options_non_empty = options ? qdict_size(options) : false;
1533         QDECREF(options);
1534 
1535         if (*pbs) {
1536             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1537                        "another block device");
1538             return -EINVAL;
1539         }
1540 
1541         if (filename || options_non_empty) {
1542             error_setg(errp, "Cannot reference an existing block device with "
1543                        "additional options or a new filename");
1544             return -EINVAL;
1545         }
1546 
1547         bs = bdrv_lookup_bs(reference, reference, errp);
1548         if (!bs) {
1549             return -ENODEV;
1550         }
1551         bdrv_ref(bs);
1552         *pbs = bs;
1553         return 0;
1554     }
1555 
1556     if (*pbs) {
1557         bs = *pbs;
1558     } else {
1559         bs = bdrv_new();
1560     }
1561 
1562     /* NULL means an empty set of options */
1563     if (options == NULL) {
1564         options = qdict_new();
1565     }
1566 
1567     /* json: syntax counts as explicit options, as if in the QDict */
1568     parse_json_protocol(options, &filename, &local_err);
1569     if (local_err) {
1570         ret = -EINVAL;
1571         goto fail;
1572     }
1573 
1574     bs->explicit_options = qdict_clone_shallow(options);
1575 
1576     if (child_role) {
1577         bs->inherits_from = parent;
1578         child_role->inherit_options(&flags, options,
1579                                     parent->open_flags, parent->options);
1580     }
1581 
1582     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1583     if (local_err) {
1584         goto fail;
1585     }
1586 
1587     bs->open_flags = flags;
1588     bs->options = options;
1589     options = qdict_clone_shallow(options);
1590 
1591     /* Find the right image format driver */
1592     drvname = qdict_get_try_str(options, "driver");
1593     if (drvname) {
1594         drv = bdrv_find_format(drvname);
1595         if (!drv) {
1596             error_setg(errp, "Unknown driver: '%s'", drvname);
1597             ret = -EINVAL;
1598             goto fail;
1599         }
1600     }
1601 
1602     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1603 
1604     backing = qdict_get_try_str(options, "backing");
1605     if (backing && *backing == '\0') {
1606         flags |= BDRV_O_NO_BACKING;
1607         qdict_del(options, "backing");
1608     }
1609 
1610     /* Open image file without format layer */
1611     if ((flags & BDRV_O_PROTOCOL) == 0) {
1612         if (flags & BDRV_O_RDWR) {
1613             flags |= BDRV_O_ALLOW_RDWR;
1614         }
1615         if (flags & BDRV_O_SNAPSHOT) {
1616             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1617             bdrv_backing_options(&flags, options, flags, options);
1618         }
1619 
1620         bs->open_flags = flags;
1621 
1622         file = bdrv_open_child(filename, options, "file", bs,
1623                                &child_file, true, &local_err);
1624         if (local_err) {
1625             ret = -EINVAL;
1626             goto fail;
1627         }
1628     }
1629 
1630     /* Image format probing */
1631     bs->probed = !drv;
1632     if (!drv && file) {
1633         ret = find_image_format(file->bs, filename, &drv, &local_err);
1634         if (ret < 0) {
1635             goto fail;
1636         }
1637         /*
1638          * This option update would logically belong in bdrv_fill_options(),
1639          * but we first need to open bs->file for the probing to work, while
1640          * opening bs->file already requires the (mostly) final set of options
1641          * so that cache mode etc. can be inherited.
1642          *
1643          * Adding the driver later is somewhat ugly, but it's not an option
1644          * that would ever be inherited, so it's correct. We just need to make
1645          * sure to update both bs->options (which has the full effective
1646          * options for bs) and options (which has file.* already removed).
1647          */
1648         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1649         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1650     } else if (!drv) {
1651         error_setg(errp, "Must specify either driver or file");
1652         ret = -EINVAL;
1653         goto fail;
1654     }
1655 
1656     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1657     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1658     /* file must be NULL if a protocol BDS is about to be created
1659      * (the inverse results in an error message from bdrv_open_common()) */
1660     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1661 
1662     /* Open the image */
1663     ret = bdrv_open_common(bs, file, options, &local_err);
1664     if (ret < 0) {
1665         goto fail;
1666     }
1667 
1668     if (file && (bs->file != file)) {
1669         bdrv_unref_child(bs, file);
1670         file = NULL;
1671     }
1672 
1673     /* If there is a backing file, use it */
1674     if ((flags & BDRV_O_NO_BACKING) == 0) {
1675         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1676         if (ret < 0) {
1677             goto close_and_fail;
1678         }
1679     }
1680 
1681     bdrv_refresh_filename(bs);
1682 
1683     /* Check if any unknown options were used */
1684     if (options && (qdict_size(options) != 0)) {
1685         const QDictEntry *entry = qdict_first(options);
1686         if (flags & BDRV_O_PROTOCOL) {
1687             error_setg(errp, "Block protocol '%s' doesn't support the option "
1688                        "'%s'", drv->format_name, entry->key);
1689         } else {
1690             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1691                        "support the option '%s'", drv->format_name,
1692                        bdrv_get_device_name(bs), entry->key);
1693         }
1694 
1695         ret = -EINVAL;
1696         goto close_and_fail;
1697     }
1698 
1699     if (!bdrv_key_required(bs)) {
1700         if (bs->blk) {
1701             blk_dev_change_media_cb(bs->blk, true);
1702         }
1703     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1704                && !runstate_check(RUN_STATE_INMIGRATE)
1705                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1706         error_setg(errp,
1707                    "Guest must be stopped for opening of encrypted image");
1708         ret = -EBUSY;
1709         goto close_and_fail;
1710     }
1711 
1712     QDECREF(options);
1713     *pbs = bs;
1714 
1715     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1716      * temporary snapshot afterwards. */
1717     if (snapshot_flags) {
1718         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1719         if (local_err) {
1720             goto close_and_fail;
1721         }
1722     }
1723 
1724     return 0;
1725 
1726 fail:
1727     if (file != NULL) {
1728         bdrv_unref_child(bs, file);
1729     }
1730     QDECREF(bs->explicit_options);
1731     QDECREF(bs->options);
1732     QDECREF(options);
1733     bs->options = NULL;
1734     if (!*pbs) {
1735         /* If *pbs is NULL, a new BDS has been created in this function and
1736            needs to be freed now. Otherwise, it does not need to be closed,
1737            since it has not really been opened yet. */
1738         bdrv_unref(bs);
1739     }
1740     if (local_err) {
1741         error_propagate(errp, local_err);
1742     }
1743     return ret;
1744 
1745 close_and_fail:
1746     /* See fail path, but now the BDS has to be always closed */
1747     if (*pbs) {
1748         bdrv_close(bs);
1749     } else {
1750         bdrv_unref(bs);
1751     }
1752     QDECREF(options);
1753     if (local_err) {
1754         error_propagate(errp, local_err);
1755     }
1756     return ret;
1757 }
1758 
1759 int bdrv_open(BlockDriverState **pbs, const char *filename,
1760               const char *reference, QDict *options, int flags, Error **errp)
1761 {
1762     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1763                              NULL, errp);
1764 }
1765 
1766 typedef struct BlockReopenQueueEntry {
1767      bool prepared;
1768      BDRVReopenState state;
1769      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1770 } BlockReopenQueueEntry;
1771 
1772 /*
1773  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1774  * reopen of multiple devices.
1775  *
1776  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1777  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1778  * be created and initialized. This newly created BlockReopenQueue should be
1779  * passed back in for subsequent calls that are intended to be of the same
1780  * atomic 'set'.
1781  *
1782  * bs is the BlockDriverState to add to the reopen queue.
1783  *
1784  * options contains the changed options for the associated bs
1785  * (the BlockReopenQueue takes ownership)
1786  *
1787  * flags contains the open flags for the associated bs
1788  *
1789  * returns a pointer to bs_queue, which is either the newly allocated
1790  * bs_queue, or the existing bs_queue being used.
1791  *
1792  */
1793 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1794                                                  BlockDriverState *bs,
1795                                                  QDict *options,
1796                                                  int flags,
1797                                                  const BdrvChildRole *role,
1798                                                  QDict *parent_options,
1799                                                  int parent_flags)
1800 {
1801     assert(bs != NULL);
1802 
1803     BlockReopenQueueEntry *bs_entry;
1804     BdrvChild *child;
1805     QDict *old_options, *explicit_options;
1806 
1807     if (bs_queue == NULL) {
1808         bs_queue = g_new0(BlockReopenQueue, 1);
1809         QSIMPLEQ_INIT(bs_queue);
1810     }
1811 
1812     if (!options) {
1813         options = qdict_new();
1814     }
1815 
1816     /*
1817      * Precedence of options:
1818      * 1. Explicitly passed in options (highest)
1819      * 2. Set in flags (only for top level)
1820      * 3. Retained from explicitly set options of bs
1821      * 4. Inherited from parent node
1822      * 5. Retained from effective options of bs
1823      */
1824 
1825     if (!parent_options) {
1826         /*
1827          * Any setting represented by flags is always updated. If the
1828          * corresponding QDict option is set, it takes precedence. Otherwise
1829          * the flag is translated into a QDict option. The old setting of bs is
1830          * not considered.
1831          */
1832         update_options_from_flags(options, flags);
1833     }
1834 
1835     /* Old explicitly set values (don't overwrite by inherited value) */
1836     old_options = qdict_clone_shallow(bs->explicit_options);
1837     bdrv_join_options(bs, options, old_options);
1838     QDECREF(old_options);
1839 
1840     explicit_options = qdict_clone_shallow(options);
1841 
1842     /* Inherit from parent node */
1843     if (parent_options) {
1844         assert(!flags);
1845         role->inherit_options(&flags, options, parent_flags, parent_options);
1846     }
1847 
1848     /* Old values are used for options that aren't set yet */
1849     old_options = qdict_clone_shallow(bs->options);
1850     bdrv_join_options(bs, options, old_options);
1851     QDECREF(old_options);
1852 
1853     /* bdrv_open() masks this flag out */
1854     flags &= ~BDRV_O_PROTOCOL;
1855 
1856     QLIST_FOREACH(child, &bs->children, next) {
1857         QDict *new_child_options;
1858         char *child_key_dot;
1859 
1860         /* reopen can only change the options of block devices that were
1861          * implicitly created and inherited options. For other (referenced)
1862          * block devices, a syntax like "backing.foo" results in an error. */
1863         if (child->bs->inherits_from != bs) {
1864             continue;
1865         }
1866 
1867         child_key_dot = g_strdup_printf("%s.", child->name);
1868         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1869         g_free(child_key_dot);
1870 
1871         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1872                                 child->role, options, flags);
1873     }
1874 
1875     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1876     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1877 
1878     bs_entry->state.bs = bs;
1879     bs_entry->state.options = options;
1880     bs_entry->state.explicit_options = explicit_options;
1881     bs_entry->state.flags = flags;
1882 
1883     return bs_queue;
1884 }
1885 
1886 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1887                                     BlockDriverState *bs,
1888                                     QDict *options, int flags)
1889 {
1890     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1891                                    NULL, NULL, 0);
1892 }
1893 
1894 /*
1895  * Reopen multiple BlockDriverStates atomically & transactionally.
1896  *
1897  * The queue passed in (bs_queue) must have been built up previous
1898  * via bdrv_reopen_queue().
1899  *
1900  * Reopens all BDS specified in the queue, with the appropriate
1901  * flags.  All devices are prepared for reopen, and failure of any
1902  * device will cause all device changes to be abandonded, and intermediate
1903  * data cleaned up.
1904  *
1905  * If all devices prepare successfully, then the changes are committed
1906  * to all devices.
1907  *
1908  */
1909 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1910 {
1911     int ret = -1;
1912     BlockReopenQueueEntry *bs_entry, *next;
1913     Error *local_err = NULL;
1914 
1915     assert(bs_queue != NULL);
1916 
1917     bdrv_drain_all();
1918 
1919     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1920         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1921             error_propagate(errp, local_err);
1922             goto cleanup;
1923         }
1924         bs_entry->prepared = true;
1925     }
1926 
1927     /* If we reach this point, we have success and just need to apply the
1928      * changes
1929      */
1930     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1931         bdrv_reopen_commit(&bs_entry->state);
1932     }
1933 
1934     ret = 0;
1935 
1936 cleanup:
1937     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1938         if (ret && bs_entry->prepared) {
1939             bdrv_reopen_abort(&bs_entry->state);
1940         } else if (ret) {
1941             QDECREF(bs_entry->state.explicit_options);
1942         }
1943         QDECREF(bs_entry->state.options);
1944         g_free(bs_entry);
1945     }
1946     g_free(bs_queue);
1947     return ret;
1948 }
1949 
1950 
1951 /* Reopen a single BlockDriverState with the specified flags. */
1952 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1953 {
1954     int ret = -1;
1955     Error *local_err = NULL;
1956     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1957 
1958     ret = bdrv_reopen_multiple(queue, &local_err);
1959     if (local_err != NULL) {
1960         error_propagate(errp, local_err);
1961     }
1962     return ret;
1963 }
1964 
1965 
1966 /*
1967  * Prepares a BlockDriverState for reopen. All changes are staged in the
1968  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1969  * the block driver layer .bdrv_reopen_prepare()
1970  *
1971  * bs is the BlockDriverState to reopen
1972  * flags are the new open flags
1973  * queue is the reopen queue
1974  *
1975  * Returns 0 on success, non-zero on error.  On error errp will be set
1976  * as well.
1977  *
1978  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1979  * It is the responsibility of the caller to then call the abort() or
1980  * commit() for any other BDS that have been left in a prepare() state
1981  *
1982  */
1983 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1984                         Error **errp)
1985 {
1986     int ret = -1;
1987     Error *local_err = NULL;
1988     BlockDriver *drv;
1989     QemuOpts *opts;
1990     const char *value;
1991 
1992     assert(reopen_state != NULL);
1993     assert(reopen_state->bs->drv != NULL);
1994     drv = reopen_state->bs->drv;
1995 
1996     /* Process generic block layer options */
1997     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1998     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1999     if (local_err) {
2000         error_propagate(errp, local_err);
2001         ret = -EINVAL;
2002         goto error;
2003     }
2004 
2005     update_flags_from_options(&reopen_state->flags, opts);
2006 
2007     /* If a guest device is attached, it owns WCE */
2008     if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2009         bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2010         bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2011         if (old_wce != new_wce) {
2012             error_setg(errp, "Cannot change cache.writeback: Device attached");
2013             ret = -EINVAL;
2014             goto error;
2015         }
2016     }
2017 
2018     /* node-name and driver must be unchanged. Put them back into the QDict, so
2019      * that they are checked at the end of this function. */
2020     value = qemu_opt_get(opts, "node-name");
2021     if (value) {
2022         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2023     }
2024 
2025     value = qemu_opt_get(opts, "driver");
2026     if (value) {
2027         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2028     }
2029 
2030     /* if we are to stay read-only, do not allow permission change
2031      * to r/w */
2032     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2033         reopen_state->flags & BDRV_O_RDWR) {
2034         error_setg(errp, "Node '%s' is read only",
2035                    bdrv_get_device_or_node_name(reopen_state->bs));
2036         goto error;
2037     }
2038 
2039 
2040     ret = bdrv_flush(reopen_state->bs);
2041     if (ret) {
2042         error_setg_errno(errp, -ret, "Error flushing drive");
2043         goto error;
2044     }
2045 
2046     if (drv->bdrv_reopen_prepare) {
2047         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2048         if (ret) {
2049             if (local_err != NULL) {
2050                 error_propagate(errp, local_err);
2051             } else {
2052                 error_setg(errp, "failed while preparing to reopen image '%s'",
2053                            reopen_state->bs->filename);
2054             }
2055             goto error;
2056         }
2057     } else {
2058         /* It is currently mandatory to have a bdrv_reopen_prepare()
2059          * handler for each supported drv. */
2060         error_setg(errp, "Block format '%s' used by node '%s' "
2061                    "does not support reopening files", drv->format_name,
2062                    bdrv_get_device_or_node_name(reopen_state->bs));
2063         ret = -1;
2064         goto error;
2065     }
2066 
2067     /* Options that are not handled are only okay if they are unchanged
2068      * compared to the old state. It is expected that some options are only
2069      * used for the initial open, but not reopen (e.g. filename) */
2070     if (qdict_size(reopen_state->options)) {
2071         const QDictEntry *entry = qdict_first(reopen_state->options);
2072 
2073         do {
2074             QString *new_obj = qobject_to_qstring(entry->value);
2075             const char *new = qstring_get_str(new_obj);
2076             const char *old = qdict_get_try_str(reopen_state->bs->options,
2077                                                 entry->key);
2078 
2079             if (!old || strcmp(new, old)) {
2080                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2081                 ret = -EINVAL;
2082                 goto error;
2083             }
2084         } while ((entry = qdict_next(reopen_state->options, entry)));
2085     }
2086 
2087     ret = 0;
2088 
2089 error:
2090     qemu_opts_del(opts);
2091     return ret;
2092 }
2093 
2094 /*
2095  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2096  * makes them final by swapping the staging BlockDriverState contents into
2097  * the active BlockDriverState contents.
2098  */
2099 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2100 {
2101     BlockDriver *drv;
2102 
2103     assert(reopen_state != NULL);
2104     drv = reopen_state->bs->drv;
2105     assert(drv != NULL);
2106 
2107     /* If there are any driver level actions to take */
2108     if (drv->bdrv_reopen_commit) {
2109         drv->bdrv_reopen_commit(reopen_state);
2110     }
2111 
2112     /* set BDS specific flags now */
2113     QDECREF(reopen_state->bs->explicit_options);
2114 
2115     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2116     reopen_state->bs->open_flags         = reopen_state->flags;
2117     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2118                                               BDRV_O_CACHE_WB);
2119     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2120 
2121     bdrv_refresh_limits(reopen_state->bs, NULL);
2122 }
2123 
2124 /*
2125  * Abort the reopen, and delete and free the staged changes in
2126  * reopen_state
2127  */
2128 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2129 {
2130     BlockDriver *drv;
2131 
2132     assert(reopen_state != NULL);
2133     drv = reopen_state->bs->drv;
2134     assert(drv != NULL);
2135 
2136     if (drv->bdrv_reopen_abort) {
2137         drv->bdrv_reopen_abort(reopen_state);
2138     }
2139 
2140     QDECREF(reopen_state->explicit_options);
2141 }
2142 
2143 
2144 static void bdrv_close(BlockDriverState *bs)
2145 {
2146     BdrvAioNotifier *ban, *ban_next;
2147 
2148     if (bs->job) {
2149         block_job_cancel_sync(bs->job);
2150     }
2151 
2152     /* Disable I/O limits and drain all pending throttled requests */
2153     if (bs->throttle_state) {
2154         bdrv_io_limits_disable(bs);
2155     }
2156 
2157     bdrv_drained_begin(bs); /* complete I/O */
2158     bdrv_flush(bs);
2159     bdrv_drain(bs); /* in case flush left pending I/O */
2160 
2161     bdrv_release_named_dirty_bitmaps(bs);
2162     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2163 
2164     if (bs->blk) {
2165         blk_dev_change_media_cb(bs->blk, false);
2166     }
2167 
2168     if (bs->drv) {
2169         BdrvChild *child, *next;
2170 
2171         bs->drv->bdrv_close(bs);
2172         bs->drv = NULL;
2173 
2174         bdrv_set_backing_hd(bs, NULL);
2175 
2176         if (bs->file != NULL) {
2177             bdrv_unref_child(bs, bs->file);
2178             bs->file = NULL;
2179         }
2180 
2181         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2182             /* TODO Remove bdrv_unref() from drivers' close function and use
2183              * bdrv_unref_child() here */
2184             if (child->bs->inherits_from == bs) {
2185                 child->bs->inherits_from = NULL;
2186             }
2187             bdrv_detach_child(child);
2188         }
2189 
2190         g_free(bs->opaque);
2191         bs->opaque = NULL;
2192         bs->copy_on_read = 0;
2193         bs->backing_file[0] = '\0';
2194         bs->backing_format[0] = '\0';
2195         bs->total_sectors = 0;
2196         bs->encrypted = 0;
2197         bs->valid_key = 0;
2198         bs->sg = 0;
2199         bs->zero_beyond_eof = false;
2200         QDECREF(bs->options);
2201         QDECREF(bs->explicit_options);
2202         bs->options = NULL;
2203         QDECREF(bs->full_open_options);
2204         bs->full_open_options = NULL;
2205     }
2206 
2207     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2208         g_free(ban);
2209     }
2210     QLIST_INIT(&bs->aio_notifiers);
2211     bdrv_drained_end(bs);
2212 }
2213 
2214 void bdrv_close_all(void)
2215 {
2216     BlockDriverState *bs;
2217 
2218     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2219         AioContext *aio_context = bdrv_get_aio_context(bs);
2220 
2221         aio_context_acquire(aio_context);
2222         bdrv_close(bs);
2223         aio_context_release(aio_context);
2224     }
2225 }
2226 
2227 /* make a BlockDriverState anonymous by removing from bdrv_state and
2228  * graph_bdrv_state list.
2229    Also, NULL terminate the device_name to prevent double remove */
2230 void bdrv_make_anon(BlockDriverState *bs)
2231 {
2232     /*
2233      * Take care to remove bs from bdrv_states only when it's actually
2234      * in it.  Note that bs->device_list.tqe_prev is initially null,
2235      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2236      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2237      * resetting it to null on remove.
2238      */
2239     if (bs->device_list.tqe_prev) {
2240         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2241         bs->device_list.tqe_prev = NULL;
2242     }
2243     if (bs->node_name[0] != '\0') {
2244         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2245     }
2246     bs->node_name[0] = '\0';
2247 }
2248 
2249 /* Fields that need to stay with the top-level BDS */
2250 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2251                                      BlockDriverState *bs_src)
2252 {
2253     /* move some fields that need to stay attached to the device */
2254 
2255     /* dev info */
2256     bs_dest->copy_on_read       = bs_src->copy_on_read;
2257 
2258     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2259 
2260     /* dirty bitmap */
2261     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2262 }
2263 
/*
 * Redirect every parent link of @from to @to.
 *
 * Each BdrvChild on @from's parents list is pointed at @to and moved to
 * @to's parents list; for each moved link @to gains a reference and @from
 * loses one. None of the links may have the child_backing role.
 *
 * If @from is attached to a BlockBackend, the BlockBackend is switched to
 * @to as well and @to takes @from's place in the bdrv_states list.
 */
static void change_parent_backing_link(BlockDriverState *from,
                                       BlockDriverState *to)
{
    BdrvChild *c, *next;

    /* _SAFE variant: c is unlinked from the list inside the loop body */
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
        QLIST_REMOVE(c, next_parent);
        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
        /* The reference held through this link moves from @from to @to */
        bdrv_ref(to);
        bdrv_unref(from);
    }
    if (from->blk) {
        blk_set_bs(from->blk, to);
        /* A null tqe_prev means @to is not in bdrv_states yet (see
         * bdrv_make_anon()); insert it at @from's position */
        if (!to->device_list.tqe_prev) {
            QTAILQ_INSERT_BEFORE(from, to, device_list);
        }
        QTAILQ_REMOVE(&bdrv_states, from, device_list);
    }
}
2285 
/*
 * Exchange the fields handled by bdrv_move_feature_fields() between
 * @bs_top and @bs_new, and move I/O throttling, if enabled on @bs_top,
 * over to @bs_new. @bs_new must not have a throttle state of its own.
 */
static void swap_feature_fields(BlockDriverState *bs_top,
                                BlockDriverState *bs_new)
{
    /* Scratch copy; only the fields written by bdrv_move_feature_fields()
     * are ever read back from it */
    BlockDriverState tmp;

    bdrv_move_feature_fields(&tmp, bs_top);
    bdrv_move_feature_fields(bs_top, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    assert(!bs_new->throttle_state);
    if (bs_top->throttle_state) {
        assert(bs_top->io_limits_enabled);
        /* Enable on bs_new first: the group name is looked up through
         * bs_top, which is still throttled at this point */
        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
        bdrv_io_limits_disable(bs_top);
    }
}
2302 
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * The parents of bs_top are redirected to bs_new, the top-level feature
 * fields are swapped between the two nodes, and bs_top becomes the backing
 * file of bs_new. Both bs_new and bs_top are modified.
 *
 * Neither node may have requests pending.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 *
 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
 * that's what the callers commonly need. bs_new will be referenced by the old
 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
 * reference of its own, it must call bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Temporary reference: change_parent_backing_link() drops one reference
     * to bs_top for every parent link it moves over to bs_new */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    bdrv_set_backing_hd(bs_new, bs_top);
    /* Release the temporary reference taken above */
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2337 
/*
 * Replace @old by @new for all parents of @old.
 *
 * The parent links (and the BlockBackend, if @old has one) are redirected
 * from @old to @new. If @old has a BlockBackend, the top-level feature
 * fields are swapped as well; @new must not have a BlockBackend then.
 *
 * If @new has no backing file and is not part of @old's backing chain,
 * @new also takes over @old's backing file.
 *
 * Neither node may have requests pending.
 */
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
    assert(!bdrv_requests_pending(old));
    assert(!bdrv_requests_pending(new));

    /* Temporary reference: change_parent_backing_link() drops one reference
     * to old for every parent link it moves */
    bdrv_ref(old);

    if (old->blk) {
        /* As long as these fields aren't in BlockBackend, but in the top-level
         * BlockDriverState, it's not possible for a BDS to have two BBs.
         *
         * We really want to copy the fields from old to new, but we go for a
         * swap instead so that pointers aren't duplicated and cause trouble.
         * (Also, bdrv_swap() used to do the same.) */
        assert(!new->blk);
        swap_feature_fields(old, new);
    }
    change_parent_backing_link(old, new);

    /* Change backing files if a previously independent node is added to the
     * chain. For active commit, we replace top by its own (indirect) backing
     * file and don't do anything here so we don't build a loop. */
    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
        bdrv_set_backing_hd(new, backing_bs(old));
        bdrv_set_backing_hd(old, NULL);
    }

    /* Release the temporary reference taken above */
    bdrv_unref(old);
}
2367 
2368 static void bdrv_delete(BlockDriverState *bs)
2369 {
2370     assert(!bs->job);
2371     assert(bdrv_op_blocker_is_empty(bs));
2372     assert(!bs->refcnt);
2373 
2374     bdrv_close(bs);
2375 
2376     /* remove from list, if necessary */
2377     bdrv_make_anon(bs);
2378 
2379     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2380 
2381     g_free(bs);
2382 }
2383 
2384 /*
2385  * Run consistency checks on an image
2386  *
2387  * Returns 0 if the check could be completed (it doesn't mean that the image is
2388  * free of errors) or -errno when an internal error occurred. The results of the
2389  * check are stored in res.
2390  */
2391 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2392 {
2393     if (bs->drv == NULL) {
2394         return -ENOMEDIUM;
2395     }
2396     if (bs->drv->bdrv_check == NULL) {
2397         return -ENOTSUP;
2398     }
2399 
2400     memset(res, 0, sizeof(*res));
2401     return bs->drv->bdrv_check(bs, res, fix);
2402 }
2403 
2404 #define COMMIT_BUF_SECTORS 2048
2405 
2406 /* commit COW file into the raw image */
2407 int bdrv_commit(BlockDriverState *bs)
2408 {
2409     BlockDriver *drv = bs->drv;
2410     int64_t sector, total_sectors, length, backing_length;
2411     int n, ro, open_flags;
2412     int ret = 0;
2413     uint8_t *buf = NULL;
2414 
2415     if (!drv)
2416         return -ENOMEDIUM;
2417 
2418     if (!bs->backing) {
2419         return -ENOTSUP;
2420     }
2421 
2422     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2423         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2424         return -EBUSY;
2425     }
2426 
2427     ro = bs->backing->bs->read_only;
2428     open_flags =  bs->backing->bs->open_flags;
2429 
2430     if (ro) {
2431         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2432             return -EACCES;
2433         }
2434     }
2435 
2436     length = bdrv_getlength(bs);
2437     if (length < 0) {
2438         ret = length;
2439         goto ro_cleanup;
2440     }
2441 
2442     backing_length = bdrv_getlength(bs->backing->bs);
2443     if (backing_length < 0) {
2444         ret = backing_length;
2445         goto ro_cleanup;
2446     }
2447 
2448     /* If our top snapshot is larger than the backing file image,
2449      * grow the backing file image if possible.  If not possible,
2450      * we must return an error */
2451     if (length > backing_length) {
2452         ret = bdrv_truncate(bs->backing->bs, length);
2453         if (ret < 0) {
2454             goto ro_cleanup;
2455         }
2456     }
2457 
2458     total_sectors = length >> BDRV_SECTOR_BITS;
2459 
2460     /* qemu_try_blockalign() for bs will choose an alignment that works for
2461      * bs->backing->bs as well, so no need to compare the alignment manually. */
2462     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2463     if (buf == NULL) {
2464         ret = -ENOMEM;
2465         goto ro_cleanup;
2466     }
2467 
2468     for (sector = 0; sector < total_sectors; sector += n) {
2469         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2470         if (ret < 0) {
2471             goto ro_cleanup;
2472         }
2473         if (ret) {
2474             ret = bdrv_read(bs, sector, buf, n);
2475             if (ret < 0) {
2476                 goto ro_cleanup;
2477             }
2478 
2479             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2480             if (ret < 0) {
2481                 goto ro_cleanup;
2482             }
2483         }
2484     }
2485 
2486     if (drv->bdrv_make_empty) {
2487         ret = drv->bdrv_make_empty(bs);
2488         if (ret < 0) {
2489             goto ro_cleanup;
2490         }
2491         bdrv_flush(bs);
2492     }
2493 
2494     /*
2495      * Make sure all data we wrote to the backing device is actually
2496      * stable on disk.
2497      */
2498     if (bs->backing) {
2499         bdrv_flush(bs->backing->bs);
2500     }
2501 
2502     ret = 0;
2503 ro_cleanup:
2504     qemu_vfree(buf);
2505 
2506     if (ro) {
2507         /* ignoring error return here */
2508         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2509     }
2510 
2511     return ret;
2512 }
2513 
2514 int bdrv_commit_all(void)
2515 {
2516     BlockDriverState *bs;
2517 
2518     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2519         AioContext *aio_context = bdrv_get_aio_context(bs);
2520 
2521         aio_context_acquire(aio_context);
2522         if (bs->drv && bs->backing) {
2523             int ret = bdrv_commit(bs);
2524             if (ret < 0) {
2525                 aio_context_release(aio_context);
2526                 return ret;
2527             }
2528         }
2529         aio_context_release(aio_context);
2530     }
2531     return 0;
2532 }
2533 
2534 /*
2535  * Return values:
2536  * 0        - success
2537  * -EINVAL  - backing format specified, but no file
2538  * -ENOSPC  - can't update the backing file because no space is left in the
2539  *            image file header
2540  * -ENOTSUP - format driver doesn't support changing the backing file
2541  */
2542 int bdrv_change_backing_file(BlockDriverState *bs,
2543     const char *backing_file, const char *backing_fmt)
2544 {
2545     BlockDriver *drv = bs->drv;
2546     int ret;
2547 
2548     /* Backing file format doesn't make sense without a backing file */
2549     if (backing_fmt && !backing_file) {
2550         return -EINVAL;
2551     }
2552 
2553     if (drv->bdrv_change_backing_file != NULL) {
2554         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2555     } else {
2556         ret = -ENOTSUP;
2557     }
2558 
2559     if (ret == 0) {
2560         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2561         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2562     }
2563     return ret;
2564 }
2565 
2566 /*
2567  * Finds the image layer in the chain that has 'bs' as its backing file.
2568  *
2569  * active is the current topmost image.
2570  *
2571  * Returns NULL if bs is not found in active's image chain,
2572  * or if active == bs.
2573  *
2574  * Returns the bottommost base image if bs == NULL.
2575  */
2576 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2577                                     BlockDriverState *bs)
2578 {
2579     while (active && bs != backing_bs(active)) {
2580         active = backing_bs(active);
2581     }
2582 
2583     return active;
2584 }
2585 
2586 /* Given a BDS, searches for the base layer. */
2587 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2588 {
2589     return bdrv_find_overlay(bs, NULL);
2590 }
2591 
2592 /*
2593  * Drops images above 'base' up to and including 'top', and sets the image
2594  * above 'top' to have base as its backing file.
2595  *
2596  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2597  * information in 'bs' can be properly updated.
2598  *
2599  * E.g., this will convert the following chain:
2600  * bottom <- base <- intermediate <- top <- active
2601  *
2602  * to
2603  *
2604  * bottom <- base <- active
2605  *
2606  * It is allowed for bottom==base, in which case it converts:
2607  *
2608  * base <- intermediate <- top <- active
2609  *
2610  * to
2611  *
2612  * base <- active
2613  *
2614  * If backing_file_str is non-NULL, it will be used when modifying top's
2615  * overlay image metadata.
2616  *
2617  * Error conditions:
2618  *  if active == top, that is considered an error
2619  *
2620  */
2621 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2622                            BlockDriverState *base, const char *backing_file_str)
2623 {
2624     BlockDriverState *new_top_bs = NULL;
2625     int ret = -EIO;
2626 
2627     if (!top->drv || !base->drv) {
2628         goto exit;
2629     }
2630 
2631     new_top_bs = bdrv_find_overlay(active, top);
2632 
2633     if (new_top_bs == NULL) {
2634         /* we could not find the image above 'top', this is an error */
2635         goto exit;
2636     }
2637 
2638     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2639      * to do, no intermediate images */
2640     if (backing_bs(new_top_bs) == base) {
2641         ret = 0;
2642         goto exit;
2643     }
2644 
2645     /* Make sure that base is in the backing chain of top */
2646     if (!bdrv_chain_contains(top, base)) {
2647         goto exit;
2648     }
2649 
2650     /* success - we can delete the intermediate states, and link top->base */
2651     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2652     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2653                                    base->drv ? base->drv->format_name : "");
2654     if (ret) {
2655         goto exit;
2656     }
2657     bdrv_set_backing_hd(new_top_bs, base);
2658 
2659     ret = 0;
2660 exit:
2661     return ret;
2662 }
2663 
2664 /**
2665  * Truncate file to 'offset' bytes (needed only for file protocols)
2666  */
2667 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2668 {
2669     BlockDriver *drv = bs->drv;
2670     int ret;
2671     if (!drv)
2672         return -ENOMEDIUM;
2673     if (!drv->bdrv_truncate)
2674         return -ENOTSUP;
2675     if (bs->read_only)
2676         return -EACCES;
2677 
2678     ret = drv->bdrv_truncate(bs, offset);
2679     if (ret == 0) {
2680         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2681         bdrv_dirty_bitmap_truncate(bs);
2682         if (bs->blk) {
2683             blk_dev_resize_cb(bs->blk);
2684         }
2685     }
2686     return ret;
2687 }
2688 
2689 /**
2690  * Length of a allocated file in bytes. Sparse files are counted by actual
2691  * allocated space. Return < 0 if error or unknown.
2692  */
2693 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2694 {
2695     BlockDriver *drv = bs->drv;
2696     if (!drv) {
2697         return -ENOMEDIUM;
2698     }
2699     if (drv->bdrv_get_allocated_file_size) {
2700         return drv->bdrv_get_allocated_file_size(bs);
2701     }
2702     if (bs->file) {
2703         return bdrv_get_allocated_file_size(bs->file->bs);
2704     }
2705     return -ENOTSUP;
2706 }
2707 
2708 /**
2709  * Return number of sectors on success, -errno on error.
2710  */
2711 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2712 {
2713     BlockDriver *drv = bs->drv;
2714 
2715     if (!drv)
2716         return -ENOMEDIUM;
2717 
2718     if (drv->has_variable_length) {
2719         int ret = refresh_total_sectors(bs, bs->total_sectors);
2720         if (ret < 0) {
2721             return ret;
2722         }
2723     }
2724     return bs->total_sectors;
2725 }
2726 
2727 /**
2728  * Return length in bytes on success, -errno on error.
2729  * The length is always a multiple of BDRV_SECTOR_SIZE.
2730  */
2731 int64_t bdrv_getlength(BlockDriverState *bs)
2732 {
2733     int64_t ret = bdrv_nb_sectors(bs);
2734 
2735     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2736     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2737 }
2738 
2739 /* return 0 as number of sectors if no device present or error */
2740 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2741 {
2742     int64_t nb_sectors = bdrv_nb_sectors(bs);
2743 
2744     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2745 }
2746 
/* Return the read_only field of @bs. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2751 
/* Return the sg field of @bs. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2756 
/*
 * Return the enable_write_cache field of @bs. Despite its name this function
 * only reads the setting; bdrv_set_enable_write_cache() changes it.
 */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2761 
2762 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2763 {
2764     bs->enable_write_cache = wce;
2765 
2766     /* so a reopen() will preserve wce */
2767     if (wce) {
2768         bs->open_flags |= BDRV_O_CACHE_WB;
2769     } else {
2770         bs->open_flags &= ~BDRV_O_CACHE_WB;
2771     }
2772 }
2773 
2774 int bdrv_is_encrypted(BlockDriverState *bs)
2775 {
2776     if (bs->backing && bs->backing->bs->encrypted) {
2777         return 1;
2778     }
2779     return bs->encrypted;
2780 }
2781 
2782 int bdrv_key_required(BlockDriverState *bs)
2783 {
2784     BdrvChild *backing = bs->backing;
2785 
2786     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2787         return 1;
2788     }
2789     return (bs->encrypted && !bs->valid_key);
2790 }
2791 
2792 int bdrv_set_key(BlockDriverState *bs, const char *key)
2793 {
2794     int ret;
2795     if (bs->backing && bs->backing->bs->encrypted) {
2796         ret = bdrv_set_key(bs->backing->bs, key);
2797         if (ret < 0)
2798             return ret;
2799         if (!bs->encrypted)
2800             return 0;
2801     }
2802     if (!bs->encrypted) {
2803         return -EINVAL;
2804     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2805         return -ENOMEDIUM;
2806     }
2807     ret = bs->drv->bdrv_set_key(bs, key);
2808     if (ret < 0) {
2809         bs->valid_key = 0;
2810     } else if (!bs->valid_key) {
2811         bs->valid_key = 1;
2812         if (bs->blk) {
2813             /* call the change callback now, we skipped it on open */
2814             blk_dev_change_media_cb(bs->blk, true);
2815         }
2816     }
2817     return ret;
2818 }
2819 
2820 /*
2821  * Provide an encryption key for @bs.
2822  * If @key is non-null:
2823  *     If @bs is not encrypted, fail.
2824  *     Else if the key is invalid, fail.
2825  *     Else set @bs's key to @key, replacing the existing key, if any.
2826  * If @key is null:
2827  *     If @bs is encrypted and still lacks a key, fail.
2828  *     Else do nothing.
2829  * On failure, store an error object through @errp if non-null.
2830  */
2831 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2832 {
2833     if (key) {
2834         if (!bdrv_is_encrypted(bs)) {
2835             error_setg(errp, "Node '%s' is not encrypted",
2836                       bdrv_get_device_or_node_name(bs));
2837         } else if (bdrv_set_key(bs, key) < 0) {
2838             error_setg(errp, QERR_INVALID_PASSWORD);
2839         }
2840     } else {
2841         if (bdrv_key_required(bs)) {
2842             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2843                       "'%s' (%s) is encrypted",
2844                       bdrv_get_device_or_node_name(bs),
2845                       bdrv_get_encrypted_filename(bs));
2846         }
2847     }
2848 }
2849 
2850 const char *bdrv_get_format_name(BlockDriverState *bs)
2851 {
2852     return bs->drv ? bs->drv->format_name : NULL;
2853 }
2854 
/*
 * qsort() comparator for an array of C strings (element type: char *).
 *
 * qsort() hands the comparator pointers to the array *elements*, so @a and
 * @b are really pointers to string pointers and must be dereferenced once
 * before being passed to strcmp().  Comparing them directly would compare
 * the raw bytes of the pointers rather than the strings they refer to.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(char *const *)a, *(char *const *)b);
}
2859 
2860 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2861                          void *opaque)
2862 {
2863     BlockDriver *drv;
2864     int count = 0;
2865     int i;
2866     const char **formats = NULL;
2867 
2868     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2869         if (drv->format_name) {
2870             bool found = false;
2871             int i = count;
2872             while (formats && i && !found) {
2873                 found = !strcmp(formats[--i], drv->format_name);
2874             }
2875 
2876             if (!found) {
2877                 formats = g_renew(const char *, formats, count + 1);
2878                 formats[count++] = drv->format_name;
2879             }
2880         }
2881     }
2882 
2883     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2884 
2885     for (i = 0; i < count; i++) {
2886         it(opaque, formats[i]);
2887     }
2888 
2889     g_free(formats);
2890 }
2891 
2892 /* This function is to find a node in the bs graph */
2893 BlockDriverState *bdrv_find_node(const char *node_name)
2894 {
2895     BlockDriverState *bs;
2896 
2897     assert(node_name);
2898 
2899     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2900         if (!strcmp(node_name, bs->node_name)) {
2901             return bs;
2902         }
2903     }
2904     return NULL;
2905 }
2906 
2907 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2908 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2909 {
2910     BlockDeviceInfoList *list, *entry;
2911     BlockDriverState *bs;
2912 
2913     list = NULL;
2914     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2915         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2916         if (!info) {
2917             qapi_free_BlockDeviceInfoList(list);
2918             return NULL;
2919         }
2920         entry = g_malloc0(sizeof(*entry));
2921         entry->value = info;
2922         entry->next = list;
2923         list = entry;
2924     }
2925 
2926     return list;
2927 }
2928 
2929 BlockDriverState *bdrv_lookup_bs(const char *device,
2930                                  const char *node_name,
2931                                  Error **errp)
2932 {
2933     BlockBackend *blk;
2934     BlockDriverState *bs;
2935 
2936     if (device) {
2937         blk = blk_by_name(device);
2938 
2939         if (blk) {
2940             bs = blk_bs(blk);
2941             if (!bs) {
2942                 error_setg(errp, "Device '%s' has no medium", device);
2943             }
2944 
2945             return bs;
2946         }
2947     }
2948 
2949     if (node_name) {
2950         bs = bdrv_find_node(node_name);
2951 
2952         if (bs) {
2953             return bs;
2954         }
2955     }
2956 
2957     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2958                      device ? device : "",
2959                      node_name ? node_name : "");
2960     return NULL;
2961 }
2962 
2963 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2964  * return false.  If either argument is NULL, return false. */
2965 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2966 {
2967     while (top && top != base) {
2968         top = backing_bs(top);
2969     }
2970 
2971     return top != NULL;
2972 }
2973 
2974 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2975 {
2976     if (!bs) {
2977         return QTAILQ_FIRST(&graph_bdrv_states);
2978     }
2979     return QTAILQ_NEXT(bs, node_list);
2980 }
2981 
2982 BlockDriverState *bdrv_next(BlockDriverState *bs)
2983 {
2984     if (!bs) {
2985         return QTAILQ_FIRST(&bdrv_states);
2986     }
2987     return QTAILQ_NEXT(bs, device_list);
2988 }
2989 
/* Return the node name stored in @bs (the bs->node_name field). */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
2994 
2995 /* TODO check what callers really want: bs->node_name or blk_name() */
2996 const char *bdrv_get_device_name(const BlockDriverState *bs)
2997 {
2998     return bs->blk ? blk_name(bs->blk) : "";
2999 }
3000 
3001 /* This can be used to identify nodes that might not have a device
3002  * name associated. Since node and device names live in the same
3003  * namespace, the result is unambiguous. The exception is if both are
3004  * absent, then this returns an empty (non-null) string. */
3005 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3006 {
3007     return bs->blk ? blk_name(bs->blk) : bs->node_name;
3008 }
3009 
/* Return the open flags recorded on @bs (the bs->open_flags field). */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
3014 
/*
 * Unconditionally report 1, ignoring @bs.
 * NOTE(review): presumably meant as a ready-made bdrv_has_zero_init
 * callback for drivers whose new images always read as zeroes -- confirm
 * against the drivers that reference it.
 */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
3019 
3020 int bdrv_has_zero_init(BlockDriverState *bs)
3021 {
3022     assert(bs->drv);
3023 
3024     /* If BS is a copy on write image, it is initialized to
3025        the contents of the base image, which may not be zeroes.  */
3026     if (bs->backing) {
3027         return 0;
3028     }
3029     if (bs->drv->bdrv_has_zero_init) {
3030         return bs->drv->bdrv_has_zero_init(bs);
3031     }
3032 
3033     /* safe default */
3034     return 0;
3035 }
3036 
3037 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3038 {
3039     BlockDriverInfo bdi;
3040 
3041     if (bs->backing) {
3042         return false;
3043     }
3044 
3045     if (bdrv_get_info(bs, &bdi) == 0) {
3046         return bdi.unallocated_blocks_are_zero;
3047     }
3048 
3049     return false;
3050 }
3051 
3052 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3053 {
3054     BlockDriverInfo bdi;
3055 
3056     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3057         return false;
3058     }
3059 
3060     if (bdrv_get_info(bs, &bdi) == 0) {
3061         return bdi.can_write_zeroes_with_unmap;
3062     }
3063 
3064     return false;
3065 }
3066 
3067 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3068 {
3069     if (bs->backing && bs->backing->bs->encrypted)
3070         return bs->backing_file;
3071     else if (bs->encrypted)
3072         return bs->filename;
3073     else
3074         return NULL;
3075 }
3076 
/*
 * Copy bs->backing_file into @filename using pstrcpy(), with
 * @filename_size passed as the destination buffer size.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
3082 
3083 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3084 {
3085     BlockDriver *drv = bs->drv;
3086     if (!drv)
3087         return -ENOMEDIUM;
3088     if (!drv->bdrv_get_info)
3089         return -ENOTSUP;
3090     memset(bdi, 0, sizeof(*bdi));
3091     return drv->bdrv_get_info(bs, bdi);
3092 }
3093 
3094 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3095 {
3096     BlockDriver *drv = bs->drv;
3097     if (drv && drv->bdrv_get_specific_info) {
3098         return drv->bdrv_get_specific_info(bs);
3099     }
3100     return NULL;
3101 }
3102 
3103 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3104 {
3105     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3106         return;
3107     }
3108 
3109     bs->drv->bdrv_debug_event(bs, event);
3110 }
3111 
3112 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3113                           const char *tag)
3114 {
3115     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3116         bs = bs->file ? bs->file->bs : NULL;
3117     }
3118 
3119     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3120         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3121     }
3122 
3123     return -ENOTSUP;
3124 }
3125 
3126 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3127 {
3128     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3129         bs = bs->file ? bs->file->bs : NULL;
3130     }
3131 
3132     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3133         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3134     }
3135 
3136     return -ENOTSUP;
3137 }
3138 
3139 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3140 {
3141     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3142         bs = bs->file ? bs->file->bs : NULL;
3143     }
3144 
3145     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3146         return bs->drv->bdrv_debug_resume(bs, tag);
3147     }
3148 
3149     return -ENOTSUP;
3150 }
3151 
3152 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3153 {
3154     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3155         bs = bs->file ? bs->file->bs : NULL;
3156     }
3157 
3158     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3159         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3160     }
3161 
3162     return false;
3163 }
3164 
3165 int bdrv_is_snapshot(BlockDriverState *bs)
3166 {
3167     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3168 }
3169 
3170 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3171  * relative, it must be relative to the chain.  So, passing in bs->filename
3172  * from a BDS as backing_file should not be done, as that may be relative to
3173  * the CWD rather than the chain. */
3174 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3175         const char *backing_file)
3176 {
3177     char *filename_full = NULL;
3178     char *backing_file_full = NULL;
3179     char *filename_tmp = NULL;
3180     int is_protocol = 0;
3181     BlockDriverState *curr_bs = NULL;
3182     BlockDriverState *retval = NULL;
3183 
3184     if (!bs || !bs->drv || !backing_file) {
3185         return NULL;
3186     }
3187 
3188     filename_full     = g_malloc(PATH_MAX);
3189     backing_file_full = g_malloc(PATH_MAX);
3190     filename_tmp      = g_malloc(PATH_MAX);
3191 
3192     is_protocol = path_has_protocol(backing_file);
3193 
3194     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3195 
3196         /* If either of the filename paths is actually a protocol, then
3197          * compare unmodified paths; otherwise make paths relative */
3198         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3199             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3200                 retval = curr_bs->backing->bs;
3201                 break;
3202             }
3203         } else {
3204             /* If not an absolute filename path, make it relative to the current
3205              * image's filename path */
3206             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3207                          backing_file);
3208 
3209             /* We are going to compare absolute pathnames */
3210             if (!realpath(filename_tmp, filename_full)) {
3211                 continue;
3212             }
3213 
3214             /* We need to make sure the backing filename we are comparing against
3215              * is relative to the current image filename (or absolute) */
3216             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3217                          curr_bs->backing_file);
3218 
3219             if (!realpath(filename_tmp, backing_file_full)) {
3220                 continue;
3221             }
3222 
3223             if (strcmp(backing_file_full, filename_full) == 0) {
3224                 retval = curr_bs->backing->bs;
3225                 break;
3226             }
3227         }
3228     }
3229 
3230     g_free(filename_full);
3231     g_free(backing_file_full);
3232     g_free(filename_tmp);
3233     return retval;
3234 }
3235 
3236 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3237 {
3238     if (!bs->drv) {
3239         return 0;
3240     }
3241 
3242     if (!bs->backing) {
3243         return 0;
3244     }
3245 
3246     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3247 }
3248 
/* Run the module initializers registered for MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3253 
/* Set use_bdrv_whitelist to 1, then perform the regular bdrv_init(). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3259 
3260 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3261 {
3262     Error *local_err = NULL;
3263     int ret;
3264 
3265     if (!bs->drv)  {
3266         return;
3267     }
3268 
3269     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3270         return;
3271     }
3272     bs->open_flags &= ~BDRV_O_INACTIVE;
3273 
3274     if (bs->drv->bdrv_invalidate_cache) {
3275         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3276     } else if (bs->file) {
3277         bdrv_invalidate_cache(bs->file->bs, &local_err);
3278     }
3279     if (local_err) {
3280         bs->open_flags |= BDRV_O_INACTIVE;
3281         error_propagate(errp, local_err);
3282         return;
3283     }
3284 
3285     ret = refresh_total_sectors(bs, bs->total_sectors);
3286     if (ret < 0) {
3287         bs->open_flags |= BDRV_O_INACTIVE;
3288         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3289         return;
3290     }
3291 }
3292 
3293 void bdrv_invalidate_cache_all(Error **errp)
3294 {
3295     BlockDriverState *bs;
3296     Error *local_err = NULL;
3297 
3298     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3299         AioContext *aio_context = bdrv_get_aio_context(bs);
3300 
3301         aio_context_acquire(aio_context);
3302         bdrv_invalidate_cache(bs, &local_err);
3303         aio_context_release(aio_context);
3304         if (local_err) {
3305             error_propagate(errp, local_err);
3306             return;
3307         }
3308     }
3309 }
3310 
3311 static int bdrv_inactivate(BlockDriverState *bs)
3312 {
3313     int ret;
3314 
3315     if (bs->drv->bdrv_inactivate) {
3316         ret = bs->drv->bdrv_inactivate(bs);
3317         if (ret < 0) {
3318             return ret;
3319         }
3320     }
3321 
3322     bs->open_flags |= BDRV_O_INACTIVE;
3323     return 0;
3324 }
3325 
3326 int bdrv_inactivate_all(void)
3327 {
3328     BlockDriverState *bs;
3329     int ret;
3330 
3331     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3332         AioContext *aio_context = bdrv_get_aio_context(bs);
3333 
3334         aio_context_acquire(aio_context);
3335         ret = bdrv_inactivate(bs);
3336         aio_context_release(aio_context);
3337         if (ret < 0) {
3338             return ret;
3339         }
3340     }
3341 
3342     return 0;
3343 }
3344 
3345 /**************************************************************/
3346 /* removable device support */
3347 
3348 /**
3349  * Return TRUE if the media is present
3350  */
3351 bool bdrv_is_inserted(BlockDriverState *bs)
3352 {
3353     BlockDriver *drv = bs->drv;
3354     BdrvChild *child;
3355 
3356     if (!drv) {
3357         return false;
3358     }
3359     if (drv->bdrv_is_inserted) {
3360         return drv->bdrv_is_inserted(bs);
3361     }
3362     QLIST_FOREACH(child, &bs->children, next) {
3363         if (!bdrv_is_inserted(child->bs)) {
3364             return false;
3365         }
3366     }
3367     return true;
3368 }
3369 
3370 /**
3371  * Return whether the media changed since the last call to this
3372  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3373  */
3374 int bdrv_media_changed(BlockDriverState *bs)
3375 {
3376     BlockDriver *drv = bs->drv;
3377 
3378     if (drv && drv->bdrv_media_changed) {
3379         return drv->bdrv_media_changed(bs);
3380     }
3381     return -ENOTSUP;
3382 }
3383 
3384 /**
3385  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3386  */
3387 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3388 {
3389     BlockDriver *drv = bs->drv;
3390     const char *device_name;
3391 
3392     if (drv && drv->bdrv_eject) {
3393         drv->bdrv_eject(bs, eject_flag);
3394     }
3395 
3396     device_name = bdrv_get_device_name(bs);
3397     if (device_name[0] != '\0') {
3398         qapi_event_send_device_tray_moved(device_name,
3399                                           eject_flag, &error_abort);
3400     }
3401 }
3402 
3403 /**
3404  * Lock or unlock the media (if it is locked, the user won't be able
3405  * to eject it manually).
3406  */
3407 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3408 {
3409     BlockDriver *drv = bs->drv;
3410 
3411     trace_bdrv_lock_medium(bs, locked);
3412 
3413     if (drv && drv->bdrv_lock_medium) {
3414         drv->bdrv_lock_medium(bs, locked);
3415     }
3416 }
3417 
3418 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3419 {
3420     BdrvDirtyBitmap *bm;
3421 
3422     assert(name);
3423     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3424         if (bm->name && !strcmp(name, bm->name)) {
3425             return bm;
3426         }
3427     }
3428     return NULL;
3429 }
3430 
/*
 * Turn @bitmap into an anonymous bitmap by freeing its name and setting it
 * to NULL.  The bitmap must not be frozen.
 */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
3437 
3438 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3439                                           uint32_t granularity,
3440                                           const char *name,
3441                                           Error **errp)
3442 {
3443     int64_t bitmap_size;
3444     BdrvDirtyBitmap *bitmap;
3445     uint32_t sector_granularity;
3446 
3447     assert((granularity & (granularity - 1)) == 0);
3448 
3449     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3450         error_setg(errp, "Bitmap already exists: %s", name);
3451         return NULL;
3452     }
3453     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3454     assert(sector_granularity);
3455     bitmap_size = bdrv_nb_sectors(bs);
3456     if (bitmap_size < 0) {
3457         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3458         errno = -bitmap_size;
3459         return NULL;
3460     }
3461     bitmap = g_new0(BdrvDirtyBitmap, 1);
3462     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3463     bitmap->size = bitmap_size;
3464     bitmap->name = g_strdup(name);
3465     bitmap->disabled = false;
3466     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3467     return bitmap;
3468 }
3469 
/* A bitmap is frozen exactly when it has a successor installed. */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
    return bitmap->successor;
}
3474 
3475 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3476 {
3477     return !(bitmap->disabled || bitmap->successor);
3478 }
3479 
3480 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3481 {
3482     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3483         return DIRTY_BITMAP_STATUS_FROZEN;
3484     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3485         return DIRTY_BITMAP_STATUS_DISABLED;
3486     } else {
3487         return DIRTY_BITMAP_STATUS_ACTIVE;
3488     }
3489 }
3490 
3491 /**
3492  * Create a successor bitmap destined to replace this bitmap after an operation.
3493  * Requires that the bitmap is not frozen and has no successor.
3494  */
3495 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3496                                        BdrvDirtyBitmap *bitmap, Error **errp)
3497 {
3498     uint64_t granularity;
3499     BdrvDirtyBitmap *child;
3500 
3501     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3502         error_setg(errp, "Cannot create a successor for a bitmap that is "
3503                    "currently frozen");
3504         return -1;
3505     }
3506     assert(!bitmap->successor);
3507 
3508     /* Create an anonymous successor */
3509     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3510     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3511     if (!child) {
3512         return -1;
3513     }
3514 
3515     /* Successor will be on or off based on our current state. */
3516     child->disabled = bitmap->disabled;
3517 
3518     /* Install the successor and freeze the parent */
3519     bitmap->successor = child;
3520     return 0;
3521 }
3522 
3523 /**
3524  * For a bitmap with a successor, yield our name to the successor,
3525  * delete the old bitmap, and return a handle to the new bitmap.
3526  */
3527 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3528                                             BdrvDirtyBitmap *bitmap,
3529                                             Error **errp)
3530 {
3531     char *name;
3532     BdrvDirtyBitmap *successor = bitmap->successor;
3533 
3534     if (successor == NULL) {
3535         error_setg(errp, "Cannot relinquish control if "
3536                    "there's no successor present");
3537         return NULL;
3538     }
3539 
3540     name = bitmap->name;
3541     bitmap->name = NULL;
3542     successor->name = name;
3543     bitmap->successor = NULL;
3544     bdrv_release_dirty_bitmap(bs, bitmap);
3545 
3546     return successor;
3547 }
3548 
3549 /**
3550  * In cases of failure where we can no longer safely delete the parent,
3551  * we may wish to re-join the parent and child/successor.
3552  * The merged parent will be un-frozen, but not explicitly re-enabled.
3553  */
3554 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3555                                            BdrvDirtyBitmap *parent,
3556                                            Error **errp)
3557 {
3558     BdrvDirtyBitmap *successor = parent->successor;
3559 
3560     if (!successor) {
3561         error_setg(errp, "Cannot reclaim a successor when none is present");
3562         return NULL;
3563     }
3564 
3565     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3566         error_setg(errp, "Merging of parent and successor bitmap failed");
3567         return NULL;
3568     }
3569     bdrv_release_dirty_bitmap(bs, successor);
3570     parent->successor = NULL;
3571 
3572     return parent;
3573 }
3574 
3575 /**
3576  * Truncates _all_ bitmaps attached to a BDS.
3577  */
3578 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3579 {
3580     BdrvDirtyBitmap *bitmap;
3581     uint64_t size = bdrv_nb_sectors(bs);
3582 
3583     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3584         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3585         hbitmap_truncate(bitmap->bitmap, size);
3586         bitmap->size = size;
3587     }
3588 }
3589 
3590 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3591                                                   BdrvDirtyBitmap *bitmap,
3592                                                   bool only_named)
3593 {
3594     BdrvDirtyBitmap *bm, *next;
3595     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3596         if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3597             assert(!bdrv_dirty_bitmap_frozen(bm));
3598             QLIST_REMOVE(bm, list);
3599             hbitmap_free(bm->bitmap);
3600             g_free(bm->name);
3601             g_free(bm);
3602 
3603             if (bitmap) {
3604                 return;
3605             }
3606         }
3607     }
3608 }
3609 
/*
 * Release @bitmap from @bs, whether it is named or anonymous.
 * A NULL @bitmap is passed straight through to the helper, which then
 * treats every bitmap on @bs as a match.
 */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
}
3614 
/**
 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
 * Anonymous bitmaps are left in place.
 * There must not be any frozen bitmaps attached.
 */
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
}
3623 
/* Mark @bitmap as disabled.  The bitmap must not be frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
3629 
/* Clear the disabled mark on @bitmap.  The bitmap must not be frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
3635 
3636 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3637 {
3638     BdrvDirtyBitmap *bm;
3639     BlockDirtyInfoList *list = NULL;
3640     BlockDirtyInfoList **plist = &list;
3641 
3642     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3643         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3644         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3645         info->count = bdrv_get_dirty_count(bm);
3646         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3647         info->has_name = !!bm->name;
3648         info->name = g_strdup(bm->name);
3649         info->status = bdrv_dirty_bitmap_status(bm);
3650         entry->value = info;
3651         *plist = entry;
3652         plist = &entry->next;
3653     }
3654 
3655     return list;
3656 }
3657 
3658 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3659 {
3660     if (bitmap) {
3661         return hbitmap_get(bitmap->bitmap, sector);
3662     } else {
3663         return 0;
3664     }
3665 }
3666 
3667 /**
3668  * Chooses a default granularity based on the existing cluster size,
3669  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3670  * is no cluster size information available.
3671  */
3672 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3673 {
3674     BlockDriverInfo bdi;
3675     uint32_t granularity;
3676 
3677     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3678         granularity = MAX(4096, bdi.cluster_size);
3679         granularity = MIN(65536, granularity);
3680     } else {
3681         granularity = 65536;
3682     }
3683 
3684     return granularity;
3685 }
3686 
/*
 * Return BDRV_SECTOR_SIZE shifted left by the granularity reported by
 * hbitmap_granularity() for the underlying HBitmap.
 */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
3691 
/* Initialize @hbi to iterate over @bitmap's HBitmap starting at 0. */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
3696 
/*
 * Call hbitmap_set() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled.
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
3703 
/*
 * Call hbitmap_reset() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled.
 */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
3710 
3711 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3712 {
3713     assert(bdrv_dirty_bitmap_enabled(bitmap));
3714     if (!out) {
3715         hbitmap_reset_all(bitmap->bitmap);
3716     } else {
3717         HBitmap *backup = bitmap->bitmap;
3718         bitmap->bitmap = hbitmap_alloc(bitmap->size,
3719                                        hbitmap_granularity(backup));
3720         *out = backup;
3721     }
3722 }
3723 
3724 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3725 {
3726     HBitmap *tmp = bitmap->bitmap;
3727     assert(bdrv_dirty_bitmap_enabled(bitmap));
3728     bitmap->bitmap = in;
3729     hbitmap_free(tmp);
3730 }
3731 
3732 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3733                     int nr_sectors)
3734 {
3735     BdrvDirtyBitmap *bitmap;
3736     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3737         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3738             continue;
3739         }
3740         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3741     }
3742 }
3743 
/**
 * Advance an HBitmapIter to an arbitrary offset.
 *
 * The iterator must already be bound to an HBitmap (hbi->hb non-NULL); it
 * is re-initialized on that same HBitmap at @offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
3752 
/* Return hbitmap_count() of the HBitmap backing @bitmap. */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
3757 
/* Get a reference to bs by incrementing its reference count. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3763 
3764 /* Release a previously grabbed reference to bs.
3765  * If after releasing, reference count is zero, the BlockDriverState is
3766  * deleted. */
3767 void bdrv_unref(BlockDriverState *bs)
3768 {
3769     if (!bs) {
3770         return;
3771     }
3772     assert(bs->refcnt > 0);
3773     if (--bs->refcnt == 0) {
3774         bdrv_delete(bs);
3775     }
3776 }
3777 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error describing why the operation is blocked; also serves as the
     * identity used to find this entry again when unblocking. */
    Error *reason;
    /* Linkage within the op_blockers list. */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3782 
3783 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3784 {
3785     BdrvOpBlocker *blocker;
3786     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3787     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3788         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3789         if (errp) {
3790             *errp = error_copy(blocker->reason);
3791             error_prepend(errp, "Node '%s' is busy: ",
3792                           bdrv_get_device_or_node_name(bs));
3793         }
3794         return true;
3795     }
3796     return false;
3797 }
3798 
3799 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3800 {
3801     BdrvOpBlocker *blocker;
3802     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3803 
3804     blocker = g_new0(BdrvOpBlocker, 1);
3805     blocker->reason = reason;
3806     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3807 }
3808 
3809 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3810 {
3811     BdrvOpBlocker *blocker, *next;
3812     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3813     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3814         if (blocker->reason == reason) {
3815             QLIST_REMOVE(blocker, list);
3816             g_free(blocker);
3817         }
3818     }
3819 }
3820 
3821 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3822 {
3823     int i;
3824     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3825         bdrv_op_block(bs, i, reason);
3826     }
3827 }
3828 
3829 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3830 {
3831     int i;
3832     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3833         bdrv_op_unblock(bs, i, reason);
3834     }
3835 }
3836 
3837 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3838 {
3839     int i;
3840 
3841     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3842         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3843             return false;
3844         }
3845     }
3846     return true;
3847 }
3848 
3849 void bdrv_img_create(const char *filename, const char *fmt,
3850                      const char *base_filename, const char *base_fmt,
3851                      char *options, uint64_t img_size, int flags,
3852                      Error **errp, bool quiet)
3853 {
3854     QemuOptsList *create_opts = NULL;
3855     QemuOpts *opts = NULL;
3856     const char *backing_fmt, *backing_file;
3857     int64_t size;
3858     BlockDriver *drv, *proto_drv;
3859     Error *local_err = NULL;
3860     int ret = 0;
3861 
3862     /* Find driver and parse its options */
3863     drv = bdrv_find_format(fmt);
3864     if (!drv) {
3865         error_setg(errp, "Unknown file format '%s'", fmt);
3866         return;
3867     }
3868 
3869     proto_drv = bdrv_find_protocol(filename, true, errp);
3870     if (!proto_drv) {
3871         return;
3872     }
3873 
3874     if (!drv->create_opts) {
3875         error_setg(errp, "Format driver '%s' does not support image creation",
3876                    drv->format_name);
3877         return;
3878     }
3879 
3880     if (!proto_drv->create_opts) {
3881         error_setg(errp, "Protocol driver '%s' does not support image creation",
3882                    proto_drv->format_name);
3883         return;
3884     }
3885 
3886     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3887     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3888 
3889     /* Create parameter list with default values */
3890     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3891     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3892 
3893     /* Parse -o options */
3894     if (options) {
3895         qemu_opts_do_parse(opts, options, NULL, &local_err);
3896         if (local_err) {
3897             error_report_err(local_err);
3898             local_err = NULL;
3899             error_setg(errp, "Invalid options for file format '%s'", fmt);
3900             goto out;
3901         }
3902     }
3903 
3904     if (base_filename) {
3905         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3906         if (local_err) {
3907             error_setg(errp, "Backing file not supported for file format '%s'",
3908                        fmt);
3909             goto out;
3910         }
3911     }
3912 
3913     if (base_fmt) {
3914         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3915         if (local_err) {
3916             error_setg(errp, "Backing file format not supported for file "
3917                              "format '%s'", fmt);
3918             goto out;
3919         }
3920     }
3921 
3922     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3923     if (backing_file) {
3924         if (!strcmp(filename, backing_file)) {
3925             error_setg(errp, "Error: Trying to create an image with the "
3926                              "same filename as the backing file");
3927             goto out;
3928         }
3929     }
3930 
3931     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3932 
3933     // The size for the image must always be specified, with one exception:
3934     // If we are using a backing file, we can obtain the size from there
3935     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3936     if (size == -1) {
3937         if (backing_file) {
3938             BlockDriverState *bs;
3939             char *full_backing = g_new0(char, PATH_MAX);
3940             int64_t size;
3941             int back_flags;
3942             QDict *backing_options = NULL;
3943 
3944             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3945                                                          full_backing, PATH_MAX,
3946                                                          &local_err);
3947             if (local_err) {
3948                 g_free(full_backing);
3949                 goto out;
3950             }
3951 
3952             /* backing files always opened read-only */
3953             back_flags =
3954                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3955 
3956             if (backing_fmt) {
3957                 backing_options = qdict_new();
3958                 qdict_put(backing_options, "driver",
3959                           qstring_from_str(backing_fmt));
3960             }
3961 
3962             bs = NULL;
3963             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3964                             back_flags, &local_err);
3965             g_free(full_backing);
3966             if (ret < 0) {
3967                 goto out;
3968             }
3969             size = bdrv_getlength(bs);
3970             if (size < 0) {
3971                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3972                                  backing_file);
3973                 bdrv_unref(bs);
3974                 goto out;
3975             }
3976 
3977             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3978 
3979             bdrv_unref(bs);
3980         } else {
3981             error_setg(errp, "Image creation needs a size parameter");
3982             goto out;
3983         }
3984     }
3985 
3986     if (!quiet) {
3987         printf("Formatting '%s', fmt=%s ", filename, fmt);
3988         qemu_opts_print(opts, " ");
3989         puts("");
3990     }
3991 
3992     ret = bdrv_create(drv, filename, opts, &local_err);
3993 
3994     if (ret == -EFBIG) {
3995         /* This is generally a better message than whatever the driver would
3996          * deliver (especially because of the cluster_size_hint), since that
3997          * is most probably not much different from "image too large". */
3998         const char *cluster_size_hint = "";
3999         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
4000             cluster_size_hint = " (try using a larger cluster size)";
4001         }
4002         error_setg(errp, "The image size is too large for file format '%s'"
4003                    "%s", fmt, cluster_size_hint);
4004         error_free(local_err);
4005         local_err = NULL;
4006     }
4007 
4008 out:
4009     qemu_opts_del(opts);
4010     qemu_opts_free(create_opts);
4011     if (local_err) {
4012         error_propagate(errp, local_err);
4013     }
4014 }
4015 
4016 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4017 {
4018     return bs->aio_context;
4019 }
4020 
4021 void bdrv_detach_aio_context(BlockDriverState *bs)
4022 {
4023     BdrvAioNotifier *baf;
4024 
4025     if (!bs->drv) {
4026         return;
4027     }
4028 
4029     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4030         baf->detach_aio_context(baf->opaque);
4031     }
4032 
4033     if (bs->throttle_state) {
4034         throttle_timers_detach_aio_context(&bs->throttle_timers);
4035     }
4036     if (bs->drv->bdrv_detach_aio_context) {
4037         bs->drv->bdrv_detach_aio_context(bs);
4038     }
4039     if (bs->file) {
4040         bdrv_detach_aio_context(bs->file->bs);
4041     }
4042     if (bs->backing) {
4043         bdrv_detach_aio_context(bs->backing->bs);
4044     }
4045 
4046     bs->aio_context = NULL;
4047 }
4048 
4049 void bdrv_attach_aio_context(BlockDriverState *bs,
4050                              AioContext *new_context)
4051 {
4052     BdrvAioNotifier *ban;
4053 
4054     if (!bs->drv) {
4055         return;
4056     }
4057 
4058     bs->aio_context = new_context;
4059 
4060     if (bs->backing) {
4061         bdrv_attach_aio_context(bs->backing->bs, new_context);
4062     }
4063     if (bs->file) {
4064         bdrv_attach_aio_context(bs->file->bs, new_context);
4065     }
4066     if (bs->drv->bdrv_attach_aio_context) {
4067         bs->drv->bdrv_attach_aio_context(bs, new_context);
4068     }
4069     if (bs->throttle_state) {
4070         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4071     }
4072 
4073     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4074         ban->attached_aio_context(new_context, ban->opaque);
4075     }
4076 }
4077 
4078 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4079 {
4080     bdrv_drain(bs); /* ensure there are no in-flight requests */
4081 
4082     bdrv_detach_aio_context(bs);
4083 
4084     /* This function executes in the old AioContext so acquire the new one in
4085      * case it runs in a different thread.
4086      */
4087     aio_context_acquire(new_context);
4088     bdrv_attach_aio_context(bs, new_context);
4089     aio_context_release(new_context);
4090 }
4091 
4092 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4093         void (*attached_aio_context)(AioContext *new_context, void *opaque),
4094         void (*detach_aio_context)(void *opaque), void *opaque)
4095 {
4096     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4097     *ban = (BdrvAioNotifier){
4098         .attached_aio_context = attached_aio_context,
4099         .detach_aio_context   = detach_aio_context,
4100         .opaque               = opaque
4101     };
4102 
4103     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4104 }
4105 
4106 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4107                                       void (*attached_aio_context)(AioContext *,
4108                                                                    void *),
4109                                       void (*detach_aio_context)(void *),
4110                                       void *opaque)
4111 {
4112     BdrvAioNotifier *ban, *ban_next;
4113 
4114     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4115         if (ban->attached_aio_context == attached_aio_context &&
4116             ban->detach_aio_context   == detach_aio_context   &&
4117             ban->opaque               == opaque)
4118         {
4119             QLIST_REMOVE(ban, list);
4120             g_free(ban);
4121 
4122             return;
4123         }
4124     }
4125 
4126     abort();
4127 }
4128 
4129 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4130                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4131 {
4132     if (!bs->drv->bdrv_amend_options) {
4133         return -ENOTSUP;
4134     }
4135     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4136 }
4137 
4138 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4139  * of block filter and by bdrv_is_first_non_filter.
4140  * It is used to test if the given bs is the candidate or recurse more in the
4141  * node graph.
4142  */
4143 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4144                                       BlockDriverState *candidate)
4145 {
4146     /* return false if basic checks fails */
4147     if (!bs || !bs->drv) {
4148         return false;
4149     }
4150 
4151     /* the code reached a non block filter driver -> check if the bs is
4152      * the same as the candidate. It's the recursion termination condition.
4153      */
4154     if (!bs->drv->is_filter) {
4155         return bs == candidate;
4156     }
4157     /* Down this path the driver is a block filter driver */
4158 
4159     /* If the block filter recursion method is defined use it to recurse down
4160      * the node graph.
4161      */
4162     if (bs->drv->bdrv_recurse_is_first_non_filter) {
4163         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4164     }
4165 
4166     /* the driver is a block filter but don't allow to recurse -> return false
4167      */
4168     return false;
4169 }
4170 
4171 /* This function checks if the candidate is the first non filter bs down it's
4172  * bs chain. Since we don't have pointers to parents it explore all bs chains
4173  * from the top. Some filters can choose not to pass down the recursion.
4174  */
4175 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4176 {
4177     BlockDriverState *bs;
4178 
4179     /* walk down the bs forest recursively */
4180     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4181         bool perm;
4182 
4183         /* try to recurse in this top level bs */
4184         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4185 
4186         /* candidate is the first non filter */
4187         if (perm) {
4188             return true;
4189         }
4190     }
4191 
4192     return false;
4193 }
4194 
4195 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4196                                         const char *node_name, Error **errp)
4197 {
4198     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4199     AioContext *aio_context;
4200 
4201     if (!to_replace_bs) {
4202         error_setg(errp, "Node name '%s' not found", node_name);
4203         return NULL;
4204     }
4205 
4206     aio_context = bdrv_get_aio_context(to_replace_bs);
4207     aio_context_acquire(aio_context);
4208 
4209     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4210         to_replace_bs = NULL;
4211         goto out;
4212     }
4213 
4214     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4215      * most non filter in order to prevent data corruption.
4216      * Another benefit is that this tests exclude backing files which are
4217      * blocked by the backing blockers.
4218      */
4219     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4220         error_setg(errp, "Only top most non filter can be replaced");
4221         to_replace_bs = NULL;
4222         goto out;
4223     }
4224 
4225 out:
4226     aio_context_release(aio_context);
4227     return to_replace_bs;
4228 }
4229 
4230 static bool append_open_options(QDict *d, BlockDriverState *bs)
4231 {
4232     const QDictEntry *entry;
4233     QemuOptDesc *desc;
4234     BdrvChild *child;
4235     bool found_any = false;
4236     const char *p;
4237 
4238     for (entry = qdict_first(bs->options); entry;
4239          entry = qdict_next(bs->options, entry))
4240     {
4241         /* Exclude options for children */
4242         QLIST_FOREACH(child, &bs->children, next) {
4243             if (strstart(qdict_entry_key(entry), child->name, &p)
4244                 && (!*p || *p == '.'))
4245             {
4246                 break;
4247             }
4248         }
4249         if (child) {
4250             continue;
4251         }
4252 
4253         /* And exclude all non-driver-specific options */
4254         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4255             if (!strcmp(qdict_entry_key(entry), desc->name)) {
4256                 break;
4257             }
4258         }
4259         if (desc->name) {
4260             continue;
4261         }
4262 
4263         qobject_incref(qdict_entry_value(entry));
4264         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4265         found_any = true;
4266     }
4267 
4268     return found_any;
4269 }
4270 
4271 /* Updates the following BDS fields:
4272  *  - exact_filename: A filename which may be used for opening a block device
4273  *                    which (mostly) equals the given BDS (even without any
4274  *                    other options; so reading and writing must return the same
4275  *                    results, but caching etc. may be different)
4276  *  - full_open_options: Options which, when given when opening a block device
4277  *                       (without a filename), result in a BDS (mostly)
4278  *                       equalling the given one
4279  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4280  *              full_open_options is converted to a JSON object, prefixed with
4281  *              "json:" (for use through the JSON pseudo protocol) and put here.
4282  */
4283 void bdrv_refresh_filename(BlockDriverState *bs)
4284 {
4285     BlockDriver *drv = bs->drv;
4286     QDict *opts;
4287 
4288     if (!drv) {
4289         return;
4290     }
4291 
4292     /* This BDS's file name will most probably depend on its file's name, so
4293      * refresh that first */
4294     if (bs->file) {
4295         bdrv_refresh_filename(bs->file->bs);
4296     }
4297 
4298     if (drv->bdrv_refresh_filename) {
4299         /* Obsolete information is of no use here, so drop the old file name
4300          * information before refreshing it */
4301         bs->exact_filename[0] = '\0';
4302         if (bs->full_open_options) {
4303             QDECREF(bs->full_open_options);
4304             bs->full_open_options = NULL;
4305         }
4306 
4307         opts = qdict_new();
4308         append_open_options(opts, bs);
4309         drv->bdrv_refresh_filename(bs, opts);
4310         QDECREF(opts);
4311     } else if (bs->file) {
4312         /* Try to reconstruct valid information from the underlying file */
4313         bool has_open_options;
4314 
4315         bs->exact_filename[0] = '\0';
4316         if (bs->full_open_options) {
4317             QDECREF(bs->full_open_options);
4318             bs->full_open_options = NULL;
4319         }
4320 
4321         opts = qdict_new();
4322         has_open_options = append_open_options(opts, bs);
4323 
4324         /* If no specific options have been given for this BDS, the filename of
4325          * the underlying file should suffice for this one as well */
4326         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4327             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4328         }
4329         /* Reconstructing the full options QDict is simple for most format block
4330          * drivers, as long as the full options are known for the underlying
4331          * file BDS. The full options QDict of that file BDS should somehow
4332          * contain a representation of the filename, therefore the following
4333          * suffices without querying the (exact_)filename of this BDS. */
4334         if (bs->file->bs->full_open_options) {
4335             qdict_put_obj(opts, "driver",
4336                           QOBJECT(qstring_from_str(drv->format_name)));
4337             QINCREF(bs->file->bs->full_open_options);
4338             qdict_put_obj(opts, "file",
4339                           QOBJECT(bs->file->bs->full_open_options));
4340 
4341             bs->full_open_options = opts;
4342         } else {
4343             QDECREF(opts);
4344         }
4345     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4346         /* There is no underlying file BDS (at least referenced by BDS.file),
4347          * so the full options QDict should be equal to the options given
4348          * specifically for this block device when it was opened (plus the
4349          * driver specification).
4350          * Because those options don't change, there is no need to update
4351          * full_open_options when it's already set. */
4352 
4353         opts = qdict_new();
4354         append_open_options(opts, bs);
4355         qdict_put_obj(opts, "driver",
4356                       QOBJECT(qstring_from_str(drv->format_name)));
4357 
4358         if (bs->exact_filename[0]) {
4359             /* This may not work for all block protocol drivers (some may
4360              * require this filename to be parsed), but we have to find some
4361              * default solution here, so just include it. If some block driver
4362              * does not support pure options without any filename at all or
4363              * needs some special format of the options QDict, it needs to
4364              * implement the driver-specific bdrv_refresh_filename() function.
4365              */
4366             qdict_put_obj(opts, "filename",
4367                           QOBJECT(qstring_from_str(bs->exact_filename)));
4368         }
4369 
4370         bs->full_open_options = opts;
4371     }
4372 
4373     if (bs->exact_filename[0]) {
4374         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4375     } else if (bs->full_open_options) {
4376         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4377         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4378                  qstring_get_str(json));
4379         QDECREF(json);
4380     }
4381 }
4382