xref: /openbmc/qemu/block.c (revision 4b4629d9)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44 
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates that carry a node name; bdrv_assign_node_name() appends
 * to this list through the node_list link. */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* bdrv_new() appends every BlockDriverState it allocates to this list
 * through the bs_list link. */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers added with bdrv_register(); searched by the bdrv_find_*()
 * and probing helpers in this file. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the function is defined later in this file. */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration; the function is defined later in this file. */
static void bdrv_close(BlockDriverState *bs);
77 
78 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Do not look at filename[1] unless the first character is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
85 
86 int is_windows_drive(const char *filename)
87 {
88     if (is_windows_drive_prefix(filename) &&
89         filename[2] == '\0')
90         return 1;
91     if (strstart(filename, "\\\\.\\", NULL) ||
92         strstart(filename, "//./", NULL))
93         return 1;
94     return 0;
95 }
96 #endif
97 
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100     if (!bs || !bs->drv) {
101         /* page size or 4k (hdd sector size) should be on the safe side */
102         return MAX(4096, getpagesize());
103     }
104 
105     return bs->bl.opt_mem_alignment;
106 }
107 
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110     if (!bs || !bs->drv) {
111         /* page size or 4k (hdd sector size) should be on the safe side */
112         return MAX(4096, getpagesize());
113     }
114 
115     return bs->bl.min_mem_alignment;
116 }
117 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * colon or path separator found in @path is a colon.
 *
 * On Windows, drive names and drive prefixes are never treated as
 * protocols, and the backslash also counts as a path separator.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* strcspn() stops at the first separator or at the terminating NUL */
    return path[strcspn(path, separators)] == ':';
}
135 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows a leading
 * backslash, a drive name or a drive prefix also make it absolute.
 */
int path_is_absolute(const char *path)
{
    int absolute = (path[0] == '/');

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    absolute = absolute || path[0] == '\\';
#endif

    return absolute;
}
148 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename relative
 * to @base_path.
 *
 * If @filename is absolute it is copied unchanged. Otherwise the directory
 * part of @base_path is kept: everything up to and including the last path
 * separator, or up to and including the first ':' when that lies further
 * right (URL-style "<protocol>:" prefixes are supported). @filename is then
 * appended to it. The result is truncated to fit @dest.
 *
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* First character after the "<protocol>:" prefix, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* First character after the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192 
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194                                                   const char *backing,
195                                                   char *dest, size_t sz,
196                                                   Error **errp)
197 {
198     if (backing[0] == '\0' || path_has_protocol(backing) ||
199         path_is_absolute(backing))
200     {
201         pstrcpy(dest, sz, backing);
202     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203         error_setg(errp, "Cannot use relative backing file names for '%s'",
204                    backed);
205     } else {
206         path_combine(dest, sz, backed, backing);
207     }
208 }
209 
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211                                     Error **errp)
212 {
213     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214 
215     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216                                                  dest, sz, errp);
217 }
218 
/*
 * Add @bdrv to the file-scope list of block drivers (bdrv_drivers).
 * The driver is inserted at the head of the list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
223 
/*
 * Create a new BlockDriverState. In this revision this simply forwards to
 * bdrv_new().
 */
BlockDriverState *bdrv_new_root(void)
{
    return bdrv_new();
}
228 
229 BlockDriverState *bdrv_new(void)
230 {
231     BlockDriverState *bs;
232     int i;
233 
234     bs = g_new0(BlockDriverState, 1);
235     QLIST_INIT(&bs->dirty_bitmaps);
236     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
237         QLIST_INIT(&bs->op_blockers[i]);
238     }
239     notifier_with_return_list_init(&bs->before_write_notifiers);
240     qemu_co_queue_init(&bs->throttled_reqs[0]);
241     qemu_co_queue_init(&bs->throttled_reqs[1]);
242     bs->refcnt = 1;
243     bs->aio_context = qemu_get_aio_context();
244 
245     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
246 
247     return bs;
248 }
249 
250 BlockDriver *bdrv_find_format(const char *format_name)
251 {
252     BlockDriver *drv1;
253     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
254         if (!strcmp(drv1->format_name, format_name)) {
255             return drv1;
256         }
257     }
258     return NULL;
259 }
260 
261 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
262 {
263     static const char *whitelist_rw[] = {
264         CONFIG_BDRV_RW_WHITELIST
265     };
266     static const char *whitelist_ro[] = {
267         CONFIG_BDRV_RO_WHITELIST
268     };
269     const char **p;
270 
271     if (!whitelist_rw[0] && !whitelist_ro[0]) {
272         return 1;               /* no whitelist, anything goes */
273     }
274 
275     for (p = whitelist_rw; *p; p++) {
276         if (!strcmp(drv->format_name, *p)) {
277             return 1;
278         }
279     }
280     if (read_only) {
281         for (p = whitelist_ro; *p; p++) {
282             if (!strcmp(drv->format_name, *p)) {
283                 return 1;
284             }
285         }
286     }
287     return 0;
288 }
289 
290 bool bdrv_uses_whitelist(void)
291 {
292     return use_bdrv_whitelist;
293 }
294 
/* Arguments and results handed to bdrv_create_co_entry() by bdrv_create() */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* name of the image to create */
    QemuOpts *opts;     /* creation options passed to the driver */
    int ret;            /* NOT_DONE until the driver callback has returned */
    Error *err;         /* error reported by the driver callback, if any */
} CreateCo;
302 
303 static void coroutine_fn bdrv_create_co_entry(void *opaque)
304 {
305     Error *local_err = NULL;
306     int ret;
307 
308     CreateCo *cco = opaque;
309     assert(cco->drv);
310 
311     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
312     if (local_err) {
313         error_propagate(&cco->err, local_err);
314     }
315     cco->ret = ret;
316 }
317 
318 int bdrv_create(BlockDriver *drv, const char* filename,
319                 QemuOpts *opts, Error **errp)
320 {
321     int ret;
322 
323     Coroutine *co;
324     CreateCo cco = {
325         .drv = drv,
326         .filename = g_strdup(filename),
327         .opts = opts,
328         .ret = NOT_DONE,
329         .err = NULL,
330     };
331 
332     if (!drv->bdrv_create) {
333         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
334         ret = -ENOTSUP;
335         goto out;
336     }
337 
338     if (qemu_in_coroutine()) {
339         /* Fast-path if already in coroutine context */
340         bdrv_create_co_entry(&cco);
341     } else {
342         co = qemu_coroutine_create(bdrv_create_co_entry);
343         qemu_coroutine_enter(co, &cco);
344         while (cco.ret == NOT_DONE) {
345             aio_poll(qemu_get_aio_context(), true);
346         }
347     }
348 
349     ret = cco.ret;
350     if (ret < 0) {
351         if (cco.err) {
352             error_propagate(errp, cco.err);
353         } else {
354             error_setg_errno(errp, -ret, "Could not create image");
355         }
356     }
357 
358 out:
359     g_free(cco.filename);
360     return ret;
361 }
362 
363 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
364 {
365     BlockDriver *drv;
366     Error *local_err = NULL;
367     int ret;
368 
369     drv = bdrv_find_protocol(filename, true, errp);
370     if (drv == NULL) {
371         return -ENOENT;
372     }
373 
374     ret = bdrv_create(drv, filename, opts, &local_err);
375     if (local_err) {
376         error_propagate(errp, local_err);
377     }
378     return ret;
379 }
380 
381 /**
382  * Try to get @bs's logical and physical block size.
383  * On success, store them in @bsz struct and return 0.
384  * On failure return -errno.
385  * @bs must not be empty.
386  */
387 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
388 {
389     BlockDriver *drv = bs->drv;
390 
391     if (drv && drv->bdrv_probe_blocksizes) {
392         return drv->bdrv_probe_blocksizes(bs, bsz);
393     }
394 
395     return -ENOTSUP;
396 }
397 
398 /**
399  * Try to get @bs's geometry (cyls, heads, sectors).
400  * On success, store them in @geo struct and return 0.
401  * On failure return -errno.
402  * @bs must not be empty.
403  */
404 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
405 {
406     BlockDriver *drv = bs->drv;
407 
408     if (drv && drv->bdrv_probe_geometry) {
409         return drv->bdrv_probe_geometry(bs, geo);
410     }
411 
412     return -ENOTSUP;
413 }
414 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity of
 * that buffer in bytes. On POSIX hosts the file is created in $TMPDIR, or
 * in /var/tmp when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value
 * (-EOVERFLOW if the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int ret;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite it, and the caller
         * must see why close() failed. */
        ret = -errno;
        unlink(filename);
        return ret;
    }
    return 0;
#endif
}
450 
451 /*
452  * Detect host devices. By convention, /dev/cdrom[N] is always
453  * recognized as a host CDROM.
454  */
455 static BlockDriver *find_hdev_driver(const char *filename)
456 {
457     int score_max = 0, score;
458     BlockDriver *drv = NULL, *d;
459 
460     QLIST_FOREACH(d, &bdrv_drivers, list) {
461         if (d->bdrv_probe_device) {
462             score = d->bdrv_probe_device(filename);
463             if (score > score_max) {
464                 score_max = score;
465                 drv = d;
466             }
467         }
468     }
469 
470     return drv;
471 }
472 
473 BlockDriver *bdrv_find_protocol(const char *filename,
474                                 bool allow_protocol_prefix,
475                                 Error **errp)
476 {
477     BlockDriver *drv1;
478     char protocol[128];
479     int len;
480     const char *p;
481 
482     /* TODO Drivers without bdrv_file_open must be specified explicitly */
483 
484     /*
485      * XXX(hch): we really should not let host device detection
486      * override an explicit protocol specification, but moving this
487      * later breaks access to device names with colons in them.
488      * Thanks to the brain-dead persistent naming schemes on udev-
489      * based Linux systems those actually are quite common.
490      */
491     drv1 = find_hdev_driver(filename);
492     if (drv1) {
493         return drv1;
494     }
495 
496     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
497         return &bdrv_file;
498     }
499 
500     p = strchr(filename, ':');
501     assert(p != NULL);
502     len = p - filename;
503     if (len > sizeof(protocol) - 1)
504         len = sizeof(protocol) - 1;
505     memcpy(protocol, filename, len);
506     protocol[len] = '\0';
507     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
508         if (drv1->protocol_name &&
509             !strcmp(drv1->protocol_name, protocol)) {
510             return drv1;
511         }
512     }
513 
514     error_setg(errp, "Unknown protocol '%s'", protocol);
515     return NULL;
516 }
517 
518 /*
519  * Guess image format by probing its contents.
520  * This is not a good idea when your image is raw (CVE-2008-2004), but
521  * we do it anyway for backward compatibility.
522  *
523  * @buf         contains the image's first @buf_size bytes.
524  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
525  *              but can be smaller if the image file is smaller)
526  * @filename    is its filename.
527  *
528  * For all block drivers, call the bdrv_probe() method to get its
529  * probing score.
530  * Return the first block driver with the highest probing score.
531  */
532 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
533                             const char *filename)
534 {
535     int score_max = 0, score;
536     BlockDriver *drv = NULL, *d;
537 
538     QLIST_FOREACH(d, &bdrv_drivers, list) {
539         if (d->bdrv_probe) {
540             score = d->bdrv_probe(buf, buf_size, filename);
541             if (score > score_max) {
542                 score_max = score;
543                 drv = d;
544             }
545         }
546     }
547 
548     return drv;
549 }
550 
551 static int find_image_format(BlockDriverState *bs, const char *filename,
552                              BlockDriver **pdrv, Error **errp)
553 {
554     BlockDriver *drv;
555     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
556     int ret = 0;
557 
558     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
559     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
560         *pdrv = &bdrv_raw;
561         return ret;
562     }
563 
564     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
565     if (ret < 0) {
566         error_setg_errno(errp, -ret, "Could not read image for determining its "
567                          "format");
568         *pdrv = NULL;
569         return ret;
570     }
571 
572     drv = bdrv_probe_all(buf, ret, filename);
573     if (!drv) {
574         error_setg(errp, "Could not determine image format: No compatible "
575                    "driver found");
576         ret = -ENOENT;
577     }
578     *pdrv = drv;
579     return ret;
580 }
581 
582 /**
583  * Set the current 'total_sectors' value
584  * Return 0 on success, -errno on error.
585  */
586 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
587 {
588     BlockDriver *drv = bs->drv;
589 
590     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
591     if (bdrv_is_sg(bs))
592         return 0;
593 
594     /* query actual device if possible, otherwise just trust the hint */
595     if (drv->bdrv_getlength) {
596         int64_t length = drv->bdrv_getlength(bs);
597         if (length < 0) {
598             return length;
599         }
600         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
601     }
602 
603     bs->total_sectors = hint;
604     return 0;
605 }
606 
607 /**
608  * Combines a QDict of new block driver @options with any missing options taken
609  * from @old_options, so that leaving out an option defaults to its old value.
610  */
611 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
612                               QDict *old_options)
613 {
614     if (bs->drv && bs->drv->bdrv_join_options) {
615         bs->drv->bdrv_join_options(options, old_options);
616     } else {
617         qdict_join(options, old_options, false);
618     }
619 }
620 
621 /**
622  * Set open flags for a given discard mode
623  *
624  * Return 0 on success, -1 if the discard mode was invalid.
625  */
626 int bdrv_parse_discard_flags(const char *mode, int *flags)
627 {
628     *flags &= ~BDRV_O_UNMAP;
629 
630     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
631         /* do nothing */
632     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
633         *flags |= BDRV_O_UNMAP;
634     } else {
635         return -1;
636     }
637 
638     return 0;
639 }
640 
641 /**
642  * Set open flags for a given cache mode
643  *
644  * Return 0 on success, -1 if the cache mode was invalid.
645  */
646 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
647 {
648     *flags &= ~BDRV_O_CACHE_MASK;
649 
650     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
651         *writethrough = false;
652         *flags |= BDRV_O_NOCACHE;
653     } else if (!strcmp(mode, "directsync")) {
654         *writethrough = true;
655         *flags |= BDRV_O_NOCACHE;
656     } else if (!strcmp(mode, "writeback")) {
657         *writethrough = false;
658     } else if (!strcmp(mode, "unsafe")) {
659         *writethrough = false;
660         *flags |= BDRV_O_NO_FLUSH;
661     } else if (!strcmp(mode, "writethrough")) {
662         *writethrough = true;
663     } else {
664         return -1;
665     }
666 
667     return 0;
668 }
669 
670 /*
671  * Returns the options and flags that a temporary snapshot should get, based on
672  * the originally requested flags (the originally requested image will have
673  * flags like a backing file)
674  */
675 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
676                                        int parent_flags, QDict *parent_options)
677 {
678     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
679 
680     /* For temporary files, unconditional cache=unsafe is fine */
681     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
682     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
683 }
684 
685 /*
686  * Returns the options and flags that bs->file should get if a protocol driver
687  * is expected, based on the given options and flags for the parent BDS
688  */
689 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
690                                    int parent_flags, QDict *parent_options)
691 {
692     int flags = parent_flags;
693 
694     /* Enable protocol handling, disable format probing for bs->file */
695     flags |= BDRV_O_PROTOCOL;
696 
697     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
698      * the parent. */
699     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
700     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
701 
702     /* Our block drivers take care to send flushes and respect unmap policy,
703      * so we can default to enable both on lower layers regardless of the
704      * corresponding parent options. */
705     flags |= BDRV_O_UNMAP;
706 
707     /* Clear flags that only apply to the top layer */
708     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
709                BDRV_O_NO_IO);
710 
711     *child_flags = flags;
712 }
713 
/* Child role whose options are derived by bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
717 
718 /*
719  * Returns the options and flags that bs->file should get if the use of formats
720  * (and not only protocols) is permitted for it, based on the given options and
721  * flags for the parent BDS
722  */
723 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
724                                        int parent_flags, QDict *parent_options)
725 {
726     child_file.inherit_options(child_flags, child_options,
727                                parent_flags, parent_options);
728 
729     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
730 }
731 
/* Child role whose options are derived by bdrv_inherited_fmt_options() */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
735 
736 /*
737  * Returns the options and flags that bs->backing should get, based on the
738  * given options and flags for the parent BDS
739  */
740 static void bdrv_backing_options(int *child_flags, QDict *child_options,
741                                  int parent_flags, QDict *parent_options)
742 {
743     int flags = parent_flags;
744 
745     /* The cache mode is inherited unmodified for backing files; except WCE,
746      * which is only applied on the top level (BlockBackend) */
747     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
748     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
749 
750     /* backing files always opened read-only */
751     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
752 
753     /* snapshot=on is handled on the top layer */
754     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
755 
756     *child_flags = flags;
757 }
758 
/* Child role whose options are derived by bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
762 
763 static int bdrv_open_flags(BlockDriverState *bs, int flags)
764 {
765     int open_flags = flags;
766 
767     /*
768      * Clear flags that are internal to the block layer before opening the
769      * image.
770      */
771     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
772 
773     /*
774      * Snapshots should be writable.
775      */
776     if (flags & BDRV_O_TEMPORARY) {
777         open_flags |= BDRV_O_RDWR;
778     }
779 
780     return open_flags;
781 }
782 
783 static void update_flags_from_options(int *flags, QemuOpts *opts)
784 {
785     *flags &= ~BDRV_O_CACHE_MASK;
786 
787     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
788     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
789         *flags |= BDRV_O_NO_FLUSH;
790     }
791 
792     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
793     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
794         *flags |= BDRV_O_NOCACHE;
795     }
796 }
797 
798 static void update_options_from_flags(QDict *options, int flags)
799 {
800     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
801         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
802                   qbool_from_bool(flags & BDRV_O_NOCACHE));
803     }
804     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
805         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
806                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
807     }
808 }
809 
810 static void bdrv_assign_node_name(BlockDriverState *bs,
811                                   const char *node_name,
812                                   Error **errp)
813 {
814     char *gen_node_name = NULL;
815 
816     if (!node_name) {
817         node_name = gen_node_name = id_generate(ID_BLOCK);
818     } else if (!id_wellformed(node_name)) {
819         /*
820          * Check for empty string or invalid characters, but not if it is
821          * generated (generated names use characters not available to the user)
822          */
823         error_setg(errp, "Invalid node name");
824         return;
825     }
826 
827     /* takes care of avoiding namespaces collisions */
828     if (blk_by_name(node_name)) {
829         error_setg(errp, "node-name=%s is conflicting with a device id",
830                    node_name);
831         goto out;
832     }
833 
834     /* takes care of avoiding duplicates node names */
835     if (bdrv_find_node(node_name)) {
836         error_setg(errp, "Duplicate node name");
837         goto out;
838     }
839 
840     /* copy node name into the bs and insert it into the graph list */
841     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
842     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
843 out:
844     g_free(gen_node_name);
845 }
846 
/*
 * Options common to all block drivers. bdrv_open_common() absorbs these
 * from the options QDict before the driver-specific open function runs.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
874 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node being opened; bs->file must be NULL and bs->options must
 *           not be the same QDict as @options
 * @file:    already opened child to use as bs->file, or NULL when the
 *           driver opens the file itself (bdrv_file_open)
 * @options: options for this node; the "driver" option must name a
 *           registered block driver
 * @errp:    set on failure
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the options listed in bdrv_runtime_opts from @options to opts */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* A missing or unknown driver is treated as a programming error here */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* With a child the filename is the child's, otherwise it is an option */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are set before the driver's open function is called */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell the user when the driver would be allowed read-only */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail to undo this */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Undo the driver binding; @file itself is not released here */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1025 
1026 static QDict *parse_json_filename(const char *filename, Error **errp)
1027 {
1028     QObject *options_obj;
1029     QDict *options;
1030     int ret;
1031 
1032     ret = strstart(filename, "json:", &filename);
1033     assert(ret);
1034 
1035     options_obj = qobject_from_json(filename);
1036     if (!options_obj) {
1037         error_setg(errp, "Could not parse the JSON options");
1038         return NULL;
1039     }
1040 
1041     if (qobject_type(options_obj) != QTYPE_QDICT) {
1042         qobject_decref(options_obj);
1043         error_setg(errp, "Invalid JSON object given");
1044         return NULL;
1045     }
1046 
1047     options = qobject_to_qdict(options_obj);
1048     qdict_flatten(options);
1049 
1050     return options;
1051 }
1052 
1053 static void parse_json_protocol(QDict *options, const char **pfilename,
1054                                 Error **errp)
1055 {
1056     QDict *json_options;
1057     Error *local_err = NULL;
1058 
1059     /* Parse json: pseudo-protocol */
1060     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1061         return;
1062     }
1063 
1064     json_options = parse_json_filename(*pfilename, &local_err);
1065     if (local_err) {
1066         error_propagate(errp, local_err);
1067         return;
1068     }
1069 
1070     /* Options given in the filename have lower priority than options
1071      * specified directly */
1072     qdict_join(options, json_options, false);
1073     QDECREF(json_options);
1074     *pfilename = NULL;
1075 }
1076 
1077 /*
1078  * Fills in default options for opening images and converts the legacy
1079  * filename/flags pair to option QDict entries.
1080  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1081  * block driver has been specified explicitly.
1082  */
1083 static int bdrv_fill_options(QDict **options, const char *filename,
1084                              int *flags, Error **errp)
1085 {
1086     const char *drvname;
1087     bool protocol = *flags & BDRV_O_PROTOCOL;
1088     bool parse_filename = false;
1089     BlockDriver *drv = NULL;
1090     Error *local_err = NULL;
1091 
1092     drvname = qdict_get_try_str(*options, "driver");
1093     if (drvname) {
1094         drv = bdrv_find_format(drvname);
1095         if (!drv) {
1096             error_setg(errp, "Unknown driver '%s'", drvname);
1097             return -ENOENT;
1098         }
1099         /* If the user has explicitly specified the driver, this choice should
1100          * override the BDRV_O_PROTOCOL flag */
1101         protocol = drv->bdrv_file_open;
1102     }
1103 
1104     if (protocol) {
1105         *flags |= BDRV_O_PROTOCOL;
1106     } else {
1107         *flags &= ~BDRV_O_PROTOCOL;
1108     }
1109 
1110     /* Translate cache options from flags into options */
1111     update_options_from_flags(*options, *flags);
1112 
1113     /* Fetch the file name from the options QDict if necessary */
1114     if (protocol && filename) {
1115         if (!qdict_haskey(*options, "filename")) {
1116             qdict_put(*options, "filename", qstring_from_str(filename));
1117             parse_filename = true;
1118         } else {
1119             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1120                              "the same time");
1121             return -EINVAL;
1122         }
1123     }
1124 
1125     /* Find the right block driver */
1126     filename = qdict_get_try_str(*options, "filename");
1127 
1128     if (!drvname && protocol) {
1129         if (filename) {
1130             drv = bdrv_find_protocol(filename, parse_filename, errp);
1131             if (!drv) {
1132                 return -EINVAL;
1133             }
1134 
1135             drvname = drv->format_name;
1136             qdict_put(*options, "driver", qstring_from_str(drvname));
1137         } else {
1138             error_setg(errp, "Must specify either driver or file");
1139             return -EINVAL;
1140         }
1141     }
1142 
1143     assert(drv || !protocol);
1144 
1145     /* Driver-specific filename parsing */
1146     if (drv && drv->bdrv_parse_filename && parse_filename) {
1147         drv->bdrv_parse_filename(filename, *options, &local_err);
1148         if (local_err) {
1149             error_propagate(errp, local_err);
1150             return -EINVAL;
1151         }
1152 
1153         if (!drv->bdrv_needs_filename) {
1154             qdict_del(*options, "filename");
1155         }
1156     }
1157 
1158     return 0;
1159 }
1160 
1161 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1162                                   const char *child_name,
1163                                   const BdrvChildRole *child_role)
1164 {
1165     BdrvChild *child = g_new(BdrvChild, 1);
1166     *child = (BdrvChild) {
1167         .bs     = child_bs,
1168         .name   = g_strdup(child_name),
1169         .role   = child_role,
1170     };
1171 
1172     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1173 
1174     return child;
1175 }
1176 
1177 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1178                              BlockDriverState *child_bs,
1179                              const char *child_name,
1180                              const BdrvChildRole *child_role)
1181 {
1182     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1183     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1184     return child;
1185 }
1186 
1187 static void bdrv_detach_child(BdrvChild *child)
1188 {
1189     if (child->next.le_prev) {
1190         QLIST_REMOVE(child, next);
1191         child->next.le_prev = NULL;
1192     }
1193     QLIST_REMOVE(child, next_parent);
1194     g_free(child->name);
1195     g_free(child);
1196 }
1197 
1198 void bdrv_root_unref_child(BdrvChild *child)
1199 {
1200     BlockDriverState *child_bs;
1201 
1202     child_bs = child->bs;
1203     bdrv_detach_child(child);
1204     bdrv_unref(child_bs);
1205 }
1206 
1207 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1208 {
1209     if (child == NULL) {
1210         return;
1211     }
1212 
1213     if (child->bs->inherits_from == parent) {
1214         child->bs->inherits_from = NULL;
1215     }
1216 
1217     bdrv_root_unref_child(child);
1218 }
1219 
1220 /*
1221  * Sets the backing file link of a BDS. A new reference is created; callers
1222  * which don't need their own reference any more must call bdrv_unref().
1223  */
1224 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1225 {
1226     if (backing_hd) {
1227         bdrv_ref(backing_hd);
1228     }
1229 
1230     if (bs->backing) {
1231         assert(bs->backing_blocker);
1232         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1233         bdrv_unref_child(bs, bs->backing);
1234     } else if (backing_hd) {
1235         error_setg(&bs->backing_blocker,
1236                    "node is used as backing hd of '%s'",
1237                    bdrv_get_device_or_node_name(bs));
1238     }
1239 
1240     if (!backing_hd) {
1241         error_free(bs->backing_blocker);
1242         bs->backing_blocker = NULL;
1243         bs->backing = NULL;
1244         goto out;
1245     }
1246     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1247     bs->open_flags &= ~BDRV_O_NO_BACKING;
1248     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1249     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1250             backing_hd->drv ? backing_hd->drv->format_name : "");
1251 
1252     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1253     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1254     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1255                     bs->backing_blocker);
1256 out:
1257     bdrv_refresh_limits(bs, NULL);
1258 }
1259 
1260 /*
1261  * Opens the backing file for a BlockDriverState if not yet open
1262  *
1263  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1264  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1265  * itself, all options starting with "${bdref_key}." are considered part of the
1266  * BlockdevRef.
1267  *
1268  * TODO Can this be unified with bdrv_open_image()?
1269  */
1270 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1271                            const char *bdref_key, Error **errp)
1272 {
1273     char *backing_filename = g_malloc0(PATH_MAX);
1274     char *bdref_key_dot;
1275     const char *reference = NULL;
1276     int ret = 0;
1277     BlockDriverState *backing_hd;
1278     QDict *options;
1279     QDict *tmp_parent_options = NULL;
1280     Error *local_err = NULL;
1281 
1282     if (bs->backing != NULL) {
1283         goto free_exit;
1284     }
1285 
1286     /* NULL means an empty set of options */
1287     if (parent_options == NULL) {
1288         tmp_parent_options = qdict_new();
1289         parent_options = tmp_parent_options;
1290     }
1291 
1292     bs->open_flags &= ~BDRV_O_NO_BACKING;
1293 
1294     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1295     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1296     g_free(bdref_key_dot);
1297 
1298     reference = qdict_get_try_str(parent_options, bdref_key);
1299     if (reference || qdict_haskey(options, "file.filename")) {
1300         backing_filename[0] = '\0';
1301     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1302         QDECREF(options);
1303         goto free_exit;
1304     } else {
1305         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1306                                        &local_err);
1307         if (local_err) {
1308             ret = -EINVAL;
1309             error_propagate(errp, local_err);
1310             QDECREF(options);
1311             goto free_exit;
1312         }
1313     }
1314 
1315     if (!bs->drv || !bs->drv->supports_backing) {
1316         ret = -EINVAL;
1317         error_setg(errp, "Driver doesn't support backing files");
1318         QDECREF(options);
1319         goto free_exit;
1320     }
1321 
1322     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1323         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1324     }
1325 
1326     backing_hd = NULL;
1327     ret = bdrv_open_inherit(&backing_hd,
1328                             *backing_filename ? backing_filename : NULL,
1329                             reference, options, 0, bs, &child_backing,
1330                             errp);
1331     if (ret < 0) {
1332         bs->open_flags |= BDRV_O_NO_BACKING;
1333         error_prepend(errp, "Could not open backing file: ");
1334         goto free_exit;
1335     }
1336 
1337     /* Hook up the backing file link; drop our reference, bs owns the
1338      * backing_hd reference now */
1339     bdrv_set_backing_hd(bs, backing_hd);
1340     bdrv_unref(backing_hd);
1341 
1342     qdict_del(parent_options, bdref_key);
1343 
1344 free_exit:
1345     g_free(backing_filename);
1346     QDECREF(tmp_parent_options);
1347     return ret;
1348 }
1349 
1350 /*
1351  * Opens a disk image whose options are given as BlockdevRef in another block
1352  * device's options.
1353  *
1354  * If allow_none is true, no image will be opened if filename is false and no
1355  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1356  *
1357  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1358  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1359  * itself, all options starting with "${bdref_key}." are considered part of the
1360  * BlockdevRef.
1361  *
1362  * The BlockdevRef will be removed from the options QDict.
1363  */
1364 BdrvChild *bdrv_open_child(const char *filename,
1365                            QDict *options, const char *bdref_key,
1366                            BlockDriverState* parent,
1367                            const BdrvChildRole *child_role,
1368                            bool allow_none, Error **errp)
1369 {
1370     BdrvChild *c = NULL;
1371     BlockDriverState *bs;
1372     QDict *image_options;
1373     int ret;
1374     char *bdref_key_dot;
1375     const char *reference;
1376 
1377     assert(child_role != NULL);
1378 
1379     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1380     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1381     g_free(bdref_key_dot);
1382 
1383     reference = qdict_get_try_str(options, bdref_key);
1384     if (!filename && !reference && !qdict_size(image_options)) {
1385         if (!allow_none) {
1386             error_setg(errp, "A block device must be specified for \"%s\"",
1387                        bdref_key);
1388         }
1389         QDECREF(image_options);
1390         goto done;
1391     }
1392 
1393     bs = NULL;
1394     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1395                             parent, child_role, errp);
1396     if (ret < 0) {
1397         goto done;
1398     }
1399 
1400     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1401 
1402 done:
1403     qdict_del(options, bdref_key);
1404     return c;
1405 }
1406 
1407 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1408                                      QDict *snapshot_options, Error **errp)
1409 {
1410     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1411     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1412     int64_t total_size;
1413     QemuOpts *opts = NULL;
1414     BlockDriverState *bs_snapshot;
1415     Error *local_err = NULL;
1416     int ret;
1417 
1418     /* if snapshot, we create a temporary backing file and open it
1419        instead of opening 'filename' directly */
1420 
1421     /* Get the required size from the image */
1422     total_size = bdrv_getlength(bs);
1423     if (total_size < 0) {
1424         ret = total_size;
1425         error_setg_errno(errp, -total_size, "Could not get image size");
1426         goto out;
1427     }
1428 
1429     /* Create the temporary image */
1430     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1431     if (ret < 0) {
1432         error_setg_errno(errp, -ret, "Could not get temporary filename");
1433         goto out;
1434     }
1435 
1436     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1437                             &error_abort);
1438     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1439     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1440     qemu_opts_del(opts);
1441     if (ret < 0) {
1442         error_prepend(errp, "Could not create temporary overlay '%s': ",
1443                       tmp_filename);
1444         goto out;
1445     }
1446 
1447     /* Prepare options QDict for the temporary file */
1448     qdict_put(snapshot_options, "file.driver",
1449               qstring_from_str("file"));
1450     qdict_put(snapshot_options, "file.filename",
1451               qstring_from_str(tmp_filename));
1452     qdict_put(snapshot_options, "driver",
1453               qstring_from_str("qcow2"));
1454 
1455     bs_snapshot = bdrv_new();
1456 
1457     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1458                     flags, &local_err);
1459     snapshot_options = NULL;
1460     if (ret < 0) {
1461         error_propagate(errp, local_err);
1462         goto out;
1463     }
1464 
1465     bdrv_append(bs_snapshot, bs);
1466 
1467 out:
1468     QDECREF(snapshot_options);
1469     g_free(tmp_filename);
1470     return ret;
1471 }
1472 
1473 /*
1474  * Opens a disk image (raw, qcow2, vmdk, ...)
1475  *
1476  * options is a QDict of options to pass to the block drivers, or NULL for an
1477  * empty set of options. The reference to the QDict belongs to the block layer
1478  * after the call (even on failure), so if the caller intends to reuse the
1479  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1480  *
1481  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1482  * If it is not NULL, the referenced BDS will be reused.
1483  *
1484  * The reference parameter may be used to specify an existing block device which
1485  * should be opened. If specified, neither options nor a filename may be given,
1486  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1487  */
1488 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1489                              const char *reference, QDict *options, int flags,
1490                              BlockDriverState *parent,
1491                              const BdrvChildRole *child_role, Error **errp)
1492 {
1493     int ret;
1494     BdrvChild *file = NULL;
1495     BlockDriverState *bs;
1496     BlockDriver *drv = NULL;
1497     const char *drvname;
1498     const char *backing;
1499     Error *local_err = NULL;
1500     QDict *snapshot_options = NULL;
1501     int snapshot_flags = 0;
1502 
1503     assert(pbs);
1504     assert(!child_role || !flags);
1505     assert(!child_role == !parent);
1506 
1507     if (reference) {
1508         bool options_non_empty = options ? qdict_size(options) : false;
1509         QDECREF(options);
1510 
1511         if (*pbs) {
1512             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1513                        "another block device");
1514             return -EINVAL;
1515         }
1516 
1517         if (filename || options_non_empty) {
1518             error_setg(errp, "Cannot reference an existing block device with "
1519                        "additional options or a new filename");
1520             return -EINVAL;
1521         }
1522 
1523         bs = bdrv_lookup_bs(reference, reference, errp);
1524         if (!bs) {
1525             return -ENODEV;
1526         }
1527 
1528         if (bs->throttle_state) {
1529             error_setg(errp, "Cannot reference an existing block device for "
1530                        "which I/O throttling is enabled");
1531             return -EINVAL;
1532         }
1533 
1534         bdrv_ref(bs);
1535         *pbs = bs;
1536         return 0;
1537     }
1538 
1539     if (*pbs) {
1540         bs = *pbs;
1541     } else {
1542         bs = bdrv_new();
1543     }
1544 
1545     /* NULL means an empty set of options */
1546     if (options == NULL) {
1547         options = qdict_new();
1548     }
1549 
1550     /* json: syntax counts as explicit options, as if in the QDict */
1551     parse_json_protocol(options, &filename, &local_err);
1552     if (local_err) {
1553         ret = -EINVAL;
1554         goto fail;
1555     }
1556 
1557     bs->explicit_options = qdict_clone_shallow(options);
1558 
1559     if (child_role) {
1560         bs->inherits_from = parent;
1561         child_role->inherit_options(&flags, options,
1562                                     parent->open_flags, parent->options);
1563     }
1564 
1565     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1566     if (local_err) {
1567         goto fail;
1568     }
1569 
1570     bs->open_flags = flags;
1571     bs->options = options;
1572     options = qdict_clone_shallow(options);
1573 
1574     /* Find the right image format driver */
1575     drvname = qdict_get_try_str(options, "driver");
1576     if (drvname) {
1577         drv = bdrv_find_format(drvname);
1578         if (!drv) {
1579             error_setg(errp, "Unknown driver: '%s'", drvname);
1580             ret = -EINVAL;
1581             goto fail;
1582         }
1583     }
1584 
1585     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1586 
1587     backing = qdict_get_try_str(options, "backing");
1588     if (backing && *backing == '\0') {
1589         flags |= BDRV_O_NO_BACKING;
1590         qdict_del(options, "backing");
1591     }
1592 
1593     /* Open image file without format layer */
1594     if ((flags & BDRV_O_PROTOCOL) == 0) {
1595         if (flags & BDRV_O_RDWR) {
1596             flags |= BDRV_O_ALLOW_RDWR;
1597         }
1598         if (flags & BDRV_O_SNAPSHOT) {
1599             snapshot_options = qdict_new();
1600             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1601                                        flags, options);
1602             bdrv_backing_options(&flags, options, flags, options);
1603         }
1604 
1605         bs->open_flags = flags;
1606 
1607         file = bdrv_open_child(filename, options, "file", bs,
1608                                &child_file, true, &local_err);
1609         if (local_err) {
1610             ret = -EINVAL;
1611             goto fail;
1612         }
1613     }
1614 
1615     /* Image format probing */
1616     bs->probed = !drv;
1617     if (!drv && file) {
1618         ret = find_image_format(file->bs, filename, &drv, &local_err);
1619         if (ret < 0) {
1620             goto fail;
1621         }
1622         /*
1623          * This option update would logically belong in bdrv_fill_options(),
1624          * but we first need to open bs->file for the probing to work, while
1625          * opening bs->file already requires the (mostly) final set of options
1626          * so that cache mode etc. can be inherited.
1627          *
1628          * Adding the driver later is somewhat ugly, but it's not an option
1629          * that would ever be inherited, so it's correct. We just need to make
1630          * sure to update both bs->options (which has the full effective
1631          * options for bs) and options (which has file.* already removed).
1632          */
1633         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1634         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1635     } else if (!drv) {
1636         error_setg(errp, "Must specify either driver or file");
1637         ret = -EINVAL;
1638         goto fail;
1639     }
1640 
1641     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1642     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1643     /* file must be NULL if a protocol BDS is about to be created
1644      * (the inverse results in an error message from bdrv_open_common()) */
1645     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1646 
1647     /* Open the image */
1648     ret = bdrv_open_common(bs, file, options, &local_err);
1649     if (ret < 0) {
1650         goto fail;
1651     }
1652 
1653     if (file && (bs->file != file)) {
1654         bdrv_unref_child(bs, file);
1655         file = NULL;
1656     }
1657 
1658     /* If there is a backing file, use it */
1659     if ((flags & BDRV_O_NO_BACKING) == 0) {
1660         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1661         if (ret < 0) {
1662             goto close_and_fail;
1663         }
1664     }
1665 
1666     bdrv_refresh_filename(bs);
1667 
1668     /* Check if any unknown options were used */
1669     if (options && (qdict_size(options) != 0)) {
1670         const QDictEntry *entry = qdict_first(options);
1671         if (flags & BDRV_O_PROTOCOL) {
1672             error_setg(errp, "Block protocol '%s' doesn't support the option "
1673                        "'%s'", drv->format_name, entry->key);
1674         } else {
1675             error_setg(errp,
1676                        "Block format '%s' does not support the option '%s'",
1677                        drv->format_name, entry->key);
1678         }
1679 
1680         ret = -EINVAL;
1681         goto close_and_fail;
1682     }
1683 
1684     if (!bdrv_key_required(bs)) {
1685         if (bs->blk) {
1686             blk_dev_change_media_cb(bs->blk, true);
1687         }
1688     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1689                && !runstate_check(RUN_STATE_INMIGRATE)
1690                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1691         error_setg(errp,
1692                    "Guest must be stopped for opening of encrypted image");
1693         ret = -EBUSY;
1694         goto close_and_fail;
1695     }
1696 
1697     QDECREF(options);
1698     *pbs = bs;
1699 
1700     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1701      * temporary snapshot afterwards. */
1702     if (snapshot_flags) {
1703         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1704                                         &local_err);
1705         snapshot_options = NULL;
1706         if (local_err) {
1707             goto close_and_fail;
1708         }
1709     }
1710 
1711     return 0;
1712 
1713 fail:
1714     if (file != NULL) {
1715         bdrv_unref_child(bs, file);
1716     }
1717     QDECREF(snapshot_options);
1718     QDECREF(bs->explicit_options);
1719     QDECREF(bs->options);
1720     QDECREF(options);
1721     bs->options = NULL;
1722     if (!*pbs) {
1723         /* If *pbs is NULL, a new BDS has been created in this function and
1724            needs to be freed now. Otherwise, it does not need to be closed,
1725            since it has not really been opened yet. */
1726         bdrv_unref(bs);
1727     }
1728     if (local_err) {
1729         error_propagate(errp, local_err);
1730     }
1731     return ret;
1732 
1733 close_and_fail:
1734     /* See fail path, but now the BDS has to be always closed */
1735     if (*pbs) {
1736         bdrv_close(bs);
1737     } else {
1738         bdrv_unref(bs);
1739     }
1740     QDECREF(snapshot_options);
1741     QDECREF(options);
1742     if (local_err) {
1743         error_propagate(errp, local_err);
1744     }
1745     return ret;
1746 }
1747 
1748 int bdrv_open(BlockDriverState **pbs, const char *filename,
1749               const char *reference, QDict *options, int flags, Error **errp)
1750 {
1751     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1752                              NULL, errp);
1753 }
1754 
1755 typedef struct BlockReopenQueueEntry {
1756      bool prepared;
1757      BDRVReopenState state;
1758      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1759 } BlockReopenQueueEntry;
1760 
1761 /*
1762  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1763  * reopen of multiple devices.
1764  *
1765  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1766  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1767  * be created and initialized. This newly created BlockReopenQueue should be
1768  * passed back in for subsequent calls that are intended to be of the same
1769  * atomic 'set'.
1770  *
1771  * bs is the BlockDriverState to add to the reopen queue.
1772  *
1773  * options contains the changed options for the associated bs
1774  * (the BlockReopenQueue takes ownership)
1775  *
1776  * flags contains the open flags for the associated bs
1777  *
1778  * returns a pointer to bs_queue, which is either the newly allocated
1779  * bs_queue, or the existing bs_queue being used.
1780  *
1781  */
1782 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1783                                                  BlockDriverState *bs,
1784                                                  QDict *options,
1785                                                  int flags,
1786                                                  const BdrvChildRole *role,
1787                                                  QDict *parent_options,
1788                                                  int parent_flags)
1789 {
1790     assert(bs != NULL);
1791 
1792     BlockReopenQueueEntry *bs_entry;
1793     BdrvChild *child;
1794     QDict *old_options, *explicit_options;
1795 
1796     if (bs_queue == NULL) {
1797         bs_queue = g_new0(BlockReopenQueue, 1);
1798         QSIMPLEQ_INIT(bs_queue);
1799     }
1800 
1801     if (!options) {
1802         options = qdict_new();
1803     }
1804 
1805     /*
1806      * Precedence of options:
1807      * 1. Explicitly passed in options (highest)
1808      * 2. Set in flags (only for top level)
1809      * 3. Retained from explicitly set options of bs
1810      * 4. Inherited from parent node
1811      * 5. Retained from effective options of bs
1812      */
1813 
1814     if (!parent_options) {
1815         /*
1816          * Any setting represented by flags is always updated. If the
1817          * corresponding QDict option is set, it takes precedence. Otherwise
1818          * the flag is translated into a QDict option. The old setting of bs is
1819          * not considered.
1820          */
1821         update_options_from_flags(options, flags);
1822     }
1823 
1824     /* Old explicitly set values (don't overwrite by inherited value) */
1825     old_options = qdict_clone_shallow(bs->explicit_options);
1826     bdrv_join_options(bs, options, old_options);
1827     QDECREF(old_options);
1828 
1829     explicit_options = qdict_clone_shallow(options);
1830 
1831     /* Inherit from parent node */
1832     if (parent_options) {
1833         assert(!flags);
1834         role->inherit_options(&flags, options, parent_flags, parent_options);
1835     }
1836 
1837     /* Old values are used for options that aren't set yet */
1838     old_options = qdict_clone_shallow(bs->options);
1839     bdrv_join_options(bs, options, old_options);
1840     QDECREF(old_options);
1841 
1842     /* bdrv_open() masks this flag out */
1843     flags &= ~BDRV_O_PROTOCOL;
1844 
1845     QLIST_FOREACH(child, &bs->children, next) {
1846         QDict *new_child_options;
1847         char *child_key_dot;
1848 
1849         /* reopen can only change the options of block devices that were
1850          * implicitly created and inherited options. For other (referenced)
1851          * block devices, a syntax like "backing.foo" results in an error. */
1852         if (child->bs->inherits_from != bs) {
1853             continue;
1854         }
1855 
1856         child_key_dot = g_strdup_printf("%s.", child->name);
1857         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1858         g_free(child_key_dot);
1859 
1860         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1861                                 child->role, options, flags);
1862     }
1863 
1864     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1865     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1866 
1867     bs_entry->state.bs = bs;
1868     bs_entry->state.options = options;
1869     bs_entry->state.explicit_options = explicit_options;
1870     bs_entry->state.flags = flags;
1871 
1872     return bs_queue;
1873 }
1874 
1875 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1876                                     BlockDriverState *bs,
1877                                     QDict *options, int flags)
1878 {
1879     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1880                                    NULL, NULL, 0);
1881 }
1882 
1883 /*
1884  * Reopen multiple BlockDriverStates atomically & transactionally.
1885  *
1886  * The queue passed in (bs_queue) must have been built up previous
1887  * via bdrv_reopen_queue().
1888  *
1889  * Reopens all BDS specified in the queue, with the appropriate
1890  * flags.  All devices are prepared for reopen, and failure of any
1891  * device will cause all device changes to be abandonded, and intermediate
1892  * data cleaned up.
1893  *
1894  * If all devices prepare successfully, then the changes are committed
1895  * to all devices.
1896  *
1897  */
1898 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1899 {
1900     int ret = -1;
1901     BlockReopenQueueEntry *bs_entry, *next;
1902     Error *local_err = NULL;
1903 
1904     assert(bs_queue != NULL);
1905 
1906     bdrv_drain_all();
1907 
1908     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1909         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1910             error_propagate(errp, local_err);
1911             goto cleanup;
1912         }
1913         bs_entry->prepared = true;
1914     }
1915 
1916     /* If we reach this point, we have success and just need to apply the
1917      * changes
1918      */
1919     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1920         bdrv_reopen_commit(&bs_entry->state);
1921     }
1922 
1923     ret = 0;
1924 
1925 cleanup:
1926     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1927         if (ret && bs_entry->prepared) {
1928             bdrv_reopen_abort(&bs_entry->state);
1929         } else if (ret) {
1930             QDECREF(bs_entry->state.explicit_options);
1931         }
1932         QDECREF(bs_entry->state.options);
1933         g_free(bs_entry);
1934     }
1935     g_free(bs_queue);
1936     return ret;
1937 }
1938 
1939 
1940 /* Reopen a single BlockDriverState with the specified flags. */
1941 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1942 {
1943     int ret = -1;
1944     Error *local_err = NULL;
1945     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1946 
1947     ret = bdrv_reopen_multiple(queue, &local_err);
1948     if (local_err != NULL) {
1949         error_propagate(errp, local_err);
1950     }
1951     return ret;
1952 }
1953 
1954 
1955 /*
1956  * Prepares a BlockDriverState for reopen. All changes are staged in the
1957  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1958  * the block driver layer .bdrv_reopen_prepare()
1959  *
1960  * bs is the BlockDriverState to reopen
1961  * flags are the new open flags
1962  * queue is the reopen queue
1963  *
1964  * Returns 0 on success, non-zero on error.  On error errp will be set
1965  * as well.
1966  *
1967  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1968  * It is the responsibility of the caller to then call the abort() or
1969  * commit() for any other BDS that have been left in a prepare() state
1970  *
1971  */
1972 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1973                         Error **errp)
1974 {
1975     int ret = -1;
1976     Error *local_err = NULL;
1977     BlockDriver *drv;
1978     QemuOpts *opts;
1979     const char *value;
1980 
1981     assert(reopen_state != NULL);
1982     assert(reopen_state->bs->drv != NULL);
1983     drv = reopen_state->bs->drv;
1984 
1985     /* Process generic block layer options */
1986     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1987     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1988     if (local_err) {
1989         error_propagate(errp, local_err);
1990         ret = -EINVAL;
1991         goto error;
1992     }
1993 
1994     update_flags_from_options(&reopen_state->flags, opts);
1995 
1996     /* node-name and driver must be unchanged. Put them back into the QDict, so
1997      * that they are checked at the end of this function. */
1998     value = qemu_opt_get(opts, "node-name");
1999     if (value) {
2000         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2001     }
2002 
2003     value = qemu_opt_get(opts, "driver");
2004     if (value) {
2005         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2006     }
2007 
2008     /* if we are to stay read-only, do not allow permission change
2009      * to r/w */
2010     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2011         reopen_state->flags & BDRV_O_RDWR) {
2012         error_setg(errp, "Node '%s' is read only",
2013                    bdrv_get_device_or_node_name(reopen_state->bs));
2014         goto error;
2015     }
2016 
2017 
2018     ret = bdrv_flush(reopen_state->bs);
2019     if (ret) {
2020         error_setg_errno(errp, -ret, "Error flushing drive");
2021         goto error;
2022     }
2023 
2024     if (drv->bdrv_reopen_prepare) {
2025         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2026         if (ret) {
2027             if (local_err != NULL) {
2028                 error_propagate(errp, local_err);
2029             } else {
2030                 error_setg(errp, "failed while preparing to reopen image '%s'",
2031                            reopen_state->bs->filename);
2032             }
2033             goto error;
2034         }
2035     } else {
2036         /* It is currently mandatory to have a bdrv_reopen_prepare()
2037          * handler for each supported drv. */
2038         error_setg(errp, "Block format '%s' used by node '%s' "
2039                    "does not support reopening files", drv->format_name,
2040                    bdrv_get_device_or_node_name(reopen_state->bs));
2041         ret = -1;
2042         goto error;
2043     }
2044 
2045     /* Options that are not handled are only okay if they are unchanged
2046      * compared to the old state. It is expected that some options are only
2047      * used for the initial open, but not reopen (e.g. filename) */
2048     if (qdict_size(reopen_state->options)) {
2049         const QDictEntry *entry = qdict_first(reopen_state->options);
2050 
2051         do {
2052             QString *new_obj = qobject_to_qstring(entry->value);
2053             const char *new = qstring_get_str(new_obj);
2054             const char *old = qdict_get_try_str(reopen_state->bs->options,
2055                                                 entry->key);
2056 
2057             if (!old || strcmp(new, old)) {
2058                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2059                 ret = -EINVAL;
2060                 goto error;
2061             }
2062         } while ((entry = qdict_next(reopen_state->options, entry)));
2063     }
2064 
2065     ret = 0;
2066 
2067 error:
2068     qemu_opts_del(opts);
2069     return ret;
2070 }
2071 
2072 /*
2073  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2074  * makes them final by swapping the staging BlockDriverState contents into
2075  * the active BlockDriverState contents.
2076  */
2077 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2078 {
2079     BlockDriver *drv;
2080 
2081     assert(reopen_state != NULL);
2082     drv = reopen_state->bs->drv;
2083     assert(drv != NULL);
2084 
2085     /* If there are any driver level actions to take */
2086     if (drv->bdrv_reopen_commit) {
2087         drv->bdrv_reopen_commit(reopen_state);
2088     }
2089 
2090     /* set BDS specific flags now */
2091     QDECREF(reopen_state->bs->explicit_options);
2092 
2093     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2094     reopen_state->bs->open_flags         = reopen_state->flags;
2095     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2096 
2097     bdrv_refresh_limits(reopen_state->bs, NULL);
2098 }
2099 
2100 /*
2101  * Abort the reopen, and delete and free the staged changes in
2102  * reopen_state
2103  */
2104 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2105 {
2106     BlockDriver *drv;
2107 
2108     assert(reopen_state != NULL);
2109     drv = reopen_state->bs->drv;
2110     assert(drv != NULL);
2111 
2112     if (drv->bdrv_reopen_abort) {
2113         drv->bdrv_reopen_abort(reopen_state);
2114     }
2115 
2116     QDECREF(reopen_state->explicit_options);
2117 }
2118 
2119 
2120 static void bdrv_close(BlockDriverState *bs)
2121 {
2122     BdrvAioNotifier *ban, *ban_next;
2123 
2124     assert(!bs->job);
2125 
2126     /* Disable I/O limits and drain all pending throttled requests */
2127     if (bs->throttle_state) {
2128         bdrv_io_limits_disable(bs);
2129     }
2130 
2131     bdrv_drained_begin(bs); /* complete I/O */
2132     bdrv_flush(bs);
2133     bdrv_drain(bs); /* in case flush left pending I/O */
2134 
2135     bdrv_release_named_dirty_bitmaps(bs);
2136     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2137 
2138     if (bs->blk) {
2139         blk_dev_change_media_cb(bs->blk, false);
2140     }
2141 
2142     if (bs->drv) {
2143         BdrvChild *child, *next;
2144 
2145         bs->drv->bdrv_close(bs);
2146         bs->drv = NULL;
2147 
2148         bdrv_set_backing_hd(bs, NULL);
2149 
2150         if (bs->file != NULL) {
2151             bdrv_unref_child(bs, bs->file);
2152             bs->file = NULL;
2153         }
2154 
2155         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2156             /* TODO Remove bdrv_unref() from drivers' close function and use
2157              * bdrv_unref_child() here */
2158             if (child->bs->inherits_from == bs) {
2159                 child->bs->inherits_from = NULL;
2160             }
2161             bdrv_detach_child(child);
2162         }
2163 
2164         g_free(bs->opaque);
2165         bs->opaque = NULL;
2166         bs->copy_on_read = 0;
2167         bs->backing_file[0] = '\0';
2168         bs->backing_format[0] = '\0';
2169         bs->total_sectors = 0;
2170         bs->encrypted = 0;
2171         bs->valid_key = 0;
2172         bs->sg = 0;
2173         bs->zero_beyond_eof = false;
2174         QDECREF(bs->options);
2175         QDECREF(bs->explicit_options);
2176         bs->options = NULL;
2177         QDECREF(bs->full_open_options);
2178         bs->full_open_options = NULL;
2179     }
2180 
2181     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2182         g_free(ban);
2183     }
2184     QLIST_INIT(&bs->aio_notifiers);
2185     bdrv_drained_end(bs);
2186 }
2187 
/*
 * Tear down every BlockDriverState: drain outstanding requests, detach all
 * BlockBackends from their nodes, close the monitor-owned nodes and then
 * cancel block jobs until the global list of BDSs is empty.
 */
void bdrv_close_all(void)
{
    BlockDriverState *bs;
    AioContext *aio_context;

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
    bdrv_drain_all();

    blk_remove_all_bs();
    blockdev_close_all_bdrv_states();

    /* Cancel all block jobs */
    while (!QTAILQ_EMPTY(&all_bdrv_states)) {
        /* Cancel one job per pass, then restart the scan from the head
         * (presumably because cancelling may remove nodes from the list). */
        QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
            aio_context = bdrv_get_aio_context(bs);

            aio_context_acquire(aio_context);
            if (bs->job) {
                block_job_cancel_sync(bs->job);
                aio_context_release(aio_context);
                break;
            }
            aio_context_release(aio_context);
        }

        /* All the remaining BlockDriverStates are referenced directly or
         * indirectly from block jobs, so there needs to be at least one BDS
         * directly used by a block job */
        assert(bs);
    }
}
2220 
/*
 * Fields that need to stay with the top-level BDS.
 *
 * Hook for moving such fields from bs_src to bs_dest. The body is
 * currently empty, so calling it has no effect.
 */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */
}
2227 
/*
 * Re-point everything that references 'from' as a child so that it
 * references 'to' instead: the BlockBackend link (if any) and every parent
 * BdrvChild on from->parents. No parent may use 'from' through the
 * child_backing role (asserted).
 */
static void change_parent_backing_link(BlockDriverState *from,
                                       BlockDriverState *to)
{
    BdrvChild *c, *next;

    if (from->blk) {
        /* FIXME We bypass blk_set_bs(), so we need to make these updates
         * manually. The root problem is not in this change function, but the
         * existence of BlockDriverState.blk. */
        to->blk = from->blk;
        from->blk = NULL;
    }

    /* The _SAFE variant is required: each entry is unlinked from the list
     * being walked and moved to to->parents. */
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
        QLIST_REMOVE(c, next_parent);
        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
        /* The parent's reference moves along with the link */
        bdrv_ref(to);
        bdrv_unref(from);
    }
}
2250 
2251 static void swap_feature_fields(BlockDriverState *bs_top,
2252                                 BlockDriverState *bs_new)
2253 {
2254     BlockDriverState tmp;
2255 
2256     bdrv_move_feature_fields(&tmp, bs_top);
2257     bdrv_move_feature_fields(bs_top, bs_new);
2258     bdrv_move_feature_fields(bs_new, &tmp);
2259 
2260     assert(!bs_new->throttle_state);
2261     if (bs_top->throttle_state) {
2262         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2263         bdrv_io_limits_disable(bs_top);
2264     }
2265 }
2266 
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * The parents of bs_top are redirected to bs_new, the feature fields of the
 * two nodes are swapped, and bs_top becomes the backing file of bs_new.
 * Both bs_new and bs_top are modified. Neither node may have requests
 * pending (asserted).
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 *
 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
 * that's what the callers commonly need. bs_new will be referenced by the old
 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
 * reference of its own, it must call bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold a temporary reference: the parents' references to bs_top are
     * dropped by change_parent_backing_link() below. */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    bdrv_set_backing_hd(bs_new, bs_top);
    /* Release the temporary reference taken above */
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2301 
/*
 * Make 'new' take the place of 'old': the parents of 'old' (and its
 * BlockBackend, if it has one) are redirected to 'new'. If 'new' has no
 * backing file of its own and is not already part of old's backing chain,
 * it also takes over old's backing file.
 *
 * Neither node may have requests pending (asserted).
 */
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
    assert(!bdrv_requests_pending(old));
    assert(!bdrv_requests_pending(new));

    /* Keep 'old' alive while its parents' references are being dropped */
    bdrv_ref(old);

    if (old->blk) {
        /* As long as these fields aren't in BlockBackend, but in the top-level
         * BlockDriverState, it's not possible for a BDS to have two BBs.
         *
         * We really want to copy the fields from old to new, but we go for a
         * swap instead so that pointers aren't duplicated and cause trouble.
         * (Also, bdrv_swap() used to do the same.) */
        assert(!new->blk);
        swap_feature_fields(old, new);
    }
    change_parent_backing_link(old, new);

    /* Change backing files if a previously independent node is added to the
     * chain. For active commit, we replace top by its own (indirect) backing
     * file and don't do anything here so we don't build a loop. */
    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
        bdrv_set_backing_hd(new, backing_bs(old));
        bdrv_set_backing_hd(old, NULL);
    }

    /* Drop the temporary reference taken at the top */
    bdrv_unref(old);
}
2331 
2332 static void bdrv_delete(BlockDriverState *bs)
2333 {
2334     assert(!bs->job);
2335     assert(bdrv_op_blocker_is_empty(bs));
2336     assert(!bs->refcnt);
2337 
2338     bdrv_close(bs);
2339 
2340     /* remove from list, if necessary */
2341     if (bs->node_name[0] != '\0') {
2342         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2343     }
2344     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2345 
2346     g_free(bs);
2347 }
2348 
2349 /*
2350  * Run consistency checks on an image
2351  *
2352  * Returns 0 if the check could be completed (it doesn't mean that the image is
2353  * free of errors) or -errno when an internal error occurred. The results of the
2354  * check are stored in res.
2355  */
2356 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2357 {
2358     if (bs->drv == NULL) {
2359         return -ENOMEDIUM;
2360     }
2361     if (bs->drv->bdrv_check == NULL) {
2362         return -ENOTSUP;
2363     }
2364 
2365     memset(res, 0, sizeof(*res));
2366     return bs->drv->bdrv_check(bs, res, fix);
2367 }
2368 
2369 #define COMMIT_BUF_SECTORS 2048
2370 
/*
 * Commit COW file into the raw image: copy every allocated range of bs into
 * its backing file and, if the driver supports it, empty bs afterwards.
 *
 * A read-only backing file is temporarily reopened read-write and switched
 * back on exit.
 *
 * Returns 0 on success, -ENOMEDIUM if bs has no driver, -ENOTSUP if it has
 * no backing file, -EBUSY if the operation is blocked on either node,
 * -EACCES if the backing file cannot be reopened read-write, -ENOMEM if the
 * bounce buffer cannot be allocated, or the negative error of the failing
 * length query, truncate, read, write or make_empty call.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's original mode so it can be restored */
    ro = bs->backing->bs->read_only;
    open_flags =  bs->backing->bs->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* n is set by bdrv_is_allocated() on every pass and is used both as the
     * transfer size and as the loop step */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing->bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* NOTE(review): the result of this flush is not checked */
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing) {
        /* NOTE(review): a failing flush is not reported to the caller */
        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2478 
2479 /*
2480  * Return values:
2481  * 0        - success
2482  * -EINVAL  - backing format specified, but no file
2483  * -ENOSPC  - can't update the backing file because no space is left in the
2484  *            image file header
2485  * -ENOTSUP - format driver doesn't support changing the backing file
2486  */
2487 int bdrv_change_backing_file(BlockDriverState *bs,
2488     const char *backing_file, const char *backing_fmt)
2489 {
2490     BlockDriver *drv = bs->drv;
2491     int ret;
2492 
2493     /* Backing file format doesn't make sense without a backing file */
2494     if (backing_fmt && !backing_file) {
2495         return -EINVAL;
2496     }
2497 
2498     if (drv->bdrv_change_backing_file != NULL) {
2499         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2500     } else {
2501         ret = -ENOTSUP;
2502     }
2503 
2504     if (ret == 0) {
2505         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2506         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2507     }
2508     return ret;
2509 }
2510 
2511 /*
2512  * Finds the image layer in the chain that has 'bs' as its backing file.
2513  *
2514  * active is the current topmost image.
2515  *
2516  * Returns NULL if bs is not found in active's image chain,
2517  * or if active == bs.
2518  *
2519  * Returns the bottommost base image if bs == NULL.
2520  */
2521 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2522                                     BlockDriverState *bs)
2523 {
2524     while (active && bs != backing_bs(active)) {
2525         active = backing_bs(active);
2526     }
2527 
2528     return active;
2529 }
2530 
2531 /* Given a BDS, searches for the base layer. */
2532 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2533 {
2534     return bdrv_find_overlay(bs, NULL);
2535 }
2536 
2537 /*
2538  * Drops images above 'base' up to and including 'top', and sets the image
2539  * above 'top' to have base as its backing file.
2540  *
2541  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2542  * information in 'bs' can be properly updated.
2543  *
2544  * E.g., this will convert the following chain:
2545  * bottom <- base <- intermediate <- top <- active
2546  *
2547  * to
2548  *
2549  * bottom <- base <- active
2550  *
2551  * It is allowed for bottom==base, in which case it converts:
2552  *
2553  * base <- intermediate <- top <- active
2554  *
2555  * to
2556  *
2557  * base <- active
2558  *
2559  * If backing_file_str is non-NULL, it will be used when modifying top's
2560  * overlay image metadata.
2561  *
2562  * Error conditions:
2563  *  if active == top, that is considered an error
2564  *
2565  */
2566 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2567                            BlockDriverState *base, const char *backing_file_str)
2568 {
2569     BlockDriverState *new_top_bs = NULL;
2570     int ret = -EIO;
2571 
2572     if (!top->drv || !base->drv) {
2573         goto exit;
2574     }
2575 
2576     new_top_bs = bdrv_find_overlay(active, top);
2577 
2578     if (new_top_bs == NULL) {
2579         /* we could not find the image above 'top', this is an error */
2580         goto exit;
2581     }
2582 
2583     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2584      * to do, no intermediate images */
2585     if (backing_bs(new_top_bs) == base) {
2586         ret = 0;
2587         goto exit;
2588     }
2589 
2590     /* Make sure that base is in the backing chain of top */
2591     if (!bdrv_chain_contains(top, base)) {
2592         goto exit;
2593     }
2594 
2595     /* success - we can delete the intermediate states, and link top->base */
2596     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2597     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2598                                    base->drv ? base->drv->format_name : "");
2599     if (ret) {
2600         goto exit;
2601     }
2602     bdrv_set_backing_hd(new_top_bs, base);
2603 
2604     ret = 0;
2605 exit:
2606     return ret;
2607 }
2608 
2609 /**
2610  * Truncate file to 'offset' bytes (needed only for file protocols)
2611  */
2612 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2613 {
2614     BlockDriver *drv = bs->drv;
2615     int ret;
2616     if (!drv)
2617         return -ENOMEDIUM;
2618     if (!drv->bdrv_truncate)
2619         return -ENOTSUP;
2620     if (bs->read_only)
2621         return -EACCES;
2622 
2623     ret = drv->bdrv_truncate(bs, offset);
2624     if (ret == 0) {
2625         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2626         bdrv_dirty_bitmap_truncate(bs);
2627         if (bs->blk) {
2628             blk_dev_resize_cb(bs->blk);
2629         }
2630     }
2631     return ret;
2632 }
2633 
2634 /**
2635  * Length of a allocated file in bytes. Sparse files are counted by actual
2636  * allocated space. Return < 0 if error or unknown.
2637  */
2638 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2639 {
2640     BlockDriver *drv = bs->drv;
2641     if (!drv) {
2642         return -ENOMEDIUM;
2643     }
2644     if (drv->bdrv_get_allocated_file_size) {
2645         return drv->bdrv_get_allocated_file_size(bs);
2646     }
2647     if (bs->file) {
2648         return bdrv_get_allocated_file_size(bs->file->bs);
2649     }
2650     return -ENOTSUP;
2651 }
2652 
2653 /**
2654  * Return number of sectors on success, -errno on error.
2655  */
2656 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2657 {
2658     BlockDriver *drv = bs->drv;
2659 
2660     if (!drv)
2661         return -ENOMEDIUM;
2662 
2663     if (drv->has_variable_length) {
2664         int ret = refresh_total_sectors(bs, bs->total_sectors);
2665         if (ret < 0) {
2666             return ret;
2667         }
2668     }
2669     return bs->total_sectors;
2670 }
2671 
2672 /**
2673  * Return length in bytes on success, -errno on error.
2674  * The length is always a multiple of BDRV_SECTOR_SIZE.
2675  */
2676 int64_t bdrv_getlength(BlockDriverState *bs)
2677 {
2678     int64_t ret = bdrv_nb_sectors(bs);
2679 
2680     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2681     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2682 }
2683 
2684 /* return 0 as number of sectors if no device present or error */
2685 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2686 {
2687     int64_t nb_sectors = bdrv_nb_sectors(bs);
2688 
2689     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2690 }
2691 
2692 int bdrv_is_read_only(BlockDriverState *bs)
2693 {
2694     return bs->read_only;
2695 }
2696 
2697 int bdrv_is_sg(BlockDriverState *bs)
2698 {
2699     return bs->sg;
2700 }
2701 
2702 int bdrv_is_encrypted(BlockDriverState *bs)
2703 {
2704     if (bs->backing && bs->backing->bs->encrypted) {
2705         return 1;
2706     }
2707     return bs->encrypted;
2708 }
2709 
2710 int bdrv_key_required(BlockDriverState *bs)
2711 {
2712     BdrvChild *backing = bs->backing;
2713 
2714     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2715         return 1;
2716     }
2717     return (bs->encrypted && !bs->valid_key);
2718 }
2719 
2720 int bdrv_set_key(BlockDriverState *bs, const char *key)
2721 {
2722     int ret;
2723     if (bs->backing && bs->backing->bs->encrypted) {
2724         ret = bdrv_set_key(bs->backing->bs, key);
2725         if (ret < 0)
2726             return ret;
2727         if (!bs->encrypted)
2728             return 0;
2729     }
2730     if (!bs->encrypted) {
2731         return -EINVAL;
2732     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2733         return -ENOMEDIUM;
2734     }
2735     ret = bs->drv->bdrv_set_key(bs, key);
2736     if (ret < 0) {
2737         bs->valid_key = 0;
2738     } else if (!bs->valid_key) {
2739         bs->valid_key = 1;
2740         if (bs->blk) {
2741             /* call the change callback now, we skipped it on open */
2742             blk_dev_change_media_cb(bs->blk, true);
2743         }
2744     }
2745     return ret;
2746 }
2747 
2748 /*
2749  * Provide an encryption key for @bs.
2750  * If @key is non-null:
2751  *     If @bs is not encrypted, fail.
2752  *     Else if the key is invalid, fail.
2753  *     Else set @bs's key to @key, replacing the existing key, if any.
2754  * If @key is null:
2755  *     If @bs is encrypted and still lacks a key, fail.
2756  *     Else do nothing.
2757  * On failure, store an error object through @errp if non-null.
2758  */
2759 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2760 {
2761     if (key) {
2762         if (!bdrv_is_encrypted(bs)) {
2763             error_setg(errp, "Node '%s' is not encrypted",
2764                       bdrv_get_device_or_node_name(bs));
2765         } else if (bdrv_set_key(bs, key) < 0) {
2766             error_setg(errp, QERR_INVALID_PASSWORD);
2767         }
2768     } else {
2769         if (bdrv_key_required(bs)) {
2770             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2771                       "'%s' (%s) is encrypted",
2772                       bdrv_get_device_or_node_name(bs),
2773                       bdrv_get_encrypted_filename(bs));
2774         }
2775     }
2776 }
2777 
2778 const char *bdrv_get_format_name(BlockDriverState *bs)
2779 {
2780     return bs->drv ? bs->drv->format_name : NULL;
2781 }
2782 
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, i.e. pointers to
 * 'const char *', so both arguments must be dereferenced once before the
 * strings can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2787 
2788 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2789                          void *opaque)
2790 {
2791     BlockDriver *drv;
2792     int count = 0;
2793     int i;
2794     const char **formats = NULL;
2795 
2796     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2797         if (drv->format_name) {
2798             bool found = false;
2799             int i = count;
2800             while (formats && i && !found) {
2801                 found = !strcmp(formats[--i], drv->format_name);
2802             }
2803 
2804             if (!found) {
2805                 formats = g_renew(const char *, formats, count + 1);
2806                 formats[count++] = drv->format_name;
2807             }
2808         }
2809     }
2810 
2811     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2812 
2813     for (i = 0; i < count; i++) {
2814         it(opaque, formats[i]);
2815     }
2816 
2817     g_free(formats);
2818 }
2819 
2820 /* This function is to find a node in the bs graph */
2821 BlockDriverState *bdrv_find_node(const char *node_name)
2822 {
2823     BlockDriverState *bs;
2824 
2825     assert(node_name);
2826 
2827     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2828         if (!strcmp(node_name, bs->node_name)) {
2829             return bs;
2830         }
2831     }
2832     return NULL;
2833 }
2834 
2835 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2836 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2837 {
2838     BlockDeviceInfoList *list, *entry;
2839     BlockDriverState *bs;
2840 
2841     list = NULL;
2842     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2843         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
2844         if (!info) {
2845             qapi_free_BlockDeviceInfoList(list);
2846             return NULL;
2847         }
2848         entry = g_malloc0(sizeof(*entry));
2849         entry->value = info;
2850         entry->next = list;
2851         list = entry;
2852     }
2853 
2854     return list;
2855 }
2856 
2857 BlockDriverState *bdrv_lookup_bs(const char *device,
2858                                  const char *node_name,
2859                                  Error **errp)
2860 {
2861     BlockBackend *blk;
2862     BlockDriverState *bs;
2863 
2864     if (device) {
2865         blk = blk_by_name(device);
2866 
2867         if (blk) {
2868             bs = blk_bs(blk);
2869             if (!bs) {
2870                 error_setg(errp, "Device '%s' has no medium", device);
2871             }
2872 
2873             return bs;
2874         }
2875     }
2876 
2877     if (node_name) {
2878         bs = bdrv_find_node(node_name);
2879 
2880         if (bs) {
2881             return bs;
2882         }
2883     }
2884 
2885     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2886                      device ? device : "",
2887                      node_name ? node_name : "");
2888     return NULL;
2889 }
2890 
2891 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2892  * return false.  If either argument is NULL, return false. */
2893 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2894 {
2895     while (top && top != base) {
2896         top = backing_bs(top);
2897     }
2898 
2899     return top != NULL;
2900 }
2901 
2902 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2903 {
2904     if (!bs) {
2905         return QTAILQ_FIRST(&graph_bdrv_states);
2906     }
2907     return QTAILQ_NEXT(bs, node_list);
2908 }
2909 
2910 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2911  * the monitor or attached to a BlockBackend */
2912 BlockDriverState *bdrv_next(BlockDriverState *bs)
2913 {
2914     if (!bs || bs->blk) {
2915         bs = blk_next_root_bs(bs);
2916         if (bs) {
2917             return bs;
2918         }
2919     }
2920 
2921     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2922      * handled by the above block already */
2923     do {
2924         bs = bdrv_next_monitor_owned(bs);
2925     } while (bs && bs->blk);
2926     return bs;
2927 }
2928 
2929 const char *bdrv_get_node_name(const BlockDriverState *bs)
2930 {
2931     return bs->node_name;
2932 }
2933 
2934 /* TODO check what callers really want: bs->node_name or blk_name() */
2935 const char *bdrv_get_device_name(const BlockDriverState *bs)
2936 {
2937     return bs->blk ? blk_name(bs->blk) : "";
2938 }
2939 
2940 /* This can be used to identify nodes that might not have a device
2941  * name associated. Since node and device names live in the same
2942  * namespace, the result is unambiguous. The exception is if both are
2943  * absent, then this returns an empty (non-null) string. */
2944 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2945 {
2946     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2947 }
2948 
2949 int bdrv_get_flags(BlockDriverState *bs)
2950 {
2951     return bs->open_flags;
2952 }
2953 
2954 int bdrv_has_zero_init_1(BlockDriverState *bs)
2955 {
2956     return 1;
2957 }
2958 
2959 int bdrv_has_zero_init(BlockDriverState *bs)
2960 {
2961     assert(bs->drv);
2962 
2963     /* If BS is a copy on write image, it is initialized to
2964        the contents of the base image, which may not be zeroes.  */
2965     if (bs->backing) {
2966         return 0;
2967     }
2968     if (bs->drv->bdrv_has_zero_init) {
2969         return bs->drv->bdrv_has_zero_init(bs);
2970     }
2971 
2972     /* safe default */
2973     return 0;
2974 }
2975 
2976 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2977 {
2978     BlockDriverInfo bdi;
2979 
2980     if (bs->backing) {
2981         return false;
2982     }
2983 
2984     if (bdrv_get_info(bs, &bdi) == 0) {
2985         return bdi.unallocated_blocks_are_zero;
2986     }
2987 
2988     return false;
2989 }
2990 
2991 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2992 {
2993     BlockDriverInfo bdi;
2994 
2995     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2996         return false;
2997     }
2998 
2999     if (bdrv_get_info(bs, &bdi) == 0) {
3000         return bdi.can_write_zeroes_with_unmap;
3001     }
3002 
3003     return false;
3004 }
3005 
3006 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3007 {
3008     if (bs->backing && bs->backing->bs->encrypted)
3009         return bs->backing_file;
3010     else if (bs->encrypted)
3011         return bs->filename;
3012     else
3013         return NULL;
3014 }
3015 
3016 void bdrv_get_backing_filename(BlockDriverState *bs,
3017                                char *filename, int filename_size)
3018 {
3019     pstrcpy(filename, filename_size, bs->backing_file);
3020 }
3021 
3022 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3023 {
3024     BlockDriver *drv = bs->drv;
3025     if (!drv)
3026         return -ENOMEDIUM;
3027     if (!drv->bdrv_get_info)
3028         return -ENOTSUP;
3029     memset(bdi, 0, sizeof(*bdi));
3030     return drv->bdrv_get_info(bs, bdi);
3031 }
3032 
3033 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3034 {
3035     BlockDriver *drv = bs->drv;
3036     if (drv && drv->bdrv_get_specific_info) {
3037         return drv->bdrv_get_specific_info(bs);
3038     }
3039     return NULL;
3040 }
3041 
3042 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3043 {
3044     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3045         return;
3046     }
3047 
3048     bs->drv->bdrv_debug_event(bs, event);
3049 }
3050 
3051 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3052                           const char *tag)
3053 {
3054     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3055         bs = bs->file ? bs->file->bs : NULL;
3056     }
3057 
3058     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3059         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3060     }
3061 
3062     return -ENOTSUP;
3063 }
3064 
3065 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3066 {
3067     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3068         bs = bs->file ? bs->file->bs : NULL;
3069     }
3070 
3071     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3072         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3073     }
3074 
3075     return -ENOTSUP;
3076 }
3077 
3078 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3079 {
3080     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3081         bs = bs->file ? bs->file->bs : NULL;
3082     }
3083 
3084     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3085         return bs->drv->bdrv_debug_resume(bs, tag);
3086     }
3087 
3088     return -ENOTSUP;
3089 }
3090 
3091 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3092 {
3093     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3094         bs = bs->file ? bs->file->bs : NULL;
3095     }
3096 
3097     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3098         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3099     }
3100 
3101     return false;
3102 }
3103 
3104 int bdrv_is_snapshot(BlockDriverState *bs)
3105 {
3106     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3107 }
3108 
3109 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3110  * relative, it must be relative to the chain.  So, passing in bs->filename
3111  * from a BDS as backing_file should not be done, as that may be relative to
3112  * the CWD rather than the chain. */
3113 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3114         const char *backing_file)
3115 {
3116     char *filename_full = NULL;
3117     char *backing_file_full = NULL;
3118     char *filename_tmp = NULL;
3119     int is_protocol = 0;
3120     BlockDriverState *curr_bs = NULL;
3121     BlockDriverState *retval = NULL;
3122 
3123     if (!bs || !bs->drv || !backing_file) {
3124         return NULL;
3125     }
3126 
3127     filename_full     = g_malloc(PATH_MAX);
3128     backing_file_full = g_malloc(PATH_MAX);
3129     filename_tmp      = g_malloc(PATH_MAX);
3130 
3131     is_protocol = path_has_protocol(backing_file);
3132 
3133     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3134 
3135         /* If either of the filename paths is actually a protocol, then
3136          * compare unmodified paths; otherwise make paths relative */
3137         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3138             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3139                 retval = curr_bs->backing->bs;
3140                 break;
3141             }
3142         } else {
3143             /* If not an absolute filename path, make it relative to the current
3144              * image's filename path */
3145             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3146                          backing_file);
3147 
3148             /* We are going to compare absolute pathnames */
3149             if (!realpath(filename_tmp, filename_full)) {
3150                 continue;
3151             }
3152 
3153             /* We need to make sure the backing filename we are comparing against
3154              * is relative to the current image filename (or absolute) */
3155             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3156                          curr_bs->backing_file);
3157 
3158             if (!realpath(filename_tmp, backing_file_full)) {
3159                 continue;
3160             }
3161 
3162             if (strcmp(backing_file_full, filename_full) == 0) {
3163                 retval = curr_bs->backing->bs;
3164                 break;
3165             }
3166         }
3167     }
3168 
3169     g_free(filename_full);
3170     g_free(backing_file_full);
3171     g_free(filename_tmp);
3172     return retval;
3173 }
3174 
3175 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3176 {
3177     if (!bs->drv) {
3178         return 0;
3179     }
3180 
3181     if (!bs->backing) {
3182         return 0;
3183     }
3184 
3185     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3186 }
3187 
3188 void bdrv_init(void)
3189 {
3190     module_call_init(MODULE_INIT_BLOCK);
3191 }
3192 
3193 void bdrv_init_with_whitelist(void)
3194 {
3195     use_bdrv_whitelist = 1;
3196     bdrv_init();
3197 }
3198 
3199 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3200 {
3201     BdrvChild *child;
3202     Error *local_err = NULL;
3203     int ret;
3204 
3205     if (!bs->drv)  {
3206         return;
3207     }
3208 
3209     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3210         return;
3211     }
3212     bs->open_flags &= ~BDRV_O_INACTIVE;
3213 
3214     if (bs->drv->bdrv_invalidate_cache) {
3215         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3216         if (local_err) {
3217             bs->open_flags |= BDRV_O_INACTIVE;
3218             error_propagate(errp, local_err);
3219             return;
3220         }
3221     }
3222 
3223     QLIST_FOREACH(child, &bs->children, next) {
3224         bdrv_invalidate_cache(child->bs, &local_err);
3225         if (local_err) {
3226             bs->open_flags |= BDRV_O_INACTIVE;
3227             error_propagate(errp, local_err);
3228             return;
3229         }
3230     }
3231 
3232     ret = refresh_total_sectors(bs, bs->total_sectors);
3233     if (ret < 0) {
3234         bs->open_flags |= BDRV_O_INACTIVE;
3235         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3236         return;
3237     }
3238 }
3239 
3240 void bdrv_invalidate_cache_all(Error **errp)
3241 {
3242     BlockDriverState *bs = NULL;
3243     Error *local_err = NULL;
3244 
3245     while ((bs = bdrv_next(bs)) != NULL) {
3246         AioContext *aio_context = bdrv_get_aio_context(bs);
3247 
3248         aio_context_acquire(aio_context);
3249         bdrv_invalidate_cache(bs, &local_err);
3250         aio_context_release(aio_context);
3251         if (local_err) {
3252             error_propagate(errp, local_err);
3253             return;
3254         }
3255     }
3256 }
3257 
3258 static int bdrv_inactivate_recurse(BlockDriverState *bs,
3259                                    bool setting_flag)
3260 {
3261     BdrvChild *child;
3262     int ret;
3263 
3264     if (!setting_flag && bs->drv->bdrv_inactivate) {
3265         ret = bs->drv->bdrv_inactivate(bs);
3266         if (ret < 0) {
3267             return ret;
3268         }
3269     }
3270 
3271     QLIST_FOREACH(child, &bs->children, next) {
3272         ret = bdrv_inactivate_recurse(child->bs, setting_flag);
3273         if (ret < 0) {
3274             return ret;
3275         }
3276     }
3277 
3278     if (setting_flag) {
3279         bs->open_flags |= BDRV_O_INACTIVE;
3280     }
3281     return 0;
3282 }
3283 
3284 int bdrv_inactivate_all(void)
3285 {
3286     BlockDriverState *bs = NULL;
3287     int ret = 0;
3288     int pass;
3289 
3290     while ((bs = bdrv_next(bs)) != NULL) {
3291         aio_context_acquire(bdrv_get_aio_context(bs));
3292     }
3293 
3294     /* We do two passes of inactivation. The first pass calls to drivers'
3295      * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
3296      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
3297      * is allowed. */
3298     for (pass = 0; pass < 2; pass++) {
3299         bs = NULL;
3300         while ((bs = bdrv_next(bs)) != NULL) {
3301             ret = bdrv_inactivate_recurse(bs, pass);
3302             if (ret < 0) {
3303                 goto out;
3304             }
3305         }
3306     }
3307 
3308 out:
3309     bs = NULL;
3310     while ((bs = bdrv_next(bs)) != NULL) {
3311         aio_context_release(bdrv_get_aio_context(bs));
3312     }
3313 
3314     return ret;
3315 }
3316 
3317 /**************************************************************/
3318 /* removable device support */
3319 
3320 /**
3321  * Return TRUE if the media is present
3322  */
3323 bool bdrv_is_inserted(BlockDriverState *bs)
3324 {
3325     BlockDriver *drv = bs->drv;
3326     BdrvChild *child;
3327 
3328     if (!drv) {
3329         return false;
3330     }
3331     if (drv->bdrv_is_inserted) {
3332         return drv->bdrv_is_inserted(bs);
3333     }
3334     QLIST_FOREACH(child, &bs->children, next) {
3335         if (!bdrv_is_inserted(child->bs)) {
3336             return false;
3337         }
3338     }
3339     return true;
3340 }
3341 
3342 /**
3343  * Return whether the media changed since the last call to this
3344  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3345  */
3346 int bdrv_media_changed(BlockDriverState *bs)
3347 {
3348     BlockDriver *drv = bs->drv;
3349 
3350     if (drv && drv->bdrv_media_changed) {
3351         return drv->bdrv_media_changed(bs);
3352     }
3353     return -ENOTSUP;
3354 }
3355 
3356 /**
3357  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3358  */
3359 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3360 {
3361     BlockDriver *drv = bs->drv;
3362     const char *device_name;
3363 
3364     if (drv && drv->bdrv_eject) {
3365         drv->bdrv_eject(bs, eject_flag);
3366     }
3367 
3368     device_name = bdrv_get_device_name(bs);
3369     if (device_name[0] != '\0') {
3370         qapi_event_send_device_tray_moved(device_name,
3371                                           eject_flag, &error_abort);
3372     }
3373 }
3374 
3375 /**
3376  * Lock or unlock the media (if it is locked, the user won't be able
3377  * to eject it manually).
3378  */
3379 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3380 {
3381     BlockDriver *drv = bs->drv;
3382 
3383     trace_bdrv_lock_medium(bs, locked);
3384 
3385     if (drv && drv->bdrv_lock_medium) {
3386         drv->bdrv_lock_medium(bs, locked);
3387     }
3388 }
3389 
3390 /* Get a reference to bs */
3391 void bdrv_ref(BlockDriverState *bs)
3392 {
3393     bs->refcnt++;
3394 }
3395 
3396 /* Release a previously grabbed reference to bs.
3397  * If after releasing, reference count is zero, the BlockDriverState is
3398  * deleted. */
3399 void bdrv_unref(BlockDriverState *bs)
3400 {
3401     if (!bs) {
3402         return;
3403     }
3404     assert(bs->refcnt > 0);
3405     if (--bs->refcnt == 0) {
3406         bdrv_delete(bs);
3407     }
3408 }
3409 
/*
 * One entry in a per-operation blocker list of a BlockDriverState
 * (bs->op_blockers[op]).
 */
struct BdrvOpBlocker {
    /* Error object supplied by whoever blocked the operation. It is copied
     * into error reports and compared by pointer when unblocking. */
    Error *reason;
    /* Linkage within the op_blockers[op] list */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3414 
3415 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3416 {
3417     BdrvOpBlocker *blocker;
3418     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3419     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3420         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3421         if (errp) {
3422             *errp = error_copy(blocker->reason);
3423             error_prepend(errp, "Node '%s' is busy: ",
3424                           bdrv_get_device_or_node_name(bs));
3425         }
3426         return true;
3427     }
3428     return false;
3429 }
3430 
3431 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3432 {
3433     BdrvOpBlocker *blocker;
3434     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3435 
3436     blocker = g_new0(BdrvOpBlocker, 1);
3437     blocker->reason = reason;
3438     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3439 }
3440 
3441 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3442 {
3443     BdrvOpBlocker *blocker, *next;
3444     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3445     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3446         if (blocker->reason == reason) {
3447             QLIST_REMOVE(blocker, list);
3448             g_free(blocker);
3449         }
3450     }
3451 }
3452 
3453 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3454 {
3455     int i;
3456     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3457         bdrv_op_block(bs, i, reason);
3458     }
3459 }
3460 
3461 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3462 {
3463     int i;
3464     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3465         bdrv_op_unblock(bs, i, reason);
3466     }
3467 }
3468 
3469 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3470 {
3471     int i;
3472 
3473     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3474         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3475             return false;
3476         }
3477     }
3478     return true;
3479 }
3480 
3481 void bdrv_img_create(const char *filename, const char *fmt,
3482                      const char *base_filename, const char *base_fmt,
3483                      char *options, uint64_t img_size, int flags,
3484                      Error **errp, bool quiet)
3485 {
3486     QemuOptsList *create_opts = NULL;
3487     QemuOpts *opts = NULL;
3488     const char *backing_fmt, *backing_file;
3489     int64_t size;
3490     BlockDriver *drv, *proto_drv;
3491     Error *local_err = NULL;
3492     int ret = 0;
3493 
3494     /* Find driver and parse its options */
3495     drv = bdrv_find_format(fmt);
3496     if (!drv) {
3497         error_setg(errp, "Unknown file format '%s'", fmt);
3498         return;
3499     }
3500 
3501     proto_drv = bdrv_find_protocol(filename, true, errp);
3502     if (!proto_drv) {
3503         return;
3504     }
3505 
3506     if (!drv->create_opts) {
3507         error_setg(errp, "Format driver '%s' does not support image creation",
3508                    drv->format_name);
3509         return;
3510     }
3511 
3512     if (!proto_drv->create_opts) {
3513         error_setg(errp, "Protocol driver '%s' does not support image creation",
3514                    proto_drv->format_name);
3515         return;
3516     }
3517 
3518     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3519     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3520 
3521     /* Create parameter list with default values */
3522     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3523     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3524 
3525     /* Parse -o options */
3526     if (options) {
3527         qemu_opts_do_parse(opts, options, NULL, &local_err);
3528         if (local_err) {
3529             error_report_err(local_err);
3530             local_err = NULL;
3531             error_setg(errp, "Invalid options for file format '%s'", fmt);
3532             goto out;
3533         }
3534     }
3535 
3536     if (base_filename) {
3537         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3538         if (local_err) {
3539             error_setg(errp, "Backing file not supported for file format '%s'",
3540                        fmt);
3541             goto out;
3542         }
3543     }
3544 
3545     if (base_fmt) {
3546         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3547         if (local_err) {
3548             error_setg(errp, "Backing file format not supported for file "
3549                              "format '%s'", fmt);
3550             goto out;
3551         }
3552     }
3553 
3554     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3555     if (backing_file) {
3556         if (!strcmp(filename, backing_file)) {
3557             error_setg(errp, "Error: Trying to create an image with the "
3558                              "same filename as the backing file");
3559             goto out;
3560         }
3561     }
3562 
3563     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3564 
3565     // The size for the image must always be specified, with one exception:
3566     // If we are using a backing file, we can obtain the size from there
3567     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3568     if (size == -1) {
3569         if (backing_file) {
3570             BlockDriverState *bs;
3571             char *full_backing = g_new0(char, PATH_MAX);
3572             int64_t size;
3573             int back_flags;
3574             QDict *backing_options = NULL;
3575 
3576             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3577                                                          full_backing, PATH_MAX,
3578                                                          &local_err);
3579             if (local_err) {
3580                 g_free(full_backing);
3581                 goto out;
3582             }
3583 
3584             /* backing files always opened read-only */
3585             back_flags = flags;
3586             back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3587 
3588             if (backing_fmt) {
3589                 backing_options = qdict_new();
3590                 qdict_put(backing_options, "driver",
3591                           qstring_from_str(backing_fmt));
3592             }
3593 
3594             bs = NULL;
3595             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3596                             back_flags, &local_err);
3597             g_free(full_backing);
3598             if (ret < 0) {
3599                 goto out;
3600             }
3601             size = bdrv_getlength(bs);
3602             if (size < 0) {
3603                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3604                                  backing_file);
3605                 bdrv_unref(bs);
3606                 goto out;
3607             }
3608 
3609             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3610 
3611             bdrv_unref(bs);
3612         } else {
3613             error_setg(errp, "Image creation needs a size parameter");
3614             goto out;
3615         }
3616     }
3617 
3618     if (!quiet) {
3619         printf("Formatting '%s', fmt=%s ", filename, fmt);
3620         qemu_opts_print(opts, " ");
3621         puts("");
3622     }
3623 
3624     ret = bdrv_create(drv, filename, opts, &local_err);
3625 
3626     if (ret == -EFBIG) {
3627         /* This is generally a better message than whatever the driver would
3628          * deliver (especially because of the cluster_size_hint), since that
3629          * is most probably not much different from "image too large". */
3630         const char *cluster_size_hint = "";
3631         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3632             cluster_size_hint = " (try using a larger cluster size)";
3633         }
3634         error_setg(errp, "The image size is too large for file format '%s'"
3635                    "%s", fmt, cluster_size_hint);
3636         error_free(local_err);
3637         local_err = NULL;
3638     }
3639 
3640 out:
3641     qemu_opts_del(opts);
3642     qemu_opts_free(create_opts);
3643     if (local_err) {
3644         error_propagate(errp, local_err);
3645     }
3646 }
3647 
3648 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3649 {
3650     return bs->aio_context;
3651 }
3652 
3653 void bdrv_detach_aio_context(BlockDriverState *bs)
3654 {
3655     BdrvAioNotifier *baf;
3656 
3657     if (!bs->drv) {
3658         return;
3659     }
3660 
3661     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3662         baf->detach_aio_context(baf->opaque);
3663     }
3664 
3665     if (bs->throttle_state) {
3666         throttle_timers_detach_aio_context(&bs->throttle_timers);
3667     }
3668     if (bs->drv->bdrv_detach_aio_context) {
3669         bs->drv->bdrv_detach_aio_context(bs);
3670     }
3671     if (bs->file) {
3672         bdrv_detach_aio_context(bs->file->bs);
3673     }
3674     if (bs->backing) {
3675         bdrv_detach_aio_context(bs->backing->bs);
3676     }
3677 
3678     bs->aio_context = NULL;
3679 }
3680 
3681 void bdrv_attach_aio_context(BlockDriverState *bs,
3682                              AioContext *new_context)
3683 {
3684     BdrvAioNotifier *ban;
3685 
3686     if (!bs->drv) {
3687         return;
3688     }
3689 
3690     bs->aio_context = new_context;
3691 
3692     if (bs->backing) {
3693         bdrv_attach_aio_context(bs->backing->bs, new_context);
3694     }
3695     if (bs->file) {
3696         bdrv_attach_aio_context(bs->file->bs, new_context);
3697     }
3698     if (bs->drv->bdrv_attach_aio_context) {
3699         bs->drv->bdrv_attach_aio_context(bs, new_context);
3700     }
3701     if (bs->throttle_state) {
3702         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3703     }
3704 
3705     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3706         ban->attached_aio_context(new_context, ban->opaque);
3707     }
3708 }
3709 
3710 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3711 {
3712     bdrv_drain(bs); /* ensure there are no in-flight requests */
3713 
3714     bdrv_detach_aio_context(bs);
3715 
3716     /* This function executes in the old AioContext so acquire the new one in
3717      * case it runs in a different thread.
3718      */
3719     aio_context_acquire(new_context);
3720     bdrv_attach_aio_context(bs, new_context);
3721     aio_context_release(new_context);
3722 }
3723 
3724 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3725         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3726         void (*detach_aio_context)(void *opaque), void *opaque)
3727 {
3728     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3729     *ban = (BdrvAioNotifier){
3730         .attached_aio_context = attached_aio_context,
3731         .detach_aio_context   = detach_aio_context,
3732         .opaque               = opaque
3733     };
3734 
3735     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3736 }
3737 
3738 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3739                                       void (*attached_aio_context)(AioContext *,
3740                                                                    void *),
3741                                       void (*detach_aio_context)(void *),
3742                                       void *opaque)
3743 {
3744     BdrvAioNotifier *ban, *ban_next;
3745 
3746     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3747         if (ban->attached_aio_context == attached_aio_context &&
3748             ban->detach_aio_context   == detach_aio_context   &&
3749             ban->opaque               == opaque)
3750         {
3751             QLIST_REMOVE(ban, list);
3752             g_free(ban);
3753 
3754             return;
3755         }
3756     }
3757 
3758     abort();
3759 }
3760 
3761 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3762                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3763 {
3764     if (!bs->drv->bdrv_amend_options) {
3765         return -ENOTSUP;
3766     }
3767     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3768 }
3769 
3770 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3771  * of block filter and by bdrv_is_first_non_filter.
3772  * It is used to test if the given bs is the candidate or recurse more in the
3773  * node graph.
3774  */
3775 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3776                                       BlockDriverState *candidate)
3777 {
3778     /* return false if basic checks fails */
3779     if (!bs || !bs->drv) {
3780         return false;
3781     }
3782 
3783     /* the code reached a non block filter driver -> check if the bs is
3784      * the same as the candidate. It's the recursion termination condition.
3785      */
3786     if (!bs->drv->is_filter) {
3787         return bs == candidate;
3788     }
3789     /* Down this path the driver is a block filter driver */
3790 
3791     /* If the block filter recursion method is defined use it to recurse down
3792      * the node graph.
3793      */
3794     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3795         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3796     }
3797 
3798     /* the driver is a block filter but don't allow to recurse -> return false
3799      */
3800     return false;
3801 }
3802 
3803 /* This function checks if the candidate is the first non filter bs down it's
3804  * bs chain. Since we don't have pointers to parents it explore all bs chains
3805  * from the top. Some filters can choose not to pass down the recursion.
3806  */
3807 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3808 {
3809     BlockDriverState *bs = NULL;
3810 
3811     /* walk down the bs forest recursively */
3812     while ((bs = bdrv_next(bs)) != NULL) {
3813         bool perm;
3814 
3815         /* try to recurse in this top level bs */
3816         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3817 
3818         /* candidate is the first non filter */
3819         if (perm) {
3820             return true;
3821         }
3822     }
3823 
3824     return false;
3825 }
3826 
3827 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3828                                         const char *node_name, Error **errp)
3829 {
3830     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3831     AioContext *aio_context;
3832 
3833     if (!to_replace_bs) {
3834         error_setg(errp, "Node name '%s' not found", node_name);
3835         return NULL;
3836     }
3837 
3838     aio_context = bdrv_get_aio_context(to_replace_bs);
3839     aio_context_acquire(aio_context);
3840 
3841     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3842         to_replace_bs = NULL;
3843         goto out;
3844     }
3845 
3846     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3847      * most non filter in order to prevent data corruption.
3848      * Another benefit is that this tests exclude backing files which are
3849      * blocked by the backing blockers.
3850      */
3851     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3852         error_setg(errp, "Only top most non filter can be replaced");
3853         to_replace_bs = NULL;
3854         goto out;
3855     }
3856 
3857 out:
3858     aio_context_release(aio_context);
3859     return to_replace_bs;
3860 }
3861 
3862 static bool append_open_options(QDict *d, BlockDriverState *bs)
3863 {
3864     const QDictEntry *entry;
3865     QemuOptDesc *desc;
3866     BdrvChild *child;
3867     bool found_any = false;
3868     const char *p;
3869 
3870     for (entry = qdict_first(bs->options); entry;
3871          entry = qdict_next(bs->options, entry))
3872     {
3873         /* Exclude options for children */
3874         QLIST_FOREACH(child, &bs->children, next) {
3875             if (strstart(qdict_entry_key(entry), child->name, &p)
3876                 && (!*p || *p == '.'))
3877             {
3878                 break;
3879             }
3880         }
3881         if (child) {
3882             continue;
3883         }
3884 
3885         /* And exclude all non-driver-specific options */
3886         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3887             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3888                 break;
3889             }
3890         }
3891         if (desc->name) {
3892             continue;
3893         }
3894 
3895         qobject_incref(qdict_entry_value(entry));
3896         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3897         found_any = true;
3898     }
3899 
3900     return found_any;
3901 }
3902 
3903 /* Updates the following BDS fields:
3904  *  - exact_filename: A filename which may be used for opening a block device
3905  *                    which (mostly) equals the given BDS (even without any
3906  *                    other options; so reading and writing must return the same
3907  *                    results, but caching etc. may be different)
3908  *  - full_open_options: Options which, when given when opening a block device
3909  *                       (without a filename), result in a BDS (mostly)
3910  *                       equalling the given one
3911  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3912  *              full_open_options is converted to a JSON object, prefixed with
3913  *              "json:" (for use through the JSON pseudo protocol) and put here.
3914  */
3915 void bdrv_refresh_filename(BlockDriverState *bs)
3916 {
3917     BlockDriver *drv = bs->drv;
3918     QDict *opts;
3919 
3920     if (!drv) {
3921         return;
3922     }
3923 
3924     /* This BDS's file name will most probably depend on its file's name, so
3925      * refresh that first */
3926     if (bs->file) {
3927         bdrv_refresh_filename(bs->file->bs);
3928     }
3929 
3930     if (drv->bdrv_refresh_filename) {
3931         /* Obsolete information is of no use here, so drop the old file name
3932          * information before refreshing it */
3933         bs->exact_filename[0] = '\0';
3934         if (bs->full_open_options) {
3935             QDECREF(bs->full_open_options);
3936             bs->full_open_options = NULL;
3937         }
3938 
3939         opts = qdict_new();
3940         append_open_options(opts, bs);
3941         drv->bdrv_refresh_filename(bs, opts);
3942         QDECREF(opts);
3943     } else if (bs->file) {
3944         /* Try to reconstruct valid information from the underlying file */
3945         bool has_open_options;
3946 
3947         bs->exact_filename[0] = '\0';
3948         if (bs->full_open_options) {
3949             QDECREF(bs->full_open_options);
3950             bs->full_open_options = NULL;
3951         }
3952 
3953         opts = qdict_new();
3954         has_open_options = append_open_options(opts, bs);
3955 
3956         /* If no specific options have been given for this BDS, the filename of
3957          * the underlying file should suffice for this one as well */
3958         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3959             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3960         }
3961         /* Reconstructing the full options QDict is simple for most format block
3962          * drivers, as long as the full options are known for the underlying
3963          * file BDS. The full options QDict of that file BDS should somehow
3964          * contain a representation of the filename, therefore the following
3965          * suffices without querying the (exact_)filename of this BDS. */
3966         if (bs->file->bs->full_open_options) {
3967             qdict_put_obj(opts, "driver",
3968                           QOBJECT(qstring_from_str(drv->format_name)));
3969             QINCREF(bs->file->bs->full_open_options);
3970             qdict_put_obj(opts, "file",
3971                           QOBJECT(bs->file->bs->full_open_options));
3972 
3973             bs->full_open_options = opts;
3974         } else {
3975             QDECREF(opts);
3976         }
3977     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3978         /* There is no underlying file BDS (at least referenced by BDS.file),
3979          * so the full options QDict should be equal to the options given
3980          * specifically for this block device when it was opened (plus the
3981          * driver specification).
3982          * Because those options don't change, there is no need to update
3983          * full_open_options when it's already set. */
3984 
3985         opts = qdict_new();
3986         append_open_options(opts, bs);
3987         qdict_put_obj(opts, "driver",
3988                       QOBJECT(qstring_from_str(drv->format_name)));
3989 
3990         if (bs->exact_filename[0]) {
3991             /* This may not work for all block protocol drivers (some may
3992              * require this filename to be parsed), but we have to find some
3993              * default solution here, so just include it. If some block driver
3994              * does not support pure options without any filename at all or
3995              * needs some special format of the options QDict, it needs to
3996              * implement the driver-specific bdrv_refresh_filename() function.
3997              */
3998             qdict_put_obj(opts, "filename",
3999                           QOBJECT(qstring_from_str(bs->exact_filename)));
4000         }
4001 
4002         bs->full_open_options = opts;
4003     }
4004 
4005     if (bs->exact_filename[0]) {
4006         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4007     } else if (bs->full_open_options) {
4008         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4009         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4010                  qstring_get_str(json));
4011         QDECREF(json);
4012     }
4013 }
4014 
4015 /*
4016  * Hot add/remove a BDS's child. So the user can take a child offline when
4017  * it is broken and take a new child online
4018  */
4019 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
4020                     Error **errp)
4021 {
4022 
4023     if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
4024         error_setg(errp, "The node %s does not support adding a child",
4025                    bdrv_get_device_or_node_name(parent_bs));
4026         return;
4027     }
4028 
4029     if (!QLIST_EMPTY(&child_bs->parents)) {
4030         error_setg(errp, "The node %s already has a parent",
4031                    child_bs->node_name);
4032         return;
4033     }
4034 
4035     parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
4036 }
4037 
4038 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
4039 {
4040     BdrvChild *tmp;
4041 
4042     if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
4043         error_setg(errp, "The node %s does not support removing a child",
4044                    bdrv_get_device_or_node_name(parent_bs));
4045         return;
4046     }
4047 
4048     QLIST_FOREACH(tmp, &parent_bs->children, next) {
4049         if (tmp == child) {
4050             break;
4051         }
4052     }
4053 
4054     if (!tmp) {
4055         error_setg(errp, "The node %s does not have a child named %s",
4056                    bdrv_get_device_or_node_name(parent_bs),
4057                    bdrv_get_device_or_node_name(child->bs));
4058         return;
4059     }
4060 
4061     parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
4062 }
4063