xref: /openbmc/qemu/block.c (revision 63785678)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44 
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/*
 * Placeholder result value: bdrv_create() stores it in CreateCo.ret before
 * starting the coroutine and polls until the coroutine has replaced it.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Nodes that have been given a node name (see bdrv_assign_node_name()) */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new() is appended here */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers added through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration */
static void bdrv_close(BlockDriverState *bs);
77 
78 #ifdef _WIN32
/*
 * Return non-zero when @filename begins with an ASCII letter that is
 * immediately followed by ':' (a drive specifier such as "c:").
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_letter = (first >= 'a' && first <= 'z') ||
                    (first >= 'A' && first <= 'Z');

    if (!is_letter) {
        /* filename[1] is deliberately not inspected in this case */
        return 0;
    }
    return filename[1] == ':';
}
85 
86 int is_windows_drive(const char *filename)
87 {
88     if (is_windows_drive_prefix(filename) &&
89         filename[2] == '\0')
90         return 1;
91     if (strstart(filename, "\\\\.\\", NULL) ||
92         strstart(filename, "//./", NULL))
93         return 1;
94     return 0;
95 }
96 #endif
97 
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100     if (!bs || !bs->drv) {
101         /* page size or 4k (hdd sector size) should be on the safe side */
102         return MAX(4096, getpagesize());
103     }
104 
105     return bs->bl.opt_mem_alignment;
106 }
107 
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110     if (!bs || !bs->drv) {
111         /* page size or 4k (hdd sector size) should be on the safe side */
112         return MAX(4096, getpagesize());
113     }
114 
115     return bs->bl.min_mem_alignment;
116 }
117 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' shows up
 * before any path separator.  Returns 1 if so, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
    const char *stoppers;
    size_t prefix_len;

#ifdef _WIN32
    /* a drive letter is not a protocol */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    stoppers = ":/\\";
#else
    stoppers = ":/";
#endif

    /* length of the leading run that contains neither ':' nor a separator */
    prefix_len = strcspn(path, stoppers);

    return path[prefix_len] == ':';
}
135 
/*
 * Return 1 if @path is absolute, 0 otherwise.  Everywhere a leading '/'
 * qualifies; on Windows a drive specification or a leading '\' does too.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
148 
/*
 * Write into @dest (capacity @dest_size) the location of @filename
 * interpreted relative to @base_path.
 *
 * An absolute @filename is copied as is.  Otherwise the leading part of
 * @base_path is kept up to and including its last path separator or its
 * first ':', whichever ends later, and @filename is appended to it.
 * URLs are supported.  Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes, clamped to the buffer */
    cut = after_sep > after_colon ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192 
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194                                                   const char *backing,
195                                                   char *dest, size_t sz,
196                                                   Error **errp)
197 {
198     if (backing[0] == '\0' || path_has_protocol(backing) ||
199         path_is_absolute(backing))
200     {
201         pstrcpy(dest, sz, backing);
202     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203         error_setg(errp, "Cannot use relative backing file names for '%s'",
204                    backed);
205     } else {
206         path_combine(dest, sz, backed, backing);
207     }
208 }
209 
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211                                     Error **errp)
212 {
213     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214 
215     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216                                                  dest, sz, errp);
217 }
218 
/*
 * Register the block driver @bdrv: pass it through bdrv_setup_io_funcs() and
 * insert it at the head of the bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    /* newest registration ends up first in the list */
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
225 
/* Create a new BlockDriverState; currently a plain wrapper around bdrv_new(). */
BlockDriverState *bdrv_new_root(void)
{
    return bdrv_new();
}
230 
231 BlockDriverState *bdrv_new(void)
232 {
233     BlockDriverState *bs;
234     int i;
235 
236     bs = g_new0(BlockDriverState, 1);
237     QLIST_INIT(&bs->dirty_bitmaps);
238     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
239         QLIST_INIT(&bs->op_blockers[i]);
240     }
241     notifier_with_return_list_init(&bs->before_write_notifiers);
242     qemu_co_queue_init(&bs->throttled_reqs[0]);
243     qemu_co_queue_init(&bs->throttled_reqs[1]);
244     bs->refcnt = 1;
245     bs->aio_context = qemu_get_aio_context();
246 
247     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
248 
249     return bs;
250 }
251 
252 BlockDriver *bdrv_find_format(const char *format_name)
253 {
254     BlockDriver *drv1;
255     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
256         if (!strcmp(drv1->format_name, format_name)) {
257             return drv1;
258         }
259     }
260     return NULL;
261 }
262 
263 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
264 {
265     static const char *whitelist_rw[] = {
266         CONFIG_BDRV_RW_WHITELIST
267     };
268     static const char *whitelist_ro[] = {
269         CONFIG_BDRV_RO_WHITELIST
270     };
271     const char **p;
272 
273     if (!whitelist_rw[0] && !whitelist_ro[0]) {
274         return 1;               /* no whitelist, anything goes */
275     }
276 
277     for (p = whitelist_rw; *p; p++) {
278         if (!strcmp(drv->format_name, *p)) {
279             return 1;
280         }
281     }
282     if (read_only) {
283         for (p = whitelist_ro; *p; p++) {
284             if (!strcmp(drv->format_name, *p)) {
285                 return 1;
286             }
287         }
288     }
289     return 0;
290 }
291 
/* Return whether the use_bdrv_whitelist flag is non-zero. */
bool bdrv_uses_whitelist(void)
{
    return use_bdrv_whitelist;
}
296 
/* Argument/result bundle handed from bdrv_create() to bdrv_create_co_entry() */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose .bdrv_create callback is invoked */
    char *filename;     /* private copy of the name; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options forwarded to the callback */
    int ret;            /* NOT_DONE until the callback's return value is stored */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
304 
305 static void coroutine_fn bdrv_create_co_entry(void *opaque)
306 {
307     Error *local_err = NULL;
308     int ret;
309 
310     CreateCo *cco = opaque;
311     assert(cco->drv);
312 
313     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
314     if (local_err) {
315         error_propagate(&cco->err, local_err);
316     }
317     cco->ret = ret;
318 }
319 
320 int bdrv_create(BlockDriver *drv, const char* filename,
321                 QemuOpts *opts, Error **errp)
322 {
323     int ret;
324 
325     Coroutine *co;
326     CreateCo cco = {
327         .drv = drv,
328         .filename = g_strdup(filename),
329         .opts = opts,
330         .ret = NOT_DONE,
331         .err = NULL,
332     };
333 
334     if (!drv->bdrv_create) {
335         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
336         ret = -ENOTSUP;
337         goto out;
338     }
339 
340     if (qemu_in_coroutine()) {
341         /* Fast-path if already in coroutine context */
342         bdrv_create_co_entry(&cco);
343     } else {
344         co = qemu_coroutine_create(bdrv_create_co_entry);
345         qemu_coroutine_enter(co, &cco);
346         while (cco.ret == NOT_DONE) {
347             aio_poll(qemu_get_aio_context(), true);
348         }
349     }
350 
351     ret = cco.ret;
352     if (ret < 0) {
353         if (cco.err) {
354             error_propagate(errp, cco.err);
355         } else {
356             error_setg_errno(errp, -ret, "Could not create image");
357         }
358     }
359 
360 out:
361     g_free(cco.filename);
362     return ret;
363 }
364 
365 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
366 {
367     BlockDriver *drv;
368     Error *local_err = NULL;
369     int ret;
370 
371     drv = bdrv_find_protocol(filename, true, errp);
372     if (drv == NULL) {
373         return -ENOENT;
374     }
375 
376     ret = bdrv_create(drv, filename, opts, &local_err);
377     if (local_err) {
378         error_propagate(errp, local_err);
379     }
380     return ret;
381 }
382 
383 /**
384  * Try to get @bs's logical and physical block size.
385  * On success, store them in @bsz struct and return 0.
386  * On failure return -errno.
387  * @bs must not be empty.
388  */
389 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
390 {
391     BlockDriver *drv = bs->drv;
392 
393     if (drv && drv->bdrv_probe_blocksizes) {
394         return drv->bdrv_probe_blocksizes(bs, bsz);
395     }
396 
397     return -ENOTSUP;
398 }
399 
400 /**
401  * Try to get @bs's geometry (cyls, heads, sectors).
402  * On success, store them in @geo struct and return 0.
403  * On failure return -errno.
404  * @bs must not be empty.
405  */
406 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
407 {
408     BlockDriver *drv = bs->drv;
409 
410     if (drv && drv->bdrv_probe_geometry) {
411         return drv->bdrv_probe_geometry(bs, geo);
412     }
413 
414     return -ENOTSUP;
415 }
416 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     capacity of @filename in bytes (on Windows it must be at least
 *            MAX_PATH, which is asserted)
 *
 * On POSIX hosts the file is created with mkstemp() in $TMPDIR, or in
 * /var/tmp when TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value; -EOVERFLOW means
 * the generated name does not fit into @size bytes.
 * NOTE(review): the Windows branch returns -GetLastError(), which looks like
 * a negated Win32 error code rather than an errno value; confirm what callers
 * expect.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself fail and overwrite errno.
         */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
452 
453 /*
454  * Detect host devices. By convention, /dev/cdrom[N] is always
455  * recognized as a host CDROM.
456  */
457 static BlockDriver *find_hdev_driver(const char *filename)
458 {
459     int score_max = 0, score;
460     BlockDriver *drv = NULL, *d;
461 
462     QLIST_FOREACH(d, &bdrv_drivers, list) {
463         if (d->bdrv_probe_device) {
464             score = d->bdrv_probe_device(filename);
465             if (score > score_max) {
466                 score_max = score;
467                 drv = d;
468             }
469         }
470     }
471 
472     return drv;
473 }
474 
475 BlockDriver *bdrv_find_protocol(const char *filename,
476                                 bool allow_protocol_prefix,
477                                 Error **errp)
478 {
479     BlockDriver *drv1;
480     char protocol[128];
481     int len;
482     const char *p;
483 
484     /* TODO Drivers without bdrv_file_open must be specified explicitly */
485 
486     /*
487      * XXX(hch): we really should not let host device detection
488      * override an explicit protocol specification, but moving this
489      * later breaks access to device names with colons in them.
490      * Thanks to the brain-dead persistent naming schemes on udev-
491      * based Linux systems those actually are quite common.
492      */
493     drv1 = find_hdev_driver(filename);
494     if (drv1) {
495         return drv1;
496     }
497 
498     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
499         return &bdrv_file;
500     }
501 
502     p = strchr(filename, ':');
503     assert(p != NULL);
504     len = p - filename;
505     if (len > sizeof(protocol) - 1)
506         len = sizeof(protocol) - 1;
507     memcpy(protocol, filename, len);
508     protocol[len] = '\0';
509     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
510         if (drv1->protocol_name &&
511             !strcmp(drv1->protocol_name, protocol)) {
512             return drv1;
513         }
514     }
515 
516     error_setg(errp, "Unknown protocol '%s'", protocol);
517     return NULL;
518 }
519 
520 /*
521  * Guess image format by probing its contents.
522  * This is not a good idea when your image is raw (CVE-2008-2004), but
523  * we do it anyway for backward compatibility.
524  *
525  * @buf         contains the image's first @buf_size bytes.
526  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
527  *              but can be smaller if the image file is smaller)
528  * @filename    is its filename.
529  *
530  * For all block drivers, call the bdrv_probe() method to get its
531  * probing score.
532  * Return the first block driver with the highest probing score.
533  */
534 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
535                             const char *filename)
536 {
537     int score_max = 0, score;
538     BlockDriver *drv = NULL, *d;
539 
540     QLIST_FOREACH(d, &bdrv_drivers, list) {
541         if (d->bdrv_probe) {
542             score = d->bdrv_probe(buf, buf_size, filename);
543             if (score > score_max) {
544                 score_max = score;
545                 drv = d;
546             }
547         }
548     }
549 
550     return drv;
551 }
552 
553 static int find_image_format(BlockDriverState *bs, const char *filename,
554                              BlockDriver **pdrv, Error **errp)
555 {
556     BlockDriver *drv;
557     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
558     int ret = 0;
559 
560     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
561     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
562         *pdrv = &bdrv_raw;
563         return ret;
564     }
565 
566     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
567     if (ret < 0) {
568         error_setg_errno(errp, -ret, "Could not read image for determining its "
569                          "format");
570         *pdrv = NULL;
571         return ret;
572     }
573 
574     drv = bdrv_probe_all(buf, ret, filename);
575     if (!drv) {
576         error_setg(errp, "Could not determine image format: No compatible "
577                    "driver found");
578         ret = -ENOENT;
579     }
580     *pdrv = drv;
581     return ret;
582 }
583 
584 /**
585  * Set the current 'total_sectors' value
586  * Return 0 on success, -errno on error.
587  */
588 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
589 {
590     BlockDriver *drv = bs->drv;
591 
592     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
593     if (bdrv_is_sg(bs))
594         return 0;
595 
596     /* query actual device if possible, otherwise just trust the hint */
597     if (drv->bdrv_getlength) {
598         int64_t length = drv->bdrv_getlength(bs);
599         if (length < 0) {
600             return length;
601         }
602         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
603     }
604 
605     bs->total_sectors = hint;
606     return 0;
607 }
608 
609 /**
610  * Combines a QDict of new block driver @options with any missing options taken
611  * from @old_options, so that leaving out an option defaults to its old value.
612  */
613 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
614                               QDict *old_options)
615 {
616     if (bs->drv && bs->drv->bdrv_join_options) {
617         bs->drv->bdrv_join_options(options, old_options);
618     } else {
619         qdict_join(options, old_options, false);
620     }
621 }
622 
623 /**
624  * Set open flags for a given discard mode
625  *
626  * Return 0 on success, -1 if the discard mode was invalid.
627  */
628 int bdrv_parse_discard_flags(const char *mode, int *flags)
629 {
630     *flags &= ~BDRV_O_UNMAP;
631 
632     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
633         /* do nothing */
634     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
635         *flags |= BDRV_O_UNMAP;
636     } else {
637         return -1;
638     }
639 
640     return 0;
641 }
642 
643 /**
644  * Set open flags for a given cache mode
645  *
646  * Return 0 on success, -1 if the cache mode was invalid.
647  */
648 int bdrv_parse_cache_flags(const char *mode, int *flags)
649 {
650     *flags &= ~BDRV_O_CACHE_MASK;
651 
652     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
653         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
654     } else if (!strcmp(mode, "directsync")) {
655         *flags |= BDRV_O_NOCACHE;
656     } else if (!strcmp(mode, "writeback")) {
657         *flags |= BDRV_O_CACHE_WB;
658     } else if (!strcmp(mode, "unsafe")) {
659         *flags |= BDRV_O_CACHE_WB;
660         *flags |= BDRV_O_NO_FLUSH;
661     } else if (!strcmp(mode, "writethrough")) {
662         /* this is the default */
663     } else {
664         return -1;
665     }
666 
667     return 0;
668 }
669 
670 /*
671  * Returns the options and flags that a temporary snapshot should get, based on
672  * the originally requested flags (the originally requested image will have
673  * flags like a backing file)
674  */
675 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
676                                        int parent_flags, QDict *parent_options)
677 {
678     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
679 
680     /* For temporary files, unconditional cache=unsafe is fine */
681     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
682     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
683     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
684 }
685 
686 /*
687  * Returns the options and flags that bs->file should get if a protocol driver
688  * is expected, based on the given options and flags for the parent BDS
689  */
690 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
691                                    int parent_flags, QDict *parent_options)
692 {
693     int flags = parent_flags;
694 
695     /* Enable protocol handling, disable format probing for bs->file */
696     flags |= BDRV_O_PROTOCOL;
697 
698     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
699      * the parent. */
700     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
701     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
702 
703     /* Our block drivers take care to send flushes and respect unmap policy,
704      * so we can default to enable both on lower layers regardless of the
705      * corresponding parent options. */
706     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
707     flags |= BDRV_O_UNMAP;
708 
709     /* Clear flags that only apply to the top layer */
710     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
711                BDRV_O_NO_IO);
712 
713     *child_flags = flags;
714 }
715 
716 const BdrvChildRole child_file = {
717     .inherit_options = bdrv_inherited_options,
718 };
719 
720 /*
721  * Returns the options and flags that bs->file should get if the use of formats
722  * (and not only protocols) is permitted for it, based on the given options and
723  * flags for the parent BDS
724  */
725 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
726                                        int parent_flags, QDict *parent_options)
727 {
728     child_file.inherit_options(child_flags, child_options,
729                                parent_flags, parent_options);
730 
731     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
732 }
733 
734 const BdrvChildRole child_format = {
735     .inherit_options = bdrv_inherited_fmt_options,
736 };
737 
738 /*
739  * Returns the options and flags that bs->backing should get, based on the
740  * given options and flags for the parent BDS
741  */
742 static void bdrv_backing_options(int *child_flags, QDict *child_options,
743                                  int parent_flags, QDict *parent_options)
744 {
745     int flags = parent_flags;
746 
747     /* The cache mode is inherited unmodified for backing files; except WCE,
748      * which is only applied on the top level (BlockBackend) */
749     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
750     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
751     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
752 
753     /* backing files always opened read-only */
754     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
755 
756     /* snapshot=on is handled on the top layer */
757     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
758 
759     *child_flags = flags;
760 }
761 
762 static const BdrvChildRole child_backing = {
763     .inherit_options = bdrv_backing_options,
764 };
765 
766 static int bdrv_open_flags(BlockDriverState *bs, int flags)
767 {
768     int open_flags = flags | BDRV_O_CACHE_WB;
769 
770     /*
771      * Clear flags that are internal to the block layer before opening the
772      * image.
773      */
774     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
775 
776     /*
777      * Snapshots should be writable.
778      */
779     if (flags & BDRV_O_TEMPORARY) {
780         open_flags |= BDRV_O_RDWR;
781     }
782 
783     return open_flags;
784 }
785 
786 static void update_flags_from_options(int *flags, QemuOpts *opts)
787 {
788     *flags &= ~BDRV_O_CACHE_MASK;
789 
790     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
791     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
792         *flags |= BDRV_O_CACHE_WB;
793     }
794 
795     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
796     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
797         *flags |= BDRV_O_NO_FLUSH;
798     }
799 
800     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
801     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
802         *flags |= BDRV_O_NOCACHE;
803     }
804 }
805 
806 static void update_options_from_flags(QDict *options, int flags)
807 {
808     if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
809         qdict_put(options, BDRV_OPT_CACHE_WB,
810                   qbool_from_bool(flags & BDRV_O_CACHE_WB));
811     }
812     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
813         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
814                   qbool_from_bool(flags & BDRV_O_NOCACHE));
815     }
816     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
817         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
818                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
819     }
820 }
821 
822 static void bdrv_assign_node_name(BlockDriverState *bs,
823                                   const char *node_name,
824                                   Error **errp)
825 {
826     char *gen_node_name = NULL;
827 
828     if (!node_name) {
829         node_name = gen_node_name = id_generate(ID_BLOCK);
830     } else if (!id_wellformed(node_name)) {
831         /*
832          * Check for empty string or invalid characters, but not if it is
833          * generated (generated names use characters not available to the user)
834          */
835         error_setg(errp, "Invalid node name");
836         return;
837     }
838 
839     /* takes care of avoiding namespaces collisions */
840     if (blk_by_name(node_name)) {
841         error_setg(errp, "node-name=%s is conflicting with a device id",
842                    node_name);
843         goto out;
844     }
845 
846     /* takes care of avoiding duplicates node names */
847     if (bdrv_find_node(node_name)) {
848         error_setg(errp, "Duplicate node name");
849         goto out;
850     }
851 
852     /* copy node name into the bs and insert it into the graph list */
853     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
854     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
855 out:
856     g_free(gen_node_name);
857 }
858 
/*
 * Options handled by the generic block layer itself.  bdrv_open_common()
 * absorbs these keys from the options QDict before the driver's open
 * callback is invoked.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
891 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node being opened; bs->file must still be NULL.  bs->open_flags
 *           is read here and its cache bits are rewritten from the options.
 * @file:    child to install as bs->file, or NULL.  Must be NULL for drivers
 *           providing .bdrv_file_open and non-NULL for all others.
 * @options: option QDict, distinct from bs->options.  Its "driver" entry
 *           must name a registered driver (asserted).
 * @errp:    receives the error on failure.
 *
 * Returns 0 on success and a negative errno value on failure.  Failures
 * that occur after the driver state was allocated reset bs->file,
 * bs->opaque and bs->drv again.
 *
 * NOTE(review): a node name assigned by bdrv_assign_node_name() is not
 * undone on the later failure paths of this function; presumably the caller
 * cleans this up when it drops @bs -- confirm.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the generic options (bdrv_runtime_opts) out of the QDict */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* The file name comes from the child if there is one, else from options */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are set before the driver's open callback runs */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell apart "usable read-only only" from "not whitelisted at all" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);

    /* Only a node with a BlockBackend attached may disable writeback */
    if (!bs->blk && (bs->open_flags & BDRV_O_CACHE_WB) == 0) {
        error_setg(errp, "Can't set writethrough mode except for the root");
        ret = -EINVAL;
        goto free_and_fail;
    }

    bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error, else build a generic message */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the driver state set up above, then release the QemuOpts */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    /* Failures before the driver state was allocated only release the QemuOpts */
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1050 
1051 static QDict *parse_json_filename(const char *filename, Error **errp)
1052 {
1053     QObject *options_obj;
1054     QDict *options;
1055     int ret;
1056 
1057     ret = strstart(filename, "json:", &filename);
1058     assert(ret);
1059 
1060     options_obj = qobject_from_json(filename);
1061     if (!options_obj) {
1062         error_setg(errp, "Could not parse the JSON options");
1063         return NULL;
1064     }
1065 
1066     if (qobject_type(options_obj) != QTYPE_QDICT) {
1067         qobject_decref(options_obj);
1068         error_setg(errp, "Invalid JSON object given");
1069         return NULL;
1070     }
1071 
1072     options = qobject_to_qdict(options_obj);
1073     qdict_flatten(options);
1074 
1075     return options;
1076 }
1077 
1078 static void parse_json_protocol(QDict *options, const char **pfilename,
1079                                 Error **errp)
1080 {
1081     QDict *json_options;
1082     Error *local_err = NULL;
1083 
1084     /* Parse json: pseudo-protocol */
1085     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1086         return;
1087     }
1088 
1089     json_options = parse_json_filename(*pfilename, &local_err);
1090     if (local_err) {
1091         error_propagate(errp, local_err);
1092         return;
1093     }
1094 
1095     /* Options given in the filename have lower priority than options
1096      * specified directly */
1097     qdict_join(options, json_options, false);
1098     QDECREF(json_options);
1099     *pfilename = NULL;
1100 }
1101 
1102 /*
1103  * Fills in default options for opening images and converts the legacy
1104  * filename/flags pair to option QDict entries.
1105  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1106  * block driver has been specified explicitly.
1107  */
1108 static int bdrv_fill_options(QDict **options, const char *filename,
1109                              int *flags, Error **errp)
1110 {
1111     const char *drvname;
1112     bool protocol = *flags & BDRV_O_PROTOCOL;
1113     bool parse_filename = false;
1114     BlockDriver *drv = NULL;
1115     Error *local_err = NULL;
1116 
1117     drvname = qdict_get_try_str(*options, "driver");
1118     if (drvname) {
1119         drv = bdrv_find_format(drvname);
1120         if (!drv) {
1121             error_setg(errp, "Unknown driver '%s'", drvname);
1122             return -ENOENT;
1123         }
1124         /* If the user has explicitly specified the driver, this choice should
1125          * override the BDRV_O_PROTOCOL flag */
1126         protocol = drv->bdrv_file_open;
1127     }
1128 
1129     if (protocol) {
1130         *flags |= BDRV_O_PROTOCOL;
1131     } else {
1132         *flags &= ~BDRV_O_PROTOCOL;
1133     }
1134 
1135     /* Translate cache options from flags into options */
1136     update_options_from_flags(*options, *flags);
1137 
1138     /* Fetch the file name from the options QDict if necessary */
1139     if (protocol && filename) {
1140         if (!qdict_haskey(*options, "filename")) {
1141             qdict_put(*options, "filename", qstring_from_str(filename));
1142             parse_filename = true;
1143         } else {
1144             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1145                              "the same time");
1146             return -EINVAL;
1147         }
1148     }
1149 
1150     /* Find the right block driver */
1151     filename = qdict_get_try_str(*options, "filename");
1152 
1153     if (!drvname && protocol) {
1154         if (filename) {
1155             drv = bdrv_find_protocol(filename, parse_filename, errp);
1156             if (!drv) {
1157                 return -EINVAL;
1158             }
1159 
1160             drvname = drv->format_name;
1161             qdict_put(*options, "driver", qstring_from_str(drvname));
1162         } else {
1163             error_setg(errp, "Must specify either driver or file");
1164             return -EINVAL;
1165         }
1166     }
1167 
1168     assert(drv || !protocol);
1169 
1170     /* Driver-specific filename parsing */
1171     if (drv && drv->bdrv_parse_filename && parse_filename) {
1172         drv->bdrv_parse_filename(filename, *options, &local_err);
1173         if (local_err) {
1174             error_propagate(errp, local_err);
1175             return -EINVAL;
1176         }
1177 
1178         if (!drv->bdrv_needs_filename) {
1179             qdict_del(*options, "filename");
1180         }
1181     }
1182 
1183     return 0;
1184 }
1185 
1186 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1187                                   const char *child_name,
1188                                   const BdrvChildRole *child_role)
1189 {
1190     BdrvChild *child = g_new(BdrvChild, 1);
1191     *child = (BdrvChild) {
1192         .bs     = child_bs,
1193         .name   = g_strdup(child_name),
1194         .role   = child_role,
1195     };
1196 
1197     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1198 
1199     return child;
1200 }
1201 
1202 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1203                                     BlockDriverState *child_bs,
1204                                     const char *child_name,
1205                                     const BdrvChildRole *child_role)
1206 {
1207     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1208     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1209     return child;
1210 }
1211 
1212 static void bdrv_detach_child(BdrvChild *child)
1213 {
1214     if (child->next.le_prev) {
1215         QLIST_REMOVE(child, next);
1216         child->next.le_prev = NULL;
1217     }
1218     QLIST_REMOVE(child, next_parent);
1219     g_free(child->name);
1220     g_free(child);
1221 }
1222 
1223 void bdrv_root_unref_child(BdrvChild *child)
1224 {
1225     BlockDriverState *child_bs;
1226 
1227     child_bs = child->bs;
1228     bdrv_detach_child(child);
1229     bdrv_unref(child_bs);
1230 }
1231 
1232 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1233 {
1234     if (child == NULL) {
1235         return;
1236     }
1237 
1238     if (child->bs->inherits_from == parent) {
1239         child->bs->inherits_from = NULL;
1240     }
1241 
1242     bdrv_root_unref_child(child);
1243 }
1244 
1245 /*
1246  * Sets the backing file link of a BDS. A new reference is created; callers
1247  * which don't need their own reference any more must call bdrv_unref().
1248  */
1249 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1250 {
1251     if (backing_hd) {
1252         bdrv_ref(backing_hd);
1253     }
1254 
1255     if (bs->backing) {
1256         assert(bs->backing_blocker);
1257         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1258         bdrv_unref_child(bs, bs->backing);
1259     } else if (backing_hd) {
1260         error_setg(&bs->backing_blocker,
1261                    "node is used as backing hd of '%s'",
1262                    bdrv_get_device_or_node_name(bs));
1263     }
1264 
1265     if (!backing_hd) {
1266         error_free(bs->backing_blocker);
1267         bs->backing_blocker = NULL;
1268         bs->backing = NULL;
1269         goto out;
1270     }
1271     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1272     bs->open_flags &= ~BDRV_O_NO_BACKING;
1273     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1274     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1275             backing_hd->drv ? backing_hd->drv->format_name : "");
1276 
1277     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1278     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1279     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1280                     bs->backing_blocker);
1281 out:
1282     bdrv_refresh_limits(bs, NULL);
1283 }
1284 
1285 /*
1286  * Opens the backing file for a BlockDriverState if not yet open
1287  *
1288  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1289  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1290  * itself, all options starting with "${bdref_key}." are considered part of the
1291  * BlockdevRef.
1292  *
1293  * TODO Can this be unified with bdrv_open_image()?
1294  */
1295 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1296                            const char *bdref_key, Error **errp)
1297 {
1298     char *backing_filename = g_malloc0(PATH_MAX);
1299     char *bdref_key_dot;
1300     const char *reference = NULL;
1301     int ret = 0;
1302     BlockDriverState *backing_hd;
1303     QDict *options;
1304     QDict *tmp_parent_options = NULL;
1305     Error *local_err = NULL;
1306 
1307     if (bs->backing != NULL) {
1308         goto free_exit;
1309     }
1310 
1311     /* NULL means an empty set of options */
1312     if (parent_options == NULL) {
1313         tmp_parent_options = qdict_new();
1314         parent_options = tmp_parent_options;
1315     }
1316 
1317     bs->open_flags &= ~BDRV_O_NO_BACKING;
1318 
1319     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1320     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1321     g_free(bdref_key_dot);
1322 
1323     reference = qdict_get_try_str(parent_options, bdref_key);
1324     if (reference || qdict_haskey(options, "file.filename")) {
1325         backing_filename[0] = '\0';
1326     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1327         QDECREF(options);
1328         goto free_exit;
1329     } else {
1330         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1331                                        &local_err);
1332         if (local_err) {
1333             ret = -EINVAL;
1334             error_propagate(errp, local_err);
1335             QDECREF(options);
1336             goto free_exit;
1337         }
1338     }
1339 
1340     if (!bs->drv || !bs->drv->supports_backing) {
1341         ret = -EINVAL;
1342         error_setg(errp, "Driver doesn't support backing files");
1343         QDECREF(options);
1344         goto free_exit;
1345     }
1346 
1347     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1348         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1349     }
1350 
1351     backing_hd = NULL;
1352     ret = bdrv_open_inherit(&backing_hd,
1353                             *backing_filename ? backing_filename : NULL,
1354                             reference, options, 0, bs, &child_backing,
1355                             errp);
1356     if (ret < 0) {
1357         bs->open_flags |= BDRV_O_NO_BACKING;
1358         error_prepend(errp, "Could not open backing file: ");
1359         goto free_exit;
1360     }
1361 
1362     /* Hook up the backing file link; drop our reference, bs owns the
1363      * backing_hd reference now */
1364     bdrv_set_backing_hd(bs, backing_hd);
1365     bdrv_unref(backing_hd);
1366 
1367     qdict_del(parent_options, bdref_key);
1368 
1369 free_exit:
1370     g_free(backing_filename);
1371     QDECREF(tmp_parent_options);
1372     return ret;
1373 }
1374 
1375 /*
1376  * Opens a disk image whose options are given as BlockdevRef in another block
1377  * device's options.
1378  *
1379  * If allow_none is true, no image will be opened if filename is false and no
1380  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1381  *
1382  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1383  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1384  * itself, all options starting with "${bdref_key}." are considered part of the
1385  * BlockdevRef.
1386  *
1387  * The BlockdevRef will be removed from the options QDict.
1388  */
1389 BdrvChild *bdrv_open_child(const char *filename,
1390                            QDict *options, const char *bdref_key,
1391                            BlockDriverState* parent,
1392                            const BdrvChildRole *child_role,
1393                            bool allow_none, Error **errp)
1394 {
1395     BdrvChild *c = NULL;
1396     BlockDriverState *bs;
1397     QDict *image_options;
1398     int ret;
1399     char *bdref_key_dot;
1400     const char *reference;
1401 
1402     assert(child_role != NULL);
1403 
1404     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1405     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1406     g_free(bdref_key_dot);
1407 
1408     reference = qdict_get_try_str(options, bdref_key);
1409     if (!filename && !reference && !qdict_size(image_options)) {
1410         if (!allow_none) {
1411             error_setg(errp, "A block device must be specified for \"%s\"",
1412                        bdref_key);
1413         }
1414         QDECREF(image_options);
1415         goto done;
1416     }
1417 
1418     bs = NULL;
1419     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1420                             parent, child_role, errp);
1421     if (ret < 0) {
1422         goto done;
1423     }
1424 
1425     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1426 
1427 done:
1428     qdict_del(options, bdref_key);
1429     return c;
1430 }
1431 
1432 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1433                                      QDict *snapshot_options, Error **errp)
1434 {
1435     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1436     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1437     int64_t total_size;
1438     QemuOpts *opts = NULL;
1439     BlockDriverState *bs_snapshot;
1440     Error *local_err = NULL;
1441     int ret;
1442 
1443     /* if snapshot, we create a temporary backing file and open it
1444        instead of opening 'filename' directly */
1445 
1446     /* Get the required size from the image */
1447     total_size = bdrv_getlength(bs);
1448     if (total_size < 0) {
1449         ret = total_size;
1450         error_setg_errno(errp, -total_size, "Could not get image size");
1451         goto out;
1452     }
1453 
1454     /* Create the temporary image */
1455     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1456     if (ret < 0) {
1457         error_setg_errno(errp, -ret, "Could not get temporary filename");
1458         goto out;
1459     }
1460 
1461     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1462                             &error_abort);
1463     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1464     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1465     qemu_opts_del(opts);
1466     if (ret < 0) {
1467         error_prepend(errp, "Could not create temporary overlay '%s': ",
1468                       tmp_filename);
1469         goto out;
1470     }
1471 
1472     /* Prepare options QDict for the temporary file */
1473     qdict_put(snapshot_options, "file.driver",
1474               qstring_from_str("file"));
1475     qdict_put(snapshot_options, "file.filename",
1476               qstring_from_str(tmp_filename));
1477     qdict_put(snapshot_options, "driver",
1478               qstring_from_str("qcow2"));
1479 
1480     bs_snapshot = bdrv_new();
1481 
1482     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1483                     flags, &local_err);
1484     snapshot_options = NULL;
1485     if (ret < 0) {
1486         error_propagate(errp, local_err);
1487         goto out;
1488     }
1489 
1490     bdrv_append(bs_snapshot, bs);
1491 
1492 out:
1493     QDECREF(snapshot_options);
1494     g_free(tmp_filename);
1495     return ret;
1496 }
1497 
1498 /*
1499  * Opens a disk image (raw, qcow2, vmdk, ...)
1500  *
1501  * options is a QDict of options to pass to the block drivers, or NULL for an
1502  * empty set of options. The reference to the QDict belongs to the block layer
1503  * after the call (even on failure), so if the caller intends to reuse the
1504  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1505  *
1506  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1507  * If it is not NULL, the referenced BDS will be reused.
1508  *
1509  * The reference parameter may be used to specify an existing block device which
1510  * should be opened. If specified, neither options nor a filename may be given,
1511  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1512  */
1513 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1514                              const char *reference, QDict *options, int flags,
1515                              BlockDriverState *parent,
1516                              const BdrvChildRole *child_role, Error **errp)
1517 {
1518     int ret;
1519     BdrvChild *file = NULL;
1520     BlockDriverState *bs;
1521     BlockDriver *drv = NULL;
1522     const char *drvname;
1523     const char *backing;
1524     Error *local_err = NULL;
1525     QDict *snapshot_options = NULL;
1526     int snapshot_flags = 0;
1527 
1528     assert(pbs);
1529     assert(!child_role || !flags);
1530     assert(!child_role == !parent);
1531 
1532     if (reference) {
1533         bool options_non_empty = options ? qdict_size(options) : false;
1534         QDECREF(options);
1535 
1536         if (*pbs) {
1537             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1538                        "another block device");
1539             return -EINVAL;
1540         }
1541 
1542         if (filename || options_non_empty) {
1543             error_setg(errp, "Cannot reference an existing block device with "
1544                        "additional options or a new filename");
1545             return -EINVAL;
1546         }
1547 
1548         bs = bdrv_lookup_bs(reference, reference, errp);
1549         if (!bs) {
1550             return -ENODEV;
1551         }
1552         bdrv_ref(bs);
1553         *pbs = bs;
1554         return 0;
1555     }
1556 
1557     if (*pbs) {
1558         bs = *pbs;
1559     } else {
1560         bs = bdrv_new();
1561     }
1562 
1563     /* NULL means an empty set of options */
1564     if (options == NULL) {
1565         options = qdict_new();
1566     }
1567 
1568     /* json: syntax counts as explicit options, as if in the QDict */
1569     parse_json_protocol(options, &filename, &local_err);
1570     if (local_err) {
1571         ret = -EINVAL;
1572         goto fail;
1573     }
1574 
1575     bs->explicit_options = qdict_clone_shallow(options);
1576 
1577     if (child_role) {
1578         bs->inherits_from = parent;
1579         child_role->inherit_options(&flags, options,
1580                                     parent->open_flags, parent->options);
1581     }
1582 
1583     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1584     if (local_err) {
1585         goto fail;
1586     }
1587 
1588     bs->open_flags = flags;
1589     bs->options = options;
1590     options = qdict_clone_shallow(options);
1591 
1592     /* Find the right image format driver */
1593     drvname = qdict_get_try_str(options, "driver");
1594     if (drvname) {
1595         drv = bdrv_find_format(drvname);
1596         if (!drv) {
1597             error_setg(errp, "Unknown driver: '%s'", drvname);
1598             ret = -EINVAL;
1599             goto fail;
1600         }
1601     }
1602 
1603     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1604 
1605     backing = qdict_get_try_str(options, "backing");
1606     if (backing && *backing == '\0') {
1607         flags |= BDRV_O_NO_BACKING;
1608         qdict_del(options, "backing");
1609     }
1610 
1611     /* Open image file without format layer */
1612     if ((flags & BDRV_O_PROTOCOL) == 0) {
1613         if (flags & BDRV_O_RDWR) {
1614             flags |= BDRV_O_ALLOW_RDWR;
1615         }
1616         if (flags & BDRV_O_SNAPSHOT) {
1617             snapshot_options = qdict_new();
1618             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1619                                        flags, options);
1620             bdrv_backing_options(&flags, options, flags, options);
1621         }
1622 
1623         bs->open_flags = flags;
1624 
1625         file = bdrv_open_child(filename, options, "file", bs,
1626                                &child_file, true, &local_err);
1627         if (local_err) {
1628             ret = -EINVAL;
1629             goto fail;
1630         }
1631     }
1632 
1633     /* Image format probing */
1634     bs->probed = !drv;
1635     if (!drv && file) {
1636         ret = find_image_format(file->bs, filename, &drv, &local_err);
1637         if (ret < 0) {
1638             goto fail;
1639         }
1640         /*
1641          * This option update would logically belong in bdrv_fill_options(),
1642          * but we first need to open bs->file for the probing to work, while
1643          * opening bs->file already requires the (mostly) final set of options
1644          * so that cache mode etc. can be inherited.
1645          *
1646          * Adding the driver later is somewhat ugly, but it's not an option
1647          * that would ever be inherited, so it's correct. We just need to make
1648          * sure to update both bs->options (which has the full effective
1649          * options for bs) and options (which has file.* already removed).
1650          */
1651         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1652         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1653     } else if (!drv) {
1654         error_setg(errp, "Must specify either driver or file");
1655         ret = -EINVAL;
1656         goto fail;
1657     }
1658 
1659     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1660     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1661     /* file must be NULL if a protocol BDS is about to be created
1662      * (the inverse results in an error message from bdrv_open_common()) */
1663     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1664 
1665     /* Open the image */
1666     ret = bdrv_open_common(bs, file, options, &local_err);
1667     if (ret < 0) {
1668         goto fail;
1669     }
1670 
1671     if (file && (bs->file != file)) {
1672         bdrv_unref_child(bs, file);
1673         file = NULL;
1674     }
1675 
1676     /* If there is a backing file, use it */
1677     if ((flags & BDRV_O_NO_BACKING) == 0) {
1678         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1679         if (ret < 0) {
1680             goto close_and_fail;
1681         }
1682     }
1683 
1684     bdrv_refresh_filename(bs);
1685 
1686     /* Check if any unknown options were used */
1687     if (options && (qdict_size(options) != 0)) {
1688         const QDictEntry *entry = qdict_first(options);
1689         if (flags & BDRV_O_PROTOCOL) {
1690             error_setg(errp, "Block protocol '%s' doesn't support the option "
1691                        "'%s'", drv->format_name, entry->key);
1692         } else {
1693             error_setg(errp,
1694                        "Block format '%s' does not support the option '%s'",
1695                        drv->format_name, entry->key);
1696         }
1697 
1698         ret = -EINVAL;
1699         goto close_and_fail;
1700     }
1701 
1702     if (!bdrv_key_required(bs)) {
1703         if (bs->blk) {
1704             blk_dev_change_media_cb(bs->blk, true);
1705         }
1706     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1707                && !runstate_check(RUN_STATE_INMIGRATE)
1708                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1709         error_setg(errp,
1710                    "Guest must be stopped for opening of encrypted image");
1711         ret = -EBUSY;
1712         goto close_and_fail;
1713     }
1714 
1715     QDECREF(options);
1716     *pbs = bs;
1717 
1718     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1719      * temporary snapshot afterwards. */
1720     if (snapshot_flags) {
1721         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1722                                         &local_err);
1723         snapshot_options = NULL;
1724         if (local_err) {
1725             goto close_and_fail;
1726         }
1727     }
1728 
1729     return 0;
1730 
1731 fail:
1732     if (file != NULL) {
1733         bdrv_unref_child(bs, file);
1734     }
1735     QDECREF(snapshot_options);
1736     QDECREF(bs->explicit_options);
1737     QDECREF(bs->options);
1738     QDECREF(options);
1739     bs->options = NULL;
1740     if (!*pbs) {
1741         /* If *pbs is NULL, a new BDS has been created in this function and
1742            needs to be freed now. Otherwise, it does not need to be closed,
1743            since it has not really been opened yet. */
1744         bdrv_unref(bs);
1745     }
1746     if (local_err) {
1747         error_propagate(errp, local_err);
1748     }
1749     return ret;
1750 
1751 close_and_fail:
1752     /* See fail path, but now the BDS has to be always closed */
1753     if (*pbs) {
1754         bdrv_close(bs);
1755     } else {
1756         bdrv_unref(bs);
1757     }
1758     QDECREF(snapshot_options);
1759     QDECREF(options);
1760     if (local_err) {
1761         error_propagate(errp, local_err);
1762     }
1763     return ret;
1764 }
1765 
1766 int bdrv_open(BlockDriverState **pbs, const char *filename,
1767               const char *reference, QDict *options, int flags, Error **errp)
1768 {
1769     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1770                              NULL, errp);
1771 }
1772 
1773 typedef struct BlockReopenQueueEntry {
1774      bool prepared;
1775      BDRVReopenState state;
1776      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1777 } BlockReopenQueueEntry;
1778 
1779 /*
1780  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1781  * reopen of multiple devices.
1782  *
1783  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1784  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1785  * be created and initialized. This newly created BlockReopenQueue should be
1786  * passed back in for subsequent calls that are intended to be of the same
1787  * atomic 'set'.
1788  *
1789  * bs is the BlockDriverState to add to the reopen queue.
1790  *
1791  * options contains the changed options for the associated bs
1792  * (the BlockReopenQueue takes ownership)
1793  *
1794  * flags contains the open flags for the associated bs
1795  *
1796  * returns a pointer to bs_queue, which is either the newly allocated
1797  * bs_queue, or the existing bs_queue being used.
1798  *
1799  */
1800 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1801                                                  BlockDriverState *bs,
1802                                                  QDict *options,
1803                                                  int flags,
1804                                                  const BdrvChildRole *role,
1805                                                  QDict *parent_options,
1806                                                  int parent_flags)
1807 {
1808     assert(bs != NULL);
1809 
1810     BlockReopenQueueEntry *bs_entry;
1811     BdrvChild *child;
1812     QDict *old_options, *explicit_options;
1813 
1814     if (bs_queue == NULL) {
1815         bs_queue = g_new0(BlockReopenQueue, 1);
1816         QSIMPLEQ_INIT(bs_queue);
1817     }
1818 
1819     if (!options) {
1820         options = qdict_new();
1821     }
1822 
1823     /*
1824      * Precedence of options:
1825      * 1. Explicitly passed in options (highest)
1826      * 2. Set in flags (only for top level)
1827      * 3. Retained from explicitly set options of bs
1828      * 4. Inherited from parent node
1829      * 5. Retained from effective options of bs
1830      */
1831 
1832     if (!parent_options) {
1833         /*
1834          * Any setting represented by flags is always updated. If the
1835          * corresponding QDict option is set, it takes precedence. Otherwise
1836          * the flag is translated into a QDict option. The old setting of bs is
1837          * not considered.
1838          */
1839         update_options_from_flags(options, flags);
1840     }
1841 
1842     /* Old explicitly set values (don't overwrite by inherited value) */
1843     old_options = qdict_clone_shallow(bs->explicit_options);
1844     bdrv_join_options(bs, options, old_options);
1845     QDECREF(old_options);
1846 
1847     explicit_options = qdict_clone_shallow(options);
1848 
1849     /* Inherit from parent node */
1850     if (parent_options) {
1851         assert(!flags);
1852         role->inherit_options(&flags, options, parent_flags, parent_options);
1853     }
1854 
1855     /* Old values are used for options that aren't set yet */
1856     old_options = qdict_clone_shallow(bs->options);
1857     bdrv_join_options(bs, options, old_options);
1858     QDECREF(old_options);
1859 
1860     /* bdrv_open() masks this flag out */
1861     flags &= ~BDRV_O_PROTOCOL;
1862 
1863     QLIST_FOREACH(child, &bs->children, next) {
1864         QDict *new_child_options;
1865         char *child_key_dot;
1866 
1867         /* reopen can only change the options of block devices that were
1868          * implicitly created and inherited options. For other (referenced)
1869          * block devices, a syntax like "backing.foo" results in an error. */
1870         if (child->bs->inherits_from != bs) {
1871             continue;
1872         }
1873 
1874         child_key_dot = g_strdup_printf("%s.", child->name);
1875         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1876         g_free(child_key_dot);
1877 
1878         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1879                                 child->role, options, flags);
1880     }
1881 
1882     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1883     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1884 
1885     bs_entry->state.bs = bs;
1886     bs_entry->state.options = options;
1887     bs_entry->state.explicit_options = explicit_options;
1888     bs_entry->state.flags = flags;
1889 
1890     return bs_queue;
1891 }
1892 
1893 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1894                                     BlockDriverState *bs,
1895                                     QDict *options, int flags)
1896 {
1897     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1898                                    NULL, NULL, 0);
1899 }
1900 
1901 /*
1902  * Reopen multiple BlockDriverStates atomically & transactionally.
1903  *
1904  * The queue passed in (bs_queue) must have been built up previous
1905  * via bdrv_reopen_queue().
1906  *
1907  * Reopens all BDS specified in the queue, with the appropriate
1908  * flags.  All devices are prepared for reopen, and failure of any
1909  * device will cause all device changes to be abandonded, and intermediate
1910  * data cleaned up.
1911  *
1912  * If all devices prepare successfully, then the changes are committed
1913  * to all devices.
1914  *
1915  */
1916 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1917 {
1918     int ret = -1;
1919     BlockReopenQueueEntry *bs_entry, *next;
1920     Error *local_err = NULL;
1921 
1922     assert(bs_queue != NULL);
1923 
1924     bdrv_drain_all();
1925 
1926     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1927         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1928             error_propagate(errp, local_err);
1929             goto cleanup;
1930         }
1931         bs_entry->prepared = true;
1932     }
1933 
1934     /* If we reach this point, we have success and just need to apply the
1935      * changes
1936      */
1937     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1938         bdrv_reopen_commit(&bs_entry->state);
1939     }
1940 
1941     ret = 0;
1942 
1943 cleanup:
1944     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1945         if (ret && bs_entry->prepared) {
1946             bdrv_reopen_abort(&bs_entry->state);
1947         } else if (ret) {
1948             QDECREF(bs_entry->state.explicit_options);
1949         }
1950         QDECREF(bs_entry->state.options);
1951         g_free(bs_entry);
1952     }
1953     g_free(bs_queue);
1954     return ret;
1955 }
1956 
1957 
1958 /* Reopen a single BlockDriverState with the specified flags. */
1959 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1960 {
1961     int ret = -1;
1962     Error *local_err = NULL;
1963     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1964 
1965     ret = bdrv_reopen_multiple(queue, &local_err);
1966     if (local_err != NULL) {
1967         error_propagate(errp, local_err);
1968     }
1969     return ret;
1970 }
1971 
1972 
1973 /*
1974  * Prepares a BlockDriverState for reopen. All changes are staged in the
1975  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1976  * the block driver layer .bdrv_reopen_prepare()
1977  *
1978  * bs is the BlockDriverState to reopen
1979  * flags are the new open flags
1980  * queue is the reopen queue
1981  *
1982  * Returns 0 on success, non-zero on error.  On error errp will be set
1983  * as well.
1984  *
1985  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1986  * It is the responsibility of the caller to then call the abort() or
1987  * commit() for any other BDS that have been left in a prepare() state
1988  *
1989  */
1990 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1991                         Error **errp)
1992 {
1993     int ret = -1;
1994     Error *local_err = NULL;
1995     BlockDriver *drv;
1996     QemuOpts *opts;
1997     const char *value;
1998 
1999     assert(reopen_state != NULL);
2000     assert(reopen_state->bs->drv != NULL);
2001     drv = reopen_state->bs->drv;
2002 
2003     /* Process generic block layer options */
2004     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
2005     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
2006     if (local_err) {
2007         error_propagate(errp, local_err);
2008         ret = -EINVAL;
2009         goto error;
2010     }
2011 
2012     update_flags_from_options(&reopen_state->flags, opts);
2013 
2014     /* If a guest device is attached, it owns WCE */
2015     if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2016         bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2017         bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2018         if (old_wce != new_wce) {
2019             error_setg(errp, "Cannot change cache.writeback: Device attached");
2020             ret = -EINVAL;
2021             goto error;
2022         }
2023     }
2024 
2025     /* node-name and driver must be unchanged. Put them back into the QDict, so
2026      * that they are checked at the end of this function. */
2027     value = qemu_opt_get(opts, "node-name");
2028     if (value) {
2029         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2030     }
2031 
2032     value = qemu_opt_get(opts, "driver");
2033     if (value) {
2034         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2035     }
2036 
2037     /* if we are to stay read-only, do not allow permission change
2038      * to r/w */
2039     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2040         reopen_state->flags & BDRV_O_RDWR) {
2041         error_setg(errp, "Node '%s' is read only",
2042                    bdrv_get_device_or_node_name(reopen_state->bs));
2043         goto error;
2044     }
2045 
2046 
2047     ret = bdrv_flush(reopen_state->bs);
2048     if (ret) {
2049         error_setg_errno(errp, -ret, "Error flushing drive");
2050         goto error;
2051     }
2052 
2053     if (drv->bdrv_reopen_prepare) {
2054         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2055         if (ret) {
2056             if (local_err != NULL) {
2057                 error_propagate(errp, local_err);
2058             } else {
2059                 error_setg(errp, "failed while preparing to reopen image '%s'",
2060                            reopen_state->bs->filename);
2061             }
2062             goto error;
2063         }
2064     } else {
2065         /* It is currently mandatory to have a bdrv_reopen_prepare()
2066          * handler for each supported drv. */
2067         error_setg(errp, "Block format '%s' used by node '%s' "
2068                    "does not support reopening files", drv->format_name,
2069                    bdrv_get_device_or_node_name(reopen_state->bs));
2070         ret = -1;
2071         goto error;
2072     }
2073 
2074     /* Options that are not handled are only okay if they are unchanged
2075      * compared to the old state. It is expected that some options are only
2076      * used for the initial open, but not reopen (e.g. filename) */
2077     if (qdict_size(reopen_state->options)) {
2078         const QDictEntry *entry = qdict_first(reopen_state->options);
2079 
2080         do {
2081             QString *new_obj = qobject_to_qstring(entry->value);
2082             const char *new = qstring_get_str(new_obj);
2083             const char *old = qdict_get_try_str(reopen_state->bs->options,
2084                                                 entry->key);
2085 
2086             if (!old || strcmp(new, old)) {
2087                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2088                 ret = -EINVAL;
2089                 goto error;
2090             }
2091         } while ((entry = qdict_next(reopen_state->options, entry)));
2092     }
2093 
2094     ret = 0;
2095 
2096 error:
2097     qemu_opts_del(opts);
2098     return ret;
2099 }
2100 
2101 /*
2102  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2103  * makes them final by swapping the staging BlockDriverState contents into
2104  * the active BlockDriverState contents.
2105  */
2106 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2107 {
2108     BlockDriver *drv;
2109 
2110     assert(reopen_state != NULL);
2111     drv = reopen_state->bs->drv;
2112     assert(drv != NULL);
2113 
2114     /* If there are any driver level actions to take */
2115     if (drv->bdrv_reopen_commit) {
2116         drv->bdrv_reopen_commit(reopen_state);
2117     }
2118 
2119     /* set BDS specific flags now */
2120     QDECREF(reopen_state->bs->explicit_options);
2121 
2122     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2123     reopen_state->bs->open_flags         = reopen_state->flags;
2124     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2125                                               BDRV_O_CACHE_WB);
2126     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2127 
2128     bdrv_refresh_limits(reopen_state->bs, NULL);
2129 }
2130 
2131 /*
2132  * Abort the reopen, and delete and free the staged changes in
2133  * reopen_state
2134  */
2135 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2136 {
2137     BlockDriver *drv;
2138 
2139     assert(reopen_state != NULL);
2140     drv = reopen_state->bs->drv;
2141     assert(drv != NULL);
2142 
2143     if (drv->bdrv_reopen_abort) {
2144         drv->bdrv_reopen_abort(reopen_state);
2145     }
2146 
2147     QDECREF(reopen_state->explicit_options);
2148 }
2149 
2150 
2151 static void bdrv_close(BlockDriverState *bs)
2152 {
2153     BdrvAioNotifier *ban, *ban_next;
2154 
2155     assert(!bs->job);
2156 
2157     /* Disable I/O limits and drain all pending throttled requests */
2158     if (bs->throttle_state) {
2159         bdrv_io_limits_disable(bs);
2160     }
2161 
2162     bdrv_drained_begin(bs); /* complete I/O */
2163     bdrv_flush(bs);
2164     bdrv_drain(bs); /* in case flush left pending I/O */
2165 
2166     bdrv_release_named_dirty_bitmaps(bs);
2167     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2168 
2169     if (bs->blk) {
2170         blk_dev_change_media_cb(bs->blk, false);
2171     }
2172 
2173     if (bs->drv) {
2174         BdrvChild *child, *next;
2175 
2176         bs->drv->bdrv_close(bs);
2177         bs->drv = NULL;
2178 
2179         bdrv_set_backing_hd(bs, NULL);
2180 
2181         if (bs->file != NULL) {
2182             bdrv_unref_child(bs, bs->file);
2183             bs->file = NULL;
2184         }
2185 
2186         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2187             /* TODO Remove bdrv_unref() from drivers' close function and use
2188              * bdrv_unref_child() here */
2189             if (child->bs->inherits_from == bs) {
2190                 child->bs->inherits_from = NULL;
2191             }
2192             bdrv_detach_child(child);
2193         }
2194 
2195         g_free(bs->opaque);
2196         bs->opaque = NULL;
2197         bs->copy_on_read = 0;
2198         bs->backing_file[0] = '\0';
2199         bs->backing_format[0] = '\0';
2200         bs->total_sectors = 0;
2201         bs->encrypted = 0;
2202         bs->valid_key = 0;
2203         bs->sg = 0;
2204         bs->zero_beyond_eof = false;
2205         QDECREF(bs->options);
2206         QDECREF(bs->explicit_options);
2207         bs->options = NULL;
2208         QDECREF(bs->full_open_options);
2209         bs->full_open_options = NULL;
2210     }
2211 
2212     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2213         g_free(ban);
2214     }
2215     QLIST_INIT(&bs->aio_notifiers);
2216     bdrv_drained_end(bs);
2217 }
2218 
2219 void bdrv_close_all(void)
2220 {
2221     BlockDriverState *bs;
2222     AioContext *aio_context;
2223 
2224     /* Drop references from requests still in flight, such as canceled block
2225      * jobs whose AIO context has not been polled yet */
2226     bdrv_drain_all();
2227 
2228     blk_remove_all_bs();
2229     blockdev_close_all_bdrv_states();
2230 
2231     /* Cancel all block jobs */
2232     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2233         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2234             aio_context = bdrv_get_aio_context(bs);
2235 
2236             aio_context_acquire(aio_context);
2237             if (bs->job) {
2238                 block_job_cancel_sync(bs->job);
2239                 aio_context_release(aio_context);
2240                 break;
2241             }
2242             aio_context_release(aio_context);
2243         }
2244 
2245         /* All the remaining BlockDriverStates are referenced directly or
2246          * indirectly from block jobs, so there needs to be at least one BDS
2247          * directly used by a block job */
2248         assert(bs);
2249     }
2250 }
2251 
2252 /* Fields that need to stay with the top-level BDS */
2253 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2254                                      BlockDriverState *bs_src)
2255 {
2256     /* move some fields that need to stay attached to the device */
2257 
2258     /* dev info */
2259     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2260 }
2261 
2262 static void change_parent_backing_link(BlockDriverState *from,
2263                                        BlockDriverState *to)
2264 {
2265     BdrvChild *c, *next;
2266 
2267     if (from->blk) {
2268         /* FIXME We bypass blk_set_bs(), so we need to make these updates
2269          * manually. The root problem is not in this change function, but the
2270          * existence of BlockDriverState.blk. */
2271         to->blk = from->blk;
2272         from->blk = NULL;
2273     }
2274 
2275     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2276         assert(c->role != &child_backing);
2277         c->bs = to;
2278         QLIST_REMOVE(c, next_parent);
2279         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2280         bdrv_ref(to);
2281         bdrv_unref(from);
2282     }
2283 }
2284 
2285 static void swap_feature_fields(BlockDriverState *bs_top,
2286                                 BlockDriverState *bs_new)
2287 {
2288     BlockDriverState tmp;
2289 
2290     bdrv_move_feature_fields(&tmp, bs_top);
2291     bdrv_move_feature_fields(bs_top, bs_new);
2292     bdrv_move_feature_fields(bs_new, &tmp);
2293 
2294     assert(!bs_new->throttle_state);
2295     if (bs_top->throttle_state) {
2296         assert(bs_top->io_limits_enabled);
2297         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2298         bdrv_io_limits_disable(bs_top);
2299     }
2300 }
2301 
2302 /*
2303  * Add new bs contents at the top of an image chain while the chain is
2304  * live, while keeping required fields on the top layer.
2305  *
2306  * This will modify the BlockDriverState fields, and swap contents
2307  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2308  *
2309  * bs_new must not be attached to a BlockBackend.
2310  *
2311  * This function does not create any image files.
2312  *
2313  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2314  * that's what the callers commonly need. bs_new will be referenced by the old
2315  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2316  * reference of its own, it must call bdrv_ref().
2317  */
2318 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2319 {
2320     assert(!bdrv_requests_pending(bs_top));
2321     assert(!bdrv_requests_pending(bs_new));
2322 
2323     bdrv_ref(bs_top);
2324     change_parent_backing_link(bs_top, bs_new);
2325 
2326     /* Some fields always stay on top of the backing file chain */
2327     swap_feature_fields(bs_top, bs_new);
2328 
2329     bdrv_set_backing_hd(bs_new, bs_top);
2330     bdrv_unref(bs_top);
2331 
2332     /* bs_new is now referenced by its new parents, we don't need the
2333      * additional reference any more. */
2334     bdrv_unref(bs_new);
2335 }
2336 
2337 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2338 {
2339     assert(!bdrv_requests_pending(old));
2340     assert(!bdrv_requests_pending(new));
2341 
2342     bdrv_ref(old);
2343 
2344     if (old->blk) {
2345         /* As long as these fields aren't in BlockBackend, but in the top-level
2346          * BlockDriverState, it's not possible for a BDS to have two BBs.
2347          *
2348          * We really want to copy the fields from old to new, but we go for a
2349          * swap instead so that pointers aren't duplicated and cause trouble.
2350          * (Also, bdrv_swap() used to do the same.) */
2351         assert(!new->blk);
2352         swap_feature_fields(old, new);
2353     }
2354     change_parent_backing_link(old, new);
2355 
2356     /* Change backing files if a previously independent node is added to the
2357      * chain. For active commit, we replace top by its own (indirect) backing
2358      * file and don't do anything here so we don't build a loop. */
2359     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2360         bdrv_set_backing_hd(new, backing_bs(old));
2361         bdrv_set_backing_hd(old, NULL);
2362     }
2363 
2364     bdrv_unref(old);
2365 }
2366 
2367 static void bdrv_delete(BlockDriverState *bs)
2368 {
2369     assert(!bs->job);
2370     assert(bdrv_op_blocker_is_empty(bs));
2371     assert(!bs->refcnt);
2372 
2373     bdrv_close(bs);
2374 
2375     /* remove from list, if necessary */
2376     if (bs->node_name[0] != '\0') {
2377         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2378     }
2379     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2380 
2381     g_free(bs);
2382 }
2383 
2384 /*
2385  * Run consistency checks on an image
2386  *
2387  * Returns 0 if the check could be completed (it doesn't mean that the image is
2388  * free of errors) or -errno when an internal error occurred. The results of the
2389  * check are stored in res.
2390  */
2391 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2392 {
2393     if (bs->drv == NULL) {
2394         return -ENOMEDIUM;
2395     }
2396     if (bs->drv->bdrv_check == NULL) {
2397         return -ENOTSUP;
2398     }
2399 
2400     memset(res, 0, sizeof(*res));
2401     return bs->drv->bdrv_check(bs, res, fix);
2402 }
2403 
2404 #define COMMIT_BUF_SECTORS 2048
2405 
2406 /* commit COW file into the raw image */
2407 int bdrv_commit(BlockDriverState *bs)
2408 {
2409     BlockDriver *drv = bs->drv;
2410     int64_t sector, total_sectors, length, backing_length;
2411     int n, ro, open_flags;
2412     int ret = 0;
2413     uint8_t *buf = NULL;
2414 
2415     if (!drv)
2416         return -ENOMEDIUM;
2417 
2418     if (!bs->backing) {
2419         return -ENOTSUP;
2420     }
2421 
2422     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2423         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2424         return -EBUSY;
2425     }
2426 
2427     ro = bs->backing->bs->read_only;
2428     open_flags =  bs->backing->bs->open_flags;
2429 
2430     if (ro) {
2431         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2432             return -EACCES;
2433         }
2434     }
2435 
2436     length = bdrv_getlength(bs);
2437     if (length < 0) {
2438         ret = length;
2439         goto ro_cleanup;
2440     }
2441 
2442     backing_length = bdrv_getlength(bs->backing->bs);
2443     if (backing_length < 0) {
2444         ret = backing_length;
2445         goto ro_cleanup;
2446     }
2447 
2448     /* If our top snapshot is larger than the backing file image,
2449      * grow the backing file image if possible.  If not possible,
2450      * we must return an error */
2451     if (length > backing_length) {
2452         ret = bdrv_truncate(bs->backing->bs, length);
2453         if (ret < 0) {
2454             goto ro_cleanup;
2455         }
2456     }
2457 
2458     total_sectors = length >> BDRV_SECTOR_BITS;
2459 
2460     /* qemu_try_blockalign() for bs will choose an alignment that works for
2461      * bs->backing->bs as well, so no need to compare the alignment manually. */
2462     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2463     if (buf == NULL) {
2464         ret = -ENOMEM;
2465         goto ro_cleanup;
2466     }
2467 
2468     for (sector = 0; sector < total_sectors; sector += n) {
2469         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2470         if (ret < 0) {
2471             goto ro_cleanup;
2472         }
2473         if (ret) {
2474             ret = bdrv_read(bs, sector, buf, n);
2475             if (ret < 0) {
2476                 goto ro_cleanup;
2477             }
2478 
2479             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2480             if (ret < 0) {
2481                 goto ro_cleanup;
2482             }
2483         }
2484     }
2485 
2486     if (drv->bdrv_make_empty) {
2487         ret = drv->bdrv_make_empty(bs);
2488         if (ret < 0) {
2489             goto ro_cleanup;
2490         }
2491         bdrv_flush(bs);
2492     }
2493 
2494     /*
2495      * Make sure all data we wrote to the backing device is actually
2496      * stable on disk.
2497      */
2498     if (bs->backing) {
2499         bdrv_flush(bs->backing->bs);
2500     }
2501 
2502     ret = 0;
2503 ro_cleanup:
2504     qemu_vfree(buf);
2505 
2506     if (ro) {
2507         /* ignoring error return here */
2508         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2509     }
2510 
2511     return ret;
2512 }
2513 
2514 /*
2515  * Return values:
2516  * 0        - success
2517  * -EINVAL  - backing format specified, but no file
2518  * -ENOSPC  - can't update the backing file because no space is left in the
2519  *            image file header
2520  * -ENOTSUP - format driver doesn't support changing the backing file
2521  */
2522 int bdrv_change_backing_file(BlockDriverState *bs,
2523     const char *backing_file, const char *backing_fmt)
2524 {
2525     BlockDriver *drv = bs->drv;
2526     int ret;
2527 
2528     /* Backing file format doesn't make sense without a backing file */
2529     if (backing_fmt && !backing_file) {
2530         return -EINVAL;
2531     }
2532 
2533     if (drv->bdrv_change_backing_file != NULL) {
2534         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2535     } else {
2536         ret = -ENOTSUP;
2537     }
2538 
2539     if (ret == 0) {
2540         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2541         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2542     }
2543     return ret;
2544 }
2545 
2546 /*
2547  * Finds the image layer in the chain that has 'bs' as its backing file.
2548  *
2549  * active is the current topmost image.
2550  *
2551  * Returns NULL if bs is not found in active's image chain,
2552  * or if active == bs.
2553  *
2554  * Returns the bottommost base image if bs == NULL.
2555  */
2556 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2557                                     BlockDriverState *bs)
2558 {
2559     while (active && bs != backing_bs(active)) {
2560         active = backing_bs(active);
2561     }
2562 
2563     return active;
2564 }
2565 
2566 /* Given a BDS, searches for the base layer. */
2567 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2568 {
2569     return bdrv_find_overlay(bs, NULL);
2570 }
2571 
2572 /*
2573  * Drops images above 'base' up to and including 'top', and sets the image
2574  * above 'top' to have base as its backing file.
2575  *
2576  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2577  * information in 'bs' can be properly updated.
2578  *
2579  * E.g., this will convert the following chain:
2580  * bottom <- base <- intermediate <- top <- active
2581  *
2582  * to
2583  *
2584  * bottom <- base <- active
2585  *
2586  * It is allowed for bottom==base, in which case it converts:
2587  *
2588  * base <- intermediate <- top <- active
2589  *
2590  * to
2591  *
2592  * base <- active
2593  *
2594  * If backing_file_str is non-NULL, it will be used when modifying top's
2595  * overlay image metadata.
2596  *
2597  * Error conditions:
2598  *  if active == top, that is considered an error
2599  *
2600  */
2601 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2602                            BlockDriverState *base, const char *backing_file_str)
2603 {
2604     BlockDriverState *new_top_bs = NULL;
2605     int ret = -EIO;
2606 
2607     if (!top->drv || !base->drv) {
2608         goto exit;
2609     }
2610 
2611     new_top_bs = bdrv_find_overlay(active, top);
2612 
2613     if (new_top_bs == NULL) {
2614         /* we could not find the image above 'top', this is an error */
2615         goto exit;
2616     }
2617 
2618     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2619      * to do, no intermediate images */
2620     if (backing_bs(new_top_bs) == base) {
2621         ret = 0;
2622         goto exit;
2623     }
2624 
2625     /* Make sure that base is in the backing chain of top */
2626     if (!bdrv_chain_contains(top, base)) {
2627         goto exit;
2628     }
2629 
2630     /* success - we can delete the intermediate states, and link top->base */
2631     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2632     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2633                                    base->drv ? base->drv->format_name : "");
2634     if (ret) {
2635         goto exit;
2636     }
2637     bdrv_set_backing_hd(new_top_bs, base);
2638 
2639     ret = 0;
2640 exit:
2641     return ret;
2642 }
2643 
2644 /**
2645  * Truncate file to 'offset' bytes (needed only for file protocols)
2646  */
2647 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2648 {
2649     BlockDriver *drv = bs->drv;
2650     int ret;
2651     if (!drv)
2652         return -ENOMEDIUM;
2653     if (!drv->bdrv_truncate)
2654         return -ENOTSUP;
2655     if (bs->read_only)
2656         return -EACCES;
2657 
2658     ret = drv->bdrv_truncate(bs, offset);
2659     if (ret == 0) {
2660         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2661         bdrv_dirty_bitmap_truncate(bs);
2662         if (bs->blk) {
2663             blk_dev_resize_cb(bs->blk);
2664         }
2665     }
2666     return ret;
2667 }
2668 
2669 /**
2670  * Length of a allocated file in bytes. Sparse files are counted by actual
2671  * allocated space. Return < 0 if error or unknown.
2672  */
2673 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2674 {
2675     BlockDriver *drv = bs->drv;
2676     if (!drv) {
2677         return -ENOMEDIUM;
2678     }
2679     if (drv->bdrv_get_allocated_file_size) {
2680         return drv->bdrv_get_allocated_file_size(bs);
2681     }
2682     if (bs->file) {
2683         return bdrv_get_allocated_file_size(bs->file->bs);
2684     }
2685     return -ENOTSUP;
2686 }
2687 
2688 /**
2689  * Return number of sectors on success, -errno on error.
2690  */
2691 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2692 {
2693     BlockDriver *drv = bs->drv;
2694 
2695     if (!drv)
2696         return -ENOMEDIUM;
2697 
2698     if (drv->has_variable_length) {
2699         int ret = refresh_total_sectors(bs, bs->total_sectors);
2700         if (ret < 0) {
2701             return ret;
2702         }
2703     }
2704     return bs->total_sectors;
2705 }
2706 
2707 /**
2708  * Return length in bytes on success, -errno on error.
2709  * The length is always a multiple of BDRV_SECTOR_SIZE.
2710  */
2711 int64_t bdrv_getlength(BlockDriverState *bs)
2712 {
2713     int64_t ret = bdrv_nb_sectors(bs);
2714 
2715     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2716     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2717 }
2718 
2719 /* return 0 as number of sectors if no device present or error */
2720 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2721 {
2722     int64_t nb_sectors = bdrv_nb_sectors(bs);
2723 
2724     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2725 }
2726 
2727 int bdrv_is_read_only(BlockDriverState *bs)
2728 {
2729     return bs->read_only;
2730 }
2731 
2732 int bdrv_is_sg(BlockDriverState *bs)
2733 {
2734     return bs->sg;
2735 }
2736 
2737 int bdrv_enable_write_cache(BlockDriverState *bs)
2738 {
2739     return bs->enable_write_cache;
2740 }
2741 
2742 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2743 {
2744     bs->enable_write_cache = wce;
2745 
2746     /* so a reopen() will preserve wce */
2747     if (wce) {
2748         bs->open_flags |= BDRV_O_CACHE_WB;
2749     } else {
2750         bs->open_flags &= ~BDRV_O_CACHE_WB;
2751     }
2752 }
2753 
2754 int bdrv_is_encrypted(BlockDriverState *bs)
2755 {
2756     if (bs->backing && bs->backing->bs->encrypted) {
2757         return 1;
2758     }
2759     return bs->encrypted;
2760 }
2761 
2762 int bdrv_key_required(BlockDriverState *bs)
2763 {
2764     BdrvChild *backing = bs->backing;
2765 
2766     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2767         return 1;
2768     }
2769     return (bs->encrypted && !bs->valid_key);
2770 }
2771 
2772 int bdrv_set_key(BlockDriverState *bs, const char *key)
2773 {
2774     int ret;
2775     if (bs->backing && bs->backing->bs->encrypted) {
2776         ret = bdrv_set_key(bs->backing->bs, key);
2777         if (ret < 0)
2778             return ret;
2779         if (!bs->encrypted)
2780             return 0;
2781     }
2782     if (!bs->encrypted) {
2783         return -EINVAL;
2784     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2785         return -ENOMEDIUM;
2786     }
2787     ret = bs->drv->bdrv_set_key(bs, key);
2788     if (ret < 0) {
2789         bs->valid_key = 0;
2790     } else if (!bs->valid_key) {
2791         bs->valid_key = 1;
2792         if (bs->blk) {
2793             /* call the change callback now, we skipped it on open */
2794             blk_dev_change_media_cb(bs->blk, true);
2795         }
2796     }
2797     return ret;
2798 }
2799 
2800 /*
2801  * Provide an encryption key for @bs.
2802  * If @key is non-null:
2803  *     If @bs is not encrypted, fail.
2804  *     Else if the key is invalid, fail.
2805  *     Else set @bs's key to @key, replacing the existing key, if any.
2806  * If @key is null:
2807  *     If @bs is encrypted and still lacks a key, fail.
2808  *     Else do nothing.
2809  * On failure, store an error object through @errp if non-null.
2810  */
2811 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2812 {
2813     if (key) {
2814         if (!bdrv_is_encrypted(bs)) {
2815             error_setg(errp, "Node '%s' is not encrypted",
2816                       bdrv_get_device_or_node_name(bs));
2817         } else if (bdrv_set_key(bs, key) < 0) {
2818             error_setg(errp, QERR_INVALID_PASSWORD);
2819         }
2820     } else {
2821         if (bdrv_key_required(bs)) {
2822             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2823                       "'%s' (%s) is encrypted",
2824                       bdrv_get_device_or_node_name(bs),
2825                       bdrv_get_encrypted_filename(bs));
2826         }
2827     }
2828 }
2829 
2830 const char *bdrv_get_format_name(BlockDriverState *bs)
2831 {
2832     return bs->drv ? bs->drv->format_name : NULL;
2833 }
2834 
/*
 * qsort() comparison function for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so a and b each point to
 * a 'const char *' and have to be dereferenced once before the strings can
 * be compared.  Returns the result of strcmp() on the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2839 
2840 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2841                          void *opaque)
2842 {
2843     BlockDriver *drv;
2844     int count = 0;
2845     int i;
2846     const char **formats = NULL;
2847 
2848     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2849         if (drv->format_name) {
2850             bool found = false;
2851             int i = count;
2852             while (formats && i && !found) {
2853                 found = !strcmp(formats[--i], drv->format_name);
2854             }
2855 
2856             if (!found) {
2857                 formats = g_renew(const char *, formats, count + 1);
2858                 formats[count++] = drv->format_name;
2859             }
2860         }
2861     }
2862 
2863     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2864 
2865     for (i = 0; i < count; i++) {
2866         it(opaque, formats[i]);
2867     }
2868 
2869     g_free(formats);
2870 }
2871 
2872 /* This function is to find a node in the bs graph */
2873 BlockDriverState *bdrv_find_node(const char *node_name)
2874 {
2875     BlockDriverState *bs;
2876 
2877     assert(node_name);
2878 
2879     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2880         if (!strcmp(node_name, bs->node_name)) {
2881             return bs;
2882         }
2883     }
2884     return NULL;
2885 }
2886 
2887 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2888 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2889 {
2890     BlockDeviceInfoList *list, *entry;
2891     BlockDriverState *bs;
2892 
2893     list = NULL;
2894     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2895         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2896         if (!info) {
2897             qapi_free_BlockDeviceInfoList(list);
2898             return NULL;
2899         }
2900         entry = g_malloc0(sizeof(*entry));
2901         entry->value = info;
2902         entry->next = list;
2903         list = entry;
2904     }
2905 
2906     return list;
2907 }
2908 
2909 BlockDriverState *bdrv_lookup_bs(const char *device,
2910                                  const char *node_name,
2911                                  Error **errp)
2912 {
2913     BlockBackend *blk;
2914     BlockDriverState *bs;
2915 
2916     if (device) {
2917         blk = blk_by_name(device);
2918 
2919         if (blk) {
2920             bs = blk_bs(blk);
2921             if (!bs) {
2922                 error_setg(errp, "Device '%s' has no medium", device);
2923             }
2924 
2925             return bs;
2926         }
2927     }
2928 
2929     if (node_name) {
2930         bs = bdrv_find_node(node_name);
2931 
2932         if (bs) {
2933             return bs;
2934         }
2935     }
2936 
2937     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2938                      device ? device : "",
2939                      node_name ? node_name : "");
2940     return NULL;
2941 }
2942 
2943 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2944  * return false.  If either argument is NULL, return false. */
2945 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2946 {
2947     while (top && top != base) {
2948         top = backing_bs(top);
2949     }
2950 
2951     return top != NULL;
2952 }
2953 
2954 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2955 {
2956     if (!bs) {
2957         return QTAILQ_FIRST(&graph_bdrv_states);
2958     }
2959     return QTAILQ_NEXT(bs, node_list);
2960 }
2961 
2962 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2963  * the monitor or attached to a BlockBackend */
2964 BlockDriverState *bdrv_next(BlockDriverState *bs)
2965 {
2966     if (!bs || bs->blk) {
2967         bs = blk_next_root_bs(bs);
2968         if (bs) {
2969             return bs;
2970         }
2971     }
2972 
2973     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2974      * handled by the above block already */
2975     do {
2976         bs = bdrv_next_monitor_owned(bs);
2977     } while (bs && bs->blk);
2978     return bs;
2979 }
2980 
2981 const char *bdrv_get_node_name(const BlockDriverState *bs)
2982 {
2983     return bs->node_name;
2984 }
2985 
2986 /* TODO check what callers really want: bs->node_name or blk_name() */
2987 const char *bdrv_get_device_name(const BlockDriverState *bs)
2988 {
2989     return bs->blk ? blk_name(bs->blk) : "";
2990 }
2991 
2992 /* This can be used to identify nodes that might not have a device
2993  * name associated. Since node and device names live in the same
2994  * namespace, the result is unambiguous. The exception is if both are
2995  * absent, then this returns an empty (non-null) string. */
2996 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2997 {
2998     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2999 }
3000 
3001 int bdrv_get_flags(BlockDriverState *bs)
3002 {
3003     return bs->open_flags;
3004 }
3005 
3006 int bdrv_has_zero_init_1(BlockDriverState *bs)
3007 {
3008     return 1;
3009 }
3010 
3011 int bdrv_has_zero_init(BlockDriverState *bs)
3012 {
3013     assert(bs->drv);
3014 
3015     /* If BS is a copy on write image, it is initialized to
3016        the contents of the base image, which may not be zeroes.  */
3017     if (bs->backing) {
3018         return 0;
3019     }
3020     if (bs->drv->bdrv_has_zero_init) {
3021         return bs->drv->bdrv_has_zero_init(bs);
3022     }
3023 
3024     /* safe default */
3025     return 0;
3026 }
3027 
3028 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3029 {
3030     BlockDriverInfo bdi;
3031 
3032     if (bs->backing) {
3033         return false;
3034     }
3035 
3036     if (bdrv_get_info(bs, &bdi) == 0) {
3037         return bdi.unallocated_blocks_are_zero;
3038     }
3039 
3040     return false;
3041 }
3042 
3043 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3044 {
3045     BlockDriverInfo bdi;
3046 
3047     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3048         return false;
3049     }
3050 
3051     if (bdrv_get_info(bs, &bdi) == 0) {
3052         return bdi.can_write_zeroes_with_unmap;
3053     }
3054 
3055     return false;
3056 }
3057 
3058 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3059 {
3060     if (bs->backing && bs->backing->bs->encrypted)
3061         return bs->backing_file;
3062     else if (bs->encrypted)
3063         return bs->filename;
3064     else
3065         return NULL;
3066 }
3067 
3068 void bdrv_get_backing_filename(BlockDriverState *bs,
3069                                char *filename, int filename_size)
3070 {
3071     pstrcpy(filename, filename_size, bs->backing_file);
3072 }
3073 
3074 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3075 {
3076     BlockDriver *drv = bs->drv;
3077     if (!drv)
3078         return -ENOMEDIUM;
3079     if (!drv->bdrv_get_info)
3080         return -ENOTSUP;
3081     memset(bdi, 0, sizeof(*bdi));
3082     return drv->bdrv_get_info(bs, bdi);
3083 }
3084 
3085 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3086 {
3087     BlockDriver *drv = bs->drv;
3088     if (drv && drv->bdrv_get_specific_info) {
3089         return drv->bdrv_get_specific_info(bs);
3090     }
3091     return NULL;
3092 }
3093 
3094 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3095 {
3096     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3097         return;
3098     }
3099 
3100     bs->drv->bdrv_debug_event(bs, event);
3101 }
3102 
3103 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3104                           const char *tag)
3105 {
3106     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3107         bs = bs->file ? bs->file->bs : NULL;
3108     }
3109 
3110     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3111         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3112     }
3113 
3114     return -ENOTSUP;
3115 }
3116 
3117 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3118 {
3119     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3120         bs = bs->file ? bs->file->bs : NULL;
3121     }
3122 
3123     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3124         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3125     }
3126 
3127     return -ENOTSUP;
3128 }
3129 
3130 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3131 {
3132     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3133         bs = bs->file ? bs->file->bs : NULL;
3134     }
3135 
3136     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3137         return bs->drv->bdrv_debug_resume(bs, tag);
3138     }
3139 
3140     return -ENOTSUP;
3141 }
3142 
3143 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3144 {
3145     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3146         bs = bs->file ? bs->file->bs : NULL;
3147     }
3148 
3149     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3150         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3151     }
3152 
3153     return false;
3154 }
3155 
3156 int bdrv_is_snapshot(BlockDriverState *bs)
3157 {
3158     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3159 }
3160 
3161 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3162  * relative, it must be relative to the chain.  So, passing in bs->filename
3163  * from a BDS as backing_file should not be done, as that may be relative to
3164  * the CWD rather than the chain. */
3165 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3166         const char *backing_file)
3167 {
3168     char *filename_full = NULL;
3169     char *backing_file_full = NULL;
3170     char *filename_tmp = NULL;
3171     int is_protocol = 0;
3172     BlockDriverState *curr_bs = NULL;
3173     BlockDriverState *retval = NULL;
3174 
3175     if (!bs || !bs->drv || !backing_file) {
3176         return NULL;
3177     }
3178 
3179     filename_full     = g_malloc(PATH_MAX);
3180     backing_file_full = g_malloc(PATH_MAX);
3181     filename_tmp      = g_malloc(PATH_MAX);
3182 
3183     is_protocol = path_has_protocol(backing_file);
3184 
3185     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3186 
3187         /* If either of the filename paths is actually a protocol, then
3188          * compare unmodified paths; otherwise make paths relative */
3189         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3190             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3191                 retval = curr_bs->backing->bs;
3192                 break;
3193             }
3194         } else {
3195             /* If not an absolute filename path, make it relative to the current
3196              * image's filename path */
3197             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3198                          backing_file);
3199 
3200             /* We are going to compare absolute pathnames */
3201             if (!realpath(filename_tmp, filename_full)) {
3202                 continue;
3203             }
3204 
3205             /* We need to make sure the backing filename we are comparing against
3206              * is relative to the current image filename (or absolute) */
3207             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3208                          curr_bs->backing_file);
3209 
3210             if (!realpath(filename_tmp, backing_file_full)) {
3211                 continue;
3212             }
3213 
3214             if (strcmp(backing_file_full, filename_full) == 0) {
3215                 retval = curr_bs->backing->bs;
3216                 break;
3217             }
3218         }
3219     }
3220 
3221     g_free(filename_full);
3222     g_free(backing_file_full);
3223     g_free(filename_tmp);
3224     return retval;
3225 }
3226 
3227 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3228 {
3229     if (!bs->drv) {
3230         return 0;
3231     }
3232 
3233     if (!bs->backing) {
3234         return 0;
3235     }
3236 
3237     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3238 }
3239 
3240 void bdrv_init(void)
3241 {
3242     module_call_init(MODULE_INIT_BLOCK);
3243 }
3244 
3245 void bdrv_init_with_whitelist(void)
3246 {
3247     use_bdrv_whitelist = 1;
3248     bdrv_init();
3249 }
3250 
3251 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3252 {
3253     Error *local_err = NULL;
3254     int ret;
3255 
3256     if (!bs->drv)  {
3257         return;
3258     }
3259 
3260     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3261         return;
3262     }
3263     bs->open_flags &= ~BDRV_O_INACTIVE;
3264 
3265     if (bs->drv->bdrv_invalidate_cache) {
3266         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3267     } else if (bs->file) {
3268         bdrv_invalidate_cache(bs->file->bs, &local_err);
3269     }
3270     if (local_err) {
3271         bs->open_flags |= BDRV_O_INACTIVE;
3272         error_propagate(errp, local_err);
3273         return;
3274     }
3275 
3276     ret = refresh_total_sectors(bs, bs->total_sectors);
3277     if (ret < 0) {
3278         bs->open_flags |= BDRV_O_INACTIVE;
3279         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3280         return;
3281     }
3282 }
3283 
3284 void bdrv_invalidate_cache_all(Error **errp)
3285 {
3286     BlockDriverState *bs = NULL;
3287     Error *local_err = NULL;
3288 
3289     while ((bs = bdrv_next(bs)) != NULL) {
3290         AioContext *aio_context = bdrv_get_aio_context(bs);
3291 
3292         aio_context_acquire(aio_context);
3293         bdrv_invalidate_cache(bs, &local_err);
3294         aio_context_release(aio_context);
3295         if (local_err) {
3296             error_propagate(errp, local_err);
3297             return;
3298         }
3299     }
3300 }
3301 
3302 static int bdrv_inactivate(BlockDriverState *bs)
3303 {
3304     int ret;
3305 
3306     if (bs->drv->bdrv_inactivate) {
3307         ret = bs->drv->bdrv_inactivate(bs);
3308         if (ret < 0) {
3309             return ret;
3310         }
3311     }
3312 
3313     bs->open_flags |= BDRV_O_INACTIVE;
3314     return 0;
3315 }
3316 
3317 int bdrv_inactivate_all(void)
3318 {
3319     BlockDriverState *bs = NULL;
3320     int ret;
3321 
3322     while ((bs = bdrv_next(bs)) != NULL) {
3323         AioContext *aio_context = bdrv_get_aio_context(bs);
3324 
3325         aio_context_acquire(aio_context);
3326         ret = bdrv_inactivate(bs);
3327         aio_context_release(aio_context);
3328         if (ret < 0) {
3329             return ret;
3330         }
3331     }
3332 
3333     return 0;
3334 }
3335 
3336 /**************************************************************/
3337 /* removable device support */
3338 
3339 /**
3340  * Return TRUE if the media is present
3341  */
3342 bool bdrv_is_inserted(BlockDriverState *bs)
3343 {
3344     BlockDriver *drv = bs->drv;
3345     BdrvChild *child;
3346 
3347     if (!drv) {
3348         return false;
3349     }
3350     if (drv->bdrv_is_inserted) {
3351         return drv->bdrv_is_inserted(bs);
3352     }
3353     QLIST_FOREACH(child, &bs->children, next) {
3354         if (!bdrv_is_inserted(child->bs)) {
3355             return false;
3356         }
3357     }
3358     return true;
3359 }
3360 
3361 /**
3362  * Return whether the media changed since the last call to this
3363  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3364  */
3365 int bdrv_media_changed(BlockDriverState *bs)
3366 {
3367     BlockDriver *drv = bs->drv;
3368 
3369     if (drv && drv->bdrv_media_changed) {
3370         return drv->bdrv_media_changed(bs);
3371     }
3372     return -ENOTSUP;
3373 }
3374 
3375 /**
3376  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3377  */
3378 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3379 {
3380     BlockDriver *drv = bs->drv;
3381     const char *device_name;
3382 
3383     if (drv && drv->bdrv_eject) {
3384         drv->bdrv_eject(bs, eject_flag);
3385     }
3386 
3387     device_name = bdrv_get_device_name(bs);
3388     if (device_name[0] != '\0') {
3389         qapi_event_send_device_tray_moved(device_name,
3390                                           eject_flag, &error_abort);
3391     }
3392 }
3393 
3394 /**
3395  * Lock or unlock the media (if it is locked, the user won't be able
3396  * to eject it manually).
3397  */
3398 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3399 {
3400     BlockDriver *drv = bs->drv;
3401 
3402     trace_bdrv_lock_medium(bs, locked);
3403 
3404     if (drv && drv->bdrv_lock_medium) {
3405         drv->bdrv_lock_medium(bs, locked);
3406     }
3407 }
3408 
3409 /* Get a reference to bs */
3410 void bdrv_ref(BlockDriverState *bs)
3411 {
3412     bs->refcnt++;
3413 }
3414 
3415 /* Release a previously grabbed reference to bs.
3416  * If after releasing, reference count is zero, the BlockDriverState is
3417  * deleted. */
3418 void bdrv_unref(BlockDriverState *bs)
3419 {
3420     if (!bs) {
3421         return;
3422     }
3423     assert(bs->refcnt > 0);
3424     if (--bs->refcnt == 0) {
3425         bdrv_delete(bs);
3426     }
3427 }
3428 
/* One entry on a per-operation blocker list, bs->op_blockers[op]. */
struct BdrvOpBlocker {
    /* Why the operation is blocked.  bdrv_op_is_blocked() copies it into the
     * caller's errp; bdrv_op_unblock() matches entries by this pointer. */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3433 
3434 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3435 {
3436     BdrvOpBlocker *blocker;
3437     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3438     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3439         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3440         if (errp) {
3441             *errp = error_copy(blocker->reason);
3442             error_prepend(errp, "Node '%s' is busy: ",
3443                           bdrv_get_device_or_node_name(bs));
3444         }
3445         return true;
3446     }
3447     return false;
3448 }
3449 
3450 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3451 {
3452     BdrvOpBlocker *blocker;
3453     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3454 
3455     blocker = g_new0(BdrvOpBlocker, 1);
3456     blocker->reason = reason;
3457     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3458 }
3459 
3460 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3461 {
3462     BdrvOpBlocker *blocker, *next;
3463     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3464     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3465         if (blocker->reason == reason) {
3466             QLIST_REMOVE(blocker, list);
3467             g_free(blocker);
3468         }
3469     }
3470 }
3471 
3472 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3473 {
3474     int i;
3475     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3476         bdrv_op_block(bs, i, reason);
3477     }
3478 }
3479 
3480 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3481 {
3482     int i;
3483     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3484         bdrv_op_unblock(bs, i, reason);
3485     }
3486 }
3487 
3488 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3489 {
3490     int i;
3491 
3492     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3493         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3494             return false;
3495         }
3496     }
3497     return true;
3498 }
3499 
/*
 * Create a new image.
 *
 * @filename:      name of the image to create; also used to find the
 *                 protocol driver
 * @fmt:           name of the format driver
 * @base_filename: if non-NULL, stored as the BLOCK_OPT_BACKING_FILE option
 * @base_fmt:      if non-NULL, stored as the BLOCK_OPT_BACKING_FMT option
 * @options:       if non-NULL, an option string parsed into the creation
 *                 options
 * @img_size:      initial value of the BLOCK_OPT_SIZE option (set before
 *                 @options is parsed)
 * @flags:         open flags used, minus BDRV_O_RDWR, BDRV_O_SNAPSHOT and
 *                 BDRV_O_NO_BACKING, when the backing file has to be opened
 *                 to obtain the image size
 * @errp:          receives the error on failure
 * @quiet:         if true, the "Formatting ..." line is not printed
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    /* Both the format and the protocol driver must provide creation options */
    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Merge the option descriptions of both drivers into one list */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The parser's own message is printed here; the caller gets a
             * generic error through errp instead */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    /* Explicit backing file / format arguments are set after -o was parsed */
    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Read the backing file back from the options: it may have come from
     * either base_filename or the -o string */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE: shadows the function-level 'size' declared above */
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            /* Pass an explicitly requested backing format on as "driver" */
            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* The new image takes over the backing file's length */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error is replaced by the message above */
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; an error still held in local_err goes to the caller */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3666 
3667 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3668 {
3669     return bs->aio_context;
3670 }
3671 
3672 void bdrv_detach_aio_context(BlockDriverState *bs)
3673 {
3674     BdrvAioNotifier *baf;
3675 
3676     if (!bs->drv) {
3677         return;
3678     }
3679 
3680     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3681         baf->detach_aio_context(baf->opaque);
3682     }
3683 
3684     if (bs->throttle_state) {
3685         throttle_timers_detach_aio_context(&bs->throttle_timers);
3686     }
3687     if (bs->drv->bdrv_detach_aio_context) {
3688         bs->drv->bdrv_detach_aio_context(bs);
3689     }
3690     if (bs->file) {
3691         bdrv_detach_aio_context(bs->file->bs);
3692     }
3693     if (bs->backing) {
3694         bdrv_detach_aio_context(bs->backing->bs);
3695     }
3696 
3697     bs->aio_context = NULL;
3698 }
3699 
3700 void bdrv_attach_aio_context(BlockDriverState *bs,
3701                              AioContext *new_context)
3702 {
3703     BdrvAioNotifier *ban;
3704 
3705     if (!bs->drv) {
3706         return;
3707     }
3708 
3709     bs->aio_context = new_context;
3710 
3711     if (bs->backing) {
3712         bdrv_attach_aio_context(bs->backing->bs, new_context);
3713     }
3714     if (bs->file) {
3715         bdrv_attach_aio_context(bs->file->bs, new_context);
3716     }
3717     if (bs->drv->bdrv_attach_aio_context) {
3718         bs->drv->bdrv_attach_aio_context(bs, new_context);
3719     }
3720     if (bs->throttle_state) {
3721         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3722     }
3723 
3724     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3725         ban->attached_aio_context(new_context, ban->opaque);
3726     }
3727 }
3728 
3729 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3730 {
3731     bdrv_drain(bs); /* ensure there are no in-flight requests */
3732 
3733     bdrv_detach_aio_context(bs);
3734 
3735     /* This function executes in the old AioContext so acquire the new one in
3736      * case it runs in a different thread.
3737      */
3738     aio_context_acquire(new_context);
3739     bdrv_attach_aio_context(bs, new_context);
3740     aio_context_release(new_context);
3741 }
3742 
3743 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3744         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3745         void (*detach_aio_context)(void *opaque), void *opaque)
3746 {
3747     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3748     *ban = (BdrvAioNotifier){
3749         .attached_aio_context = attached_aio_context,
3750         .detach_aio_context   = detach_aio_context,
3751         .opaque               = opaque
3752     };
3753 
3754     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3755 }
3756 
3757 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3758                                       void (*attached_aio_context)(AioContext *,
3759                                                                    void *),
3760                                       void (*detach_aio_context)(void *),
3761                                       void *opaque)
3762 {
3763     BdrvAioNotifier *ban, *ban_next;
3764 
3765     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3766         if (ban->attached_aio_context == attached_aio_context &&
3767             ban->detach_aio_context   == detach_aio_context   &&
3768             ban->opaque               == opaque)
3769         {
3770             QLIST_REMOVE(ban, list);
3771             g_free(ban);
3772 
3773             return;
3774         }
3775     }
3776 
3777     abort();
3778 }
3779 
3780 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3781                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3782 {
3783     if (!bs->drv->bdrv_amend_options) {
3784         return -ENOTSUP;
3785     }
3786     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3787 }
3788 
3789 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3790  * of block filter and by bdrv_is_first_non_filter.
3791  * It is used to test if the given bs is the candidate or recurse more in the
3792  * node graph.
3793  */
3794 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3795                                       BlockDriverState *candidate)
3796 {
3797     /* return false if basic checks fails */
3798     if (!bs || !bs->drv) {
3799         return false;
3800     }
3801 
3802     /* the code reached a non block filter driver -> check if the bs is
3803      * the same as the candidate. It's the recursion termination condition.
3804      */
3805     if (!bs->drv->is_filter) {
3806         return bs == candidate;
3807     }
3808     /* Down this path the driver is a block filter driver */
3809 
3810     /* If the block filter recursion method is defined use it to recurse down
3811      * the node graph.
3812      */
3813     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3814         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3815     }
3816 
3817     /* the driver is a block filter but don't allow to recurse -> return false
3818      */
3819     return false;
3820 }
3821 
3822 /* This function checks if the candidate is the first non filter bs down it's
3823  * bs chain. Since we don't have pointers to parents it explore all bs chains
3824  * from the top. Some filters can choose not to pass down the recursion.
3825  */
3826 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3827 {
3828     BlockDriverState *bs = NULL;
3829 
3830     /* walk down the bs forest recursively */
3831     while ((bs = bdrv_next(bs)) != NULL) {
3832         bool perm;
3833 
3834         /* try to recurse in this top level bs */
3835         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3836 
3837         /* candidate is the first non filter */
3838         if (perm) {
3839             return true;
3840         }
3841     }
3842 
3843     return false;
3844 }
3845 
3846 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3847                                         const char *node_name, Error **errp)
3848 {
3849     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3850     AioContext *aio_context;
3851 
3852     if (!to_replace_bs) {
3853         error_setg(errp, "Node name '%s' not found", node_name);
3854         return NULL;
3855     }
3856 
3857     aio_context = bdrv_get_aio_context(to_replace_bs);
3858     aio_context_acquire(aio_context);
3859 
3860     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3861         to_replace_bs = NULL;
3862         goto out;
3863     }
3864 
3865     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3866      * most non filter in order to prevent data corruption.
3867      * Another benefit is that this tests exclude backing files which are
3868      * blocked by the backing blockers.
3869      */
3870     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3871         error_setg(errp, "Only top most non filter can be replaced");
3872         to_replace_bs = NULL;
3873         goto out;
3874     }
3875 
3876 out:
3877     aio_context_release(aio_context);
3878     return to_replace_bs;
3879 }
3880 
3881 static bool append_open_options(QDict *d, BlockDriverState *bs)
3882 {
3883     const QDictEntry *entry;
3884     QemuOptDesc *desc;
3885     BdrvChild *child;
3886     bool found_any = false;
3887     const char *p;
3888 
3889     for (entry = qdict_first(bs->options); entry;
3890          entry = qdict_next(bs->options, entry))
3891     {
3892         /* Exclude options for children */
3893         QLIST_FOREACH(child, &bs->children, next) {
3894             if (strstart(qdict_entry_key(entry), child->name, &p)
3895                 && (!*p || *p == '.'))
3896             {
3897                 break;
3898             }
3899         }
3900         if (child) {
3901             continue;
3902         }
3903 
3904         /* And exclude all non-driver-specific options */
3905         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3906             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3907                 break;
3908             }
3909         }
3910         if (desc->name) {
3911             continue;
3912         }
3913 
3914         qobject_incref(qdict_entry_value(entry));
3915         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3916         found_any = true;
3917     }
3918 
3919     return found_any;
3920 }
3921 
3922 /* Updates the following BDS fields:
3923  *  - exact_filename: A filename which may be used for opening a block device
3924  *                    which (mostly) equals the given BDS (even without any
3925  *                    other options; so reading and writing must return the same
3926  *                    results, but caching etc. may be different)
3927  *  - full_open_options: Options which, when given when opening a block device
3928  *                       (without a filename), result in a BDS (mostly)
3929  *                       equalling the given one
3930  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3931  *              full_open_options is converted to a JSON object, prefixed with
3932  *              "json:" (for use through the JSON pseudo protocol) and put here.
3933  */
3934 void bdrv_refresh_filename(BlockDriverState *bs)
3935 {
3936     BlockDriver *drv = bs->drv;
3937     QDict *opts;
3938 
3939     if (!drv) {
3940         return;
3941     }
3942 
3943     /* This BDS's file name will most probably depend on its file's name, so
3944      * refresh that first */
3945     if (bs->file) {
3946         bdrv_refresh_filename(bs->file->bs);
3947     }
3948 
3949     if (drv->bdrv_refresh_filename) {
3950         /* Obsolete information is of no use here, so drop the old file name
3951          * information before refreshing it */
3952         bs->exact_filename[0] = '\0';
3953         if (bs->full_open_options) {
3954             QDECREF(bs->full_open_options);
3955             bs->full_open_options = NULL;
3956         }
3957 
3958         opts = qdict_new();
3959         append_open_options(opts, bs);
3960         drv->bdrv_refresh_filename(bs, opts);
3961         QDECREF(opts);
3962     } else if (bs->file) {
3963         /* Try to reconstruct valid information from the underlying file */
3964         bool has_open_options;
3965 
3966         bs->exact_filename[0] = '\0';
3967         if (bs->full_open_options) {
3968             QDECREF(bs->full_open_options);
3969             bs->full_open_options = NULL;
3970         }
3971 
3972         opts = qdict_new();
3973         has_open_options = append_open_options(opts, bs);
3974 
3975         /* If no specific options have been given for this BDS, the filename of
3976          * the underlying file should suffice for this one as well */
3977         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3978             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3979         }
3980         /* Reconstructing the full options QDict is simple for most format block
3981          * drivers, as long as the full options are known for the underlying
3982          * file BDS. The full options QDict of that file BDS should somehow
3983          * contain a representation of the filename, therefore the following
3984          * suffices without querying the (exact_)filename of this BDS. */
3985         if (bs->file->bs->full_open_options) {
3986             qdict_put_obj(opts, "driver",
3987                           QOBJECT(qstring_from_str(drv->format_name)));
3988             QINCREF(bs->file->bs->full_open_options);
3989             qdict_put_obj(opts, "file",
3990                           QOBJECT(bs->file->bs->full_open_options));
3991 
3992             bs->full_open_options = opts;
3993         } else {
3994             QDECREF(opts);
3995         }
3996     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3997         /* There is no underlying file BDS (at least referenced by BDS.file),
3998          * so the full options QDict should be equal to the options given
3999          * specifically for this block device when it was opened (plus the
4000          * driver specification).
4001          * Because those options don't change, there is no need to update
4002          * full_open_options when it's already set. */
4003 
4004         opts = qdict_new();
4005         append_open_options(opts, bs);
4006         qdict_put_obj(opts, "driver",
4007                       QOBJECT(qstring_from_str(drv->format_name)));
4008 
4009         if (bs->exact_filename[0]) {
4010             /* This may not work for all block protocol drivers (some may
4011              * require this filename to be parsed), but we have to find some
4012              * default solution here, so just include it. If some block driver
4013              * does not support pure options without any filename at all or
4014              * needs some special format of the options QDict, it needs to
4015              * implement the driver-specific bdrv_refresh_filename() function.
4016              */
4017             qdict_put_obj(opts, "filename",
4018                           QOBJECT(qstring_from_str(bs->exact_filename)));
4019         }
4020 
4021         bs->full_open_options = opts;
4022     }
4023 
4024     if (bs->exact_filename[0]) {
4025         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4026     } else if (bs->full_open_options) {
4027         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4028         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4029                  qstring_get_str(json));
4030         QDECREF(json);
4031     }
4032 }
4033