xref: /openbmc/qemu/block.c (revision a5614993d79584af93bb845f69f59872b3f76cf8)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44 
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Named nodes: bdrv_assign_node_name() appends each node it names here */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new() is appended here */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Registered block drivers: bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration of a static function defined elsewhere in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration of a static function defined elsewhere in this file */
static void bdrv_close(BlockDriverState *bs);
77 
78 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Not a letter: do not look at the second character at all */
        return 0;
    }
    return filename[1] == ':';
}
85 
86 int is_windows_drive(const char *filename)
87 {
88     if (is_windows_drive_prefix(filename) &&
89         filename[2] == '\0')
90         return 1;
91     if (strstart(filename, "\\\\.\\", NULL) ||
92         strstart(filename, "//./", NULL))
93         return 1;
94     return 0;
95 }
96 #endif
97 
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100     if (!bs || !bs->drv) {
101         /* page size or 4k (hdd sector size) should be on the safe side */
102         return MAX(4096, getpagesize());
103     }
104 
105     return bs->bl.opt_mem_alignment;
106 }
107 
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110     if (!bs || !bs->drv) {
111         /* page size or 4k (hdd sector size) should be on the safe side */
112         return MAX(4096, getpagesize());
113     }
114 
115     return bs->bl.min_mem_alignment;
116 }
117 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' is
 * reached before any directory separator.  On Windows, drive specifications
 * are never treated as protocols and '\' also counts as a separator.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
    const char *stop_chars = ":/";

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    stop_chars = ":/\\";
#endif

    /* Index of the first ':' or separator (or of the terminating NUL) */
    return path[strcspn(path, stop_chars)] == ':';
}
135 
/*
 * Return 1 if @path is absolute, 0 otherwise.  A path is absolute when it
 * starts with '/'; on Windows a leading '\' or any drive specification also
 * qualifies.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
148 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename.
 *
 * An absolute @filename is copied as is.  Otherwise @filename is appended to
 * the leading part of @base_path, which ends just after the last directory
 * separator or just after the first ':' (so URLs are supported), whichever
 * comes later.  The result is truncated to fit @dest.  Nothing is written
 * when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix of base_path is longer, clamped to the buffer */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;
    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192 
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194                                                   const char *backing,
195                                                   char *dest, size_t sz,
196                                                   Error **errp)
197 {
198     if (backing[0] == '\0' || path_has_protocol(backing) ||
199         path_is_absolute(backing))
200     {
201         pstrcpy(dest, sz, backing);
202     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203         error_setg(errp, "Cannot use relative backing file names for '%s'",
204                    backed);
205     } else {
206         path_combine(dest, sz, backed, backing);
207     }
208 }
209 
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211                                     Error **errp)
212 {
213     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214 
215     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216                                                  dest, sz, errp);
217 }
218 
/* Add @bdrv to the head of the global bdrv_drivers list. */
void bdrv_register(BlockDriver *bdrv)
{
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
223 
224 BlockDriverState *bdrv_new_root(void)
225 {
226     return bdrv_new();
227 }
228 
229 BlockDriverState *bdrv_new(void)
230 {
231     BlockDriverState *bs;
232     int i;
233 
234     bs = g_new0(BlockDriverState, 1);
235     QLIST_INIT(&bs->dirty_bitmaps);
236     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
237         QLIST_INIT(&bs->op_blockers[i]);
238     }
239     notifier_with_return_list_init(&bs->before_write_notifiers);
240     qemu_co_queue_init(&bs->throttled_reqs[0]);
241     qemu_co_queue_init(&bs->throttled_reqs[1]);
242     bs->refcnt = 1;
243     bs->aio_context = qemu_get_aio_context();
244 
245     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
246 
247     return bs;
248 }
249 
250 BlockDriver *bdrv_find_format(const char *format_name)
251 {
252     BlockDriver *drv1;
253     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
254         if (!strcmp(drv1->format_name, format_name)) {
255             return drv1;
256         }
257     }
258     return NULL;
259 }
260 
261 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
262 {
263     static const char *whitelist_rw[] = {
264         CONFIG_BDRV_RW_WHITELIST
265     };
266     static const char *whitelist_ro[] = {
267         CONFIG_BDRV_RO_WHITELIST
268     };
269     const char **p;
270 
271     if (!whitelist_rw[0] && !whitelist_ro[0]) {
272         return 1;               /* no whitelist, anything goes */
273     }
274 
275     for (p = whitelist_rw; *p; p++) {
276         if (!strcmp(drv->format_name, *p)) {
277             return 1;
278         }
279     }
280     if (read_only) {
281         for (p = whitelist_ro; *p; p++) {
282             if (!strcmp(drv->format_name, *p)) {
283                 return 1;
284             }
285         }
286     }
287     return 0;
288 }
289 
290 bool bdrv_uses_whitelist(void)
291 {
292     return use_bdrv_whitelist;
293 }
294 
/* Arguments and results exchanged between bdrv_create() and its coroutine */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name, owned by bdrv_create() */
    QemuOpts *opts;     /* creation options handed to the driver */
    int ret;            /* NOT_DONE until the coroutine stores its result */
    Error *err;         /* error reported by the driver, if any */
} CreateCo;
302 
303 static void coroutine_fn bdrv_create_co_entry(void *opaque)
304 {
305     Error *local_err = NULL;
306     int ret;
307 
308     CreateCo *cco = opaque;
309     assert(cco->drv);
310 
311     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
312     if (local_err) {
313         error_propagate(&cco->err, local_err);
314     }
315     cco->ret = ret;
316 }
317 
318 int bdrv_create(BlockDriver *drv, const char* filename,
319                 QemuOpts *opts, Error **errp)
320 {
321     int ret;
322 
323     Coroutine *co;
324     CreateCo cco = {
325         .drv = drv,
326         .filename = g_strdup(filename),
327         .opts = opts,
328         .ret = NOT_DONE,
329         .err = NULL,
330     };
331 
332     if (!drv->bdrv_create) {
333         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
334         ret = -ENOTSUP;
335         goto out;
336     }
337 
338     if (qemu_in_coroutine()) {
339         /* Fast-path if already in coroutine context */
340         bdrv_create_co_entry(&cco);
341     } else {
342         co = qemu_coroutine_create(bdrv_create_co_entry);
343         qemu_coroutine_enter(co, &cco);
344         while (cco.ret == NOT_DONE) {
345             aio_poll(qemu_get_aio_context(), true);
346         }
347     }
348 
349     ret = cco.ret;
350     if (ret < 0) {
351         if (cco.err) {
352             error_propagate(errp, cco.err);
353         } else {
354             error_setg_errno(errp, -ret, "Could not create image");
355         }
356     }
357 
358 out:
359     g_free(cco.filename);
360     return ret;
361 }
362 
363 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
364 {
365     BlockDriver *drv;
366     Error *local_err = NULL;
367     int ret;
368 
369     drv = bdrv_find_protocol(filename, true, errp);
370     if (drv == NULL) {
371         return -ENOENT;
372     }
373 
374     ret = bdrv_create(drv, filename, opts, &local_err);
375     if (local_err) {
376         error_propagate(errp, local_err);
377     }
378     return ret;
379 }
380 
381 /**
382  * Try to get @bs's logical and physical block size.
383  * On success, store them in @bsz struct and return 0.
384  * On failure return -errno.
385  * @bs must not be empty.
386  */
387 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
388 {
389     BlockDriver *drv = bs->drv;
390 
391     if (drv && drv->bdrv_probe_blocksizes) {
392         return drv->bdrv_probe_blocksizes(bs, bsz);
393     }
394 
395     return -ENOTSUP;
396 }
397 
398 /**
399  * Try to get @bs's geometry (cyls, heads, sectors).
400  * On success, store them in @geo struct and return 0.
401  * On failure return -errno.
402  * @bs must not be empty.
403  */
404 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
405 {
406     BlockDriver *drv = bs->drv;
407 
408     if (drv && drv->bdrv_probe_geometry) {
409         return drv->bdrv_probe_geometry(bs, geo);
410     }
411 
412     return -ENOTSUP;
413 }
414 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity of
 * that buffer in bytes.  On non-Windows hosts the file is created in $TMPDIR,
 * or in /var/tmp when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the buffer is too small to hold the name).  On Windows the negative value
 * is derived from GetLastError() instead.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    /*
     * A non-positive size can never hold the name, and a negative one would
     * turn into a huge size_t when handed to snprintf().
     */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
450 
451 /*
452  * Detect host devices. By convention, /dev/cdrom[N] is always
453  * recognized as a host CDROM.
454  */
455 static BlockDriver *find_hdev_driver(const char *filename)
456 {
457     int score_max = 0, score;
458     BlockDriver *drv = NULL, *d;
459 
460     QLIST_FOREACH(d, &bdrv_drivers, list) {
461         if (d->bdrv_probe_device) {
462             score = d->bdrv_probe_device(filename);
463             if (score > score_max) {
464                 score_max = score;
465                 drv = d;
466             }
467         }
468     }
469 
470     return drv;
471 }
472 
473 BlockDriver *bdrv_find_protocol(const char *filename,
474                                 bool allow_protocol_prefix,
475                                 Error **errp)
476 {
477     BlockDriver *drv1;
478     char protocol[128];
479     int len;
480     const char *p;
481 
482     /* TODO Drivers without bdrv_file_open must be specified explicitly */
483 
484     /*
485      * XXX(hch): we really should not let host device detection
486      * override an explicit protocol specification, but moving this
487      * later breaks access to device names with colons in them.
488      * Thanks to the brain-dead persistent naming schemes on udev-
489      * based Linux systems those actually are quite common.
490      */
491     drv1 = find_hdev_driver(filename);
492     if (drv1) {
493         return drv1;
494     }
495 
496     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
497         return &bdrv_file;
498     }
499 
500     p = strchr(filename, ':');
501     assert(p != NULL);
502     len = p - filename;
503     if (len > sizeof(protocol) - 1)
504         len = sizeof(protocol) - 1;
505     memcpy(protocol, filename, len);
506     protocol[len] = '\0';
507     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
508         if (drv1->protocol_name &&
509             !strcmp(drv1->protocol_name, protocol)) {
510             return drv1;
511         }
512     }
513 
514     error_setg(errp, "Unknown protocol '%s'", protocol);
515     return NULL;
516 }
517 
518 /*
519  * Guess image format by probing its contents.
520  * This is not a good idea when your image is raw (CVE-2008-2004), but
521  * we do it anyway for backward compatibility.
522  *
523  * @buf         contains the image's first @buf_size bytes.
524  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
525  *              but can be smaller if the image file is smaller)
526  * @filename    is its filename.
527  *
528  * For all block drivers, call the bdrv_probe() method to get its
529  * probing score.
530  * Return the first block driver with the highest probing score.
531  */
532 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
533                             const char *filename)
534 {
535     int score_max = 0, score;
536     BlockDriver *drv = NULL, *d;
537 
538     QLIST_FOREACH(d, &bdrv_drivers, list) {
539         if (d->bdrv_probe) {
540             score = d->bdrv_probe(buf, buf_size, filename);
541             if (score > score_max) {
542                 score_max = score;
543                 drv = d;
544             }
545         }
546     }
547 
548     return drv;
549 }
550 
551 static int find_image_format(BlockDriverState *bs, const char *filename,
552                              BlockDriver **pdrv, Error **errp)
553 {
554     BlockDriver *drv;
555     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
556     int ret = 0;
557 
558     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
559     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
560         *pdrv = &bdrv_raw;
561         return ret;
562     }
563 
564     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
565     if (ret < 0) {
566         error_setg_errno(errp, -ret, "Could not read image for determining its "
567                          "format");
568         *pdrv = NULL;
569         return ret;
570     }
571 
572     drv = bdrv_probe_all(buf, ret, filename);
573     if (!drv) {
574         error_setg(errp, "Could not determine image format: No compatible "
575                    "driver found");
576         ret = -ENOENT;
577     }
578     *pdrv = drv;
579     return ret;
580 }
581 
582 /**
583  * Set the current 'total_sectors' value
584  * Return 0 on success, -errno on error.
585  */
586 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
587 {
588     BlockDriver *drv = bs->drv;
589 
590     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
591     if (bdrv_is_sg(bs))
592         return 0;
593 
594     /* query actual device if possible, otherwise just trust the hint */
595     if (drv->bdrv_getlength) {
596         int64_t length = drv->bdrv_getlength(bs);
597         if (length < 0) {
598             return length;
599         }
600         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
601     }
602 
603     bs->total_sectors = hint;
604     return 0;
605 }
606 
607 /**
608  * Combines a QDict of new block driver @options with any missing options taken
609  * from @old_options, so that leaving out an option defaults to its old value.
610  */
611 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
612                               QDict *old_options)
613 {
614     if (bs->drv && bs->drv->bdrv_join_options) {
615         bs->drv->bdrv_join_options(options, old_options);
616     } else {
617         qdict_join(options, old_options, false);
618     }
619 }
620 
621 /**
622  * Set open flags for a given discard mode
623  *
624  * Return 0 on success, -1 if the discard mode was invalid.
625  */
626 int bdrv_parse_discard_flags(const char *mode, int *flags)
627 {
628     *flags &= ~BDRV_O_UNMAP;
629 
630     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
631         /* do nothing */
632     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
633         *flags |= BDRV_O_UNMAP;
634     } else {
635         return -1;
636     }
637 
638     return 0;
639 }
640 
641 /**
642  * Set open flags for a given cache mode
643  *
644  * Return 0 on success, -1 if the cache mode was invalid.
645  */
646 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
647 {
648     *flags &= ~BDRV_O_CACHE_MASK;
649 
650     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
651         *writethrough = false;
652         *flags |= BDRV_O_NOCACHE;
653     } else if (!strcmp(mode, "directsync")) {
654         *writethrough = true;
655         *flags |= BDRV_O_NOCACHE;
656     } else if (!strcmp(mode, "writeback")) {
657         *writethrough = false;
658     } else if (!strcmp(mode, "unsafe")) {
659         *writethrough = false;
660         *flags |= BDRV_O_NO_FLUSH;
661     } else if (!strcmp(mode, "writethrough")) {
662         *writethrough = true;
663     } else {
664         return -1;
665     }
666 
667     return 0;
668 }
669 
670 /*
671  * Returns the options and flags that a temporary snapshot should get, based on
672  * the originally requested flags (the originally requested image will have
673  * flags like a backing file)
674  */
675 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
676                                        int parent_flags, QDict *parent_options)
677 {
678     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
679 
680     /* For temporary files, unconditional cache=unsafe is fine */
681     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
682     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
683 }
684 
685 /*
686  * Returns the options and flags that bs->file should get if a protocol driver
687  * is expected, based on the given options and flags for the parent BDS
688  */
689 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
690                                    int parent_flags, QDict *parent_options)
691 {
692     int flags = parent_flags;
693 
694     /* Enable protocol handling, disable format probing for bs->file */
695     flags |= BDRV_O_PROTOCOL;
696 
697     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
698      * the parent. */
699     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
700     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
701 
702     /* Our block drivers take care to send flushes and respect unmap policy,
703      * so we can default to enable both on lower layers regardless of the
704      * corresponding parent options. */
705     flags |= BDRV_O_UNMAP;
706 
707     /* Clear flags that only apply to the top layer */
708     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
709                BDRV_O_NO_IO);
710 
711     *child_flags = flags;
712 }
713 
/* Child role whose options and flags come from bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
717 
718 /*
719  * Returns the options and flags that bs->file should get if the use of formats
720  * (and not only protocols) is permitted for it, based on the given options and
721  * flags for the parent BDS
722  */
723 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
724                                        int parent_flags, QDict *parent_options)
725 {
726     child_file.inherit_options(child_flags, child_options,
727                                parent_flags, parent_options);
728 
729     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
730 }
731 
/* Child role whose options and flags come from bdrv_inherited_fmt_options() */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
735 
736 /*
737  * Returns the options and flags that bs->backing should get, based on the
738  * given options and flags for the parent BDS
739  */
740 static void bdrv_backing_options(int *child_flags, QDict *child_options,
741                                  int parent_flags, QDict *parent_options)
742 {
743     int flags = parent_flags;
744 
745     /* The cache mode is inherited unmodified for backing files; except WCE,
746      * which is only applied on the top level (BlockBackend) */
747     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
748     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
749 
750     /* backing files always opened read-only */
751     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
752 
753     /* snapshot=on is handled on the top layer */
754     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
755 
756     *child_flags = flags;
757 }
758 
/* Child role whose options and flags come from bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
762 
763 static int bdrv_open_flags(BlockDriverState *bs, int flags)
764 {
765     int open_flags = flags;
766 
767     /*
768      * Clear flags that are internal to the block layer before opening the
769      * image.
770      */
771     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
772 
773     /*
774      * Snapshots should be writable.
775      */
776     if (flags & BDRV_O_TEMPORARY) {
777         open_flags |= BDRV_O_RDWR;
778     }
779 
780     return open_flags;
781 }
782 
783 static void update_flags_from_options(int *flags, QemuOpts *opts)
784 {
785     *flags &= ~BDRV_O_CACHE_MASK;
786 
787     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
788     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
789         *flags |= BDRV_O_NO_FLUSH;
790     }
791 
792     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
793     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
794         *flags |= BDRV_O_NOCACHE;
795     }
796 }
797 
798 static void update_options_from_flags(QDict *options, int flags)
799 {
800     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
801         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
802                   qbool_from_bool(flags & BDRV_O_NOCACHE));
803     }
804     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
805         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
806                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
807     }
808 }
809 
810 static void bdrv_assign_node_name(BlockDriverState *bs,
811                                   const char *node_name,
812                                   Error **errp)
813 {
814     char *gen_node_name = NULL;
815 
816     if (!node_name) {
817         node_name = gen_node_name = id_generate(ID_BLOCK);
818     } else if (!id_wellformed(node_name)) {
819         /*
820          * Check for empty string or invalid characters, but not if it is
821          * generated (generated names use characters not available to the user)
822          */
823         error_setg(errp, "Invalid node name");
824         return;
825     }
826 
827     /* takes care of avoiding namespaces collisions */
828     if (blk_by_name(node_name)) {
829         error_setg(errp, "node-name=%s is conflicting with a device id",
830                    node_name);
831         goto out;
832     }
833 
834     /* takes care of avoiding duplicates node names */
835     if (bdrv_find_node(node_name)) {
836         error_setg(errp, "Duplicate node name");
837         goto out;
838     }
839 
840     /* copy node name into the bs and insert it into the graph list */
841     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
842     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
843 out:
844     g_free(gen_node_name);
845 }
846 
/*
 * Options that bdrv_open_common() absorbs from the options QDict before the
 * remaining entries are handed to the block driver.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
874 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node to open; bs->file must be NULL and bs->open_flags must
 *           already hold the requested flags.
 * @file:    child to use as bs->file for drivers opened through bdrv_open,
 *           or NULL for drivers that implement bdrv_file_open.
 * @options: option dictionary; must not be NULL and must not be bs->options.
 *           It must contain a "driver" entry naming a registered driver.
 * @errp:    set on failure.
 *
 * Returns 0 on success, a negative errno value on failure.  On failure
 * bs->drv, bs->opaque and bs->file are reset if they had already been set.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the options listed in bdrv_runtime_opts out of the QDict */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* The driver must exist; an unknown name trips the assertion */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* With a file child its filename is used, else the "filename" option */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    /*
     * NOTE(review): on success this links bs into graph_bdrv_states, and
     * neither failure label below unlinks it again - presumably the caller
     * cleans up; confirm.
     */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are set before the driver's open callback runs */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    /*
     * The second whitelist lookup only selects the error message: it tells
     * apart a driver that is whitelisted for read-only use from one that is
     * not whitelisted at all.
     */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    /* Record the file name; exact_filename starts out as a copy of it */
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; else build one from errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    /* Sanity-check the alignment values now in effect for this node */
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Undo the driver binding; the file child itself is not released here */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1025 
1026 static QDict *parse_json_filename(const char *filename, Error **errp)
1027 {
1028     QObject *options_obj;
1029     QDict *options;
1030     int ret;
1031 
1032     ret = strstart(filename, "json:", &filename);
1033     assert(ret);
1034 
1035     options_obj = qobject_from_json(filename);
1036     if (!options_obj) {
1037         error_setg(errp, "Could not parse the JSON options");
1038         return NULL;
1039     }
1040 
1041     if (qobject_type(options_obj) != QTYPE_QDICT) {
1042         qobject_decref(options_obj);
1043         error_setg(errp, "Invalid JSON object given");
1044         return NULL;
1045     }
1046 
1047     options = qobject_to_qdict(options_obj);
1048     qdict_flatten(options);
1049 
1050     return options;
1051 }
1052 
1053 static void parse_json_protocol(QDict *options, const char **pfilename,
1054                                 Error **errp)
1055 {
1056     QDict *json_options;
1057     Error *local_err = NULL;
1058 
1059     /* Parse json: pseudo-protocol */
1060     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1061         return;
1062     }
1063 
1064     json_options = parse_json_filename(*pfilename, &local_err);
1065     if (local_err) {
1066         error_propagate(errp, local_err);
1067         return;
1068     }
1069 
1070     /* Options given in the filename have lower priority than options
1071      * specified directly */
1072     qdict_join(options, json_options, false);
1073     QDECREF(json_options);
1074     *pfilename = NULL;
1075 }
1076 
1077 /*
1078  * Fills in default options for opening images and converts the legacy
1079  * filename/flags pair to option QDict entries.
1080  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1081  * block driver has been specified explicitly.
1082  */
1083 static int bdrv_fill_options(QDict **options, const char *filename,
1084                              int *flags, Error **errp)
1085 {
1086     const char *drvname;
1087     bool protocol = *flags & BDRV_O_PROTOCOL;
1088     bool parse_filename = false;
1089     BlockDriver *drv = NULL;
1090     Error *local_err = NULL;
1091 
1092     drvname = qdict_get_try_str(*options, "driver");
1093     if (drvname) {
1094         drv = bdrv_find_format(drvname);
1095         if (!drv) {
1096             error_setg(errp, "Unknown driver '%s'", drvname);
1097             return -ENOENT;
1098         }
1099         /* If the user has explicitly specified the driver, this choice should
1100          * override the BDRV_O_PROTOCOL flag */
1101         protocol = drv->bdrv_file_open;
1102     }
1103 
1104     if (protocol) {
1105         *flags |= BDRV_O_PROTOCOL;
1106     } else {
1107         *flags &= ~BDRV_O_PROTOCOL;
1108     }
1109 
1110     /* Translate cache options from flags into options */
1111     update_options_from_flags(*options, *flags);
1112 
1113     /* Fetch the file name from the options QDict if necessary */
1114     if (protocol && filename) {
1115         if (!qdict_haskey(*options, "filename")) {
1116             qdict_put(*options, "filename", qstring_from_str(filename));
1117             parse_filename = true;
1118         } else {
1119             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1120                              "the same time");
1121             return -EINVAL;
1122         }
1123     }
1124 
1125     /* Find the right block driver */
1126     filename = qdict_get_try_str(*options, "filename");
1127 
1128     if (!drvname && protocol) {
1129         if (filename) {
1130             drv = bdrv_find_protocol(filename, parse_filename, errp);
1131             if (!drv) {
1132                 return -EINVAL;
1133             }
1134 
1135             drvname = drv->format_name;
1136             qdict_put(*options, "driver", qstring_from_str(drvname));
1137         } else {
1138             error_setg(errp, "Must specify either driver or file");
1139             return -EINVAL;
1140         }
1141     }
1142 
1143     assert(drv || !protocol);
1144 
1145     /* Driver-specific filename parsing */
1146     if (drv && drv->bdrv_parse_filename && parse_filename) {
1147         drv->bdrv_parse_filename(filename, *options, &local_err);
1148         if (local_err) {
1149             error_propagate(errp, local_err);
1150             return -EINVAL;
1151         }
1152 
1153         if (!drv->bdrv_needs_filename) {
1154             qdict_del(*options, "filename");
1155         }
1156     }
1157 
1158     return 0;
1159 }
1160 
1161 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1162                                   const char *child_name,
1163                                   const BdrvChildRole *child_role)
1164 {
1165     BdrvChild *child = g_new(BdrvChild, 1);
1166     *child = (BdrvChild) {
1167         .bs     = child_bs,
1168         .name   = g_strdup(child_name),
1169         .role   = child_role,
1170     };
1171 
1172     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1173 
1174     return child;
1175 }
1176 
1177 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1178                              BlockDriverState *child_bs,
1179                              const char *child_name,
1180                              const BdrvChildRole *child_role)
1181 {
1182     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1183     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1184     return child;
1185 }
1186 
1187 static void bdrv_detach_child(BdrvChild *child)
1188 {
1189     if (child->next.le_prev) {
1190         QLIST_REMOVE(child, next);
1191         child->next.le_prev = NULL;
1192     }
1193     QLIST_REMOVE(child, next_parent);
1194     g_free(child->name);
1195     g_free(child);
1196 }
1197 
1198 void bdrv_root_unref_child(BdrvChild *child)
1199 {
1200     BlockDriverState *child_bs;
1201 
1202     child_bs = child->bs;
1203     bdrv_detach_child(child);
1204     bdrv_unref(child_bs);
1205 }
1206 
1207 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1208 {
1209     if (child == NULL) {
1210         return;
1211     }
1212 
1213     if (child->bs->inherits_from == parent) {
1214         child->bs->inherits_from = NULL;
1215     }
1216 
1217     bdrv_root_unref_child(child);
1218 }
1219 
1220 /*
1221  * Sets the backing file link of a BDS. A new reference is created; callers
1222  * which don't need their own reference any more must call bdrv_unref().
1223  */
1224 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1225 {
1226     if (backing_hd) {
1227         bdrv_ref(backing_hd);
1228     }
1229 
1230     if (bs->backing) {
1231         assert(bs->backing_blocker);
1232         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1233         bdrv_unref_child(bs, bs->backing);
1234     } else if (backing_hd) {
1235         error_setg(&bs->backing_blocker,
1236                    "node is used as backing hd of '%s'",
1237                    bdrv_get_device_or_node_name(bs));
1238     }
1239 
1240     if (!backing_hd) {
1241         error_free(bs->backing_blocker);
1242         bs->backing_blocker = NULL;
1243         bs->backing = NULL;
1244         goto out;
1245     }
1246     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1247     bs->open_flags &= ~BDRV_O_NO_BACKING;
1248     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1249     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1250             backing_hd->drv ? backing_hd->drv->format_name : "");
1251 
1252     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1253     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1254     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1255                     bs->backing_blocker);
1256 out:
1257     bdrv_refresh_limits(bs, NULL);
1258 }
1259 
1260 /*
1261  * Opens the backing file for a BlockDriverState if not yet open
1262  *
1263  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1264  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1265  * itself, all options starting with "${bdref_key}." are considered part of the
1266  * BlockdevRef.
1267  *
1268  * TODO Can this be unified with bdrv_open_image()?
1269  */
1270 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1271                            const char *bdref_key, Error **errp)
1272 {
1273     char *backing_filename = g_malloc0(PATH_MAX);
1274     char *bdref_key_dot;
1275     const char *reference = NULL;
1276     int ret = 0;
1277     BlockDriverState *backing_hd;
1278     QDict *options;
1279     QDict *tmp_parent_options = NULL;
1280     Error *local_err = NULL;
1281 
1282     if (bs->backing != NULL) {
1283         goto free_exit;
1284     }
1285 
1286     /* NULL means an empty set of options */
1287     if (parent_options == NULL) {
1288         tmp_parent_options = qdict_new();
1289         parent_options = tmp_parent_options;
1290     }
1291 
1292     bs->open_flags &= ~BDRV_O_NO_BACKING;
1293 
1294     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1295     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1296     g_free(bdref_key_dot);
1297 
1298     reference = qdict_get_try_str(parent_options, bdref_key);
1299     if (reference || qdict_haskey(options, "file.filename")) {
1300         backing_filename[0] = '\0';
1301     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1302         QDECREF(options);
1303         goto free_exit;
1304     } else {
1305         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1306                                        &local_err);
1307         if (local_err) {
1308             ret = -EINVAL;
1309             error_propagate(errp, local_err);
1310             QDECREF(options);
1311             goto free_exit;
1312         }
1313     }
1314 
1315     if (!bs->drv || !bs->drv->supports_backing) {
1316         ret = -EINVAL;
1317         error_setg(errp, "Driver doesn't support backing files");
1318         QDECREF(options);
1319         goto free_exit;
1320     }
1321 
1322     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1323         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1324     }
1325 
1326     backing_hd = NULL;
1327     ret = bdrv_open_inherit(&backing_hd,
1328                             *backing_filename ? backing_filename : NULL,
1329                             reference, options, 0, bs, &child_backing,
1330                             errp);
1331     if (ret < 0) {
1332         bs->open_flags |= BDRV_O_NO_BACKING;
1333         error_prepend(errp, "Could not open backing file: ");
1334         goto free_exit;
1335     }
1336 
1337     /* Hook up the backing file link; drop our reference, bs owns the
1338      * backing_hd reference now */
1339     bdrv_set_backing_hd(bs, backing_hd);
1340     bdrv_unref(backing_hd);
1341 
1342     qdict_del(parent_options, bdref_key);
1343 
1344 free_exit:
1345     g_free(backing_filename);
1346     QDECREF(tmp_parent_options);
1347     return ret;
1348 }
1349 
1350 /*
1351  * Opens a disk image whose options are given as BlockdevRef in another block
1352  * device's options.
1353  *
1354  * If allow_none is true, no image will be opened if filename is false and no
1355  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1356  *
1357  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1358  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1359  * itself, all options starting with "${bdref_key}." are considered part of the
1360  * BlockdevRef.
1361  *
1362  * The BlockdevRef will be removed from the options QDict.
1363  */
1364 BdrvChild *bdrv_open_child(const char *filename,
1365                            QDict *options, const char *bdref_key,
1366                            BlockDriverState* parent,
1367                            const BdrvChildRole *child_role,
1368                            bool allow_none, Error **errp)
1369 {
1370     BdrvChild *c = NULL;
1371     BlockDriverState *bs;
1372     QDict *image_options;
1373     int ret;
1374     char *bdref_key_dot;
1375     const char *reference;
1376 
1377     assert(child_role != NULL);
1378 
1379     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1380     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1381     g_free(bdref_key_dot);
1382 
1383     reference = qdict_get_try_str(options, bdref_key);
1384     if (!filename && !reference && !qdict_size(image_options)) {
1385         if (!allow_none) {
1386             error_setg(errp, "A block device must be specified for \"%s\"",
1387                        bdref_key);
1388         }
1389         QDECREF(image_options);
1390         goto done;
1391     }
1392 
1393     bs = NULL;
1394     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1395                             parent, child_role, errp);
1396     if (ret < 0) {
1397         goto done;
1398     }
1399 
1400     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1401 
1402 done:
1403     qdict_del(options, bdref_key);
1404     return c;
1405 }
1406 
1407 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1408                                      QDict *snapshot_options, Error **errp)
1409 {
1410     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1411     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1412     int64_t total_size;
1413     QemuOpts *opts = NULL;
1414     BlockDriverState *bs_snapshot;
1415     Error *local_err = NULL;
1416     int ret;
1417 
1418     /* if snapshot, we create a temporary backing file and open it
1419        instead of opening 'filename' directly */
1420 
1421     /* Get the required size from the image */
1422     total_size = bdrv_getlength(bs);
1423     if (total_size < 0) {
1424         ret = total_size;
1425         error_setg_errno(errp, -total_size, "Could not get image size");
1426         goto out;
1427     }
1428 
1429     /* Create the temporary image */
1430     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1431     if (ret < 0) {
1432         error_setg_errno(errp, -ret, "Could not get temporary filename");
1433         goto out;
1434     }
1435 
1436     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1437                             &error_abort);
1438     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1439     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1440     qemu_opts_del(opts);
1441     if (ret < 0) {
1442         error_prepend(errp, "Could not create temporary overlay '%s': ",
1443                       tmp_filename);
1444         goto out;
1445     }
1446 
1447     /* Prepare options QDict for the temporary file */
1448     qdict_put(snapshot_options, "file.driver",
1449               qstring_from_str("file"));
1450     qdict_put(snapshot_options, "file.filename",
1451               qstring_from_str(tmp_filename));
1452     qdict_put(snapshot_options, "driver",
1453               qstring_from_str("qcow2"));
1454 
1455     bs_snapshot = bdrv_new();
1456 
1457     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1458                     flags, &local_err);
1459     snapshot_options = NULL;
1460     if (ret < 0) {
1461         error_propagate(errp, local_err);
1462         goto out;
1463     }
1464 
1465     bdrv_append(bs_snapshot, bs);
1466 
1467 out:
1468     QDECREF(snapshot_options);
1469     g_free(tmp_filename);
1470     return ret;
1471 }
1472 
1473 /*
1474  * Opens a disk image (raw, qcow2, vmdk, ...)
1475  *
1476  * options is a QDict of options to pass to the block drivers, or NULL for an
1477  * empty set of options. The reference to the QDict belongs to the block layer
1478  * after the call (even on failure), so if the caller intends to reuse the
1479  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1480  *
1481  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1482  * If it is not NULL, the referenced BDS will be reused.
1483  *
1484  * The reference parameter may be used to specify an existing block device which
1485  * should be opened. If specified, neither options nor a filename may be given,
1486  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1487  */
1488 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1489                              const char *reference, QDict *options, int flags,
1490                              BlockDriverState *parent,
1491                              const BdrvChildRole *child_role, Error **errp)
1492 {
1493     int ret;
1494     BdrvChild *file = NULL;
1495     BlockDriverState *bs;
1496     BlockDriver *drv = NULL;
1497     const char *drvname;
1498     const char *backing;
1499     Error *local_err = NULL;
1500     QDict *snapshot_options = NULL;
1501     int snapshot_flags = 0;
1502 
1503     assert(pbs);
1504     assert(!child_role || !flags);
1505     assert(!child_role == !parent);
1506 
1507     if (reference) {
1508         bool options_non_empty = options ? qdict_size(options) : false;
1509         QDECREF(options);
1510 
1511         if (*pbs) {
1512             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1513                        "another block device");
1514             return -EINVAL;
1515         }
1516 
1517         if (filename || options_non_empty) {
1518             error_setg(errp, "Cannot reference an existing block device with "
1519                        "additional options or a new filename");
1520             return -EINVAL;
1521         }
1522 
1523         bs = bdrv_lookup_bs(reference, reference, errp);
1524         if (!bs) {
1525             return -ENODEV;
1526         }
1527 
1528         if (bs->throttle_state) {
1529             error_setg(errp, "Cannot reference an existing block device for "
1530                        "which I/O throttling is enabled");
1531             return -EINVAL;
1532         }
1533 
1534         bdrv_ref(bs);
1535         *pbs = bs;
1536         return 0;
1537     }
1538 
1539     if (*pbs) {
1540         bs = *pbs;
1541     } else {
1542         bs = bdrv_new();
1543     }
1544 
1545     /* NULL means an empty set of options */
1546     if (options == NULL) {
1547         options = qdict_new();
1548     }
1549 
1550     /* json: syntax counts as explicit options, as if in the QDict */
1551     parse_json_protocol(options, &filename, &local_err);
1552     if (local_err) {
1553         ret = -EINVAL;
1554         goto fail;
1555     }
1556 
1557     bs->explicit_options = qdict_clone_shallow(options);
1558 
1559     if (child_role) {
1560         bs->inherits_from = parent;
1561         child_role->inherit_options(&flags, options,
1562                                     parent->open_flags, parent->options);
1563     }
1564 
1565     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1566     if (local_err) {
1567         goto fail;
1568     }
1569 
1570     bs->open_flags = flags;
1571     bs->options = options;
1572     options = qdict_clone_shallow(options);
1573 
1574     /* Find the right image format driver */
1575     drvname = qdict_get_try_str(options, "driver");
1576     if (drvname) {
1577         drv = bdrv_find_format(drvname);
1578         if (!drv) {
1579             error_setg(errp, "Unknown driver: '%s'", drvname);
1580             ret = -EINVAL;
1581             goto fail;
1582         }
1583     }
1584 
1585     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1586 
1587     backing = qdict_get_try_str(options, "backing");
1588     if (backing && *backing == '\0') {
1589         flags |= BDRV_O_NO_BACKING;
1590         qdict_del(options, "backing");
1591     }
1592 
1593     /* Open image file without format layer */
1594     if ((flags & BDRV_O_PROTOCOL) == 0) {
1595         if (flags & BDRV_O_RDWR) {
1596             flags |= BDRV_O_ALLOW_RDWR;
1597         }
1598         if (flags & BDRV_O_SNAPSHOT) {
1599             snapshot_options = qdict_new();
1600             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1601                                        flags, options);
1602             bdrv_backing_options(&flags, options, flags, options);
1603         }
1604 
1605         bs->open_flags = flags;
1606 
1607         file = bdrv_open_child(filename, options, "file", bs,
1608                                &child_file, true, &local_err);
1609         if (local_err) {
1610             ret = -EINVAL;
1611             goto fail;
1612         }
1613     }
1614 
1615     /* Image format probing */
1616     bs->probed = !drv;
1617     if (!drv && file) {
1618         ret = find_image_format(file->bs, filename, &drv, &local_err);
1619         if (ret < 0) {
1620             goto fail;
1621         }
1622         /*
1623          * This option update would logically belong in bdrv_fill_options(),
1624          * but we first need to open bs->file for the probing to work, while
1625          * opening bs->file already requires the (mostly) final set of options
1626          * so that cache mode etc. can be inherited.
1627          *
1628          * Adding the driver later is somewhat ugly, but it's not an option
1629          * that would ever be inherited, so it's correct. We just need to make
1630          * sure to update both bs->options (which has the full effective
1631          * options for bs) and options (which has file.* already removed).
1632          */
1633         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1634         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1635     } else if (!drv) {
1636         error_setg(errp, "Must specify either driver or file");
1637         ret = -EINVAL;
1638         goto fail;
1639     }
1640 
1641     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1642     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1643     /* file must be NULL if a protocol BDS is about to be created
1644      * (the inverse results in an error message from bdrv_open_common()) */
1645     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1646 
1647     /* Open the image */
1648     ret = bdrv_open_common(bs, file, options, &local_err);
1649     if (ret < 0) {
1650         goto fail;
1651     }
1652 
1653     if (file && (bs->file != file)) {
1654         bdrv_unref_child(bs, file);
1655         file = NULL;
1656     }
1657 
1658     /* If there is a backing file, use it */
1659     if ((flags & BDRV_O_NO_BACKING) == 0) {
1660         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1661         if (ret < 0) {
1662             goto close_and_fail;
1663         }
1664     }
1665 
1666     bdrv_refresh_filename(bs);
1667 
1668     /* Check if any unknown options were used */
1669     if (options && (qdict_size(options) != 0)) {
1670         const QDictEntry *entry = qdict_first(options);
1671         if (flags & BDRV_O_PROTOCOL) {
1672             error_setg(errp, "Block protocol '%s' doesn't support the option "
1673                        "'%s'", drv->format_name, entry->key);
1674         } else {
1675             error_setg(errp,
1676                        "Block format '%s' does not support the option '%s'",
1677                        drv->format_name, entry->key);
1678         }
1679 
1680         ret = -EINVAL;
1681         goto close_and_fail;
1682     }
1683 
1684     if (!bdrv_key_required(bs)) {
1685         if (bs->blk) {
1686             blk_dev_change_media_cb(bs->blk, true);
1687         }
1688     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1689                && !runstate_check(RUN_STATE_INMIGRATE)
1690                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1691         error_setg(errp,
1692                    "Guest must be stopped for opening of encrypted image");
1693         ret = -EBUSY;
1694         goto close_and_fail;
1695     }
1696 
1697     QDECREF(options);
1698     *pbs = bs;
1699 
1700     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1701      * temporary snapshot afterwards. */
1702     if (snapshot_flags) {
1703         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1704                                         &local_err);
1705         snapshot_options = NULL;
1706         if (local_err) {
1707             goto close_and_fail;
1708         }
1709     }
1710 
1711     return 0;
1712 
1713 fail:
1714     if (file != NULL) {
1715         bdrv_unref_child(bs, file);
1716     }
1717     QDECREF(snapshot_options);
1718     QDECREF(bs->explicit_options);
1719     QDECREF(bs->options);
1720     QDECREF(options);
1721     bs->options = NULL;
1722     if (!*pbs) {
1723         /* If *pbs is NULL, a new BDS has been created in this function and
1724            needs to be freed now. Otherwise, it does not need to be closed,
1725            since it has not really been opened yet. */
1726         bdrv_unref(bs);
1727     }
1728     if (local_err) {
1729         error_propagate(errp, local_err);
1730     }
1731     return ret;
1732 
1733 close_and_fail:
1734     /* See fail path, but now the BDS has to be always closed */
1735     if (*pbs) {
1736         bdrv_close(bs);
1737     } else {
1738         bdrv_unref(bs);
1739     }
1740     QDECREF(snapshot_options);
1741     QDECREF(options);
1742     if (local_err) {
1743         error_propagate(errp, local_err);
1744     }
1745     return ret;
1746 }
1747 
1748 int bdrv_open(BlockDriverState **pbs, const char *filename,
1749               const char *reference, QDict *options, int flags, Error **errp)
1750 {
1751     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1752                              NULL, errp);
1753 }
1754 
1755 typedef struct BlockReopenQueueEntry {
1756      bool prepared;
1757      BDRVReopenState state;
1758      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1759 } BlockReopenQueueEntry;
1760 
1761 /*
1762  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1763  * reopen of multiple devices.
1764  *
1765  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1766  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1767  * be created and initialized. This newly created BlockReopenQueue should be
1768  * passed back in for subsequent calls that are intended to be of the same
1769  * atomic 'set'.
1770  *
1771  * bs is the BlockDriverState to add to the reopen queue.
1772  *
1773  * options contains the changed options for the associated bs
1774  * (the BlockReopenQueue takes ownership)
1775  *
1776  * flags contains the open flags for the associated bs
1777  *
1778  * returns a pointer to bs_queue, which is either the newly allocated
1779  * bs_queue, or the existing bs_queue being used.
1780  *
1781  */
1782 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1783                                                  BlockDriverState *bs,
1784                                                  QDict *options,
1785                                                  int flags,
1786                                                  const BdrvChildRole *role,
1787                                                  QDict *parent_options,
1788                                                  int parent_flags)
1789 {
1790     assert(bs != NULL);
1791 
1792     BlockReopenQueueEntry *bs_entry;
1793     BdrvChild *child;
1794     QDict *old_options, *explicit_options;
1795 
1796     if (bs_queue == NULL) {
1797         bs_queue = g_new0(BlockReopenQueue, 1);
1798         QSIMPLEQ_INIT(bs_queue);
1799     }
1800 
1801     if (!options) {
1802         options = qdict_new();
1803     }
1804 
1805     /*
1806      * Precedence of options:
1807      * 1. Explicitly passed in options (highest)
1808      * 2. Set in flags (only for top level)
1809      * 3. Retained from explicitly set options of bs
1810      * 4. Inherited from parent node
1811      * 5. Retained from effective options of bs
1812      */
1813 
1814     if (!parent_options) {
1815         /*
1816          * Any setting represented by flags is always updated. If the
1817          * corresponding QDict option is set, it takes precedence. Otherwise
1818          * the flag is translated into a QDict option. The old setting of bs is
1819          * not considered.
1820          */
1821         update_options_from_flags(options, flags);
1822     }
1823 
1824     /* Old explicitly set values (don't overwrite by inherited value) */
1825     old_options = qdict_clone_shallow(bs->explicit_options);
1826     bdrv_join_options(bs, options, old_options);
1827     QDECREF(old_options);
1828 
1829     explicit_options = qdict_clone_shallow(options);
1830 
1831     /* Inherit from parent node */
1832     if (parent_options) {
1833         assert(!flags);
1834         role->inherit_options(&flags, options, parent_flags, parent_options);
1835     }
1836 
1837     /* Old values are used for options that aren't set yet */
1838     old_options = qdict_clone_shallow(bs->options);
1839     bdrv_join_options(bs, options, old_options);
1840     QDECREF(old_options);
1841 
1842     /* bdrv_open() masks this flag out */
1843     flags &= ~BDRV_O_PROTOCOL;
1844 
1845     QLIST_FOREACH(child, &bs->children, next) {
1846         QDict *new_child_options;
1847         char *child_key_dot;
1848 
1849         /* reopen can only change the options of block devices that were
1850          * implicitly created and inherited options. For other (referenced)
1851          * block devices, a syntax like "backing.foo" results in an error. */
1852         if (child->bs->inherits_from != bs) {
1853             continue;
1854         }
1855 
1856         child_key_dot = g_strdup_printf("%s.", child->name);
1857         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1858         g_free(child_key_dot);
1859 
1860         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1861                                 child->role, options, flags);
1862     }
1863 
1864     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1865     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1866 
1867     bs_entry->state.bs = bs;
1868     bs_entry->state.options = options;
1869     bs_entry->state.explicit_options = explicit_options;
1870     bs_entry->state.flags = flags;
1871 
1872     return bs_queue;
1873 }
1874 
1875 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1876                                     BlockDriverState *bs,
1877                                     QDict *options, int flags)
1878 {
1879     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1880                                    NULL, NULL, 0);
1881 }
1882 
1883 /*
1884  * Reopen multiple BlockDriverStates atomically & transactionally.
1885  *
1886  * The queue passed in (bs_queue) must have been built up previous
1887  * via bdrv_reopen_queue().
1888  *
1889  * Reopens all BDS specified in the queue, with the appropriate
1890  * flags.  All devices are prepared for reopen, and failure of any
1891  * device will cause all device changes to be abandonded, and intermediate
1892  * data cleaned up.
1893  *
1894  * If all devices prepare successfully, then the changes are committed
1895  * to all devices.
1896  *
1897  */
1898 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1899 {
1900     int ret = -1;
1901     BlockReopenQueueEntry *bs_entry, *next;
1902     Error *local_err = NULL;
1903 
1904     assert(bs_queue != NULL);
1905 
1906     bdrv_drain_all();
1907 
1908     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1909         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1910             error_propagate(errp, local_err);
1911             goto cleanup;
1912         }
1913         bs_entry->prepared = true;
1914     }
1915 
1916     /* If we reach this point, we have success and just need to apply the
1917      * changes
1918      */
1919     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1920         bdrv_reopen_commit(&bs_entry->state);
1921     }
1922 
1923     ret = 0;
1924 
1925 cleanup:
1926     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1927         if (ret && bs_entry->prepared) {
1928             bdrv_reopen_abort(&bs_entry->state);
1929         } else if (ret) {
1930             QDECREF(bs_entry->state.explicit_options);
1931         }
1932         QDECREF(bs_entry->state.options);
1933         g_free(bs_entry);
1934     }
1935     g_free(bs_queue);
1936     return ret;
1937 }
1938 
1939 
1940 /* Reopen a single BlockDriverState with the specified flags. */
1941 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1942 {
1943     int ret = -1;
1944     Error *local_err = NULL;
1945     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1946 
1947     ret = bdrv_reopen_multiple(queue, &local_err);
1948     if (local_err != NULL) {
1949         error_propagate(errp, local_err);
1950     }
1951     return ret;
1952 }
1953 
1954 
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state carries the BlockDriverState to reopen (->bs), the new open
 *              flags (->flags) and the new options (->options)
 * queue is the reopen queue; it is passed through to the driver callback
 * errp receives the error description on failure
 *
 * Returns 0 on success, non-zero on error.  On error errp will be set
 * as well.
 *
 * NOTE(review): this comment used to state that bdrv_reopen_abort() will be
 * called on failure to clean up any data.  bdrv_reopen_multiple() only
 * aborts entries that were marked as prepared, which excludes the entry
 * whose prepare step failed - confirm the intended contract.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;
    QemuOpts *opts;
    const char *value;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* Process generic block layer options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto error;
    }

    /* Fold the generic options into the requested open flags */
    update_flags_from_options(&reopen_state->flags, opts);

    /* node-name and driver must be unchanged. Put them back into the QDict, so
     * that they are checked at the end of this function. */
    value = qemu_opt_get(opts, "node-name");
    if (value) {
        qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
    }

    value = qemu_opt_get(opts, "driver");
    if (value) {
        qdict_put(reopen_state->options, "driver", qstring_from_str(value));
    }

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_setg(errp, "Node '%s' is read only",
                   bdrv_get_device_or_node_name(reopen_state->bs));
        /* ret still holds its initial value of -1 here */
        goto error;
    }


    /* Flush the node before the driver stages any change */
    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_setg_errno(errp, -ret, "Error flushing drive");
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            /* Fall back to a generic message if the driver set no error */
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    if (qdict_size(reopen_state->options)) {
        const QDictEntry *entry = qdict_first(reopen_state->options);

        do {
            /* NOTE(review): this treats every remaining option value as a
             * QString; presumably qobject_to_qstring() yields NULL for any
             * other type, which qstring_get_str() would then dereference -
             * confirm that only string values can reach this point. */
            QString *new_obj = qobject_to_qstring(entry->value);
            const char *new = qstring_get_str(new_obj);
            const char *old = qdict_get_try_str(reopen_state->bs->options,
                                                entry->key);

            /* Reject options that are new or differ from the current value */
            if (!old || strcmp(new, old)) {
                error_setg(errp, "Cannot change the option '%s'", entry->key);
                ret = -EINVAL;
                goto error;
            }
        } while ((entry = qdict_next(reopen_state->options, entry)));
    }

    ret = 0;

error:
    /* The temporary QemuOpts is released on success and failure alike */
    qemu_opts_del(opts);
    return ret;
}
2071 
2072 /*
2073  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2074  * makes them final by swapping the staging BlockDriverState contents into
2075  * the active BlockDriverState contents.
2076  */
2077 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2078 {
2079     BlockDriver *drv;
2080 
2081     assert(reopen_state != NULL);
2082     drv = reopen_state->bs->drv;
2083     assert(drv != NULL);
2084 
2085     /* If there are any driver level actions to take */
2086     if (drv->bdrv_reopen_commit) {
2087         drv->bdrv_reopen_commit(reopen_state);
2088     }
2089 
2090     /* set BDS specific flags now */
2091     QDECREF(reopen_state->bs->explicit_options);
2092 
2093     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2094     reopen_state->bs->open_flags         = reopen_state->flags;
2095     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2096 
2097     bdrv_refresh_limits(reopen_state->bs, NULL);
2098 }
2099 
2100 /*
2101  * Abort the reopen, and delete and free the staged changes in
2102  * reopen_state
2103  */
2104 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2105 {
2106     BlockDriver *drv;
2107 
2108     assert(reopen_state != NULL);
2109     drv = reopen_state->bs->drv;
2110     assert(drv != NULL);
2111 
2112     if (drv->bdrv_reopen_abort) {
2113         drv->bdrv_reopen_abort(reopen_state);
2114     }
2115 
2116     QDECREF(reopen_state->explicit_options);
2117 }
2118 
2119 
2120 static void bdrv_close(BlockDriverState *bs)
2121 {
2122     BdrvAioNotifier *ban, *ban_next;
2123 
2124     assert(!bs->job);
2125 
2126     /* Disable I/O limits and drain all pending throttled requests */
2127     if (bs->throttle_state) {
2128         bdrv_io_limits_disable(bs);
2129     }
2130 
2131     bdrv_drained_begin(bs); /* complete I/O */
2132     bdrv_flush(bs);
2133     bdrv_drain(bs); /* in case flush left pending I/O */
2134 
2135     bdrv_release_named_dirty_bitmaps(bs);
2136     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2137 
2138     if (bs->blk) {
2139         blk_dev_change_media_cb(bs->blk, false);
2140     }
2141 
2142     if (bs->drv) {
2143         BdrvChild *child, *next;
2144 
2145         bs->drv->bdrv_close(bs);
2146         bs->drv = NULL;
2147 
2148         bdrv_set_backing_hd(bs, NULL);
2149 
2150         if (bs->file != NULL) {
2151             bdrv_unref_child(bs, bs->file);
2152             bs->file = NULL;
2153         }
2154 
2155         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2156             /* TODO Remove bdrv_unref() from drivers' close function and use
2157              * bdrv_unref_child() here */
2158             if (child->bs->inherits_from == bs) {
2159                 child->bs->inherits_from = NULL;
2160             }
2161             bdrv_detach_child(child);
2162         }
2163 
2164         g_free(bs->opaque);
2165         bs->opaque = NULL;
2166         bs->copy_on_read = 0;
2167         bs->backing_file[0] = '\0';
2168         bs->backing_format[0] = '\0';
2169         bs->total_sectors = 0;
2170         bs->encrypted = 0;
2171         bs->valid_key = 0;
2172         bs->sg = 0;
2173         bs->zero_beyond_eof = false;
2174         QDECREF(bs->options);
2175         QDECREF(bs->explicit_options);
2176         bs->options = NULL;
2177         QDECREF(bs->full_open_options);
2178         bs->full_open_options = NULL;
2179     }
2180 
2181     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2182         g_free(ban);
2183     }
2184     QLIST_INIT(&bs->aio_notifiers);
2185     bdrv_drained_end(bs);
2186 }
2187 
2188 void bdrv_close_all(void)
2189 {
2190     BlockDriverState *bs;
2191     AioContext *aio_context;
2192 
2193     /* Drop references from requests still in flight, such as canceled block
2194      * jobs whose AIO context has not been polled yet */
2195     bdrv_drain_all();
2196 
2197     blk_remove_all_bs();
2198     blockdev_close_all_bdrv_states();
2199 
2200     /* Cancel all block jobs */
2201     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2202         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2203             aio_context = bdrv_get_aio_context(bs);
2204 
2205             aio_context_acquire(aio_context);
2206             if (bs->job) {
2207                 block_job_cancel_sync(bs->job);
2208                 aio_context_release(aio_context);
2209                 break;
2210             }
2211             aio_context_release(aio_context);
2212         }
2213 
2214         /* All the remaining BlockDriverStates are referenced directly or
2215          * indirectly from block jobs, so there needs to be at least one BDS
2216          * directly used by a block job */
2217         assert(bs);
2218     }
2219 }
2220 
2221 /* Fields that need to stay with the top-level BDS */
2222 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2223                                      BlockDriverState *bs_src)
2224 {
2225     /* move some fields that need to stay attached to the device */
2226 }
2227 
2228 static void change_parent_backing_link(BlockDriverState *from,
2229                                        BlockDriverState *to)
2230 {
2231     BdrvChild *c, *next;
2232 
2233     if (from->blk) {
2234         /* FIXME We bypass blk_set_bs(), so we need to make these updates
2235          * manually. The root problem is not in this change function, but the
2236          * existence of BlockDriverState.blk. */
2237         to->blk = from->blk;
2238         from->blk = NULL;
2239     }
2240 
2241     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2242         assert(c->role != &child_backing);
2243         c->bs = to;
2244         QLIST_REMOVE(c, next_parent);
2245         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2246         bdrv_ref(to);
2247         bdrv_unref(from);
2248     }
2249 }
2250 
2251 static void swap_feature_fields(BlockDriverState *bs_top,
2252                                 BlockDriverState *bs_new)
2253 {
2254     BlockDriverState tmp;
2255 
2256     bdrv_move_feature_fields(&tmp, bs_top);
2257     bdrv_move_feature_fields(bs_top, bs_new);
2258     bdrv_move_feature_fields(bs_new, &tmp);
2259 
2260     assert(!bs_new->throttle_state);
2261     if (bs_top->throttle_state) {
2262         /*
2263          * FIXME Need to break I/O throttling with graph manipulations
2264          * temporarily because of conflicting invariants (3. will go away when
2265          * throttling is fully converted to work on BlockBackends):
2266          *
2267          * 1. Every BlockBackend has a single root BDS
2268          * 2. I/O throttling functions require an attached BlockBackend
2269          * 3. We need to first enable throttling on the new BDS and then
2270          *    disable it on the old one (because of throttle group refcounts)
2271          */
2272 #if 0
2273         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2274         bdrv_io_limits_disable(bs_top);
2275 #else
2276         abort();
2277 #endif
2278     }
2279 }
2280 
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * The parents of bs_top (and its BlockBackend, if any) are redirected to
 * bs_new, the feature fields of the two nodes are swapped, and bs_top
 * becomes the backing file of bs_new. Both bs_new and bs_top are modified.
 *
 * Neither node may have requests pending.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 *
 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
 * that's what the callers commonly need. bs_new will be referenced by the old
 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
 * reference of its own, it must call bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold a temporary reference: moving the parent links over to bs_new
     * drops one reference to bs_top per link */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    /* Link bs_top below bs_new, then release the temporary reference */
    bdrv_set_backing_hd(bs_new, bs_top);
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2315 
2316 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2317 {
2318     assert(!bdrv_requests_pending(old));
2319     assert(!bdrv_requests_pending(new));
2320 
2321     bdrv_ref(old);
2322 
2323     if (old->blk) {
2324         /* As long as these fields aren't in BlockBackend, but in the top-level
2325          * BlockDriverState, it's not possible for a BDS to have two BBs.
2326          *
2327          * We really want to copy the fields from old to new, but we go for a
2328          * swap instead so that pointers aren't duplicated and cause trouble.
2329          * (Also, bdrv_swap() used to do the same.) */
2330         assert(!new->blk);
2331         swap_feature_fields(old, new);
2332     }
2333     change_parent_backing_link(old, new);
2334 
2335     /* Change backing files if a previously independent node is added to the
2336      * chain. For active commit, we replace top by its own (indirect) backing
2337      * file and don't do anything here so we don't build a loop. */
2338     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2339         bdrv_set_backing_hd(new, backing_bs(old));
2340         bdrv_set_backing_hd(old, NULL);
2341     }
2342 
2343     bdrv_unref(old);
2344 }
2345 
2346 static void bdrv_delete(BlockDriverState *bs)
2347 {
2348     assert(!bs->job);
2349     assert(bdrv_op_blocker_is_empty(bs));
2350     assert(!bs->refcnt);
2351 
2352     bdrv_close(bs);
2353 
2354     /* remove from list, if necessary */
2355     if (bs->node_name[0] != '\0') {
2356         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2357     }
2358     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2359 
2360     g_free(bs);
2361 }
2362 
2363 /*
2364  * Run consistency checks on an image
2365  *
2366  * Returns 0 if the check could be completed (it doesn't mean that the image is
2367  * free of errors) or -errno when an internal error occurred. The results of the
2368  * check are stored in res.
2369  */
2370 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2371 {
2372     if (bs->drv == NULL) {
2373         return -ENOMEDIUM;
2374     }
2375     if (bs->drv->bdrv_check == NULL) {
2376         return -ENOTSUP;
2377     }
2378 
2379     memset(res, 0, sizeof(*res));
2380     return bs->drv->bdrv_check(bs, res, fix);
2381 }
2382 
/* Maximum number of sectors bdrv_commit() queries and copies per iteration;
 * also determines the size of its bounce buffer */
#define COMMIT_BUF_SECTORS 2048

/*
 * commit COW file into the raw image
 *
 * Copies every range that bdrv_is_allocated() reports as allocated in @bs
 * into its backing file.  A read-only backing file is reopened read-write
 * for the duration of the copy; a backing file shorter than @bs is grown
 * first.  If the driver of @bs implements .bdrv_make_empty(), it is called
 * after the copy.
 *
 * Returns 0 on success and a negative errno on failure:
 * -ENOMEDIUM if @bs has no driver, -ENOTSUP if it has no backing file,
 * -EBUSY if the commit is blocked on @bs or its backing file, -EACCES if
 * the backing file cannot be reopened read-write, -ENOMEM if the bounce
 * buffer cannot be allocated, or the error of the failing length query,
 * truncate, allocation query, read, write or make_empty call.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's state so it can be restored at the end */
    ro = bs->backing->bs->read_only;
    open_flags =  bs->backing->bs->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* n is filled in by bdrv_is_allocated() and advances the loop;
     * presumably it is the number of contiguous sectors that share the
     * reported allocation state - confirm against bdrv_is_allocated() */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            /* Allocated in the top image: copy it down */
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing->bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* NOTE(review): the result of this flush is not checked */
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing) {
        /* NOTE(review): the result of this flush is not checked either */
        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        /* NOTE(review): unlike the flush above, bs->backing is dereferenced
         * here without a NULL check - confirm it cannot change meanwhile */
        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2492 
2493 /*
2494  * Return values:
2495  * 0        - success
2496  * -EINVAL  - backing format specified, but no file
2497  * -ENOSPC  - can't update the backing file because no space is left in the
2498  *            image file header
2499  * -ENOTSUP - format driver doesn't support changing the backing file
2500  */
2501 int bdrv_change_backing_file(BlockDriverState *bs,
2502     const char *backing_file, const char *backing_fmt)
2503 {
2504     BlockDriver *drv = bs->drv;
2505     int ret;
2506 
2507     /* Backing file format doesn't make sense without a backing file */
2508     if (backing_fmt && !backing_file) {
2509         return -EINVAL;
2510     }
2511 
2512     if (drv->bdrv_change_backing_file != NULL) {
2513         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2514     } else {
2515         ret = -ENOTSUP;
2516     }
2517 
2518     if (ret == 0) {
2519         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2520         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2521     }
2522     return ret;
2523 }
2524 
2525 /*
2526  * Finds the image layer in the chain that has 'bs' as its backing file.
2527  *
2528  * active is the current topmost image.
2529  *
2530  * Returns NULL if bs is not found in active's image chain,
2531  * or if active == bs.
2532  *
2533  * Returns the bottommost base image if bs == NULL.
2534  */
2535 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2536                                     BlockDriverState *bs)
2537 {
2538     while (active && bs != backing_bs(active)) {
2539         active = backing_bs(active);
2540     }
2541 
2542     return active;
2543 }
2544 
2545 /* Given a BDS, searches for the base layer. */
2546 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2547 {
2548     return bdrv_find_overlay(bs, NULL);
2549 }
2550 
2551 /*
2552  * Drops images above 'base' up to and including 'top', and sets the image
2553  * above 'top' to have base as its backing file.
2554  *
2555  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2556  * information in 'bs' can be properly updated.
2557  *
2558  * E.g., this will convert the following chain:
2559  * bottom <- base <- intermediate <- top <- active
2560  *
2561  * to
2562  *
2563  * bottom <- base <- active
2564  *
2565  * It is allowed for bottom==base, in which case it converts:
2566  *
2567  * base <- intermediate <- top <- active
2568  *
2569  * to
2570  *
2571  * base <- active
2572  *
2573  * If backing_file_str is non-NULL, it will be used when modifying top's
2574  * overlay image metadata.
2575  *
2576  * Error conditions:
2577  *  if active == top, that is considered an error
2578  *
2579  */
2580 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2581                            BlockDriverState *base, const char *backing_file_str)
2582 {
2583     BlockDriverState *new_top_bs = NULL;
2584     int ret = -EIO;
2585 
2586     if (!top->drv || !base->drv) {
2587         goto exit;
2588     }
2589 
2590     new_top_bs = bdrv_find_overlay(active, top);
2591 
2592     if (new_top_bs == NULL) {
2593         /* we could not find the image above 'top', this is an error */
2594         goto exit;
2595     }
2596 
2597     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2598      * to do, no intermediate images */
2599     if (backing_bs(new_top_bs) == base) {
2600         ret = 0;
2601         goto exit;
2602     }
2603 
2604     /* Make sure that base is in the backing chain of top */
2605     if (!bdrv_chain_contains(top, base)) {
2606         goto exit;
2607     }
2608 
2609     /* success - we can delete the intermediate states, and link top->base */
2610     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2611     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2612                                    base->drv ? base->drv->format_name : "");
2613     if (ret) {
2614         goto exit;
2615     }
2616     bdrv_set_backing_hd(new_top_bs, base);
2617 
2618     ret = 0;
2619 exit:
2620     return ret;
2621 }
2622 
2623 /**
2624  * Truncate file to 'offset' bytes (needed only for file protocols)
2625  */
2626 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2627 {
2628     BlockDriver *drv = bs->drv;
2629     int ret;
2630     if (!drv)
2631         return -ENOMEDIUM;
2632     if (!drv->bdrv_truncate)
2633         return -ENOTSUP;
2634     if (bs->read_only)
2635         return -EACCES;
2636 
2637     ret = drv->bdrv_truncate(bs, offset);
2638     if (ret == 0) {
2639         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2640         bdrv_dirty_bitmap_truncate(bs);
2641         if (bs->blk) {
2642             blk_dev_resize_cb(bs->blk);
2643         }
2644     }
2645     return ret;
2646 }
2647 
2648 /**
2649  * Length of a allocated file in bytes. Sparse files are counted by actual
2650  * allocated space. Return < 0 if error or unknown.
2651  */
2652 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2653 {
2654     BlockDriver *drv = bs->drv;
2655     if (!drv) {
2656         return -ENOMEDIUM;
2657     }
2658     if (drv->bdrv_get_allocated_file_size) {
2659         return drv->bdrv_get_allocated_file_size(bs);
2660     }
2661     if (bs->file) {
2662         return bdrv_get_allocated_file_size(bs->file->bs);
2663     }
2664     return -ENOTSUP;
2665 }
2666 
2667 /**
2668  * Return number of sectors on success, -errno on error.
2669  */
2670 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2671 {
2672     BlockDriver *drv = bs->drv;
2673 
2674     if (!drv)
2675         return -ENOMEDIUM;
2676 
2677     if (drv->has_variable_length) {
2678         int ret = refresh_total_sectors(bs, bs->total_sectors);
2679         if (ret < 0) {
2680             return ret;
2681         }
2682     }
2683     return bs->total_sectors;
2684 }
2685 
2686 /**
2687  * Return length in bytes on success, -errno on error.
2688  * The length is always a multiple of BDRV_SECTOR_SIZE.
2689  */
2690 int64_t bdrv_getlength(BlockDriverState *bs)
2691 {
2692     int64_t ret = bdrv_nb_sectors(bs);
2693 
2694     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2695     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2696 }
2697 
2698 /* return 0 as number of sectors if no device present or error */
2699 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2700 {
2701     int64_t nb_sectors = bdrv_nb_sectors(bs);
2702 
2703     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2704 }
2705 
/* Return the read_only field of @bs */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2710 
/* Return the sg field of @bs */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2715 
2716 int bdrv_is_encrypted(BlockDriverState *bs)
2717 {
2718     if (bs->backing && bs->backing->bs->encrypted) {
2719         return 1;
2720     }
2721     return bs->encrypted;
2722 }
2723 
2724 int bdrv_key_required(BlockDriverState *bs)
2725 {
2726     BdrvChild *backing = bs->backing;
2727 
2728     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2729         return 1;
2730     }
2731     return (bs->encrypted && !bs->valid_key);
2732 }
2733 
2734 int bdrv_set_key(BlockDriverState *bs, const char *key)
2735 {
2736     int ret;
2737     if (bs->backing && bs->backing->bs->encrypted) {
2738         ret = bdrv_set_key(bs->backing->bs, key);
2739         if (ret < 0)
2740             return ret;
2741         if (!bs->encrypted)
2742             return 0;
2743     }
2744     if (!bs->encrypted) {
2745         return -EINVAL;
2746     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2747         return -ENOMEDIUM;
2748     }
2749     ret = bs->drv->bdrv_set_key(bs, key);
2750     if (ret < 0) {
2751         bs->valid_key = 0;
2752     } else if (!bs->valid_key) {
2753         bs->valid_key = 1;
2754         if (bs->blk) {
2755             /* call the change callback now, we skipped it on open */
2756             blk_dev_change_media_cb(bs->blk, true);
2757         }
2758     }
2759     return ret;
2760 }
2761 
2762 /*
2763  * Provide an encryption key for @bs.
2764  * If @key is non-null:
2765  *     If @bs is not encrypted, fail.
2766  *     Else if the key is invalid, fail.
2767  *     Else set @bs's key to @key, replacing the existing key, if any.
2768  * If @key is null:
2769  *     If @bs is encrypted and still lacks a key, fail.
2770  *     Else do nothing.
2771  * On failure, store an error object through @errp if non-null.
2772  */
2773 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2774 {
2775     if (key) {
2776         if (!bdrv_is_encrypted(bs)) {
2777             error_setg(errp, "Node '%s' is not encrypted",
2778                       bdrv_get_device_or_node_name(bs));
2779         } else if (bdrv_set_key(bs, key) < 0) {
2780             error_setg(errp, QERR_INVALID_PASSWORD);
2781         }
2782     } else {
2783         if (bdrv_key_required(bs)) {
2784             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2785                       "'%s' (%s) is encrypted",
2786                       bdrv_get_device_or_node_name(bs),
2787                       bdrv_get_encrypted_filename(bs));
2788         }
2789     }
2790 }
2791 
2792 const char *bdrv_get_format_name(BlockDriverState *bs)
2793 {
2794     return bs->drv ? bs->drv->format_name : NULL;
2795 }
2796 
/*
 * qsort() comparator for an array of C strings (an array of 'const char *').
 *
 * qsort() hands the comparator pointers to the array elements, so @a and @b
 * point to the string pointers and have to be dereferenced once before the
 * strings themselves can be compared.
 *
 * Returns a value less than, equal to or greater than zero, as strcmp()
 * does for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2801 
2802 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2803                          void *opaque)
2804 {
2805     BlockDriver *drv;
2806     int count = 0;
2807     int i;
2808     const char **formats = NULL;
2809 
2810     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2811         if (drv->format_name) {
2812             bool found = false;
2813             int i = count;
2814             while (formats && i && !found) {
2815                 found = !strcmp(formats[--i], drv->format_name);
2816             }
2817 
2818             if (!found) {
2819                 formats = g_renew(const char *, formats, count + 1);
2820                 formats[count++] = drv->format_name;
2821             }
2822         }
2823     }
2824 
2825     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2826 
2827     for (i = 0; i < count; i++) {
2828         it(opaque, formats[i]);
2829     }
2830 
2831     g_free(formats);
2832 }
2833 
2834 /* This function is to find a node in the bs graph */
2835 BlockDriverState *bdrv_find_node(const char *node_name)
2836 {
2837     BlockDriverState *bs;
2838 
2839     assert(node_name);
2840 
2841     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2842         if (!strcmp(node_name, bs->node_name)) {
2843             return bs;
2844         }
2845     }
2846     return NULL;
2847 }
2848 
2849 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2850 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2851 {
2852     BlockDeviceInfoList *list, *entry;
2853     BlockDriverState *bs;
2854 
2855     list = NULL;
2856     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2857         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
2858         if (!info) {
2859             qapi_free_BlockDeviceInfoList(list);
2860             return NULL;
2861         }
2862         entry = g_malloc0(sizeof(*entry));
2863         entry->value = info;
2864         entry->next = list;
2865         list = entry;
2866     }
2867 
2868     return list;
2869 }
2870 
2871 BlockDriverState *bdrv_lookup_bs(const char *device,
2872                                  const char *node_name,
2873                                  Error **errp)
2874 {
2875     BlockBackend *blk;
2876     BlockDriverState *bs;
2877 
2878     if (device) {
2879         blk = blk_by_name(device);
2880 
2881         if (blk) {
2882             bs = blk_bs(blk);
2883             if (!bs) {
2884                 error_setg(errp, "Device '%s' has no medium", device);
2885             }
2886 
2887             return bs;
2888         }
2889     }
2890 
2891     if (node_name) {
2892         bs = bdrv_find_node(node_name);
2893 
2894         if (bs) {
2895             return bs;
2896         }
2897     }
2898 
2899     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2900                      device ? device : "",
2901                      node_name ? node_name : "");
2902     return NULL;
2903 }
2904 
2905 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2906  * return false.  If either argument is NULL, return false. */
2907 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2908 {
2909     while (top && top != base) {
2910         top = backing_bs(top);
2911     }
2912 
2913     return top != NULL;
2914 }
2915 
2916 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2917 {
2918     if (!bs) {
2919         return QTAILQ_FIRST(&graph_bdrv_states);
2920     }
2921     return QTAILQ_NEXT(bs, node_list);
2922 }
2923 
2924 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2925  * the monitor or attached to a BlockBackend */
2926 BlockDriverState *bdrv_next(BlockDriverState *bs)
2927 {
2928     if (!bs || bs->blk) {
2929         bs = blk_next_root_bs(bs);
2930         if (bs) {
2931             return bs;
2932         }
2933     }
2934 
2935     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2936      * handled by the above block already */
2937     do {
2938         bs = bdrv_next_monitor_owned(bs);
2939     } while (bs && bs->blk);
2940     return bs;
2941 }
2942 
2943 const char *bdrv_get_node_name(const BlockDriverState *bs)
2944 {
2945     return bs->node_name;
2946 }
2947 
2948 /* TODO check what callers really want: bs->node_name or blk_name() */
2949 const char *bdrv_get_device_name(const BlockDriverState *bs)
2950 {
2951     return bs->blk ? blk_name(bs->blk) : "";
2952 }
2953 
2954 /* This can be used to identify nodes that might not have a device
2955  * name associated. Since node and device names live in the same
2956  * namespace, the result is unambiguous. The exception is if both are
2957  * absent, then this returns an empty (non-null) string. */
2958 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2959 {
2960     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2961 }
2962 
2963 int bdrv_get_flags(BlockDriverState *bs)
2964 {
2965     return bs->open_flags;
2966 }
2967 
2968 int bdrv_has_zero_init_1(BlockDriverState *bs)
2969 {
2970     return 1;
2971 }
2972 
2973 int bdrv_has_zero_init(BlockDriverState *bs)
2974 {
2975     assert(bs->drv);
2976 
2977     /* If BS is a copy on write image, it is initialized to
2978        the contents of the base image, which may not be zeroes.  */
2979     if (bs->backing) {
2980         return 0;
2981     }
2982     if (bs->drv->bdrv_has_zero_init) {
2983         return bs->drv->bdrv_has_zero_init(bs);
2984     }
2985 
2986     /* safe default */
2987     return 0;
2988 }
2989 
2990 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2991 {
2992     BlockDriverInfo bdi;
2993 
2994     if (bs->backing) {
2995         return false;
2996     }
2997 
2998     if (bdrv_get_info(bs, &bdi) == 0) {
2999         return bdi.unallocated_blocks_are_zero;
3000     }
3001 
3002     return false;
3003 }
3004 
3005 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3006 {
3007     BlockDriverInfo bdi;
3008 
3009     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3010         return false;
3011     }
3012 
3013     if (bdrv_get_info(bs, &bdi) == 0) {
3014         return bdi.can_write_zeroes_with_unmap;
3015     }
3016 
3017     return false;
3018 }
3019 
3020 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3021 {
3022     if (bs->backing && bs->backing->bs->encrypted)
3023         return bs->backing_file;
3024     else if (bs->encrypted)
3025         return bs->filename;
3026     else
3027         return NULL;
3028 }
3029 
3030 void bdrv_get_backing_filename(BlockDriverState *bs,
3031                                char *filename, int filename_size)
3032 {
3033     pstrcpy(filename, filename_size, bs->backing_file);
3034 }
3035 
3036 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3037 {
3038     BlockDriver *drv = bs->drv;
3039     if (!drv)
3040         return -ENOMEDIUM;
3041     if (!drv->bdrv_get_info)
3042         return -ENOTSUP;
3043     memset(bdi, 0, sizeof(*bdi));
3044     return drv->bdrv_get_info(bs, bdi);
3045 }
3046 
3047 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3048 {
3049     BlockDriver *drv = bs->drv;
3050     if (drv && drv->bdrv_get_specific_info) {
3051         return drv->bdrv_get_specific_info(bs);
3052     }
3053     return NULL;
3054 }
3055 
3056 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3057 {
3058     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3059         return;
3060     }
3061 
3062     bs->drv->bdrv_debug_event(bs, event);
3063 }
3064 
3065 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3066                           const char *tag)
3067 {
3068     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3069         bs = bs->file ? bs->file->bs : NULL;
3070     }
3071 
3072     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3073         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3074     }
3075 
3076     return -ENOTSUP;
3077 }
3078 
3079 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3080 {
3081     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3082         bs = bs->file ? bs->file->bs : NULL;
3083     }
3084 
3085     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3086         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3087     }
3088 
3089     return -ENOTSUP;
3090 }
3091 
3092 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3093 {
3094     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3095         bs = bs->file ? bs->file->bs : NULL;
3096     }
3097 
3098     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3099         return bs->drv->bdrv_debug_resume(bs, tag);
3100     }
3101 
3102     return -ENOTSUP;
3103 }
3104 
3105 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3106 {
3107     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3108         bs = bs->file ? bs->file->bs : NULL;
3109     }
3110 
3111     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3112         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3113     }
3114 
3115     return false;
3116 }
3117 
3118 int bdrv_is_snapshot(BlockDriverState *bs)
3119 {
3120     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3121 }
3122 
3123 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3124  * relative, it must be relative to the chain.  So, passing in bs->filename
3125  * from a BDS as backing_file should not be done, as that may be relative to
3126  * the CWD rather than the chain. */
3127 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3128         const char *backing_file)
3129 {
3130     char *filename_full = NULL;
3131     char *backing_file_full = NULL;
3132     char *filename_tmp = NULL;
3133     int is_protocol = 0;
3134     BlockDriverState *curr_bs = NULL;
3135     BlockDriverState *retval = NULL;
3136 
3137     if (!bs || !bs->drv || !backing_file) {
3138         return NULL;
3139     }
3140 
3141     filename_full     = g_malloc(PATH_MAX);
3142     backing_file_full = g_malloc(PATH_MAX);
3143     filename_tmp      = g_malloc(PATH_MAX);
3144 
3145     is_protocol = path_has_protocol(backing_file);
3146 
3147     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3148 
3149         /* If either of the filename paths is actually a protocol, then
3150          * compare unmodified paths; otherwise make paths relative */
3151         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3152             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3153                 retval = curr_bs->backing->bs;
3154                 break;
3155             }
3156         } else {
3157             /* If not an absolute filename path, make it relative to the current
3158              * image's filename path */
3159             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3160                          backing_file);
3161 
3162             /* We are going to compare absolute pathnames */
3163             if (!realpath(filename_tmp, filename_full)) {
3164                 continue;
3165             }
3166 
3167             /* We need to make sure the backing filename we are comparing against
3168              * is relative to the current image filename (or absolute) */
3169             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3170                          curr_bs->backing_file);
3171 
3172             if (!realpath(filename_tmp, backing_file_full)) {
3173                 continue;
3174             }
3175 
3176             if (strcmp(backing_file_full, filename_full) == 0) {
3177                 retval = curr_bs->backing->bs;
3178                 break;
3179             }
3180         }
3181     }
3182 
3183     g_free(filename_full);
3184     g_free(backing_file_full);
3185     g_free(filename_tmp);
3186     return retval;
3187 }
3188 
3189 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3190 {
3191     if (!bs->drv) {
3192         return 0;
3193     }
3194 
3195     if (!bs->backing) {
3196         return 0;
3197     }
3198 
3199     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3200 }
3201 
3202 void bdrv_init(void)
3203 {
3204     module_call_init(MODULE_INIT_BLOCK);
3205 }
3206 
3207 void bdrv_init_with_whitelist(void)
3208 {
3209     use_bdrv_whitelist = 1;
3210     bdrv_init();
3211 }
3212 
3213 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3214 {
3215     BdrvChild *child;
3216     Error *local_err = NULL;
3217     int ret;
3218 
3219     if (!bs->drv)  {
3220         return;
3221     }
3222 
3223     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3224         return;
3225     }
3226     bs->open_flags &= ~BDRV_O_INACTIVE;
3227 
3228     if (bs->drv->bdrv_invalidate_cache) {
3229         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3230         if (local_err) {
3231             bs->open_flags |= BDRV_O_INACTIVE;
3232             error_propagate(errp, local_err);
3233             return;
3234         }
3235     }
3236 
3237     QLIST_FOREACH(child, &bs->children, next) {
3238         bdrv_invalidate_cache(child->bs, &local_err);
3239         if (local_err) {
3240             bs->open_flags |= BDRV_O_INACTIVE;
3241             error_propagate(errp, local_err);
3242             return;
3243         }
3244     }
3245 
3246     ret = refresh_total_sectors(bs, bs->total_sectors);
3247     if (ret < 0) {
3248         bs->open_flags |= BDRV_O_INACTIVE;
3249         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3250         return;
3251     }
3252 }
3253 
3254 void bdrv_invalidate_cache_all(Error **errp)
3255 {
3256     BlockDriverState *bs = NULL;
3257     Error *local_err = NULL;
3258 
3259     while ((bs = bdrv_next(bs)) != NULL) {
3260         AioContext *aio_context = bdrv_get_aio_context(bs);
3261 
3262         aio_context_acquire(aio_context);
3263         bdrv_invalidate_cache(bs, &local_err);
3264         aio_context_release(aio_context);
3265         if (local_err) {
3266             error_propagate(errp, local_err);
3267             return;
3268         }
3269     }
3270 }
3271 
3272 static int bdrv_inactivate_recurse(BlockDriverState *bs,
3273                                    bool setting_flag)
3274 {
3275     BdrvChild *child;
3276     int ret;
3277 
3278     if (!setting_flag && bs->drv->bdrv_inactivate) {
3279         ret = bs->drv->bdrv_inactivate(bs);
3280         if (ret < 0) {
3281             return ret;
3282         }
3283     }
3284 
3285     QLIST_FOREACH(child, &bs->children, next) {
3286         ret = bdrv_inactivate_recurse(child->bs, setting_flag);
3287         if (ret < 0) {
3288             return ret;
3289         }
3290     }
3291 
3292     if (setting_flag) {
3293         bs->open_flags |= BDRV_O_INACTIVE;
3294     }
3295     return 0;
3296 }
3297 
3298 int bdrv_inactivate_all(void)
3299 {
3300     BlockDriverState *bs = NULL;
3301     int ret = 0;
3302     int pass;
3303 
3304     while ((bs = bdrv_next(bs)) != NULL) {
3305         aio_context_acquire(bdrv_get_aio_context(bs));
3306     }
3307 
3308     /* We do two passes of inactivation. The first pass calls to drivers'
3309      * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
3310      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
3311      * is allowed. */
3312     for (pass = 0; pass < 2; pass++) {
3313         bs = NULL;
3314         while ((bs = bdrv_next(bs)) != NULL) {
3315             ret = bdrv_inactivate_recurse(bs, pass);
3316             if (ret < 0) {
3317                 goto out;
3318             }
3319         }
3320     }
3321 
3322 out:
3323     bs = NULL;
3324     while ((bs = bdrv_next(bs)) != NULL) {
3325         aio_context_release(bdrv_get_aio_context(bs));
3326     }
3327 
3328     return ret;
3329 }
3330 
3331 /**************************************************************/
3332 /* removable device support */
3333 
3334 /**
3335  * Return TRUE if the media is present
3336  */
3337 bool bdrv_is_inserted(BlockDriverState *bs)
3338 {
3339     BlockDriver *drv = bs->drv;
3340     BdrvChild *child;
3341 
3342     if (!drv) {
3343         return false;
3344     }
3345     if (drv->bdrv_is_inserted) {
3346         return drv->bdrv_is_inserted(bs);
3347     }
3348     QLIST_FOREACH(child, &bs->children, next) {
3349         if (!bdrv_is_inserted(child->bs)) {
3350             return false;
3351         }
3352     }
3353     return true;
3354 }
3355 
3356 /**
3357  * Return whether the media changed since the last call to this
3358  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3359  */
3360 int bdrv_media_changed(BlockDriverState *bs)
3361 {
3362     BlockDriver *drv = bs->drv;
3363 
3364     if (drv && drv->bdrv_media_changed) {
3365         return drv->bdrv_media_changed(bs);
3366     }
3367     return -ENOTSUP;
3368 }
3369 
3370 /**
3371  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3372  */
3373 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3374 {
3375     BlockDriver *drv = bs->drv;
3376     const char *device_name;
3377 
3378     if (drv && drv->bdrv_eject) {
3379         drv->bdrv_eject(bs, eject_flag);
3380     }
3381 
3382     device_name = bdrv_get_device_name(bs);
3383     if (device_name[0] != '\0') {
3384         qapi_event_send_device_tray_moved(device_name,
3385                                           eject_flag, &error_abort);
3386     }
3387 }
3388 
3389 /**
3390  * Lock or unlock the media (if it is locked, the user won't be able
3391  * to eject it manually).
3392  */
3393 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3394 {
3395     BlockDriver *drv = bs->drv;
3396 
3397     trace_bdrv_lock_medium(bs, locked);
3398 
3399     if (drv && drv->bdrv_lock_medium) {
3400         drv->bdrv_lock_medium(bs, locked);
3401     }
3402 }
3403 
3404 /* Get a reference to bs */
3405 void bdrv_ref(BlockDriverState *bs)
3406 {
3407     bs->refcnt++;
3408 }
3409 
3410 /* Release a previously grabbed reference to bs.
3411  * If after releasing, reference count is zero, the BlockDriverState is
3412  * deleted. */
3413 void bdrv_unref(BlockDriverState *bs)
3414 {
3415     if (!bs) {
3416         return;
3417     }
3418     assert(bs->refcnt > 0);
3419     if (--bs->refcnt == 0) {
3420         bdrv_delete(bs);
3421     }
3422 }
3423 
3424 struct BdrvOpBlocker {
3425     Error *reason;
3426     QLIST_ENTRY(BdrvOpBlocker) list;
3427 };
3428 
3429 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3430 {
3431     BdrvOpBlocker *blocker;
3432     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3433     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3434         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3435         if (errp) {
3436             *errp = error_copy(blocker->reason);
3437             error_prepend(errp, "Node '%s' is busy: ",
3438                           bdrv_get_device_or_node_name(bs));
3439         }
3440         return true;
3441     }
3442     return false;
3443 }
3444 
3445 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3446 {
3447     BdrvOpBlocker *blocker;
3448     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3449 
3450     blocker = g_new0(BdrvOpBlocker, 1);
3451     blocker->reason = reason;
3452     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3453 }
3454 
3455 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3456 {
3457     BdrvOpBlocker *blocker, *next;
3458     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3459     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3460         if (blocker->reason == reason) {
3461             QLIST_REMOVE(blocker, list);
3462             g_free(blocker);
3463         }
3464     }
3465 }
3466 
3467 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3468 {
3469     int i;
3470     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3471         bdrv_op_block(bs, i, reason);
3472     }
3473 }
3474 
3475 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3476 {
3477     int i;
3478     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3479         bdrv_op_unblock(bs, i, reason);
3480     }
3481 }
3482 
3483 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3484 {
3485     int i;
3486 
3487     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3488         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3489             return false;
3490         }
3491     }
3492     return true;
3493 }
3494 
3495 void bdrv_img_create(const char *filename, const char *fmt,
3496                      const char *base_filename, const char *base_fmt,
3497                      char *options, uint64_t img_size, int flags,
3498                      Error **errp, bool quiet)
3499 {
3500     QemuOptsList *create_opts = NULL;
3501     QemuOpts *opts = NULL;
3502     const char *backing_fmt, *backing_file;
3503     int64_t size;
3504     BlockDriver *drv, *proto_drv;
3505     Error *local_err = NULL;
3506     int ret = 0;
3507 
3508     /* Find driver and parse its options */
3509     drv = bdrv_find_format(fmt);
3510     if (!drv) {
3511         error_setg(errp, "Unknown file format '%s'", fmt);
3512         return;
3513     }
3514 
3515     proto_drv = bdrv_find_protocol(filename, true, errp);
3516     if (!proto_drv) {
3517         return;
3518     }
3519 
3520     if (!drv->create_opts) {
3521         error_setg(errp, "Format driver '%s' does not support image creation",
3522                    drv->format_name);
3523         return;
3524     }
3525 
3526     if (!proto_drv->create_opts) {
3527         error_setg(errp, "Protocol driver '%s' does not support image creation",
3528                    proto_drv->format_name);
3529         return;
3530     }
3531 
3532     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3533     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3534 
3535     /* Create parameter list with default values */
3536     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3537     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3538 
3539     /* Parse -o options */
3540     if (options) {
3541         qemu_opts_do_parse(opts, options, NULL, &local_err);
3542         if (local_err) {
3543             error_report_err(local_err);
3544             local_err = NULL;
3545             error_setg(errp, "Invalid options for file format '%s'", fmt);
3546             goto out;
3547         }
3548     }
3549 
3550     if (base_filename) {
3551         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3552         if (local_err) {
3553             error_setg(errp, "Backing file not supported for file format '%s'",
3554                        fmt);
3555             goto out;
3556         }
3557     }
3558 
3559     if (base_fmt) {
3560         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3561         if (local_err) {
3562             error_setg(errp, "Backing file format not supported for file "
3563                              "format '%s'", fmt);
3564             goto out;
3565         }
3566     }
3567 
3568     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3569     if (backing_file) {
3570         if (!strcmp(filename, backing_file)) {
3571             error_setg(errp, "Error: Trying to create an image with the "
3572                              "same filename as the backing file");
3573             goto out;
3574         }
3575     }
3576 
3577     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3578 
3579     // The size for the image must always be specified, with one exception:
3580     // If we are using a backing file, we can obtain the size from there
3581     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3582     if (size == -1) {
3583         if (backing_file) {
3584             BlockDriverState *bs;
3585             char *full_backing = g_new0(char, PATH_MAX);
3586             int64_t size;
3587             int back_flags;
3588             QDict *backing_options = NULL;
3589 
3590             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3591                                                          full_backing, PATH_MAX,
3592                                                          &local_err);
3593             if (local_err) {
3594                 g_free(full_backing);
3595                 goto out;
3596             }
3597 
3598             /* backing files always opened read-only */
3599             back_flags = flags;
3600             back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3601 
3602             if (backing_fmt) {
3603                 backing_options = qdict_new();
3604                 qdict_put(backing_options, "driver",
3605                           qstring_from_str(backing_fmt));
3606             }
3607 
3608             bs = NULL;
3609             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3610                             back_flags, &local_err);
3611             g_free(full_backing);
3612             if (ret < 0) {
3613                 goto out;
3614             }
3615             size = bdrv_getlength(bs);
3616             if (size < 0) {
3617                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3618                                  backing_file);
3619                 bdrv_unref(bs);
3620                 goto out;
3621             }
3622 
3623             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3624 
3625             bdrv_unref(bs);
3626         } else {
3627             error_setg(errp, "Image creation needs a size parameter");
3628             goto out;
3629         }
3630     }
3631 
3632     if (!quiet) {
3633         printf("Formatting '%s', fmt=%s ", filename, fmt);
3634         qemu_opts_print(opts, " ");
3635         puts("");
3636     }
3637 
3638     ret = bdrv_create(drv, filename, opts, &local_err);
3639 
3640     if (ret == -EFBIG) {
3641         /* This is generally a better message than whatever the driver would
3642          * deliver (especially because of the cluster_size_hint), since that
3643          * is most probably not much different from "image too large". */
3644         const char *cluster_size_hint = "";
3645         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3646             cluster_size_hint = " (try using a larger cluster size)";
3647         }
3648         error_setg(errp, "The image size is too large for file format '%s'"
3649                    "%s", fmt, cluster_size_hint);
3650         error_free(local_err);
3651         local_err = NULL;
3652     }
3653 
3654 out:
3655     qemu_opts_del(opts);
3656     qemu_opts_free(create_opts);
3657     if (local_err) {
3658         error_propagate(errp, local_err);
3659     }
3660 }
3661 
3662 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3663 {
3664     return bs->aio_context;
3665 }
3666 
3667 void bdrv_detach_aio_context(BlockDriverState *bs)
3668 {
3669     BdrvAioNotifier *baf;
3670 
3671     if (!bs->drv) {
3672         return;
3673     }
3674 
3675     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3676         baf->detach_aio_context(baf->opaque);
3677     }
3678 
3679     if (bs->throttle_state) {
3680         throttle_timers_detach_aio_context(&bs->throttle_timers);
3681     }
3682     if (bs->drv->bdrv_detach_aio_context) {
3683         bs->drv->bdrv_detach_aio_context(bs);
3684     }
3685     if (bs->file) {
3686         bdrv_detach_aio_context(bs->file->bs);
3687     }
3688     if (bs->backing) {
3689         bdrv_detach_aio_context(bs->backing->bs);
3690     }
3691 
3692     bs->aio_context = NULL;
3693 }
3694 
3695 void bdrv_attach_aio_context(BlockDriverState *bs,
3696                              AioContext *new_context)
3697 {
3698     BdrvAioNotifier *ban;
3699 
3700     if (!bs->drv) {
3701         return;
3702     }
3703 
3704     bs->aio_context = new_context;
3705 
3706     if (bs->backing) {
3707         bdrv_attach_aio_context(bs->backing->bs, new_context);
3708     }
3709     if (bs->file) {
3710         bdrv_attach_aio_context(bs->file->bs, new_context);
3711     }
3712     if (bs->drv->bdrv_attach_aio_context) {
3713         bs->drv->bdrv_attach_aio_context(bs, new_context);
3714     }
3715     if (bs->throttle_state) {
3716         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3717     }
3718 
3719     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3720         ban->attached_aio_context(new_context, ban->opaque);
3721     }
3722 }
3723 
3724 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3725 {
3726     bdrv_drain(bs); /* ensure there are no in-flight requests */
3727 
3728     bdrv_detach_aio_context(bs);
3729 
3730     /* This function executes in the old AioContext so acquire the new one in
3731      * case it runs in a different thread.
3732      */
3733     aio_context_acquire(new_context);
3734     bdrv_attach_aio_context(bs, new_context);
3735     aio_context_release(new_context);
3736 }
3737 
3738 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3739         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3740         void (*detach_aio_context)(void *opaque), void *opaque)
3741 {
3742     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3743     *ban = (BdrvAioNotifier){
3744         .attached_aio_context = attached_aio_context,
3745         .detach_aio_context   = detach_aio_context,
3746         .opaque               = opaque
3747     };
3748 
3749     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3750 }
3751 
3752 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3753                                       void (*attached_aio_context)(AioContext *,
3754                                                                    void *),
3755                                       void (*detach_aio_context)(void *),
3756                                       void *opaque)
3757 {
3758     BdrvAioNotifier *ban, *ban_next;
3759 
3760     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3761         if (ban->attached_aio_context == attached_aio_context &&
3762             ban->detach_aio_context   == detach_aio_context   &&
3763             ban->opaque               == opaque)
3764         {
3765             QLIST_REMOVE(ban, list);
3766             g_free(ban);
3767 
3768             return;
3769         }
3770     }
3771 
3772     abort();
3773 }
3774 
3775 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3776                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3777 {
3778     if (!bs->drv->bdrv_amend_options) {
3779         return -ENOTSUP;
3780     }
3781     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3782 }
3783 
3784 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3785  * of block filter and by bdrv_is_first_non_filter.
3786  * It is used to test if the given bs is the candidate or recurse more in the
3787  * node graph.
3788  */
3789 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3790                                       BlockDriverState *candidate)
3791 {
3792     /* return false if basic checks fails */
3793     if (!bs || !bs->drv) {
3794         return false;
3795     }
3796 
3797     /* the code reached a non block filter driver -> check if the bs is
3798      * the same as the candidate. It's the recursion termination condition.
3799      */
3800     if (!bs->drv->is_filter) {
3801         return bs == candidate;
3802     }
3803     /* Down this path the driver is a block filter driver */
3804 
3805     /* If the block filter recursion method is defined use it to recurse down
3806      * the node graph.
3807      */
3808     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3809         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3810     }
3811 
3812     /* the driver is a block filter but don't allow to recurse -> return false
3813      */
3814     return false;
3815 }
3816 
3817 /* This function checks if the candidate is the first non filter bs down it's
3818  * bs chain. Since we don't have pointers to parents it explore all bs chains
3819  * from the top. Some filters can choose not to pass down the recursion.
3820  */
3821 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3822 {
3823     BlockDriverState *bs = NULL;
3824 
3825     /* walk down the bs forest recursively */
3826     while ((bs = bdrv_next(bs)) != NULL) {
3827         bool perm;
3828 
3829         /* try to recurse in this top level bs */
3830         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3831 
3832         /* candidate is the first non filter */
3833         if (perm) {
3834             return true;
3835         }
3836     }
3837 
3838     return false;
3839 }
3840 
3841 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3842                                         const char *node_name, Error **errp)
3843 {
3844     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3845     AioContext *aio_context;
3846 
3847     if (!to_replace_bs) {
3848         error_setg(errp, "Node name '%s' not found", node_name);
3849         return NULL;
3850     }
3851 
3852     aio_context = bdrv_get_aio_context(to_replace_bs);
3853     aio_context_acquire(aio_context);
3854 
3855     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3856         to_replace_bs = NULL;
3857         goto out;
3858     }
3859 
3860     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3861      * most non filter in order to prevent data corruption.
3862      * Another benefit is that this tests exclude backing files which are
3863      * blocked by the backing blockers.
3864      */
3865     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3866         error_setg(errp, "Only top most non filter can be replaced");
3867         to_replace_bs = NULL;
3868         goto out;
3869     }
3870 
3871 out:
3872     aio_context_release(aio_context);
3873     return to_replace_bs;
3874 }
3875 
3876 static bool append_open_options(QDict *d, BlockDriverState *bs)
3877 {
3878     const QDictEntry *entry;
3879     QemuOptDesc *desc;
3880     BdrvChild *child;
3881     bool found_any = false;
3882     const char *p;
3883 
3884     for (entry = qdict_first(bs->options); entry;
3885          entry = qdict_next(bs->options, entry))
3886     {
3887         /* Exclude options for children */
3888         QLIST_FOREACH(child, &bs->children, next) {
3889             if (strstart(qdict_entry_key(entry), child->name, &p)
3890                 && (!*p || *p == '.'))
3891             {
3892                 break;
3893             }
3894         }
3895         if (child) {
3896             continue;
3897         }
3898 
3899         /* And exclude all non-driver-specific options */
3900         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3901             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3902                 break;
3903             }
3904         }
3905         if (desc->name) {
3906             continue;
3907         }
3908 
3909         qobject_incref(qdict_entry_value(entry));
3910         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3911         found_any = true;
3912     }
3913 
3914     return found_any;
3915 }
3916 
3917 /* Updates the following BDS fields:
3918  *  - exact_filename: A filename which may be used for opening a block device
3919  *                    which (mostly) equals the given BDS (even without any
3920  *                    other options; so reading and writing must return the same
3921  *                    results, but caching etc. may be different)
3922  *  - full_open_options: Options which, when given when opening a block device
3923  *                       (without a filename), result in a BDS (mostly)
3924  *                       equalling the given one
3925  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3926  *              full_open_options is converted to a JSON object, prefixed with
3927  *              "json:" (for use through the JSON pseudo protocol) and put here.
3928  */
3929 void bdrv_refresh_filename(BlockDriverState *bs)
3930 {
3931     BlockDriver *drv = bs->drv;
3932     QDict *opts;
3933 
3934     if (!drv) {
3935         return;
3936     }
3937 
3938     /* This BDS's file name will most probably depend on its file's name, so
3939      * refresh that first */
3940     if (bs->file) {
3941         bdrv_refresh_filename(bs->file->bs);
3942     }
3943 
3944     if (drv->bdrv_refresh_filename) {
3945         /* Obsolete information is of no use here, so drop the old file name
3946          * information before refreshing it */
3947         bs->exact_filename[0] = '\0';
3948         if (bs->full_open_options) {
3949             QDECREF(bs->full_open_options);
3950             bs->full_open_options = NULL;
3951         }
3952 
3953         opts = qdict_new();
3954         append_open_options(opts, bs);
3955         drv->bdrv_refresh_filename(bs, opts);
3956         QDECREF(opts);
3957     } else if (bs->file) {
3958         /* Try to reconstruct valid information from the underlying file */
3959         bool has_open_options;
3960 
3961         bs->exact_filename[0] = '\0';
3962         if (bs->full_open_options) {
3963             QDECREF(bs->full_open_options);
3964             bs->full_open_options = NULL;
3965         }
3966 
3967         opts = qdict_new();
3968         has_open_options = append_open_options(opts, bs);
3969 
3970         /* If no specific options have been given for this BDS, the filename of
3971          * the underlying file should suffice for this one as well */
3972         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3973             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3974         }
3975         /* Reconstructing the full options QDict is simple for most format block
3976          * drivers, as long as the full options are known for the underlying
3977          * file BDS. The full options QDict of that file BDS should somehow
3978          * contain a representation of the filename, therefore the following
3979          * suffices without querying the (exact_)filename of this BDS. */
3980         if (bs->file->bs->full_open_options) {
3981             qdict_put_obj(opts, "driver",
3982                           QOBJECT(qstring_from_str(drv->format_name)));
3983             QINCREF(bs->file->bs->full_open_options);
3984             qdict_put_obj(opts, "file",
3985                           QOBJECT(bs->file->bs->full_open_options));
3986 
3987             bs->full_open_options = opts;
3988         } else {
3989             QDECREF(opts);
3990         }
3991     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3992         /* There is no underlying file BDS (at least referenced by BDS.file),
3993          * so the full options QDict should be equal to the options given
3994          * specifically for this block device when it was opened (plus the
3995          * driver specification).
3996          * Because those options don't change, there is no need to update
3997          * full_open_options when it's already set. */
3998 
3999         opts = qdict_new();
4000         append_open_options(opts, bs);
4001         qdict_put_obj(opts, "driver",
4002                       QOBJECT(qstring_from_str(drv->format_name)));
4003 
4004         if (bs->exact_filename[0]) {
4005             /* This may not work for all block protocol drivers (some may
4006              * require this filename to be parsed), but we have to find some
4007              * default solution here, so just include it. If some block driver
4008              * does not support pure options without any filename at all or
4009              * needs some special format of the options QDict, it needs to
4010              * implement the driver-specific bdrv_refresh_filename() function.
4011              */
4012             qdict_put_obj(opts, "filename",
4013                           QOBJECT(qstring_from_str(bs->exact_filename)));
4014         }
4015 
4016         bs->full_open_options = opts;
4017     }
4018 
4019     if (bs->exact_filename[0]) {
4020         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4021     } else if (bs->full_open_options) {
4022         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4023         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4024                  qstring_get_str(json));
4025         QDECREF(json);
4026     }
4027 }
4028 
4029 /*
4030  * Hot add/remove a BDS's child. So the user can take a child offline when
4031  * it is broken and take a new child online
4032  */
4033 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
4034                     Error **errp)
4035 {
4036 
4037     if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
4038         error_setg(errp, "The node %s does not support adding a child",
4039                    bdrv_get_device_or_node_name(parent_bs));
4040         return;
4041     }
4042 
4043     if (!QLIST_EMPTY(&child_bs->parents)) {
4044         error_setg(errp, "The node %s already has a parent",
4045                    child_bs->node_name);
4046         return;
4047     }
4048 
4049     parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
4050 }
4051 
4052 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
4053 {
4054     BdrvChild *tmp;
4055 
4056     if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
4057         error_setg(errp, "The node %s does not support removing a child",
4058                    bdrv_get_device_or_node_name(parent_bs));
4059         return;
4060     }
4061 
4062     QLIST_FOREACH(tmp, &parent_bs->children, next) {
4063         if (tmp == child) {
4064             break;
4065         }
4066     }
4067 
4068     if (!tmp) {
4069         error_setg(errp, "The node %s does not have a child named %s",
4070                    bdrv_get_device_or_node_name(parent_bs),
4071                    bdrv_get_device_or_node_name(child->bs));
4072         return;
4073     }
4074 
4075     parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
4076 }
4077