xref: /openbmc/qemu/block.c (revision b26ded9a7d9709aacdff36e39630acbb87201057)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "qemu/cutils.h"
42 #include "qemu/id.h"
43 
44 #ifdef CONFIG_BSD
45 #include <sys/ioctl.h>
46 #include <sys/queue.h>
47 #ifndef __DragonFly__
48 #include <sys/disk.h>
49 #endif
50 #endif
51 
52 #ifdef _WIN32
53 #include <windows.h>
54 #endif
55 
/* Placeholder result value; bdrv_create() seeds CreateCo.ret with it and
 * polls until the coroutine overwrites it with the real return code. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates that have been assigned a node name; linked through
 * their node_list field by bdrv_assign_node_name(). */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new(); linked through bs_list. */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration. */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration. */
static void bdrv_close(BlockDriverState *bs);
76 
77 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by a colon (a drive specifier such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter;

    is_letter = (letter >= 'a' && letter <= 'z') ||
                (letter >= 'A' && letter <= 'Z');
    if (!is_letter) {
        /* do not look at filename[1]: the string may be empty */
        return 0;
    }

    return filename[1] == ':';
}
84 
85 int is_windows_drive(const char *filename)
86 {
87     if (is_windows_drive_prefix(filename) &&
88         filename[2] == '\0')
89         return 1;
90     if (strstart(filename, "\\\\.\\", NULL) ||
91         strstart(filename, "//./", NULL))
92         return 1;
93     return 0;
94 }
95 #endif
96 
97 size_t bdrv_opt_mem_align(BlockDriverState *bs)
98 {
99     if (!bs || !bs->drv) {
100         /* page size or 4k (hdd sector size) should be on the safe side */
101         return MAX(4096, getpagesize());
102     }
103 
104     return bs->bl.opt_mem_alignment;
105 }
106 
107 size_t bdrv_min_mem_align(BlockDriverState *bs)
108 {
109     if (!bs || !bs->drv) {
110         /* page size or 4k (hdd sector size) should be on the safe side */
111         return MAX(4096, getpagesize());
112     }
113 
114     return bs->bl.min_mem_alignment;
115 }
116 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any path separator.  On Windows, drive specifiers are never
 * treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_chars = ":/\\";
#else
    const char *stop_chars = ":/";
#endif
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    /* index of the first ':' or separator, or of the terminating NUL */
    prefix_len = strcspn(path, stop_chars);

    return path[prefix_len] == ':';
}
134 
/*
 * Return 1 if @path is absolute: it starts with '/', or on Windows it is
 * a drive / drive-prefixed name or starts with a backslash.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
147 
/*
 * Build in @dest (capacity @dest_size, always NUL-terminated when
 * @dest_size > 0) the path of @filename interpreted relative to
 * @base_path.
 *
 * An absolute @filename is copied unchanged.  Otherwise the directory part
 * of @base_path is kept -- everything up to and including the last path
 * separator, or the first ':' if that lies further right -- and @filename
 * is appended to it.  Output that does not fit is truncated.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (protocol prefix), if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    dir_end = after_sep > after_colon ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
191 
192 void bdrv_get_full_backing_filename_from_filename(const char *backed,
193                                                   const char *backing,
194                                                   char *dest, size_t sz,
195                                                   Error **errp)
196 {
197     if (backing[0] == '\0' || path_has_protocol(backing) ||
198         path_is_absolute(backing))
199     {
200         pstrcpy(dest, sz, backing);
201     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
202         error_setg(errp, "Cannot use relative backing file names for '%s'",
203                    backed);
204     } else {
205         path_combine(dest, sz, backed, backing);
206     }
207 }
208 
209 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
210                                     Error **errp)
211 {
212     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
213 
214     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
215                                                  dest, sz, errp);
216 }
217 
/*
 * Register block driver @bdrv by inserting it at the head of the
 * file-scope bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
222 
/*
 * Allocate a new BlockDriverState; currently a plain wrapper around
 * bdrv_new().
 */
BlockDriverState *bdrv_new_root(void)
{
    return bdrv_new();
}
227 
228 BlockDriverState *bdrv_new(void)
229 {
230     BlockDriverState *bs;
231     int i;
232 
233     bs = g_new0(BlockDriverState, 1);
234     QLIST_INIT(&bs->dirty_bitmaps);
235     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
236         QLIST_INIT(&bs->op_blockers[i]);
237     }
238     notifier_with_return_list_init(&bs->before_write_notifiers);
239     bs->refcnt = 1;
240     bs->aio_context = qemu_get_aio_context();
241 
242     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
243 
244     return bs;
245 }
246 
247 BlockDriver *bdrv_find_format(const char *format_name)
248 {
249     BlockDriver *drv1;
250     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
251         if (!strcmp(drv1->format_name, format_name)) {
252             return drv1;
253         }
254     }
255     return NULL;
256 }
257 
258 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
259 {
260     static const char *whitelist_rw[] = {
261         CONFIG_BDRV_RW_WHITELIST
262     };
263     static const char *whitelist_ro[] = {
264         CONFIG_BDRV_RO_WHITELIST
265     };
266     const char **p;
267 
268     if (!whitelist_rw[0] && !whitelist_ro[0]) {
269         return 1;               /* no whitelist, anything goes */
270     }
271 
272     for (p = whitelist_rw; *p; p++) {
273         if (!strcmp(drv->format_name, *p)) {
274             return 1;
275         }
276     }
277     if (read_only) {
278         for (p = whitelist_ro; *p; p++) {
279             if (!strcmp(drv->format_name, *p)) {
280                 return 1;
281             }
282         }
283     }
284     return 0;
285 }
286 
/* Return true if the use_bdrv_whitelist flag is non-zero. */
bool bdrv_uses_whitelist(void)
{
    return use_bdrv_whitelist;
}
291 
/*
 * Argument/result bundle handed from bdrv_create() to
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* name of the image to create */
    QemuOpts *opts;     /* creation options passed to the callback */
    int ret;            /* NOT_DONE until the callback's result is stored */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
299 
300 static void coroutine_fn bdrv_create_co_entry(void *opaque)
301 {
302     Error *local_err = NULL;
303     int ret;
304 
305     CreateCo *cco = opaque;
306     assert(cco->drv);
307 
308     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
309     if (local_err) {
310         error_propagate(&cco->err, local_err);
311     }
312     cco->ret = ret;
313 }
314 
/*
 * Create an image named @filename using @drv's bdrv_create callback.
 *
 * The callback is run through bdrv_create_co_entry(): directly when the
 * caller is already in a coroutine, otherwise in a newly created coroutine
 * while this function polls until the result is available.
 *
 * Returns the callback's return value, or -ENOTSUP if @drv has no
 * bdrv_create callback.  On a negative result @errp is set, either from
 * the callback's error or with a generic message derived from the code.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename), /* private copy, freed at out: */
        .opts = opts,
        .ret = NOT_DONE,                /* sentinel until the entry ran */
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* wait until the coroutine has replaced the NOT_DONE sentinel */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            /* callback failed without providing an Error of its own */
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
359 
360 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
361 {
362     BlockDriver *drv;
363     Error *local_err = NULL;
364     int ret;
365 
366     drv = bdrv_find_protocol(filename, true, errp);
367     if (drv == NULL) {
368         return -ENOENT;
369     }
370 
371     ret = bdrv_create(drv, filename, opts, &local_err);
372     if (local_err) {
373         error_propagate(errp, local_err);
374     }
375     return ret;
376 }
377 
378 /**
379  * Try to get @bs's logical and physical block size.
380  * On success, store them in @bsz struct and return 0.
381  * On failure return -errno.
382  * @bs must not be empty.
383  */
384 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
385 {
386     BlockDriver *drv = bs->drv;
387 
388     if (drv && drv->bdrv_probe_blocksizes) {
389         return drv->bdrv_probe_blocksizes(bs, bsz);
390     }
391 
392     return -ENOTSUP;
393 }
394 
395 /**
396  * Try to get @bs's geometry (cyls, heads, sectors).
397  * On success, store them in @geo struct and return 0.
398  * On failure return -errno.
399  * @bs must not be empty.
400  */
401 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
402 {
403     BlockDriver *drv = bs->drv;
404 
405     if (drv && drv->bdrv_probe_geometry) {
406         return drv->bdrv_probe_geometry(bs, geo);
407     }
408 
409     return -ENOTSUP;
410 }
411 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity
 * of that buffer in bytes.
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when the
 * variable is unset.  On Windows @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).  -EOVERFLOW means the name did not fit
 * into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself fail and overwrite errno, which would make us report
         * the wrong error.
         */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
447 
448 /*
449  * Detect host devices. By convention, /dev/cdrom[N] is always
450  * recognized as a host CDROM.
451  */
452 static BlockDriver *find_hdev_driver(const char *filename)
453 {
454     int score_max = 0, score;
455     BlockDriver *drv = NULL, *d;
456 
457     QLIST_FOREACH(d, &bdrv_drivers, list) {
458         if (d->bdrv_probe_device) {
459             score = d->bdrv_probe_device(filename);
460             if (score > score_max) {
461                 score_max = score;
462                 drv = d;
463             }
464         }
465     }
466 
467     return drv;
468 }
469 
470 BlockDriver *bdrv_find_protocol(const char *filename,
471                                 bool allow_protocol_prefix,
472                                 Error **errp)
473 {
474     BlockDriver *drv1;
475     char protocol[128];
476     int len;
477     const char *p;
478 
479     /* TODO Drivers without bdrv_file_open must be specified explicitly */
480 
481     /*
482      * XXX(hch): we really should not let host device detection
483      * override an explicit protocol specification, but moving this
484      * later breaks access to device names with colons in them.
485      * Thanks to the brain-dead persistent naming schemes on udev-
486      * based Linux systems those actually are quite common.
487      */
488     drv1 = find_hdev_driver(filename);
489     if (drv1) {
490         return drv1;
491     }
492 
493     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
494         return &bdrv_file;
495     }
496 
497     p = strchr(filename, ':');
498     assert(p != NULL);
499     len = p - filename;
500     if (len > sizeof(protocol) - 1)
501         len = sizeof(protocol) - 1;
502     memcpy(protocol, filename, len);
503     protocol[len] = '\0';
504     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
505         if (drv1->protocol_name &&
506             !strcmp(drv1->protocol_name, protocol)) {
507             return drv1;
508         }
509     }
510 
511     error_setg(errp, "Unknown protocol '%s'", protocol);
512     return NULL;
513 }
514 
515 /*
516  * Guess image format by probing its contents.
517  * This is not a good idea when your image is raw (CVE-2008-2004), but
518  * we do it anyway for backward compatibility.
519  *
520  * @buf         contains the image's first @buf_size bytes.
521  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
522  *              but can be smaller if the image file is smaller)
523  * @filename    is its filename.
524  *
525  * For all block drivers, call the bdrv_probe() method to get its
526  * probing score.
527  * Return the first block driver with the highest probing score.
528  */
529 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
530                             const char *filename)
531 {
532     int score_max = 0, score;
533     BlockDriver *drv = NULL, *d;
534 
535     QLIST_FOREACH(d, &bdrv_drivers, list) {
536         if (d->bdrv_probe) {
537             score = d->bdrv_probe(buf, buf_size, filename);
538             if (score > score_max) {
539                 score_max = score;
540                 drv = d;
541             }
542         }
543     }
544 
545     return drv;
546 }
547 
548 static int find_image_format(BlockDriverState *bs, const char *filename,
549                              BlockDriver **pdrv, Error **errp)
550 {
551     BlockDriver *drv;
552     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
553     int ret = 0;
554 
555     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
556     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
557         *pdrv = &bdrv_raw;
558         return ret;
559     }
560 
561     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
562     if (ret < 0) {
563         error_setg_errno(errp, -ret, "Could not read image for determining its "
564                          "format");
565         *pdrv = NULL;
566         return ret;
567     }
568 
569     drv = bdrv_probe_all(buf, ret, filename);
570     if (!drv) {
571         error_setg(errp, "Could not determine image format: No compatible "
572                    "driver found");
573         ret = -ENOENT;
574     }
575     *pdrv = drv;
576     return ret;
577 }
578 
579 /**
580  * Set the current 'total_sectors' value
581  * Return 0 on success, -errno on error.
582  */
583 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
584 {
585     BlockDriver *drv = bs->drv;
586 
587     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
588     if (bdrv_is_sg(bs))
589         return 0;
590 
591     /* query actual device if possible, otherwise just trust the hint */
592     if (drv->bdrv_getlength) {
593         int64_t length = drv->bdrv_getlength(bs);
594         if (length < 0) {
595             return length;
596         }
597         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
598     }
599 
600     bs->total_sectors = hint;
601     return 0;
602 }
603 
604 /**
605  * Combines a QDict of new block driver @options with any missing options taken
606  * from @old_options, so that leaving out an option defaults to its old value.
607  */
608 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
609                               QDict *old_options)
610 {
611     if (bs->drv && bs->drv->bdrv_join_options) {
612         bs->drv->bdrv_join_options(options, old_options);
613     } else {
614         qdict_join(options, old_options, false);
615     }
616 }
617 
618 /**
619  * Set open flags for a given discard mode
620  *
621  * Return 0 on success, -1 if the discard mode was invalid.
622  */
623 int bdrv_parse_discard_flags(const char *mode, int *flags)
624 {
625     *flags &= ~BDRV_O_UNMAP;
626 
627     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
628         /* do nothing */
629     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
630         *flags |= BDRV_O_UNMAP;
631     } else {
632         return -1;
633     }
634 
635     return 0;
636 }
637 
638 /**
639  * Set open flags for a given cache mode
640  *
641  * Return 0 on success, -1 if the cache mode was invalid.
642  */
643 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
644 {
645     *flags &= ~BDRV_O_CACHE_MASK;
646 
647     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
648         *writethrough = false;
649         *flags |= BDRV_O_NOCACHE;
650     } else if (!strcmp(mode, "directsync")) {
651         *writethrough = true;
652         *flags |= BDRV_O_NOCACHE;
653     } else if (!strcmp(mode, "writeback")) {
654         *writethrough = false;
655     } else if (!strcmp(mode, "unsafe")) {
656         *writethrough = false;
657         *flags |= BDRV_O_NO_FLUSH;
658     } else if (!strcmp(mode, "writethrough")) {
659         *writethrough = true;
660     } else {
661         return -1;
662     }
663 
664     return 0;
665 }
666 
667 /*
668  * Returns the options and flags that a temporary snapshot should get, based on
669  * the originally requested flags (the originally requested image will have
670  * flags like a backing file)
671  */
672 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
673                                        int parent_flags, QDict *parent_options)
674 {
675     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
676 
677     /* For temporary files, unconditional cache=unsafe is fine */
678     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
679     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
680 }
681 
682 /*
683  * Returns the options and flags that bs->file should get if a protocol driver
684  * is expected, based on the given options and flags for the parent BDS
685  */
686 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
687                                    int parent_flags, QDict *parent_options)
688 {
689     int flags = parent_flags;
690 
691     /* Enable protocol handling, disable format probing for bs->file */
692     flags |= BDRV_O_PROTOCOL;
693 
694     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
695      * the parent. */
696     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
697     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
698 
699     /* Our block drivers take care to send flushes and respect unmap policy,
700      * so we can default to enable both on lower layers regardless of the
701      * corresponding parent options. */
702     flags |= BDRV_O_UNMAP;
703 
704     /* Clear flags that only apply to the top layer */
705     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
706                BDRV_O_NO_IO);
707 
708     *child_flags = flags;
709 }
710 
/* Child role whose options and flags come from bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
714 
715 /*
716  * Returns the options and flags that bs->file should get if the use of formats
717  * (and not only protocols) is permitted for it, based on the given options and
718  * flags for the parent BDS
719  */
720 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
721                                        int parent_flags, QDict *parent_options)
722 {
723     child_file.inherit_options(child_flags, child_options,
724                                parent_flags, parent_options);
725 
726     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
727 }
728 
/* Child role whose options and flags come from bdrv_inherited_fmt_options() */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
732 
733 /*
734  * Returns the options and flags that bs->backing should get, based on the
735  * given options and flags for the parent BDS
736  */
737 static void bdrv_backing_options(int *child_flags, QDict *child_options,
738                                  int parent_flags, QDict *parent_options)
739 {
740     int flags = parent_flags;
741 
742     /* The cache mode is inherited unmodified for backing files; except WCE,
743      * which is only applied on the top level (BlockBackend) */
744     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
745     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
746 
747     /* backing files always opened read-only */
748     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
749 
750     /* snapshot=on is handled on the top layer */
751     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
752 
753     *child_flags = flags;
754 }
755 
/* Child role whose options and flags come from bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
759 
760 static int bdrv_open_flags(BlockDriverState *bs, int flags)
761 {
762     int open_flags = flags;
763 
764     /*
765      * Clear flags that are internal to the block layer before opening the
766      * image.
767      */
768     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
769 
770     /*
771      * Snapshots should be writable.
772      */
773     if (flags & BDRV_O_TEMPORARY) {
774         open_flags |= BDRV_O_RDWR;
775     }
776 
777     return open_flags;
778 }
779 
780 static void update_flags_from_options(int *flags, QemuOpts *opts)
781 {
782     *flags &= ~BDRV_O_CACHE_MASK;
783 
784     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
785     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
786         *flags |= BDRV_O_NO_FLUSH;
787     }
788 
789     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
790     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
791         *flags |= BDRV_O_NOCACHE;
792     }
793 }
794 
795 static void update_options_from_flags(QDict *options, int flags)
796 {
797     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
798         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
799                   qbool_from_bool(flags & BDRV_O_NOCACHE));
800     }
801     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
802         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
803                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
804     }
805 }
806 
807 static void bdrv_assign_node_name(BlockDriverState *bs,
808                                   const char *node_name,
809                                   Error **errp)
810 {
811     char *gen_node_name = NULL;
812 
813     if (!node_name) {
814         node_name = gen_node_name = id_generate(ID_BLOCK);
815     } else if (!id_wellformed(node_name)) {
816         /*
817          * Check for empty string or invalid characters, but not if it is
818          * generated (generated names use characters not available to the user)
819          */
820         error_setg(errp, "Invalid node name");
821         return;
822     }
823 
824     /* takes care of avoiding namespaces collisions */
825     if (blk_by_name(node_name)) {
826         error_setg(errp, "node-name=%s is conflicting with a device id",
827                    node_name);
828         goto out;
829     }
830 
831     /* takes care of avoiding duplicates node names */
832     if (bdrv_find_node(node_name)) {
833         error_setg(errp, "Duplicate node name");
834         goto out;
835     }
836 
837     /* copy node name into the bs and insert it into the graph list */
838     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
839     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
840 out:
841     g_free(gen_node_name);
842 }
843 
/*
 * Driver-independent runtime options; bdrv_open_common() absorbs these
 * keys from the options QDict it is given.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
871 
/*
 * Common part for opening disk images and files
 *
 * @bs must not have a file child yet.  @file is the already opened child
 * used by format drivers; it must be NULL for drivers that implement
 * bdrv_file_open.  @options must be non-NULL and distinct from
 * bs->options.
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success.  On failure returns a negative errno, sets @errp
 * and resets bs->file, bs->opaque and bs->drv if they had been set.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the driver-independent keys out of @options into @opts */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* NOTE(review): bdrv_find_format() hands driver_name to strcmp(), so the
     * "driver" option is presumably always set by the caller -- confirm. */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* The file name comes from the child if there is one, otherwise from
     * the "filename" option (which may be absent) */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    /* A missing "node-name" option (NULL) requests a generated name */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are set before the driver's open callback runs */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell apart "allowed read-only only" from "not allowed at all" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    /* Record the file name; exact_filename starts out as a copy of it */
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        /* Drivers without bdrv_file_open need an already opened child */
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from ret */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the driver binding established above */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    /* Failures before the driver was bound only need to release opts */
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1022 
1023 static QDict *parse_json_filename(const char *filename, Error **errp)
1024 {
1025     QObject *options_obj;
1026     QDict *options;
1027     int ret;
1028 
1029     ret = strstart(filename, "json:", &filename);
1030     assert(ret);
1031 
1032     options_obj = qobject_from_json(filename);
1033     if (!options_obj) {
1034         error_setg(errp, "Could not parse the JSON options");
1035         return NULL;
1036     }
1037 
1038     if (qobject_type(options_obj) != QTYPE_QDICT) {
1039         qobject_decref(options_obj);
1040         error_setg(errp, "Invalid JSON object given");
1041         return NULL;
1042     }
1043 
1044     options = qobject_to_qdict(options_obj);
1045     qdict_flatten(options);
1046 
1047     return options;
1048 }
1049 
1050 static void parse_json_protocol(QDict *options, const char **pfilename,
1051                                 Error **errp)
1052 {
1053     QDict *json_options;
1054     Error *local_err = NULL;
1055 
1056     /* Parse json: pseudo-protocol */
1057     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1058         return;
1059     }
1060 
1061     json_options = parse_json_filename(*pfilename, &local_err);
1062     if (local_err) {
1063         error_propagate(errp, local_err);
1064         return;
1065     }
1066 
1067     /* Options given in the filename have lower priority than options
1068      * specified directly */
1069     qdict_join(options, json_options, false);
1070     QDECREF(json_options);
1071     *pfilename = NULL;
1072 }
1073 
1074 /*
1075  * Fills in default options for opening images and converts the legacy
1076  * filename/flags pair to option QDict entries.
1077  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1078  * block driver has been specified explicitly.
1079  */
1080 static int bdrv_fill_options(QDict **options, const char *filename,
1081                              int *flags, Error **errp)
1082 {
1083     const char *drvname;
1084     bool protocol = *flags & BDRV_O_PROTOCOL;
1085     bool parse_filename = false;
1086     BlockDriver *drv = NULL;
1087     Error *local_err = NULL;
1088 
1089     drvname = qdict_get_try_str(*options, "driver");
1090     if (drvname) {
1091         drv = bdrv_find_format(drvname);
1092         if (!drv) {
1093             error_setg(errp, "Unknown driver '%s'", drvname);
1094             return -ENOENT;
1095         }
1096         /* If the user has explicitly specified the driver, this choice should
1097          * override the BDRV_O_PROTOCOL flag */
1098         protocol = drv->bdrv_file_open;
1099     }
1100 
1101     if (protocol) {
1102         *flags |= BDRV_O_PROTOCOL;
1103     } else {
1104         *flags &= ~BDRV_O_PROTOCOL;
1105     }
1106 
1107     /* Translate cache options from flags into options */
1108     update_options_from_flags(*options, *flags);
1109 
1110     /* Fetch the file name from the options QDict if necessary */
1111     if (protocol && filename) {
1112         if (!qdict_haskey(*options, "filename")) {
1113             qdict_put(*options, "filename", qstring_from_str(filename));
1114             parse_filename = true;
1115         } else {
1116             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1117                              "the same time");
1118             return -EINVAL;
1119         }
1120     }
1121 
1122     /* Find the right block driver */
1123     filename = qdict_get_try_str(*options, "filename");
1124 
1125     if (!drvname && protocol) {
1126         if (filename) {
1127             drv = bdrv_find_protocol(filename, parse_filename, errp);
1128             if (!drv) {
1129                 return -EINVAL;
1130             }
1131 
1132             drvname = drv->format_name;
1133             qdict_put(*options, "driver", qstring_from_str(drvname));
1134         } else {
1135             error_setg(errp, "Must specify either driver or file");
1136             return -EINVAL;
1137         }
1138     }
1139 
1140     assert(drv || !protocol);
1141 
1142     /* Driver-specific filename parsing */
1143     if (drv && drv->bdrv_parse_filename && parse_filename) {
1144         drv->bdrv_parse_filename(filename, *options, &local_err);
1145         if (local_err) {
1146             error_propagate(errp, local_err);
1147             return -EINVAL;
1148         }
1149 
1150         if (!drv->bdrv_needs_filename) {
1151             qdict_del(*options, "filename");
1152         }
1153     }
1154 
1155     return 0;
1156 }
1157 
1158 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1159                                   const char *child_name,
1160                                   const BdrvChildRole *child_role)
1161 {
1162     BdrvChild *child = g_new(BdrvChild, 1);
1163     *child = (BdrvChild) {
1164         .bs     = child_bs,
1165         .name   = g_strdup(child_name),
1166         .role   = child_role,
1167     };
1168 
1169     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1170 
1171     return child;
1172 }
1173 
1174 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1175                              BlockDriverState *child_bs,
1176                              const char *child_name,
1177                              const BdrvChildRole *child_role)
1178 {
1179     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1180     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1181     return child;
1182 }
1183 
1184 static void bdrv_detach_child(BdrvChild *child)
1185 {
1186     if (child->next.le_prev) {
1187         QLIST_REMOVE(child, next);
1188         child->next.le_prev = NULL;
1189     }
1190     QLIST_REMOVE(child, next_parent);
1191     g_free(child->name);
1192     g_free(child);
1193 }
1194 
1195 void bdrv_root_unref_child(BdrvChild *child)
1196 {
1197     BlockDriverState *child_bs;
1198 
1199     child_bs = child->bs;
1200     bdrv_detach_child(child);
1201     bdrv_unref(child_bs);
1202 }
1203 
1204 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1205 {
1206     if (child == NULL) {
1207         return;
1208     }
1209 
1210     if (child->bs->inherits_from == parent) {
1211         child->bs->inherits_from = NULL;
1212     }
1213 
1214     bdrv_root_unref_child(child);
1215 }
1216 
/*
 * Sets the backing file link of a BDS. A new reference is created; callers
 * which don't need their own reference any more must call bdrv_unref().
 *
 * An existing backing link of bs is released first. Passing NULL as
 * backing_hd only removes the link and frees the backing blocker. In every
 * case the limits of bs are refreshed before returning.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
    /* Take the reference for the new link before the old link is dropped */
    if (backing_hd) {
        bdrv_ref(backing_hd);
    }

    if (bs->backing) {
        /* Lift our op blockers from the old backing node and release the
         * link; the blocker object itself is kept for reuse below */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
        bdrv_unref_child(bs, bs->backing);
    } else if (backing_hd) {
        /* No backing node so far: create the blocker reason */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    if (!backing_hd) {
        /* Link removed and no replacement: drop the blocker as well */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        bs->backing = NULL;
        goto out;
    }
    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    /* Record file name and format of the new backing node in bs */
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1256 
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * parent_options may be NULL, which means an empty set of options.
 *
 * Returns 0 if the backing file was opened, if bs already has a backing file
 * or if no backing file is specified at all; on failure errp is set and a
 * negative errno value is returned.
 *
 * TODO Can this be unified with bdrv_open_image()?
 * NOTE(review): no bdrv_open_image() is visible in this part of the file;
 * presumably this refers to bdrv_open_child() - confirm and update the name.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                           const char *bdref_key, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    char *bdref_key_dot;
    const char *reference = NULL;
    int ret = 0;
    BlockDriverState *backing_hd;
    QDict *options;
    QDict *tmp_parent_options = NULL;
    Error *local_err = NULL;

    /* Already open: nothing to do, report success */
    if (bs->backing != NULL) {
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (parent_options == NULL) {
        tmp_parent_options = qdict_new();
        parent_options = tmp_parent_options;
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;

    /* Move all "${bdref_key}.*" entries into their own QDict */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        /* The backing node is fully specified by the options */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* Neither the image nor the options name a backing file */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    /* Use the backing format recorded in bs unless a driver is given */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    /* The options reference is handed over to bdrv_open_inherit() here, so
     * it is not released in this function any more, not even on failure */
    backing_hd = NULL;
    ret = bdrv_open_inherit(&backing_hd,
                            *backing_filename ? backing_filename : NULL,
                            reference, options, 0, bs, &child_backing,
                            errp);
    if (ret < 0) {
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_prepend(errp, "Could not open backing file: ");
        goto free_exit;
    }

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
    bdrv_set_backing_hd(bs, backing_hd);
    bdrv_unref(backing_hd);

    /* The BlockdevRef has been processed, remove it from the options */
    qdict_del(parent_options, bdref_key);

free_exit:
    g_free(backing_filename);
    QDECREF(tmp_parent_options);
    return ret;
}
1346 
1347 /*
1348  * Opens a disk image whose options are given as BlockdevRef in another block
1349  * device's options.
1350  *
1351  * If allow_none is true, no image will be opened if filename is false and no
1352  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1353  *
1354  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1355  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1356  * itself, all options starting with "${bdref_key}." are considered part of the
1357  * BlockdevRef.
1358  *
1359  * The BlockdevRef will be removed from the options QDict.
1360  */
1361 BdrvChild *bdrv_open_child(const char *filename,
1362                            QDict *options, const char *bdref_key,
1363                            BlockDriverState* parent,
1364                            const BdrvChildRole *child_role,
1365                            bool allow_none, Error **errp)
1366 {
1367     BdrvChild *c = NULL;
1368     BlockDriverState *bs;
1369     QDict *image_options;
1370     int ret;
1371     char *bdref_key_dot;
1372     const char *reference;
1373 
1374     assert(child_role != NULL);
1375 
1376     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1377     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1378     g_free(bdref_key_dot);
1379 
1380     reference = qdict_get_try_str(options, bdref_key);
1381     if (!filename && !reference && !qdict_size(image_options)) {
1382         if (!allow_none) {
1383             error_setg(errp, "A block device must be specified for \"%s\"",
1384                        bdref_key);
1385         }
1386         QDECREF(image_options);
1387         goto done;
1388     }
1389 
1390     bs = NULL;
1391     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1392                             parent, child_role, errp);
1393     if (ret < 0) {
1394         goto done;
1395     }
1396 
1397     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1398 
1399 done:
1400     qdict_del(options, bdref_key);
1401     return c;
1402 }
1403 
1404 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1405                                      QDict *snapshot_options, Error **errp)
1406 {
1407     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1408     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1409     int64_t total_size;
1410     QemuOpts *opts = NULL;
1411     BlockDriverState *bs_snapshot;
1412     Error *local_err = NULL;
1413     int ret;
1414 
1415     /* if snapshot, we create a temporary backing file and open it
1416        instead of opening 'filename' directly */
1417 
1418     /* Get the required size from the image */
1419     total_size = bdrv_getlength(bs);
1420     if (total_size < 0) {
1421         ret = total_size;
1422         error_setg_errno(errp, -total_size, "Could not get image size");
1423         goto out;
1424     }
1425 
1426     /* Create the temporary image */
1427     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1428     if (ret < 0) {
1429         error_setg_errno(errp, -ret, "Could not get temporary filename");
1430         goto out;
1431     }
1432 
1433     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1434                             &error_abort);
1435     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1436     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1437     qemu_opts_del(opts);
1438     if (ret < 0) {
1439         error_prepend(errp, "Could not create temporary overlay '%s': ",
1440                       tmp_filename);
1441         goto out;
1442     }
1443 
1444     /* Prepare options QDict for the temporary file */
1445     qdict_put(snapshot_options, "file.driver",
1446               qstring_from_str("file"));
1447     qdict_put(snapshot_options, "file.filename",
1448               qstring_from_str(tmp_filename));
1449     qdict_put(snapshot_options, "driver",
1450               qstring_from_str("qcow2"));
1451 
1452     bs_snapshot = bdrv_new();
1453 
1454     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1455                     flags, &local_err);
1456     snapshot_options = NULL;
1457     if (ret < 0) {
1458         error_propagate(errp, local_err);
1459         goto out;
1460     }
1461 
1462     bdrv_append(bs_snapshot, bs);
1463 
1464 out:
1465     QDECREF(snapshot_options);
1466     g_free(tmp_filename);
1467     return ret;
1468 }
1469 
1470 /*
1471  * Opens a disk image (raw, qcow2, vmdk, ...)
1472  *
1473  * options is a QDict of options to pass to the block drivers, or NULL for an
1474  * empty set of options. The reference to the QDict belongs to the block layer
1475  * after the call (even on failure), so if the caller intends to reuse the
1476  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1477  *
1478  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1479  * If it is not NULL, the referenced BDS will be reused.
1480  *
1481  * The reference parameter may be used to specify an existing block device which
1482  * should be opened. If specified, neither options nor a filename may be given,
1483  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1484  */
1485 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1486                              const char *reference, QDict *options, int flags,
1487                              BlockDriverState *parent,
1488                              const BdrvChildRole *child_role, Error **errp)
1489 {
1490     int ret;
1491     BdrvChild *file = NULL;
1492     BlockDriverState *bs;
1493     BlockDriver *drv = NULL;
1494     const char *drvname;
1495     const char *backing;
1496     Error *local_err = NULL;
1497     QDict *snapshot_options = NULL;
1498     int snapshot_flags = 0;
1499 
1500     assert(pbs);
1501     assert(!child_role || !flags);
1502     assert(!child_role == !parent);
1503 
1504     if (reference) {
1505         bool options_non_empty = options ? qdict_size(options) : false;
1506         QDECREF(options);
1507 
1508         if (*pbs) {
1509             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1510                        "another block device");
1511             return -EINVAL;
1512         }
1513 
1514         if (filename || options_non_empty) {
1515             error_setg(errp, "Cannot reference an existing block device with "
1516                        "additional options or a new filename");
1517             return -EINVAL;
1518         }
1519 
1520         bs = bdrv_lookup_bs(reference, reference, errp);
1521         if (!bs) {
1522             return -ENODEV;
1523         }
1524 
1525         bdrv_ref(bs);
1526         *pbs = bs;
1527         return 0;
1528     }
1529 
1530     if (*pbs) {
1531         bs = *pbs;
1532     } else {
1533         bs = bdrv_new();
1534     }
1535 
1536     /* NULL means an empty set of options */
1537     if (options == NULL) {
1538         options = qdict_new();
1539     }
1540 
1541     /* json: syntax counts as explicit options, as if in the QDict */
1542     parse_json_protocol(options, &filename, &local_err);
1543     if (local_err) {
1544         ret = -EINVAL;
1545         goto fail;
1546     }
1547 
1548     bs->explicit_options = qdict_clone_shallow(options);
1549 
1550     if (child_role) {
1551         bs->inherits_from = parent;
1552         child_role->inherit_options(&flags, options,
1553                                     parent->open_flags, parent->options);
1554     }
1555 
1556     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1557     if (local_err) {
1558         goto fail;
1559     }
1560 
1561     bs->open_flags = flags;
1562     bs->options = options;
1563     options = qdict_clone_shallow(options);
1564 
1565     /* Find the right image format driver */
1566     drvname = qdict_get_try_str(options, "driver");
1567     if (drvname) {
1568         drv = bdrv_find_format(drvname);
1569         if (!drv) {
1570             error_setg(errp, "Unknown driver: '%s'", drvname);
1571             ret = -EINVAL;
1572             goto fail;
1573         }
1574     }
1575 
1576     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1577 
1578     backing = qdict_get_try_str(options, "backing");
1579     if (backing && *backing == '\0') {
1580         flags |= BDRV_O_NO_BACKING;
1581         qdict_del(options, "backing");
1582     }
1583 
1584     /* Open image file without format layer */
1585     if ((flags & BDRV_O_PROTOCOL) == 0) {
1586         if (flags & BDRV_O_RDWR) {
1587             flags |= BDRV_O_ALLOW_RDWR;
1588         }
1589         if (flags & BDRV_O_SNAPSHOT) {
1590             snapshot_options = qdict_new();
1591             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1592                                        flags, options);
1593             bdrv_backing_options(&flags, options, flags, options);
1594         }
1595 
1596         bs->open_flags = flags;
1597 
1598         file = bdrv_open_child(filename, options, "file", bs,
1599                                &child_file, true, &local_err);
1600         if (local_err) {
1601             ret = -EINVAL;
1602             goto fail;
1603         }
1604     }
1605 
1606     /* Image format probing */
1607     bs->probed = !drv;
1608     if (!drv && file) {
1609         ret = find_image_format(file->bs, filename, &drv, &local_err);
1610         if (ret < 0) {
1611             goto fail;
1612         }
1613         /*
1614          * This option update would logically belong in bdrv_fill_options(),
1615          * but we first need to open bs->file for the probing to work, while
1616          * opening bs->file already requires the (mostly) final set of options
1617          * so that cache mode etc. can be inherited.
1618          *
1619          * Adding the driver later is somewhat ugly, but it's not an option
1620          * that would ever be inherited, so it's correct. We just need to make
1621          * sure to update both bs->options (which has the full effective
1622          * options for bs) and options (which has file.* already removed).
1623          */
1624         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1625         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1626     } else if (!drv) {
1627         error_setg(errp, "Must specify either driver or file");
1628         ret = -EINVAL;
1629         goto fail;
1630     }
1631 
1632     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1633     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1634     /* file must be NULL if a protocol BDS is about to be created
1635      * (the inverse results in an error message from bdrv_open_common()) */
1636     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1637 
1638     /* Open the image */
1639     ret = bdrv_open_common(bs, file, options, &local_err);
1640     if (ret < 0) {
1641         goto fail;
1642     }
1643 
1644     if (file && (bs->file != file)) {
1645         bdrv_unref_child(bs, file);
1646         file = NULL;
1647     }
1648 
1649     /* If there is a backing file, use it */
1650     if ((flags & BDRV_O_NO_BACKING) == 0) {
1651         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1652         if (ret < 0) {
1653             goto close_and_fail;
1654         }
1655     }
1656 
1657     bdrv_refresh_filename(bs);
1658 
1659     /* Check if any unknown options were used */
1660     if (options && (qdict_size(options) != 0)) {
1661         const QDictEntry *entry = qdict_first(options);
1662         if (flags & BDRV_O_PROTOCOL) {
1663             error_setg(errp, "Block protocol '%s' doesn't support the option "
1664                        "'%s'", drv->format_name, entry->key);
1665         } else {
1666             error_setg(errp,
1667                        "Block format '%s' does not support the option '%s'",
1668                        drv->format_name, entry->key);
1669         }
1670 
1671         ret = -EINVAL;
1672         goto close_and_fail;
1673     }
1674 
1675     if (!bdrv_key_required(bs)) {
1676         if (bs->blk) {
1677             blk_dev_change_media_cb(bs->blk, true);
1678         }
1679     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1680                && !runstate_check(RUN_STATE_INMIGRATE)
1681                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1682         error_setg(errp,
1683                    "Guest must be stopped for opening of encrypted image");
1684         ret = -EBUSY;
1685         goto close_and_fail;
1686     }
1687 
1688     QDECREF(options);
1689     *pbs = bs;
1690 
1691     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1692      * temporary snapshot afterwards. */
1693     if (snapshot_flags) {
1694         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1695                                         &local_err);
1696         snapshot_options = NULL;
1697         if (local_err) {
1698             goto close_and_fail;
1699         }
1700     }
1701 
1702     return 0;
1703 
1704 fail:
1705     if (file != NULL) {
1706         bdrv_unref_child(bs, file);
1707     }
1708     QDECREF(snapshot_options);
1709     QDECREF(bs->explicit_options);
1710     QDECREF(bs->options);
1711     QDECREF(options);
1712     bs->options = NULL;
1713     if (!*pbs) {
1714         /* If *pbs is NULL, a new BDS has been created in this function and
1715            needs to be freed now. Otherwise, it does not need to be closed,
1716            since it has not really been opened yet. */
1717         bdrv_unref(bs);
1718     }
1719     if (local_err) {
1720         error_propagate(errp, local_err);
1721     }
1722     return ret;
1723 
1724 close_and_fail:
1725     /* See fail path, but now the BDS has to be always closed */
1726     if (*pbs) {
1727         bdrv_close(bs);
1728     } else {
1729         bdrv_unref(bs);
1730     }
1731     QDECREF(snapshot_options);
1732     QDECREF(options);
1733     if (local_err) {
1734         error_propagate(errp, local_err);
1735     }
1736     return ret;
1737 }
1738 
1739 int bdrv_open(BlockDriverState **pbs, const char *filename,
1740               const char *reference, QDict *options, int flags, Error **errp)
1741 {
1742     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1743                              NULL, errp);
1744 }
1745 
/* One element of a BlockReopenQueue: the pending reopen of a single BDS */
typedef struct BlockReopenQueueEntry {
     /* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded
      * for this entry; on failure only such entries are aborted */
     bool prepared;
     /* The node to reopen together with its new options and flags */
     BDRVReopenState state;
     /* Link to the next entry in the queue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1751 
/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had
 * QSIMPLEQ_INIT already performed, or alternatively may be NULL, in which
 * case a new BlockReopenQueue will be created and initialized. This newly
 * created BlockReopenQueue should be passed back in for subsequent calls that
 * are intended to be of the same atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * options contains the changed options for the associated bs
 * (the BlockReopenQueue takes ownership). NULL means no changed options.
 *
 * flags contains the open flags for the associated bs
 *
 * role, parent_options and parent_flags are only used when this function
 * calls itself for a child of a node that is being queued: the child then
 * inherits its flags and options from the parent through
 * role->inherit_options(), and flags must be 0. For a top-level call
 * parent_options is NULL.
 *
 * All children of bs that inherit their options from bs are queued as well,
 * ahead of the entry for bs itself.
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 */
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
                                                 BlockDriverState *bs,
                                                 QDict *options,
                                                 int flags,
                                                 const BdrvChildRole *role,
                                                 QDict *parent_options,
                                                 int parent_flags)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    BdrvChild *child;
    QDict *old_options, *explicit_options;

    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

    /* NULL means an empty set of options */
    if (!options) {
        options = qdict_new();
    }

    /*
     * Precedence of options:
     * 1. Explicitly passed in options (highest)
     * 2. Set in flags (only for top level)
     * 3. Retained from explicitly set options of bs
     * 4. Inherited from parent node
     * 5. Retained from effective options of bs
     */

    if (!parent_options) {
        /*
         * Any setting represented by flags is always updated. If the
         * corresponding QDict option is set, it takes precedence. Otherwise
         * the flag is translated into a QDict option. The old setting of bs is
         * not considered.
         */
        update_options_from_flags(options, flags);
    }

    /* Old explicitly set values (don't overwrite by inherited value) */
    old_options = qdict_clone_shallow(bs->explicit_options);
    bdrv_join_options(bs, options, old_options);
    QDECREF(old_options);

    /* Snapshot taken before anything is inherited: everything in here
     * counts as explicitly set */
    explicit_options = qdict_clone_shallow(options);

    /* Inherit from parent node */
    if (parent_options) {
        assert(!flags);
        role->inherit_options(&flags, options, parent_flags, parent_options);
    }

    /* Old values are used for options that aren't set yet */
    old_options = qdict_clone_shallow(bs->options);
    bdrv_join_options(bs, options, old_options);
    QDECREF(old_options);

    /* bdrv_open() masks this flag out */
    flags &= ~BDRV_O_PROTOCOL;

    QLIST_FOREACH(child, &bs->children, next) {
        QDict *new_child_options;
        char *child_key_dot;

        /* reopen can only change the options of block devices that were
         * implicitly created and inherited options. For other (referenced)
         * block devices, a syntax like "backing.foo" results in an error. */
        if (child->bs->inherits_from != bs) {
            continue;
        }

        /* Hand the "${child->name}.*" options down to the child */
        child_key_dot = g_strdup_printf("%s.", child->name);
        qdict_extract_subqdict(options, &new_child_options, child_key_dot);
        g_free(child_key_dot);

        bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
                                child->role, options, flags);
    }

    /* Appended after the loop, so the entries of the children come first */
    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
    bs_entry->state.options = options;
    bs_entry->state.explicit_options = explicit_options;
    bs_entry->state.flags = flags;

    return bs_queue;
}
1865 
1866 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1867                                     BlockDriverState *bs,
1868                                     QDict *options, int flags)
1869 {
1870     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1871                                    NULL, NULL, 0);
1872 }
1873 
1874 /*
1875  * Reopen multiple BlockDriverStates atomically & transactionally.
1876  *
1877  * The queue passed in (bs_queue) must have been built up previous
1878  * via bdrv_reopen_queue().
1879  *
1880  * Reopens all BDS specified in the queue, with the appropriate
1881  * flags.  All devices are prepared for reopen, and failure of any
1882  * device will cause all device changes to be abandonded, and intermediate
1883  * data cleaned up.
1884  *
1885  * If all devices prepare successfully, then the changes are committed
1886  * to all devices.
1887  *
1888  */
1889 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1890 {
1891     int ret = -1;
1892     BlockReopenQueueEntry *bs_entry, *next;
1893     Error *local_err = NULL;
1894 
1895     assert(bs_queue != NULL);
1896 
1897     bdrv_drain_all();
1898 
1899     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1900         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1901             error_propagate(errp, local_err);
1902             goto cleanup;
1903         }
1904         bs_entry->prepared = true;
1905     }
1906 
1907     /* If we reach this point, we have success and just need to apply the
1908      * changes
1909      */
1910     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1911         bdrv_reopen_commit(&bs_entry->state);
1912     }
1913 
1914     ret = 0;
1915 
1916 cleanup:
1917     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1918         if (ret && bs_entry->prepared) {
1919             bdrv_reopen_abort(&bs_entry->state);
1920         } else if (ret) {
1921             QDECREF(bs_entry->state.explicit_options);
1922         }
1923         QDECREF(bs_entry->state.options);
1924         g_free(bs_entry);
1925     }
1926     g_free(bs_queue);
1927     return ret;
1928 }
1929 
1930 
1931 /* Reopen a single BlockDriverState with the specified flags. */
1932 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1933 {
1934     int ret = -1;
1935     Error *local_err = NULL;
1936     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1937 
1938     ret = bdrv_reopen_multiple(queue, &local_err);
1939     if (local_err != NULL) {
1940         error_propagate(errp, local_err);
1941     }
1942     return ret;
1943 }
1944 
1945 
1946 /*
1947  * Prepares a BlockDriverState for reopen. All changes are staged in the
1948  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1949  * the block driver layer .bdrv_reopen_prepare()
1950  *
1951  * bs is the BlockDriverState to reopen
1952  * flags are the new open flags
1953  * queue is the reopen queue
1954  *
1955  * Returns 0 on success, non-zero on error.  On error errp will be set
1956  * as well.
1957  *
1958  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1959  * It is the responsibility of the caller to then call the abort() or
1960  * commit() for any other BDS that have been left in a prepare() state
1961  *
1962  */
1963 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1964                         Error **errp)
1965 {
1966     int ret = -1;
1967     Error *local_err = NULL;
1968     BlockDriver *drv;
1969     QemuOpts *opts;
1970     const char *value;
1971 
1972     assert(reopen_state != NULL);
1973     assert(reopen_state->bs->drv != NULL);
1974     drv = reopen_state->bs->drv;
1975 
1976     /* Process generic block layer options */
1977     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1978     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1979     if (local_err) {
1980         error_propagate(errp, local_err);
1981         ret = -EINVAL;
1982         goto error;
1983     }
1984 
1985     update_flags_from_options(&reopen_state->flags, opts);
1986 
1987     /* node-name and driver must be unchanged. Put them back into the QDict, so
1988      * that they are checked at the end of this function. */
1989     value = qemu_opt_get(opts, "node-name");
1990     if (value) {
1991         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
1992     }
1993 
1994     value = qemu_opt_get(opts, "driver");
1995     if (value) {
1996         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
1997     }
1998 
1999     /* if we are to stay read-only, do not allow permission change
2000      * to r/w */
2001     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2002         reopen_state->flags & BDRV_O_RDWR) {
2003         error_setg(errp, "Node '%s' is read only",
2004                    bdrv_get_device_or_node_name(reopen_state->bs));
2005         goto error;
2006     }
2007 
2008 
2009     ret = bdrv_flush(reopen_state->bs);
2010     if (ret) {
2011         error_setg_errno(errp, -ret, "Error flushing drive");
2012         goto error;
2013     }
2014 
2015     if (drv->bdrv_reopen_prepare) {
2016         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2017         if (ret) {
2018             if (local_err != NULL) {
2019                 error_propagate(errp, local_err);
2020             } else {
2021                 error_setg(errp, "failed while preparing to reopen image '%s'",
2022                            reopen_state->bs->filename);
2023             }
2024             goto error;
2025         }
2026     } else {
2027         /* It is currently mandatory to have a bdrv_reopen_prepare()
2028          * handler for each supported drv. */
2029         error_setg(errp, "Block format '%s' used by node '%s' "
2030                    "does not support reopening files", drv->format_name,
2031                    bdrv_get_device_or_node_name(reopen_state->bs));
2032         ret = -1;
2033         goto error;
2034     }
2035 
2036     /* Options that are not handled are only okay if they are unchanged
2037      * compared to the old state. It is expected that some options are only
2038      * used for the initial open, but not reopen (e.g. filename) */
2039     if (qdict_size(reopen_state->options)) {
2040         const QDictEntry *entry = qdict_first(reopen_state->options);
2041 
2042         do {
2043             QString *new_obj = qobject_to_qstring(entry->value);
2044             const char *new = qstring_get_str(new_obj);
2045             const char *old = qdict_get_try_str(reopen_state->bs->options,
2046                                                 entry->key);
2047 
2048             if (!old || strcmp(new, old)) {
2049                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2050                 ret = -EINVAL;
2051                 goto error;
2052             }
2053         } while ((entry = qdict_next(reopen_state->options, entry)));
2054     }
2055 
2056     ret = 0;
2057 
2058 error:
2059     qemu_opts_del(opts);
2060     return ret;
2061 }
2062 
2063 /*
2064  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2065  * makes them final by swapping the staging BlockDriverState contents into
2066  * the active BlockDriverState contents.
2067  */
2068 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2069 {
2070     BlockDriver *drv;
2071 
2072     assert(reopen_state != NULL);
2073     drv = reopen_state->bs->drv;
2074     assert(drv != NULL);
2075 
2076     /* If there are any driver level actions to take */
2077     if (drv->bdrv_reopen_commit) {
2078         drv->bdrv_reopen_commit(reopen_state);
2079     }
2080 
2081     /* set BDS specific flags now */
2082     QDECREF(reopen_state->bs->explicit_options);
2083 
2084     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2085     reopen_state->bs->open_flags         = reopen_state->flags;
2086     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2087 
2088     bdrv_refresh_limits(reopen_state->bs, NULL);
2089 }
2090 
2091 /*
2092  * Abort the reopen, and delete and free the staged changes in
2093  * reopen_state
2094  */
2095 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2096 {
2097     BlockDriver *drv;
2098 
2099     assert(reopen_state != NULL);
2100     drv = reopen_state->bs->drv;
2101     assert(drv != NULL);
2102 
2103     if (drv->bdrv_reopen_abort) {
2104         drv->bdrv_reopen_abort(reopen_state);
2105     }
2106 
2107     QDECREF(reopen_state->explicit_options);
2108 }
2109 
2110 
2111 static void bdrv_close(BlockDriverState *bs)
2112 {
2113     BdrvAioNotifier *ban, *ban_next;
2114 
2115     assert(!bs->job);
2116 
2117     bdrv_drained_begin(bs); /* complete I/O */
2118     bdrv_flush(bs);
2119     bdrv_drain(bs); /* in case flush left pending I/O */
2120 
2121     bdrv_release_named_dirty_bitmaps(bs);
2122     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2123 
2124     if (bs->blk) {
2125         blk_dev_change_media_cb(bs->blk, false);
2126     }
2127 
2128     if (bs->drv) {
2129         BdrvChild *child, *next;
2130 
2131         bs->drv->bdrv_close(bs);
2132         bs->drv = NULL;
2133 
2134         bdrv_set_backing_hd(bs, NULL);
2135 
2136         if (bs->file != NULL) {
2137             bdrv_unref_child(bs, bs->file);
2138             bs->file = NULL;
2139         }
2140 
2141         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2142             /* TODO Remove bdrv_unref() from drivers' close function and use
2143              * bdrv_unref_child() here */
2144             if (child->bs->inherits_from == bs) {
2145                 child->bs->inherits_from = NULL;
2146             }
2147             bdrv_detach_child(child);
2148         }
2149 
2150         g_free(bs->opaque);
2151         bs->opaque = NULL;
2152         bs->copy_on_read = 0;
2153         bs->backing_file[0] = '\0';
2154         bs->backing_format[0] = '\0';
2155         bs->total_sectors = 0;
2156         bs->encrypted = 0;
2157         bs->valid_key = 0;
2158         bs->sg = 0;
2159         bs->zero_beyond_eof = false;
2160         QDECREF(bs->options);
2161         QDECREF(bs->explicit_options);
2162         bs->options = NULL;
2163         QDECREF(bs->full_open_options);
2164         bs->full_open_options = NULL;
2165     }
2166 
2167     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2168         g_free(ban);
2169     }
2170     QLIST_INIT(&bs->aio_notifiers);
2171     bdrv_drained_end(bs);
2172 }
2173 
2174 void bdrv_close_all(void)
2175 {
2176     BlockDriverState *bs;
2177     AioContext *aio_context;
2178 
2179     /* Drop references from requests still in flight, such as canceled block
2180      * jobs whose AIO context has not been polled yet */
2181     bdrv_drain_all();
2182 
2183     blk_remove_all_bs();
2184     blockdev_close_all_bdrv_states();
2185 
2186     /* Cancel all block jobs */
2187     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2188         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2189             aio_context = bdrv_get_aio_context(bs);
2190 
2191             aio_context_acquire(aio_context);
2192             if (bs->job) {
2193                 block_job_cancel_sync(bs->job);
2194                 aio_context_release(aio_context);
2195                 break;
2196             }
2197             aio_context_release(aio_context);
2198         }
2199 
2200         /* All the remaining BlockDriverStates are referenced directly or
2201          * indirectly from block jobs, so there needs to be at least one BDS
2202          * directly used by a block job */
2203         assert(bs);
2204     }
2205 }
2206 
2207 static void change_parent_backing_link(BlockDriverState *from,
2208                                        BlockDriverState *to)
2209 {
2210     BdrvChild *c, *next;
2211 
2212     if (from->blk) {
2213         /* FIXME We bypass blk_set_bs(), so we need to make these updates
2214          * manually. The root problem is not in this change function, but the
2215          * existence of BlockDriverState.blk. */
2216         to->blk = from->blk;
2217         from->blk = NULL;
2218     }
2219 
2220     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2221         assert(c->role != &child_backing);
2222         c->bs = to;
2223         QLIST_REMOVE(c, next_parent);
2224         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2225         bdrv_ref(to);
2226         bdrv_unref(from);
2227     }
2228 }
2229 
2230 /*
2231  * Add new bs contents at the top of an image chain while the chain is
2232  * live, while keeping required fields on the top layer.
2233  *
2234  * This will modify the BlockDriverState fields, and swap contents
2235  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2236  *
2237  * bs_new must not be attached to a BlockBackend.
2238  *
2239  * This function does not create any image files.
2240  *
2241  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2242  * that's what the callers commonly need. bs_new will be referenced by the old
2243  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2244  * reference of its own, it must call bdrv_ref().
2245  */
2246 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2247 {
2248     assert(!bdrv_requests_pending(bs_top));
2249     assert(!bdrv_requests_pending(bs_new));
2250 
2251     bdrv_ref(bs_top);
2252 
2253     change_parent_backing_link(bs_top, bs_new);
2254     bdrv_set_backing_hd(bs_new, bs_top);
2255     bdrv_unref(bs_top);
2256 
2257     /* bs_new is now referenced by its new parents, we don't need the
2258      * additional reference any more. */
2259     bdrv_unref(bs_new);
2260 }
2261 
2262 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2263 {
2264     assert(!bdrv_requests_pending(old));
2265     assert(!bdrv_requests_pending(new));
2266 
2267     bdrv_ref(old);
2268 
2269     change_parent_backing_link(old, new);
2270 
2271     /* Change backing files if a previously independent node is added to the
2272      * chain. For active commit, we replace top by its own (indirect) backing
2273      * file and don't do anything here so we don't build a loop. */
2274     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2275         bdrv_set_backing_hd(new, backing_bs(old));
2276         bdrv_set_backing_hd(old, NULL);
2277     }
2278 
2279     bdrv_unref(old);
2280 }
2281 
2282 static void bdrv_delete(BlockDriverState *bs)
2283 {
2284     assert(!bs->job);
2285     assert(bdrv_op_blocker_is_empty(bs));
2286     assert(!bs->refcnt);
2287 
2288     bdrv_close(bs);
2289 
2290     /* remove from list, if necessary */
2291     if (bs->node_name[0] != '\0') {
2292         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2293     }
2294     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2295 
2296     g_free(bs);
2297 }
2298 
2299 /*
2300  * Run consistency checks on an image
2301  *
2302  * Returns 0 if the check could be completed (it doesn't mean that the image is
2303  * free of errors) or -errno when an internal error occurred. The results of the
2304  * check are stored in res.
2305  */
2306 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2307 {
2308     if (bs->drv == NULL) {
2309         return -ENOMEDIUM;
2310     }
2311     if (bs->drv->bdrv_check == NULL) {
2312         return -ENOTSUP;
2313     }
2314 
2315     memset(res, 0, sizeof(*res));
2316     return bs->drv->bdrv_check(bs, res, fix);
2317 }
2318 
2319 #define COMMIT_BUF_SECTORS 2048
2320 
2321 /* commit COW file into the raw image */
2322 int bdrv_commit(BlockDriverState *bs)
2323 {
2324     BlockDriver *drv = bs->drv;
2325     int64_t sector, total_sectors, length, backing_length;
2326     int n, ro, open_flags;
2327     int ret = 0;
2328     uint8_t *buf = NULL;
2329 
2330     if (!drv)
2331         return -ENOMEDIUM;
2332 
2333     if (!bs->backing) {
2334         return -ENOTSUP;
2335     }
2336 
2337     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2338         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2339         return -EBUSY;
2340     }
2341 
2342     ro = bs->backing->bs->read_only;
2343     open_flags =  bs->backing->bs->open_flags;
2344 
2345     if (ro) {
2346         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2347             return -EACCES;
2348         }
2349     }
2350 
2351     length = bdrv_getlength(bs);
2352     if (length < 0) {
2353         ret = length;
2354         goto ro_cleanup;
2355     }
2356 
2357     backing_length = bdrv_getlength(bs->backing->bs);
2358     if (backing_length < 0) {
2359         ret = backing_length;
2360         goto ro_cleanup;
2361     }
2362 
2363     /* If our top snapshot is larger than the backing file image,
2364      * grow the backing file image if possible.  If not possible,
2365      * we must return an error */
2366     if (length > backing_length) {
2367         ret = bdrv_truncate(bs->backing->bs, length);
2368         if (ret < 0) {
2369             goto ro_cleanup;
2370         }
2371     }
2372 
2373     total_sectors = length >> BDRV_SECTOR_BITS;
2374 
2375     /* qemu_try_blockalign() for bs will choose an alignment that works for
2376      * bs->backing->bs as well, so no need to compare the alignment manually. */
2377     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2378     if (buf == NULL) {
2379         ret = -ENOMEM;
2380         goto ro_cleanup;
2381     }
2382 
2383     for (sector = 0; sector < total_sectors; sector += n) {
2384         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2385         if (ret < 0) {
2386             goto ro_cleanup;
2387         }
2388         if (ret) {
2389             ret = bdrv_read(bs, sector, buf, n);
2390             if (ret < 0) {
2391                 goto ro_cleanup;
2392             }
2393 
2394             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2395             if (ret < 0) {
2396                 goto ro_cleanup;
2397             }
2398         }
2399     }
2400 
2401     if (drv->bdrv_make_empty) {
2402         ret = drv->bdrv_make_empty(bs);
2403         if (ret < 0) {
2404             goto ro_cleanup;
2405         }
2406         bdrv_flush(bs);
2407     }
2408 
2409     /*
2410      * Make sure all data we wrote to the backing device is actually
2411      * stable on disk.
2412      */
2413     if (bs->backing) {
2414         bdrv_flush(bs->backing->bs);
2415     }
2416 
2417     ret = 0;
2418 ro_cleanup:
2419     qemu_vfree(buf);
2420 
2421     if (ro) {
2422         /* ignoring error return here */
2423         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2424     }
2425 
2426     return ret;
2427 }
2428 
2429 /*
2430  * Return values:
2431  * 0        - success
2432  * -EINVAL  - backing format specified, but no file
2433  * -ENOSPC  - can't update the backing file because no space is left in the
2434  *            image file header
2435  * -ENOTSUP - format driver doesn't support changing the backing file
2436  */
2437 int bdrv_change_backing_file(BlockDriverState *bs,
2438     const char *backing_file, const char *backing_fmt)
2439 {
2440     BlockDriver *drv = bs->drv;
2441     int ret;
2442 
2443     /* Backing file format doesn't make sense without a backing file */
2444     if (backing_fmt && !backing_file) {
2445         return -EINVAL;
2446     }
2447 
2448     if (drv->bdrv_change_backing_file != NULL) {
2449         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2450     } else {
2451         ret = -ENOTSUP;
2452     }
2453 
2454     if (ret == 0) {
2455         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2456         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2457     }
2458     return ret;
2459 }
2460 
2461 /*
2462  * Finds the image layer in the chain that has 'bs' as its backing file.
2463  *
2464  * active is the current topmost image.
2465  *
2466  * Returns NULL if bs is not found in active's image chain,
2467  * or if active == bs.
2468  *
2469  * Returns the bottommost base image if bs == NULL.
2470  */
2471 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2472                                     BlockDriverState *bs)
2473 {
2474     while (active && bs != backing_bs(active)) {
2475         active = backing_bs(active);
2476     }
2477 
2478     return active;
2479 }
2480 
2481 /* Given a BDS, searches for the base layer. */
2482 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2483 {
2484     return bdrv_find_overlay(bs, NULL);
2485 }
2486 
2487 /*
2488  * Drops images above 'base' up to and including 'top', and sets the image
2489  * above 'top' to have base as its backing file.
2490  *
2491  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2492  * information in 'bs' can be properly updated.
2493  *
2494  * E.g., this will convert the following chain:
2495  * bottom <- base <- intermediate <- top <- active
2496  *
2497  * to
2498  *
2499  * bottom <- base <- active
2500  *
2501  * It is allowed for bottom==base, in which case it converts:
2502  *
2503  * base <- intermediate <- top <- active
2504  *
2505  * to
2506  *
2507  * base <- active
2508  *
2509  * If backing_file_str is non-NULL, it will be used when modifying top's
2510  * overlay image metadata.
2511  *
2512  * Error conditions:
2513  *  if active == top, that is considered an error
2514  *
2515  */
2516 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2517                            BlockDriverState *base, const char *backing_file_str)
2518 {
2519     BlockDriverState *new_top_bs = NULL;
2520     int ret = -EIO;
2521 
2522     if (!top->drv || !base->drv) {
2523         goto exit;
2524     }
2525 
2526     new_top_bs = bdrv_find_overlay(active, top);
2527 
2528     if (new_top_bs == NULL) {
2529         /* we could not find the image above 'top', this is an error */
2530         goto exit;
2531     }
2532 
2533     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2534      * to do, no intermediate images */
2535     if (backing_bs(new_top_bs) == base) {
2536         ret = 0;
2537         goto exit;
2538     }
2539 
2540     /* Make sure that base is in the backing chain of top */
2541     if (!bdrv_chain_contains(top, base)) {
2542         goto exit;
2543     }
2544 
2545     /* success - we can delete the intermediate states, and link top->base */
2546     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2547     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2548                                    base->drv ? base->drv->format_name : "");
2549     if (ret) {
2550         goto exit;
2551     }
2552     bdrv_set_backing_hd(new_top_bs, base);
2553 
2554     ret = 0;
2555 exit:
2556     return ret;
2557 }
2558 
2559 /**
2560  * Truncate file to 'offset' bytes (needed only for file protocols)
2561  */
2562 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2563 {
2564     BlockDriver *drv = bs->drv;
2565     int ret;
2566     if (!drv)
2567         return -ENOMEDIUM;
2568     if (!drv->bdrv_truncate)
2569         return -ENOTSUP;
2570     if (bs->read_only)
2571         return -EACCES;
2572 
2573     ret = drv->bdrv_truncate(bs, offset);
2574     if (ret == 0) {
2575         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2576         bdrv_dirty_bitmap_truncate(bs);
2577         if (bs->blk) {
2578             blk_dev_resize_cb(bs->blk);
2579         }
2580     }
2581     return ret;
2582 }
2583 
2584 /**
2585  * Length of a allocated file in bytes. Sparse files are counted by actual
2586  * allocated space. Return < 0 if error or unknown.
2587  */
2588 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2589 {
2590     BlockDriver *drv = bs->drv;
2591     if (!drv) {
2592         return -ENOMEDIUM;
2593     }
2594     if (drv->bdrv_get_allocated_file_size) {
2595         return drv->bdrv_get_allocated_file_size(bs);
2596     }
2597     if (bs->file) {
2598         return bdrv_get_allocated_file_size(bs->file->bs);
2599     }
2600     return -ENOTSUP;
2601 }
2602 
2603 /**
2604  * Return number of sectors on success, -errno on error.
2605  */
2606 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2607 {
2608     BlockDriver *drv = bs->drv;
2609 
2610     if (!drv)
2611         return -ENOMEDIUM;
2612 
2613     if (drv->has_variable_length) {
2614         int ret = refresh_total_sectors(bs, bs->total_sectors);
2615         if (ret < 0) {
2616             return ret;
2617         }
2618     }
2619     return bs->total_sectors;
2620 }
2621 
2622 /**
2623  * Return length in bytes on success, -errno on error.
2624  * The length is always a multiple of BDRV_SECTOR_SIZE.
2625  */
2626 int64_t bdrv_getlength(BlockDriverState *bs)
2627 {
2628     int64_t ret = bdrv_nb_sectors(bs);
2629 
2630     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2631     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2632 }
2633 
2634 /* return 0 as number of sectors if no device present or error */
2635 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2636 {
2637     int64_t nb_sectors = bdrv_nb_sectors(bs);
2638 
2639     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2640 }
2641 
2642 int bdrv_is_read_only(BlockDriverState *bs)
2643 {
2644     return bs->read_only;
2645 }
2646 
2647 int bdrv_is_sg(BlockDriverState *bs)
2648 {
2649     return bs->sg;
2650 }
2651 
2652 int bdrv_is_encrypted(BlockDriverState *bs)
2653 {
2654     if (bs->backing && bs->backing->bs->encrypted) {
2655         return 1;
2656     }
2657     return bs->encrypted;
2658 }
2659 
2660 int bdrv_key_required(BlockDriverState *bs)
2661 {
2662     BdrvChild *backing = bs->backing;
2663 
2664     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2665         return 1;
2666     }
2667     return (bs->encrypted && !bs->valid_key);
2668 }
2669 
2670 int bdrv_set_key(BlockDriverState *bs, const char *key)
2671 {
2672     int ret;
2673     if (bs->backing && bs->backing->bs->encrypted) {
2674         ret = bdrv_set_key(bs->backing->bs, key);
2675         if (ret < 0)
2676             return ret;
2677         if (!bs->encrypted)
2678             return 0;
2679     }
2680     if (!bs->encrypted) {
2681         return -EINVAL;
2682     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2683         return -ENOMEDIUM;
2684     }
2685     ret = bs->drv->bdrv_set_key(bs, key);
2686     if (ret < 0) {
2687         bs->valid_key = 0;
2688     } else if (!bs->valid_key) {
2689         bs->valid_key = 1;
2690         if (bs->blk) {
2691             /* call the change callback now, we skipped it on open */
2692             blk_dev_change_media_cb(bs->blk, true);
2693         }
2694     }
2695     return ret;
2696 }
2697 
2698 /*
2699  * Provide an encryption key for @bs.
2700  * If @key is non-null:
2701  *     If @bs is not encrypted, fail.
2702  *     Else if the key is invalid, fail.
2703  *     Else set @bs's key to @key, replacing the existing key, if any.
2704  * If @key is null:
2705  *     If @bs is encrypted and still lacks a key, fail.
2706  *     Else do nothing.
2707  * On failure, store an error object through @errp if non-null.
2708  */
2709 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2710 {
2711     if (key) {
2712         if (!bdrv_is_encrypted(bs)) {
2713             error_setg(errp, "Node '%s' is not encrypted",
2714                       bdrv_get_device_or_node_name(bs));
2715         } else if (bdrv_set_key(bs, key) < 0) {
2716             error_setg(errp, QERR_INVALID_PASSWORD);
2717         }
2718     } else {
2719         if (bdrv_key_required(bs)) {
2720             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2721                       "'%s' (%s) is encrypted",
2722                       bdrv_get_device_or_node_name(bs),
2723                       bdrv_get_encrypted_filename(bs));
2724         }
2725     }
2726 }
2727 
2728 const char *bdrv_get_format_name(BlockDriverState *bs)
2729 {
2730     return bs->drv ? bs->drv->format_name : NULL;
2731 }
2732 
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point at
 * a 'const char *' and have to be dereferenced once before the strings can
 * be compared.
 *
 * Returns the strcmp() ordering of the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2737 
2738 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2739                          void *opaque)
2740 {
2741     BlockDriver *drv;
2742     int count = 0;
2743     int i;
2744     const char **formats = NULL;
2745 
2746     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2747         if (drv->format_name) {
2748             bool found = false;
2749             int i = count;
2750             while (formats && i && !found) {
2751                 found = !strcmp(formats[--i], drv->format_name);
2752             }
2753 
2754             if (!found) {
2755                 formats = g_renew(const char *, formats, count + 1);
2756                 formats[count++] = drv->format_name;
2757             }
2758         }
2759     }
2760 
2761     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2762 
2763     for (i = 0; i < count; i++) {
2764         it(opaque, formats[i]);
2765     }
2766 
2767     g_free(formats);
2768 }
2769 
2770 /* This function is to find a node in the bs graph */
2771 BlockDriverState *bdrv_find_node(const char *node_name)
2772 {
2773     BlockDriverState *bs;
2774 
2775     assert(node_name);
2776 
2777     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2778         if (!strcmp(node_name, bs->node_name)) {
2779             return bs;
2780         }
2781     }
2782     return NULL;
2783 }
2784 
2785 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2786 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2787 {
2788     BlockDeviceInfoList *list, *entry;
2789     BlockDriverState *bs;
2790 
2791     list = NULL;
2792     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2793         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
2794         if (!info) {
2795             qapi_free_BlockDeviceInfoList(list);
2796             return NULL;
2797         }
2798         entry = g_malloc0(sizeof(*entry));
2799         entry->value = info;
2800         entry->next = list;
2801         list = entry;
2802     }
2803 
2804     return list;
2805 }
2806 
2807 BlockDriverState *bdrv_lookup_bs(const char *device,
2808                                  const char *node_name,
2809                                  Error **errp)
2810 {
2811     BlockBackend *blk;
2812     BlockDriverState *bs;
2813 
2814     if (device) {
2815         blk = blk_by_name(device);
2816 
2817         if (blk) {
2818             bs = blk_bs(blk);
2819             if (!bs) {
2820                 error_setg(errp, "Device '%s' has no medium", device);
2821             }
2822 
2823             return bs;
2824         }
2825     }
2826 
2827     if (node_name) {
2828         bs = bdrv_find_node(node_name);
2829 
2830         if (bs) {
2831             return bs;
2832         }
2833     }
2834 
2835     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2836                      device ? device : "",
2837                      node_name ? node_name : "");
2838     return NULL;
2839 }
2840 
2841 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2842  * return false.  If either argument is NULL, return false. */
2843 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2844 {
2845     while (top && top != base) {
2846         top = backing_bs(top);
2847     }
2848 
2849     return top != NULL;
2850 }
2851 
2852 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2853 {
2854     if (!bs) {
2855         return QTAILQ_FIRST(&graph_bdrv_states);
2856     }
2857     return QTAILQ_NEXT(bs, node_list);
2858 }
2859 
2860 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2861  * the monitor or attached to a BlockBackend */
2862 BlockDriverState *bdrv_next(BlockDriverState *bs)
2863 {
2864     if (!bs || bs->blk) {
2865         bs = blk_next_root_bs(bs);
2866         if (bs) {
2867             return bs;
2868         }
2869     }
2870 
2871     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2872      * handled by the above block already */
2873     do {
2874         bs = bdrv_next_monitor_owned(bs);
2875     } while (bs && bs->blk);
2876     return bs;
2877 }
2878 
2879 const char *bdrv_get_node_name(const BlockDriverState *bs)
2880 {
2881     return bs->node_name;
2882 }
2883 
2884 /* TODO check what callers really want: bs->node_name or blk_name() */
2885 const char *bdrv_get_device_name(const BlockDriverState *bs)
2886 {
2887     return bs->blk ? blk_name(bs->blk) : "";
2888 }
2889 
2890 /* This can be used to identify nodes that might not have a device
2891  * name associated. Since node and device names live in the same
2892  * namespace, the result is unambiguous. The exception is if both are
2893  * absent, then this returns an empty (non-null) string. */
2894 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2895 {
2896     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2897 }
2898 
2899 int bdrv_get_flags(BlockDriverState *bs)
2900 {
2901     return bs->open_flags;
2902 }
2903 
2904 int bdrv_has_zero_init_1(BlockDriverState *bs)
2905 {
2906     return 1;
2907 }
2908 
2909 int bdrv_has_zero_init(BlockDriverState *bs)
2910 {
2911     assert(bs->drv);
2912 
2913     /* If BS is a copy on write image, it is initialized to
2914        the contents of the base image, which may not be zeroes.  */
2915     if (bs->backing) {
2916         return 0;
2917     }
2918     if (bs->drv->bdrv_has_zero_init) {
2919         return bs->drv->bdrv_has_zero_init(bs);
2920     }
2921 
2922     /* safe default */
2923     return 0;
2924 }
2925 
2926 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2927 {
2928     BlockDriverInfo bdi;
2929 
2930     if (bs->backing) {
2931         return false;
2932     }
2933 
2934     if (bdrv_get_info(bs, &bdi) == 0) {
2935         return bdi.unallocated_blocks_are_zero;
2936     }
2937 
2938     return false;
2939 }
2940 
2941 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2942 {
2943     BlockDriverInfo bdi;
2944 
2945     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2946         return false;
2947     }
2948 
2949     if (bdrv_get_info(bs, &bdi) == 0) {
2950         return bdi.can_write_zeroes_with_unmap;
2951     }
2952 
2953     return false;
2954 }
2955 
2956 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2957 {
2958     if (bs->backing && bs->backing->bs->encrypted)
2959         return bs->backing_file;
2960     else if (bs->encrypted)
2961         return bs->filename;
2962     else
2963         return NULL;
2964 }
2965 
2966 void bdrv_get_backing_filename(BlockDriverState *bs,
2967                                char *filename, int filename_size)
2968 {
2969     pstrcpy(filename, filename_size, bs->backing_file);
2970 }
2971 
2972 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2973 {
2974     BlockDriver *drv = bs->drv;
2975     if (!drv)
2976         return -ENOMEDIUM;
2977     if (!drv->bdrv_get_info)
2978         return -ENOTSUP;
2979     memset(bdi, 0, sizeof(*bdi));
2980     return drv->bdrv_get_info(bs, bdi);
2981 }
2982 
2983 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2984 {
2985     BlockDriver *drv = bs->drv;
2986     if (drv && drv->bdrv_get_specific_info) {
2987         return drv->bdrv_get_specific_info(bs);
2988     }
2989     return NULL;
2990 }
2991 
2992 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
2993 {
2994     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2995         return;
2996     }
2997 
2998     bs->drv->bdrv_debug_event(bs, event);
2999 }
3000 
3001 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3002                           const char *tag)
3003 {
3004     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3005         bs = bs->file ? bs->file->bs : NULL;
3006     }
3007 
3008     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3009         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3010     }
3011 
3012     return -ENOTSUP;
3013 }
3014 
3015 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3016 {
3017     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3018         bs = bs->file ? bs->file->bs : NULL;
3019     }
3020 
3021     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3022         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3023     }
3024 
3025     return -ENOTSUP;
3026 }
3027 
3028 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3029 {
3030     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3031         bs = bs->file ? bs->file->bs : NULL;
3032     }
3033 
3034     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3035         return bs->drv->bdrv_debug_resume(bs, tag);
3036     }
3037 
3038     return -ENOTSUP;
3039 }
3040 
3041 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3042 {
3043     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3044         bs = bs->file ? bs->file->bs : NULL;
3045     }
3046 
3047     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3048         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3049     }
3050 
3051     return false;
3052 }
3053 
3054 int bdrv_is_snapshot(BlockDriverState *bs)
3055 {
3056     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3057 }
3058 
3059 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3060  * relative, it must be relative to the chain.  So, passing in bs->filename
3061  * from a BDS as backing_file should not be done, as that may be relative to
3062  * the CWD rather than the chain. */
3063 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3064         const char *backing_file)
3065 {
3066     char *filename_full = NULL;
3067     char *backing_file_full = NULL;
3068     char *filename_tmp = NULL;
3069     int is_protocol = 0;
3070     BlockDriverState *curr_bs = NULL;
3071     BlockDriverState *retval = NULL;
3072 
3073     if (!bs || !bs->drv || !backing_file) {
3074         return NULL;
3075     }
3076 
3077     filename_full     = g_malloc(PATH_MAX);
3078     backing_file_full = g_malloc(PATH_MAX);
3079     filename_tmp      = g_malloc(PATH_MAX);
3080 
3081     is_protocol = path_has_protocol(backing_file);
3082 
3083     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3084 
3085         /* If either of the filename paths is actually a protocol, then
3086          * compare unmodified paths; otherwise make paths relative */
3087         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3088             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3089                 retval = curr_bs->backing->bs;
3090                 break;
3091             }
3092         } else {
3093             /* If not an absolute filename path, make it relative to the current
3094              * image's filename path */
3095             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3096                          backing_file);
3097 
3098             /* We are going to compare absolute pathnames */
3099             if (!realpath(filename_tmp, filename_full)) {
3100                 continue;
3101             }
3102 
3103             /* We need to make sure the backing filename we are comparing against
3104              * is relative to the current image filename (or absolute) */
3105             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3106                          curr_bs->backing_file);
3107 
3108             if (!realpath(filename_tmp, backing_file_full)) {
3109                 continue;
3110             }
3111 
3112             if (strcmp(backing_file_full, filename_full) == 0) {
3113                 retval = curr_bs->backing->bs;
3114                 break;
3115             }
3116         }
3117     }
3118 
3119     g_free(filename_full);
3120     g_free(backing_file_full);
3121     g_free(filename_tmp);
3122     return retval;
3123 }
3124 
3125 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3126 {
3127     if (!bs->drv) {
3128         return 0;
3129     }
3130 
3131     if (!bs->backing) {
3132         return 0;
3133     }
3134 
3135     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3136 }
3137 
3138 void bdrv_init(void)
3139 {
3140     module_call_init(MODULE_INIT_BLOCK);
3141 }
3142 
3143 void bdrv_init_with_whitelist(void)
3144 {
3145     use_bdrv_whitelist = 1;
3146     bdrv_init();
3147 }
3148 
3149 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3150 {
3151     BdrvChild *child;
3152     Error *local_err = NULL;
3153     int ret;
3154 
3155     if (!bs->drv)  {
3156         return;
3157     }
3158 
3159     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3160         return;
3161     }
3162     bs->open_flags &= ~BDRV_O_INACTIVE;
3163 
3164     if (bs->drv->bdrv_invalidate_cache) {
3165         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3166         if (local_err) {
3167             bs->open_flags |= BDRV_O_INACTIVE;
3168             error_propagate(errp, local_err);
3169             return;
3170         }
3171     }
3172 
3173     QLIST_FOREACH(child, &bs->children, next) {
3174         bdrv_invalidate_cache(child->bs, &local_err);
3175         if (local_err) {
3176             bs->open_flags |= BDRV_O_INACTIVE;
3177             error_propagate(errp, local_err);
3178             return;
3179         }
3180     }
3181 
3182     ret = refresh_total_sectors(bs, bs->total_sectors);
3183     if (ret < 0) {
3184         bs->open_flags |= BDRV_O_INACTIVE;
3185         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3186         return;
3187     }
3188 }
3189 
3190 void bdrv_invalidate_cache_all(Error **errp)
3191 {
3192     BlockDriverState *bs = NULL;
3193     Error *local_err = NULL;
3194 
3195     while ((bs = bdrv_next(bs)) != NULL) {
3196         AioContext *aio_context = bdrv_get_aio_context(bs);
3197 
3198         aio_context_acquire(aio_context);
3199         bdrv_invalidate_cache(bs, &local_err);
3200         aio_context_release(aio_context);
3201         if (local_err) {
3202             error_propagate(errp, local_err);
3203             return;
3204         }
3205     }
3206 }
3207 
3208 static int bdrv_inactivate_recurse(BlockDriverState *bs,
3209                                    bool setting_flag)
3210 {
3211     BdrvChild *child;
3212     int ret;
3213 
3214     if (!setting_flag && bs->drv->bdrv_inactivate) {
3215         ret = bs->drv->bdrv_inactivate(bs);
3216         if (ret < 0) {
3217             return ret;
3218         }
3219     }
3220 
3221     QLIST_FOREACH(child, &bs->children, next) {
3222         ret = bdrv_inactivate_recurse(child->bs, setting_flag);
3223         if (ret < 0) {
3224             return ret;
3225         }
3226     }
3227 
3228     if (setting_flag) {
3229         bs->open_flags |= BDRV_O_INACTIVE;
3230     }
3231     return 0;
3232 }
3233 
3234 int bdrv_inactivate_all(void)
3235 {
3236     BlockDriverState *bs = NULL;
3237     int ret = 0;
3238     int pass;
3239 
3240     while ((bs = bdrv_next(bs)) != NULL) {
3241         aio_context_acquire(bdrv_get_aio_context(bs));
3242     }
3243 
3244     /* We do two passes of inactivation. The first pass calls to drivers'
3245      * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
3246      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
3247      * is allowed. */
3248     for (pass = 0; pass < 2; pass++) {
3249         bs = NULL;
3250         while ((bs = bdrv_next(bs)) != NULL) {
3251             ret = bdrv_inactivate_recurse(bs, pass);
3252             if (ret < 0) {
3253                 goto out;
3254             }
3255         }
3256     }
3257 
3258 out:
3259     bs = NULL;
3260     while ((bs = bdrv_next(bs)) != NULL) {
3261         aio_context_release(bdrv_get_aio_context(bs));
3262     }
3263 
3264     return ret;
3265 }
3266 
3267 /**************************************************************/
3268 /* removable device support */
3269 
3270 /**
3271  * Return TRUE if the media is present
3272  */
3273 bool bdrv_is_inserted(BlockDriverState *bs)
3274 {
3275     BlockDriver *drv = bs->drv;
3276     BdrvChild *child;
3277 
3278     if (!drv) {
3279         return false;
3280     }
3281     if (drv->bdrv_is_inserted) {
3282         return drv->bdrv_is_inserted(bs);
3283     }
3284     QLIST_FOREACH(child, &bs->children, next) {
3285         if (!bdrv_is_inserted(child->bs)) {
3286             return false;
3287         }
3288     }
3289     return true;
3290 }
3291 
3292 /**
3293  * Return whether the media changed since the last call to this
3294  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3295  */
3296 int bdrv_media_changed(BlockDriverState *bs)
3297 {
3298     BlockDriver *drv = bs->drv;
3299 
3300     if (drv && drv->bdrv_media_changed) {
3301         return drv->bdrv_media_changed(bs);
3302     }
3303     return -ENOTSUP;
3304 }
3305 
3306 /**
3307  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3308  */
3309 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3310 {
3311     BlockDriver *drv = bs->drv;
3312     const char *device_name;
3313 
3314     if (drv && drv->bdrv_eject) {
3315         drv->bdrv_eject(bs, eject_flag);
3316     }
3317 
3318     device_name = bdrv_get_device_name(bs);
3319     if (device_name[0] != '\0') {
3320         qapi_event_send_device_tray_moved(device_name,
3321                                           eject_flag, &error_abort);
3322     }
3323 }
3324 
3325 /**
3326  * Lock or unlock the media (if it is locked, the user won't be able
3327  * to eject it manually).
3328  */
3329 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3330 {
3331     BlockDriver *drv = bs->drv;
3332 
3333     trace_bdrv_lock_medium(bs, locked);
3334 
3335     if (drv && drv->bdrv_lock_medium) {
3336         drv->bdrv_lock_medium(bs, locked);
3337     }
3338 }
3339 
3340 /* Get a reference to bs */
3341 void bdrv_ref(BlockDriverState *bs)
3342 {
3343     bs->refcnt++;
3344 }
3345 
3346 /* Release a previously grabbed reference to bs.
3347  * If after releasing, reference count is zero, the BlockDriverState is
3348  * deleted. */
3349 void bdrv_unref(BlockDriverState *bs)
3350 {
3351     if (!bs) {
3352         return;
3353     }
3354     assert(bs->refcnt > 0);
3355     if (--bs->refcnt == 0) {
3356         bdrv_delete(bs);
3357     }
3358 }
3359 
3360 struct BdrvOpBlocker {
3361     Error *reason;
3362     QLIST_ENTRY(BdrvOpBlocker) list;
3363 };
3364 
3365 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3366 {
3367     BdrvOpBlocker *blocker;
3368     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3369     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3370         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3371         if (errp) {
3372             *errp = error_copy(blocker->reason);
3373             error_prepend(errp, "Node '%s' is busy: ",
3374                           bdrv_get_device_or_node_name(bs));
3375         }
3376         return true;
3377     }
3378     return false;
3379 }
3380 
3381 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3382 {
3383     BdrvOpBlocker *blocker;
3384     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3385 
3386     blocker = g_new0(BdrvOpBlocker, 1);
3387     blocker->reason = reason;
3388     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3389 }
3390 
3391 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3392 {
3393     BdrvOpBlocker *blocker, *next;
3394     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3395     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3396         if (blocker->reason == reason) {
3397             QLIST_REMOVE(blocker, list);
3398             g_free(blocker);
3399         }
3400     }
3401 }
3402 
3403 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3404 {
3405     int i;
3406     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3407         bdrv_op_block(bs, i, reason);
3408     }
3409 }
3410 
3411 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3412 {
3413     int i;
3414     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3415         bdrv_op_unblock(bs, i, reason);
3416     }
3417 }
3418 
3419 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3420 {
3421     int i;
3422 
3423     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3424         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3425             return false;
3426         }
3427     }
3428     return true;
3429 }
3430 
3431 void bdrv_img_create(const char *filename, const char *fmt,
3432                      const char *base_filename, const char *base_fmt,
3433                      char *options, uint64_t img_size, int flags,
3434                      Error **errp, bool quiet)
3435 {
3436     QemuOptsList *create_opts = NULL;
3437     QemuOpts *opts = NULL;
3438     const char *backing_fmt, *backing_file;
3439     int64_t size;
3440     BlockDriver *drv, *proto_drv;
3441     Error *local_err = NULL;
3442     int ret = 0;
3443 
3444     /* Find driver and parse its options */
3445     drv = bdrv_find_format(fmt);
3446     if (!drv) {
3447         error_setg(errp, "Unknown file format '%s'", fmt);
3448         return;
3449     }
3450 
3451     proto_drv = bdrv_find_protocol(filename, true, errp);
3452     if (!proto_drv) {
3453         return;
3454     }
3455 
3456     if (!drv->create_opts) {
3457         error_setg(errp, "Format driver '%s' does not support image creation",
3458                    drv->format_name);
3459         return;
3460     }
3461 
3462     if (!proto_drv->create_opts) {
3463         error_setg(errp, "Protocol driver '%s' does not support image creation",
3464                    proto_drv->format_name);
3465         return;
3466     }
3467 
3468     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3469     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3470 
3471     /* Create parameter list with default values */
3472     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3473     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3474 
3475     /* Parse -o options */
3476     if (options) {
3477         qemu_opts_do_parse(opts, options, NULL, &local_err);
3478         if (local_err) {
3479             error_report_err(local_err);
3480             local_err = NULL;
3481             error_setg(errp, "Invalid options for file format '%s'", fmt);
3482             goto out;
3483         }
3484     }
3485 
3486     if (base_filename) {
3487         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3488         if (local_err) {
3489             error_setg(errp, "Backing file not supported for file format '%s'",
3490                        fmt);
3491             goto out;
3492         }
3493     }
3494 
3495     if (base_fmt) {
3496         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3497         if (local_err) {
3498             error_setg(errp, "Backing file format not supported for file "
3499                              "format '%s'", fmt);
3500             goto out;
3501         }
3502     }
3503 
3504     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3505     if (backing_file) {
3506         if (!strcmp(filename, backing_file)) {
3507             error_setg(errp, "Error: Trying to create an image with the "
3508                              "same filename as the backing file");
3509             goto out;
3510         }
3511     }
3512 
3513     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3514 
3515     // The size for the image must always be specified, with one exception:
3516     // If we are using a backing file, we can obtain the size from there
3517     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3518     if (size == -1) {
3519         if (backing_file) {
3520             BlockDriverState *bs;
3521             char *full_backing = g_new0(char, PATH_MAX);
3522             int64_t size;
3523             int back_flags;
3524             QDict *backing_options = NULL;
3525 
3526             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3527                                                          full_backing, PATH_MAX,
3528                                                          &local_err);
3529             if (local_err) {
3530                 g_free(full_backing);
3531                 goto out;
3532             }
3533 
3534             /* backing files always opened read-only */
3535             back_flags = flags;
3536             back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3537 
3538             if (backing_fmt) {
3539                 backing_options = qdict_new();
3540                 qdict_put(backing_options, "driver",
3541                           qstring_from_str(backing_fmt));
3542             }
3543 
3544             bs = NULL;
3545             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3546                             back_flags, &local_err);
3547             g_free(full_backing);
3548             if (ret < 0) {
3549                 goto out;
3550             }
3551             size = bdrv_getlength(bs);
3552             if (size < 0) {
3553                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3554                                  backing_file);
3555                 bdrv_unref(bs);
3556                 goto out;
3557             }
3558 
3559             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3560 
3561             bdrv_unref(bs);
3562         } else {
3563             error_setg(errp, "Image creation needs a size parameter");
3564             goto out;
3565         }
3566     }
3567 
3568     if (!quiet) {
3569         printf("Formatting '%s', fmt=%s ", filename, fmt);
3570         qemu_opts_print(opts, " ");
3571         puts("");
3572     }
3573 
3574     ret = bdrv_create(drv, filename, opts, &local_err);
3575 
3576     if (ret == -EFBIG) {
3577         /* This is generally a better message than whatever the driver would
3578          * deliver (especially because of the cluster_size_hint), since that
3579          * is most probably not much different from "image too large". */
3580         const char *cluster_size_hint = "";
3581         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3582             cluster_size_hint = " (try using a larger cluster size)";
3583         }
3584         error_setg(errp, "The image size is too large for file format '%s'"
3585                    "%s", fmt, cluster_size_hint);
3586         error_free(local_err);
3587         local_err = NULL;
3588     }
3589 
3590 out:
3591     qemu_opts_del(opts);
3592     qemu_opts_free(create_opts);
3593     if (local_err) {
3594         error_propagate(errp, local_err);
3595     }
3596 }
3597 
3598 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3599 {
3600     return bs->aio_context;
3601 }
3602 
3603 void bdrv_detach_aio_context(BlockDriverState *bs)
3604 {
3605     BdrvAioNotifier *baf;
3606 
3607     if (!bs->drv) {
3608         return;
3609     }
3610 
3611     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3612         baf->detach_aio_context(baf->opaque);
3613     }
3614 
3615     if (bs->drv->bdrv_detach_aio_context) {
3616         bs->drv->bdrv_detach_aio_context(bs);
3617     }
3618     if (bs->file) {
3619         bdrv_detach_aio_context(bs->file->bs);
3620     }
3621     if (bs->backing) {
3622         bdrv_detach_aio_context(bs->backing->bs);
3623     }
3624 
3625     bs->aio_context = NULL;
3626 }
3627 
3628 void bdrv_attach_aio_context(BlockDriverState *bs,
3629                              AioContext *new_context)
3630 {
3631     BdrvAioNotifier *ban;
3632 
3633     if (!bs->drv) {
3634         return;
3635     }
3636 
3637     bs->aio_context = new_context;
3638 
3639     if (bs->backing) {
3640         bdrv_attach_aio_context(bs->backing->bs, new_context);
3641     }
3642     if (bs->file) {
3643         bdrv_attach_aio_context(bs->file->bs, new_context);
3644     }
3645     if (bs->drv->bdrv_attach_aio_context) {
3646         bs->drv->bdrv_attach_aio_context(bs, new_context);
3647     }
3648 
3649     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3650         ban->attached_aio_context(new_context, ban->opaque);
3651     }
3652 }
3653 
3654 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3655 {
3656     bdrv_drain(bs); /* ensure there are no in-flight requests */
3657 
3658     bdrv_detach_aio_context(bs);
3659 
3660     /* This function executes in the old AioContext so acquire the new one in
3661      * case it runs in a different thread.
3662      */
3663     aio_context_acquire(new_context);
3664     bdrv_attach_aio_context(bs, new_context);
3665     aio_context_release(new_context);
3666 }
3667 
3668 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3669         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3670         void (*detach_aio_context)(void *opaque), void *opaque)
3671 {
3672     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3673     *ban = (BdrvAioNotifier){
3674         .attached_aio_context = attached_aio_context,
3675         .detach_aio_context   = detach_aio_context,
3676         .opaque               = opaque
3677     };
3678 
3679     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3680 }
3681 
3682 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3683                                       void (*attached_aio_context)(AioContext *,
3684                                                                    void *),
3685                                       void (*detach_aio_context)(void *),
3686                                       void *opaque)
3687 {
3688     BdrvAioNotifier *ban, *ban_next;
3689 
3690     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3691         if (ban->attached_aio_context == attached_aio_context &&
3692             ban->detach_aio_context   == detach_aio_context   &&
3693             ban->opaque               == opaque)
3694         {
3695             QLIST_REMOVE(ban, list);
3696             g_free(ban);
3697 
3698             return;
3699         }
3700     }
3701 
3702     abort();
3703 }
3704 
3705 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3706                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3707 {
3708     if (!bs->drv->bdrv_amend_options) {
3709         return -ENOTSUP;
3710     }
3711     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3712 }
3713 
3714 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3715  * of block filter and by bdrv_is_first_non_filter.
3716  * It is used to test if the given bs is the candidate or recurse more in the
3717  * node graph.
3718  */
3719 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3720                                       BlockDriverState *candidate)
3721 {
3722     /* return false if basic checks fails */
3723     if (!bs || !bs->drv) {
3724         return false;
3725     }
3726 
3727     /* the code reached a non block filter driver -> check if the bs is
3728      * the same as the candidate. It's the recursion termination condition.
3729      */
3730     if (!bs->drv->is_filter) {
3731         return bs == candidate;
3732     }
3733     /* Down this path the driver is a block filter driver */
3734 
3735     /* If the block filter recursion method is defined use it to recurse down
3736      * the node graph.
3737      */
3738     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3739         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3740     }
3741 
3742     /* the driver is a block filter but don't allow to recurse -> return false
3743      */
3744     return false;
3745 }
3746 
3747 /* This function checks if the candidate is the first non filter bs down it's
3748  * bs chain. Since we don't have pointers to parents it explore all bs chains
3749  * from the top. Some filters can choose not to pass down the recursion.
3750  */
3751 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3752 {
3753     BlockDriverState *bs = NULL;
3754 
3755     /* walk down the bs forest recursively */
3756     while ((bs = bdrv_next(bs)) != NULL) {
3757         bool perm;
3758 
3759         /* try to recurse in this top level bs */
3760         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3761 
3762         /* candidate is the first non filter */
3763         if (perm) {
3764             return true;
3765         }
3766     }
3767 
3768     return false;
3769 }
3770 
3771 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3772                                         const char *node_name, Error **errp)
3773 {
3774     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3775     AioContext *aio_context;
3776 
3777     if (!to_replace_bs) {
3778         error_setg(errp, "Node name '%s' not found", node_name);
3779         return NULL;
3780     }
3781 
3782     aio_context = bdrv_get_aio_context(to_replace_bs);
3783     aio_context_acquire(aio_context);
3784 
3785     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3786         to_replace_bs = NULL;
3787         goto out;
3788     }
3789 
3790     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3791      * most non filter in order to prevent data corruption.
3792      * Another benefit is that this tests exclude backing files which are
3793      * blocked by the backing blockers.
3794      */
3795     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3796         error_setg(errp, "Only top most non filter can be replaced");
3797         to_replace_bs = NULL;
3798         goto out;
3799     }
3800 
3801 out:
3802     aio_context_release(aio_context);
3803     return to_replace_bs;
3804 }
3805 
3806 static bool append_open_options(QDict *d, BlockDriverState *bs)
3807 {
3808     const QDictEntry *entry;
3809     QemuOptDesc *desc;
3810     BdrvChild *child;
3811     bool found_any = false;
3812     const char *p;
3813 
3814     for (entry = qdict_first(bs->options); entry;
3815          entry = qdict_next(bs->options, entry))
3816     {
3817         /* Exclude options for children */
3818         QLIST_FOREACH(child, &bs->children, next) {
3819             if (strstart(qdict_entry_key(entry), child->name, &p)
3820                 && (!*p || *p == '.'))
3821             {
3822                 break;
3823             }
3824         }
3825         if (child) {
3826             continue;
3827         }
3828 
3829         /* And exclude all non-driver-specific options */
3830         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3831             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3832                 break;
3833             }
3834         }
3835         if (desc->name) {
3836             continue;
3837         }
3838 
3839         qobject_incref(qdict_entry_value(entry));
3840         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3841         found_any = true;
3842     }
3843 
3844     return found_any;
3845 }
3846 
3847 /* Updates the following BDS fields:
3848  *  - exact_filename: A filename which may be used for opening a block device
3849  *                    which (mostly) equals the given BDS (even without any
3850  *                    other options; so reading and writing must return the same
3851  *                    results, but caching etc. may be different)
3852  *  - full_open_options: Options which, when given when opening a block device
3853  *                       (without a filename), result in a BDS (mostly)
3854  *                       equalling the given one
3855  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3856  *              full_open_options is converted to a JSON object, prefixed with
3857  *              "json:" (for use through the JSON pseudo protocol) and put here.
3858  */
3859 void bdrv_refresh_filename(BlockDriverState *bs)
3860 {
3861     BlockDriver *drv = bs->drv;
3862     QDict *opts;
3863 
3864     if (!drv) {
3865         return;
3866     }
3867 
3868     /* This BDS's file name will most probably depend on its file's name, so
3869      * refresh that first */
3870     if (bs->file) {
3871         bdrv_refresh_filename(bs->file->bs);
3872     }
3873 
3874     if (drv->bdrv_refresh_filename) {
3875         /* Obsolete information is of no use here, so drop the old file name
3876          * information before refreshing it */
3877         bs->exact_filename[0] = '\0';
3878         if (bs->full_open_options) {
3879             QDECREF(bs->full_open_options);
3880             bs->full_open_options = NULL;
3881         }
3882 
3883         opts = qdict_new();
3884         append_open_options(opts, bs);
3885         drv->bdrv_refresh_filename(bs, opts);
3886         QDECREF(opts);
3887     } else if (bs->file) {
3888         /* Try to reconstruct valid information from the underlying file */
3889         bool has_open_options;
3890 
3891         bs->exact_filename[0] = '\0';
3892         if (bs->full_open_options) {
3893             QDECREF(bs->full_open_options);
3894             bs->full_open_options = NULL;
3895         }
3896 
3897         opts = qdict_new();
3898         has_open_options = append_open_options(opts, bs);
3899 
3900         /* If no specific options have been given for this BDS, the filename of
3901          * the underlying file should suffice for this one as well */
3902         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3903             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3904         }
3905         /* Reconstructing the full options QDict is simple for most format block
3906          * drivers, as long as the full options are known for the underlying
3907          * file BDS. The full options QDict of that file BDS should somehow
3908          * contain a representation of the filename, therefore the following
3909          * suffices without querying the (exact_)filename of this BDS. */
3910         if (bs->file->bs->full_open_options) {
3911             qdict_put_obj(opts, "driver",
3912                           QOBJECT(qstring_from_str(drv->format_name)));
3913             QINCREF(bs->file->bs->full_open_options);
3914             qdict_put_obj(opts, "file",
3915                           QOBJECT(bs->file->bs->full_open_options));
3916 
3917             bs->full_open_options = opts;
3918         } else {
3919             QDECREF(opts);
3920         }
3921     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3922         /* There is no underlying file BDS (at least referenced by BDS.file),
3923          * so the full options QDict should be equal to the options given
3924          * specifically for this block device when it was opened (plus the
3925          * driver specification).
3926          * Because those options don't change, there is no need to update
3927          * full_open_options when it's already set. */
3928 
3929         opts = qdict_new();
3930         append_open_options(opts, bs);
3931         qdict_put_obj(opts, "driver",
3932                       QOBJECT(qstring_from_str(drv->format_name)));
3933 
3934         if (bs->exact_filename[0]) {
3935             /* This may not work for all block protocol drivers (some may
3936              * require this filename to be parsed), but we have to find some
3937              * default solution here, so just include it. If some block driver
3938              * does not support pure options without any filename at all or
3939              * needs some special format of the options QDict, it needs to
3940              * implement the driver-specific bdrv_refresh_filename() function.
3941              */
3942             qdict_put_obj(opts, "filename",
3943                           QOBJECT(qstring_from_str(bs->exact_filename)));
3944         }
3945 
3946         bs->full_open_options = opts;
3947     }
3948 
3949     if (bs->exact_filename[0]) {
3950         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
3951     } else if (bs->full_open_options) {
3952         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
3953         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
3954                  qstring_get_str(json));
3955         QDECREF(json);
3956     }
3957 }
3958 
3959 /*
3960  * Hot add/remove a BDS's child. So the user can take a child offline when
3961  * it is broken and take a new child online
3962  */
3963 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
3964                     Error **errp)
3965 {
3966 
3967     if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
3968         error_setg(errp, "The node %s does not support adding a child",
3969                    bdrv_get_device_or_node_name(parent_bs));
3970         return;
3971     }
3972 
3973     if (!QLIST_EMPTY(&child_bs->parents)) {
3974         error_setg(errp, "The node %s already has a parent",
3975                    child_bs->node_name);
3976         return;
3977     }
3978 
3979     parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
3980 }
3981 
3982 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
3983 {
3984     BdrvChild *tmp;
3985 
3986     if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
3987         error_setg(errp, "The node %s does not support removing a child",
3988                    bdrv_get_device_or_node_name(parent_bs));
3989         return;
3990     }
3991 
3992     QLIST_FOREACH(tmp, &parent_bs->children, next) {
3993         if (tmp == child) {
3994             break;
3995         }
3996     }
3997 
3998     if (!tmp) {
3999         error_setg(errp, "The node %s does not have a child named %s",
4000                    bdrv_get_device_or_node_name(parent_bs),
4001                    bdrv_get_device_or_node_name(child->bs));
4002         return;
4003     }
4004 
4005     parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
4006 }
4007