xref: /openbmc/qemu/block.c (revision acc6809ddc45dd9a896c53ec7b61e66604923cd7)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44 
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/*
 * Sentinel stored in a result field while an emulated synchronous operation
 * is still in progress; bdrv_create() polls until the value changes.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Nodes that were given a node name; filled by bdrv_assign_node_name() */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState created through bdrv_new() */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* All block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is not part of this section */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration; the definition is not part of this section */
static void bdrv_close(BlockDriverState *bs);
77 
78 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed by
 * a colon (a drive specification such as "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int lower = (letter >= 'a' && letter <= 'z');
    const int upper = (letter >= 'A' && letter <= 'Z');

    if (!lower && !upper) {
        /* Not a drive letter; do not look at the second character */
        return 0;
    }

    return filename[1] == ':';
}
85 
86 int is_windows_drive(const char *filename)
87 {
88     if (is_windows_drive_prefix(filename) &&
89         filename[2] == '\0')
90         return 1;
91     if (strstart(filename, "\\\\.\\", NULL) ||
92         strstart(filename, "//./", NULL))
93         return 1;
94     return 0;
95 }
96 #endif
97 
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100     if (!bs || !bs->drv) {
101         /* page size or 4k (hdd sector size) should be on the safe side */
102         return MAX(4096, getpagesize());
103     }
104 
105     return bs->bl.opt_mem_alignment;
106 }
107 
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110     if (!bs || !bs->drv) {
111         /* page size or 4k (hdd sector size) should be on the safe side */
112         return MAX(4096, getpagesize());
113     }
114 
115     return bs->bl.min_mem_alignment;
116 }
117 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * The first ':' must come before any path separator.  On Windows, drive
 * specifications are never treated as a protocol prefix.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_chars = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_chars = ":/";
#endif

    /* Look at the first colon or separator, or the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
135 
/*
 * Return non-zero if @path is absolute.
 *
 * A path is absolute if it starts with '/'.  On Windows a leading '\' and
 * anything accepted by is_windows_drive() or is_windows_drive_prefix()
 * (including names like "\\.\d:") count as absolute too.
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
148 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename
 * interpreted relative to @base_path.
 *
 * If @filename is absolute it is copied unchanged.  Otherwise the leading
 * part of @base_path, up to and including its last path separator or its
 * first ':' (whichever comes later), is copied and @filename is appended.
 * URLs are supported.  The result is truncated to fit @dest.
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol prefix), if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* The directory part ends at whichever of the two lies further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192 
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194                                                   const char *backing,
195                                                   char *dest, size_t sz,
196                                                   Error **errp)
197 {
198     if (backing[0] == '\0' || path_has_protocol(backing) ||
199         path_is_absolute(backing))
200     {
201         pstrcpy(dest, sz, backing);
202     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203         error_setg(errp, "Cannot use relative backing file names for '%s'",
204                    backed);
205     } else {
206         path_combine(dest, sz, backed, backing);
207     }
208 }
209 
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211                                     Error **errp)
212 {
213     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214 
215     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216                                                  dest, sz, errp);
217 }
218 
/*
 * Register block driver @bdrv: run bdrv_setup_io_funcs() on it and put it at
 * the head of the global bdrv_drivers list, where the lookup and probing
 * functions in this file will find it.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    /* Head insertion: the most recently registered driver is visited first */
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
225 
226 BlockDriverState *bdrv_new_root(void)
227 {
228     return bdrv_new();
229 }
230 
231 BlockDriverState *bdrv_new(void)
232 {
233     BlockDriverState *bs;
234     int i;
235 
236     bs = g_new0(BlockDriverState, 1);
237     QLIST_INIT(&bs->dirty_bitmaps);
238     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
239         QLIST_INIT(&bs->op_blockers[i]);
240     }
241     notifier_with_return_list_init(&bs->before_write_notifiers);
242     qemu_co_queue_init(&bs->throttled_reqs[0]);
243     qemu_co_queue_init(&bs->throttled_reqs[1]);
244     bs->refcnt = 1;
245     bs->aio_context = qemu_get_aio_context();
246 
247     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
248 
249     return bs;
250 }
251 
252 BlockDriver *bdrv_find_format(const char *format_name)
253 {
254     BlockDriver *drv1;
255     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
256         if (!strcmp(drv1->format_name, format_name)) {
257             return drv1;
258         }
259     }
260     return NULL;
261 }
262 
263 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
264 {
265     static const char *whitelist_rw[] = {
266         CONFIG_BDRV_RW_WHITELIST
267     };
268     static const char *whitelist_ro[] = {
269         CONFIG_BDRV_RO_WHITELIST
270     };
271     const char **p;
272 
273     if (!whitelist_rw[0] && !whitelist_ro[0]) {
274         return 1;               /* no whitelist, anything goes */
275     }
276 
277     for (p = whitelist_rw; *p; p++) {
278         if (!strcmp(drv->format_name, *p)) {
279             return 1;
280         }
281     }
282     if (read_only) {
283         for (p = whitelist_ro; *p; p++) {
284             if (!strcmp(drv->format_name, *p)) {
285                 return 1;
286             }
287         }
288     }
289     return 0;
290 }
291 
292 bool bdrv_uses_whitelist(void)
293 {
294     return use_bdrv_whitelist;
295 }
296 
/*
 * Arguments and results exchanged between bdrv_create() and the coroutine
 * entry point bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose .bdrv_create callback is invoked */
    char *filename;     /* private copy of the file name; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options passed through to the driver */
    int ret;            /* NOT_DONE until the callback has returned its result */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
304 
305 static void coroutine_fn bdrv_create_co_entry(void *opaque)
306 {
307     Error *local_err = NULL;
308     int ret;
309 
310     CreateCo *cco = opaque;
311     assert(cco->drv);
312 
313     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
314     if (local_err) {
315         error_propagate(&cco->err, local_err);
316     }
317     cco->ret = ret;
318 }
319 
320 int bdrv_create(BlockDriver *drv, const char* filename,
321                 QemuOpts *opts, Error **errp)
322 {
323     int ret;
324 
325     Coroutine *co;
326     CreateCo cco = {
327         .drv = drv,
328         .filename = g_strdup(filename),
329         .opts = opts,
330         .ret = NOT_DONE,
331         .err = NULL,
332     };
333 
334     if (!drv->bdrv_create) {
335         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
336         ret = -ENOTSUP;
337         goto out;
338     }
339 
340     if (qemu_in_coroutine()) {
341         /* Fast-path if already in coroutine context */
342         bdrv_create_co_entry(&cco);
343     } else {
344         co = qemu_coroutine_create(bdrv_create_co_entry);
345         qemu_coroutine_enter(co, &cco);
346         while (cco.ret == NOT_DONE) {
347             aio_poll(qemu_get_aio_context(), true);
348         }
349     }
350 
351     ret = cco.ret;
352     if (ret < 0) {
353         if (cco.err) {
354             error_propagate(errp, cco.err);
355         } else {
356             error_setg_errno(errp, -ret, "Could not create image");
357         }
358     }
359 
360 out:
361     g_free(cco.filename);
362     return ret;
363 }
364 
365 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
366 {
367     BlockDriver *drv;
368     Error *local_err = NULL;
369     int ret;
370 
371     drv = bdrv_find_protocol(filename, true, errp);
372     if (drv == NULL) {
373         return -ENOENT;
374     }
375 
376     ret = bdrv_create(drv, filename, opts, &local_err);
377     if (local_err) {
378         error_propagate(errp, local_err);
379     }
380     return ret;
381 }
382 
383 /**
384  * Try to get @bs's logical and physical block size.
385  * On success, store them in @bsz struct and return 0.
386  * On failure return -errno.
387  * @bs must not be empty.
388  */
389 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
390 {
391     BlockDriver *drv = bs->drv;
392 
393     if (drv && drv->bdrv_probe_blocksizes) {
394         return drv->bdrv_probe_blocksizes(bs, bsz);
395     }
396 
397     return -ENOTSUP;
398 }
399 
400 /**
401  * Try to get @bs's geometry (cyls, heads, sectors).
402  * On success, store them in @geo struct and return 0.
403  * On failure return -errno.
404  * @bs must not be empty.
405  */
406 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
407 {
408     BlockDriver *drv = bs->drv;
409 
410     if (drv && drv->bdrv_probe_geometry) {
411         return drv->bdrv_probe_geometry(bs, geo);
412     }
413 
414     return -ENOTSUP;
415 }
416 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer that receives the path of the created file
 * @size:     capacity of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when TMPDIR
 * is not set.  On Windows the directory comes from GetTempPath() and @size
 * must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if the
 * path does not fit into @filename).  On Windows a failure is reported as
 * the negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int ret;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Capture close()'s error before cleaning up: unlink() may itself
         * fail and overwrite errno, which would make us report the wrong
         * error.
         */
        ret = -errno;
        unlink(filename);
        return ret;
    }
    return 0;
#endif
}
452 
453 /*
454  * Detect host devices. By convention, /dev/cdrom[N] is always
455  * recognized as a host CDROM.
456  */
457 static BlockDriver *find_hdev_driver(const char *filename)
458 {
459     int score_max = 0, score;
460     BlockDriver *drv = NULL, *d;
461 
462     QLIST_FOREACH(d, &bdrv_drivers, list) {
463         if (d->bdrv_probe_device) {
464             score = d->bdrv_probe_device(filename);
465             if (score > score_max) {
466                 score_max = score;
467                 drv = d;
468             }
469         }
470     }
471 
472     return drv;
473 }
474 
475 BlockDriver *bdrv_find_protocol(const char *filename,
476                                 bool allow_protocol_prefix,
477                                 Error **errp)
478 {
479     BlockDriver *drv1;
480     char protocol[128];
481     int len;
482     const char *p;
483 
484     /* TODO Drivers without bdrv_file_open must be specified explicitly */
485 
486     /*
487      * XXX(hch): we really should not let host device detection
488      * override an explicit protocol specification, but moving this
489      * later breaks access to device names with colons in them.
490      * Thanks to the brain-dead persistent naming schemes on udev-
491      * based Linux systems those actually are quite common.
492      */
493     drv1 = find_hdev_driver(filename);
494     if (drv1) {
495         return drv1;
496     }
497 
498     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
499         return &bdrv_file;
500     }
501 
502     p = strchr(filename, ':');
503     assert(p != NULL);
504     len = p - filename;
505     if (len > sizeof(protocol) - 1)
506         len = sizeof(protocol) - 1;
507     memcpy(protocol, filename, len);
508     protocol[len] = '\0';
509     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
510         if (drv1->protocol_name &&
511             !strcmp(drv1->protocol_name, protocol)) {
512             return drv1;
513         }
514     }
515 
516     error_setg(errp, "Unknown protocol '%s'", protocol);
517     return NULL;
518 }
519 
520 /*
521  * Guess image format by probing its contents.
522  * This is not a good idea when your image is raw (CVE-2008-2004), but
523  * we do it anyway for backward compatibility.
524  *
525  * @buf         contains the image's first @buf_size bytes.
526  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
527  *              but can be smaller if the image file is smaller)
528  * @filename    is its filename.
529  *
530  * For all block drivers, call the bdrv_probe() method to get its
531  * probing score.
532  * Return the first block driver with the highest probing score.
533  */
534 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
535                             const char *filename)
536 {
537     int score_max = 0, score;
538     BlockDriver *drv = NULL, *d;
539 
540     QLIST_FOREACH(d, &bdrv_drivers, list) {
541         if (d->bdrv_probe) {
542             score = d->bdrv_probe(buf, buf_size, filename);
543             if (score > score_max) {
544                 score_max = score;
545                 drv = d;
546             }
547         }
548     }
549 
550     return drv;
551 }
552 
553 static int find_image_format(BlockDriverState *bs, const char *filename,
554                              BlockDriver **pdrv, Error **errp)
555 {
556     BlockDriver *drv;
557     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
558     int ret = 0;
559 
560     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
561     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
562         *pdrv = &bdrv_raw;
563         return ret;
564     }
565 
566     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
567     if (ret < 0) {
568         error_setg_errno(errp, -ret, "Could not read image for determining its "
569                          "format");
570         *pdrv = NULL;
571         return ret;
572     }
573 
574     drv = bdrv_probe_all(buf, ret, filename);
575     if (!drv) {
576         error_setg(errp, "Could not determine image format: No compatible "
577                    "driver found");
578         ret = -ENOENT;
579     }
580     *pdrv = drv;
581     return ret;
582 }
583 
584 /**
585  * Set the current 'total_sectors' value
586  * Return 0 on success, -errno on error.
587  */
588 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
589 {
590     BlockDriver *drv = bs->drv;
591 
592     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
593     if (bdrv_is_sg(bs))
594         return 0;
595 
596     /* query actual device if possible, otherwise just trust the hint */
597     if (drv->bdrv_getlength) {
598         int64_t length = drv->bdrv_getlength(bs);
599         if (length < 0) {
600             return length;
601         }
602         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
603     }
604 
605     bs->total_sectors = hint;
606     return 0;
607 }
608 
609 /**
610  * Combines a QDict of new block driver @options with any missing options taken
611  * from @old_options, so that leaving out an option defaults to its old value.
612  */
613 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
614                               QDict *old_options)
615 {
616     if (bs->drv && bs->drv->bdrv_join_options) {
617         bs->drv->bdrv_join_options(options, old_options);
618     } else {
619         qdict_join(options, old_options, false);
620     }
621 }
622 
623 /**
624  * Set open flags for a given discard mode
625  *
626  * Return 0 on success, -1 if the discard mode was invalid.
627  */
628 int bdrv_parse_discard_flags(const char *mode, int *flags)
629 {
630     *flags &= ~BDRV_O_UNMAP;
631 
632     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
633         /* do nothing */
634     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
635         *flags |= BDRV_O_UNMAP;
636     } else {
637         return -1;
638     }
639 
640     return 0;
641 }
642 
643 /**
644  * Set open flags for a given cache mode
645  *
646  * Return 0 on success, -1 if the cache mode was invalid.
647  */
648 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
649 {
650     *flags &= ~BDRV_O_CACHE_MASK;
651 
652     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
653         *writethrough = false;
654         *flags |= BDRV_O_NOCACHE;
655     } else if (!strcmp(mode, "directsync")) {
656         *writethrough = true;
657         *flags |= BDRV_O_NOCACHE;
658     } else if (!strcmp(mode, "writeback")) {
659         *writethrough = false;
660     } else if (!strcmp(mode, "unsafe")) {
661         *writethrough = false;
662         *flags |= BDRV_O_NO_FLUSH;
663     } else if (!strcmp(mode, "writethrough")) {
664         *writethrough = true;
665     } else {
666         return -1;
667     }
668 
669     return 0;
670 }
671 
672 /*
673  * Returns the options and flags that a temporary snapshot should get, based on
674  * the originally requested flags (the originally requested image will have
675  * flags like a backing file)
676  */
677 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
678                                        int parent_flags, QDict *parent_options)
679 {
680     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
681 
682     /* For temporary files, unconditional cache=unsafe is fine */
683     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
684     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
685 }
686 
687 /*
688  * Returns the options and flags that bs->file should get if a protocol driver
689  * is expected, based on the given options and flags for the parent BDS
690  */
691 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
692                                    int parent_flags, QDict *parent_options)
693 {
694     int flags = parent_flags;
695 
696     /* Enable protocol handling, disable format probing for bs->file */
697     flags |= BDRV_O_PROTOCOL;
698 
699     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
700      * the parent. */
701     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
702     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
703 
704     /* Our block drivers take care to send flushes and respect unmap policy,
705      * so we can default to enable both on lower layers regardless of the
706      * corresponding parent options. */
707     flags |= BDRV_O_UNMAP;
708 
709     /* Clear flags that only apply to the top layer */
710     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
711                BDRV_O_NO_IO);
712 
713     *child_flags = flags;
714 }
715 
/* Child role whose options and flags are derived by bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
719 
720 /*
721  * Returns the options and flags that bs->file should get if the use of formats
722  * (and not only protocols) is permitted for it, based on the given options and
723  * flags for the parent BDS
724  */
725 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
726                                        int parent_flags, QDict *parent_options)
727 {
728     child_file.inherit_options(child_flags, child_options,
729                                parent_flags, parent_options);
730 
731     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
732 }
733 
/*
 * Child role whose options and flags are derived by
 * bdrv_inherited_fmt_options()
 */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
737 
738 /*
739  * Returns the options and flags that bs->backing should get, based on the
740  * given options and flags for the parent BDS
741  */
742 static void bdrv_backing_options(int *child_flags, QDict *child_options,
743                                  int parent_flags, QDict *parent_options)
744 {
745     int flags = parent_flags;
746 
747     /* The cache mode is inherited unmodified for backing files; except WCE,
748      * which is only applied on the top level (BlockBackend) */
749     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
750     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
751 
752     /* backing files always opened read-only */
753     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
754 
755     /* snapshot=on is handled on the top layer */
756     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
757 
758     *child_flags = flags;
759 }
760 
/* Child role whose options and flags are derived by bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
764 
765 static int bdrv_open_flags(BlockDriverState *bs, int flags)
766 {
767     int open_flags = flags;
768 
769     /*
770      * Clear flags that are internal to the block layer before opening the
771      * image.
772      */
773     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
774 
775     /*
776      * Snapshots should be writable.
777      */
778     if (flags & BDRV_O_TEMPORARY) {
779         open_flags |= BDRV_O_RDWR;
780     }
781 
782     return open_flags;
783 }
784 
785 static void update_flags_from_options(int *flags, QemuOpts *opts)
786 {
787     *flags &= ~BDRV_O_CACHE_MASK;
788 
789     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
790     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
791         *flags |= BDRV_O_NO_FLUSH;
792     }
793 
794     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
795     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
796         *flags |= BDRV_O_NOCACHE;
797     }
798 }
799 
800 static void update_options_from_flags(QDict *options, int flags)
801 {
802     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
803         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
804                   qbool_from_bool(flags & BDRV_O_NOCACHE));
805     }
806     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
807         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
808                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
809     }
810 }
811 
812 static void bdrv_assign_node_name(BlockDriverState *bs,
813                                   const char *node_name,
814                                   Error **errp)
815 {
816     char *gen_node_name = NULL;
817 
818     if (!node_name) {
819         node_name = gen_node_name = id_generate(ID_BLOCK);
820     } else if (!id_wellformed(node_name)) {
821         /*
822          * Check for empty string or invalid characters, but not if it is
823          * generated (generated names use characters not available to the user)
824          */
825         error_setg(errp, "Invalid node name");
826         return;
827     }
828 
829     /* takes care of avoiding namespaces collisions */
830     if (blk_by_name(node_name)) {
831         error_setg(errp, "node-name=%s is conflicting with a device id",
832                    node_name);
833         goto out;
834     }
835 
836     /* takes care of avoiding duplicates node names */
837     if (bdrv_find_node(node_name)) {
838         error_setg(errp, "Duplicate node name");
839         goto out;
840     }
841 
842     /* copy node name into the bs and insert it into the graph list */
843     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
844     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
845 out:
846     g_free(gen_node_name);
847 }
848 
/*
 * Options handled by the generic block layer itself.  bdrv_open_common()
 * absorbs them from the options dict before calling into the driver.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
876 
/*
 * Common part for opening disk images and files
 *
 * @bs:      node being opened; bs->file must still be NULL and
 *           bs->open_flags must already hold the requested flags
 * @file:    child to install as bs->file for drivers opened through
 *           .bdrv_open; must be NULL for drivers providing .bdrv_file_open
 * @options: option dict; must be non-NULL and distinct from bs->options.
 *           It must contain a "driver" entry naming a registered driver.
 * @errp:    error destination
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success and a negative errno value on failure.  On failure
 * bs->file, bs->opaque and bs->drv are reset if they were already set here.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Move the generic options (see bdrv_runtime_opts) out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* The caller must have made sure that a valid driver is specified */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* The file name comes from the child if there is one */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    /*
     * NOTE(review): on the failure paths below the node name assigned here
     * is not undone in this function; presumably the caller's cleanup takes
     * care of that -- confirm.
     */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that the driver's open function may override */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell apart "read-only would have worked" from "not allowed" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise report a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Undo the driver attachment; @file itself is not released here */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1027 
1028 static QDict *parse_json_filename(const char *filename, Error **errp)
1029 {
1030     QObject *options_obj;
1031     QDict *options;
1032     int ret;
1033 
1034     ret = strstart(filename, "json:", &filename);
1035     assert(ret);
1036 
1037     options_obj = qobject_from_json(filename);
1038     if (!options_obj) {
1039         error_setg(errp, "Could not parse the JSON options");
1040         return NULL;
1041     }
1042 
1043     if (qobject_type(options_obj) != QTYPE_QDICT) {
1044         qobject_decref(options_obj);
1045         error_setg(errp, "Invalid JSON object given");
1046         return NULL;
1047     }
1048 
1049     options = qobject_to_qdict(options_obj);
1050     qdict_flatten(options);
1051 
1052     return options;
1053 }
1054 
1055 static void parse_json_protocol(QDict *options, const char **pfilename,
1056                                 Error **errp)
1057 {
1058     QDict *json_options;
1059     Error *local_err = NULL;
1060 
1061     /* Parse json: pseudo-protocol */
1062     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1063         return;
1064     }
1065 
1066     json_options = parse_json_filename(*pfilename, &local_err);
1067     if (local_err) {
1068         error_propagate(errp, local_err);
1069         return;
1070     }
1071 
1072     /* Options given in the filename have lower priority than options
1073      * specified directly */
1074     qdict_join(options, json_options, false);
1075     QDECREF(json_options);
1076     *pfilename = NULL;
1077 }
1078 
1079 /*
1080  * Fills in default options for opening images and converts the legacy
1081  * filename/flags pair to option QDict entries.
1082  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1083  * block driver has been specified explicitly.
1084  */
1085 static int bdrv_fill_options(QDict **options, const char *filename,
1086                              int *flags, Error **errp)
1087 {
1088     const char *drvname;
1089     bool protocol = *flags & BDRV_O_PROTOCOL;
1090     bool parse_filename = false;
1091     BlockDriver *drv = NULL;
1092     Error *local_err = NULL;
1093 
1094     drvname = qdict_get_try_str(*options, "driver");
1095     if (drvname) {
1096         drv = bdrv_find_format(drvname);
1097         if (!drv) {
1098             error_setg(errp, "Unknown driver '%s'", drvname);
1099             return -ENOENT;
1100         }
1101         /* If the user has explicitly specified the driver, this choice should
1102          * override the BDRV_O_PROTOCOL flag */
1103         protocol = drv->bdrv_file_open;
1104     }
1105 
1106     if (protocol) {
1107         *flags |= BDRV_O_PROTOCOL;
1108     } else {
1109         *flags &= ~BDRV_O_PROTOCOL;
1110     }
1111 
1112     /* Translate cache options from flags into options */
1113     update_options_from_flags(*options, *flags);
1114 
1115     /* Fetch the file name from the options QDict if necessary */
1116     if (protocol && filename) {
1117         if (!qdict_haskey(*options, "filename")) {
1118             qdict_put(*options, "filename", qstring_from_str(filename));
1119             parse_filename = true;
1120         } else {
1121             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1122                              "the same time");
1123             return -EINVAL;
1124         }
1125     }
1126 
1127     /* Find the right block driver */
1128     filename = qdict_get_try_str(*options, "filename");
1129 
1130     if (!drvname && protocol) {
1131         if (filename) {
1132             drv = bdrv_find_protocol(filename, parse_filename, errp);
1133             if (!drv) {
1134                 return -EINVAL;
1135             }
1136 
1137             drvname = drv->format_name;
1138             qdict_put(*options, "driver", qstring_from_str(drvname));
1139         } else {
1140             error_setg(errp, "Must specify either driver or file");
1141             return -EINVAL;
1142         }
1143     }
1144 
1145     assert(drv || !protocol);
1146 
1147     /* Driver-specific filename parsing */
1148     if (drv && drv->bdrv_parse_filename && parse_filename) {
1149         drv->bdrv_parse_filename(filename, *options, &local_err);
1150         if (local_err) {
1151             error_propagate(errp, local_err);
1152             return -EINVAL;
1153         }
1154 
1155         if (!drv->bdrv_needs_filename) {
1156             qdict_del(*options, "filename");
1157         }
1158     }
1159 
1160     return 0;
1161 }
1162 
1163 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1164                                   const char *child_name,
1165                                   const BdrvChildRole *child_role)
1166 {
1167     BdrvChild *child = g_new(BdrvChild, 1);
1168     *child = (BdrvChild) {
1169         .bs     = child_bs,
1170         .name   = g_strdup(child_name),
1171         .role   = child_role,
1172     };
1173 
1174     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1175 
1176     return child;
1177 }
1178 
1179 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1180                                     BlockDriverState *child_bs,
1181                                     const char *child_name,
1182                                     const BdrvChildRole *child_role)
1183 {
1184     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1185     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1186     return child;
1187 }
1188 
1189 static void bdrv_detach_child(BdrvChild *child)
1190 {
1191     if (child->next.le_prev) {
1192         QLIST_REMOVE(child, next);
1193         child->next.le_prev = NULL;
1194     }
1195     QLIST_REMOVE(child, next_parent);
1196     g_free(child->name);
1197     g_free(child);
1198 }
1199 
1200 void bdrv_root_unref_child(BdrvChild *child)
1201 {
1202     BlockDriverState *child_bs;
1203 
1204     child_bs = child->bs;
1205     bdrv_detach_child(child);
1206     bdrv_unref(child_bs);
1207 }
1208 
1209 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1210 {
1211     if (child == NULL) {
1212         return;
1213     }
1214 
1215     if (child->bs->inherits_from == parent) {
1216         child->bs->inherits_from = NULL;
1217     }
1218 
1219     bdrv_root_unref_child(child);
1220 }
1221 
1222 /*
1223  * Sets the backing file link of a BDS. A new reference is created; callers
1224  * which don't need their own reference any more must call bdrv_unref().
1225  */
1226 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1227 {
1228     if (backing_hd) {
1229         bdrv_ref(backing_hd);
1230     }
1231 
1232     if (bs->backing) {
1233         assert(bs->backing_blocker);
1234         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1235         bdrv_unref_child(bs, bs->backing);
1236     } else if (backing_hd) {
1237         error_setg(&bs->backing_blocker,
1238                    "node is used as backing hd of '%s'",
1239                    bdrv_get_device_or_node_name(bs));
1240     }
1241 
1242     if (!backing_hd) {
1243         error_free(bs->backing_blocker);
1244         bs->backing_blocker = NULL;
1245         bs->backing = NULL;
1246         goto out;
1247     }
1248     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1249     bs->open_flags &= ~BDRV_O_NO_BACKING;
1250     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1251     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1252             backing_hd->drv ? backing_hd->drv->format_name : "");
1253 
1254     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1255     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1256     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1257                     bs->backing_blocker);
1258 out:
1259     bdrv_refresh_limits(bs, NULL);
1260 }
1261 
1262 /*
1263  * Opens the backing file for a BlockDriverState if not yet open
1264  *
1265  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1266  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1267  * itself, all options starting with "${bdref_key}." are considered part of the
1268  * BlockdevRef.
1269  *
1270  * TODO Can this be unified with bdrv_open_image()?
1271  */
1272 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1273                            const char *bdref_key, Error **errp)
1274 {
1275     char *backing_filename = g_malloc0(PATH_MAX);
1276     char *bdref_key_dot;
1277     const char *reference = NULL;
1278     int ret = 0;
1279     BlockDriverState *backing_hd;
1280     QDict *options;
1281     QDict *tmp_parent_options = NULL;
1282     Error *local_err = NULL;
1283 
1284     if (bs->backing != NULL) {
1285         goto free_exit;
1286     }
1287 
1288     /* NULL means an empty set of options */
1289     if (parent_options == NULL) {
1290         tmp_parent_options = qdict_new();
1291         parent_options = tmp_parent_options;
1292     }
1293 
1294     bs->open_flags &= ~BDRV_O_NO_BACKING;
1295 
1296     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1297     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1298     g_free(bdref_key_dot);
1299 
1300     reference = qdict_get_try_str(parent_options, bdref_key);
1301     if (reference || qdict_haskey(options, "file.filename")) {
1302         backing_filename[0] = '\0';
1303     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1304         QDECREF(options);
1305         goto free_exit;
1306     } else {
1307         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1308                                        &local_err);
1309         if (local_err) {
1310             ret = -EINVAL;
1311             error_propagate(errp, local_err);
1312             QDECREF(options);
1313             goto free_exit;
1314         }
1315     }
1316 
1317     if (!bs->drv || !bs->drv->supports_backing) {
1318         ret = -EINVAL;
1319         error_setg(errp, "Driver doesn't support backing files");
1320         QDECREF(options);
1321         goto free_exit;
1322     }
1323 
1324     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1325         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1326     }
1327 
1328     backing_hd = NULL;
1329     ret = bdrv_open_inherit(&backing_hd,
1330                             *backing_filename ? backing_filename : NULL,
1331                             reference, options, 0, bs, &child_backing,
1332                             errp);
1333     if (ret < 0) {
1334         bs->open_flags |= BDRV_O_NO_BACKING;
1335         error_prepend(errp, "Could not open backing file: ");
1336         goto free_exit;
1337     }
1338 
1339     /* Hook up the backing file link; drop our reference, bs owns the
1340      * backing_hd reference now */
1341     bdrv_set_backing_hd(bs, backing_hd);
1342     bdrv_unref(backing_hd);
1343 
1344     qdict_del(parent_options, bdref_key);
1345 
1346 free_exit:
1347     g_free(backing_filename);
1348     QDECREF(tmp_parent_options);
1349     return ret;
1350 }
1351 
1352 /*
1353  * Opens a disk image whose options are given as BlockdevRef in another block
1354  * device's options.
1355  *
1356  * If allow_none is true, no image will be opened if filename is false and no
1357  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1358  *
1359  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1360  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1361  * itself, all options starting with "${bdref_key}." are considered part of the
1362  * BlockdevRef.
1363  *
1364  * The BlockdevRef will be removed from the options QDict.
1365  */
1366 BdrvChild *bdrv_open_child(const char *filename,
1367                            QDict *options, const char *bdref_key,
1368                            BlockDriverState* parent,
1369                            const BdrvChildRole *child_role,
1370                            bool allow_none, Error **errp)
1371 {
1372     BdrvChild *c = NULL;
1373     BlockDriverState *bs;
1374     QDict *image_options;
1375     int ret;
1376     char *bdref_key_dot;
1377     const char *reference;
1378 
1379     assert(child_role != NULL);
1380 
1381     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1382     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1383     g_free(bdref_key_dot);
1384 
1385     reference = qdict_get_try_str(options, bdref_key);
1386     if (!filename && !reference && !qdict_size(image_options)) {
1387         if (!allow_none) {
1388             error_setg(errp, "A block device must be specified for \"%s\"",
1389                        bdref_key);
1390         }
1391         QDECREF(image_options);
1392         goto done;
1393     }
1394 
1395     bs = NULL;
1396     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1397                             parent, child_role, errp);
1398     if (ret < 0) {
1399         goto done;
1400     }
1401 
1402     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1403 
1404 done:
1405     qdict_del(options, bdref_key);
1406     return c;
1407 }
1408 
1409 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1410                                      QDict *snapshot_options, Error **errp)
1411 {
1412     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1413     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1414     int64_t total_size;
1415     QemuOpts *opts = NULL;
1416     BlockDriverState *bs_snapshot;
1417     Error *local_err = NULL;
1418     int ret;
1419 
1420     /* if snapshot, we create a temporary backing file and open it
1421        instead of opening 'filename' directly */
1422 
1423     /* Get the required size from the image */
1424     total_size = bdrv_getlength(bs);
1425     if (total_size < 0) {
1426         ret = total_size;
1427         error_setg_errno(errp, -total_size, "Could not get image size");
1428         goto out;
1429     }
1430 
1431     /* Create the temporary image */
1432     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1433     if (ret < 0) {
1434         error_setg_errno(errp, -ret, "Could not get temporary filename");
1435         goto out;
1436     }
1437 
1438     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1439                             &error_abort);
1440     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1441     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1442     qemu_opts_del(opts);
1443     if (ret < 0) {
1444         error_prepend(errp, "Could not create temporary overlay '%s': ",
1445                       tmp_filename);
1446         goto out;
1447     }
1448 
1449     /* Prepare options QDict for the temporary file */
1450     qdict_put(snapshot_options, "file.driver",
1451               qstring_from_str("file"));
1452     qdict_put(snapshot_options, "file.filename",
1453               qstring_from_str(tmp_filename));
1454     qdict_put(snapshot_options, "driver",
1455               qstring_from_str("qcow2"));
1456 
1457     bs_snapshot = bdrv_new();
1458 
1459     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1460                     flags, &local_err);
1461     snapshot_options = NULL;
1462     if (ret < 0) {
1463         error_propagate(errp, local_err);
1464         goto out;
1465     }
1466 
1467     bdrv_append(bs_snapshot, bs);
1468 
1469 out:
1470     QDECREF(snapshot_options);
1471     g_free(tmp_filename);
1472     return ret;
1473 }
1474 
1475 /*
1476  * Opens a disk image (raw, qcow2, vmdk, ...)
1477  *
1478  * options is a QDict of options to pass to the block drivers, or NULL for an
1479  * empty set of options. The reference to the QDict belongs to the block layer
1480  * after the call (even on failure), so if the caller intends to reuse the
1481  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1482  *
1483  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1484  * If it is not NULL, the referenced BDS will be reused.
1485  *
1486  * The reference parameter may be used to specify an existing block device which
1487  * should be opened. If specified, neither options nor a filename may be given,
1488  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1489  */
1490 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1491                              const char *reference, QDict *options, int flags,
1492                              BlockDriverState *parent,
1493                              const BdrvChildRole *child_role, Error **errp)
1494 {
1495     int ret;
1496     BdrvChild *file = NULL;
1497     BlockDriverState *bs;
1498     BlockDriver *drv = NULL;
1499     const char *drvname;
1500     const char *backing;
1501     Error *local_err = NULL;
1502     QDict *snapshot_options = NULL;
1503     int snapshot_flags = 0;
1504 
1505     assert(pbs);
1506     assert(!child_role || !flags);
1507     assert(!child_role == !parent);
1508 
1509     if (reference) {
1510         bool options_non_empty = options ? qdict_size(options) : false;
1511         QDECREF(options);
1512 
1513         if (*pbs) {
1514             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1515                        "another block device");
1516             return -EINVAL;
1517         }
1518 
1519         if (filename || options_non_empty) {
1520             error_setg(errp, "Cannot reference an existing block device with "
1521                        "additional options or a new filename");
1522             return -EINVAL;
1523         }
1524 
1525         bs = bdrv_lookup_bs(reference, reference, errp);
1526         if (!bs) {
1527             return -ENODEV;
1528         }
1529         bdrv_ref(bs);
1530         *pbs = bs;
1531         return 0;
1532     }
1533 
1534     if (*pbs) {
1535         bs = *pbs;
1536     } else {
1537         bs = bdrv_new();
1538     }
1539 
1540     /* NULL means an empty set of options */
1541     if (options == NULL) {
1542         options = qdict_new();
1543     }
1544 
1545     /* json: syntax counts as explicit options, as if in the QDict */
1546     parse_json_protocol(options, &filename, &local_err);
1547     if (local_err) {
1548         ret = -EINVAL;
1549         goto fail;
1550     }
1551 
1552     bs->explicit_options = qdict_clone_shallow(options);
1553 
1554     if (child_role) {
1555         bs->inherits_from = parent;
1556         child_role->inherit_options(&flags, options,
1557                                     parent->open_flags, parent->options);
1558     }
1559 
1560     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1561     if (local_err) {
1562         goto fail;
1563     }
1564 
1565     bs->open_flags = flags;
1566     bs->options = options;
1567     options = qdict_clone_shallow(options);
1568 
1569     /* Find the right image format driver */
1570     drvname = qdict_get_try_str(options, "driver");
1571     if (drvname) {
1572         drv = bdrv_find_format(drvname);
1573         if (!drv) {
1574             error_setg(errp, "Unknown driver: '%s'", drvname);
1575             ret = -EINVAL;
1576             goto fail;
1577         }
1578     }
1579 
1580     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1581 
1582     backing = qdict_get_try_str(options, "backing");
1583     if (backing && *backing == '\0') {
1584         flags |= BDRV_O_NO_BACKING;
1585         qdict_del(options, "backing");
1586     }
1587 
1588     /* Open image file without format layer */
1589     if ((flags & BDRV_O_PROTOCOL) == 0) {
1590         if (flags & BDRV_O_RDWR) {
1591             flags |= BDRV_O_ALLOW_RDWR;
1592         }
1593         if (flags & BDRV_O_SNAPSHOT) {
1594             snapshot_options = qdict_new();
1595             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1596                                        flags, options);
1597             bdrv_backing_options(&flags, options, flags, options);
1598         }
1599 
1600         bs->open_flags = flags;
1601 
1602         file = bdrv_open_child(filename, options, "file", bs,
1603                                &child_file, true, &local_err);
1604         if (local_err) {
1605             ret = -EINVAL;
1606             goto fail;
1607         }
1608     }
1609 
1610     /* Image format probing */
1611     bs->probed = !drv;
1612     if (!drv && file) {
1613         ret = find_image_format(file->bs, filename, &drv, &local_err);
1614         if (ret < 0) {
1615             goto fail;
1616         }
1617         /*
1618          * This option update would logically belong in bdrv_fill_options(),
1619          * but we first need to open bs->file for the probing to work, while
1620          * opening bs->file already requires the (mostly) final set of options
1621          * so that cache mode etc. can be inherited.
1622          *
1623          * Adding the driver later is somewhat ugly, but it's not an option
1624          * that would ever be inherited, so it's correct. We just need to make
1625          * sure to update both bs->options (which has the full effective
1626          * options for bs) and options (which has file.* already removed).
1627          */
1628         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1629         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1630     } else if (!drv) {
1631         error_setg(errp, "Must specify either driver or file");
1632         ret = -EINVAL;
1633         goto fail;
1634     }
1635 
1636     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1637     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1638     /* file must be NULL if a protocol BDS is about to be created
1639      * (the inverse results in an error message from bdrv_open_common()) */
1640     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1641 
1642     /* Open the image */
1643     ret = bdrv_open_common(bs, file, options, &local_err);
1644     if (ret < 0) {
1645         goto fail;
1646     }
1647 
1648     if (file && (bs->file != file)) {
1649         bdrv_unref_child(bs, file);
1650         file = NULL;
1651     }
1652 
1653     /* If there is a backing file, use it */
1654     if ((flags & BDRV_O_NO_BACKING) == 0) {
1655         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1656         if (ret < 0) {
1657             goto close_and_fail;
1658         }
1659     }
1660 
1661     bdrv_refresh_filename(bs);
1662 
1663     /* Check if any unknown options were used */
1664     if (options && (qdict_size(options) != 0)) {
1665         const QDictEntry *entry = qdict_first(options);
1666         if (flags & BDRV_O_PROTOCOL) {
1667             error_setg(errp, "Block protocol '%s' doesn't support the option "
1668                        "'%s'", drv->format_name, entry->key);
1669         } else {
1670             error_setg(errp,
1671                        "Block format '%s' does not support the option '%s'",
1672                        drv->format_name, entry->key);
1673         }
1674 
1675         ret = -EINVAL;
1676         goto close_and_fail;
1677     }
1678 
1679     if (!bdrv_key_required(bs)) {
1680         if (bs->blk) {
1681             blk_dev_change_media_cb(bs->blk, true);
1682         }
1683     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1684                && !runstate_check(RUN_STATE_INMIGRATE)
1685                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1686         error_setg(errp,
1687                    "Guest must be stopped for opening of encrypted image");
1688         ret = -EBUSY;
1689         goto close_and_fail;
1690     }
1691 
1692     QDECREF(options);
1693     *pbs = bs;
1694 
1695     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1696      * temporary snapshot afterwards. */
1697     if (snapshot_flags) {
1698         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1699                                         &local_err);
1700         snapshot_options = NULL;
1701         if (local_err) {
1702             goto close_and_fail;
1703         }
1704     }
1705 
1706     return 0;
1707 
1708 fail:
1709     if (file != NULL) {
1710         bdrv_unref_child(bs, file);
1711     }
1712     QDECREF(snapshot_options);
1713     QDECREF(bs->explicit_options);
1714     QDECREF(bs->options);
1715     QDECREF(options);
1716     bs->options = NULL;
1717     if (!*pbs) {
1718         /* If *pbs is NULL, a new BDS has been created in this function and
1719            needs to be freed now. Otherwise, it does not need to be closed,
1720            since it has not really been opened yet. */
1721         bdrv_unref(bs);
1722     }
1723     if (local_err) {
1724         error_propagate(errp, local_err);
1725     }
1726     return ret;
1727 
1728 close_and_fail:
1729     /* See fail path, but now the BDS has to be always closed */
1730     if (*pbs) {
1731         bdrv_close(bs);
1732     } else {
1733         bdrv_unref(bs);
1734     }
1735     QDECREF(snapshot_options);
1736     QDECREF(options);
1737     if (local_err) {
1738         error_propagate(errp, local_err);
1739     }
1740     return ret;
1741 }
1742 
1743 int bdrv_open(BlockDriverState **pbs, const char *filename,
1744               const char *reference, QDict *options, int flags, Error **errp)
1745 {
1746     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1747                              NULL, errp);
1748 }
1749 
1750 typedef struct BlockReopenQueueEntry {
1751      bool prepared;
1752      BDRVReopenState state;
1753      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1754 } BlockReopenQueueEntry;
1755 
1756 /*
1757  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1758  * reopen of multiple devices.
1759  *
1760  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1761  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1762  * be created and initialized. This newly created BlockReopenQueue should be
1763  * passed back in for subsequent calls that are intended to be of the same
1764  * atomic 'set'.
1765  *
1766  * bs is the BlockDriverState to add to the reopen queue.
1767  *
1768  * options contains the changed options for the associated bs
1769  * (the BlockReopenQueue takes ownership)
1770  *
1771  * flags contains the open flags for the associated bs
1772  *
1773  * returns a pointer to bs_queue, which is either the newly allocated
1774  * bs_queue, or the existing bs_queue being used.
1775  *
1776  */
1777 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1778                                                  BlockDriverState *bs,
1779                                                  QDict *options,
1780                                                  int flags,
1781                                                  const BdrvChildRole *role,
1782                                                  QDict *parent_options,
1783                                                  int parent_flags)
1784 {
1785     assert(bs != NULL);
1786 
1787     BlockReopenQueueEntry *bs_entry;
1788     BdrvChild *child;
1789     QDict *old_options, *explicit_options;
1790 
1791     if (bs_queue == NULL) {
1792         bs_queue = g_new0(BlockReopenQueue, 1);
1793         QSIMPLEQ_INIT(bs_queue);
1794     }
1795 
1796     if (!options) {
1797         options = qdict_new();
1798     }
1799 
1800     /*
1801      * Precedence of options:
1802      * 1. Explicitly passed in options (highest)
1803      * 2. Set in flags (only for top level)
1804      * 3. Retained from explicitly set options of bs
1805      * 4. Inherited from parent node
1806      * 5. Retained from effective options of bs
1807      */
1808 
1809     if (!parent_options) {
1810         /*
1811          * Any setting represented by flags is always updated. If the
1812          * corresponding QDict option is set, it takes precedence. Otherwise
1813          * the flag is translated into a QDict option. The old setting of bs is
1814          * not considered.
1815          */
1816         update_options_from_flags(options, flags);
1817     }
1818 
1819     /* Old explicitly set values (don't overwrite by inherited value) */
1820     old_options = qdict_clone_shallow(bs->explicit_options);
1821     bdrv_join_options(bs, options, old_options);
1822     QDECREF(old_options);
1823 
1824     explicit_options = qdict_clone_shallow(options);
1825 
1826     /* Inherit from parent node */
1827     if (parent_options) {
1828         assert(!flags);
1829         role->inherit_options(&flags, options, parent_flags, parent_options);
1830     }
1831 
1832     /* Old values are used for options that aren't set yet */
1833     old_options = qdict_clone_shallow(bs->options);
1834     bdrv_join_options(bs, options, old_options);
1835     QDECREF(old_options);
1836 
1837     /* bdrv_open() masks this flag out */
1838     flags &= ~BDRV_O_PROTOCOL;
1839 
1840     QLIST_FOREACH(child, &bs->children, next) {
1841         QDict *new_child_options;
1842         char *child_key_dot;
1843 
1844         /* reopen can only change the options of block devices that were
1845          * implicitly created and inherited options. For other (referenced)
1846          * block devices, a syntax like "backing.foo" results in an error. */
1847         if (child->bs->inherits_from != bs) {
1848             continue;
1849         }
1850 
1851         child_key_dot = g_strdup_printf("%s.", child->name);
1852         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1853         g_free(child_key_dot);
1854 
1855         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1856                                 child->role, options, flags);
1857     }
1858 
1859     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1860     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1861 
1862     bs_entry->state.bs = bs;
1863     bs_entry->state.options = options;
1864     bs_entry->state.explicit_options = explicit_options;
1865     bs_entry->state.flags = flags;
1866 
1867     return bs_queue;
1868 }
1869 
1870 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1871                                     BlockDriverState *bs,
1872                                     QDict *options, int flags)
1873 {
1874     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1875                                    NULL, NULL, 0);
1876 }
1877 
1878 /*
1879  * Reopen multiple BlockDriverStates atomically & transactionally.
1880  *
1881  * The queue passed in (bs_queue) must have been built up previous
1882  * via bdrv_reopen_queue().
1883  *
1884  * Reopens all BDS specified in the queue, with the appropriate
1885  * flags.  All devices are prepared for reopen, and failure of any
1886  * device will cause all device changes to be abandonded, and intermediate
1887  * data cleaned up.
1888  *
1889  * If all devices prepare successfully, then the changes are committed
1890  * to all devices.
1891  *
1892  */
1893 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1894 {
1895     int ret = -1;
1896     BlockReopenQueueEntry *bs_entry, *next;
1897     Error *local_err = NULL;
1898 
1899     assert(bs_queue != NULL);
1900 
1901     bdrv_drain_all();
1902 
1903     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1904         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1905             error_propagate(errp, local_err);
1906             goto cleanup;
1907         }
1908         bs_entry->prepared = true;
1909     }
1910 
1911     /* If we reach this point, we have success and just need to apply the
1912      * changes
1913      */
1914     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1915         bdrv_reopen_commit(&bs_entry->state);
1916     }
1917 
1918     ret = 0;
1919 
1920 cleanup:
1921     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1922         if (ret && bs_entry->prepared) {
1923             bdrv_reopen_abort(&bs_entry->state);
1924         } else if (ret) {
1925             QDECREF(bs_entry->state.explicit_options);
1926         }
1927         QDECREF(bs_entry->state.options);
1928         g_free(bs_entry);
1929     }
1930     g_free(bs_queue);
1931     return ret;
1932 }
1933 
1934 
1935 /* Reopen a single BlockDriverState with the specified flags. */
1936 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1937 {
1938     int ret = -1;
1939     Error *local_err = NULL;
1940     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1941 
1942     ret = bdrv_reopen_multiple(queue, &local_err);
1943     if (local_err != NULL) {
1944         error_propagate(errp, local_err);
1945     }
1946     return ret;
1947 }
1948 
1949 
1950 /*
1951  * Prepares a BlockDriverState for reopen. All changes are staged in the
1952  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1953  * the block driver layer .bdrv_reopen_prepare()
1954  *
1955  * bs is the BlockDriverState to reopen
1956  * flags are the new open flags
1957  * queue is the reopen queue
1958  *
1959  * Returns 0 on success, non-zero on error.  On error errp will be set
1960  * as well.
1961  *
1962  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1963  * It is the responsibility of the caller to then call the abort() or
1964  * commit() for any other BDS that have been left in a prepare() state
1965  *
1966  */
1967 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1968                         Error **errp)
1969 {
1970     int ret = -1;
1971     Error *local_err = NULL;
1972     BlockDriver *drv;
1973     QemuOpts *opts;
1974     const char *value;
1975 
1976     assert(reopen_state != NULL);
1977     assert(reopen_state->bs->drv != NULL);
1978     drv = reopen_state->bs->drv;
1979 
1980     /* Process generic block layer options */
1981     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1982     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1983     if (local_err) {
1984         error_propagate(errp, local_err);
1985         ret = -EINVAL;
1986         goto error;
1987     }
1988 
1989     update_flags_from_options(&reopen_state->flags, opts);
1990 
1991     /* node-name and driver must be unchanged. Put them back into the QDict, so
1992      * that they are checked at the end of this function. */
1993     value = qemu_opt_get(opts, "node-name");
1994     if (value) {
1995         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
1996     }
1997 
1998     value = qemu_opt_get(opts, "driver");
1999     if (value) {
2000         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2001     }
2002 
2003     /* if we are to stay read-only, do not allow permission change
2004      * to r/w */
2005     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2006         reopen_state->flags & BDRV_O_RDWR) {
2007         error_setg(errp, "Node '%s' is read only",
2008                    bdrv_get_device_or_node_name(reopen_state->bs));
2009         goto error;
2010     }
2011 
2012 
2013     ret = bdrv_flush(reopen_state->bs);
2014     if (ret) {
2015         error_setg_errno(errp, -ret, "Error flushing drive");
2016         goto error;
2017     }
2018 
2019     if (drv->bdrv_reopen_prepare) {
2020         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2021         if (ret) {
2022             if (local_err != NULL) {
2023                 error_propagate(errp, local_err);
2024             } else {
2025                 error_setg(errp, "failed while preparing to reopen image '%s'",
2026                            reopen_state->bs->filename);
2027             }
2028             goto error;
2029         }
2030     } else {
2031         /* It is currently mandatory to have a bdrv_reopen_prepare()
2032          * handler for each supported drv. */
2033         error_setg(errp, "Block format '%s' used by node '%s' "
2034                    "does not support reopening files", drv->format_name,
2035                    bdrv_get_device_or_node_name(reopen_state->bs));
2036         ret = -1;
2037         goto error;
2038     }
2039 
2040     /* Options that are not handled are only okay if they are unchanged
2041      * compared to the old state. It is expected that some options are only
2042      * used for the initial open, but not reopen (e.g. filename) */
2043     if (qdict_size(reopen_state->options)) {
2044         const QDictEntry *entry = qdict_first(reopen_state->options);
2045 
2046         do {
2047             QString *new_obj = qobject_to_qstring(entry->value);
2048             const char *new = qstring_get_str(new_obj);
2049             const char *old = qdict_get_try_str(reopen_state->bs->options,
2050                                                 entry->key);
2051 
2052             if (!old || strcmp(new, old)) {
2053                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2054                 ret = -EINVAL;
2055                 goto error;
2056             }
2057         } while ((entry = qdict_next(reopen_state->options, entry)));
2058     }
2059 
2060     ret = 0;
2061 
2062 error:
2063     qemu_opts_del(opts);
2064     return ret;
2065 }
2066 
2067 /*
2068  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2069  * makes them final by swapping the staging BlockDriverState contents into
2070  * the active BlockDriverState contents.
2071  */
2072 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2073 {
2074     BlockDriver *drv;
2075 
2076     assert(reopen_state != NULL);
2077     drv = reopen_state->bs->drv;
2078     assert(drv != NULL);
2079 
2080     /* If there are any driver level actions to take */
2081     if (drv->bdrv_reopen_commit) {
2082         drv->bdrv_reopen_commit(reopen_state);
2083     }
2084 
2085     /* set BDS specific flags now */
2086     QDECREF(reopen_state->bs->explicit_options);
2087 
2088     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2089     reopen_state->bs->open_flags         = reopen_state->flags;
2090     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2091 
2092     bdrv_refresh_limits(reopen_state->bs, NULL);
2093 }
2094 
2095 /*
2096  * Abort the reopen, and delete and free the staged changes in
2097  * reopen_state
2098  */
2099 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2100 {
2101     BlockDriver *drv;
2102 
2103     assert(reopen_state != NULL);
2104     drv = reopen_state->bs->drv;
2105     assert(drv != NULL);
2106 
2107     if (drv->bdrv_reopen_abort) {
2108         drv->bdrv_reopen_abort(reopen_state);
2109     }
2110 
2111     QDECREF(reopen_state->explicit_options);
2112 }
2113 
2114 
2115 static void bdrv_close(BlockDriverState *bs)
2116 {
2117     BdrvAioNotifier *ban, *ban_next;
2118 
2119     assert(!bs->job);
2120 
2121     /* Disable I/O limits and drain all pending throttled requests */
2122     if (bs->throttle_state) {
2123         bdrv_io_limits_disable(bs);
2124     }
2125 
2126     bdrv_drained_begin(bs); /* complete I/O */
2127     bdrv_flush(bs);
2128     bdrv_drain(bs); /* in case flush left pending I/O */
2129 
2130     bdrv_release_named_dirty_bitmaps(bs);
2131     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2132 
2133     if (bs->blk) {
2134         blk_dev_change_media_cb(bs->blk, false);
2135     }
2136 
2137     if (bs->drv) {
2138         BdrvChild *child, *next;
2139 
2140         bs->drv->bdrv_close(bs);
2141         bs->drv = NULL;
2142 
2143         bdrv_set_backing_hd(bs, NULL);
2144 
2145         if (bs->file != NULL) {
2146             bdrv_unref_child(bs, bs->file);
2147             bs->file = NULL;
2148         }
2149 
2150         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2151             /* TODO Remove bdrv_unref() from drivers' close function and use
2152              * bdrv_unref_child() here */
2153             if (child->bs->inherits_from == bs) {
2154                 child->bs->inherits_from = NULL;
2155             }
2156             bdrv_detach_child(child);
2157         }
2158 
2159         g_free(bs->opaque);
2160         bs->opaque = NULL;
2161         bs->copy_on_read = 0;
2162         bs->backing_file[0] = '\0';
2163         bs->backing_format[0] = '\0';
2164         bs->total_sectors = 0;
2165         bs->encrypted = 0;
2166         bs->valid_key = 0;
2167         bs->sg = 0;
2168         bs->zero_beyond_eof = false;
2169         QDECREF(bs->options);
2170         QDECREF(bs->explicit_options);
2171         bs->options = NULL;
2172         QDECREF(bs->full_open_options);
2173         bs->full_open_options = NULL;
2174     }
2175 
2176     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2177         g_free(ban);
2178     }
2179     QLIST_INIT(&bs->aio_notifiers);
2180     bdrv_drained_end(bs);
2181 }
2182 
2183 void bdrv_close_all(void)
2184 {
2185     BlockDriverState *bs;
2186     AioContext *aio_context;
2187 
2188     /* Drop references from requests still in flight, such as canceled block
2189      * jobs whose AIO context has not been polled yet */
2190     bdrv_drain_all();
2191 
2192     blk_remove_all_bs();
2193     blockdev_close_all_bdrv_states();
2194 
2195     /* Cancel all block jobs */
2196     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2197         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2198             aio_context = bdrv_get_aio_context(bs);
2199 
2200             aio_context_acquire(aio_context);
2201             if (bs->job) {
2202                 block_job_cancel_sync(bs->job);
2203                 aio_context_release(aio_context);
2204                 break;
2205             }
2206             aio_context_release(aio_context);
2207         }
2208 
2209         /* All the remaining BlockDriverStates are referenced directly or
2210          * indirectly from block jobs, so there needs to be at least one BDS
2211          * directly used by a block job */
2212         assert(bs);
2213     }
2214 }
2215 
2216 /* Fields that need to stay with the top-level BDS */
2217 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2218                                      BlockDriverState *bs_src)
2219 {
2220     /* move some fields that need to stay attached to the device */
2221 }
2222 
2223 static void change_parent_backing_link(BlockDriverState *from,
2224                                        BlockDriverState *to)
2225 {
2226     BdrvChild *c, *next;
2227 
2228     if (from->blk) {
2229         /* FIXME We bypass blk_set_bs(), so we need to make these updates
2230          * manually. The root problem is not in this change function, but the
2231          * existence of BlockDriverState.blk. */
2232         to->blk = from->blk;
2233         from->blk = NULL;
2234     }
2235 
2236     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2237         assert(c->role != &child_backing);
2238         c->bs = to;
2239         QLIST_REMOVE(c, next_parent);
2240         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2241         bdrv_ref(to);
2242         bdrv_unref(from);
2243     }
2244 }
2245 
2246 static void swap_feature_fields(BlockDriverState *bs_top,
2247                                 BlockDriverState *bs_new)
2248 {
2249     BlockDriverState tmp;
2250 
2251     bdrv_move_feature_fields(&tmp, bs_top);
2252     bdrv_move_feature_fields(bs_top, bs_new);
2253     bdrv_move_feature_fields(bs_new, &tmp);
2254 
2255     assert(!bs_new->throttle_state);
2256     if (bs_top->throttle_state) {
2257         assert(bs_top->io_limits_enabled);
2258         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2259         bdrv_io_limits_disable(bs_top);
2260     }
2261 }
2262 
2263 /*
2264  * Add new bs contents at the top of an image chain while the chain is
2265  * live, while keeping required fields on the top layer.
2266  *
2267  * This will modify the BlockDriverState fields, and swap contents
2268  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2269  *
2270  * bs_new must not be attached to a BlockBackend.
2271  *
2272  * This function does not create any image files.
2273  *
2274  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2275  * that's what the callers commonly need. bs_new will be referenced by the old
2276  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2277  * reference of its own, it must call bdrv_ref().
2278  */
2279 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2280 {
2281     assert(!bdrv_requests_pending(bs_top));
2282     assert(!bdrv_requests_pending(bs_new));
2283 
2284     bdrv_ref(bs_top);
2285     change_parent_backing_link(bs_top, bs_new);
2286 
2287     /* Some fields always stay on top of the backing file chain */
2288     swap_feature_fields(bs_top, bs_new);
2289 
2290     bdrv_set_backing_hd(bs_new, bs_top);
2291     bdrv_unref(bs_top);
2292 
2293     /* bs_new is now referenced by its new parents, we don't need the
2294      * additional reference any more. */
2295     bdrv_unref(bs_new);
2296 }
2297 
2298 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2299 {
2300     assert(!bdrv_requests_pending(old));
2301     assert(!bdrv_requests_pending(new));
2302 
2303     bdrv_ref(old);
2304 
2305     if (old->blk) {
2306         /* As long as these fields aren't in BlockBackend, but in the top-level
2307          * BlockDriverState, it's not possible for a BDS to have two BBs.
2308          *
2309          * We really want to copy the fields from old to new, but we go for a
2310          * swap instead so that pointers aren't duplicated and cause trouble.
2311          * (Also, bdrv_swap() used to do the same.) */
2312         assert(!new->blk);
2313         swap_feature_fields(old, new);
2314     }
2315     change_parent_backing_link(old, new);
2316 
2317     /* Change backing files if a previously independent node is added to the
2318      * chain. For active commit, we replace top by its own (indirect) backing
2319      * file and don't do anything here so we don't build a loop. */
2320     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2321         bdrv_set_backing_hd(new, backing_bs(old));
2322         bdrv_set_backing_hd(old, NULL);
2323     }
2324 
2325     bdrv_unref(old);
2326 }
2327 
2328 static void bdrv_delete(BlockDriverState *bs)
2329 {
2330     assert(!bs->job);
2331     assert(bdrv_op_blocker_is_empty(bs));
2332     assert(!bs->refcnt);
2333 
2334     bdrv_close(bs);
2335 
2336     /* remove from list, if necessary */
2337     if (bs->node_name[0] != '\0') {
2338         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2339     }
2340     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2341 
2342     g_free(bs);
2343 }
2344 
2345 /*
2346  * Run consistency checks on an image
2347  *
2348  * Returns 0 if the check could be completed (it doesn't mean that the image is
2349  * free of errors) or -errno when an internal error occurred. The results of the
2350  * check are stored in res.
2351  */
2352 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2353 {
2354     if (bs->drv == NULL) {
2355         return -ENOMEDIUM;
2356     }
2357     if (bs->drv->bdrv_check == NULL) {
2358         return -ENOTSUP;
2359     }
2360 
2361     memset(res, 0, sizeof(*res));
2362     return bs->drv->bdrv_check(bs, res, fix);
2363 }
2364 
2365 #define COMMIT_BUF_SECTORS 2048
2366 
2367 /* commit COW file into the raw image */
2368 int bdrv_commit(BlockDriverState *bs)
2369 {
2370     BlockDriver *drv = bs->drv;
2371     int64_t sector, total_sectors, length, backing_length;
2372     int n, ro, open_flags;
2373     int ret = 0;
2374     uint8_t *buf = NULL;
2375 
2376     if (!drv)
2377         return -ENOMEDIUM;
2378 
2379     if (!bs->backing) {
2380         return -ENOTSUP;
2381     }
2382 
2383     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2384         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2385         return -EBUSY;
2386     }
2387 
2388     ro = bs->backing->bs->read_only;
2389     open_flags =  bs->backing->bs->open_flags;
2390 
2391     if (ro) {
2392         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2393             return -EACCES;
2394         }
2395     }
2396 
2397     length = bdrv_getlength(bs);
2398     if (length < 0) {
2399         ret = length;
2400         goto ro_cleanup;
2401     }
2402 
2403     backing_length = bdrv_getlength(bs->backing->bs);
2404     if (backing_length < 0) {
2405         ret = backing_length;
2406         goto ro_cleanup;
2407     }
2408 
2409     /* If our top snapshot is larger than the backing file image,
2410      * grow the backing file image if possible.  If not possible,
2411      * we must return an error */
2412     if (length > backing_length) {
2413         ret = bdrv_truncate(bs->backing->bs, length);
2414         if (ret < 0) {
2415             goto ro_cleanup;
2416         }
2417     }
2418 
2419     total_sectors = length >> BDRV_SECTOR_BITS;
2420 
2421     /* qemu_try_blockalign() for bs will choose an alignment that works for
2422      * bs->backing->bs as well, so no need to compare the alignment manually. */
2423     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2424     if (buf == NULL) {
2425         ret = -ENOMEM;
2426         goto ro_cleanup;
2427     }
2428 
2429     for (sector = 0; sector < total_sectors; sector += n) {
2430         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2431         if (ret < 0) {
2432             goto ro_cleanup;
2433         }
2434         if (ret) {
2435             ret = bdrv_read(bs, sector, buf, n);
2436             if (ret < 0) {
2437                 goto ro_cleanup;
2438             }
2439 
2440             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2441             if (ret < 0) {
2442                 goto ro_cleanup;
2443             }
2444         }
2445     }
2446 
2447     if (drv->bdrv_make_empty) {
2448         ret = drv->bdrv_make_empty(bs);
2449         if (ret < 0) {
2450             goto ro_cleanup;
2451         }
2452         bdrv_flush(bs);
2453     }
2454 
2455     /*
2456      * Make sure all data we wrote to the backing device is actually
2457      * stable on disk.
2458      */
2459     if (bs->backing) {
2460         bdrv_flush(bs->backing->bs);
2461     }
2462 
2463     ret = 0;
2464 ro_cleanup:
2465     qemu_vfree(buf);
2466 
2467     if (ro) {
2468         /* ignoring error return here */
2469         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2470     }
2471 
2472     return ret;
2473 }
2474 
2475 /*
2476  * Return values:
2477  * 0        - success
2478  * -EINVAL  - backing format specified, but no file
2479  * -ENOSPC  - can't update the backing file because no space is left in the
2480  *            image file header
2481  * -ENOTSUP - format driver doesn't support changing the backing file
2482  */
2483 int bdrv_change_backing_file(BlockDriverState *bs,
2484     const char *backing_file, const char *backing_fmt)
2485 {
2486     BlockDriver *drv = bs->drv;
2487     int ret;
2488 
2489     /* Backing file format doesn't make sense without a backing file */
2490     if (backing_fmt && !backing_file) {
2491         return -EINVAL;
2492     }
2493 
2494     if (drv->bdrv_change_backing_file != NULL) {
2495         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2496     } else {
2497         ret = -ENOTSUP;
2498     }
2499 
2500     if (ret == 0) {
2501         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2502         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2503     }
2504     return ret;
2505 }
2506 
2507 /*
2508  * Finds the image layer in the chain that has 'bs' as its backing file.
2509  *
2510  * active is the current topmost image.
2511  *
2512  * Returns NULL if bs is not found in active's image chain,
2513  * or if active == bs.
2514  *
2515  * Returns the bottommost base image if bs == NULL.
2516  */
2517 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2518                                     BlockDriverState *bs)
2519 {
2520     while (active && bs != backing_bs(active)) {
2521         active = backing_bs(active);
2522     }
2523 
2524     return active;
2525 }
2526 
2527 /* Given a BDS, searches for the base layer. */
2528 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2529 {
2530     return bdrv_find_overlay(bs, NULL);
2531 }
2532 
2533 /*
2534  * Drops images above 'base' up to and including 'top', and sets the image
2535  * above 'top' to have base as its backing file.
2536  *
2537  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2538  * information in 'bs' can be properly updated.
2539  *
2540  * E.g., this will convert the following chain:
2541  * bottom <- base <- intermediate <- top <- active
2542  *
2543  * to
2544  *
2545  * bottom <- base <- active
2546  *
2547  * It is allowed for bottom==base, in which case it converts:
2548  *
2549  * base <- intermediate <- top <- active
2550  *
2551  * to
2552  *
2553  * base <- active
2554  *
2555  * If backing_file_str is non-NULL, it will be used when modifying top's
2556  * overlay image metadata.
2557  *
2558  * Error conditions:
2559  *  if active == top, that is considered an error
2560  *
2561  */
2562 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2563                            BlockDriverState *base, const char *backing_file_str)
2564 {
2565     BlockDriverState *new_top_bs = NULL;
2566     int ret = -EIO;
2567 
2568     if (!top->drv || !base->drv) {
2569         goto exit;
2570     }
2571 
2572     new_top_bs = bdrv_find_overlay(active, top);
2573 
2574     if (new_top_bs == NULL) {
2575         /* we could not find the image above 'top', this is an error */
2576         goto exit;
2577     }
2578 
2579     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2580      * to do, no intermediate images */
2581     if (backing_bs(new_top_bs) == base) {
2582         ret = 0;
2583         goto exit;
2584     }
2585 
2586     /* Make sure that base is in the backing chain of top */
2587     if (!bdrv_chain_contains(top, base)) {
2588         goto exit;
2589     }
2590 
2591     /* success - we can delete the intermediate states, and link top->base */
2592     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2593     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2594                                    base->drv ? base->drv->format_name : "");
2595     if (ret) {
2596         goto exit;
2597     }
2598     bdrv_set_backing_hd(new_top_bs, base);
2599 
2600     ret = 0;
2601 exit:
2602     return ret;
2603 }
2604 
2605 /**
2606  * Truncate file to 'offset' bytes (needed only for file protocols)
2607  */
2608 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2609 {
2610     BlockDriver *drv = bs->drv;
2611     int ret;
2612     if (!drv)
2613         return -ENOMEDIUM;
2614     if (!drv->bdrv_truncate)
2615         return -ENOTSUP;
2616     if (bs->read_only)
2617         return -EACCES;
2618 
2619     ret = drv->bdrv_truncate(bs, offset);
2620     if (ret == 0) {
2621         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2622         bdrv_dirty_bitmap_truncate(bs);
2623         if (bs->blk) {
2624             blk_dev_resize_cb(bs->blk);
2625         }
2626     }
2627     return ret;
2628 }
2629 
2630 /**
2631  * Length of a allocated file in bytes. Sparse files are counted by actual
2632  * allocated space. Return < 0 if error or unknown.
2633  */
2634 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2635 {
2636     BlockDriver *drv = bs->drv;
2637     if (!drv) {
2638         return -ENOMEDIUM;
2639     }
2640     if (drv->bdrv_get_allocated_file_size) {
2641         return drv->bdrv_get_allocated_file_size(bs);
2642     }
2643     if (bs->file) {
2644         return bdrv_get_allocated_file_size(bs->file->bs);
2645     }
2646     return -ENOTSUP;
2647 }
2648 
2649 /**
2650  * Return number of sectors on success, -errno on error.
2651  */
2652 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2653 {
2654     BlockDriver *drv = bs->drv;
2655 
2656     if (!drv)
2657         return -ENOMEDIUM;
2658 
2659     if (drv->has_variable_length) {
2660         int ret = refresh_total_sectors(bs, bs->total_sectors);
2661         if (ret < 0) {
2662             return ret;
2663         }
2664     }
2665     return bs->total_sectors;
2666 }
2667 
2668 /**
2669  * Return length in bytes on success, -errno on error.
2670  * The length is always a multiple of BDRV_SECTOR_SIZE.
2671  */
2672 int64_t bdrv_getlength(BlockDriverState *bs)
2673 {
2674     int64_t ret = bdrv_nb_sectors(bs);
2675 
2676     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2677     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2678 }
2679 
2680 /* return 0 as number of sectors if no device present or error */
2681 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2682 {
2683     int64_t nb_sectors = bdrv_nb_sectors(bs);
2684 
2685     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2686 }
2687 
2688 int bdrv_is_read_only(BlockDriverState *bs)
2689 {
2690     return bs->read_only;
2691 }
2692 
2693 int bdrv_is_sg(BlockDriverState *bs)
2694 {
2695     return bs->sg;
2696 }
2697 
2698 int bdrv_is_encrypted(BlockDriverState *bs)
2699 {
2700     if (bs->backing && bs->backing->bs->encrypted) {
2701         return 1;
2702     }
2703     return bs->encrypted;
2704 }
2705 
2706 int bdrv_key_required(BlockDriverState *bs)
2707 {
2708     BdrvChild *backing = bs->backing;
2709 
2710     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2711         return 1;
2712     }
2713     return (bs->encrypted && !bs->valid_key);
2714 }
2715 
2716 int bdrv_set_key(BlockDriverState *bs, const char *key)
2717 {
2718     int ret;
2719     if (bs->backing && bs->backing->bs->encrypted) {
2720         ret = bdrv_set_key(bs->backing->bs, key);
2721         if (ret < 0)
2722             return ret;
2723         if (!bs->encrypted)
2724             return 0;
2725     }
2726     if (!bs->encrypted) {
2727         return -EINVAL;
2728     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2729         return -ENOMEDIUM;
2730     }
2731     ret = bs->drv->bdrv_set_key(bs, key);
2732     if (ret < 0) {
2733         bs->valid_key = 0;
2734     } else if (!bs->valid_key) {
2735         bs->valid_key = 1;
2736         if (bs->blk) {
2737             /* call the change callback now, we skipped it on open */
2738             blk_dev_change_media_cb(bs->blk, true);
2739         }
2740     }
2741     return ret;
2742 }
2743 
2744 /*
2745  * Provide an encryption key for @bs.
2746  * If @key is non-null:
2747  *     If @bs is not encrypted, fail.
2748  *     Else if the key is invalid, fail.
2749  *     Else set @bs's key to @key, replacing the existing key, if any.
2750  * If @key is null:
2751  *     If @bs is encrypted and still lacks a key, fail.
2752  *     Else do nothing.
2753  * On failure, store an error object through @errp if non-null.
2754  */
2755 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2756 {
2757     if (key) {
2758         if (!bdrv_is_encrypted(bs)) {
2759             error_setg(errp, "Node '%s' is not encrypted",
2760                       bdrv_get_device_or_node_name(bs));
2761         } else if (bdrv_set_key(bs, key) < 0) {
2762             error_setg(errp, QERR_INVALID_PASSWORD);
2763         }
2764     } else {
2765         if (bdrv_key_required(bs)) {
2766             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2767                       "'%s' (%s) is encrypted",
2768                       bdrv_get_device_or_node_name(bs),
2769                       bdrv_get_encrypted_filename(bs));
2770         }
2771     }
2772 }
2773 
2774 const char *bdrv_get_format_name(BlockDriverState *bs)
2775 {
2776     return bs->drv ? bs->drv->format_name : NULL;
2777 }
2778 
/*
 * qsort() comparison function for an array of C strings (char pointers).
 *
 * qsort() passes pointers to the array elements, so @a and @b point to the
 * string pointers and have to be dereferenced once before the strings
 * themselves can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}
2783 
2784 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2785                          void *opaque)
2786 {
2787     BlockDriver *drv;
2788     int count = 0;
2789     int i;
2790     const char **formats = NULL;
2791 
2792     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2793         if (drv->format_name) {
2794             bool found = false;
2795             int i = count;
2796             while (formats && i && !found) {
2797                 found = !strcmp(formats[--i], drv->format_name);
2798             }
2799 
2800             if (!found) {
2801                 formats = g_renew(const char *, formats, count + 1);
2802                 formats[count++] = drv->format_name;
2803             }
2804         }
2805     }
2806 
2807     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2808 
2809     for (i = 0; i < count; i++) {
2810         it(opaque, formats[i]);
2811     }
2812 
2813     g_free(formats);
2814 }
2815 
2816 /* This function is to find a node in the bs graph */
2817 BlockDriverState *bdrv_find_node(const char *node_name)
2818 {
2819     BlockDriverState *bs;
2820 
2821     assert(node_name);
2822 
2823     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2824         if (!strcmp(node_name, bs->node_name)) {
2825             return bs;
2826         }
2827     }
2828     return NULL;
2829 }
2830 
2831 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2832 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2833 {
2834     BlockDeviceInfoList *list, *entry;
2835     BlockDriverState *bs;
2836 
2837     list = NULL;
2838     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2839         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
2840         if (!info) {
2841             qapi_free_BlockDeviceInfoList(list);
2842             return NULL;
2843         }
2844         entry = g_malloc0(sizeof(*entry));
2845         entry->value = info;
2846         entry->next = list;
2847         list = entry;
2848     }
2849 
2850     return list;
2851 }
2852 
2853 BlockDriverState *bdrv_lookup_bs(const char *device,
2854                                  const char *node_name,
2855                                  Error **errp)
2856 {
2857     BlockBackend *blk;
2858     BlockDriverState *bs;
2859 
2860     if (device) {
2861         blk = blk_by_name(device);
2862 
2863         if (blk) {
2864             bs = blk_bs(blk);
2865             if (!bs) {
2866                 error_setg(errp, "Device '%s' has no medium", device);
2867             }
2868 
2869             return bs;
2870         }
2871     }
2872 
2873     if (node_name) {
2874         bs = bdrv_find_node(node_name);
2875 
2876         if (bs) {
2877             return bs;
2878         }
2879     }
2880 
2881     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2882                      device ? device : "",
2883                      node_name ? node_name : "");
2884     return NULL;
2885 }
2886 
2887 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2888  * return false.  If either argument is NULL, return false. */
2889 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2890 {
2891     while (top && top != base) {
2892         top = backing_bs(top);
2893     }
2894 
2895     return top != NULL;
2896 }
2897 
2898 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2899 {
2900     if (!bs) {
2901         return QTAILQ_FIRST(&graph_bdrv_states);
2902     }
2903     return QTAILQ_NEXT(bs, node_list);
2904 }
2905 
2906 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2907  * the monitor or attached to a BlockBackend */
2908 BlockDriverState *bdrv_next(BlockDriverState *bs)
2909 {
2910     if (!bs || bs->blk) {
2911         bs = blk_next_root_bs(bs);
2912         if (bs) {
2913             return bs;
2914         }
2915     }
2916 
2917     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2918      * handled by the above block already */
2919     do {
2920         bs = bdrv_next_monitor_owned(bs);
2921     } while (bs && bs->blk);
2922     return bs;
2923 }
2924 
2925 const char *bdrv_get_node_name(const BlockDriverState *bs)
2926 {
2927     return bs->node_name;
2928 }
2929 
2930 /* TODO check what callers really want: bs->node_name or blk_name() */
2931 const char *bdrv_get_device_name(const BlockDriverState *bs)
2932 {
2933     return bs->blk ? blk_name(bs->blk) : "";
2934 }
2935 
2936 /* This can be used to identify nodes that might not have a device
2937  * name associated. Since node and device names live in the same
2938  * namespace, the result is unambiguous. The exception is if both are
2939  * absent, then this returns an empty (non-null) string. */
2940 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2941 {
2942     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2943 }
2944 
2945 int bdrv_get_flags(BlockDriverState *bs)
2946 {
2947     return bs->open_flags;
2948 }
2949 
2950 int bdrv_has_zero_init_1(BlockDriverState *bs)
2951 {
2952     return 1;
2953 }
2954 
2955 int bdrv_has_zero_init(BlockDriverState *bs)
2956 {
2957     assert(bs->drv);
2958 
2959     /* If BS is a copy on write image, it is initialized to
2960        the contents of the base image, which may not be zeroes.  */
2961     if (bs->backing) {
2962         return 0;
2963     }
2964     if (bs->drv->bdrv_has_zero_init) {
2965         return bs->drv->bdrv_has_zero_init(bs);
2966     }
2967 
2968     /* safe default */
2969     return 0;
2970 }
2971 
2972 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2973 {
2974     BlockDriverInfo bdi;
2975 
2976     if (bs->backing) {
2977         return false;
2978     }
2979 
2980     if (bdrv_get_info(bs, &bdi) == 0) {
2981         return bdi.unallocated_blocks_are_zero;
2982     }
2983 
2984     return false;
2985 }
2986 
2987 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2988 {
2989     BlockDriverInfo bdi;
2990 
2991     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2992         return false;
2993     }
2994 
2995     if (bdrv_get_info(bs, &bdi) == 0) {
2996         return bdi.can_write_zeroes_with_unmap;
2997     }
2998 
2999     return false;
3000 }
3001 
3002 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3003 {
3004     if (bs->backing && bs->backing->bs->encrypted)
3005         return bs->backing_file;
3006     else if (bs->encrypted)
3007         return bs->filename;
3008     else
3009         return NULL;
3010 }
3011 
3012 void bdrv_get_backing_filename(BlockDriverState *bs,
3013                                char *filename, int filename_size)
3014 {
3015     pstrcpy(filename, filename_size, bs->backing_file);
3016 }
3017 
3018 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3019 {
3020     BlockDriver *drv = bs->drv;
3021     if (!drv)
3022         return -ENOMEDIUM;
3023     if (!drv->bdrv_get_info)
3024         return -ENOTSUP;
3025     memset(bdi, 0, sizeof(*bdi));
3026     return drv->bdrv_get_info(bs, bdi);
3027 }
3028 
3029 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3030 {
3031     BlockDriver *drv = bs->drv;
3032     if (drv && drv->bdrv_get_specific_info) {
3033         return drv->bdrv_get_specific_info(bs);
3034     }
3035     return NULL;
3036 }
3037 
3038 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3039 {
3040     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3041         return;
3042     }
3043 
3044     bs->drv->bdrv_debug_event(bs, event);
3045 }
3046 
3047 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3048                           const char *tag)
3049 {
3050     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3051         bs = bs->file ? bs->file->bs : NULL;
3052     }
3053 
3054     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3055         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3056     }
3057 
3058     return -ENOTSUP;
3059 }
3060 
3061 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3062 {
3063     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3064         bs = bs->file ? bs->file->bs : NULL;
3065     }
3066 
3067     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3068         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3069     }
3070 
3071     return -ENOTSUP;
3072 }
3073 
3074 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3075 {
3076     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3077         bs = bs->file ? bs->file->bs : NULL;
3078     }
3079 
3080     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3081         return bs->drv->bdrv_debug_resume(bs, tag);
3082     }
3083 
3084     return -ENOTSUP;
3085 }
3086 
3087 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3088 {
3089     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3090         bs = bs->file ? bs->file->bs : NULL;
3091     }
3092 
3093     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3094         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3095     }
3096 
3097     return false;
3098 }
3099 
3100 int bdrv_is_snapshot(BlockDriverState *bs)
3101 {
3102     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3103 }
3104 
3105 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3106  * relative, it must be relative to the chain.  So, passing in bs->filename
3107  * from a BDS as backing_file should not be done, as that may be relative to
3108  * the CWD rather than the chain. */
3109 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3110         const char *backing_file)
3111 {
3112     char *filename_full = NULL;
3113     char *backing_file_full = NULL;
3114     char *filename_tmp = NULL;
3115     int is_protocol = 0;
3116     BlockDriverState *curr_bs = NULL;
3117     BlockDriverState *retval = NULL;
3118 
3119     if (!bs || !bs->drv || !backing_file) {
3120         return NULL;
3121     }
3122 
3123     filename_full     = g_malloc(PATH_MAX);
3124     backing_file_full = g_malloc(PATH_MAX);
3125     filename_tmp      = g_malloc(PATH_MAX);
3126 
3127     is_protocol = path_has_protocol(backing_file);
3128 
3129     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3130 
3131         /* If either of the filename paths is actually a protocol, then
3132          * compare unmodified paths; otherwise make paths relative */
3133         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3134             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3135                 retval = curr_bs->backing->bs;
3136                 break;
3137             }
3138         } else {
3139             /* If not an absolute filename path, make it relative to the current
3140              * image's filename path */
3141             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3142                          backing_file);
3143 
3144             /* We are going to compare absolute pathnames */
3145             if (!realpath(filename_tmp, filename_full)) {
3146                 continue;
3147             }
3148 
3149             /* We need to make sure the backing filename we are comparing against
3150              * is relative to the current image filename (or absolute) */
3151             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3152                          curr_bs->backing_file);
3153 
3154             if (!realpath(filename_tmp, backing_file_full)) {
3155                 continue;
3156             }
3157 
3158             if (strcmp(backing_file_full, filename_full) == 0) {
3159                 retval = curr_bs->backing->bs;
3160                 break;
3161             }
3162         }
3163     }
3164 
3165     g_free(filename_full);
3166     g_free(backing_file_full);
3167     g_free(filename_tmp);
3168     return retval;
3169 }
3170 
3171 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3172 {
3173     if (!bs->drv) {
3174         return 0;
3175     }
3176 
3177     if (!bs->backing) {
3178         return 0;
3179     }
3180 
3181     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3182 }
3183 
3184 void bdrv_init(void)
3185 {
3186     module_call_init(MODULE_INIT_BLOCK);
3187 }
3188 
3189 void bdrv_init_with_whitelist(void)
3190 {
3191     use_bdrv_whitelist = 1;
3192     bdrv_init();
3193 }
3194 
3195 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3196 {
3197     Error *local_err = NULL;
3198     int ret;
3199 
3200     if (!bs->drv)  {
3201         return;
3202     }
3203 
3204     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3205         return;
3206     }
3207     bs->open_flags &= ~BDRV_O_INACTIVE;
3208 
3209     if (bs->drv->bdrv_invalidate_cache) {
3210         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3211     } else if (bs->file) {
3212         bdrv_invalidate_cache(bs->file->bs, &local_err);
3213     }
3214     if (local_err) {
3215         bs->open_flags |= BDRV_O_INACTIVE;
3216         error_propagate(errp, local_err);
3217         return;
3218     }
3219 
3220     ret = refresh_total_sectors(bs, bs->total_sectors);
3221     if (ret < 0) {
3222         bs->open_flags |= BDRV_O_INACTIVE;
3223         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3224         return;
3225     }
3226 }
3227 
3228 void bdrv_invalidate_cache_all(Error **errp)
3229 {
3230     BlockDriverState *bs = NULL;
3231     Error *local_err = NULL;
3232 
3233     while ((bs = bdrv_next(bs)) != NULL) {
3234         AioContext *aio_context = bdrv_get_aio_context(bs);
3235 
3236         aio_context_acquire(aio_context);
3237         bdrv_invalidate_cache(bs, &local_err);
3238         aio_context_release(aio_context);
3239         if (local_err) {
3240             error_propagate(errp, local_err);
3241             return;
3242         }
3243     }
3244 }
3245 
3246 static int bdrv_inactivate(BlockDriverState *bs)
3247 {
3248     int ret;
3249 
3250     if (bs->drv->bdrv_inactivate) {
3251         ret = bs->drv->bdrv_inactivate(bs);
3252         if (ret < 0) {
3253             return ret;
3254         }
3255     }
3256 
3257     bs->open_flags |= BDRV_O_INACTIVE;
3258     return 0;
3259 }
3260 
3261 int bdrv_inactivate_all(void)
3262 {
3263     BlockDriverState *bs = NULL;
3264     int ret;
3265 
3266     while ((bs = bdrv_next(bs)) != NULL) {
3267         AioContext *aio_context = bdrv_get_aio_context(bs);
3268 
3269         aio_context_acquire(aio_context);
3270         ret = bdrv_inactivate(bs);
3271         aio_context_release(aio_context);
3272         if (ret < 0) {
3273             return ret;
3274         }
3275     }
3276 
3277     return 0;
3278 }
3279 
3280 /**************************************************************/
3281 /* removable device support */
3282 
3283 /**
3284  * Return TRUE if the media is present
3285  */
3286 bool bdrv_is_inserted(BlockDriverState *bs)
3287 {
3288     BlockDriver *drv = bs->drv;
3289     BdrvChild *child;
3290 
3291     if (!drv) {
3292         return false;
3293     }
3294     if (drv->bdrv_is_inserted) {
3295         return drv->bdrv_is_inserted(bs);
3296     }
3297     QLIST_FOREACH(child, &bs->children, next) {
3298         if (!bdrv_is_inserted(child->bs)) {
3299             return false;
3300         }
3301     }
3302     return true;
3303 }
3304 
3305 /**
3306  * Return whether the media changed since the last call to this
3307  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3308  */
3309 int bdrv_media_changed(BlockDriverState *bs)
3310 {
3311     BlockDriver *drv = bs->drv;
3312 
3313     if (drv && drv->bdrv_media_changed) {
3314         return drv->bdrv_media_changed(bs);
3315     }
3316     return -ENOTSUP;
3317 }
3318 
3319 /**
3320  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3321  */
3322 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3323 {
3324     BlockDriver *drv = bs->drv;
3325     const char *device_name;
3326 
3327     if (drv && drv->bdrv_eject) {
3328         drv->bdrv_eject(bs, eject_flag);
3329     }
3330 
3331     device_name = bdrv_get_device_name(bs);
3332     if (device_name[0] != '\0') {
3333         qapi_event_send_device_tray_moved(device_name,
3334                                           eject_flag, &error_abort);
3335     }
3336 }
3337 
3338 /**
3339  * Lock or unlock the media (if it is locked, the user won't be able
3340  * to eject it manually).
3341  */
3342 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3343 {
3344     BlockDriver *drv = bs->drv;
3345 
3346     trace_bdrv_lock_medium(bs, locked);
3347 
3348     if (drv && drv->bdrv_lock_medium) {
3349         drv->bdrv_lock_medium(bs, locked);
3350     }
3351 }
3352 
3353 /* Get a reference to bs */
3354 void bdrv_ref(BlockDriverState *bs)
3355 {
3356     bs->refcnt++;
3357 }
3358 
3359 /* Release a previously grabbed reference to bs.
3360  * If after releasing, reference count is zero, the BlockDriverState is
3361  * deleted. */
3362 void bdrv_unref(BlockDriverState *bs)
3363 {
3364     if (!bs) {
3365         return;
3366     }
3367     assert(bs->refcnt > 0);
3368     if (--bs->refcnt == 0) {
3369         bdrv_delete(bs);
3370     }
3371 }
3372 
3373 struct BdrvOpBlocker {
3374     Error *reason;
3375     QLIST_ENTRY(BdrvOpBlocker) list;
3376 };
3377 
3378 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3379 {
3380     BdrvOpBlocker *blocker;
3381     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3382     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3383         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3384         if (errp) {
3385             *errp = error_copy(blocker->reason);
3386             error_prepend(errp, "Node '%s' is busy: ",
3387                           bdrv_get_device_or_node_name(bs));
3388         }
3389         return true;
3390     }
3391     return false;
3392 }
3393 
3394 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3395 {
3396     BdrvOpBlocker *blocker;
3397     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3398 
3399     blocker = g_new0(BdrvOpBlocker, 1);
3400     blocker->reason = reason;
3401     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3402 }
3403 
3404 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3405 {
3406     BdrvOpBlocker *blocker, *next;
3407     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3408     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3409         if (blocker->reason == reason) {
3410             QLIST_REMOVE(blocker, list);
3411             g_free(blocker);
3412         }
3413     }
3414 }
3415 
3416 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3417 {
3418     int i;
3419     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3420         bdrv_op_block(bs, i, reason);
3421     }
3422 }
3423 
3424 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3425 {
3426     int i;
3427     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3428         bdrv_op_unblock(bs, i, reason);
3429     }
3430 }
3431 
3432 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3433 {
3434     int i;
3435 
3436     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3437         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3438             return false;
3439         }
3440     }
3441     return true;
3442 }
3443 
3444 void bdrv_img_create(const char *filename, const char *fmt,
3445                      const char *base_filename, const char *base_fmt,
3446                      char *options, uint64_t img_size, int flags,
3447                      Error **errp, bool quiet)
3448 {
3449     QemuOptsList *create_opts = NULL;
3450     QemuOpts *opts = NULL;
3451     const char *backing_fmt, *backing_file;
3452     int64_t size;
3453     BlockDriver *drv, *proto_drv;
3454     Error *local_err = NULL;
3455     int ret = 0;
3456 
3457     /* Find driver and parse its options */
3458     drv = bdrv_find_format(fmt);
3459     if (!drv) {
3460         error_setg(errp, "Unknown file format '%s'", fmt);
3461         return;
3462     }
3463 
3464     proto_drv = bdrv_find_protocol(filename, true, errp);
3465     if (!proto_drv) {
3466         return;
3467     }
3468 
3469     if (!drv->create_opts) {
3470         error_setg(errp, "Format driver '%s' does not support image creation",
3471                    drv->format_name);
3472         return;
3473     }
3474 
3475     if (!proto_drv->create_opts) {
3476         error_setg(errp, "Protocol driver '%s' does not support image creation",
3477                    proto_drv->format_name);
3478         return;
3479     }
3480 
3481     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3482     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3483 
3484     /* Create parameter list with default values */
3485     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3486     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3487 
3488     /* Parse -o options */
3489     if (options) {
3490         qemu_opts_do_parse(opts, options, NULL, &local_err);
3491         if (local_err) {
3492             error_report_err(local_err);
3493             local_err = NULL;
3494             error_setg(errp, "Invalid options for file format '%s'", fmt);
3495             goto out;
3496         }
3497     }
3498 
3499     if (base_filename) {
3500         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3501         if (local_err) {
3502             error_setg(errp, "Backing file not supported for file format '%s'",
3503                        fmt);
3504             goto out;
3505         }
3506     }
3507 
3508     if (base_fmt) {
3509         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3510         if (local_err) {
3511             error_setg(errp, "Backing file format not supported for file "
3512                              "format '%s'", fmt);
3513             goto out;
3514         }
3515     }
3516 
3517     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3518     if (backing_file) {
3519         if (!strcmp(filename, backing_file)) {
3520             error_setg(errp, "Error: Trying to create an image with the "
3521                              "same filename as the backing file");
3522             goto out;
3523         }
3524     }
3525 
3526     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3527 
3528     // The size for the image must always be specified, with one exception:
3529     // If we are using a backing file, we can obtain the size from there
3530     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3531     if (size == -1) {
3532         if (backing_file) {
3533             BlockDriverState *bs;
3534             char *full_backing = g_new0(char, PATH_MAX);
3535             int64_t size;
3536             int back_flags;
3537             QDict *backing_options = NULL;
3538 
3539             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3540                                                          full_backing, PATH_MAX,
3541                                                          &local_err);
3542             if (local_err) {
3543                 g_free(full_backing);
3544                 goto out;
3545             }
3546 
3547             /* backing files always opened read-only */
3548             back_flags = flags;
3549             back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3550 
3551             if (backing_fmt) {
3552                 backing_options = qdict_new();
3553                 qdict_put(backing_options, "driver",
3554                           qstring_from_str(backing_fmt));
3555             }
3556 
3557             bs = NULL;
3558             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3559                             back_flags, &local_err);
3560             g_free(full_backing);
3561             if (ret < 0) {
3562                 goto out;
3563             }
3564             size = bdrv_getlength(bs);
3565             if (size < 0) {
3566                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3567                                  backing_file);
3568                 bdrv_unref(bs);
3569                 goto out;
3570             }
3571 
3572             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3573 
3574             bdrv_unref(bs);
3575         } else {
3576             error_setg(errp, "Image creation needs a size parameter");
3577             goto out;
3578         }
3579     }
3580 
3581     if (!quiet) {
3582         printf("Formatting '%s', fmt=%s ", filename, fmt);
3583         qemu_opts_print(opts, " ");
3584         puts("");
3585     }
3586 
3587     ret = bdrv_create(drv, filename, opts, &local_err);
3588 
3589     if (ret == -EFBIG) {
3590         /* This is generally a better message than whatever the driver would
3591          * deliver (especially because of the cluster_size_hint), since that
3592          * is most probably not much different from "image too large". */
3593         const char *cluster_size_hint = "";
3594         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3595             cluster_size_hint = " (try using a larger cluster size)";
3596         }
3597         error_setg(errp, "The image size is too large for file format '%s'"
3598                    "%s", fmt, cluster_size_hint);
3599         error_free(local_err);
3600         local_err = NULL;
3601     }
3602 
3603 out:
3604     qemu_opts_del(opts);
3605     qemu_opts_free(create_opts);
3606     if (local_err) {
3607         error_propagate(errp, local_err);
3608     }
3609 }
3610 
3611 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3612 {
3613     return bs->aio_context;
3614 }
3615 
3616 void bdrv_detach_aio_context(BlockDriverState *bs)
3617 {
3618     BdrvAioNotifier *baf;
3619 
3620     if (!bs->drv) {
3621         return;
3622     }
3623 
3624     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3625         baf->detach_aio_context(baf->opaque);
3626     }
3627 
3628     if (bs->throttle_state) {
3629         throttle_timers_detach_aio_context(&bs->throttle_timers);
3630     }
3631     if (bs->drv->bdrv_detach_aio_context) {
3632         bs->drv->bdrv_detach_aio_context(bs);
3633     }
3634     if (bs->file) {
3635         bdrv_detach_aio_context(bs->file->bs);
3636     }
3637     if (bs->backing) {
3638         bdrv_detach_aio_context(bs->backing->bs);
3639     }
3640 
3641     bs->aio_context = NULL;
3642 }
3643 
3644 void bdrv_attach_aio_context(BlockDriverState *bs,
3645                              AioContext *new_context)
3646 {
3647     BdrvAioNotifier *ban;
3648 
3649     if (!bs->drv) {
3650         return;
3651     }
3652 
3653     bs->aio_context = new_context;
3654 
3655     if (bs->backing) {
3656         bdrv_attach_aio_context(bs->backing->bs, new_context);
3657     }
3658     if (bs->file) {
3659         bdrv_attach_aio_context(bs->file->bs, new_context);
3660     }
3661     if (bs->drv->bdrv_attach_aio_context) {
3662         bs->drv->bdrv_attach_aio_context(bs, new_context);
3663     }
3664     if (bs->throttle_state) {
3665         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3666     }
3667 
3668     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3669         ban->attached_aio_context(new_context, ban->opaque);
3670     }
3671 }
3672 
3673 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3674 {
3675     bdrv_drain(bs); /* ensure there are no in-flight requests */
3676 
3677     bdrv_detach_aio_context(bs);
3678 
3679     /* This function executes in the old AioContext so acquire the new one in
3680      * case it runs in a different thread.
3681      */
3682     aio_context_acquire(new_context);
3683     bdrv_attach_aio_context(bs, new_context);
3684     aio_context_release(new_context);
3685 }
3686 
3687 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3688         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3689         void (*detach_aio_context)(void *opaque), void *opaque)
3690 {
3691     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3692     *ban = (BdrvAioNotifier){
3693         .attached_aio_context = attached_aio_context,
3694         .detach_aio_context   = detach_aio_context,
3695         .opaque               = opaque
3696     };
3697 
3698     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3699 }
3700 
3701 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3702                                       void (*attached_aio_context)(AioContext *,
3703                                                                    void *),
3704                                       void (*detach_aio_context)(void *),
3705                                       void *opaque)
3706 {
3707     BdrvAioNotifier *ban, *ban_next;
3708 
3709     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3710         if (ban->attached_aio_context == attached_aio_context &&
3711             ban->detach_aio_context   == detach_aio_context   &&
3712             ban->opaque               == opaque)
3713         {
3714             QLIST_REMOVE(ban, list);
3715             g_free(ban);
3716 
3717             return;
3718         }
3719     }
3720 
3721     abort();
3722 }
3723 
3724 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3725                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3726 {
3727     if (!bs->drv->bdrv_amend_options) {
3728         return -ENOTSUP;
3729     }
3730     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3731 }
3732 
3733 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3734  * of block filter and by bdrv_is_first_non_filter.
3735  * It is used to test if the given bs is the candidate or recurse more in the
3736  * node graph.
3737  */
3738 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3739                                       BlockDriverState *candidate)
3740 {
3741     /* return false if basic checks fails */
3742     if (!bs || !bs->drv) {
3743         return false;
3744     }
3745 
3746     /* the code reached a non block filter driver -> check if the bs is
3747      * the same as the candidate. It's the recursion termination condition.
3748      */
3749     if (!bs->drv->is_filter) {
3750         return bs == candidate;
3751     }
3752     /* Down this path the driver is a block filter driver */
3753 
3754     /* If the block filter recursion method is defined use it to recurse down
3755      * the node graph.
3756      */
3757     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3758         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3759     }
3760 
3761     /* the driver is a block filter but don't allow to recurse -> return false
3762      */
3763     return false;
3764 }
3765 
3766 /* This function checks if the candidate is the first non filter bs down it's
3767  * bs chain. Since we don't have pointers to parents it explore all bs chains
3768  * from the top. Some filters can choose not to pass down the recursion.
3769  */
3770 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3771 {
3772     BlockDriverState *bs = NULL;
3773 
3774     /* walk down the bs forest recursively */
3775     while ((bs = bdrv_next(bs)) != NULL) {
3776         bool perm;
3777 
3778         /* try to recurse in this top level bs */
3779         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3780 
3781         /* candidate is the first non filter */
3782         if (perm) {
3783             return true;
3784         }
3785     }
3786 
3787     return false;
3788 }
3789 
3790 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3791                                         const char *node_name, Error **errp)
3792 {
3793     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3794     AioContext *aio_context;
3795 
3796     if (!to_replace_bs) {
3797         error_setg(errp, "Node name '%s' not found", node_name);
3798         return NULL;
3799     }
3800 
3801     aio_context = bdrv_get_aio_context(to_replace_bs);
3802     aio_context_acquire(aio_context);
3803 
3804     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3805         to_replace_bs = NULL;
3806         goto out;
3807     }
3808 
3809     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3810      * most non filter in order to prevent data corruption.
3811      * Another benefit is that this tests exclude backing files which are
3812      * blocked by the backing blockers.
3813      */
3814     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3815         error_setg(errp, "Only top most non filter can be replaced");
3816         to_replace_bs = NULL;
3817         goto out;
3818     }
3819 
3820 out:
3821     aio_context_release(aio_context);
3822     return to_replace_bs;
3823 }
3824 
3825 static bool append_open_options(QDict *d, BlockDriverState *bs)
3826 {
3827     const QDictEntry *entry;
3828     QemuOptDesc *desc;
3829     BdrvChild *child;
3830     bool found_any = false;
3831     const char *p;
3832 
3833     for (entry = qdict_first(bs->options); entry;
3834          entry = qdict_next(bs->options, entry))
3835     {
3836         /* Exclude options for children */
3837         QLIST_FOREACH(child, &bs->children, next) {
3838             if (strstart(qdict_entry_key(entry), child->name, &p)
3839                 && (!*p || *p == '.'))
3840             {
3841                 break;
3842             }
3843         }
3844         if (child) {
3845             continue;
3846         }
3847 
3848         /* And exclude all non-driver-specific options */
3849         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3850             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3851                 break;
3852             }
3853         }
3854         if (desc->name) {
3855             continue;
3856         }
3857 
3858         qobject_incref(qdict_entry_value(entry));
3859         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3860         found_any = true;
3861     }
3862 
3863     return found_any;
3864 }
3865 
3866 /* Updates the following BDS fields:
3867  *  - exact_filename: A filename which may be used for opening a block device
3868  *                    which (mostly) equals the given BDS (even without any
3869  *                    other options; so reading and writing must return the same
3870  *                    results, but caching etc. may be different)
3871  *  - full_open_options: Options which, when given when opening a block device
3872  *                       (without a filename), result in a BDS (mostly)
3873  *                       equalling the given one
3874  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3875  *              full_open_options is converted to a JSON object, prefixed with
3876  *              "json:" (for use through the JSON pseudo protocol) and put here.
3877  */
3878 void bdrv_refresh_filename(BlockDriverState *bs)
3879 {
3880     BlockDriver *drv = bs->drv;
3881     QDict *opts;
3882 
3883     if (!drv) {
3884         return;
3885     }
3886 
3887     /* This BDS's file name will most probably depend on its file's name, so
3888      * refresh that first */
3889     if (bs->file) {
3890         bdrv_refresh_filename(bs->file->bs);
3891     }
3892 
3893     if (drv->bdrv_refresh_filename) {
3894         /* Obsolete information is of no use here, so drop the old file name
3895          * information before refreshing it */
3896         bs->exact_filename[0] = '\0';
3897         if (bs->full_open_options) {
3898             QDECREF(bs->full_open_options);
3899             bs->full_open_options = NULL;
3900         }
3901 
3902         opts = qdict_new();
3903         append_open_options(opts, bs);
3904         drv->bdrv_refresh_filename(bs, opts);
3905         QDECREF(opts);
3906     } else if (bs->file) {
3907         /* Try to reconstruct valid information from the underlying file */
3908         bool has_open_options;
3909 
3910         bs->exact_filename[0] = '\0';
3911         if (bs->full_open_options) {
3912             QDECREF(bs->full_open_options);
3913             bs->full_open_options = NULL;
3914         }
3915 
3916         opts = qdict_new();
3917         has_open_options = append_open_options(opts, bs);
3918 
3919         /* If no specific options have been given for this BDS, the filename of
3920          * the underlying file should suffice for this one as well */
3921         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3922             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3923         }
3924         /* Reconstructing the full options QDict is simple for most format block
3925          * drivers, as long as the full options are known for the underlying
3926          * file BDS. The full options QDict of that file BDS should somehow
3927          * contain a representation of the filename, therefore the following
3928          * suffices without querying the (exact_)filename of this BDS. */
3929         if (bs->file->bs->full_open_options) {
3930             qdict_put_obj(opts, "driver",
3931                           QOBJECT(qstring_from_str(drv->format_name)));
3932             QINCREF(bs->file->bs->full_open_options);
3933             qdict_put_obj(opts, "file",
3934                           QOBJECT(bs->file->bs->full_open_options));
3935 
3936             bs->full_open_options = opts;
3937         } else {
3938             QDECREF(opts);
3939         }
3940     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3941         /* There is no underlying file BDS (at least referenced by BDS.file),
3942          * so the full options QDict should be equal to the options given
3943          * specifically for this block device when it was opened (plus the
3944          * driver specification).
3945          * Because those options don't change, there is no need to update
3946          * full_open_options when it's already set. */
3947 
3948         opts = qdict_new();
3949         append_open_options(opts, bs);
3950         qdict_put_obj(opts, "driver",
3951                       QOBJECT(qstring_from_str(drv->format_name)));
3952 
3953         if (bs->exact_filename[0]) {
3954             /* This may not work for all block protocol drivers (some may
3955              * require this filename to be parsed), but we have to find some
3956              * default solution here, so just include it. If some block driver
3957              * does not support pure options without any filename at all or
3958              * needs some special format of the options QDict, it needs to
3959              * implement the driver-specific bdrv_refresh_filename() function.
3960              */
3961             qdict_put_obj(opts, "filename",
3962                           QOBJECT(qstring_from_str(bs->exact_filename)));
3963         }
3964 
3965         bs->full_open_options = opts;
3966     }
3967 
3968     if (bs->exact_filename[0]) {
3969         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
3970     } else if (bs->full_open_options) {
3971         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
3972         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
3973                  qstring_get_str(json));
3974         QDECREF(json);
3975     }
3976 }
3977