xref: /openbmc/qemu/block.c (revision 21a24302)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 
43 #ifdef CONFIG_BSD
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into a BlockDriverState's
     * dirty_bitmaps list (the list bdrv_new() initialises) -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
73 
/* Sentinel stored in a result field until the operation has finished;
 * bdrv_create() polls the AioContext while its result still equals it. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* List that bdrv_new_root() appends every new root BlockDriverState to */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* List of named nodes; bdrv_assign_node_name() appends to it */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations of helpers defined later in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
92 
93 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed by
 * a colon (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_letter = (first >= 'a' && first <= 'z') ||
                    (first >= 'A' && first <= 'Z');

    /* Only look at the second character once the first one qualifies, so an
     * empty string is never read past its terminator. */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
100 
101 int is_windows_drive(const char *filename)
102 {
103     if (is_windows_drive_prefix(filename) &&
104         filename[2] == '\0')
105         return 1;
106     if (strstart(filename, "\\\\.\\", NULL) ||
107         strstart(filename, "//./", NULL))
108         return 1;
109     return 0;
110 }
111 #endif
112 
113 size_t bdrv_opt_mem_align(BlockDriverState *bs)
114 {
115     if (!bs || !bs->drv) {
116         /* page size or 4k (hdd sector size) should be on the safe side */
117         return MAX(4096, getpagesize());
118     }
119 
120     return bs->bl.opt_mem_alignment;
121 }
122 
123 size_t bdrv_min_mem_align(BlockDriverState *bs)
124 {
125     if (!bs || !bs->drv) {
126         /* page size or 4k (hdd sector size) should be on the safe side */
127         return MAX(4096, getpagesize());
128     }
129 
130     return bs->bl.min_mem_alignment;
131 }
132 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * delimiter found in it is a colon rather than a path separator.
 * On Windows, drive specifications are never treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* strcspn() stops at the first delimiter or at the terminating NUL */
    return path[strcspn(path, delimiters)] == ':';
}
150 
/*
 * Return non-zero if @path is absolute: it starts with '/', or, on Windows,
 * with '\\' or a drive specification.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
163 
/*
 * Build a path for @filename in @dest (at most @dest_size bytes including
 * the terminating NUL).
 *
 * An absolute @filename is copied verbatim.  Otherwise it is appended to the
 * leading part of @base_path, which ends after the later of
 *   - the first ':' in @base_path (so URL-style prefixes are kept), and
 *   - the last directory separator in @base_path.
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' or the start of the string */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator or the start of the string */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    prefix_end = after_sep > after_colon ? after_sep : after_colon;

    /* Copy the prefix, truncated so that the terminator always fits */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
207 
208 void bdrv_get_full_backing_filename_from_filename(const char *backed,
209                                                   const char *backing,
210                                                   char *dest, size_t sz,
211                                                   Error **errp)
212 {
213     if (backing[0] == '\0' || path_has_protocol(backing) ||
214         path_is_absolute(backing))
215     {
216         pstrcpy(dest, sz, backing);
217     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
218         error_setg(errp, "Cannot use relative backing file names for '%s'",
219                    backed);
220     } else {
221         path_combine(dest, sz, backed, backing);
222     }
223 }
224 
225 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
226                                     Error **errp)
227 {
228     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
229 
230     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
231                                                  dest, sz, errp);
232 }
233 
234 void bdrv_register(BlockDriver *bdrv)
235 {
236     bdrv_setup_io_funcs(bdrv);
237 
238     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
239 }
240 
241 BlockDriverState *bdrv_new_root(void)
242 {
243     BlockDriverState *bs = bdrv_new();
244 
245     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
246     return bs;
247 }
248 
249 BlockDriverState *bdrv_new(void)
250 {
251     BlockDriverState *bs;
252     int i;
253 
254     bs = g_new0(BlockDriverState, 1);
255     QLIST_INIT(&bs->dirty_bitmaps);
256     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
257         QLIST_INIT(&bs->op_blockers[i]);
258     }
259     notifier_list_init(&bs->close_notifiers);
260     notifier_with_return_list_init(&bs->before_write_notifiers);
261     qemu_co_queue_init(&bs->throttled_reqs[0]);
262     qemu_co_queue_init(&bs->throttled_reqs[1]);
263     bs->refcnt = 1;
264     bs->aio_context = qemu_get_aio_context();
265 
266     return bs;
267 }
268 
269 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
270 {
271     notifier_list_add(&bs->close_notifiers, notify);
272 }
273 
274 BlockDriver *bdrv_find_format(const char *format_name)
275 {
276     BlockDriver *drv1;
277     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
278         if (!strcmp(drv1->format_name, format_name)) {
279             return drv1;
280         }
281     }
282     return NULL;
283 }
284 
285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
286 {
287     static const char *whitelist_rw[] = {
288         CONFIG_BDRV_RW_WHITELIST
289     };
290     static const char *whitelist_ro[] = {
291         CONFIG_BDRV_RO_WHITELIST
292     };
293     const char **p;
294 
295     if (!whitelist_rw[0] && !whitelist_ro[0]) {
296         return 1;               /* no whitelist, anything goes */
297     }
298 
299     for (p = whitelist_rw; *p; p++) {
300         if (!strcmp(drv->format_name, *p)) {
301             return 1;
302         }
303     }
304     if (read_only) {
305         for (p = whitelist_ro; *p; p++) {
306             if (!strcmp(drv->format_name, *p)) {
307                 return 1;
308             }
309         }
310     }
311     return 0;
312 }
313 
/*
 * State shared between bdrv_create() and its coroutine entry point
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name, owned by bdrv_create() */
    QemuOpts *opts;     /* options passed through to the driver callback */
    int ret;            /* NOT_DONE until the driver callback has returned */
    Error *err;         /* error reported by the driver callback, if any */
} CreateCo;
321 
322 static void coroutine_fn bdrv_create_co_entry(void *opaque)
323 {
324     Error *local_err = NULL;
325     int ret;
326 
327     CreateCo *cco = opaque;
328     assert(cco->drv);
329 
330     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
331     if (local_err) {
332         error_propagate(&cco->err, local_err);
333     }
334     cco->ret = ret;
335 }
336 
337 int bdrv_create(BlockDriver *drv, const char* filename,
338                 QemuOpts *opts, Error **errp)
339 {
340     int ret;
341 
342     Coroutine *co;
343     CreateCo cco = {
344         .drv = drv,
345         .filename = g_strdup(filename),
346         .opts = opts,
347         .ret = NOT_DONE,
348         .err = NULL,
349     };
350 
351     if (!drv->bdrv_create) {
352         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
353         ret = -ENOTSUP;
354         goto out;
355     }
356 
357     if (qemu_in_coroutine()) {
358         /* Fast-path if already in coroutine context */
359         bdrv_create_co_entry(&cco);
360     } else {
361         co = qemu_coroutine_create(bdrv_create_co_entry);
362         qemu_coroutine_enter(co, &cco);
363         while (cco.ret == NOT_DONE) {
364             aio_poll(qemu_get_aio_context(), true);
365         }
366     }
367 
368     ret = cco.ret;
369     if (ret < 0) {
370         if (cco.err) {
371             error_propagate(errp, cco.err);
372         } else {
373             error_setg_errno(errp, -ret, "Could not create image");
374         }
375     }
376 
377 out:
378     g_free(cco.filename);
379     return ret;
380 }
381 
382 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
383 {
384     BlockDriver *drv;
385     Error *local_err = NULL;
386     int ret;
387 
388     drv = bdrv_find_protocol(filename, true, errp);
389     if (drv == NULL) {
390         return -ENOENT;
391     }
392 
393     ret = bdrv_create(drv, filename, opts, &local_err);
394     if (local_err) {
395         error_propagate(errp, local_err);
396     }
397     return ret;
398 }
399 
400 /**
401  * Try to get @bs's logical and physical block size.
402  * On success, store them in @bsz struct and return 0.
403  * On failure return -errno.
404  * @bs must not be empty.
405  */
406 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
407 {
408     BlockDriver *drv = bs->drv;
409 
410     if (drv && drv->bdrv_probe_blocksizes) {
411         return drv->bdrv_probe_blocksizes(bs, bsz);
412     }
413 
414     return -ENOTSUP;
415 }
416 
417 /**
418  * Try to get @bs's geometry (cyls, heads, sectors).
419  * On success, store them in @geo struct and return 0.
420  * On failure return -errno.
421  * @bs must not be empty.
422  */
423 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
424 {
425     BlockDriver *drv = bs->drv;
426 
427     if (drv && drv->bdrv_probe_geometry) {
428         return drv->bdrv_probe_geometry(bs, geo);
429     }
430 
431     return -ENOTSUP;
432 }
433 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the capacity of
 * that buffer in bytes.  On POSIX hosts the file is created in $TMPDIR, or
 * in /var/tmp when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int ret;
    const char *tmpdir;

    /* A negative size would be converted to a huge size_t by snprintf() */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Capture close()'s error first: unlink() may overwrite errno */
        ret = -errno;
        unlink(filename);
        return ret;
    }
    return 0;
#endif
}
469 
470 /*
471  * Detect host devices. By convention, /dev/cdrom[N] is always
472  * recognized as a host CDROM.
473  */
474 static BlockDriver *find_hdev_driver(const char *filename)
475 {
476     int score_max = 0, score;
477     BlockDriver *drv = NULL, *d;
478 
479     QLIST_FOREACH(d, &bdrv_drivers, list) {
480         if (d->bdrv_probe_device) {
481             score = d->bdrv_probe_device(filename);
482             if (score > score_max) {
483                 score_max = score;
484                 drv = d;
485             }
486         }
487     }
488 
489     return drv;
490 }
491 
492 BlockDriver *bdrv_find_protocol(const char *filename,
493                                 bool allow_protocol_prefix,
494                                 Error **errp)
495 {
496     BlockDriver *drv1;
497     char protocol[128];
498     int len;
499     const char *p;
500 
501     /* TODO Drivers without bdrv_file_open must be specified explicitly */
502 
503     /*
504      * XXX(hch): we really should not let host device detection
505      * override an explicit protocol specification, but moving this
506      * later breaks access to device names with colons in them.
507      * Thanks to the brain-dead persistent naming schemes on udev-
508      * based Linux systems those actually are quite common.
509      */
510     drv1 = find_hdev_driver(filename);
511     if (drv1) {
512         return drv1;
513     }
514 
515     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
516         return &bdrv_file;
517     }
518 
519     p = strchr(filename, ':');
520     assert(p != NULL);
521     len = p - filename;
522     if (len > sizeof(protocol) - 1)
523         len = sizeof(protocol) - 1;
524     memcpy(protocol, filename, len);
525     protocol[len] = '\0';
526     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
527         if (drv1->protocol_name &&
528             !strcmp(drv1->protocol_name, protocol)) {
529             return drv1;
530         }
531     }
532 
533     error_setg(errp, "Unknown protocol '%s'", protocol);
534     return NULL;
535 }
536 
537 /*
538  * Guess image format by probing its contents.
539  * This is not a good idea when your image is raw (CVE-2008-2004), but
540  * we do it anyway for backward compatibility.
541  *
542  * @buf         contains the image's first @buf_size bytes.
543  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
544  *              but can be smaller if the image file is smaller)
545  * @filename    is its filename.
546  *
547  * For all block drivers, call the bdrv_probe() method to get its
548  * probing score.
549  * Return the first block driver with the highest probing score.
550  */
551 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
552                             const char *filename)
553 {
554     int score_max = 0, score;
555     BlockDriver *drv = NULL, *d;
556 
557     QLIST_FOREACH(d, &bdrv_drivers, list) {
558         if (d->bdrv_probe) {
559             score = d->bdrv_probe(buf, buf_size, filename);
560             if (score > score_max) {
561                 score_max = score;
562                 drv = d;
563             }
564         }
565     }
566 
567     return drv;
568 }
569 
570 static int find_image_format(BlockDriverState *bs, const char *filename,
571                              BlockDriver **pdrv, Error **errp)
572 {
573     BlockDriver *drv;
574     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
575     int ret = 0;
576 
577     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
578     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
579         *pdrv = &bdrv_raw;
580         return ret;
581     }
582 
583     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
584     if (ret < 0) {
585         error_setg_errno(errp, -ret, "Could not read image for determining its "
586                          "format");
587         *pdrv = NULL;
588         return ret;
589     }
590 
591     drv = bdrv_probe_all(buf, ret, filename);
592     if (!drv) {
593         error_setg(errp, "Could not determine image format: No compatible "
594                    "driver found");
595         ret = -ENOENT;
596     }
597     *pdrv = drv;
598     return ret;
599 }
600 
601 /**
602  * Set the current 'total_sectors' value
603  * Return 0 on success, -errno on error.
604  */
605 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
606 {
607     BlockDriver *drv = bs->drv;
608 
609     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
610     if (bdrv_is_sg(bs))
611         return 0;
612 
613     /* query actual device if possible, otherwise just trust the hint */
614     if (drv->bdrv_getlength) {
615         int64_t length = drv->bdrv_getlength(bs);
616         if (length < 0) {
617             return length;
618         }
619         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
620     }
621 
622     bs->total_sectors = hint;
623     return 0;
624 }
625 
626 /**
627  * Set open flags for a given discard mode
628  *
629  * Return 0 on success, -1 if the discard mode was invalid.
630  */
631 int bdrv_parse_discard_flags(const char *mode, int *flags)
632 {
633     *flags &= ~BDRV_O_UNMAP;
634 
635     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
636         /* do nothing */
637     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
638         *flags |= BDRV_O_UNMAP;
639     } else {
640         return -1;
641     }
642 
643     return 0;
644 }
645 
646 /**
647  * Set open flags for a given cache mode
648  *
649  * Return 0 on success, -1 if the cache mode was invalid.
650  */
651 int bdrv_parse_cache_flags(const char *mode, int *flags)
652 {
653     *flags &= ~BDRV_O_CACHE_MASK;
654 
655     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
656         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
657     } else if (!strcmp(mode, "directsync")) {
658         *flags |= BDRV_O_NOCACHE;
659     } else if (!strcmp(mode, "writeback")) {
660         *flags |= BDRV_O_CACHE_WB;
661     } else if (!strcmp(mode, "unsafe")) {
662         *flags |= BDRV_O_CACHE_WB;
663         *flags |= BDRV_O_NO_FLUSH;
664     } else if (!strcmp(mode, "writethrough")) {
665         /* this is the default */
666     } else {
667         return -1;
668     }
669 
670     return 0;
671 }
672 
673 /*
674  * Returns the flags that a temporary snapshot should get, based on the
675  * originally requested flags (the originally requested image will have flags
676  * like a backing file)
677  */
678 static int bdrv_temp_snapshot_flags(int flags)
679 {
680     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
681 }
682 
683 /*
684  * Returns the flags that bs->file should get if a protocol driver is expected,
685  * based on the given flags for the parent BDS
686  */
687 static int bdrv_inherited_flags(int flags)
688 {
689     /* Enable protocol handling, disable format probing for bs->file */
690     flags |= BDRV_O_PROTOCOL;
691 
692     /* Our block drivers take care to send flushes and respect unmap policy,
693      * so we can enable both unconditionally on lower layers. */
694     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
695 
696     /* Clear flags that only apply to the top layer */
697     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
698 
699     return flags;
700 }
701 
702 const BdrvChildRole child_file = {
703     .inherit_flags = bdrv_inherited_flags,
704 };
705 
706 /*
707  * Returns the flags that bs->file should get if the use of formats (and not
708  * only protocols) is permitted for it, based on the given flags for the parent
709  * BDS
710  */
711 static int bdrv_inherited_fmt_flags(int parent_flags)
712 {
713     int flags = child_file.inherit_flags(parent_flags);
714     return flags & ~BDRV_O_PROTOCOL;
715 }
716 
717 const BdrvChildRole child_format = {
718     .inherit_flags = bdrv_inherited_fmt_flags,
719 };
720 
721 /*
722  * Returns the flags that bs->backing should get, based on the given flags
723  * for the parent BDS
724  */
725 static int bdrv_backing_flags(int flags)
726 {
727     /* backing files always opened read-only */
728     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
729 
730     /* snapshot=on is handled on the top layer */
731     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
732 
733     return flags;
734 }
735 
736 static const BdrvChildRole child_backing = {
737     .inherit_flags = bdrv_backing_flags,
738 };
739 
740 static int bdrv_open_flags(BlockDriverState *bs, int flags)
741 {
742     int open_flags = flags | BDRV_O_CACHE_WB;
743 
744     /*
745      * Clear flags that are internal to the block layer before opening the
746      * image.
747      */
748     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
749 
750     /*
751      * Snapshots should be writable.
752      */
753     if (flags & BDRV_O_TEMPORARY) {
754         open_flags |= BDRV_O_RDWR;
755     }
756 
757     return open_flags;
758 }
759 
760 static void bdrv_assign_node_name(BlockDriverState *bs,
761                                   const char *node_name,
762                                   Error **errp)
763 {
764     char *gen_node_name = NULL;
765 
766     if (!node_name) {
767         node_name = gen_node_name = id_generate(ID_BLOCK);
768     } else if (!id_wellformed(node_name)) {
769         /*
770          * Check for empty string or invalid characters, but not if it is
771          * generated (generated names use characters not available to the user)
772          */
773         error_setg(errp, "Invalid node name");
774         return;
775     }
776 
777     /* takes care of avoiding namespaces collisions */
778     if (blk_by_name(node_name)) {
779         error_setg(errp, "node-name=%s is conflicting with a device id",
780                    node_name);
781         goto out;
782     }
783 
784     /* takes care of avoiding duplicates node names */
785     if (bdrv_find_node(node_name)) {
786         error_setg(errp, "Duplicate node name");
787         goto out;
788     }
789 
790     /* copy node name into the bs and insert it into the graph list */
791     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
792     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
793 out:
794     g_free(gen_node_name);
795 }
796 
/*
 * Options that bdrv_open_common() takes out of the options QDict and
 * handles itself.  Currently this is only "node-name".
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
809 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node to set up; bs->file must still be NULL
 * @file:    child that becomes bs->file for drivers opened via bdrv_open();
 *           must be NULL for drivers that implement bdrv_file_open()
 * @options: options QDict; must be non-NULL and must not be bs->options
 * @flags:   BDRV_O_* flags requested by the caller
 * @drv:     driver to open @bs with; must be non-NULL
 * @errp:    receives the error on failure
 *
 * Returns 0 on success and a negative errno value on failure.  Failures
 * after the driver was attached reset bs->file, bs->opaque and bs->drv.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the child if there is one, otherwise from
     * the "filename" option (which may be absent) */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Take the options listed in bdrv_runtime_opts out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* A missing node-name (NULL) makes bdrv_assign_node_name() generate one */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults set before the driver's open function is called */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* The second whitelist lookup only selects the more specific message:
     * a driver that would be allowed read-only was requested read-write */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    /* Record the file name; exact_filename starts out as a copy of it */
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Attach the driver and allocate its zero-filled private state; from
     * here on failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    /* Prefer the driver's own error; otherwise build one from the errno */
    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    /* Only a warning: opening an encrypted image still succeeds */
    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    /* Sanity checks on the alignment values the node ended up with */
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the driver attachment; @file itself is not released here */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    /* Failures before the driver was attached only free the QemuOpts */
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
959 
960 static QDict *parse_json_filename(const char *filename, Error **errp)
961 {
962     QObject *options_obj;
963     QDict *options;
964     int ret;
965 
966     ret = strstart(filename, "json:", &filename);
967     assert(ret);
968 
969     options_obj = qobject_from_json(filename);
970     if (!options_obj) {
971         error_setg(errp, "Could not parse the JSON options");
972         return NULL;
973     }
974 
975     if (qobject_type(options_obj) != QTYPE_QDICT) {
976         qobject_decref(options_obj);
977         error_setg(errp, "Invalid JSON object given");
978         return NULL;
979     }
980 
981     options = qobject_to_qdict(options_obj);
982     qdict_flatten(options);
983 
984     return options;
985 }
986 
987 /*
988  * Fills in default options for opening images and converts the legacy
989  * filename/flags pair to option QDict entries.
990  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
991  * block driver has been specified explicitly.
992  */
993 static int bdrv_fill_options(QDict **options, const char **pfilename,
994                              int *flags, Error **errp)
995 {
996     const char *filename = *pfilename;
997     const char *drvname;
998     bool protocol = *flags & BDRV_O_PROTOCOL;
999     bool parse_filename = false;
1000     BlockDriver *drv = NULL;
1001     Error *local_err = NULL;
1002 
1003     /* Parse json: pseudo-protocol */
1004     if (filename && g_str_has_prefix(filename, "json:")) {
1005         QDict *json_options = parse_json_filename(filename, &local_err);
1006         if (local_err) {
1007             error_propagate(errp, local_err);
1008             return -EINVAL;
1009         }
1010 
1011         /* Options given in the filename have lower priority than options
1012          * specified directly */
1013         qdict_join(*options, json_options, false);
1014         QDECREF(json_options);
1015         *pfilename = filename = NULL;
1016     }
1017 
1018     drvname = qdict_get_try_str(*options, "driver");
1019     if (drvname) {
1020         drv = bdrv_find_format(drvname);
1021         if (!drv) {
1022             error_setg(errp, "Unknown driver '%s'", drvname);
1023             return -ENOENT;
1024         }
1025         /* If the user has explicitly specified the driver, this choice should
1026          * override the BDRV_O_PROTOCOL flag */
1027         protocol = drv->bdrv_file_open;
1028     }
1029 
1030     if (protocol) {
1031         *flags |= BDRV_O_PROTOCOL;
1032     } else {
1033         *flags &= ~BDRV_O_PROTOCOL;
1034     }
1035 
1036     /* Fetch the file name from the options QDict if necessary */
1037     if (protocol && filename) {
1038         if (!qdict_haskey(*options, "filename")) {
1039             qdict_put(*options, "filename", qstring_from_str(filename));
1040             parse_filename = true;
1041         } else {
1042             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1043                              "the same time");
1044             return -EINVAL;
1045         }
1046     }
1047 
1048     /* Find the right block driver */
1049     filename = qdict_get_try_str(*options, "filename");
1050 
1051     if (!drvname && protocol) {
1052         if (filename) {
1053             drv = bdrv_find_protocol(filename, parse_filename, errp);
1054             if (!drv) {
1055                 return -EINVAL;
1056             }
1057 
1058             drvname = drv->format_name;
1059             qdict_put(*options, "driver", qstring_from_str(drvname));
1060         } else {
1061             error_setg(errp, "Must specify either driver or file");
1062             return -EINVAL;
1063         }
1064     }
1065 
1066     assert(drv || !protocol);
1067 
1068     /* Driver-specific filename parsing */
1069     if (drv && drv->bdrv_parse_filename && parse_filename) {
1070         drv->bdrv_parse_filename(filename, *options, &local_err);
1071         if (local_err) {
1072             error_propagate(errp, local_err);
1073             return -EINVAL;
1074         }
1075 
1076         if (!drv->bdrv_needs_filename) {
1077             qdict_del(*options, "filename");
1078         }
1079     }
1080 
1081     if (runstate_check(RUN_STATE_INMIGRATE)) {
1082         *flags |= BDRV_O_INCOMING;
1083     }
1084 
1085     return 0;
1086 }
1087 
1088 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1089                                     BlockDriverState *child_bs,
1090                                     const BdrvChildRole *child_role)
1091 {
1092     BdrvChild *child = g_new(BdrvChild, 1);
1093     *child = (BdrvChild) {
1094         .bs     = child_bs,
1095         .role   = child_role,
1096     };
1097 
1098     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1099     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1100 
1101     return child;
1102 }
1103 
1104 static void bdrv_detach_child(BdrvChild *child)
1105 {
1106     QLIST_REMOVE(child, next);
1107     QLIST_REMOVE(child, next_parent);
1108     g_free(child);
1109 }
1110 
1111 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1112 {
1113     BlockDriverState *child_bs;
1114 
1115     if (child == NULL) {
1116         return;
1117     }
1118 
1119     if (child->bs->inherits_from == parent) {
1120         child->bs->inherits_from = NULL;
1121     }
1122 
1123     child_bs = child->bs;
1124     bdrv_detach_child(child);
1125     bdrv_unref(child_bs);
1126 }
1127 
1128 /*
1129  * Sets the backing file link of a BDS. A new reference is created; callers
1130  * which don't need their own reference any more must call bdrv_unref().
1131  */
1132 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1133 {
1134     if (backing_hd) {
1135         bdrv_ref(backing_hd);
1136     }
1137 
1138     if (bs->backing) {
1139         assert(bs->backing_blocker);
1140         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1141         bdrv_unref_child(bs, bs->backing);
1142     } else if (backing_hd) {
1143         error_setg(&bs->backing_blocker,
1144                    "node is used as backing hd of '%s'",
1145                    bdrv_get_device_or_node_name(bs));
1146     }
1147 
1148     if (!backing_hd) {
1149         error_free(bs->backing_blocker);
1150         bs->backing_blocker = NULL;
1151         bs->backing = NULL;
1152         goto out;
1153     }
1154     bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
1155     bs->open_flags &= ~BDRV_O_NO_BACKING;
1156     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1157     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1158             backing_hd->drv ? backing_hd->drv->format_name : "");
1159 
1160     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1161     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1162     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1163                     bs->backing_blocker);
1164 out:
1165     bdrv_refresh_limits(bs, NULL);
1166 }
1167 
1168 /*
1169  * Opens the backing file for a BlockDriverState if not yet open
1170  *
1171  * options is a QDict of options to pass to the block drivers, or NULL for an
1172  * empty set of options. The reference to the QDict is transferred to this
1173  * function (even on failure), so if the caller intends to reuse the dictionary,
1174  * it needs to use QINCREF() before calling bdrv_file_open.
1175  */
1176 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1177 {
1178     char *backing_filename = g_malloc0(PATH_MAX);
1179     int ret = 0;
1180     BlockDriverState *backing_hd;
1181     Error *local_err = NULL;
1182 
1183     if (bs->backing != NULL) {
1184         QDECREF(options);
1185         goto free_exit;
1186     }
1187 
1188     /* NULL means an empty set of options */
1189     if (options == NULL) {
1190         options = qdict_new();
1191     }
1192 
1193     bs->open_flags &= ~BDRV_O_NO_BACKING;
1194     if (qdict_haskey(options, "file.filename")) {
1195         backing_filename[0] = '\0';
1196     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1197         QDECREF(options);
1198         goto free_exit;
1199     } else {
1200         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1201                                        &local_err);
1202         if (local_err) {
1203             ret = -EINVAL;
1204             error_propagate(errp, local_err);
1205             QDECREF(options);
1206             goto free_exit;
1207         }
1208     }
1209 
1210     if (!bs->drv || !bs->drv->supports_backing) {
1211         ret = -EINVAL;
1212         error_setg(errp, "Driver doesn't support backing files");
1213         QDECREF(options);
1214         goto free_exit;
1215     }
1216 
1217     backing_hd = bdrv_new();
1218 
1219     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1220         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1221     }
1222 
1223     assert(bs->backing == NULL);
1224     ret = bdrv_open_inherit(&backing_hd,
1225                             *backing_filename ? backing_filename : NULL,
1226                             NULL, options, 0, bs, &child_backing, &local_err);
1227     if (ret < 0) {
1228         bdrv_unref(backing_hd);
1229         backing_hd = NULL;
1230         bs->open_flags |= BDRV_O_NO_BACKING;
1231         error_setg(errp, "Could not open backing file: %s",
1232                    error_get_pretty(local_err));
1233         error_free(local_err);
1234         goto free_exit;
1235     }
1236 
1237     /* Hook up the backing file link; drop our reference, bs owns the
1238      * backing_hd reference now */
1239     bdrv_set_backing_hd(bs, backing_hd);
1240     bdrv_unref(backing_hd);
1241 
1242 free_exit:
1243     g_free(backing_filename);
1244     return ret;
1245 }
1246 
1247 /*
1248  * Opens a disk image whose options are given as BlockdevRef in another block
1249  * device's options.
1250  *
1251  * If allow_none is true, no image will be opened if filename is false and no
1252  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1253  *
1254  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1255  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1256  * itself, all options starting with "${bdref_key}." are considered part of the
1257  * BlockdevRef.
1258  *
1259  * The BlockdevRef will be removed from the options QDict.
1260  */
1261 BdrvChild *bdrv_open_child(const char *filename,
1262                            QDict *options, const char *bdref_key,
1263                            BlockDriverState* parent,
1264                            const BdrvChildRole *child_role,
1265                            bool allow_none, Error **errp)
1266 {
1267     BdrvChild *c = NULL;
1268     BlockDriverState *bs;
1269     QDict *image_options;
1270     int ret;
1271     char *bdref_key_dot;
1272     const char *reference;
1273 
1274     assert(child_role != NULL);
1275 
1276     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1277     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1278     g_free(bdref_key_dot);
1279 
1280     reference = qdict_get_try_str(options, bdref_key);
1281     if (!filename && !reference && !qdict_size(image_options)) {
1282         if (!allow_none) {
1283             error_setg(errp, "A block device must be specified for \"%s\"",
1284                        bdref_key);
1285         }
1286         QDECREF(image_options);
1287         goto done;
1288     }
1289 
1290     bs = NULL;
1291     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1292                             parent, child_role, errp);
1293     if (ret < 0) {
1294         goto done;
1295     }
1296 
1297     c = bdrv_attach_child(parent, bs, child_role);
1298 
1299 done:
1300     qdict_del(options, bdref_key);
1301     return c;
1302 }
1303 
1304 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1305 {
1306     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1307     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1308     int64_t total_size;
1309     QemuOpts *opts = NULL;
1310     QDict *snapshot_options;
1311     BlockDriverState *bs_snapshot;
1312     Error *local_err = NULL;
1313     int ret;
1314 
1315     /* if snapshot, we create a temporary backing file and open it
1316        instead of opening 'filename' directly */
1317 
1318     /* Get the required size from the image */
1319     total_size = bdrv_getlength(bs);
1320     if (total_size < 0) {
1321         ret = total_size;
1322         error_setg_errno(errp, -total_size, "Could not get image size");
1323         goto out;
1324     }
1325 
1326     /* Create the temporary image */
1327     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1328     if (ret < 0) {
1329         error_setg_errno(errp, -ret, "Could not get temporary filename");
1330         goto out;
1331     }
1332 
1333     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1334                             &error_abort);
1335     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1336     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1337     qemu_opts_del(opts);
1338     if (ret < 0) {
1339         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1340                          "'%s': %s", tmp_filename,
1341                          error_get_pretty(local_err));
1342         error_free(local_err);
1343         goto out;
1344     }
1345 
1346     /* Prepare a new options QDict for the temporary file */
1347     snapshot_options = qdict_new();
1348     qdict_put(snapshot_options, "file.driver",
1349               qstring_from_str("file"));
1350     qdict_put(snapshot_options, "file.filename",
1351               qstring_from_str(tmp_filename));
1352     qdict_put(snapshot_options, "driver",
1353               qstring_from_str("qcow2"));
1354 
1355     bs_snapshot = bdrv_new();
1356 
1357     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1358                     flags, &local_err);
1359     if (ret < 0) {
1360         error_propagate(errp, local_err);
1361         goto out;
1362     }
1363 
1364     bdrv_append(bs_snapshot, bs);
1365 
1366 out:
1367     g_free(tmp_filename);
1368     return ret;
1369 }
1370 
1371 /*
1372  * Opens a disk image (raw, qcow2, vmdk, ...)
1373  *
1374  * options is a QDict of options to pass to the block drivers, or NULL for an
1375  * empty set of options. The reference to the QDict belongs to the block layer
1376  * after the call (even on failure), so if the caller intends to reuse the
1377  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1378  *
1379  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1380  * If it is not NULL, the referenced BDS will be reused.
1381  *
1382  * The reference parameter may be used to specify an existing block device which
1383  * should be opened. If specified, neither options nor a filename may be given,
1384  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1385  */
1386 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1387                              const char *reference, QDict *options, int flags,
1388                              BlockDriverState *parent,
1389                              const BdrvChildRole *child_role, Error **errp)
1390 {
1391     int ret;
1392     BdrvChild *file = NULL;
1393     BlockDriverState *bs;
1394     BlockDriver *drv = NULL;
1395     const char *drvname;
1396     const char *backing;
1397     Error *local_err = NULL;
1398     int snapshot_flags = 0;
1399 
1400     assert(pbs);
1401     assert(!child_role || !flags);
1402     assert(!child_role == !parent);
1403 
1404     if (reference) {
1405         bool options_non_empty = options ? qdict_size(options) : false;
1406         QDECREF(options);
1407 
1408         if (*pbs) {
1409             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1410                        "another block device");
1411             return -EINVAL;
1412         }
1413 
1414         if (filename || options_non_empty) {
1415             error_setg(errp, "Cannot reference an existing block device with "
1416                        "additional options or a new filename");
1417             return -EINVAL;
1418         }
1419 
1420         bs = bdrv_lookup_bs(reference, reference, errp);
1421         if (!bs) {
1422             return -ENODEV;
1423         }
1424         bdrv_ref(bs);
1425         *pbs = bs;
1426         return 0;
1427     }
1428 
1429     if (*pbs) {
1430         bs = *pbs;
1431     } else {
1432         bs = bdrv_new();
1433     }
1434 
1435     /* NULL means an empty set of options */
1436     if (options == NULL) {
1437         options = qdict_new();
1438     }
1439 
1440     if (child_role) {
1441         bs->inherits_from = parent;
1442         flags = child_role->inherit_flags(parent->open_flags);
1443     }
1444 
1445     ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1446     if (local_err) {
1447         goto fail;
1448     }
1449 
1450     /* Find the right image format driver */
1451     drvname = qdict_get_try_str(options, "driver");
1452     if (drvname) {
1453         drv = bdrv_find_format(drvname);
1454         qdict_del(options, "driver");
1455         if (!drv) {
1456             error_setg(errp, "Unknown driver: '%s'", drvname);
1457             ret = -EINVAL;
1458             goto fail;
1459         }
1460     }
1461 
1462     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1463 
1464     backing = qdict_get_try_str(options, "backing");
1465     if (backing && *backing == '\0') {
1466         flags |= BDRV_O_NO_BACKING;
1467         qdict_del(options, "backing");
1468     }
1469 
1470     bs->open_flags = flags;
1471     bs->options = options;
1472     options = qdict_clone_shallow(options);
1473 
1474     /* Open image file without format layer */
1475     if ((flags & BDRV_O_PROTOCOL) == 0) {
1476         if (flags & BDRV_O_RDWR) {
1477             flags |= BDRV_O_ALLOW_RDWR;
1478         }
1479         if (flags & BDRV_O_SNAPSHOT) {
1480             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1481             flags = bdrv_backing_flags(flags);
1482         }
1483 
1484         bs->open_flags = flags;
1485 
1486         file = bdrv_open_child(filename, options, "file", bs,
1487                                &child_file, true, &local_err);
1488         if (local_err) {
1489             ret = -EINVAL;
1490             goto fail;
1491         }
1492     }
1493 
1494     /* Image format probing */
1495     bs->probed = !drv;
1496     if (!drv && file) {
1497         ret = find_image_format(file->bs, filename, &drv, &local_err);
1498         if (ret < 0) {
1499             goto fail;
1500         }
1501     } else if (!drv) {
1502         error_setg(errp, "Must specify either driver or file");
1503         ret = -EINVAL;
1504         goto fail;
1505     }
1506 
1507     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1508     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1509     /* file must be NULL if a protocol BDS is about to be created
1510      * (the inverse results in an error message from bdrv_open_common()) */
1511     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1512 
1513     /* Open the image */
1514     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1515     if (ret < 0) {
1516         goto fail;
1517     }
1518 
1519     if (file && (bs->file != file)) {
1520         bdrv_unref_child(bs, file);
1521         file = NULL;
1522     }
1523 
1524     /* If there is a backing file, use it */
1525     if ((flags & BDRV_O_NO_BACKING) == 0) {
1526         QDict *backing_options;
1527 
1528         qdict_extract_subqdict(options, &backing_options, "backing.");
1529         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1530         if (ret < 0) {
1531             goto close_and_fail;
1532         }
1533     }
1534 
1535     bdrv_refresh_filename(bs);
1536 
1537     /* Check if any unknown options were used */
1538     if (options && (qdict_size(options) != 0)) {
1539         const QDictEntry *entry = qdict_first(options);
1540         if (flags & BDRV_O_PROTOCOL) {
1541             error_setg(errp, "Block protocol '%s' doesn't support the option "
1542                        "'%s'", drv->format_name, entry->key);
1543         } else {
1544             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1545                        "support the option '%s'", drv->format_name,
1546                        bdrv_get_device_name(bs), entry->key);
1547         }
1548 
1549         ret = -EINVAL;
1550         goto close_and_fail;
1551     }
1552 
1553     if (!bdrv_key_required(bs)) {
1554         if (bs->blk) {
1555             blk_dev_change_media_cb(bs->blk, true);
1556         }
1557     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1558                && !runstate_check(RUN_STATE_INMIGRATE)
1559                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1560         error_setg(errp,
1561                    "Guest must be stopped for opening of encrypted image");
1562         ret = -EBUSY;
1563         goto close_and_fail;
1564     }
1565 
1566     QDECREF(options);
1567     *pbs = bs;
1568 
1569     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1570      * temporary snapshot afterwards. */
1571     if (snapshot_flags) {
1572         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1573         if (local_err) {
1574             goto close_and_fail;
1575         }
1576     }
1577 
1578     return 0;
1579 
1580 fail:
1581     if (file != NULL) {
1582         bdrv_unref_child(bs, file);
1583     }
1584     QDECREF(bs->options);
1585     QDECREF(options);
1586     bs->options = NULL;
1587     if (!*pbs) {
1588         /* If *pbs is NULL, a new BDS has been created in this function and
1589            needs to be freed now. Otherwise, it does not need to be closed,
1590            since it has not really been opened yet. */
1591         bdrv_unref(bs);
1592     }
1593     if (local_err) {
1594         error_propagate(errp, local_err);
1595     }
1596     return ret;
1597 
1598 close_and_fail:
1599     /* See fail path, but now the BDS has to be always closed */
1600     if (*pbs) {
1601         bdrv_close(bs);
1602     } else {
1603         bdrv_unref(bs);
1604     }
1605     QDECREF(options);
1606     if (local_err) {
1607         error_propagate(errp, local_err);
1608     }
1609     return ret;
1610 }
1611 
1612 int bdrv_open(BlockDriverState **pbs, const char *filename,
1613               const char *reference, QDict *options, int flags, Error **errp)
1614 {
1615     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1616                              NULL, errp);
1617 }
1618 
1619 typedef struct BlockReopenQueueEntry {
1620      bool prepared;
1621      BDRVReopenState state;
1622      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1623 } BlockReopenQueueEntry;
1624 
1625 /*
1626  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1627  * reopen of multiple devices.
1628  *
1629  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1630  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1631  * be created and initialized. This newly created BlockReopenQueue should be
1632  * passed back in for subsequent calls that are intended to be of the same
1633  * atomic 'set'.
1634  *
1635  * bs is the BlockDriverState to add to the reopen queue.
1636  *
1637  * options contains the changed options for the associated bs
1638  * (the BlockReopenQueue takes ownership)
1639  *
1640  * flags contains the open flags for the associated bs
1641  *
1642  * returns a pointer to bs_queue, which is either the newly allocated
1643  * bs_queue, or the existing bs_queue being used.
1644  *
1645  */
1646 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1647                                     BlockDriverState *bs,
1648                                     QDict *options, int flags)
1649 {
1650     assert(bs != NULL);
1651 
1652     BlockReopenQueueEntry *bs_entry;
1653     BdrvChild *child;
1654     QDict *old_options;
1655 
1656     if (bs_queue == NULL) {
1657         bs_queue = g_new0(BlockReopenQueue, 1);
1658         QSIMPLEQ_INIT(bs_queue);
1659     }
1660 
1661     if (!options) {
1662         options = qdict_new();
1663     }
1664 
1665     old_options = qdict_clone_shallow(bs->options);
1666     qdict_join(options, old_options, false);
1667     QDECREF(old_options);
1668 
1669     /* bdrv_open() masks this flag out */
1670     flags &= ~BDRV_O_PROTOCOL;
1671 
1672     QLIST_FOREACH(child, &bs->children, next) {
1673         int child_flags;
1674 
1675         if (child->bs->inherits_from != bs) {
1676             continue;
1677         }
1678 
1679         child_flags = child->role->inherit_flags(flags);
1680         /* TODO Pass down child flags (backing.*, extents.*, ...) */
1681         bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1682     }
1683 
1684     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1685     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1686 
1687     bs_entry->state.bs = bs;
1688     bs_entry->state.options = options;
1689     bs_entry->state.flags = flags;
1690 
1691     return bs_queue;
1692 }
1693 
1694 /*
1695  * Reopen multiple BlockDriverStates atomically & transactionally.
1696  *
1697  * The queue passed in (bs_queue) must have been built up previous
1698  * via bdrv_reopen_queue().
1699  *
1700  * Reopens all BDS specified in the queue, with the appropriate
1701  * flags.  All devices are prepared for reopen, and failure of any
1702  * device will cause all device changes to be abandonded, and intermediate
1703  * data cleaned up.
1704  *
1705  * If all devices prepare successfully, then the changes are committed
1706  * to all devices.
1707  *
1708  */
1709 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1710 {
1711     int ret = -1;
1712     BlockReopenQueueEntry *bs_entry, *next;
1713     Error *local_err = NULL;
1714 
1715     assert(bs_queue != NULL);
1716 
1717     bdrv_drain_all();
1718 
1719     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1720         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1721             error_propagate(errp, local_err);
1722             goto cleanup;
1723         }
1724         bs_entry->prepared = true;
1725     }
1726 
1727     /* If we reach this point, we have success and just need to apply the
1728      * changes
1729      */
1730     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1731         bdrv_reopen_commit(&bs_entry->state);
1732     }
1733 
1734     ret = 0;
1735 
1736 cleanup:
1737     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1738         if (ret && bs_entry->prepared) {
1739             bdrv_reopen_abort(&bs_entry->state);
1740         }
1741         QDECREF(bs_entry->state.options);
1742         g_free(bs_entry);
1743     }
1744     g_free(bs_queue);
1745     return ret;
1746 }
1747 
1748 
1749 /* Reopen a single BlockDriverState with the specified flags. */
1750 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1751 {
1752     int ret = -1;
1753     Error *local_err = NULL;
1754     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1755 
1756     ret = bdrv_reopen_multiple(queue, &local_err);
1757     if (local_err != NULL) {
1758         error_propagate(errp, local_err);
1759     }
1760     return ret;
1761 }
1762 
1763 
1764 /*
1765  * Prepares a BlockDriverState for reopen. All changes are staged in the
1766  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1767  * the block driver layer .bdrv_reopen_prepare()
1768  *
1769  * bs is the BlockDriverState to reopen
1770  * flags are the new open flags
1771  * queue is the reopen queue
1772  *
1773  * Returns 0 on success, non-zero on error.  On error errp will be set
1774  * as well.
1775  *
1776  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1777  * It is the responsibility of the caller to then call the abort() or
1778  * commit() for any other BDS that have been left in a prepare() state
1779  *
1780  */
1781 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1782                         Error **errp)
1783 {
1784     int ret = -1;
1785     Error *local_err = NULL;
1786     BlockDriver *drv;
1787 
1788     assert(reopen_state != NULL);
1789     assert(reopen_state->bs->drv != NULL);
1790     drv = reopen_state->bs->drv;
1791 
1792     /* if we are to stay read-only, do not allow permission change
1793      * to r/w */
1794     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1795         reopen_state->flags & BDRV_O_RDWR) {
1796         error_setg(errp, "Node '%s' is read only",
1797                    bdrv_get_device_or_node_name(reopen_state->bs));
1798         goto error;
1799     }
1800 
1801 
1802     ret = bdrv_flush(reopen_state->bs);
1803     if (ret) {
1804         error_setg_errno(errp, -ret, "Error flushing drive");
1805         goto error;
1806     }
1807 
1808     if (drv->bdrv_reopen_prepare) {
1809         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1810         if (ret) {
1811             if (local_err != NULL) {
1812                 error_propagate(errp, local_err);
1813             } else {
1814                 error_setg(errp, "failed while preparing to reopen image '%s'",
1815                            reopen_state->bs->filename);
1816             }
1817             goto error;
1818         }
1819     } else {
1820         /* It is currently mandatory to have a bdrv_reopen_prepare()
1821          * handler for each supported drv. */
1822         error_setg(errp, "Block format '%s' used by node '%s' "
1823                    "does not support reopening files", drv->format_name,
1824                    bdrv_get_device_or_node_name(reopen_state->bs));
1825         ret = -1;
1826         goto error;
1827     }
1828 
1829     /* Options that are not handled are only okay if they are unchanged
1830      * compared to the old state. It is expected that some options are only
1831      * used for the initial open, but not reopen (e.g. filename) */
1832     if (qdict_size(reopen_state->options)) {
1833         const QDictEntry *entry = qdict_first(reopen_state->options);
1834 
1835         do {
1836             QString *new_obj = qobject_to_qstring(entry->value);
1837             const char *new = qstring_get_str(new_obj);
1838             const char *old = qdict_get_try_str(reopen_state->bs->options,
1839                                                 entry->key);
1840 
1841             if (!old || strcmp(new, old)) {
1842                 error_setg(errp, "Cannot change the option '%s'", entry->key);
1843                 ret = -EINVAL;
1844                 goto error;
1845             }
1846         } while ((entry = qdict_next(reopen_state->options, entry)));
1847     }
1848 
1849     ret = 0;
1850 
1851 error:
1852     return ret;
1853 }
1854 
1855 /*
1856  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1857  * makes them final by swapping the staging BlockDriverState contents into
1858  * the active BlockDriverState contents.
1859  */
1860 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1861 {
1862     BlockDriver *drv;
1863 
1864     assert(reopen_state != NULL);
1865     drv = reopen_state->bs->drv;
1866     assert(drv != NULL);
1867 
1868     /* If there are any driver level actions to take */
1869     if (drv->bdrv_reopen_commit) {
1870         drv->bdrv_reopen_commit(reopen_state);
1871     }
1872 
1873     /* set BDS specific flags now */
1874     reopen_state->bs->open_flags         = reopen_state->flags;
1875     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1876                                               BDRV_O_CACHE_WB);
1877     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1878 
1879     bdrv_refresh_limits(reopen_state->bs, NULL);
1880 }
1881 
1882 /*
1883  * Abort the reopen, and delete and free the staged changes in
1884  * reopen_state
1885  */
1886 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1887 {
1888     BlockDriver *drv;
1889 
1890     assert(reopen_state != NULL);
1891     drv = reopen_state->bs->drv;
1892     assert(drv != NULL);
1893 
1894     if (drv->bdrv_reopen_abort) {
1895         drv->bdrv_reopen_abort(reopen_state);
1896     }
1897 }
1898 
1899 
1900 void bdrv_close(BlockDriverState *bs)
1901 {
1902     BdrvAioNotifier *ban, *ban_next;
1903 
1904     if (bs->job) {
1905         block_job_cancel_sync(bs->job);
1906     }
1907 
1908     /* Disable I/O limits and drain all pending throttled requests */
1909     if (bs->throttle_state) {
1910         bdrv_io_limits_disable(bs);
1911     }
1912 
1913     bdrv_drain(bs); /* complete I/O */
1914     bdrv_flush(bs);
1915     bdrv_drain(bs); /* in case flush left pending I/O */
1916     notifier_list_notify(&bs->close_notifiers, bs);
1917 
1918     if (bs->blk) {
1919         blk_dev_change_media_cb(bs->blk, false);
1920     }
1921 
1922     if (bs->drv) {
1923         BdrvChild *child, *next;
1924 
1925         bs->drv->bdrv_close(bs);
1926         bs->drv = NULL;
1927 
1928         bdrv_set_backing_hd(bs, NULL);
1929 
1930         if (bs->file != NULL) {
1931             bdrv_unref_child(bs, bs->file);
1932             bs->file = NULL;
1933         }
1934 
1935         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1936             /* TODO Remove bdrv_unref() from drivers' close function and use
1937              * bdrv_unref_child() here */
1938             if (child->bs->inherits_from == bs) {
1939                 child->bs->inherits_from = NULL;
1940             }
1941             bdrv_detach_child(child);
1942         }
1943 
1944         g_free(bs->opaque);
1945         bs->opaque = NULL;
1946         bs->copy_on_read = 0;
1947         bs->backing_file[0] = '\0';
1948         bs->backing_format[0] = '\0';
1949         bs->total_sectors = 0;
1950         bs->encrypted = 0;
1951         bs->valid_key = 0;
1952         bs->sg = 0;
1953         bs->zero_beyond_eof = false;
1954         QDECREF(bs->options);
1955         bs->options = NULL;
1956         QDECREF(bs->full_open_options);
1957         bs->full_open_options = NULL;
1958     }
1959 
1960     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1961         g_free(ban);
1962     }
1963     QLIST_INIT(&bs->aio_notifiers);
1964 }
1965 
1966 void bdrv_close_all(void)
1967 {
1968     BlockDriverState *bs;
1969 
1970     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1971         AioContext *aio_context = bdrv_get_aio_context(bs);
1972 
1973         aio_context_acquire(aio_context);
1974         bdrv_close(bs);
1975         aio_context_release(aio_context);
1976     }
1977 }
1978 
1979 /* make a BlockDriverState anonymous by removing from bdrv_state and
1980  * graph_bdrv_state list.
1981    Also, NULL terminate the device_name to prevent double remove */
1982 void bdrv_make_anon(BlockDriverState *bs)
1983 {
1984     /*
1985      * Take care to remove bs from bdrv_states only when it's actually
1986      * in it.  Note that bs->device_list.tqe_prev is initially null,
1987      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1988      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1989      * resetting it to null on remove.
1990      */
1991     if (bs->device_list.tqe_prev) {
1992         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1993         bs->device_list.tqe_prev = NULL;
1994     }
1995     if (bs->node_name[0] != '\0') {
1996         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1997     }
1998     bs->node_name[0] = '\0';
1999 }
2000 
2001 /* Fields that need to stay with the top-level BDS */
2002 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2003                                      BlockDriverState *bs_src)
2004 {
2005     /* move some fields that need to stay attached to the device */
2006 
2007     /* dev info */
2008     bs_dest->copy_on_read       = bs_src->copy_on_read;
2009 
2010     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2011 
2012     /* dirty bitmap */
2013     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2014 }
2015 
2016 static void change_parent_backing_link(BlockDriverState *from,
2017                                        BlockDriverState *to)
2018 {
2019     BdrvChild *c, *next;
2020 
2021     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2022         assert(c->role != &child_backing);
2023         c->bs = to;
2024         QLIST_REMOVE(c, next_parent);
2025         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2026         bdrv_ref(to);
2027         bdrv_unref(from);
2028     }
2029     if (from->blk) {
2030         blk_set_bs(from->blk, to);
2031         if (!to->device_list.tqe_prev) {
2032             QTAILQ_INSERT_BEFORE(from, to, device_list);
2033         }
2034         QTAILQ_REMOVE(&bdrv_states, from, device_list);
2035     }
2036 }
2037 
2038 static void swap_feature_fields(BlockDriverState *bs_top,
2039                                 BlockDriverState *bs_new)
2040 {
2041     BlockDriverState tmp;
2042 
2043     bdrv_move_feature_fields(&tmp, bs_top);
2044     bdrv_move_feature_fields(bs_top, bs_new);
2045     bdrv_move_feature_fields(bs_new, &tmp);
2046 
2047     assert(!bs_new->throttle_state);
2048     if (bs_top->throttle_state) {
2049         assert(bs_top->io_limits_enabled);
2050         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2051         bdrv_io_limits_disable(bs_top);
2052     }
2053 }
2054 
2055 /*
2056  * Add new bs contents at the top of an image chain while the chain is
2057  * live, while keeping required fields on the top layer.
2058  *
2059  * This will modify the BlockDriverState fields, and swap contents
2060  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2061  *
2062  * bs_new must not be attached to a BlockBackend.
2063  *
2064  * This function does not create any image files.
2065  *
2066  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2067  * that's what the callers commonly need. bs_new will be referenced by the old
2068  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2069  * reference of its own, it must call bdrv_ref().
2070  */
2071 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2072 {
2073     assert(!bdrv_requests_pending(bs_top));
2074     assert(!bdrv_requests_pending(bs_new));
2075 
2076     bdrv_ref(bs_top);
2077     change_parent_backing_link(bs_top, bs_new);
2078 
2079     /* Some fields always stay on top of the backing file chain */
2080     swap_feature_fields(bs_top, bs_new);
2081 
2082     bdrv_set_backing_hd(bs_new, bs_top);
2083     bdrv_unref(bs_top);
2084 
2085     /* bs_new is now referenced by its new parents, we don't need the
2086      * additional reference any more. */
2087     bdrv_unref(bs_new);
2088 }
2089 
2090 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2091 {
2092     assert(!bdrv_requests_pending(old));
2093     assert(!bdrv_requests_pending(new));
2094 
2095     bdrv_ref(old);
2096 
2097     if (old->blk) {
2098         /* As long as these fields aren't in BlockBackend, but in the top-level
2099          * BlockDriverState, it's not possible for a BDS to have two BBs.
2100          *
2101          * We really want to copy the fields from old to new, but we go for a
2102          * swap instead so that pointers aren't duplicated and cause trouble.
2103          * (Also, bdrv_swap() used to do the same.) */
2104         assert(!new->blk);
2105         swap_feature_fields(old, new);
2106     }
2107     change_parent_backing_link(old, new);
2108 
2109     /* Change backing files if a previously independent node is added to the
2110      * chain. For active commit, we replace top by its own (indirect) backing
2111      * file and don't do anything here so we don't build a loop. */
2112     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2113         bdrv_set_backing_hd(new, backing_bs(old));
2114         bdrv_set_backing_hd(old, NULL);
2115     }
2116 
2117     bdrv_unref(old);
2118 }
2119 
2120 static void bdrv_delete(BlockDriverState *bs)
2121 {
2122     assert(!bs->job);
2123     assert(bdrv_op_blocker_is_empty(bs));
2124     assert(!bs->refcnt);
2125     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2126 
2127     bdrv_close(bs);
2128 
2129     /* remove from list, if necessary */
2130     bdrv_make_anon(bs);
2131 
2132     g_free(bs);
2133 }
2134 
2135 /*
2136  * Run consistency checks on an image
2137  *
2138  * Returns 0 if the check could be completed (it doesn't mean that the image is
2139  * free of errors) or -errno when an internal error occurred. The results of the
2140  * check are stored in res.
2141  */
2142 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2143 {
2144     if (bs->drv == NULL) {
2145         return -ENOMEDIUM;
2146     }
2147     if (bs->drv->bdrv_check == NULL) {
2148         return -ENOTSUP;
2149     }
2150 
2151     memset(res, 0, sizeof(*res));
2152     return bs->drv->bdrv_check(bs, res, fix);
2153 }
2154 
2155 #define COMMIT_BUF_SECTORS 2048
2156 
2157 /* commit COW file into the raw image */
2158 int bdrv_commit(BlockDriverState *bs)
2159 {
2160     BlockDriver *drv = bs->drv;
2161     int64_t sector, total_sectors, length, backing_length;
2162     int n, ro, open_flags;
2163     int ret = 0;
2164     uint8_t *buf = NULL;
2165 
2166     if (!drv)
2167         return -ENOMEDIUM;
2168 
2169     if (!bs->backing) {
2170         return -ENOTSUP;
2171     }
2172 
2173     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2174         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2175         return -EBUSY;
2176     }
2177 
2178     ro = bs->backing->bs->read_only;
2179     open_flags =  bs->backing->bs->open_flags;
2180 
2181     if (ro) {
2182         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2183             return -EACCES;
2184         }
2185     }
2186 
2187     length = bdrv_getlength(bs);
2188     if (length < 0) {
2189         ret = length;
2190         goto ro_cleanup;
2191     }
2192 
2193     backing_length = bdrv_getlength(bs->backing->bs);
2194     if (backing_length < 0) {
2195         ret = backing_length;
2196         goto ro_cleanup;
2197     }
2198 
2199     /* If our top snapshot is larger than the backing file image,
2200      * grow the backing file image if possible.  If not possible,
2201      * we must return an error */
2202     if (length > backing_length) {
2203         ret = bdrv_truncate(bs->backing->bs, length);
2204         if (ret < 0) {
2205             goto ro_cleanup;
2206         }
2207     }
2208 
2209     total_sectors = length >> BDRV_SECTOR_BITS;
2210 
2211     /* qemu_try_blockalign() for bs will choose an alignment that works for
2212      * bs->backing->bs as well, so no need to compare the alignment manually. */
2213     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2214     if (buf == NULL) {
2215         ret = -ENOMEM;
2216         goto ro_cleanup;
2217     }
2218 
2219     for (sector = 0; sector < total_sectors; sector += n) {
2220         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2221         if (ret < 0) {
2222             goto ro_cleanup;
2223         }
2224         if (ret) {
2225             ret = bdrv_read(bs, sector, buf, n);
2226             if (ret < 0) {
2227                 goto ro_cleanup;
2228             }
2229 
2230             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2231             if (ret < 0) {
2232                 goto ro_cleanup;
2233             }
2234         }
2235     }
2236 
2237     if (drv->bdrv_make_empty) {
2238         ret = drv->bdrv_make_empty(bs);
2239         if (ret < 0) {
2240             goto ro_cleanup;
2241         }
2242         bdrv_flush(bs);
2243     }
2244 
2245     /*
2246      * Make sure all data we wrote to the backing device is actually
2247      * stable on disk.
2248      */
2249     if (bs->backing) {
2250         bdrv_flush(bs->backing->bs);
2251     }
2252 
2253     ret = 0;
2254 ro_cleanup:
2255     qemu_vfree(buf);
2256 
2257     if (ro) {
2258         /* ignoring error return here */
2259         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2260     }
2261 
2262     return ret;
2263 }
2264 
2265 int bdrv_commit_all(void)
2266 {
2267     BlockDriverState *bs;
2268 
2269     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2270         AioContext *aio_context = bdrv_get_aio_context(bs);
2271 
2272         aio_context_acquire(aio_context);
2273         if (bs->drv && bs->backing) {
2274             int ret = bdrv_commit(bs);
2275             if (ret < 0) {
2276                 aio_context_release(aio_context);
2277                 return ret;
2278             }
2279         }
2280         aio_context_release(aio_context);
2281     }
2282     return 0;
2283 }
2284 
2285 /*
2286  * Return values:
2287  * 0        - success
2288  * -EINVAL  - backing format specified, but no file
2289  * -ENOSPC  - can't update the backing file because no space is left in the
2290  *            image file header
2291  * -ENOTSUP - format driver doesn't support changing the backing file
2292  */
2293 int bdrv_change_backing_file(BlockDriverState *bs,
2294     const char *backing_file, const char *backing_fmt)
2295 {
2296     BlockDriver *drv = bs->drv;
2297     int ret;
2298 
2299     /* Backing file format doesn't make sense without a backing file */
2300     if (backing_fmt && !backing_file) {
2301         return -EINVAL;
2302     }
2303 
2304     if (drv->bdrv_change_backing_file != NULL) {
2305         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2306     } else {
2307         ret = -ENOTSUP;
2308     }
2309 
2310     if (ret == 0) {
2311         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2312         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2313     }
2314     return ret;
2315 }
2316 
2317 /*
2318  * Finds the image layer in the chain that has 'bs' as its backing file.
2319  *
2320  * active is the current topmost image.
2321  *
2322  * Returns NULL if bs is not found in active's image chain,
2323  * or if active == bs.
2324  *
2325  * Returns the bottommost base image if bs == NULL.
2326  */
2327 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2328                                     BlockDriverState *bs)
2329 {
2330     while (active && bs != backing_bs(active)) {
2331         active = backing_bs(active);
2332     }
2333 
2334     return active;
2335 }
2336 
2337 /* Given a BDS, searches for the base layer. */
2338 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2339 {
2340     return bdrv_find_overlay(bs, NULL);
2341 }
2342 
2343 /*
2344  * Drops images above 'base' up to and including 'top', and sets the image
2345  * above 'top' to have base as its backing file.
2346  *
2347  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2348  * information in 'bs' can be properly updated.
2349  *
2350  * E.g., this will convert the following chain:
2351  * bottom <- base <- intermediate <- top <- active
2352  *
2353  * to
2354  *
2355  * bottom <- base <- active
2356  *
2357  * It is allowed for bottom==base, in which case it converts:
2358  *
2359  * base <- intermediate <- top <- active
2360  *
2361  * to
2362  *
2363  * base <- active
2364  *
2365  * If backing_file_str is non-NULL, it will be used when modifying top's
2366  * overlay image metadata.
2367  *
2368  * Error conditions:
2369  *  if active == top, that is considered an error
2370  *
2371  */
2372 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2373                            BlockDriverState *base, const char *backing_file_str)
2374 {
2375     BlockDriverState *new_top_bs = NULL;
2376     int ret = -EIO;
2377 
2378     if (!top->drv || !base->drv) {
2379         goto exit;
2380     }
2381 
2382     new_top_bs = bdrv_find_overlay(active, top);
2383 
2384     if (new_top_bs == NULL) {
2385         /* we could not find the image above 'top', this is an error */
2386         goto exit;
2387     }
2388 
2389     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2390      * to do, no intermediate images */
2391     if (backing_bs(new_top_bs) == base) {
2392         ret = 0;
2393         goto exit;
2394     }
2395 
2396     /* Make sure that base is in the backing chain of top */
2397     if (!bdrv_chain_contains(top, base)) {
2398         goto exit;
2399     }
2400 
2401     /* success - we can delete the intermediate states, and link top->base */
2402     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2403     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2404                                    base->drv ? base->drv->format_name : "");
2405     if (ret) {
2406         goto exit;
2407     }
2408     bdrv_set_backing_hd(new_top_bs, base);
2409 
2410     ret = 0;
2411 exit:
2412     return ret;
2413 }
2414 
2415 /**
2416  * Truncate file to 'offset' bytes (needed only for file protocols)
2417  */
2418 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2419 {
2420     BlockDriver *drv = bs->drv;
2421     int ret;
2422     if (!drv)
2423         return -ENOMEDIUM;
2424     if (!drv->bdrv_truncate)
2425         return -ENOTSUP;
2426     if (bs->read_only)
2427         return -EACCES;
2428 
2429     ret = drv->bdrv_truncate(bs, offset);
2430     if (ret == 0) {
2431         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2432         bdrv_dirty_bitmap_truncate(bs);
2433         if (bs->blk) {
2434             blk_dev_resize_cb(bs->blk);
2435         }
2436     }
2437     return ret;
2438 }
2439 
2440 /**
2441  * Length of a allocated file in bytes. Sparse files are counted by actual
2442  * allocated space. Return < 0 if error or unknown.
2443  */
2444 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2445 {
2446     BlockDriver *drv = bs->drv;
2447     if (!drv) {
2448         return -ENOMEDIUM;
2449     }
2450     if (drv->bdrv_get_allocated_file_size) {
2451         return drv->bdrv_get_allocated_file_size(bs);
2452     }
2453     if (bs->file) {
2454         return bdrv_get_allocated_file_size(bs->file->bs);
2455     }
2456     return -ENOTSUP;
2457 }
2458 
2459 /**
2460  * Return number of sectors on success, -errno on error.
2461  */
2462 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2463 {
2464     BlockDriver *drv = bs->drv;
2465 
2466     if (!drv)
2467         return -ENOMEDIUM;
2468 
2469     if (drv->has_variable_length) {
2470         int ret = refresh_total_sectors(bs, bs->total_sectors);
2471         if (ret < 0) {
2472             return ret;
2473         }
2474     }
2475     return bs->total_sectors;
2476 }
2477 
2478 /**
2479  * Return length in bytes on success, -errno on error.
2480  * The length is always a multiple of BDRV_SECTOR_SIZE.
2481  */
2482 int64_t bdrv_getlength(BlockDriverState *bs)
2483 {
2484     int64_t ret = bdrv_nb_sectors(bs);
2485 
2486     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2487     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2488 }
2489 
2490 /* return 0 as number of sectors if no device present or error */
2491 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2492 {
2493     int64_t nb_sectors = bdrv_nb_sectors(bs);
2494 
2495     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2496 }
2497 
2498 int bdrv_is_read_only(BlockDriverState *bs)
2499 {
2500     return bs->read_only;
2501 }
2502 
2503 int bdrv_is_sg(BlockDriverState *bs)
2504 {
2505     return bs->sg;
2506 }
2507 
2508 int bdrv_enable_write_cache(BlockDriverState *bs)
2509 {
2510     return bs->enable_write_cache;
2511 }
2512 
2513 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2514 {
2515     bs->enable_write_cache = wce;
2516 
2517     /* so a reopen() will preserve wce */
2518     if (wce) {
2519         bs->open_flags |= BDRV_O_CACHE_WB;
2520     } else {
2521         bs->open_flags &= ~BDRV_O_CACHE_WB;
2522     }
2523 }
2524 
2525 int bdrv_is_encrypted(BlockDriverState *bs)
2526 {
2527     if (bs->backing && bs->backing->bs->encrypted) {
2528         return 1;
2529     }
2530     return bs->encrypted;
2531 }
2532 
2533 int bdrv_key_required(BlockDriverState *bs)
2534 {
2535     BdrvChild *backing = bs->backing;
2536 
2537     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2538         return 1;
2539     }
2540     return (bs->encrypted && !bs->valid_key);
2541 }
2542 
2543 int bdrv_set_key(BlockDriverState *bs, const char *key)
2544 {
2545     int ret;
2546     if (bs->backing && bs->backing->bs->encrypted) {
2547         ret = bdrv_set_key(bs->backing->bs, key);
2548         if (ret < 0)
2549             return ret;
2550         if (!bs->encrypted)
2551             return 0;
2552     }
2553     if (!bs->encrypted) {
2554         return -EINVAL;
2555     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2556         return -ENOMEDIUM;
2557     }
2558     ret = bs->drv->bdrv_set_key(bs, key);
2559     if (ret < 0) {
2560         bs->valid_key = 0;
2561     } else if (!bs->valid_key) {
2562         bs->valid_key = 1;
2563         if (bs->blk) {
2564             /* call the change callback now, we skipped it on open */
2565             blk_dev_change_media_cb(bs->blk, true);
2566         }
2567     }
2568     return ret;
2569 }
2570 
2571 /*
2572  * Provide an encryption key for @bs.
2573  * If @key is non-null:
2574  *     If @bs is not encrypted, fail.
2575  *     Else if the key is invalid, fail.
2576  *     Else set @bs's key to @key, replacing the existing key, if any.
2577  * If @key is null:
2578  *     If @bs is encrypted and still lacks a key, fail.
2579  *     Else do nothing.
2580  * On failure, store an error object through @errp if non-null.
2581  */
2582 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2583 {
2584     if (key) {
2585         if (!bdrv_is_encrypted(bs)) {
2586             error_setg(errp, "Node '%s' is not encrypted",
2587                       bdrv_get_device_or_node_name(bs));
2588         } else if (bdrv_set_key(bs, key) < 0) {
2589             error_setg(errp, QERR_INVALID_PASSWORD);
2590         }
2591     } else {
2592         if (bdrv_key_required(bs)) {
2593             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2594                       "'%s' (%s) is encrypted",
2595                       bdrv_get_device_or_node_name(bs),
2596                       bdrv_get_encrypted_filename(bs));
2597         }
2598     }
2599 }
2600 
2601 const char *bdrv_get_format_name(BlockDriverState *bs)
2602 {
2603     return bs->drv ? bs->drv->format_name : NULL;
2604 }
2605 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() hands the comparator pointers to the array elements, so @a and
 * @b are really "const char **" and must be dereferenced once before the
 * strings can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2610 
2611 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2612                          void *opaque)
2613 {
2614     BlockDriver *drv;
2615     int count = 0;
2616     int i;
2617     const char **formats = NULL;
2618 
2619     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2620         if (drv->format_name) {
2621             bool found = false;
2622             int i = count;
2623             while (formats && i && !found) {
2624                 found = !strcmp(formats[--i], drv->format_name);
2625             }
2626 
2627             if (!found) {
2628                 formats = g_renew(const char *, formats, count + 1);
2629                 formats[count++] = drv->format_name;
2630             }
2631         }
2632     }
2633 
2634     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2635 
2636     for (i = 0; i < count; i++) {
2637         it(opaque, formats[i]);
2638     }
2639 
2640     g_free(formats);
2641 }
2642 
2643 /* This function is to find a node in the bs graph */
2644 BlockDriverState *bdrv_find_node(const char *node_name)
2645 {
2646     BlockDriverState *bs;
2647 
2648     assert(node_name);
2649 
2650     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2651         if (!strcmp(node_name, bs->node_name)) {
2652             return bs;
2653         }
2654     }
2655     return NULL;
2656 }
2657 
2658 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2659 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2660 {
2661     BlockDeviceInfoList *list, *entry;
2662     BlockDriverState *bs;
2663 
2664     list = NULL;
2665     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2666         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2667         if (!info) {
2668             qapi_free_BlockDeviceInfoList(list);
2669             return NULL;
2670         }
2671         entry = g_malloc0(sizeof(*entry));
2672         entry->value = info;
2673         entry->next = list;
2674         list = entry;
2675     }
2676 
2677     return list;
2678 }
2679 
2680 BlockDriverState *bdrv_lookup_bs(const char *device,
2681                                  const char *node_name,
2682                                  Error **errp)
2683 {
2684     BlockBackend *blk;
2685     BlockDriverState *bs;
2686 
2687     if (device) {
2688         blk = blk_by_name(device);
2689 
2690         if (blk) {
2691             bs = blk_bs(blk);
2692             if (!bs) {
2693                 error_setg(errp, "Device '%s' has no medium", device);
2694             }
2695 
2696             return bs;
2697         }
2698     }
2699 
2700     if (node_name) {
2701         bs = bdrv_find_node(node_name);
2702 
2703         if (bs) {
2704             return bs;
2705         }
2706     }
2707 
2708     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2709                      device ? device : "",
2710                      node_name ? node_name : "");
2711     return NULL;
2712 }
2713 
2714 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2715  * return false.  If either argument is NULL, return false. */
2716 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2717 {
2718     while (top && top != base) {
2719         top = backing_bs(top);
2720     }
2721 
2722     return top != NULL;
2723 }
2724 
2725 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2726 {
2727     if (!bs) {
2728         return QTAILQ_FIRST(&graph_bdrv_states);
2729     }
2730     return QTAILQ_NEXT(bs, node_list);
2731 }
2732 
2733 BlockDriverState *bdrv_next(BlockDriverState *bs)
2734 {
2735     if (!bs) {
2736         return QTAILQ_FIRST(&bdrv_states);
2737     }
2738     return QTAILQ_NEXT(bs, device_list);
2739 }
2740 
2741 const char *bdrv_get_node_name(const BlockDriverState *bs)
2742 {
2743     return bs->node_name;
2744 }
2745 
2746 /* TODO check what callers really want: bs->node_name or blk_name() */
2747 const char *bdrv_get_device_name(const BlockDriverState *bs)
2748 {
2749     return bs->blk ? blk_name(bs->blk) : "";
2750 }
2751 
2752 /* This can be used to identify nodes that might not have a device
2753  * name associated. Since node and device names live in the same
2754  * namespace, the result is unambiguous. The exception is if both are
2755  * absent, then this returns an empty (non-null) string. */
2756 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2757 {
2758     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2759 }
2760 
2761 int bdrv_get_flags(BlockDriverState *bs)
2762 {
2763     return bs->open_flags;
2764 }
2765 
2766 int bdrv_has_zero_init_1(BlockDriverState *bs)
2767 {
2768     return 1;
2769 }
2770 
2771 int bdrv_has_zero_init(BlockDriverState *bs)
2772 {
2773     assert(bs->drv);
2774 
2775     /* If BS is a copy on write image, it is initialized to
2776        the contents of the base image, which may not be zeroes.  */
2777     if (bs->backing) {
2778         return 0;
2779     }
2780     if (bs->drv->bdrv_has_zero_init) {
2781         return bs->drv->bdrv_has_zero_init(bs);
2782     }
2783 
2784     /* safe default */
2785     return 0;
2786 }
2787 
2788 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2789 {
2790     BlockDriverInfo bdi;
2791 
2792     if (bs->backing) {
2793         return false;
2794     }
2795 
2796     if (bdrv_get_info(bs, &bdi) == 0) {
2797         return bdi.unallocated_blocks_are_zero;
2798     }
2799 
2800     return false;
2801 }
2802 
2803 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2804 {
2805     BlockDriverInfo bdi;
2806 
2807     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2808         return false;
2809     }
2810 
2811     if (bdrv_get_info(bs, &bdi) == 0) {
2812         return bdi.can_write_zeroes_with_unmap;
2813     }
2814 
2815     return false;
2816 }
2817 
2818 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2819 {
2820     if (bs->backing && bs->backing->bs->encrypted)
2821         return bs->backing_file;
2822     else if (bs->encrypted)
2823         return bs->filename;
2824     else
2825         return NULL;
2826 }
2827 
2828 void bdrv_get_backing_filename(BlockDriverState *bs,
2829                                char *filename, int filename_size)
2830 {
2831     pstrcpy(filename, filename_size, bs->backing_file);
2832 }
2833 
2834 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2835 {
2836     BlockDriver *drv = bs->drv;
2837     if (!drv)
2838         return -ENOMEDIUM;
2839     if (!drv->bdrv_get_info)
2840         return -ENOTSUP;
2841     memset(bdi, 0, sizeof(*bdi));
2842     return drv->bdrv_get_info(bs, bdi);
2843 }
2844 
2845 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2846 {
2847     BlockDriver *drv = bs->drv;
2848     if (drv && drv->bdrv_get_specific_info) {
2849         return drv->bdrv_get_specific_info(bs);
2850     }
2851     return NULL;
2852 }
2853 
2854 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2855 {
2856     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2857         return;
2858     }
2859 
2860     bs->drv->bdrv_debug_event(bs, event);
2861 }
2862 
2863 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2864                           const char *tag)
2865 {
2866     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2867         bs = bs->file ? bs->file->bs : NULL;
2868     }
2869 
2870     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2871         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2872     }
2873 
2874     return -ENOTSUP;
2875 }
2876 
2877 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2878 {
2879     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2880         bs = bs->file ? bs->file->bs : NULL;
2881     }
2882 
2883     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2884         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2885     }
2886 
2887     return -ENOTSUP;
2888 }
2889 
2890 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2891 {
2892     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2893         bs = bs->file ? bs->file->bs : NULL;
2894     }
2895 
2896     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2897         return bs->drv->bdrv_debug_resume(bs, tag);
2898     }
2899 
2900     return -ENOTSUP;
2901 }
2902 
2903 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2904 {
2905     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2906         bs = bs->file ? bs->file->bs : NULL;
2907     }
2908 
2909     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2910         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2911     }
2912 
2913     return false;
2914 }
2915 
2916 int bdrv_is_snapshot(BlockDriverState *bs)
2917 {
2918     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2919 }
2920 
2921 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2922  * relative, it must be relative to the chain.  So, passing in bs->filename
2923  * from a BDS as backing_file should not be done, as that may be relative to
2924  * the CWD rather than the chain. */
2925 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2926         const char *backing_file)
2927 {
2928     char *filename_full = NULL;
2929     char *backing_file_full = NULL;
2930     char *filename_tmp = NULL;
2931     int is_protocol = 0;
2932     BlockDriverState *curr_bs = NULL;
2933     BlockDriverState *retval = NULL;
2934 
2935     if (!bs || !bs->drv || !backing_file) {
2936         return NULL;
2937     }
2938 
2939     filename_full     = g_malloc(PATH_MAX);
2940     backing_file_full = g_malloc(PATH_MAX);
2941     filename_tmp      = g_malloc(PATH_MAX);
2942 
2943     is_protocol = path_has_protocol(backing_file);
2944 
2945     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
2946 
2947         /* If either of the filename paths is actually a protocol, then
2948          * compare unmodified paths; otherwise make paths relative */
2949         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2950             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2951                 retval = curr_bs->backing->bs;
2952                 break;
2953             }
2954         } else {
2955             /* If not an absolute filename path, make it relative to the current
2956              * image's filename path */
2957             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2958                          backing_file);
2959 
2960             /* We are going to compare absolute pathnames */
2961             if (!realpath(filename_tmp, filename_full)) {
2962                 continue;
2963             }
2964 
2965             /* We need to make sure the backing filename we are comparing against
2966              * is relative to the current image filename (or absolute) */
2967             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2968                          curr_bs->backing_file);
2969 
2970             if (!realpath(filename_tmp, backing_file_full)) {
2971                 continue;
2972             }
2973 
2974             if (strcmp(backing_file_full, filename_full) == 0) {
2975                 retval = curr_bs->backing->bs;
2976                 break;
2977             }
2978         }
2979     }
2980 
2981     g_free(filename_full);
2982     g_free(backing_file_full);
2983     g_free(filename_tmp);
2984     return retval;
2985 }
2986 
2987 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2988 {
2989     if (!bs->drv) {
2990         return 0;
2991     }
2992 
2993     if (!bs->backing) {
2994         return 0;
2995     }
2996 
2997     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
2998 }
2999 
/* Run the module init functions registered under MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3004 
/* Same as bdrv_init(), but sets use_bdrv_whitelist first. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3010 
3011 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3012 {
3013     Error *local_err = NULL;
3014     int ret;
3015 
3016     if (!bs->drv)  {
3017         return;
3018     }
3019 
3020     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3021         return;
3022     }
3023     bs->open_flags &= ~BDRV_O_INCOMING;
3024 
3025     if (bs->drv->bdrv_invalidate_cache) {
3026         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3027     } else if (bs->file) {
3028         bdrv_invalidate_cache(bs->file->bs, &local_err);
3029     }
3030     if (local_err) {
3031         error_propagate(errp, local_err);
3032         return;
3033     }
3034 
3035     ret = refresh_total_sectors(bs, bs->total_sectors);
3036     if (ret < 0) {
3037         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3038         return;
3039     }
3040 }
3041 
3042 void bdrv_invalidate_cache_all(Error **errp)
3043 {
3044     BlockDriverState *bs;
3045     Error *local_err = NULL;
3046 
3047     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3048         AioContext *aio_context = bdrv_get_aio_context(bs);
3049 
3050         aio_context_acquire(aio_context);
3051         bdrv_invalidate_cache(bs, &local_err);
3052         aio_context_release(aio_context);
3053         if (local_err) {
3054             error_propagate(errp, local_err);
3055             return;
3056         }
3057     }
3058 }
3059 
3060 /**************************************************************/
3061 /* removable device support */
3062 
3063 /**
3064  * Return TRUE if the media is present
3065  */
3066 bool bdrv_is_inserted(BlockDriverState *bs)
3067 {
3068     BlockDriver *drv = bs->drv;
3069     BdrvChild *child;
3070 
3071     if (!drv) {
3072         return false;
3073     }
3074     if (drv->bdrv_is_inserted) {
3075         return drv->bdrv_is_inserted(bs);
3076     }
3077     QLIST_FOREACH(child, &bs->children, next) {
3078         if (!bdrv_is_inserted(child->bs)) {
3079             return false;
3080         }
3081     }
3082     return true;
3083 }
3084 
3085 /**
3086  * Return whether the media changed since the last call to this
3087  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3088  */
3089 int bdrv_media_changed(BlockDriverState *bs)
3090 {
3091     BlockDriver *drv = bs->drv;
3092 
3093     if (drv && drv->bdrv_media_changed) {
3094         return drv->bdrv_media_changed(bs);
3095     }
3096     return -ENOTSUP;
3097 }
3098 
3099 /**
3100  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3101  */
3102 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3103 {
3104     BlockDriver *drv = bs->drv;
3105     const char *device_name;
3106 
3107     if (drv && drv->bdrv_eject) {
3108         drv->bdrv_eject(bs, eject_flag);
3109     }
3110 
3111     device_name = bdrv_get_device_name(bs);
3112     if (device_name[0] != '\0') {
3113         qapi_event_send_device_tray_moved(device_name,
3114                                           eject_flag, &error_abort);
3115     }
3116 }
3117 
3118 /**
3119  * Lock or unlock the media (if it is locked, the user won't be able
3120  * to eject it manually).
3121  */
3122 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3123 {
3124     BlockDriver *drv = bs->drv;
3125 
3126     trace_bdrv_lock_medium(bs, locked);
3127 
3128     if (drv && drv->bdrv_lock_medium) {
3129         drv->bdrv_lock_medium(bs, locked);
3130     }
3131 }
3132 
3133 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3134 {
3135     BdrvDirtyBitmap *bm;
3136 
3137     assert(name);
3138     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3139         if (bm->name && !strcmp(name, bm->name)) {
3140             return bm;
3141         }
3142     }
3143     return NULL;
3144 }
3145 
3146 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3147 {
3148     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3149     g_free(bitmap->name);
3150     bitmap->name = NULL;
3151 }
3152 
3153 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3154                                           uint32_t granularity,
3155                                           const char *name,
3156                                           Error **errp)
3157 {
3158     int64_t bitmap_size;
3159     BdrvDirtyBitmap *bitmap;
3160     uint32_t sector_granularity;
3161 
3162     assert((granularity & (granularity - 1)) == 0);
3163 
3164     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3165         error_setg(errp, "Bitmap already exists: %s", name);
3166         return NULL;
3167     }
3168     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3169     assert(sector_granularity);
3170     bitmap_size = bdrv_nb_sectors(bs);
3171     if (bitmap_size < 0) {
3172         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3173         errno = -bitmap_size;
3174         return NULL;
3175     }
3176     bitmap = g_new0(BdrvDirtyBitmap, 1);
3177     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3178     bitmap->size = bitmap_size;
3179     bitmap->name = g_strdup(name);
3180     bitmap->disabled = false;
3181     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3182     return bitmap;
3183 }
3184 
3185 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3186 {
3187     return bitmap->successor;
3188 }
3189 
3190 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3191 {
3192     return !(bitmap->disabled || bitmap->successor);
3193 }
3194 
3195 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3196 {
3197     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3198         return DIRTY_BITMAP_STATUS_FROZEN;
3199     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3200         return DIRTY_BITMAP_STATUS_DISABLED;
3201     } else {
3202         return DIRTY_BITMAP_STATUS_ACTIVE;
3203     }
3204 }
3205 
3206 /**
3207  * Create a successor bitmap destined to replace this bitmap after an operation.
3208  * Requires that the bitmap is not frozen and has no successor.
3209  */
3210 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3211                                        BdrvDirtyBitmap *bitmap, Error **errp)
3212 {
3213     uint64_t granularity;
3214     BdrvDirtyBitmap *child;
3215 
3216     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3217         error_setg(errp, "Cannot create a successor for a bitmap that is "
3218                    "currently frozen");
3219         return -1;
3220     }
3221     assert(!bitmap->successor);
3222 
3223     /* Create an anonymous successor */
3224     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3225     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3226     if (!child) {
3227         return -1;
3228     }
3229 
3230     /* Successor will be on or off based on our current state. */
3231     child->disabled = bitmap->disabled;
3232 
3233     /* Install the successor and freeze the parent */
3234     bitmap->successor = child;
3235     return 0;
3236 }
3237 
3238 /**
3239  * For a bitmap with a successor, yield our name to the successor,
3240  * delete the old bitmap, and return a handle to the new bitmap.
3241  */
3242 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3243                                             BdrvDirtyBitmap *bitmap,
3244                                             Error **errp)
3245 {
3246     char *name;
3247     BdrvDirtyBitmap *successor = bitmap->successor;
3248 
3249     if (successor == NULL) {
3250         error_setg(errp, "Cannot relinquish control if "
3251                    "there's no successor present");
3252         return NULL;
3253     }
3254 
3255     name = bitmap->name;
3256     bitmap->name = NULL;
3257     successor->name = name;
3258     bitmap->successor = NULL;
3259     bdrv_release_dirty_bitmap(bs, bitmap);
3260 
3261     return successor;
3262 }
3263 
3264 /**
3265  * In cases of failure where we can no longer safely delete the parent,
3266  * we may wish to re-join the parent and child/successor.
3267  * The merged parent will be un-frozen, but not explicitly re-enabled.
3268  */
3269 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3270                                            BdrvDirtyBitmap *parent,
3271                                            Error **errp)
3272 {
3273     BdrvDirtyBitmap *successor = parent->successor;
3274 
3275     if (!successor) {
3276         error_setg(errp, "Cannot reclaim a successor when none is present");
3277         return NULL;
3278     }
3279 
3280     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3281         error_setg(errp, "Merging of parent and successor bitmap failed");
3282         return NULL;
3283     }
3284     bdrv_release_dirty_bitmap(bs, successor);
3285     parent->successor = NULL;
3286 
3287     return parent;
3288 }
3289 
3290 /**
3291  * Truncates _all_ bitmaps attached to a BDS.
3292  */
3293 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3294 {
3295     BdrvDirtyBitmap *bitmap;
3296     uint64_t size = bdrv_nb_sectors(bs);
3297 
3298     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3299         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3300         hbitmap_truncate(bitmap->bitmap, size);
3301         bitmap->size = size;
3302     }
3303 }
3304 
3305 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3306 {
3307     BdrvDirtyBitmap *bm, *next;
3308     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3309         if (bm == bitmap) {
3310             assert(!bdrv_dirty_bitmap_frozen(bm));
3311             QLIST_REMOVE(bitmap, list);
3312             hbitmap_free(bitmap->bitmap);
3313             g_free(bitmap->name);
3314             g_free(bitmap);
3315             return;
3316         }
3317     }
3318 }
3319 
/* Mark @bitmap as disabled.  The bitmap must not be frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
3325 
/* Clear the disabled flag of @bitmap.  The bitmap must not be frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
3331 
3332 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3333 {
3334     BdrvDirtyBitmap *bm;
3335     BlockDirtyInfoList *list = NULL;
3336     BlockDirtyInfoList **plist = &list;
3337 
3338     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3339         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3340         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3341         info->count = bdrv_get_dirty_count(bm);
3342         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3343         info->has_name = !!bm->name;
3344         info->name = g_strdup(bm->name);
3345         info->status = bdrv_dirty_bitmap_status(bm);
3346         entry->value = info;
3347         *plist = entry;
3348         plist = &entry->next;
3349     }
3350 
3351     return list;
3352 }
3353 
3354 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3355 {
3356     if (bitmap) {
3357         return hbitmap_get(bitmap->bitmap, sector);
3358     } else {
3359         return 0;
3360     }
3361 }
3362 
3363 /**
3364  * Chooses a default granularity based on the existing cluster size,
3365  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3366  * is no cluster size information available.
3367  */
3368 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3369 {
3370     BlockDriverInfo bdi;
3371     uint32_t granularity;
3372 
3373     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3374         granularity = MAX(4096, bdi.cluster_size);
3375         granularity = MIN(65536, granularity);
3376     } else {
3377         granularity = 65536;
3378     }
3379 
3380     return granularity;
3381 }
3382 
/*
 * Return the granularity of @bitmap in bytes: the sector size shifted left
 * by the granularity reported by the underlying HBitmap.
 */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
3387 
/* Initialize @hbi to iterate over @bitmap starting at offset 0. */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
3392 
/*
 * Set @nr_sectors sectors starting at @cur_sector in @bitmap.
 * The bitmap must be enabled (asserted).
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
3399 
/*
 * Reset @nr_sectors sectors starting at @cur_sector in @bitmap.
 * The bitmap must be enabled (asserted).
 */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
3406 
3407 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3408 {
3409     assert(bdrv_dirty_bitmap_enabled(bitmap));
3410     if (!out) {
3411         hbitmap_reset_all(bitmap->bitmap);
3412     } else {
3413         HBitmap *backup = bitmap->bitmap;
3414         bitmap->bitmap = hbitmap_alloc(bitmap->size,
3415                                        hbitmap_granularity(backup));
3416         *out = backup;
3417     }
3418 }
3419 
3420 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3421 {
3422     HBitmap *tmp = bitmap->bitmap;
3423     assert(bdrv_dirty_bitmap_enabled(bitmap));
3424     bitmap->bitmap = in;
3425     hbitmap_free(tmp);
3426 }
3427 
3428 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3429                     int nr_sectors)
3430 {
3431     BdrvDirtyBitmap *bitmap;
3432     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3433         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3434             continue;
3435         }
3436         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3437     }
3438 }
3439 
/**
 * Advance an HBitmapIter to an arbitrary offset.
 *
 * The iterator is re-initialized on the HBitmap it already refers to
 * (hbi->hb, which must be set), starting at @offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
3448 
/* Return hbitmap_count() of the HBitmap backing @bitmap. */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
3453 
/* Get a reference to bs: increments bs->refcnt.  Pair with bdrv_unref(). */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3459 
3460 /* Release a previously grabbed reference to bs.
3461  * If after releasing, reference count is zero, the BlockDriverState is
3462  * deleted. */
3463 void bdrv_unref(BlockDriverState *bs)
3464 {
3465     if (!bs) {
3466         return;
3467     }
3468     assert(bs->refcnt > 0);
3469     if (--bs->refcnt == 0) {
3470         bdrv_delete(bs);
3471     }
3472 }
3473 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Why the operation is blocked; also the key used to unblock.  The
     * pointer is stored as passed in and never freed by the blocker code
     * visible in this file section. */
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3478 
3479 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3480 {
3481     BdrvOpBlocker *blocker;
3482     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3483     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3484         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3485         if (errp) {
3486             error_setg(errp, "Node '%s' is busy: %s",
3487                        bdrv_get_device_or_node_name(bs),
3488                        error_get_pretty(blocker->reason));
3489         }
3490         return true;
3491     }
3492     return false;
3493 }
3494 
3495 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3496 {
3497     BdrvOpBlocker *blocker;
3498     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3499 
3500     blocker = g_new0(BdrvOpBlocker, 1);
3501     blocker->reason = reason;
3502     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3503 }
3504 
3505 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3506 {
3507     BdrvOpBlocker *blocker, *next;
3508     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3509     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3510         if (blocker->reason == reason) {
3511             QLIST_REMOVE(blocker, list);
3512             g_free(blocker);
3513         }
3514     }
3515 }
3516 
3517 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3518 {
3519     int i;
3520     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3521         bdrv_op_block(bs, i, reason);
3522     }
3523 }
3524 
3525 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3526 {
3527     int i;
3528     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3529         bdrv_op_unblock(bs, i, reason);
3530     }
3531 }
3532 
3533 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3534 {
3535     int i;
3536 
3537     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3538         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3539             return false;
3540         }
3541     }
3542     return true;
3543 }
3544 
/*
 * Create the image @filename using format driver @fmt.
 *
 * @filename:      image to create; also used to look up the protocol driver
 * @fmt:           name of the format driver
 * @base_filename: if non-NULL, stored as the backing file option
 * @base_fmt:      if non-NULL, stored as the backing format option
 * @options:       if non-NULL, option string parsed into the creation
 *                 options (the "-o" options)
 * @img_size:      stored as the size option before @options is parsed
 * @flags:         open flags; only used (with RDWR, SNAPSHOT and NO_BACKING
 *                 cleared) when the backing file has to be opened to learn
 *                 its size
 * @errp:          receives the error on failure
 * @quiet:         if false, a "Formatting ..." line with the final options
 *                 is printed to stdout before the image is created
 *
 * If the resulting size option reads back as -1, the size is taken from the
 * backing file; without a backing file this is an error.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* The accepted options are those of the format plus the protocol */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The detailed parse error is reported here; the caller only
             * gets the generic message below */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Read back: the backing file may also have come from @options */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            /* Force the requested driver for the backing image, if any */
            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; forwards any error still held in local_err */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3711 
/* Return the AioContext currently recorded in @bs (bs->aio_context). */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
3716 
3717 void bdrv_detach_aio_context(BlockDriverState *bs)
3718 {
3719     BdrvAioNotifier *baf;
3720 
3721     if (!bs->drv) {
3722         return;
3723     }
3724 
3725     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3726         baf->detach_aio_context(baf->opaque);
3727     }
3728 
3729     if (bs->throttle_state) {
3730         throttle_timers_detach_aio_context(&bs->throttle_timers);
3731     }
3732     if (bs->drv->bdrv_detach_aio_context) {
3733         bs->drv->bdrv_detach_aio_context(bs);
3734     }
3735     if (bs->file) {
3736         bdrv_detach_aio_context(bs->file->bs);
3737     }
3738     if (bs->backing) {
3739         bdrv_detach_aio_context(bs->backing->bs);
3740     }
3741 
3742     bs->aio_context = NULL;
3743 }
3744 
3745 void bdrv_attach_aio_context(BlockDriverState *bs,
3746                              AioContext *new_context)
3747 {
3748     BdrvAioNotifier *ban;
3749 
3750     if (!bs->drv) {
3751         return;
3752     }
3753 
3754     bs->aio_context = new_context;
3755 
3756     if (bs->backing) {
3757         bdrv_attach_aio_context(bs->backing->bs, new_context);
3758     }
3759     if (bs->file) {
3760         bdrv_attach_aio_context(bs->file->bs, new_context);
3761     }
3762     if (bs->drv->bdrv_attach_aio_context) {
3763         bs->drv->bdrv_attach_aio_context(bs, new_context);
3764     }
3765     if (bs->throttle_state) {
3766         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3767     }
3768 
3769     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3770         ban->attached_aio_context(new_context, ban->opaque);
3771     }
3772 }
3773 
/*
 * Move @bs to @new_context: drain it, detach it from its current
 * AioContext, then attach it to @new_context while holding @new_context.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain(bs); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
3787 
3788 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3789         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3790         void (*detach_aio_context)(void *opaque), void *opaque)
3791 {
3792     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3793     *ban = (BdrvAioNotifier){
3794         .attached_aio_context = attached_aio_context,
3795         .detach_aio_context   = detach_aio_context,
3796         .opaque               = opaque
3797     };
3798 
3799     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3800 }
3801 
3802 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3803                                       void (*attached_aio_context)(AioContext *,
3804                                                                    void *),
3805                                       void (*detach_aio_context)(void *),
3806                                       void *opaque)
3807 {
3808     BdrvAioNotifier *ban, *ban_next;
3809 
3810     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3811         if (ban->attached_aio_context == attached_aio_context &&
3812             ban->detach_aio_context   == detach_aio_context   &&
3813             ban->opaque               == opaque)
3814         {
3815             QLIST_REMOVE(ban, list);
3816             g_free(ban);
3817 
3818             return;
3819         }
3820     }
3821 
3822     abort();
3823 }
3824 
3825 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3826                        BlockDriverAmendStatusCB *status_cb)
3827 {
3828     if (!bs->drv->bdrv_amend_options) {
3829         return -ENOTSUP;
3830     }
3831     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3832 }
3833 
3834 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3835  * of block filter and by bdrv_is_first_non_filter.
3836  * It is used to test if the given bs is the candidate or recurse more in the
3837  * node graph.
3838  */
3839 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3840                                       BlockDriverState *candidate)
3841 {
3842     /* return false if basic checks fails */
3843     if (!bs || !bs->drv) {
3844         return false;
3845     }
3846 
3847     /* the code reached a non block filter driver -> check if the bs is
3848      * the same as the candidate. It's the recursion termination condition.
3849      */
3850     if (!bs->drv->is_filter) {
3851         return bs == candidate;
3852     }
3853     /* Down this path the driver is a block filter driver */
3854 
3855     /* If the block filter recursion method is defined use it to recurse down
3856      * the node graph.
3857      */
3858     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3859         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3860     }
3861 
3862     /* the driver is a block filter but don't allow to recurse -> return false
3863      */
3864     return false;
3865 }
3866 
3867 /* This function checks if the candidate is the first non filter bs down it's
3868  * bs chain. Since we don't have pointers to parents it explore all bs chains
3869  * from the top. Some filters can choose not to pass down the recursion.
3870  */
3871 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3872 {
3873     BlockDriverState *bs;
3874 
3875     /* walk down the bs forest recursively */
3876     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3877         bool perm;
3878 
3879         /* try to recurse in this top level bs */
3880         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3881 
3882         /* candidate is the first non filter */
3883         if (perm) {
3884             return true;
3885         }
3886     }
3887 
3888     return false;
3889 }
3890 
3891 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3892                                         const char *node_name, Error **errp)
3893 {
3894     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3895     AioContext *aio_context;
3896 
3897     if (!to_replace_bs) {
3898         error_setg(errp, "Node name '%s' not found", node_name);
3899         return NULL;
3900     }
3901 
3902     aio_context = bdrv_get_aio_context(to_replace_bs);
3903     aio_context_acquire(aio_context);
3904 
3905     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3906         to_replace_bs = NULL;
3907         goto out;
3908     }
3909 
3910     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3911      * most non filter in order to prevent data corruption.
3912      * Another benefit is that this tests exclude backing files which are
3913      * blocked by the backing blockers.
3914      */
3915     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3916         error_setg(errp, "Only top most non filter can be replaced");
3917         to_replace_bs = NULL;
3918         goto out;
3919     }
3920 
3921 out:
3922     aio_context_release(aio_context);
3923     return to_replace_bs;
3924 }
3925 
3926 static bool append_open_options(QDict *d, BlockDriverState *bs)
3927 {
3928     const QDictEntry *entry;
3929     bool found_any = false;
3930 
3931     for (entry = qdict_first(bs->options); entry;
3932          entry = qdict_next(bs->options, entry))
3933     {
3934         /* Only take options for this level and exclude all non-driver-specific
3935          * options */
3936         if (!strchr(qdict_entry_key(entry), '.') &&
3937             strcmp(qdict_entry_key(entry), "node-name"))
3938         {
3939             qobject_incref(qdict_entry_value(entry));
3940             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3941             found_any = true;
3942         }
3943     }
3944 
3945     return found_any;
3946 }
3947 
3948 /* Updates the following BDS fields:
3949  *  - exact_filename: A filename which may be used for opening a block device
3950  *                    which (mostly) equals the given BDS (even without any
3951  *                    other options; so reading and writing must return the same
3952  *                    results, but caching etc. may be different)
3953  *  - full_open_options: Options which, when given when opening a block device
3954  *                       (without a filename), result in a BDS (mostly)
3955  *                       equalling the given one
3956  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3957  *              full_open_options is converted to a JSON object, prefixed with
3958  *              "json:" (for use through the JSON pseudo protocol) and put here.
3959  */
3960 void bdrv_refresh_filename(BlockDriverState *bs)
3961 {
3962     BlockDriver *drv = bs->drv;
3963     QDict *opts;
3964 
3965     if (!drv) {
3966         return;
3967     }
3968 
3969     /* This BDS's file name will most probably depend on its file's name, so
3970      * refresh that first */
3971     if (bs->file) {
3972         bdrv_refresh_filename(bs->file->bs);
3973     }
3974 
3975     if (drv->bdrv_refresh_filename) {
3976         /* Obsolete information is of no use here, so drop the old file name
3977          * information before refreshing it */
3978         bs->exact_filename[0] = '\0';
3979         if (bs->full_open_options) {
3980             QDECREF(bs->full_open_options);
3981             bs->full_open_options = NULL;
3982         }
3983 
3984         drv->bdrv_refresh_filename(bs);
3985     } else if (bs->file) {
3986         /* Try to reconstruct valid information from the underlying file */
3987         bool has_open_options;
3988 
3989         bs->exact_filename[0] = '\0';
3990         if (bs->full_open_options) {
3991             QDECREF(bs->full_open_options);
3992             bs->full_open_options = NULL;
3993         }
3994 
3995         opts = qdict_new();
3996         has_open_options = append_open_options(opts, bs);
3997 
3998         /* If no specific options have been given for this BDS, the filename of
3999          * the underlying file should suffice for this one as well */
4000         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4001             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4002         }
4003         /* Reconstructing the full options QDict is simple for most format block
4004          * drivers, as long as the full options are known for the underlying
4005          * file BDS. The full options QDict of that file BDS should somehow
4006          * contain a representation of the filename, therefore the following
4007          * suffices without querying the (exact_)filename of this BDS. */
4008         if (bs->file->bs->full_open_options) {
4009             qdict_put_obj(opts, "driver",
4010                           QOBJECT(qstring_from_str(drv->format_name)));
4011             QINCREF(bs->file->bs->full_open_options);
4012             qdict_put_obj(opts, "file",
4013                           QOBJECT(bs->file->bs->full_open_options));
4014 
4015             bs->full_open_options = opts;
4016         } else {
4017             QDECREF(opts);
4018         }
4019     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4020         /* There is no underlying file BDS (at least referenced by BDS.file),
4021          * so the full options QDict should be equal to the options given
4022          * specifically for this block device when it was opened (plus the
4023          * driver specification).
4024          * Because those options don't change, there is no need to update
4025          * full_open_options when it's already set. */
4026 
4027         opts = qdict_new();
4028         append_open_options(opts, bs);
4029         qdict_put_obj(opts, "driver",
4030                       QOBJECT(qstring_from_str(drv->format_name)));
4031 
4032         if (bs->exact_filename[0]) {
4033             /* This may not work for all block protocol drivers (some may
4034              * require this filename to be parsed), but we have to find some
4035              * default solution here, so just include it. If some block driver
4036              * does not support pure options without any filename at all or
4037              * needs some special format of the options QDict, it needs to
4038              * implement the driver-specific bdrv_refresh_filename() function.
4039              */
4040             qdict_put_obj(opts, "filename",
4041                           QOBJECT(qstring_from_str(bs->exact_filename)));
4042         }
4043 
4044         bs->full_open_options = opts;
4045     }
4046 
4047     if (bs->exact_filename[0]) {
4048         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4049     } else if (bs->full_open_options) {
4050         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4051         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4052                  qstring_get_str(json));
4053         QDECREF(json);
4054     }
4055 }
4056