xref: /openbmc/qemu/block.c (revision 135a67a692bedb952ea720351026247104da8645)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
54 /**
55  * A BdrvDirtyBitmap can be in three possible states:
56  * (1) successor is NULL and disabled is false: full r/w mode
57  * (2) successor is NULL and disabled is true: read only mode ("disabled")
58  * (3) successor is set: frozen mode.
59  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
60  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
61  */
62 struct BdrvDirtyBitmap {
63     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
64     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
65     char *name;                 /* Optional non-empty unique ID */
66     int64_t size;               /* Size of the bitmap (Number of sectors) */
67     bool disabled;              /* Bitmap is read-only */
68     QLIST_ENTRY(BdrvDirtyBitmap) list;
69 };
70 
71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72 
73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
74     QTAILQ_HEAD_INITIALIZER(bdrv_states);
75 
76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
77     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
78 
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80     QLIST_HEAD_INITIALIZER(bdrv_drivers);
81 
82 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
83 /* If non-zero, use only whitelisted block drivers */
84 static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by a colon (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int is_lower = drive >= 'a' && drive <= 'z';
    const int is_upper = drive >= 'A' && drive <= 'Z';

    if (!is_lower && !is_upper) {
        /* Also covers the empty string, so filename[1] is never read */
        return 0;
    }

    return filename[1] == ':';
}
93 
/*
 * Return 1 if @filename is exactly a drive specification ("d:") or starts
 * with one of the prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Bare drive letter plus colon, with nothing following */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Either spelling of the "\\.\" prefix */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
104 #endif
105 
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108     if (!bs || !bs->drv) {
109         /* page size or 4k (hdd sector size) should be on the safe side */
110         return MAX(4096, getpagesize());
111     }
112 
113     return bs->bl.opt_mem_alignment;
114 }
115 
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118     if (!bs || !bs->drv) {
119         /* page size or 4k (hdd sector size) should be on the safe side */
120         return MAX(4096, getpagesize());
121     }
122 
123     return bs->bl.min_mem_alignment;
124 }
125 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any path separator. On Windows, drive specifications are never
 * treated as a protocol and '\' also counts as a path separator.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
143 
/*
 * Return non-zero if @path is absolute: it starts with '/', or on Windows
 * also with '\' or a drive specification (including names like "\\.\d:").
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
156 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename relative
 * to @base_path.
 *
 * An absolute @filename is copied as is. Otherwise the directory part of
 * @base_path is kept and @filename is appended to it. The directory part
 * ends after the last path separator or after the first ':' (to support
 * URLs), whichever comes later. Nothing is written if @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' in base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep everything up to the later of the two positions */
    cut = after_sep > after_colon ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200 
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202                                                   const char *backing,
203                                                   char *dest, size_t sz,
204                                                   Error **errp)
205 {
206     if (backing[0] == '\0' || path_has_protocol(backing) ||
207         path_is_absolute(backing))
208     {
209         pstrcpy(dest, sz, backing);
210     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211         error_setg(errp, "Cannot use relative backing file names for '%s'",
212                    backed);
213     } else {
214         path_combine(dest, sz, backed, backing);
215     }
216 }
217 
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219                                     Error **errp)
220 {
221     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222 
223     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224                                                  dest, sz, errp);
225 }
226 
227 void bdrv_register(BlockDriver *bdrv)
228 {
229     bdrv_setup_io_funcs(bdrv);
230 
231     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
232 }
233 
234 BlockDriverState *bdrv_new_root(void)
235 {
236     BlockDriverState *bs = bdrv_new();
237 
238     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239     return bs;
240 }
241 
242 BlockDriverState *bdrv_new(void)
243 {
244     BlockDriverState *bs;
245     int i;
246 
247     bs = g_new0(BlockDriverState, 1);
248     QLIST_INIT(&bs->dirty_bitmaps);
249     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250         QLIST_INIT(&bs->op_blockers[i]);
251     }
252     bdrv_iostatus_disable(bs);
253     notifier_list_init(&bs->close_notifiers);
254     notifier_with_return_list_init(&bs->before_write_notifiers);
255     qemu_co_queue_init(&bs->throttled_reqs[0]);
256     qemu_co_queue_init(&bs->throttled_reqs[1]);
257     bs->refcnt = 1;
258     bs->aio_context = qemu_get_aio_context();
259 
260     return bs;
261 }
262 
263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
264 {
265     notifier_list_add(&bs->close_notifiers, notify);
266 }
267 
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270     BlockDriver *drv1;
271     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272         if (!strcmp(drv1->format_name, format_name)) {
273             return drv1;
274         }
275     }
276     return NULL;
277 }
278 
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281     static const char *whitelist_rw[] = {
282         CONFIG_BDRV_RW_WHITELIST
283     };
284     static const char *whitelist_ro[] = {
285         CONFIG_BDRV_RO_WHITELIST
286     };
287     const char **p;
288 
289     if (!whitelist_rw[0] && !whitelist_ro[0]) {
290         return 1;               /* no whitelist, anything goes */
291     }
292 
293     for (p = whitelist_rw; *p; p++) {
294         if (!strcmp(drv->format_name, *p)) {
295             return 1;
296         }
297     }
298     if (read_only) {
299         for (p = whitelist_ro; *p; p++) {
300             if (!strcmp(drv->format_name, *p)) {
301                 return 1;
302             }
303         }
304     }
305     return 0;
306 }
307 
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309                                           bool read_only)
310 {
311     BlockDriver *drv = bdrv_find_format(format_name);
312     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314 
315 typedef struct CreateCo {
316     BlockDriver *drv;
317     char *filename;
318     QemuOpts *opts;
319     int ret;
320     Error *err;
321 } CreateCo;
322 
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325     Error *local_err = NULL;
326     int ret;
327 
328     CreateCo *cco = opaque;
329     assert(cco->drv);
330 
331     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332     if (local_err) {
333         error_propagate(&cco->err, local_err);
334     }
335     cco->ret = ret;
336 }
337 
338 int bdrv_create(BlockDriver *drv, const char* filename,
339                 QemuOpts *opts, Error **errp)
340 {
341     int ret;
342 
343     Coroutine *co;
344     CreateCo cco = {
345         .drv = drv,
346         .filename = g_strdup(filename),
347         .opts = opts,
348         .ret = NOT_DONE,
349         .err = NULL,
350     };
351 
352     if (!drv->bdrv_create) {
353         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354         ret = -ENOTSUP;
355         goto out;
356     }
357 
358     if (qemu_in_coroutine()) {
359         /* Fast-path if already in coroutine context */
360         bdrv_create_co_entry(&cco);
361     } else {
362         co = qemu_coroutine_create(bdrv_create_co_entry);
363         qemu_coroutine_enter(co, &cco);
364         while (cco.ret == NOT_DONE) {
365             aio_poll(qemu_get_aio_context(), true);
366         }
367     }
368 
369     ret = cco.ret;
370     if (ret < 0) {
371         if (cco.err) {
372             error_propagate(errp, cco.err);
373         } else {
374             error_setg_errno(errp, -ret, "Could not create image");
375         }
376     }
377 
378 out:
379     g_free(cco.filename);
380     return ret;
381 }
382 
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385     BlockDriver *drv;
386     Error *local_err = NULL;
387     int ret;
388 
389     drv = bdrv_find_protocol(filename, true, errp);
390     if (drv == NULL) {
391         return -ENOENT;
392     }
393 
394     ret = bdrv_create(drv, filename, opts, &local_err);
395     if (local_err) {
396         error_propagate(errp, local_err);
397     }
398     return ret;
399 }
400 
401 /**
402  * Try to get @bs's logical and physical block size.
403  * On success, store them in @bsz struct and return 0.
404  * On failure return -errno.
405  * @bs must not be empty.
406  */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409     BlockDriver *drv = bs->drv;
410 
411     if (drv && drv->bdrv_probe_blocksizes) {
412         return drv->bdrv_probe_blocksizes(bs, bsz);
413     }
414 
415     return -ENOTSUP;
416 }
417 
418 /**
419  * Try to get @bs's geometry (cyls, heads, sectors).
420  * On success, store them in @geo struct and return 0.
421  * On failure return -errno.
422  * @bs must not be empty.
423  */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426     BlockDriver *drv = bs->drv;
427 
428     if (drv && drv->bdrv_probe_geometry) {
429         return drv->bdrv_probe_geometry(bs, geo);
430     }
431 
432     return -ENOTSUP;
433 }
434 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the file; @size is the capacity of that
 * buffer in bytes.
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is unset. -EOVERFLOW is returned when the name does not fit into
 * @filename (including @size <= 0).
 *
 * Return 0 upon success, otherwise a negative error value. On Windows the
 * value is the negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd, len, saved_errno;
    const char *tmpdir;

    /* A negative size would be converted to a huge size_t by snprintf() */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* snprintf() returns a negative value on error; treat it as overflow */
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* Save the close() error before unlink() can overwrite errno */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }

    return 0;
#endif
}
470 
471 /*
472  * Detect host devices. By convention, /dev/cdrom[N] is always
473  * recognized as a host CDROM.
474  */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477     int score_max = 0, score;
478     BlockDriver *drv = NULL, *d;
479 
480     QLIST_FOREACH(d, &bdrv_drivers, list) {
481         if (d->bdrv_probe_device) {
482             score = d->bdrv_probe_device(filename);
483             if (score > score_max) {
484                 score_max = score;
485                 drv = d;
486             }
487         }
488     }
489 
490     return drv;
491 }
492 
493 BlockDriver *bdrv_find_protocol(const char *filename,
494                                 bool allow_protocol_prefix,
495                                 Error **errp)
496 {
497     BlockDriver *drv1;
498     char protocol[128];
499     int len;
500     const char *p;
501 
502     /* TODO Drivers without bdrv_file_open must be specified explicitly */
503 
504     /*
505      * XXX(hch): we really should not let host device detection
506      * override an explicit protocol specification, but moving this
507      * later breaks access to device names with colons in them.
508      * Thanks to the brain-dead persistent naming schemes on udev-
509      * based Linux systems those actually are quite common.
510      */
511     drv1 = find_hdev_driver(filename);
512     if (drv1) {
513         return drv1;
514     }
515 
516     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517         return &bdrv_file;
518     }
519 
520     p = strchr(filename, ':');
521     assert(p != NULL);
522     len = p - filename;
523     if (len > sizeof(protocol) - 1)
524         len = sizeof(protocol) - 1;
525     memcpy(protocol, filename, len);
526     protocol[len] = '\0';
527     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528         if (drv1->protocol_name &&
529             !strcmp(drv1->protocol_name, protocol)) {
530             return drv1;
531         }
532     }
533 
534     error_setg(errp, "Unknown protocol '%s'", protocol);
535     return NULL;
536 }
537 
538 /*
539  * Guess image format by probing its contents.
540  * This is not a good idea when your image is raw (CVE-2008-2004), but
541  * we do it anyway for backward compatibility.
542  *
543  * @buf         contains the image's first @buf_size bytes.
544  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545  *              but can be smaller if the image file is smaller)
546  * @filename    is its filename.
547  *
548  * For all block drivers, call the bdrv_probe() method to get its
549  * probing score.
550  * Return the first block driver with the highest probing score.
551  */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553                             const char *filename)
554 {
555     int score_max = 0, score;
556     BlockDriver *drv = NULL, *d;
557 
558     QLIST_FOREACH(d, &bdrv_drivers, list) {
559         if (d->bdrv_probe) {
560             score = d->bdrv_probe(buf, buf_size, filename);
561             if (score > score_max) {
562                 score_max = score;
563                 drv = d;
564             }
565         }
566     }
567 
568     return drv;
569 }
570 
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572                              BlockDriver **pdrv, Error **errp)
573 {
574     BlockDriver *drv;
575     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576     int ret = 0;
577 
578     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580         *pdrv = &bdrv_raw;
581         return ret;
582     }
583 
584     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585     if (ret < 0) {
586         error_setg_errno(errp, -ret, "Could not read image for determining its "
587                          "format");
588         *pdrv = NULL;
589         return ret;
590     }
591 
592     drv = bdrv_probe_all(buf, ret, filename);
593     if (!drv) {
594         error_setg(errp, "Could not determine image format: No compatible "
595                    "driver found");
596         ret = -ENOENT;
597     }
598     *pdrv = drv;
599     return ret;
600 }
601 
602 /**
603  * Set the current 'total_sectors' value
604  * Return 0 on success, -errno on error.
605  */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608     BlockDriver *drv = bs->drv;
609 
610     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611     if (bs->sg)
612         return 0;
613 
614     /* query actual device if possible, otherwise just trust the hint */
615     if (drv->bdrv_getlength) {
616         int64_t length = drv->bdrv_getlength(bs);
617         if (length < 0) {
618             return length;
619         }
620         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621     }
622 
623     bs->total_sectors = hint;
624     return 0;
625 }
626 
627 /**
628  * Set open flags for a given discard mode
629  *
630  * Return 0 on success, -1 if the discard mode was invalid.
631  */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634     *flags &= ~BDRV_O_UNMAP;
635 
636     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637         /* do nothing */
638     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639         *flags |= BDRV_O_UNMAP;
640     } else {
641         return -1;
642     }
643 
644     return 0;
645 }
646 
647 /**
648  * Set open flags for a given cache mode
649  *
650  * Return 0 on success, -1 if the cache mode was invalid.
651  */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654     *flags &= ~BDRV_O_CACHE_MASK;
655 
656     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658     } else if (!strcmp(mode, "directsync")) {
659         *flags |= BDRV_O_NOCACHE;
660     } else if (!strcmp(mode, "writeback")) {
661         *flags |= BDRV_O_CACHE_WB;
662     } else if (!strcmp(mode, "unsafe")) {
663         *flags |= BDRV_O_CACHE_WB;
664         *flags |= BDRV_O_NO_FLUSH;
665     } else if (!strcmp(mode, "writethrough")) {
666         /* this is the default */
667     } else {
668         return -1;
669     }
670 
671     return 0;
672 }
673 
674 /*
675  * Returns the flags that a temporary snapshot should get, based on the
676  * originally requested flags (the originally requested image will have flags
677  * like a backing file)
678  */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683 
684 /*
685  * Returns the flags that bs->file should get, based on the given flags for
686  * the parent BDS
687  */
688 static int bdrv_inherited_flags(int flags)
689 {
690     /* Enable protocol handling, disable format probing for bs->file */
691     flags |= BDRV_O_PROTOCOL;
692 
693     /* Our block drivers take care to send flushes and respect unmap policy,
694      * so we can enable both unconditionally on lower layers. */
695     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696 
697     /* Clear flags that only apply to the top layer */
698     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699 
700     return flags;
701 }
702 
703 /*
704  * Returns the flags that bs->backing_hd should get, based on the given flags
705  * for the parent BDS
706  */
707 static int bdrv_backing_flags(int flags)
708 {
709     /* backing files always opened read-only */
710     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711 
712     /* snapshot=on is handled on the top layer */
713     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714 
715     return flags;
716 }
717 
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720     int open_flags = flags | BDRV_O_CACHE_WB;
721 
722     /*
723      * Clear flags that are internal to the block layer before opening the
724      * image.
725      */
726     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727 
728     /*
729      * Snapshots should be writable.
730      */
731     if (flags & BDRV_O_TEMPORARY) {
732         open_flags |= BDRV_O_RDWR;
733     }
734 
735     return open_flags;
736 }
737 
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739                                   const char *node_name,
740                                   Error **errp)
741 {
742     if (!node_name) {
743         return;
744     }
745 
746     /* Check for empty string or invalid characters */
747     if (!id_wellformed(node_name)) {
748         error_setg(errp, "Invalid node name");
749         return;
750     }
751 
752     /* takes care of avoiding namespaces collisions */
753     if (blk_by_name(node_name)) {
754         error_setg(errp, "node-name=%s is conflicting with a device id",
755                    node_name);
756         return;
757     }
758 
759     /* takes care of avoiding duplicates node names */
760     if (bdrv_find_node(node_name)) {
761         error_setg(errp, "Duplicate node name");
762         return;
763     }
764 
765     /* copy node name into the bs and insert it into the graph list */
766     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769 
770 /*
771  * Common part for opening disk images and files
772  *
773  * Removes all processed options from *options.
774  */
775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
776     QDict *options, int flags, BlockDriver *drv, Error **errp)
777 {
778     int ret, open_flags;
779     const char *filename;
780     const char *node_name = NULL;
781     Error *local_err = NULL;
782 
783     assert(drv != NULL);
784     assert(bs->file == NULL);
785     assert(options != NULL && bs->options != options);
786 
787     if (file != NULL) {
788         filename = file->filename;
789     } else {
790         filename = qdict_get_try_str(options, "filename");
791     }
792 
793     if (drv->bdrv_needs_filename && !filename) {
794         error_setg(errp, "The '%s' block driver requires a file name",
795                    drv->format_name);
796         return -EINVAL;
797     }
798 
799     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
800 
801     node_name = qdict_get_try_str(options, "node-name");
802     bdrv_assign_node_name(bs, node_name, &local_err);
803     if (local_err) {
804         error_propagate(errp, local_err);
805         return -EINVAL;
806     }
807     qdict_del(options, "node-name");
808 
809     /* bdrv_open() with directly using a protocol as drv. This layer is already
810      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
811      * and return immediately. */
812     if (file != NULL && drv->bdrv_file_open) {
813         bdrv_swap(file, bs);
814         return 0;
815     }
816 
817     bs->open_flags = flags;
818     bs->guest_block_size = 512;
819     bs->request_alignment = 512;
820     bs->zero_beyond_eof = true;
821     open_flags = bdrv_open_flags(bs, flags);
822     bs->read_only = !(open_flags & BDRV_O_RDWR);
823 
824     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
825         error_setg(errp,
826                    !bs->read_only && bdrv_is_whitelisted(drv, true)
827                         ? "Driver '%s' can only be used for read-only devices"
828                         : "Driver '%s' is not whitelisted",
829                    drv->format_name);
830         return -ENOTSUP;
831     }
832 
833     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
834     if (flags & BDRV_O_COPY_ON_READ) {
835         if (!bs->read_only) {
836             bdrv_enable_copy_on_read(bs);
837         } else {
838             error_setg(errp, "Can't use copy-on-read on read-only device");
839             return -EINVAL;
840         }
841     }
842 
843     if (filename != NULL) {
844         pstrcpy(bs->filename, sizeof(bs->filename), filename);
845     } else {
846         bs->filename[0] = '\0';
847     }
848     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
849 
850     bs->drv = drv;
851     bs->opaque = g_malloc0(drv->instance_size);
852 
853     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
854 
855     /* Open the image, either directly or using a protocol */
856     if (drv->bdrv_file_open) {
857         assert(file == NULL);
858         assert(!drv->bdrv_needs_filename || filename != NULL);
859         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
860     } else {
861         if (file == NULL) {
862             error_setg(errp, "Can't use '%s' as a block driver for the "
863                        "protocol level", drv->format_name);
864             ret = -EINVAL;
865             goto free_and_fail;
866         }
867         bs->file = file;
868         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
869     }
870 
871     if (ret < 0) {
872         if (local_err) {
873             error_propagate(errp, local_err);
874         } else if (bs->filename[0]) {
875             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
876         } else {
877             error_setg_errno(errp, -ret, "Could not open image");
878         }
879         goto free_and_fail;
880     }
881 
882     if (bs->encrypted) {
883         error_report("Encrypted images are deprecated");
884         error_printf("Support for them will be removed in a future release.\n"
885                      "You can use 'qemu-img convert' to convert your image"
886                      " to an unencrypted one.\n");
887     }
888 
889     ret = refresh_total_sectors(bs, bs->total_sectors);
890     if (ret < 0) {
891         error_setg_errno(errp, -ret, "Could not refresh total sector count");
892         goto free_and_fail;
893     }
894 
895     bdrv_refresh_limits(bs, &local_err);
896     if (local_err) {
897         error_propagate(errp, local_err);
898         ret = -EINVAL;
899         goto free_and_fail;
900     }
901 
902     assert(bdrv_opt_mem_align(bs) != 0);
903     assert(bdrv_min_mem_align(bs) != 0);
904     assert((bs->request_alignment != 0) || bs->sg);
905     return 0;
906 
907 free_and_fail:
908     bs->file = NULL;
909     g_free(bs->opaque);
910     bs->opaque = NULL;
911     bs->drv = NULL;
912     return ret;
913 }
914 
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917     QObject *options_obj;
918     QDict *options;
919     int ret;
920 
921     ret = strstart(filename, "json:", &filename);
922     assert(ret);
923 
924     options_obj = qobject_from_json(filename);
925     if (!options_obj) {
926         error_setg(errp, "Could not parse the JSON options");
927         return NULL;
928     }
929 
930     if (qobject_type(options_obj) != QTYPE_QDICT) {
931         qobject_decref(options_obj);
932         error_setg(errp, "Invalid JSON object given");
933         return NULL;
934     }
935 
936     options = qobject_to_qdict(options_obj);
937     qdict_flatten(options);
938 
939     return options;
940 }
941 
942 /*
943  * Fills in default options for opening images and converts the legacy
944  * filename/flags pair to option QDict entries.
945  */
946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
947                              BlockDriver *drv, Error **errp)
948 {
949     const char *filename = *pfilename;
950     const char *drvname;
951     bool protocol = flags & BDRV_O_PROTOCOL;
952     bool parse_filename = false;
953     Error *local_err = NULL;
954 
955     /* Parse json: pseudo-protocol */
956     if (filename && g_str_has_prefix(filename, "json:")) {
957         QDict *json_options = parse_json_filename(filename, &local_err);
958         if (local_err) {
959             error_propagate(errp, local_err);
960             return -EINVAL;
961         }
962 
963         /* Options given in the filename have lower priority than options
964          * specified directly */
965         qdict_join(*options, json_options, false);
966         QDECREF(json_options);
967         *pfilename = filename = NULL;
968     }
969 
970     /* Fetch the file name from the options QDict if necessary */
971     if (protocol && filename) {
972         if (!qdict_haskey(*options, "filename")) {
973             qdict_put(*options, "filename", qstring_from_str(filename));
974             parse_filename = true;
975         } else {
976             error_setg(errp, "Can't specify 'file' and 'filename' options at "
977                              "the same time");
978             return -EINVAL;
979         }
980     }
981 
982     /* Find the right block driver */
983     filename = qdict_get_try_str(*options, "filename");
984     drvname = qdict_get_try_str(*options, "driver");
985 
986     if (drv) {
987         if (drvname) {
988             error_setg(errp, "Driver specified twice");
989             return -EINVAL;
990         }
991         drvname = drv->format_name;
992         qdict_put(*options, "driver", qstring_from_str(drvname));
993     } else {
994         if (!drvname && protocol) {
995             if (filename) {
996                 drv = bdrv_find_protocol(filename, parse_filename, errp);
997                 if (!drv) {
998                     return -EINVAL;
999                 }
1000 
1001                 drvname = drv->format_name;
1002                 qdict_put(*options, "driver", qstring_from_str(drvname));
1003             } else {
1004                 error_setg(errp, "Must specify either driver or file");
1005                 return -EINVAL;
1006             }
1007         } else if (drvname) {
1008             drv = bdrv_find_format(drvname);
1009             if (!drv) {
1010                 error_setg(errp, "Unknown driver '%s'", drvname);
1011                 return -ENOENT;
1012             }
1013         }
1014     }
1015 
1016     assert(drv || !protocol);
1017 
1018     /* Driver-specific filename parsing */
1019     if (drv && drv->bdrv_parse_filename && parse_filename) {
1020         drv->bdrv_parse_filename(filename, *options, &local_err);
1021         if (local_err) {
1022             error_propagate(errp, local_err);
1023             return -EINVAL;
1024         }
1025 
1026         if (!drv->bdrv_needs_filename) {
1027             qdict_del(*options, "filename");
1028         }
1029     }
1030 
1031     return 0;
1032 }
1033 
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036 
1037     if (bs->backing_hd) {
1038         assert(bs->backing_blocker);
1039         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040     } else if (backing_hd) {
1041         error_setg(&bs->backing_blocker,
1042                    "node is used as backing hd of '%s'",
1043                    bdrv_get_device_or_node_name(bs));
1044     }
1045 
1046     bs->backing_hd = backing_hd;
1047     if (!backing_hd) {
1048         error_free(bs->backing_blocker);
1049         bs->backing_blocker = NULL;
1050         goto out;
1051     }
1052     bs->open_flags &= ~BDRV_O_NO_BACKING;
1053     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055             backing_hd->drv ? backing_hd->drv->format_name : "");
1056 
1057     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060                     bs->backing_blocker);
1061 out:
1062     bdrv_refresh_limits(bs, NULL);
1063 }
1064 
1065 /*
1066  * Opens the backing file for a BlockDriverState if not yet open
1067  *
1068  * options is a QDict of options to pass to the block drivers, or NULL for an
1069  * empty set of options. The reference to the QDict is transferred to this
1070  * function (even on failure), so if the caller intends to reuse the dictionary,
1071  * it needs to use QINCREF() before calling bdrv_file_open.
1072  */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075     char *backing_filename = g_malloc0(PATH_MAX);
1076     int ret = 0;
1077     BlockDriverState *backing_hd;
1078     Error *local_err = NULL;
1079 
1080     if (bs->backing_hd != NULL) {
1081         QDECREF(options);
1082         goto free_exit;
1083     }
1084 
1085     /* NULL means an empty set of options */
1086     if (options == NULL) {
1087         options = qdict_new();
1088     }
1089 
1090     bs->open_flags &= ~BDRV_O_NO_BACKING;
1091     if (qdict_haskey(options, "file.filename")) {
1092         backing_filename[0] = '\0';
1093     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094         QDECREF(options);
1095         goto free_exit;
1096     } else {
1097         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098                                        &local_err);
1099         if (local_err) {
1100             ret = -EINVAL;
1101             error_propagate(errp, local_err);
1102             QDECREF(options);
1103             goto free_exit;
1104         }
1105     }
1106 
1107     if (!bs->drv || !bs->drv->supports_backing) {
1108         ret = -EINVAL;
1109         error_setg(errp, "Driver doesn't support backing files");
1110         QDECREF(options);
1111         goto free_exit;
1112     }
1113 
1114     backing_hd = bdrv_new();
1115 
1116     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118     }
1119 
1120     assert(bs->backing_hd == NULL);
1121     ret = bdrv_open(&backing_hd,
1122                     *backing_filename ? backing_filename : NULL, NULL, options,
1123                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124     if (ret < 0) {
1125         bdrv_unref(backing_hd);
1126         backing_hd = NULL;
1127         bs->open_flags |= BDRV_O_NO_BACKING;
1128         error_setg(errp, "Could not open backing file: %s",
1129                    error_get_pretty(local_err));
1130         error_free(local_err);
1131         goto free_exit;
1132     }
1133     bdrv_set_backing_hd(bs, backing_hd);
1134 
1135 free_exit:
1136     g_free(backing_filename);
1137     return ret;
1138 }
1139 
1140 /*
1141  * Opens a disk image whose options are given as BlockdevRef in another block
1142  * device's options.
1143  *
1144  * If allow_none is true, no image will be opened if filename is false and no
1145  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146  *
1147  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149  * itself, all options starting with "${bdref_key}." are considered part of the
1150  * BlockdevRef.
1151  *
1152  * The BlockdevRef will be removed from the options QDict.
1153  *
1154  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155  */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157                     QDict *options, const char *bdref_key, int flags,
1158                     bool allow_none, Error **errp)
1159 {
1160     QDict *image_options;
1161     int ret;
1162     char *bdref_key_dot;
1163     const char *reference;
1164 
1165     assert(pbs);
1166     assert(*pbs == NULL);
1167 
1168     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170     g_free(bdref_key_dot);
1171 
1172     reference = qdict_get_try_str(options, bdref_key);
1173     if (!filename && !reference && !qdict_size(image_options)) {
1174         if (allow_none) {
1175             ret = 0;
1176         } else {
1177             error_setg(errp, "A block device must be specified for \"%s\"",
1178                        bdref_key);
1179             ret = -EINVAL;
1180         }
1181         QDECREF(image_options);
1182         goto done;
1183     }
1184 
1185     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186 
1187 done:
1188     qdict_del(options, bdref_key);
1189     return ret;
1190 }
1191 
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196     int64_t total_size;
1197     QemuOpts *opts = NULL;
1198     QDict *snapshot_options;
1199     BlockDriverState *bs_snapshot;
1200     Error *local_err;
1201     int ret;
1202 
1203     /* if snapshot, we create a temporary backing file and open it
1204        instead of opening 'filename' directly */
1205 
1206     /* Get the required size from the image */
1207     total_size = bdrv_getlength(bs);
1208     if (total_size < 0) {
1209         ret = total_size;
1210         error_setg_errno(errp, -total_size, "Could not get image size");
1211         goto out;
1212     }
1213 
1214     /* Create the temporary image */
1215     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216     if (ret < 0) {
1217         error_setg_errno(errp, -ret, "Could not get temporary filename");
1218         goto out;
1219     }
1220 
1221     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222                             &error_abort);
1223     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225     qemu_opts_del(opts);
1226     if (ret < 0) {
1227         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228                          "'%s': %s", tmp_filename,
1229                          error_get_pretty(local_err));
1230         error_free(local_err);
1231         goto out;
1232     }
1233 
1234     /* Prepare a new options QDict for the temporary file */
1235     snapshot_options = qdict_new();
1236     qdict_put(snapshot_options, "file.driver",
1237               qstring_from_str("file"));
1238     qdict_put(snapshot_options, "file.filename",
1239               qstring_from_str(tmp_filename));
1240 
1241     bs_snapshot = bdrv_new();
1242 
1243     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244                     flags, &bdrv_qcow2, &local_err);
1245     if (ret < 0) {
1246         error_propagate(errp, local_err);
1247         goto out;
1248     }
1249 
1250     bdrv_append(bs_snapshot, bs);
1251 
1252 out:
1253     g_free(tmp_filename);
1254     return ret;
1255 }
1256 
1257 /*
1258  * Opens a disk image (raw, qcow2, vmdk, ...)
1259  *
1260  * options is a QDict of options to pass to the block drivers, or NULL for an
1261  * empty set of options. The reference to the QDict belongs to the block layer
1262  * after the call (even on failure), so if the caller intends to reuse the
1263  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264  *
1265  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266  * If it is not NULL, the referenced BDS will be reused.
1267  *
1268  * The reference parameter may be used to specify an existing block device which
1269  * should be opened. If specified, neither options nor a filename may be given,
1270  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271  */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273               const char *reference, QDict *options, int flags,
1274               BlockDriver *drv, Error **errp)
1275 {
1276     int ret;
1277     BlockDriverState *file = NULL, *bs;
1278     const char *drvname;
1279     Error *local_err = NULL;
1280     int snapshot_flags = 0;
1281 
1282     assert(pbs);
1283 
1284     if (reference) {
1285         bool options_non_empty = options ? qdict_size(options) : false;
1286         QDECREF(options);
1287 
1288         if (*pbs) {
1289             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290                        "another block device");
1291             return -EINVAL;
1292         }
1293 
1294         if (filename || options_non_empty) {
1295             error_setg(errp, "Cannot reference an existing block device with "
1296                        "additional options or a new filename");
1297             return -EINVAL;
1298         }
1299 
1300         bs = bdrv_lookup_bs(reference, reference, errp);
1301         if (!bs) {
1302             return -ENODEV;
1303         }
1304         bdrv_ref(bs);
1305         *pbs = bs;
1306         return 0;
1307     }
1308 
1309     if (*pbs) {
1310         bs = *pbs;
1311     } else {
1312         bs = bdrv_new();
1313     }
1314 
1315     /* NULL means an empty set of options */
1316     if (options == NULL) {
1317         options = qdict_new();
1318     }
1319 
1320     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321     if (local_err) {
1322         goto fail;
1323     }
1324 
1325     /* Find the right image format driver */
1326     drv = NULL;
1327     drvname = qdict_get_try_str(options, "driver");
1328     if (drvname) {
1329         drv = bdrv_find_format(drvname);
1330         qdict_del(options, "driver");
1331         if (!drv) {
1332             error_setg(errp, "Unknown driver: '%s'", drvname);
1333             ret = -EINVAL;
1334             goto fail;
1335         }
1336     }
1337 
1338     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339     if (drv && !drv->bdrv_file_open) {
1340         /* If the user explicitly wants a format driver here, we'll need to add
1341          * another layer for the protocol in bs->file */
1342         flags &= ~BDRV_O_PROTOCOL;
1343     }
1344 
1345     bs->options = options;
1346     options = qdict_clone_shallow(options);
1347 
1348     /* Open image file without format layer */
1349     if ((flags & BDRV_O_PROTOCOL) == 0) {
1350         if (flags & BDRV_O_RDWR) {
1351             flags |= BDRV_O_ALLOW_RDWR;
1352         }
1353         if (flags & BDRV_O_SNAPSHOT) {
1354             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355             flags = bdrv_backing_flags(flags);
1356         }
1357 
1358         assert(file == NULL);
1359         ret = bdrv_open_image(&file, filename, options, "file",
1360                               bdrv_inherited_flags(flags),
1361                               true, &local_err);
1362         if (ret < 0) {
1363             goto fail;
1364         }
1365     }
1366 
1367     /* Image format probing */
1368     bs->probed = !drv;
1369     if (!drv && file) {
1370         ret = find_image_format(file, filename, &drv, &local_err);
1371         if (ret < 0) {
1372             goto fail;
1373         }
1374     } else if (!drv) {
1375         error_setg(errp, "Must specify either driver or file");
1376         ret = -EINVAL;
1377         goto fail;
1378     }
1379 
1380     /* Open the image */
1381     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382     if (ret < 0) {
1383         goto fail;
1384     }
1385 
1386     if (file && (bs->file != file)) {
1387         bdrv_unref(file);
1388         file = NULL;
1389     }
1390 
1391     /* If there is a backing file, use it */
1392     if ((flags & BDRV_O_NO_BACKING) == 0) {
1393         QDict *backing_options;
1394 
1395         qdict_extract_subqdict(options, &backing_options, "backing.");
1396         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397         if (ret < 0) {
1398             goto close_and_fail;
1399         }
1400     }
1401 
1402     bdrv_refresh_filename(bs);
1403 
1404     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405      * temporary snapshot afterwards. */
1406     if (snapshot_flags) {
1407         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408         if (local_err) {
1409             goto close_and_fail;
1410         }
1411     }
1412 
1413     /* Check if any unknown options were used */
1414     if (options && (qdict_size(options) != 0)) {
1415         const QDictEntry *entry = qdict_first(options);
1416         if (flags & BDRV_O_PROTOCOL) {
1417             error_setg(errp, "Block protocol '%s' doesn't support the option "
1418                        "'%s'", drv->format_name, entry->key);
1419         } else {
1420             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421                        "support the option '%s'", drv->format_name,
1422                        bdrv_get_device_name(bs), entry->key);
1423         }
1424 
1425         ret = -EINVAL;
1426         goto close_and_fail;
1427     }
1428 
1429     if (!bdrv_key_required(bs)) {
1430         if (bs->blk) {
1431             blk_dev_change_media_cb(bs->blk, true);
1432         }
1433     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434                && !runstate_check(RUN_STATE_INMIGRATE)
1435                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436         error_setg(errp,
1437                    "Guest must be stopped for opening of encrypted image");
1438         ret = -EBUSY;
1439         goto close_and_fail;
1440     }
1441 
1442     QDECREF(options);
1443     *pbs = bs;
1444     return 0;
1445 
1446 fail:
1447     if (file != NULL) {
1448         bdrv_unref(file);
1449     }
1450     QDECREF(bs->options);
1451     QDECREF(options);
1452     bs->options = NULL;
1453     if (!*pbs) {
1454         /* If *pbs is NULL, a new BDS has been created in this function and
1455            needs to be freed now. Otherwise, it does not need to be closed,
1456            since it has not really been opened yet. */
1457         bdrv_unref(bs);
1458     }
1459     if (local_err) {
1460         error_propagate(errp, local_err);
1461     }
1462     return ret;
1463 
1464 close_and_fail:
1465     /* See fail path, but now the BDS has to be always closed */
1466     if (*pbs) {
1467         bdrv_close(bs);
1468     } else {
1469         bdrv_unref(bs);
1470     }
1471     QDECREF(options);
1472     if (local_err) {
1473         error_propagate(errp, local_err);
1474     }
1475     return ret;
1476 }
1477 
1478 typedef struct BlockReopenQueueEntry {
1479      bool prepared;
1480      BDRVReopenState state;
1481      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1482 } BlockReopenQueueEntry;
1483 
1484 /*
1485  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486  * reopen of multiple devices.
1487  *
1488  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490  * be created and initialized. This newly created BlockReopenQueue should be
1491  * passed back in for subsequent calls that are intended to be of the same
1492  * atomic 'set'.
1493  *
1494  * bs is the BlockDriverState to add to the reopen queue.
1495  *
1496  * flags contains the open flags for the associated bs
1497  *
1498  * returns a pointer to bs_queue, which is either the newly allocated
1499  * bs_queue, or the existing bs_queue being used.
1500  *
1501  */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503                                     BlockDriverState *bs, int flags)
1504 {
1505     assert(bs != NULL);
1506 
1507     BlockReopenQueueEntry *bs_entry;
1508     if (bs_queue == NULL) {
1509         bs_queue = g_new0(BlockReopenQueue, 1);
1510         QSIMPLEQ_INIT(bs_queue);
1511     }
1512 
1513     /* bdrv_open() masks this flag out */
1514     flags &= ~BDRV_O_PROTOCOL;
1515 
1516     if (bs->file) {
1517         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518     }
1519 
1520     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522 
1523     bs_entry->state.bs = bs;
1524     bs_entry->state.flags = flags;
1525 
1526     return bs_queue;
1527 }
1528 
1529 /*
1530  * Reopen multiple BlockDriverStates atomically & transactionally.
1531  *
1532  * The queue passed in (bs_queue) must have been built up previous
1533  * via bdrv_reopen_queue().
1534  *
1535  * Reopens all BDS specified in the queue, with the appropriate
1536  * flags.  All devices are prepared for reopen, and failure of any
1537  * device will cause all device changes to be abandonded, and intermediate
1538  * data cleaned up.
1539  *
1540  * If all devices prepare successfully, then the changes are committed
1541  * to all devices.
1542  *
1543  */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546     int ret = -1;
1547     BlockReopenQueueEntry *bs_entry, *next;
1548     Error *local_err = NULL;
1549 
1550     assert(bs_queue != NULL);
1551 
1552     bdrv_drain_all();
1553 
1554     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556             error_propagate(errp, local_err);
1557             goto cleanup;
1558         }
1559         bs_entry->prepared = true;
1560     }
1561 
1562     /* If we reach this point, we have success and just need to apply the
1563      * changes
1564      */
1565     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566         bdrv_reopen_commit(&bs_entry->state);
1567     }
1568 
1569     ret = 0;
1570 
1571 cleanup:
1572     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573         if (ret && bs_entry->prepared) {
1574             bdrv_reopen_abort(&bs_entry->state);
1575         }
1576         g_free(bs_entry);
1577     }
1578     g_free(bs_queue);
1579     return ret;
1580 }
1581 
1582 
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586     int ret = -1;
1587     Error *local_err = NULL;
1588     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589 
1590     ret = bdrv_reopen_multiple(queue, &local_err);
1591     if (local_err != NULL) {
1592         error_propagate(errp, local_err);
1593     }
1594     return ret;
1595 }
1596 
1597 
1598 /*
1599  * Prepares a BlockDriverState for reopen. All changes are staged in the
1600  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601  * the block driver layer .bdrv_reopen_prepare()
1602  *
1603  * bs is the BlockDriverState to reopen
1604  * flags are the new open flags
1605  * queue is the reopen queue
1606  *
1607  * Returns 0 on success, non-zero on error.  On error errp will be set
1608  * as well.
1609  *
1610  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611  * It is the responsibility of the caller to then call the abort() or
1612  * commit() for any other BDS that have been left in a prepare() state
1613  *
1614  */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616                         Error **errp)
1617 {
1618     int ret = -1;
1619     Error *local_err = NULL;
1620     BlockDriver *drv;
1621 
1622     assert(reopen_state != NULL);
1623     assert(reopen_state->bs->drv != NULL);
1624     drv = reopen_state->bs->drv;
1625 
1626     /* if we are to stay read-only, do not allow permission change
1627      * to r/w */
1628     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629         reopen_state->flags & BDRV_O_RDWR) {
1630         error_setg(errp, "Node '%s' is read only",
1631                    bdrv_get_device_or_node_name(reopen_state->bs));
1632         goto error;
1633     }
1634 
1635 
1636     ret = bdrv_flush(reopen_state->bs);
1637     if (ret) {
1638         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639                   strerror(-ret));
1640         goto error;
1641     }
1642 
1643     if (drv->bdrv_reopen_prepare) {
1644         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645         if (ret) {
1646             if (local_err != NULL) {
1647                 error_propagate(errp, local_err);
1648             } else {
1649                 error_setg(errp, "failed while preparing to reopen image '%s'",
1650                            reopen_state->bs->filename);
1651             }
1652             goto error;
1653         }
1654     } else {
1655         /* It is currently mandatory to have a bdrv_reopen_prepare()
1656          * handler for each supported drv. */
1657         error_setg(errp, "Block format '%s' used by node '%s' "
1658                    "does not support reopening files", drv->format_name,
1659                    bdrv_get_device_or_node_name(reopen_state->bs));
1660         ret = -1;
1661         goto error;
1662     }
1663 
1664     ret = 0;
1665 
1666 error:
1667     return ret;
1668 }
1669 
1670 /*
1671  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672  * makes them final by swapping the staging BlockDriverState contents into
1673  * the active BlockDriverState contents.
1674  */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677     BlockDriver *drv;
1678 
1679     assert(reopen_state != NULL);
1680     drv = reopen_state->bs->drv;
1681     assert(drv != NULL);
1682 
1683     /* If there are any driver level actions to take */
1684     if (drv->bdrv_reopen_commit) {
1685         drv->bdrv_reopen_commit(reopen_state);
1686     }
1687 
1688     /* set BDS specific flags now */
1689     reopen_state->bs->open_flags         = reopen_state->flags;
1690     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691                                               BDRV_O_CACHE_WB);
1692     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693 
1694     bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696 
1697 /*
1698  * Abort the reopen, and delete and free the staged changes in
1699  * reopen_state
1700  */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703     BlockDriver *drv;
1704 
1705     assert(reopen_state != NULL);
1706     drv = reopen_state->bs->drv;
1707     assert(drv != NULL);
1708 
1709     if (drv->bdrv_reopen_abort) {
1710         drv->bdrv_reopen_abort(reopen_state);
1711     }
1712 }
1713 
1714 
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717     BdrvAioNotifier *ban, *ban_next;
1718 
1719     if (bs->job) {
1720         block_job_cancel_sync(bs->job);
1721     }
1722     bdrv_drain_all(); /* complete I/O */
1723     bdrv_flush(bs);
1724     bdrv_drain_all(); /* in case flush left pending I/O */
1725     notifier_list_notify(&bs->close_notifiers, bs);
1726 
1727     if (bs->drv) {
1728         if (bs->backing_hd) {
1729             BlockDriverState *backing_hd = bs->backing_hd;
1730             bdrv_set_backing_hd(bs, NULL);
1731             bdrv_unref(backing_hd);
1732         }
1733         bs->drv->bdrv_close(bs);
1734         g_free(bs->opaque);
1735         bs->opaque = NULL;
1736         bs->drv = NULL;
1737         bs->copy_on_read = 0;
1738         bs->backing_file[0] = '\0';
1739         bs->backing_format[0] = '\0';
1740         bs->total_sectors = 0;
1741         bs->encrypted = 0;
1742         bs->valid_key = 0;
1743         bs->sg = 0;
1744         bs->zero_beyond_eof = false;
1745         QDECREF(bs->options);
1746         bs->options = NULL;
1747         QDECREF(bs->full_open_options);
1748         bs->full_open_options = NULL;
1749 
1750         if (bs->file != NULL) {
1751             bdrv_unref(bs->file);
1752             bs->file = NULL;
1753         }
1754     }
1755 
1756     if (bs->blk) {
1757         blk_dev_change_media_cb(bs->blk, false);
1758     }
1759 
1760     /*throttling disk I/O limits*/
1761     if (bs->io_limits_enabled) {
1762         bdrv_io_limits_disable(bs);
1763     }
1764 
1765     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766         g_free(ban);
1767     }
1768     QLIST_INIT(&bs->aio_notifiers);
1769 }
1770 
1771 void bdrv_close_all(void)
1772 {
1773     BlockDriverState *bs;
1774 
1775     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776         AioContext *aio_context = bdrv_get_aio_context(bs);
1777 
1778         aio_context_acquire(aio_context);
1779         bdrv_close(bs);
1780         aio_context_release(aio_context);
1781     }
1782 }
1783 
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785  * graph_bdrv_state list.
1786    Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789     /*
1790      * Take care to remove bs from bdrv_states only when it's actually
1791      * in it.  Note that bs->device_list.tqe_prev is initially null,
1792      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1793      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794      * resetting it to null on remove.
1795      */
1796     if (bs->device_list.tqe_prev) {
1797         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798         bs->device_list.tqe_prev = NULL;
1799     }
1800     if (bs->node_name[0] != '\0') {
1801         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802     }
1803     bs->node_name[0] = '\0';
1804 }
1805 
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808     if (bs->drv && bs->drv->bdrv_rebind) {
1809         bs->drv->bdrv_rebind(bs);
1810     }
1811 }
1812 
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814                                      BlockDriverState *bs_src)
1815 {
1816     /* move some fields that need to stay attached to the device */
1817 
1818     /* dev info */
1819     bs_dest->guest_block_size   = bs_src->guest_block_size;
1820     bs_dest->copy_on_read       = bs_src->copy_on_read;
1821 
1822     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823 
1824     /* i/o throttled req */
1825     memcpy(&bs_dest->throttle_state,
1826            &bs_src->throttle_state,
1827            sizeof(ThrottleState));
1828     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1829     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1830     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1831 
1832     /* r/w error */
1833     bs_dest->on_read_error      = bs_src->on_read_error;
1834     bs_dest->on_write_error     = bs_src->on_write_error;
1835 
1836     /* i/o status */
1837     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1838     bs_dest->iostatus           = bs_src->iostatus;
1839 
1840     /* dirty bitmap */
1841     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1842 
1843     /* reference count */
1844     bs_dest->refcnt             = bs_src->refcnt;
1845 
1846     /* job */
1847     bs_dest->job                = bs_src->job;
1848 
1849     /* keep the same entry in bdrv_states */
1850     bs_dest->device_list = bs_src->device_list;
1851     bs_dest->blk = bs_src->blk;
1852 
1853     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1854            sizeof(bs_dest->op_blockers));
1855 }
1856 
1857 /*
1858  * Swap bs contents for two image chains while they are live,
1859  * while keeping required fields on the BlockDriverState that is
1860  * actually attached to a device.
1861  *
1862  * This will modify the BlockDriverState fields, and swap contents
1863  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1864  *
1865  * bs_new must not be attached to a BlockBackend.
1866  *
1867  * This function does not create any image files.
1868  */
1869 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1870 {
1871     BlockDriverState tmp;
1872 
1873     /* The code needs to swap the node_name but simply swapping node_list won't
1874      * work so first remove the nodes from the graph list, do the swap then
1875      * insert them back if needed.
1876      */
1877     if (bs_new->node_name[0] != '\0') {
1878         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1879     }
1880     if (bs_old->node_name[0] != '\0') {
1881         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1882     }
1883 
1884     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1885     assert(!bs_new->blk);
1886     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1887     assert(bs_new->job == NULL);
1888     assert(bs_new->io_limits_enabled == false);
1889     assert(!throttle_have_timer(&bs_new->throttle_state));
1890 
1891     tmp = *bs_new;
1892     *bs_new = *bs_old;
1893     *bs_old = tmp;
1894 
1895     /* there are some fields that should not be swapped, move them back */
1896     bdrv_move_feature_fields(&tmp, bs_old);
1897     bdrv_move_feature_fields(bs_old, bs_new);
1898     bdrv_move_feature_fields(bs_new, &tmp);
1899 
1900     /* bs_new must remain unattached */
1901     assert(!bs_new->blk);
1902 
1903     /* Check a few fields that should remain attached to the device */
1904     assert(bs_new->job == NULL);
1905     assert(bs_new->io_limits_enabled == false);
1906     assert(!throttle_have_timer(&bs_new->throttle_state));
1907 
1908     /* insert the nodes back into the graph node list if needed */
1909     if (bs_new->node_name[0] != '\0') {
1910         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1911     }
1912     if (bs_old->node_name[0] != '\0') {
1913         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1914     }
1915 
1916     bdrv_rebind(bs_new);
1917     bdrv_rebind(bs_old);
1918 }
1919 
1920 /*
1921  * Add new bs contents at the top of an image chain while the chain is
1922  * live, while keeping required fields on the top layer.
1923  *
1924  * This will modify the BlockDriverState fields, and swap contents
1925  * between bs_new and bs_top. Both bs_new and bs_top are modified.
1926  *
1927  * bs_new must not be attached to a BlockBackend.
1928  *
1929  * This function does not create any image files.
1930  */
1931 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1932 {
1933     bdrv_swap(bs_new, bs_top);
1934 
1935     /* The contents of 'tmp' will become bs_top, as we are
1936      * swapping bs_new and bs_top contents. */
1937     bdrv_set_backing_hd(bs_top, bs_new);
1938 }
1939 
1940 static void bdrv_delete(BlockDriverState *bs)
1941 {
1942     assert(!bs->job);
1943     assert(bdrv_op_blocker_is_empty(bs));
1944     assert(!bs->refcnt);
1945     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1946 
1947     bdrv_close(bs);
1948 
1949     /* remove from list, if necessary */
1950     bdrv_make_anon(bs);
1951 
1952     g_free(bs);
1953 }
1954 
1955 /*
1956  * Run consistency checks on an image
1957  *
1958  * Returns 0 if the check could be completed (it doesn't mean that the image is
1959  * free of errors) or -errno when an internal error occurred. The results of the
1960  * check are stored in res.
1961  */
1962 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1963 {
1964     if (bs->drv == NULL) {
1965         return -ENOMEDIUM;
1966     }
1967     if (bs->drv->bdrv_check == NULL) {
1968         return -ENOTSUP;
1969     }
1970 
1971     memset(res, 0, sizeof(*res));
1972     return bs->drv->bdrv_check(bs, res, fix);
1973 }
1974 
1975 #define COMMIT_BUF_SECTORS 2048
1976 
1977 /* commit COW file into the raw image */
1978 int bdrv_commit(BlockDriverState *bs)
1979 {
1980     BlockDriver *drv = bs->drv;
1981     int64_t sector, total_sectors, length, backing_length;
1982     int n, ro, open_flags;
1983     int ret = 0;
1984     uint8_t *buf = NULL;
1985 
1986     if (!drv)
1987         return -ENOMEDIUM;
1988 
1989     if (!bs->backing_hd) {
1990         return -ENOTSUP;
1991     }
1992 
1993     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1994         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1995         return -EBUSY;
1996     }
1997 
1998     ro = bs->backing_hd->read_only;
1999     open_flags =  bs->backing_hd->open_flags;
2000 
2001     if (ro) {
2002         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2003             return -EACCES;
2004         }
2005     }
2006 
2007     length = bdrv_getlength(bs);
2008     if (length < 0) {
2009         ret = length;
2010         goto ro_cleanup;
2011     }
2012 
2013     backing_length = bdrv_getlength(bs->backing_hd);
2014     if (backing_length < 0) {
2015         ret = backing_length;
2016         goto ro_cleanup;
2017     }
2018 
2019     /* If our top snapshot is larger than the backing file image,
2020      * grow the backing file image if possible.  If not possible,
2021      * we must return an error */
2022     if (length > backing_length) {
2023         ret = bdrv_truncate(bs->backing_hd, length);
2024         if (ret < 0) {
2025             goto ro_cleanup;
2026         }
2027     }
2028 
2029     total_sectors = length >> BDRV_SECTOR_BITS;
2030 
2031     /* qemu_try_blockalign() for bs will choose an alignment that works for
2032      * bs->backing_hd as well, so no need to compare the alignment manually. */
2033     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2034     if (buf == NULL) {
2035         ret = -ENOMEM;
2036         goto ro_cleanup;
2037     }
2038 
2039     for (sector = 0; sector < total_sectors; sector += n) {
2040         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2041         if (ret < 0) {
2042             goto ro_cleanup;
2043         }
2044         if (ret) {
2045             ret = bdrv_read(bs, sector, buf, n);
2046             if (ret < 0) {
2047                 goto ro_cleanup;
2048             }
2049 
2050             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2051             if (ret < 0) {
2052                 goto ro_cleanup;
2053             }
2054         }
2055     }
2056 
2057     if (drv->bdrv_make_empty) {
2058         ret = drv->bdrv_make_empty(bs);
2059         if (ret < 0) {
2060             goto ro_cleanup;
2061         }
2062         bdrv_flush(bs);
2063     }
2064 
2065     /*
2066      * Make sure all data we wrote to the backing device is actually
2067      * stable on disk.
2068      */
2069     if (bs->backing_hd) {
2070         bdrv_flush(bs->backing_hd);
2071     }
2072 
2073     ret = 0;
2074 ro_cleanup:
2075     qemu_vfree(buf);
2076 
2077     if (ro) {
2078         /* ignoring error return here */
2079         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2080     }
2081 
2082     return ret;
2083 }
2084 
2085 int bdrv_commit_all(void)
2086 {
2087     BlockDriverState *bs;
2088 
2089     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2090         AioContext *aio_context = bdrv_get_aio_context(bs);
2091 
2092         aio_context_acquire(aio_context);
2093         if (bs->drv && bs->backing_hd) {
2094             int ret = bdrv_commit(bs);
2095             if (ret < 0) {
2096                 aio_context_release(aio_context);
2097                 return ret;
2098             }
2099         }
2100         aio_context_release(aio_context);
2101     }
2102     return 0;
2103 }
2104 
2105 /*
2106  * Return values:
2107  * 0        - success
2108  * -EINVAL  - backing format specified, but no file
2109  * -ENOSPC  - can't update the backing file because no space is left in the
2110  *            image file header
2111  * -ENOTSUP - format driver doesn't support changing the backing file
2112  */
2113 int bdrv_change_backing_file(BlockDriverState *bs,
2114     const char *backing_file, const char *backing_fmt)
2115 {
2116     BlockDriver *drv = bs->drv;
2117     int ret;
2118 
2119     /* Backing file format doesn't make sense without a backing file */
2120     if (backing_fmt && !backing_file) {
2121         return -EINVAL;
2122     }
2123 
2124     if (drv->bdrv_change_backing_file != NULL) {
2125         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2126     } else {
2127         ret = -ENOTSUP;
2128     }
2129 
2130     if (ret == 0) {
2131         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2132         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2133     }
2134     return ret;
2135 }
2136 
2137 /*
2138  * Finds the image layer in the chain that has 'bs' as its backing file.
2139  *
2140  * active is the current topmost image.
2141  *
2142  * Returns NULL if bs is not found in active's image chain,
2143  * or if active == bs.
2144  *
2145  * Returns the bottommost base image if bs == NULL.
2146  */
2147 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2148                                     BlockDriverState *bs)
2149 {
2150     while (active && bs != active->backing_hd) {
2151         active = active->backing_hd;
2152     }
2153 
2154     return active;
2155 }
2156 
2157 /* Given a BDS, searches for the base layer. */
2158 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2159 {
2160     return bdrv_find_overlay(bs, NULL);
2161 }
2162 
2163 typedef struct BlkIntermediateStates {
2164     BlockDriverState *bs;
2165     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2166 } BlkIntermediateStates;
2167 
2168 
2169 /*
2170  * Drops images above 'base' up to and including 'top', and sets the image
2171  * above 'top' to have base as its backing file.
2172  *
2173  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2174  * information in 'bs' can be properly updated.
2175  *
2176  * E.g., this will convert the following chain:
2177  * bottom <- base <- intermediate <- top <- active
2178  *
2179  * to
2180  *
2181  * bottom <- base <- active
2182  *
2183  * It is allowed for bottom==base, in which case it converts:
2184  *
2185  * base <- intermediate <- top <- active
2186  *
2187  * to
2188  *
2189  * base <- active
2190  *
2191  * If backing_file_str is non-NULL, it will be used when modifying top's
2192  * overlay image metadata.
2193  *
2194  * Error conditions:
2195  *  if active == top, that is considered an error
2196  *
2197  */
2198 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2199                            BlockDriverState *base, const char *backing_file_str)
2200 {
2201     BlockDriverState *intermediate;
2202     BlockDriverState *base_bs = NULL;
2203     BlockDriverState *new_top_bs = NULL;
2204     BlkIntermediateStates *intermediate_state, *next;
2205     int ret = -EIO;
2206 
2207     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2208     QSIMPLEQ_INIT(&states_to_delete);
2209 
2210     if (!top->drv || !base->drv) {
2211         goto exit;
2212     }
2213 
2214     new_top_bs = bdrv_find_overlay(active, top);
2215 
2216     if (new_top_bs == NULL) {
2217         /* we could not find the image above 'top', this is an error */
2218         goto exit;
2219     }
2220 
2221     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2222      * to do, no intermediate images */
2223     if (new_top_bs->backing_hd == base) {
2224         ret = 0;
2225         goto exit;
2226     }
2227 
2228     intermediate = top;
2229 
2230     /* now we will go down through the list, and add each BDS we find
2231      * into our deletion queue, until we hit the 'base'
2232      */
2233     while (intermediate) {
2234         intermediate_state = g_new0(BlkIntermediateStates, 1);
2235         intermediate_state->bs = intermediate;
2236         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2237 
2238         if (intermediate->backing_hd == base) {
2239             base_bs = intermediate->backing_hd;
2240             break;
2241         }
2242         intermediate = intermediate->backing_hd;
2243     }
2244     if (base_bs == NULL) {
2245         /* something went wrong, we did not end at the base. safely
2246          * unravel everything, and exit with error */
2247         goto exit;
2248     }
2249 
2250     /* success - we can delete the intermediate states, and link top->base */
2251     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2252     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2253                                    base_bs->drv ? base_bs->drv->format_name : "");
2254     if (ret) {
2255         goto exit;
2256     }
2257     bdrv_set_backing_hd(new_top_bs, base_bs);
2258 
2259     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2260         /* so that bdrv_close() does not recursively close the chain */
2261         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2262         bdrv_unref(intermediate_state->bs);
2263     }
2264     ret = 0;
2265 
2266 exit:
2267     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2268         g_free(intermediate_state);
2269     }
2270     return ret;
2271 }
2272 
2273 /**
2274  * Truncate file to 'offset' bytes (needed only for file protocols)
2275  */
2276 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2277 {
2278     BlockDriver *drv = bs->drv;
2279     int ret;
2280     if (!drv)
2281         return -ENOMEDIUM;
2282     if (!drv->bdrv_truncate)
2283         return -ENOTSUP;
2284     if (bs->read_only)
2285         return -EACCES;
2286 
2287     ret = drv->bdrv_truncate(bs, offset);
2288     if (ret == 0) {
2289         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2290         bdrv_dirty_bitmap_truncate(bs);
2291         if (bs->blk) {
2292             blk_dev_resize_cb(bs->blk);
2293         }
2294     }
2295     return ret;
2296 }
2297 
2298 /**
2299  * Length of a allocated file in bytes. Sparse files are counted by actual
2300  * allocated space. Return < 0 if error or unknown.
2301  */
2302 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2303 {
2304     BlockDriver *drv = bs->drv;
2305     if (!drv) {
2306         return -ENOMEDIUM;
2307     }
2308     if (drv->bdrv_get_allocated_file_size) {
2309         return drv->bdrv_get_allocated_file_size(bs);
2310     }
2311     if (bs->file) {
2312         return bdrv_get_allocated_file_size(bs->file);
2313     }
2314     return -ENOTSUP;
2315 }
2316 
2317 /**
2318  * Return number of sectors on success, -errno on error.
2319  */
2320 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2321 {
2322     BlockDriver *drv = bs->drv;
2323 
2324     if (!drv)
2325         return -ENOMEDIUM;
2326 
2327     if (drv->has_variable_length) {
2328         int ret = refresh_total_sectors(bs, bs->total_sectors);
2329         if (ret < 0) {
2330             return ret;
2331         }
2332     }
2333     return bs->total_sectors;
2334 }
2335 
2336 /**
2337  * Return length in bytes on success, -errno on error.
2338  * The length is always a multiple of BDRV_SECTOR_SIZE.
2339  */
2340 int64_t bdrv_getlength(BlockDriverState *bs)
2341 {
2342     int64_t ret = bdrv_nb_sectors(bs);
2343 
2344     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2345     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2346 }
2347 
2348 /* return 0 as number of sectors if no device present or error */
2349 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2350 {
2351     int64_t nb_sectors = bdrv_nb_sectors(bs);
2352 
2353     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2354 }
2355 
2356 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2357                        BlockdevOnError on_write_error)
2358 {
2359     bs->on_read_error = on_read_error;
2360     bs->on_write_error = on_write_error;
2361 }
2362 
2363 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2364 {
2365     return is_read ? bs->on_read_error : bs->on_write_error;
2366 }
2367 
2368 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2369 {
2370     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2371 
2372     switch (on_err) {
2373     case BLOCKDEV_ON_ERROR_ENOSPC:
2374         return (error == ENOSPC) ?
2375                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2376     case BLOCKDEV_ON_ERROR_STOP:
2377         return BLOCK_ERROR_ACTION_STOP;
2378     case BLOCKDEV_ON_ERROR_REPORT:
2379         return BLOCK_ERROR_ACTION_REPORT;
2380     case BLOCKDEV_ON_ERROR_IGNORE:
2381         return BLOCK_ERROR_ACTION_IGNORE;
2382     default:
2383         abort();
2384     }
2385 }
2386 
2387 static void send_qmp_error_event(BlockDriverState *bs,
2388                                  BlockErrorAction action,
2389                                  bool is_read, int error)
2390 {
2391     IoOperationType optype;
2392 
2393     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2394     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2395                                    bdrv_iostatus_is_enabled(bs),
2396                                    error == ENOSPC, strerror(error),
2397                                    &error_abort);
2398 }
2399 
2400 /* This is done by device models because, while the block layer knows
2401  * about the error, it does not know whether an operation comes from
2402  * the device or the block layer (from a job, for example).
2403  */
2404 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2405                        bool is_read, int error)
2406 {
2407     assert(error >= 0);
2408 
2409     if (action == BLOCK_ERROR_ACTION_STOP) {
2410         /* First set the iostatus, so that "info block" returns an iostatus
2411          * that matches the events raised so far (an additional error iostatus
2412          * is fine, but not a lost one).
2413          */
2414         bdrv_iostatus_set_err(bs, error);
2415 
2416         /* Then raise the request to stop the VM and the event.
2417          * qemu_system_vmstop_request_prepare has two effects.  First,
2418          * it ensures that the STOP event always comes after the
2419          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2420          * can observe the STOP event and do a "cont" before the STOP
2421          * event is issued, the VM will not stop.  In this case, vm_start()
2422          * also ensures that the STOP/RESUME pair of events is emitted.
2423          */
2424         qemu_system_vmstop_request_prepare();
2425         send_qmp_error_event(bs, action, is_read, error);
2426         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2427     } else {
2428         send_qmp_error_event(bs, action, is_read, error);
2429     }
2430 }
2431 
2432 int bdrv_is_read_only(BlockDriverState *bs)
2433 {
2434     return bs->read_only;
2435 }
2436 
2437 int bdrv_is_sg(BlockDriverState *bs)
2438 {
2439     return bs->sg;
2440 }
2441 
2442 int bdrv_enable_write_cache(BlockDriverState *bs)
2443 {
2444     return bs->enable_write_cache;
2445 }
2446 
2447 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2448 {
2449     bs->enable_write_cache = wce;
2450 
2451     /* so a reopen() will preserve wce */
2452     if (wce) {
2453         bs->open_flags |= BDRV_O_CACHE_WB;
2454     } else {
2455         bs->open_flags &= ~BDRV_O_CACHE_WB;
2456     }
2457 }
2458 
2459 int bdrv_is_encrypted(BlockDriverState *bs)
2460 {
2461     if (bs->backing_hd && bs->backing_hd->encrypted)
2462         return 1;
2463     return bs->encrypted;
2464 }
2465 
2466 int bdrv_key_required(BlockDriverState *bs)
2467 {
2468     BlockDriverState *backing_hd = bs->backing_hd;
2469 
2470     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2471         return 1;
2472     return (bs->encrypted && !bs->valid_key);
2473 }
2474 
2475 int bdrv_set_key(BlockDriverState *bs, const char *key)
2476 {
2477     int ret;
2478     if (bs->backing_hd && bs->backing_hd->encrypted) {
2479         ret = bdrv_set_key(bs->backing_hd, key);
2480         if (ret < 0)
2481             return ret;
2482         if (!bs->encrypted)
2483             return 0;
2484     }
2485     if (!bs->encrypted) {
2486         return -EINVAL;
2487     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2488         return -ENOMEDIUM;
2489     }
2490     ret = bs->drv->bdrv_set_key(bs, key);
2491     if (ret < 0) {
2492         bs->valid_key = 0;
2493     } else if (!bs->valid_key) {
2494         bs->valid_key = 1;
2495         if (bs->blk) {
2496             /* call the change callback now, we skipped it on open */
2497             blk_dev_change_media_cb(bs->blk, true);
2498         }
2499     }
2500     return ret;
2501 }
2502 
2503 /*
2504  * Provide an encryption key for @bs.
2505  * If @key is non-null:
2506  *     If @bs is not encrypted, fail.
2507  *     Else if the key is invalid, fail.
2508  *     Else set @bs's key to @key, replacing the existing key, if any.
2509  * If @key is null:
2510  *     If @bs is encrypted and still lacks a key, fail.
2511  *     Else do nothing.
2512  * On failure, store an error object through @errp if non-null.
2513  */
2514 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2515 {
2516     if (key) {
2517         if (!bdrv_is_encrypted(bs)) {
2518             error_setg(errp, "Node '%s' is not encrypted",
2519                       bdrv_get_device_or_node_name(bs));
2520         } else if (bdrv_set_key(bs, key) < 0) {
2521             error_set(errp, QERR_INVALID_PASSWORD);
2522         }
2523     } else {
2524         if (bdrv_key_required(bs)) {
2525             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2526                       "'%s' (%s) is encrypted",
2527                       bdrv_get_device_or_node_name(bs),
2528                       bdrv_get_encrypted_filename(bs));
2529         }
2530     }
2531 }
2532 
2533 const char *bdrv_get_format_name(BlockDriverState *bs)
2534 {
2535     return bs->drv ? bs->drv->format_name : NULL;
2536 }
2537 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() hands the comparator pointers to the array elements, so @a and @b
 * are really 'const char *const *' and have to be dereferenced once before
 * the strings can be compared with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2542 
2543 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2544                          void *opaque)
2545 {
2546     BlockDriver *drv;
2547     int count = 0;
2548     int i;
2549     const char **formats = NULL;
2550 
2551     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2552         if (drv->format_name) {
2553             bool found = false;
2554             int i = count;
2555             while (formats && i && !found) {
2556                 found = !strcmp(formats[--i], drv->format_name);
2557             }
2558 
2559             if (!found) {
2560                 formats = g_renew(const char *, formats, count + 1);
2561                 formats[count++] = drv->format_name;
2562             }
2563         }
2564     }
2565 
2566     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2567 
2568     for (i = 0; i < count; i++) {
2569         it(opaque, formats[i]);
2570     }
2571 
2572     g_free(formats);
2573 }
2574 
2575 /* This function is to find a node in the bs graph */
2576 BlockDriverState *bdrv_find_node(const char *node_name)
2577 {
2578     BlockDriverState *bs;
2579 
2580     assert(node_name);
2581 
2582     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2583         if (!strcmp(node_name, bs->node_name)) {
2584             return bs;
2585         }
2586     }
2587     return NULL;
2588 }
2589 
2590 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2591 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2592 {
2593     BlockDeviceInfoList *list, *entry;
2594     BlockDriverState *bs;
2595 
2596     list = NULL;
2597     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2598         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2599         if (!info) {
2600             qapi_free_BlockDeviceInfoList(list);
2601             return NULL;
2602         }
2603         entry = g_malloc0(sizeof(*entry));
2604         entry->value = info;
2605         entry->next = list;
2606         list = entry;
2607     }
2608 
2609     return list;
2610 }
2611 
2612 BlockDriverState *bdrv_lookup_bs(const char *device,
2613                                  const char *node_name,
2614                                  Error **errp)
2615 {
2616     BlockBackend *blk;
2617     BlockDriverState *bs;
2618 
2619     if (device) {
2620         blk = blk_by_name(device);
2621 
2622         if (blk) {
2623             return blk_bs(blk);
2624         }
2625     }
2626 
2627     if (node_name) {
2628         bs = bdrv_find_node(node_name);
2629 
2630         if (bs) {
2631             return bs;
2632         }
2633     }
2634 
2635     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2636                      device ? device : "",
2637                      node_name ? node_name : "");
2638     return NULL;
2639 }
2640 
2641 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2642  * return false.  If either argument is NULL, return false. */
2643 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2644 {
2645     while (top && top != base) {
2646         top = top->backing_hd;
2647     }
2648 
2649     return top != NULL;
2650 }
2651 
2652 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2653 {
2654     if (!bs) {
2655         return QTAILQ_FIRST(&graph_bdrv_states);
2656     }
2657     return QTAILQ_NEXT(bs, node_list);
2658 }
2659 
2660 BlockDriverState *bdrv_next(BlockDriverState *bs)
2661 {
2662     if (!bs) {
2663         return QTAILQ_FIRST(&bdrv_states);
2664     }
2665     return QTAILQ_NEXT(bs, device_list);
2666 }
2667 
2668 const char *bdrv_get_node_name(const BlockDriverState *bs)
2669 {
2670     return bs->node_name;
2671 }
2672 
2673 /* TODO check what callers really want: bs->node_name or blk_name() */
2674 const char *bdrv_get_device_name(const BlockDriverState *bs)
2675 {
2676     return bs->blk ? blk_name(bs->blk) : "";
2677 }
2678 
2679 /* This can be used to identify nodes that might not have a device
2680  * name associated. Since node and device names live in the same
2681  * namespace, the result is unambiguous. The exception is if both are
2682  * absent, then this returns an empty (non-null) string. */
2683 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2684 {
2685     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2686 }
2687 
2688 int bdrv_get_flags(BlockDriverState *bs)
2689 {
2690     return bs->open_flags;
2691 }
2692 
2693 int bdrv_has_zero_init_1(BlockDriverState *bs)
2694 {
2695     return 1;
2696 }
2697 
2698 int bdrv_has_zero_init(BlockDriverState *bs)
2699 {
2700     assert(bs->drv);
2701 
2702     /* If BS is a copy on write image, it is initialized to
2703        the contents of the base image, which may not be zeroes.  */
2704     if (bs->backing_hd) {
2705         return 0;
2706     }
2707     if (bs->drv->bdrv_has_zero_init) {
2708         return bs->drv->bdrv_has_zero_init(bs);
2709     }
2710 
2711     /* safe default */
2712     return 0;
2713 }
2714 
2715 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2716 {
2717     BlockDriverInfo bdi;
2718 
2719     if (bs->backing_hd) {
2720         return false;
2721     }
2722 
2723     if (bdrv_get_info(bs, &bdi) == 0) {
2724         return bdi.unallocated_blocks_are_zero;
2725     }
2726 
2727     return false;
2728 }
2729 
2730 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2731 {
2732     BlockDriverInfo bdi;
2733 
2734     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2735         return false;
2736     }
2737 
2738     if (bdrv_get_info(bs, &bdi) == 0) {
2739         return bdi.can_write_zeroes_with_unmap;
2740     }
2741 
2742     return false;
2743 }
2744 
2745 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2746 {
2747     if (bs->backing_hd && bs->backing_hd->encrypted)
2748         return bs->backing_file;
2749     else if (bs->encrypted)
2750         return bs->filename;
2751     else
2752         return NULL;
2753 }
2754 
2755 void bdrv_get_backing_filename(BlockDriverState *bs,
2756                                char *filename, int filename_size)
2757 {
2758     pstrcpy(filename, filename_size, bs->backing_file);
2759 }
2760 
2761 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2762 {
2763     BlockDriver *drv = bs->drv;
2764     if (!drv)
2765         return -ENOMEDIUM;
2766     if (!drv->bdrv_get_info)
2767         return -ENOTSUP;
2768     memset(bdi, 0, sizeof(*bdi));
2769     return drv->bdrv_get_info(bs, bdi);
2770 }
2771 
2772 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2773 {
2774     BlockDriver *drv = bs->drv;
2775     if (drv && drv->bdrv_get_specific_info) {
2776         return drv->bdrv_get_specific_info(bs);
2777     }
2778     return NULL;
2779 }
2780 
2781 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2782 {
2783     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2784         return;
2785     }
2786 
2787     bs->drv->bdrv_debug_event(bs, event);
2788 }
2789 
2790 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2791                           const char *tag)
2792 {
2793     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2794         bs = bs->file;
2795     }
2796 
2797     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2798         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2799     }
2800 
2801     return -ENOTSUP;
2802 }
2803 
2804 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2805 {
2806     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2807         bs = bs->file;
2808     }
2809 
2810     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2811         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2812     }
2813 
2814     return -ENOTSUP;
2815 }
2816 
2817 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2818 {
2819     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2820         bs = bs->file;
2821     }
2822 
2823     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2824         return bs->drv->bdrv_debug_resume(bs, tag);
2825     }
2826 
2827     return -ENOTSUP;
2828 }
2829 
2830 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2831 {
2832     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2833         bs = bs->file;
2834     }
2835 
2836     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2837         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2838     }
2839 
2840     return false;
2841 }
2842 
2843 int bdrv_is_snapshot(BlockDriverState *bs)
2844 {
2845     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2846 }
2847 
2848 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2849  * relative, it must be relative to the chain.  So, passing in bs->filename
2850  * from a BDS as backing_file should not be done, as that may be relative to
2851  * the CWD rather than the chain. */
2852 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2853         const char *backing_file)
2854 {
2855     char *filename_full = NULL;
2856     char *backing_file_full = NULL;
2857     char *filename_tmp = NULL;
2858     int is_protocol = 0;
2859     BlockDriverState *curr_bs = NULL;
2860     BlockDriverState *retval = NULL;
2861 
2862     if (!bs || !bs->drv || !backing_file) {
2863         return NULL;
2864     }
2865 
2866     filename_full     = g_malloc(PATH_MAX);
2867     backing_file_full = g_malloc(PATH_MAX);
2868     filename_tmp      = g_malloc(PATH_MAX);
2869 
2870     is_protocol = path_has_protocol(backing_file);
2871 
2872     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2873 
2874         /* If either of the filename paths is actually a protocol, then
2875          * compare unmodified paths; otherwise make paths relative */
2876         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2877             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2878                 retval = curr_bs->backing_hd;
2879                 break;
2880             }
2881         } else {
2882             /* If not an absolute filename path, make it relative to the current
2883              * image's filename path */
2884             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2885                          backing_file);
2886 
2887             /* We are going to compare absolute pathnames */
2888             if (!realpath(filename_tmp, filename_full)) {
2889                 continue;
2890             }
2891 
2892             /* We need to make sure the backing filename we are comparing against
2893              * is relative to the current image filename (or absolute) */
2894             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2895                          curr_bs->backing_file);
2896 
2897             if (!realpath(filename_tmp, backing_file_full)) {
2898                 continue;
2899             }
2900 
2901             if (strcmp(backing_file_full, filename_full) == 0) {
2902                 retval = curr_bs->backing_hd;
2903                 break;
2904             }
2905         }
2906     }
2907 
2908     g_free(filename_full);
2909     g_free(backing_file_full);
2910     g_free(filename_tmp);
2911     return retval;
2912 }
2913 
2914 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2915 {
2916     if (!bs->drv) {
2917         return 0;
2918     }
2919 
2920     if (!bs->backing_hd) {
2921         return 0;
2922     }
2923 
2924     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2925 }
2926 
2927 void bdrv_init(void)
2928 {
2929     module_call_init(MODULE_INIT_BLOCK);
2930 }
2931 
2932 void bdrv_init_with_whitelist(void)
2933 {
2934     use_bdrv_whitelist = 1;
2935     bdrv_init();
2936 }
2937 
2938 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2939 {
2940     Error *local_err = NULL;
2941     int ret;
2942 
2943     if (!bs->drv)  {
2944         return;
2945     }
2946 
2947     if (!(bs->open_flags & BDRV_O_INCOMING)) {
2948         return;
2949     }
2950     bs->open_flags &= ~BDRV_O_INCOMING;
2951 
2952     if (bs->drv->bdrv_invalidate_cache) {
2953         bs->drv->bdrv_invalidate_cache(bs, &local_err);
2954     } else if (bs->file) {
2955         bdrv_invalidate_cache(bs->file, &local_err);
2956     }
2957     if (local_err) {
2958         error_propagate(errp, local_err);
2959         return;
2960     }
2961 
2962     ret = refresh_total_sectors(bs, bs->total_sectors);
2963     if (ret < 0) {
2964         error_setg_errno(errp, -ret, "Could not refresh total sector count");
2965         return;
2966     }
2967 }
2968 
2969 void bdrv_invalidate_cache_all(Error **errp)
2970 {
2971     BlockDriverState *bs;
2972     Error *local_err = NULL;
2973 
2974     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2975         AioContext *aio_context = bdrv_get_aio_context(bs);
2976 
2977         aio_context_acquire(aio_context);
2978         bdrv_invalidate_cache(bs, &local_err);
2979         aio_context_release(aio_context);
2980         if (local_err) {
2981             error_propagate(errp, local_err);
2982             return;
2983         }
2984     }
2985 }
2986 
2987 /**************************************************************/
2988 /* removable device support */
2989 
2990 /**
2991  * Return TRUE if the media is present
2992  */
2993 int bdrv_is_inserted(BlockDriverState *bs)
2994 {
2995     BlockDriver *drv = bs->drv;
2996 
2997     if (!drv)
2998         return 0;
2999     if (!drv->bdrv_is_inserted)
3000         return 1;
3001     return drv->bdrv_is_inserted(bs);
3002 }
3003 
3004 /**
3005  * Return whether the media changed since the last call to this
3006  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3007  */
3008 int bdrv_media_changed(BlockDriverState *bs)
3009 {
3010     BlockDriver *drv = bs->drv;
3011 
3012     if (drv && drv->bdrv_media_changed) {
3013         return drv->bdrv_media_changed(bs);
3014     }
3015     return -ENOTSUP;
3016 }
3017 
3018 /**
3019  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3020  */
3021 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3022 {
3023     BlockDriver *drv = bs->drv;
3024     const char *device_name;
3025 
3026     if (drv && drv->bdrv_eject) {
3027         drv->bdrv_eject(bs, eject_flag);
3028     }
3029 
3030     device_name = bdrv_get_device_name(bs);
3031     if (device_name[0] != '\0') {
3032         qapi_event_send_device_tray_moved(device_name,
3033                                           eject_flag, &error_abort);
3034     }
3035 }
3036 
3037 /**
3038  * Lock or unlock the media (if it is locked, the user won't be able
3039  * to eject it manually).
3040  */
3041 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3042 {
3043     BlockDriver *drv = bs->drv;
3044 
3045     trace_bdrv_lock_medium(bs, locked);
3046 
3047     if (drv && drv->bdrv_lock_medium) {
3048         drv->bdrv_lock_medium(bs, locked);
3049     }
3050 }
3051 
3052 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3053 {
3054     bs->guest_block_size = align;
3055 }
3056 
3057 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3058 {
3059     BdrvDirtyBitmap *bm;
3060 
3061     assert(name);
3062     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3063         if (bm->name && !strcmp(name, bm->name)) {
3064             return bm;
3065         }
3066     }
3067     return NULL;
3068 }
3069 
3070 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3071 {
3072     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3073     g_free(bitmap->name);
3074     bitmap->name = NULL;
3075 }
3076 
3077 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3078                                           uint32_t granularity,
3079                                           const char *name,
3080                                           Error **errp)
3081 {
3082     int64_t bitmap_size;
3083     BdrvDirtyBitmap *bitmap;
3084     uint32_t sector_granularity;
3085 
3086     assert((granularity & (granularity - 1)) == 0);
3087 
3088     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3089         error_setg(errp, "Bitmap already exists: %s", name);
3090         return NULL;
3091     }
3092     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3093     assert(sector_granularity);
3094     bitmap_size = bdrv_nb_sectors(bs);
3095     if (bitmap_size < 0) {
3096         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3097         errno = -bitmap_size;
3098         return NULL;
3099     }
3100     bitmap = g_new0(BdrvDirtyBitmap, 1);
3101     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3102     bitmap->size = bitmap_size;
3103     bitmap->name = g_strdup(name);
3104     bitmap->disabled = false;
3105     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3106     return bitmap;
3107 }
3108 
3109 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3110 {
3111     return bitmap->successor;
3112 }
3113 
3114 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3115 {
3116     return !(bitmap->disabled || bitmap->successor);
3117 }
3118 
3119 /**
3120  * Create a successor bitmap destined to replace this bitmap after an operation.
3121  * Requires that the bitmap is not frozen and has no successor.
3122  */
3123 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3124                                        BdrvDirtyBitmap *bitmap, Error **errp)
3125 {
3126     uint64_t granularity;
3127     BdrvDirtyBitmap *child;
3128 
3129     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3130         error_setg(errp, "Cannot create a successor for a bitmap that is "
3131                    "currently frozen");
3132         return -1;
3133     }
3134     assert(!bitmap->successor);
3135 
3136     /* Create an anonymous successor */
3137     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3138     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3139     if (!child) {
3140         return -1;
3141     }
3142 
3143     /* Successor will be on or off based on our current state. */
3144     child->disabled = bitmap->disabled;
3145 
3146     /* Install the successor and freeze the parent */
3147     bitmap->successor = child;
3148     return 0;
3149 }
3150 
3151 /**
3152  * For a bitmap with a successor, yield our name to the successor,
3153  * delete the old bitmap, and return a handle to the new bitmap.
3154  */
3155 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3156                                             BdrvDirtyBitmap *bitmap,
3157                                             Error **errp)
3158 {
3159     char *name;
3160     BdrvDirtyBitmap *successor = bitmap->successor;
3161 
3162     if (successor == NULL) {
3163         error_setg(errp, "Cannot relinquish control if "
3164                    "there's no successor present");
3165         return NULL;
3166     }
3167 
3168     name = bitmap->name;
3169     bitmap->name = NULL;
3170     successor->name = name;
3171     bitmap->successor = NULL;
3172     bdrv_release_dirty_bitmap(bs, bitmap);
3173 
3174     return successor;
3175 }
3176 
3177 /**
3178  * In cases of failure where we can no longer safely delete the parent,
3179  * we may wish to re-join the parent and child/successor.
3180  * The merged parent will be un-frozen, but not explicitly re-enabled.
3181  */
3182 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3183                                            BdrvDirtyBitmap *parent,
3184                                            Error **errp)
3185 {
3186     BdrvDirtyBitmap *successor = parent->successor;
3187 
3188     if (!successor) {
3189         error_setg(errp, "Cannot reclaim a successor when none is present");
3190         return NULL;
3191     }
3192 
3193     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3194         error_setg(errp, "Merging of parent and successor bitmap failed");
3195         return NULL;
3196     }
3197     bdrv_release_dirty_bitmap(bs, successor);
3198     parent->successor = NULL;
3199 
3200     return parent;
3201 }
3202 
3203 /**
3204  * Truncates _all_ bitmaps attached to a BDS.
3205  */
3206 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3207 {
3208     BdrvDirtyBitmap *bitmap;
3209     uint64_t size = bdrv_nb_sectors(bs);
3210 
3211     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3212         if (bdrv_dirty_bitmap_frozen(bitmap)) {
3213             continue;
3214         }
3215         hbitmap_truncate(bitmap->bitmap, size);
3216     }
3217 }
3218 
3219 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3220 {
3221     BdrvDirtyBitmap *bm, *next;
3222     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3223         if (bm == bitmap) {
3224             assert(!bdrv_dirty_bitmap_frozen(bm));
3225             QLIST_REMOVE(bitmap, list);
3226             hbitmap_free(bitmap->bitmap);
3227             g_free(bitmap->name);
3228             g_free(bitmap);
3229             return;
3230         }
3231     }
3232 }
3233 
3234 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3235 {
3236     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3237     bitmap->disabled = true;
3238 }
3239 
3240 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3241 {
3242     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3243     bitmap->disabled = false;
3244 }
3245 
3246 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3247 {
3248     BdrvDirtyBitmap *bm;
3249     BlockDirtyInfoList *list = NULL;
3250     BlockDirtyInfoList **plist = &list;
3251 
3252     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3253         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3254         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3255         info->count = bdrv_get_dirty_count(bm);
3256         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3257         info->has_name = !!bm->name;
3258         info->name = g_strdup(bm->name);
3259         info->frozen = bdrv_dirty_bitmap_frozen(bm);
3260         entry->value = info;
3261         *plist = entry;
3262         plist = &entry->next;
3263     }
3264 
3265     return list;
3266 }
3267 
3268 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3269 {
3270     if (bitmap) {
3271         return hbitmap_get(bitmap->bitmap, sector);
3272     } else {
3273         return 0;
3274     }
3275 }
3276 
3277 /**
3278  * Chooses a default granularity based on the existing cluster size,
3279  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3280  * is no cluster size information available.
3281  */
3282 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3283 {
3284     BlockDriverInfo bdi;
3285     uint32_t granularity;
3286 
3287     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3288         granularity = MAX(4096, bdi.cluster_size);
3289         granularity = MIN(65536, granularity);
3290     } else {
3291         granularity = 65536;
3292     }
3293 
3294     return granularity;
3295 }
3296 
3297 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3298 {
3299     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3300 }
3301 
3302 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3303 {
3304     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3305 }
3306 
3307 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3308                            int64_t cur_sector, int nr_sectors)
3309 {
3310     assert(bdrv_dirty_bitmap_enabled(bitmap));
3311     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3312 }
3313 
3314 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3315                              int64_t cur_sector, int nr_sectors)
3316 {
3317     assert(bdrv_dirty_bitmap_enabled(bitmap));
3318     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3319 }
3320 
3321 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3322 {
3323     assert(bdrv_dirty_bitmap_enabled(bitmap));
3324     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3325 }
3326 
3327 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3328                     int nr_sectors)
3329 {
3330     BdrvDirtyBitmap *bitmap;
3331     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3332         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3333             continue;
3334         }
3335         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3336     }
3337 }
3338 
3339 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3340                       int nr_sectors)
3341 {
3342     BdrvDirtyBitmap *bitmap;
3343     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3344         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3345             continue;
3346         }
3347         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3348     }
3349 }
3350 
3351 /**
3352  * Advance an HBitmapIter to an arbitrary offset.
3353  */
3354 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3355 {
3356     assert(hbi->hb);
3357     hbitmap_iter_init(hbi, hbi->hb, offset);
3358 }
3359 
3360 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3361 {
3362     return hbitmap_count(bitmap->bitmap);
3363 }
3364 
3365 /* Get a reference to bs */
3366 void bdrv_ref(BlockDriverState *bs)
3367 {
3368     bs->refcnt++;
3369 }
3370 
3371 /* Release a previously grabbed reference to bs.
3372  * If after releasing, reference count is zero, the BlockDriverState is
3373  * deleted. */
3374 void bdrv_unref(BlockDriverState *bs)
3375 {
3376     if (!bs) {
3377         return;
3378     }
3379     assert(bs->refcnt > 0);
3380     if (--bs->refcnt == 0) {
3381         bdrv_delete(bs);
3382     }
3383 }
3384 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error identifying the blocker; compared by pointer when unblocking and
     * pretty-printed in the "busy" message */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3389 
3390 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3391 {
3392     BdrvOpBlocker *blocker;
3393     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3394     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3395         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3396         if (errp) {
3397             error_setg(errp, "Node '%s' is busy: %s",
3398                        bdrv_get_device_or_node_name(bs),
3399                        error_get_pretty(blocker->reason));
3400         }
3401         return true;
3402     }
3403     return false;
3404 }
3405 
3406 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3407 {
3408     BdrvOpBlocker *blocker;
3409     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3410 
3411     blocker = g_new0(BdrvOpBlocker, 1);
3412     blocker->reason = reason;
3413     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3414 }
3415 
3416 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3417 {
3418     BdrvOpBlocker *blocker, *next;
3419     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3420     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3421         if (blocker->reason == reason) {
3422             QLIST_REMOVE(blocker, list);
3423             g_free(blocker);
3424         }
3425     }
3426 }
3427 
3428 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3429 {
3430     int i;
3431     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3432         bdrv_op_block(bs, i, reason);
3433     }
3434 }
3435 
3436 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3437 {
3438     int i;
3439     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3440         bdrv_op_unblock(bs, i, reason);
3441     }
3442 }
3443 
3444 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3445 {
3446     int i;
3447 
3448     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3449         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3450             return false;
3451         }
3452     }
3453     return true;
3454 }
3455 
3456 void bdrv_iostatus_enable(BlockDriverState *bs)
3457 {
3458     bs->iostatus_enabled = true;
3459     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3460 }
3461 
3462 /* The I/O status is only enabled if the drive explicitly
3463  * enables it _and_ the VM is configured to stop on errors */
3464 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3465 {
3466     return (bs->iostatus_enabled &&
3467            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3468             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3469             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3470 }
3471 
3472 void bdrv_iostatus_disable(BlockDriverState *bs)
3473 {
3474     bs->iostatus_enabled = false;
3475 }
3476 
3477 void bdrv_iostatus_reset(BlockDriverState *bs)
3478 {
3479     if (bdrv_iostatus_is_enabled(bs)) {
3480         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3481         if (bs->job) {
3482             block_job_iostatus_reset(bs->job);
3483         }
3484     }
3485 }
3486 
3487 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3488 {
3489     assert(bdrv_iostatus_is_enabled(bs));
3490     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3491         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3492                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3493     }
3494 }
3495 
3496 void bdrv_img_create(const char *filename, const char *fmt,
3497                      const char *base_filename, const char *base_fmt,
3498                      char *options, uint64_t img_size, int flags,
3499                      Error **errp, bool quiet)
3500 {
3501     QemuOptsList *create_opts = NULL;
3502     QemuOpts *opts = NULL;
3503     const char *backing_fmt, *backing_file;
3504     int64_t size;
3505     BlockDriver *drv, *proto_drv;
3506     BlockDriver *backing_drv = NULL;
3507     Error *local_err = NULL;
3508     int ret = 0;
3509 
3510     /* Find driver and parse its options */
3511     drv = bdrv_find_format(fmt);
3512     if (!drv) {
3513         error_setg(errp, "Unknown file format '%s'", fmt);
3514         return;
3515     }
3516 
3517     proto_drv = bdrv_find_protocol(filename, true, errp);
3518     if (!proto_drv) {
3519         return;
3520     }
3521 
3522     if (!drv->create_opts) {
3523         error_setg(errp, "Format driver '%s' does not support image creation",
3524                    drv->format_name);
3525         return;
3526     }
3527 
3528     if (!proto_drv->create_opts) {
3529         error_setg(errp, "Protocol driver '%s' does not support image creation",
3530                    proto_drv->format_name);
3531         return;
3532     }
3533 
3534     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3535     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3536 
3537     /* Create parameter list with default values */
3538     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3539     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3540 
3541     /* Parse -o options */
3542     if (options) {
3543         qemu_opts_do_parse(opts, options, NULL, &local_err);
3544         if (local_err) {
3545             error_report_err(local_err);
3546             local_err = NULL;
3547             error_setg(errp, "Invalid options for file format '%s'", fmt);
3548             goto out;
3549         }
3550     }
3551 
3552     if (base_filename) {
3553         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3554         if (local_err) {
3555             error_setg(errp, "Backing file not supported for file format '%s'",
3556                        fmt);
3557             goto out;
3558         }
3559     }
3560 
3561     if (base_fmt) {
3562         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3563         if (local_err) {
3564             error_setg(errp, "Backing file format not supported for file "
3565                              "format '%s'", fmt);
3566             goto out;
3567         }
3568     }
3569 
3570     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3571     if (backing_file) {
3572         if (!strcmp(filename, backing_file)) {
3573             error_setg(errp, "Error: Trying to create an image with the "
3574                              "same filename as the backing file");
3575             goto out;
3576         }
3577     }
3578 
3579     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3580     if (backing_fmt) {
3581         backing_drv = bdrv_find_format(backing_fmt);
3582         if (!backing_drv) {
3583             error_setg(errp, "Unknown backing file format '%s'",
3584                        backing_fmt);
3585             goto out;
3586         }
3587     }
3588 
3589     // The size for the image must always be specified, with one exception:
3590     // If we are using a backing file, we can obtain the size from there
3591     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3592     if (size == -1) {
3593         if (backing_file) {
3594             BlockDriverState *bs;
3595             char *full_backing = g_new0(char, PATH_MAX);
3596             int64_t size;
3597             int back_flags;
3598 
3599             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3600                                                          full_backing, PATH_MAX,
3601                                                          &local_err);
3602             if (local_err) {
3603                 g_free(full_backing);
3604                 goto out;
3605             }
3606 
3607             /* backing files always opened read-only */
3608             back_flags =
3609                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3610 
3611             bs = NULL;
3612             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3613                             backing_drv, &local_err);
3614             g_free(full_backing);
3615             if (ret < 0) {
3616                 goto out;
3617             }
3618             size = bdrv_getlength(bs);
3619             if (size < 0) {
3620                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3621                                  backing_file);
3622                 bdrv_unref(bs);
3623                 goto out;
3624             }
3625 
3626             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3627 
3628             bdrv_unref(bs);
3629         } else {
3630             error_setg(errp, "Image creation needs a size parameter");
3631             goto out;
3632         }
3633     }
3634 
3635     if (!quiet) {
3636         printf("Formatting '%s', fmt=%s", filename, fmt);
3637         qemu_opts_print(opts, " ");
3638         puts("");
3639     }
3640 
3641     ret = bdrv_create(drv, filename, opts, &local_err);
3642 
3643     if (ret == -EFBIG) {
3644         /* This is generally a better message than whatever the driver would
3645          * deliver (especially because of the cluster_size_hint), since that
3646          * is most probably not much different from "image too large". */
3647         const char *cluster_size_hint = "";
3648         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3649             cluster_size_hint = " (try using a larger cluster size)";
3650         }
3651         error_setg(errp, "The image size is too large for file format '%s'"
3652                    "%s", fmt, cluster_size_hint);
3653         error_free(local_err);
3654         local_err = NULL;
3655     }
3656 
3657 out:
3658     qemu_opts_del(opts);
3659     qemu_opts_free(create_opts);
3660     if (local_err) {
3661         error_propagate(errp, local_err);
3662     }
3663 }
3664 
3665 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3666 {
3667     return bs->aio_context;
3668 }
3669 
3670 void bdrv_detach_aio_context(BlockDriverState *bs)
3671 {
3672     BdrvAioNotifier *baf;
3673 
3674     if (!bs->drv) {
3675         return;
3676     }
3677 
3678     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3679         baf->detach_aio_context(baf->opaque);
3680     }
3681 
3682     if (bs->io_limits_enabled) {
3683         throttle_detach_aio_context(&bs->throttle_state);
3684     }
3685     if (bs->drv->bdrv_detach_aio_context) {
3686         bs->drv->bdrv_detach_aio_context(bs);
3687     }
3688     if (bs->file) {
3689         bdrv_detach_aio_context(bs->file);
3690     }
3691     if (bs->backing_hd) {
3692         bdrv_detach_aio_context(bs->backing_hd);
3693     }
3694 
3695     bs->aio_context = NULL;
3696 }
3697 
3698 void bdrv_attach_aio_context(BlockDriverState *bs,
3699                              AioContext *new_context)
3700 {
3701     BdrvAioNotifier *ban;
3702 
3703     if (!bs->drv) {
3704         return;
3705     }
3706 
3707     bs->aio_context = new_context;
3708 
3709     if (bs->backing_hd) {
3710         bdrv_attach_aio_context(bs->backing_hd, new_context);
3711     }
3712     if (bs->file) {
3713         bdrv_attach_aio_context(bs->file, new_context);
3714     }
3715     if (bs->drv->bdrv_attach_aio_context) {
3716         bs->drv->bdrv_attach_aio_context(bs, new_context);
3717     }
3718     if (bs->io_limits_enabled) {
3719         throttle_attach_aio_context(&bs->throttle_state, new_context);
3720     }
3721 
3722     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3723         ban->attached_aio_context(new_context, ban->opaque);
3724     }
3725 }
3726 
3727 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3728 {
3729     bdrv_drain_all(); /* ensure there are no in-flight requests */
3730 
3731     bdrv_detach_aio_context(bs);
3732 
3733     /* This function executes in the old AioContext so acquire the new one in
3734      * case it runs in a different thread.
3735      */
3736     aio_context_acquire(new_context);
3737     bdrv_attach_aio_context(bs, new_context);
3738     aio_context_release(new_context);
3739 }
3740 
3741 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3742         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3743         void (*detach_aio_context)(void *opaque), void *opaque)
3744 {
3745     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3746     *ban = (BdrvAioNotifier){
3747         .attached_aio_context = attached_aio_context,
3748         .detach_aio_context   = detach_aio_context,
3749         .opaque               = opaque
3750     };
3751 
3752     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3753 }
3754 
3755 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3756                                       void (*attached_aio_context)(AioContext *,
3757                                                                    void *),
3758                                       void (*detach_aio_context)(void *),
3759                                       void *opaque)
3760 {
3761     BdrvAioNotifier *ban, *ban_next;
3762 
3763     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3764         if (ban->attached_aio_context == attached_aio_context &&
3765             ban->detach_aio_context   == detach_aio_context   &&
3766             ban->opaque               == opaque)
3767         {
3768             QLIST_REMOVE(ban, list);
3769             g_free(ban);
3770 
3771             return;
3772         }
3773     }
3774 
3775     abort();
3776 }
3777 
3778 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3779                        BlockDriverAmendStatusCB *status_cb)
3780 {
3781     if (!bs->drv->bdrv_amend_options) {
3782         return -ENOTSUP;
3783     }
3784     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3785 }
3786 
3787 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3788  * of block filter and by bdrv_is_first_non_filter.
3789  * It is used to test if the given bs is the candidate or recurse more in the
3790  * node graph.
3791  */
3792 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3793                                       BlockDriverState *candidate)
3794 {
3795     /* return false if basic checks fails */
3796     if (!bs || !bs->drv) {
3797         return false;
3798     }
3799 
3800     /* the code reached a non block filter driver -> check if the bs is
3801      * the same as the candidate. It's the recursion termination condition.
3802      */
3803     if (!bs->drv->is_filter) {
3804         return bs == candidate;
3805     }
3806     /* Down this path the driver is a block filter driver */
3807 
3808     /* If the block filter recursion method is defined use it to recurse down
3809      * the node graph.
3810      */
3811     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3812         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3813     }
3814 
3815     /* the driver is a block filter but don't allow to recurse -> return false
3816      */
3817     return false;
3818 }
3819 
3820 /* This function checks if the candidate is the first non filter bs down it's
3821  * bs chain. Since we don't have pointers to parents it explore all bs chains
3822  * from the top. Some filters can choose not to pass down the recursion.
3823  */
3824 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3825 {
3826     BlockDriverState *bs;
3827 
3828     /* walk down the bs forest recursively */
3829     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3830         bool perm;
3831 
3832         /* try to recurse in this top level bs */
3833         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3834 
3835         /* candidate is the first non filter */
3836         if (perm) {
3837             return true;
3838         }
3839     }
3840 
3841     return false;
3842 }
3843 
3844 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3845 {
3846     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3847     AioContext *aio_context;
3848 
3849     if (!to_replace_bs) {
3850         error_setg(errp, "Node name '%s' not found", node_name);
3851         return NULL;
3852     }
3853 
3854     aio_context = bdrv_get_aio_context(to_replace_bs);
3855     aio_context_acquire(aio_context);
3856 
3857     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3858         to_replace_bs = NULL;
3859         goto out;
3860     }
3861 
3862     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3863      * most non filter in order to prevent data corruption.
3864      * Another benefit is that this tests exclude backing files which are
3865      * blocked by the backing blockers.
3866      */
3867     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3868         error_setg(errp, "Only top most non filter can be replaced");
3869         to_replace_bs = NULL;
3870         goto out;
3871     }
3872 
3873 out:
3874     aio_context_release(aio_context);
3875     return to_replace_bs;
3876 }
3877 
3878 static bool append_open_options(QDict *d, BlockDriverState *bs)
3879 {
3880     const QDictEntry *entry;
3881     bool found_any = false;
3882 
3883     for (entry = qdict_first(bs->options); entry;
3884          entry = qdict_next(bs->options, entry))
3885     {
3886         /* Only take options for this level and exclude all non-driver-specific
3887          * options */
3888         if (!strchr(qdict_entry_key(entry), '.') &&
3889             strcmp(qdict_entry_key(entry), "node-name"))
3890         {
3891             qobject_incref(qdict_entry_value(entry));
3892             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3893             found_any = true;
3894         }
3895     }
3896 
3897     return found_any;
3898 }
3899 
3900 /* Updates the following BDS fields:
3901  *  - exact_filename: A filename which may be used for opening a block device
3902  *                    which (mostly) equals the given BDS (even without any
3903  *                    other options; so reading and writing must return the same
3904  *                    results, but caching etc. may be different)
3905  *  - full_open_options: Options which, when given when opening a block device
3906  *                       (without a filename), result in a BDS (mostly)
3907  *                       equalling the given one
3908  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3909  *              full_open_options is converted to a JSON object, prefixed with
3910  *              "json:" (for use through the JSON pseudo protocol) and put here.
3911  */
3912 void bdrv_refresh_filename(BlockDriverState *bs)
3913 {
3914     BlockDriver *drv = bs->drv;
3915     QDict *opts;
3916 
3917     if (!drv) {
3918         return;
3919     }
3920 
3921     /* This BDS's file name will most probably depend on its file's name, so
3922      * refresh that first */
3923     if (bs->file) {
3924         bdrv_refresh_filename(bs->file);
3925     }
3926 
3927     if (drv->bdrv_refresh_filename) {
3928         /* Obsolete information is of no use here, so drop the old file name
3929          * information before refreshing it */
3930         bs->exact_filename[0] = '\0';
3931         if (bs->full_open_options) {
3932             QDECREF(bs->full_open_options);
3933             bs->full_open_options = NULL;
3934         }
3935 
3936         drv->bdrv_refresh_filename(bs);
3937     } else if (bs->file) {
3938         /* Try to reconstruct valid information from the underlying file */
3939         bool has_open_options;
3940 
3941         bs->exact_filename[0] = '\0';
3942         if (bs->full_open_options) {
3943             QDECREF(bs->full_open_options);
3944             bs->full_open_options = NULL;
3945         }
3946 
3947         opts = qdict_new();
3948         has_open_options = append_open_options(opts, bs);
3949 
3950         /* If no specific options have been given for this BDS, the filename of
3951          * the underlying file should suffice for this one as well */
3952         if (bs->file->exact_filename[0] && !has_open_options) {
3953             strcpy(bs->exact_filename, bs->file->exact_filename);
3954         }
3955         /* Reconstructing the full options QDict is simple for most format block
3956          * drivers, as long as the full options are known for the underlying
3957          * file BDS. The full options QDict of that file BDS should somehow
3958          * contain a representation of the filename, therefore the following
3959          * suffices without querying the (exact_)filename of this BDS. */
3960         if (bs->file->full_open_options) {
3961             qdict_put_obj(opts, "driver",
3962                           QOBJECT(qstring_from_str(drv->format_name)));
3963             QINCREF(bs->file->full_open_options);
3964             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3965 
3966             bs->full_open_options = opts;
3967         } else {
3968             QDECREF(opts);
3969         }
3970     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3971         /* There is no underlying file BDS (at least referenced by BDS.file),
3972          * so the full options QDict should be equal to the options given
3973          * specifically for this block device when it was opened (plus the
3974          * driver specification).
3975          * Because those options don't change, there is no need to update
3976          * full_open_options when it's already set. */
3977 
3978         opts = qdict_new();
3979         append_open_options(opts, bs);
3980         qdict_put_obj(opts, "driver",
3981                       QOBJECT(qstring_from_str(drv->format_name)));
3982 
3983         if (bs->exact_filename[0]) {
3984             /* This may not work for all block protocol drivers (some may
3985              * require this filename to be parsed), but we have to find some
3986              * default solution here, so just include it. If some block driver
3987              * does not support pure options without any filename at all or
3988              * needs some special format of the options QDict, it needs to
3989              * implement the driver-specific bdrv_refresh_filename() function.
3990              */
3991             qdict_put_obj(opts, "filename",
3992                           QOBJECT(qstring_from_str(bs->exact_filename)));
3993         }
3994 
3995         bs->full_open_options = opts;
3996     }
3997 
3998     if (bs->exact_filename[0]) {
3999         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4000     } else if (bs->full_open_options) {
4001         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4002         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4003                  qstring_get_str(json));
4004         QDECREF(json);
4005     }
4006 }
4007 
4008 /* This accessor function purpose is to allow the device models to access the
4009  * BlockAcctStats structure embedded inside a BlockDriverState without being
4010  * aware of the BlockDriverState structure layout.
4011  * It will go away when the BlockAcctStats structure will be moved inside
4012  * the device models.
4013  */
4014 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4015 {
4016     return &bs->stats;
4017 }
4018