xref: /openbmc/qemu/block.c (revision 4196d2f0308cb1ae13ed450424ab7dfe154acda9)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
/**
 * Bitmap tracking dirty sectors, together with its lifecycle state.
 *
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 *
 * The state is therefore fully determined by the @successor and @disabled
 * fields; there is no separate state variable.
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into the owning
     * BlockDriverState's dirty_bitmaps list -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
70 
/* Sentinel placed in a result field until the operation has finished; see
 * the polling loop in bdrv_create(), which spins while ret == NOT_DONE. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* States created through bdrv_new_root(), linked via their device_list
 * member */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* States that received a node name in bdrv_assign_node_name(), linked via
 * their node_list member */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every driver registered with bdrv_register(); newest entries come first
 * because registration inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration so the function can be used before its definition */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * a colon (e.g. "c:"), 0 otherwise.  The second character is only inspected
 * when the first one is a letter.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
93 
94 int is_windows_drive(const char *filename)
95 {
96     if (is_windows_drive_prefix(filename) &&
97         filename[2] == '\0')
98         return 1;
99     if (strstart(filename, "\\\\.\\", NULL) ||
100         strstart(filename, "//./", NULL))
101         return 1;
102     return 0;
103 }
104 #endif
105 
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108     if (!bs || !bs->drv) {
109         /* 4k should be on the safe side */
110         return 4096;
111     }
112 
113     return bs->bl.opt_mem_alignment;
114 }
115 
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118     if (!bs || !bs->drv) {
119         /* 4k should be on the safe side */
120         return 4096;
121     }
122 
123     return bs->bl.min_mem_alignment;
124 }
125 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * delimiter found in it is a colon rather than a directory separator.
 * On Windows, drive specifications are never treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* strcspn() stops at the first delimiter, or at the terminating NUL */
    return path[strcspn(path, delimiters)] == ':';
}
143 
/*
 * Return 1 if @path is absolute, 0 otherwise.  A path starting with '/' is
 * absolute; on Windows a leading backslash or a drive specification also
 * counts.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
156 
/*
 * Combine @base_path and @filename into @dest (a buffer of @dest_size
 * bytes; nothing is written when @dest_size <= 0).
 *
 * If @filename is absolute it is simply copied to @dest.  Otherwise the
 * result is the directory part of @base_path followed by @filename.  URLs
 * are supported: when @base_path has a "<protocol>:" prefix and contains no
 * later directory separator, the prefix is kept in front of @filename.
 *
 * A colon is only treated as the end of a protocol prefix when
 * path_has_protocol() says so.  A colon that appears after a directory
 * separator (e.g. "dir/a:b") is part of a file name and must not be kept,
 * otherwise the result would be "dir/a:<filename>" instead of
 * "dir/<filename>".
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* p: first character after the protocol prefix, if there is one */
    p = base_path;
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');
        if (colon) {
            p = colon + 1;
        }
    }

    /* p1: first character after the last directory separator, if any */
    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2;
        p2 = strrchr(base_path, '\\');
        if (p2 && (!p1 || p2 > p1)) {
            p1 = p2;
        }
    }
#endif
    if (p1) {
        p1++;
    } else {
        p1 = base_path;
    }

    /* Keep whichever prefix of base_path is longer */
    if (p1 > p) {
        p = p1;
    }
    len = p - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200 
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202                                                   const char *backing,
203                                                   char *dest, size_t sz,
204                                                   Error **errp)
205 {
206     if (backing[0] == '\0' || path_has_protocol(backing) ||
207         path_is_absolute(backing))
208     {
209         pstrcpy(dest, sz, backing);
210     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211         error_setg(errp, "Cannot use relative backing file names for '%s'",
212                    backed);
213     } else {
214         path_combine(dest, sz, backed, backing);
215     }
216 }
217 
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219                                     Error **errp)
220 {
221     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222 
223     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224                                                  dest, sz, errp);
225 }
226 
/*
 * Register the block driver @bdrv: run bdrv_setup_io_funcs() on it and
 * insert it at the head of the global bdrv_drivers list, so drivers that
 * are registered later are found first by the lookup loops in this file.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
233 
234 BlockDriverState *bdrv_new_root(void)
235 {
236     BlockDriverState *bs = bdrv_new();
237 
238     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239     return bs;
240 }
241 
242 BlockDriverState *bdrv_new(void)
243 {
244     BlockDriverState *bs;
245     int i;
246 
247     bs = g_new0(BlockDriverState, 1);
248     QLIST_INIT(&bs->dirty_bitmaps);
249     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250         QLIST_INIT(&bs->op_blockers[i]);
251     }
252     bdrv_iostatus_disable(bs);
253     notifier_list_init(&bs->close_notifiers);
254     notifier_with_return_list_init(&bs->before_write_notifiers);
255     qemu_co_queue_init(&bs->throttled_reqs[0]);
256     qemu_co_queue_init(&bs->throttled_reqs[1]);
257     bs->refcnt = 1;
258     bs->aio_context = qemu_get_aio_context();
259 
260     return bs;
261 }
262 
/*
 * Add @notify to the close_notifiers list of @bs.
 */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
267 
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270     BlockDriver *drv1;
271     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272         if (!strcmp(drv1->format_name, format_name)) {
273             return drv1;
274         }
275     }
276     return NULL;
277 }
278 
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281     static const char *whitelist_rw[] = {
282         CONFIG_BDRV_RW_WHITELIST
283     };
284     static const char *whitelist_ro[] = {
285         CONFIG_BDRV_RO_WHITELIST
286     };
287     const char **p;
288 
289     if (!whitelist_rw[0] && !whitelist_ro[0]) {
290         return 1;               /* no whitelist, anything goes */
291     }
292 
293     for (p = whitelist_rw; *p; p++) {
294         if (!strcmp(drv->format_name, *p)) {
295             return 1;
296         }
297     }
298     if (read_only) {
299         for (p = whitelist_ro; *p; p++) {
300             if (!strcmp(drv->format_name, *p)) {
301                 return 1;
302             }
303         }
304     }
305     return 0;
306 }
307 
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309                                           bool read_only)
310 {
311     BlockDriver *drv = bdrv_find_format(format_name);
312     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314 
/*
 * Argument/result block handed from bdrv_create() to bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name made by bdrv_create() */
    QemuOpts *opts;     /* creation options passed through to the driver */
    int ret;            /* NOT_DONE until the callback has returned, then
                           its return value */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
322 
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325     Error *local_err = NULL;
326     int ret;
327 
328     CreateCo *cco = opaque;
329     assert(cco->drv);
330 
331     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332     if (local_err) {
333         error_propagate(&cco->err, local_err);
334     }
335     cco->ret = ret;
336 }
337 
338 int bdrv_create(BlockDriver *drv, const char* filename,
339                 QemuOpts *opts, Error **errp)
340 {
341     int ret;
342 
343     Coroutine *co;
344     CreateCo cco = {
345         .drv = drv,
346         .filename = g_strdup(filename),
347         .opts = opts,
348         .ret = NOT_DONE,
349         .err = NULL,
350     };
351 
352     if (!drv->bdrv_create) {
353         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354         ret = -ENOTSUP;
355         goto out;
356     }
357 
358     if (qemu_in_coroutine()) {
359         /* Fast-path if already in coroutine context */
360         bdrv_create_co_entry(&cco);
361     } else {
362         co = qemu_coroutine_create(bdrv_create_co_entry);
363         qemu_coroutine_enter(co, &cco);
364         while (cco.ret == NOT_DONE) {
365             aio_poll(qemu_get_aio_context(), true);
366         }
367     }
368 
369     ret = cco.ret;
370     if (ret < 0) {
371         if (cco.err) {
372             error_propagate(errp, cco.err);
373         } else {
374             error_setg_errno(errp, -ret, "Could not create image");
375         }
376     }
377 
378 out:
379     g_free(cco.filename);
380     return ret;
381 }
382 
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385     BlockDriver *drv;
386     Error *local_err = NULL;
387     int ret;
388 
389     drv = bdrv_find_protocol(filename, true, errp);
390     if (drv == NULL) {
391         return -ENOENT;
392     }
393 
394     ret = bdrv_create(drv, filename, opts, &local_err);
395     if (local_err) {
396         error_propagate(errp, local_err);
397     }
398     return ret;
399 }
400 
401 /**
402  * Try to get @bs's logical and physical block size.
403  * On success, store them in @bsz struct and return 0.
404  * On failure return -errno.
405  * @bs must not be empty.
406  */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409     BlockDriver *drv = bs->drv;
410 
411     if (drv && drv->bdrv_probe_blocksizes) {
412         return drv->bdrv_probe_blocksizes(bs, bsz);
413     }
414 
415     return -ENOTSUP;
416 }
417 
418 /**
419  * Try to get @bs's geometry (cyls, heads, sectors).
420  * On success, store them in @geo struct and return 0.
421  * On failure return -errno.
422  * @bs must not be empty.
423  */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426     BlockDriver *drv = bs->drv;
427 
428     if (drv && drv->bdrv_probe_geometry) {
429         return drv->bdrv_probe_geometry(bs, geo);
430     }
431 
432     return -ENOTSUP;
433 }
434 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the name does not fit into @filename).  On Windows the negated
 * GetLastError() code is returned on failure.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int written;
    int saved_errno;
    const char *tmpdir;

    /* A non-positive size can never hold the name; do not let it be
     * converted to a huge size_t by snprintf() */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    written = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (written < 0 || written >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember the close() failure */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
470 
471 /*
472  * Detect host devices. By convention, /dev/cdrom[N] is always
473  * recognized as a host CDROM.
474  */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477     int score_max = 0, score;
478     BlockDriver *drv = NULL, *d;
479 
480     QLIST_FOREACH(d, &bdrv_drivers, list) {
481         if (d->bdrv_probe_device) {
482             score = d->bdrv_probe_device(filename);
483             if (score > score_max) {
484                 score_max = score;
485                 drv = d;
486             }
487         }
488     }
489 
490     return drv;
491 }
492 
493 BlockDriver *bdrv_find_protocol(const char *filename,
494                                 bool allow_protocol_prefix,
495                                 Error **errp)
496 {
497     BlockDriver *drv1;
498     char protocol[128];
499     int len;
500     const char *p;
501 
502     /* TODO Drivers without bdrv_file_open must be specified explicitly */
503 
504     /*
505      * XXX(hch): we really should not let host device detection
506      * override an explicit protocol specification, but moving this
507      * later breaks access to device names with colons in them.
508      * Thanks to the brain-dead persistent naming schemes on udev-
509      * based Linux systems those actually are quite common.
510      */
511     drv1 = find_hdev_driver(filename);
512     if (drv1) {
513         return drv1;
514     }
515 
516     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517         return &bdrv_file;
518     }
519 
520     p = strchr(filename, ':');
521     assert(p != NULL);
522     len = p - filename;
523     if (len > sizeof(protocol) - 1)
524         len = sizeof(protocol) - 1;
525     memcpy(protocol, filename, len);
526     protocol[len] = '\0';
527     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528         if (drv1->protocol_name &&
529             !strcmp(drv1->protocol_name, protocol)) {
530             return drv1;
531         }
532     }
533 
534     error_setg(errp, "Unknown protocol '%s'", protocol);
535     return NULL;
536 }
537 
538 /*
539  * Guess image format by probing its contents.
540  * This is not a good idea when your image is raw (CVE-2008-2004), but
541  * we do it anyway for backward compatibility.
542  *
543  * @buf         contains the image's first @buf_size bytes.
544  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545  *              but can be smaller if the image file is smaller)
546  * @filename    is its filename.
547  *
548  * For all block drivers, call the bdrv_probe() method to get its
549  * probing score.
550  * Return the first block driver with the highest probing score.
551  */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553                             const char *filename)
554 {
555     int score_max = 0, score;
556     BlockDriver *drv = NULL, *d;
557 
558     QLIST_FOREACH(d, &bdrv_drivers, list) {
559         if (d->bdrv_probe) {
560             score = d->bdrv_probe(buf, buf_size, filename);
561             if (score > score_max) {
562                 score_max = score;
563                 drv = d;
564             }
565         }
566     }
567 
568     return drv;
569 }
570 
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572                              BlockDriver **pdrv, Error **errp)
573 {
574     BlockDriver *drv;
575     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576     int ret = 0;
577 
578     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580         *pdrv = &bdrv_raw;
581         return ret;
582     }
583 
584     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585     if (ret < 0) {
586         error_setg_errno(errp, -ret, "Could not read image for determining its "
587                          "format");
588         *pdrv = NULL;
589         return ret;
590     }
591 
592     drv = bdrv_probe_all(buf, ret, filename);
593     if (!drv) {
594         error_setg(errp, "Could not determine image format: No compatible "
595                    "driver found");
596         ret = -ENOENT;
597     }
598     *pdrv = drv;
599     return ret;
600 }
601 
602 /**
603  * Set the current 'total_sectors' value
604  * Return 0 on success, -errno on error.
605  */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608     BlockDriver *drv = bs->drv;
609 
610     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611     if (bs->sg)
612         return 0;
613 
614     /* query actual device if possible, otherwise just trust the hint */
615     if (drv->bdrv_getlength) {
616         int64_t length = drv->bdrv_getlength(bs);
617         if (length < 0) {
618             return length;
619         }
620         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621     }
622 
623     bs->total_sectors = hint;
624     return 0;
625 }
626 
627 /**
628  * Set open flags for a given discard mode
629  *
630  * Return 0 on success, -1 if the discard mode was invalid.
631  */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634     *flags &= ~BDRV_O_UNMAP;
635 
636     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637         /* do nothing */
638     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639         *flags |= BDRV_O_UNMAP;
640     } else {
641         return -1;
642     }
643 
644     return 0;
645 }
646 
647 /**
648  * Set open flags for a given cache mode
649  *
650  * Return 0 on success, -1 if the cache mode was invalid.
651  */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654     *flags &= ~BDRV_O_CACHE_MASK;
655 
656     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658     } else if (!strcmp(mode, "directsync")) {
659         *flags |= BDRV_O_NOCACHE;
660     } else if (!strcmp(mode, "writeback")) {
661         *flags |= BDRV_O_CACHE_WB;
662     } else if (!strcmp(mode, "unsafe")) {
663         *flags |= BDRV_O_CACHE_WB;
664         *flags |= BDRV_O_NO_FLUSH;
665     } else if (!strcmp(mode, "writethrough")) {
666         /* this is the default */
667     } else {
668         return -1;
669     }
670 
671     return 0;
672 }
673 
674 /*
675  * Returns the flags that a temporary snapshot should get, based on the
676  * originally requested flags (the originally requested image will have flags
677  * like a backing file)
678  */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683 
684 /*
685  * Returns the flags that bs->file should get, based on the given flags for
686  * the parent BDS
687  */
688 static int bdrv_inherited_flags(int flags)
689 {
690     /* Enable protocol handling, disable format probing for bs->file */
691     flags |= BDRV_O_PROTOCOL;
692 
693     /* Our block drivers take care to send flushes and respect unmap policy,
694      * so we can enable both unconditionally on lower layers. */
695     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696 
697     /* Clear flags that only apply to the top layer */
698     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699 
700     return flags;
701 }
702 
703 /*
704  * Returns the flags that bs->backing_hd should get, based on the given flags
705  * for the parent BDS
706  */
707 static int bdrv_backing_flags(int flags)
708 {
709     /* backing files always opened read-only */
710     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711 
712     /* snapshot=on is handled on the top layer */
713     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714 
715     return flags;
716 }
717 
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720     int open_flags = flags | BDRV_O_CACHE_WB;
721 
722     /*
723      * Clear flags that are internal to the block layer before opening the
724      * image.
725      */
726     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727 
728     /*
729      * Snapshots should be writable.
730      */
731     if (flags & BDRV_O_TEMPORARY) {
732         open_flags |= BDRV_O_RDWR;
733     }
734 
735     return open_flags;
736 }
737 
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739                                   const char *node_name,
740                                   Error **errp)
741 {
742     if (!node_name) {
743         return;
744     }
745 
746     /* Check for empty string or invalid characters */
747     if (!id_wellformed(node_name)) {
748         error_setg(errp, "Invalid node name");
749         return;
750     }
751 
752     /* takes care of avoiding namespaces collisions */
753     if (blk_by_name(node_name)) {
754         error_setg(errp, "node-name=%s is conflicting with a device id",
755                    node_name);
756         return;
757     }
758 
759     /* takes care of avoiding duplicates node names */
760     if (bdrv_find_node(node_name)) {
761         error_setg(errp, "Duplicate node name");
762         return;
763     }
764 
765     /* copy node name into the bs and insert it into the graph list */
766     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769 
/*
 * Common part for opening disk images and files
 *
 * @bs:      state to initialise; bs->file must be NULL on entry
 * @file:    BlockDriverState to use as bs->file (or, for drivers with a
 *           bdrv_file_open callback, to swap into @bs), or NULL
 * @options: option dictionary; must be non-NULL and differ from bs->options
 * @flags:   BDRV_O_* flags as requested by the caller
 * @drv:     driver to open @bs with; must be non-NULL
 * @errp:    set on failure
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success, a negative errno value on failure.  Failures that
 * happen after bs->opaque has been allocated go through free_and_fail, which
 * resets bs->file, bs->opaque and bs->drv.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the lower layer if there is one, otherwise
     * from the "filename" option (which may be absent) */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* NOTE(review): once the node name is assigned, the error returns below
     * do not remove @bs from the node list again; presumably the caller
     * cleans this up -- confirm */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults set before the driver's open function is called */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* The message distinguishes a driver that is whitelisted for read-only
     * use only from one that is not whitelisted at all */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        /* A driver without bdrv_file_open needs a lower layer to work on */
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    /* Sanity-check the alignment values before returning success */
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the driver attachment; @file itself is not released here */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
914 
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917     QObject *options_obj;
918     QDict *options;
919     int ret;
920 
921     ret = strstart(filename, "json:", &filename);
922     assert(ret);
923 
924     options_obj = qobject_from_json(filename);
925     if (!options_obj) {
926         error_setg(errp, "Could not parse the JSON options");
927         return NULL;
928     }
929 
930     if (qobject_type(options_obj) != QTYPE_QDICT) {
931         qobject_decref(options_obj);
932         error_setg(errp, "Invalid JSON object given");
933         return NULL;
934     }
935 
936     options = qobject_to_qdict(options_obj);
937     qdict_flatten(options);
938 
939     return options;
940 }
941 
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * @options:   in/out option dictionary; entries such as "filename" and
 *             "driver" may be added, and "filename" may be removed again
 *             after driver-specific parsing
 * @pfilename: in/out legacy file name; set to NULL when it was a "json:"
 *             pseudo-filename whose contents were merged into *@options
 * @flags:     only BDRV_O_PROTOCOL is examined here
 * @drv:       driver requested by the caller, or NULL
 * @errp:      set on failure
 *
 * Returns 0 on success, -EINVAL or -ENOENT on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Only a legacy file name gets driver-specific parsing below */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit @drv and a "driver" option are mutually exclusive */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            /* No driver given at the protocol level: derive it from the
             * file name */
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* At the protocol level a driver must have been determined by now */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Drop the raw file name unless the driver still needs it */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1033 
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036 
1037     if (bs->backing_hd) {
1038         assert(bs->backing_blocker);
1039         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040     } else if (backing_hd) {
1041         error_setg(&bs->backing_blocker,
1042                    "node is used as backing hd of '%s'",
1043                    bdrv_get_device_or_node_name(bs));
1044     }
1045 
1046     bs->backing_hd = backing_hd;
1047     if (!backing_hd) {
1048         error_free(bs->backing_blocker);
1049         bs->backing_blocker = NULL;
1050         goto out;
1051     }
1052     bs->open_flags &= ~BDRV_O_NO_BACKING;
1053     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055             backing_hd->drv ? backing_hd->drv->format_name : "");
1056 
1057     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060                     bs->backing_blocker);
1061 out:
1062     bdrv_refresh_limits(bs, NULL);
1063 }
1064 
1065 /*
1066  * Opens the backing file for a BlockDriverState if not yet open
1067  *
1068  * options is a QDict of options to pass to the block drivers, or NULL for an
1069  * empty set of options. The reference to the QDict is transferred to this
1070  * function (even on failure), so if the caller intends to reuse the dictionary,
1071  * it needs to use QINCREF() before calling bdrv_file_open.
1072  */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075     char *backing_filename = g_malloc0(PATH_MAX);
1076     int ret = 0;
1077     BlockDriverState *backing_hd;
1078     Error *local_err = NULL;
1079 
1080     if (bs->backing_hd != NULL) {
1081         QDECREF(options);
1082         goto free_exit;
1083     }
1084 
1085     /* NULL means an empty set of options */
1086     if (options == NULL) {
1087         options = qdict_new();
1088     }
1089 
1090     bs->open_flags &= ~BDRV_O_NO_BACKING;
1091     if (qdict_haskey(options, "file.filename")) {
1092         backing_filename[0] = '\0';
1093     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094         QDECREF(options);
1095         goto free_exit;
1096     } else {
1097         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098                                        &local_err);
1099         if (local_err) {
1100             ret = -EINVAL;
1101             error_propagate(errp, local_err);
1102             QDECREF(options);
1103             goto free_exit;
1104         }
1105     }
1106 
1107     if (!bs->drv || !bs->drv->supports_backing) {
1108         ret = -EINVAL;
1109         error_setg(errp, "Driver doesn't support backing files");
1110         QDECREF(options);
1111         goto free_exit;
1112     }
1113 
1114     backing_hd = bdrv_new();
1115 
1116     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118     }
1119 
1120     assert(bs->backing_hd == NULL);
1121     ret = bdrv_open(&backing_hd,
1122                     *backing_filename ? backing_filename : NULL, NULL, options,
1123                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124     if (ret < 0) {
1125         bdrv_unref(backing_hd);
1126         backing_hd = NULL;
1127         bs->open_flags |= BDRV_O_NO_BACKING;
1128         error_setg(errp, "Could not open backing file: %s",
1129                    error_get_pretty(local_err));
1130         error_free(local_err);
1131         goto free_exit;
1132     }
1133     bdrv_set_backing_hd(bs, backing_hd);
1134 
1135 free_exit:
1136     g_free(backing_filename);
1137     return ret;
1138 }
1139 
1140 /*
1141  * Opens a disk image whose options are given as BlockdevRef in another block
1142  * device's options.
1143  *
1144  * If allow_none is true, no image will be opened if filename is false and no
1145  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146  *
1147  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149  * itself, all options starting with "${bdref_key}." are considered part of the
1150  * BlockdevRef.
1151  *
1152  * The BlockdevRef will be removed from the options QDict.
1153  *
1154  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155  */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157                     QDict *options, const char *bdref_key, int flags,
1158                     bool allow_none, Error **errp)
1159 {
1160     QDict *image_options;
1161     int ret;
1162     char *bdref_key_dot;
1163     const char *reference;
1164 
1165     assert(pbs);
1166     assert(*pbs == NULL);
1167 
1168     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170     g_free(bdref_key_dot);
1171 
1172     reference = qdict_get_try_str(options, bdref_key);
1173     if (!filename && !reference && !qdict_size(image_options)) {
1174         if (allow_none) {
1175             ret = 0;
1176         } else {
1177             error_setg(errp, "A block device must be specified for \"%s\"",
1178                        bdref_key);
1179             ret = -EINVAL;
1180         }
1181         QDECREF(image_options);
1182         goto done;
1183     }
1184 
1185     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186 
1187 done:
1188     qdict_del(options, bdref_key);
1189     return ret;
1190 }
1191 
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196     int64_t total_size;
1197     QemuOpts *opts = NULL;
1198     QDict *snapshot_options;
1199     BlockDriverState *bs_snapshot;
1200     Error *local_err;
1201     int ret;
1202 
1203     /* if snapshot, we create a temporary backing file and open it
1204        instead of opening 'filename' directly */
1205 
1206     /* Get the required size from the image */
1207     total_size = bdrv_getlength(bs);
1208     if (total_size < 0) {
1209         ret = total_size;
1210         error_setg_errno(errp, -total_size, "Could not get image size");
1211         goto out;
1212     }
1213 
1214     /* Create the temporary image */
1215     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216     if (ret < 0) {
1217         error_setg_errno(errp, -ret, "Could not get temporary filename");
1218         goto out;
1219     }
1220 
1221     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222                             &error_abort);
1223     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225     qemu_opts_del(opts);
1226     if (ret < 0) {
1227         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228                          "'%s': %s", tmp_filename,
1229                          error_get_pretty(local_err));
1230         error_free(local_err);
1231         goto out;
1232     }
1233 
1234     /* Prepare a new options QDict for the temporary file */
1235     snapshot_options = qdict_new();
1236     qdict_put(snapshot_options, "file.driver",
1237               qstring_from_str("file"));
1238     qdict_put(snapshot_options, "file.filename",
1239               qstring_from_str(tmp_filename));
1240 
1241     bs_snapshot = bdrv_new();
1242 
1243     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244                     flags, &bdrv_qcow2, &local_err);
1245     if (ret < 0) {
1246         error_propagate(errp, local_err);
1247         goto out;
1248     }
1249 
1250     bdrv_append(bs_snapshot, bs);
1251 
1252 out:
1253     g_free(tmp_filename);
1254     return ret;
1255 }
1256 
1257 /*
1258  * Opens a disk image (raw, qcow2, vmdk, ...)
1259  *
1260  * options is a QDict of options to pass to the block drivers, or NULL for an
1261  * empty set of options. The reference to the QDict belongs to the block layer
1262  * after the call (even on failure), so if the caller intends to reuse the
1263  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264  *
1265  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266  * If it is not NULL, the referenced BDS will be reused.
1267  *
1268  * The reference parameter may be used to specify an existing block device which
1269  * should be opened. If specified, neither options nor a filename may be given,
1270  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271  */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273               const char *reference, QDict *options, int flags,
1274               BlockDriver *drv, Error **errp)
1275 {
1276     int ret;
1277     BlockDriverState *file = NULL, *bs;
1278     const char *drvname;
1279     Error *local_err = NULL;
1280     int snapshot_flags = 0;
1281 
1282     assert(pbs);
1283 
1284     if (reference) {
1285         bool options_non_empty = options ? qdict_size(options) : false;
1286         QDECREF(options);
1287 
1288         if (*pbs) {
1289             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290                        "another block device");
1291             return -EINVAL;
1292         }
1293 
1294         if (filename || options_non_empty) {
1295             error_setg(errp, "Cannot reference an existing block device with "
1296                        "additional options or a new filename");
1297             return -EINVAL;
1298         }
1299 
1300         bs = bdrv_lookup_bs(reference, reference, errp);
1301         if (!bs) {
1302             return -ENODEV;
1303         }
1304         bdrv_ref(bs);
1305         *pbs = bs;
1306         return 0;
1307     }
1308 
1309     if (*pbs) {
1310         bs = *pbs;
1311     } else {
1312         bs = bdrv_new();
1313     }
1314 
1315     /* NULL means an empty set of options */
1316     if (options == NULL) {
1317         options = qdict_new();
1318     }
1319 
1320     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321     if (local_err) {
1322         goto fail;
1323     }
1324 
1325     /* Find the right image format driver */
1326     drv = NULL;
1327     drvname = qdict_get_try_str(options, "driver");
1328     if (drvname) {
1329         drv = bdrv_find_format(drvname);
1330         qdict_del(options, "driver");
1331         if (!drv) {
1332             error_setg(errp, "Unknown driver: '%s'", drvname);
1333             ret = -EINVAL;
1334             goto fail;
1335         }
1336     }
1337 
1338     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339     if (drv && !drv->bdrv_file_open) {
1340         /* If the user explicitly wants a format driver here, we'll need to add
1341          * another layer for the protocol in bs->file */
1342         flags &= ~BDRV_O_PROTOCOL;
1343     }
1344 
1345     bs->options = options;
1346     options = qdict_clone_shallow(options);
1347 
1348     /* Open image file without format layer */
1349     if ((flags & BDRV_O_PROTOCOL) == 0) {
1350         if (flags & BDRV_O_RDWR) {
1351             flags |= BDRV_O_ALLOW_RDWR;
1352         }
1353         if (flags & BDRV_O_SNAPSHOT) {
1354             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355             flags = bdrv_backing_flags(flags);
1356         }
1357 
1358         assert(file == NULL);
1359         ret = bdrv_open_image(&file, filename, options, "file",
1360                               bdrv_inherited_flags(flags),
1361                               true, &local_err);
1362         if (ret < 0) {
1363             goto fail;
1364         }
1365     }
1366 
1367     /* Image format probing */
1368     bs->probed = !drv;
1369     if (!drv && file) {
1370         ret = find_image_format(file, filename, &drv, &local_err);
1371         if (ret < 0) {
1372             goto fail;
1373         }
1374     } else if (!drv) {
1375         error_setg(errp, "Must specify either driver or file");
1376         ret = -EINVAL;
1377         goto fail;
1378     }
1379 
1380     /* Open the image */
1381     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382     if (ret < 0) {
1383         goto fail;
1384     }
1385 
1386     if (file && (bs->file != file)) {
1387         bdrv_unref(file);
1388         file = NULL;
1389     }
1390 
1391     /* If there is a backing file, use it */
1392     if ((flags & BDRV_O_NO_BACKING) == 0) {
1393         QDict *backing_options;
1394 
1395         qdict_extract_subqdict(options, &backing_options, "backing.");
1396         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397         if (ret < 0) {
1398             goto close_and_fail;
1399         }
1400     }
1401 
1402     bdrv_refresh_filename(bs);
1403 
1404     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405      * temporary snapshot afterwards. */
1406     if (snapshot_flags) {
1407         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408         if (local_err) {
1409             goto close_and_fail;
1410         }
1411     }
1412 
1413     /* Check if any unknown options were used */
1414     if (options && (qdict_size(options) != 0)) {
1415         const QDictEntry *entry = qdict_first(options);
1416         if (flags & BDRV_O_PROTOCOL) {
1417             error_setg(errp, "Block protocol '%s' doesn't support the option "
1418                        "'%s'", drv->format_name, entry->key);
1419         } else {
1420             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421                        "support the option '%s'", drv->format_name,
1422                        bdrv_get_device_name(bs), entry->key);
1423         }
1424 
1425         ret = -EINVAL;
1426         goto close_and_fail;
1427     }
1428 
1429     if (!bdrv_key_required(bs)) {
1430         if (bs->blk) {
1431             blk_dev_change_media_cb(bs->blk, true);
1432         }
1433     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434                && !runstate_check(RUN_STATE_INMIGRATE)
1435                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436         error_setg(errp,
1437                    "Guest must be stopped for opening of encrypted image");
1438         ret = -EBUSY;
1439         goto close_and_fail;
1440     }
1441 
1442     QDECREF(options);
1443     *pbs = bs;
1444     return 0;
1445 
1446 fail:
1447     if (file != NULL) {
1448         bdrv_unref(file);
1449     }
1450     QDECREF(bs->options);
1451     QDECREF(options);
1452     bs->options = NULL;
1453     if (!*pbs) {
1454         /* If *pbs is NULL, a new BDS has been created in this function and
1455            needs to be freed now. Otherwise, it does not need to be closed,
1456            since it has not really been opened yet. */
1457         bdrv_unref(bs);
1458     }
1459     if (local_err) {
1460         error_propagate(errp, local_err);
1461     }
1462     return ret;
1463 
1464 close_and_fail:
1465     /* See fail path, but now the BDS has to be always closed */
1466     if (*pbs) {
1467         bdrv_close(bs);
1468     } else {
1469         bdrv_unref(bs);
1470     }
1471     QDECREF(options);
1472     if (local_err) {
1473         error_propagate(errp, local_err);
1474     }
1475     return ret;
1476 }
1477 
/* One element of a BlockReopenQueue: the reopen state of a single BDS. */
typedef struct BlockReopenQueueEntry {
     /* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded for
      * this entry; only prepared entries are aborted on failure. */
     bool prepared;
     /* The node being reopened and its requested flags */
     BDRVReopenState state;
     /* Linkage within the BlockReopenQueue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1483 
1484 /*
1485  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486  * reopen of multiple devices.
1487  *
1488  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490  * be created and initialized. This newly created BlockReopenQueue should be
1491  * passed back in for subsequent calls that are intended to be of the same
1492  * atomic 'set'.
1493  *
1494  * bs is the BlockDriverState to add to the reopen queue.
1495  *
1496  * flags contains the open flags for the associated bs
1497  *
1498  * returns a pointer to bs_queue, which is either the newly allocated
1499  * bs_queue, or the existing bs_queue being used.
1500  *
1501  */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503                                     BlockDriverState *bs, int flags)
1504 {
1505     assert(bs != NULL);
1506 
1507     BlockReopenQueueEntry *bs_entry;
1508     if (bs_queue == NULL) {
1509         bs_queue = g_new0(BlockReopenQueue, 1);
1510         QSIMPLEQ_INIT(bs_queue);
1511     }
1512 
1513     /* bdrv_open() masks this flag out */
1514     flags &= ~BDRV_O_PROTOCOL;
1515 
1516     if (bs->file) {
1517         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518     }
1519 
1520     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522 
1523     bs_entry->state.bs = bs;
1524     bs_entry->state.flags = flags;
1525 
1526     return bs_queue;
1527 }
1528 
1529 /*
1530  * Reopen multiple BlockDriverStates atomically & transactionally.
1531  *
1532  * The queue passed in (bs_queue) must have been built up previous
1533  * via bdrv_reopen_queue().
1534  *
1535  * Reopens all BDS specified in the queue, with the appropriate
1536  * flags.  All devices are prepared for reopen, and failure of any
1537  * device will cause all device changes to be abandonded, and intermediate
1538  * data cleaned up.
1539  *
1540  * If all devices prepare successfully, then the changes are committed
1541  * to all devices.
1542  *
1543  */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546     int ret = -1;
1547     BlockReopenQueueEntry *bs_entry, *next;
1548     Error *local_err = NULL;
1549 
1550     assert(bs_queue != NULL);
1551 
1552     bdrv_drain_all();
1553 
1554     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556             error_propagate(errp, local_err);
1557             goto cleanup;
1558         }
1559         bs_entry->prepared = true;
1560     }
1561 
1562     /* If we reach this point, we have success and just need to apply the
1563      * changes
1564      */
1565     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566         bdrv_reopen_commit(&bs_entry->state);
1567     }
1568 
1569     ret = 0;
1570 
1571 cleanup:
1572     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573         if (ret && bs_entry->prepared) {
1574             bdrv_reopen_abort(&bs_entry->state);
1575         }
1576         g_free(bs_entry);
1577     }
1578     g_free(bs_queue);
1579     return ret;
1580 }
1581 
1582 
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586     int ret = -1;
1587     Error *local_err = NULL;
1588     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589 
1590     ret = bdrv_reopen_multiple(queue, &local_err);
1591     if (local_err != NULL) {
1592         error_propagate(errp, local_err);
1593     }
1594     return ret;
1595 }
1596 
1597 
1598 /*
1599  * Prepares a BlockDriverState for reopen. All changes are staged in the
1600  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601  * the block driver layer .bdrv_reopen_prepare()
1602  *
1603  * bs is the BlockDriverState to reopen
1604  * flags are the new open flags
1605  * queue is the reopen queue
1606  *
1607  * Returns 0 on success, non-zero on error.  On error errp will be set
1608  * as well.
1609  *
1610  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611  * It is the responsibility of the caller to then call the abort() or
1612  * commit() for any other BDS that have been left in a prepare() state
1613  *
1614  */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616                         Error **errp)
1617 {
1618     int ret = -1;
1619     Error *local_err = NULL;
1620     BlockDriver *drv;
1621 
1622     assert(reopen_state != NULL);
1623     assert(reopen_state->bs->drv != NULL);
1624     drv = reopen_state->bs->drv;
1625 
1626     /* if we are to stay read-only, do not allow permission change
1627      * to r/w */
1628     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629         reopen_state->flags & BDRV_O_RDWR) {
1630         error_setg(errp, "Node '%s' is read only",
1631                    bdrv_get_device_or_node_name(reopen_state->bs));
1632         goto error;
1633     }
1634 
1635 
1636     ret = bdrv_flush(reopen_state->bs);
1637     if (ret) {
1638         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639                   strerror(-ret));
1640         goto error;
1641     }
1642 
1643     if (drv->bdrv_reopen_prepare) {
1644         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645         if (ret) {
1646             if (local_err != NULL) {
1647                 error_propagate(errp, local_err);
1648             } else {
1649                 error_setg(errp, "failed while preparing to reopen image '%s'",
1650                            reopen_state->bs->filename);
1651             }
1652             goto error;
1653         }
1654     } else {
1655         /* It is currently mandatory to have a bdrv_reopen_prepare()
1656          * handler for each supported drv. */
1657         error_setg(errp, "Block format '%s' used by node '%s' "
1658                    "does not support reopening files", drv->format_name,
1659                    bdrv_get_device_or_node_name(reopen_state->bs));
1660         ret = -1;
1661         goto error;
1662     }
1663 
1664     ret = 0;
1665 
1666 error:
1667     return ret;
1668 }
1669 
1670 /*
1671  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672  * makes them final by swapping the staging BlockDriverState contents into
1673  * the active BlockDriverState contents.
1674  */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677     BlockDriver *drv;
1678 
1679     assert(reopen_state != NULL);
1680     drv = reopen_state->bs->drv;
1681     assert(drv != NULL);
1682 
1683     /* If there are any driver level actions to take */
1684     if (drv->bdrv_reopen_commit) {
1685         drv->bdrv_reopen_commit(reopen_state);
1686     }
1687 
1688     /* set BDS specific flags now */
1689     reopen_state->bs->open_flags         = reopen_state->flags;
1690     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691                                               BDRV_O_CACHE_WB);
1692     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693 
1694     bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696 
1697 /*
1698  * Abort the reopen, and delete and free the staged changes in
1699  * reopen_state
1700  */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703     BlockDriver *drv;
1704 
1705     assert(reopen_state != NULL);
1706     drv = reopen_state->bs->drv;
1707     assert(drv != NULL);
1708 
1709     if (drv->bdrv_reopen_abort) {
1710         drv->bdrv_reopen_abort(reopen_state);
1711     }
1712 }
1713 
1714 
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717     BdrvAioNotifier *ban, *ban_next;
1718 
1719     if (bs->job) {
1720         block_job_cancel_sync(bs->job);
1721     }
1722     bdrv_drain_all(); /* complete I/O */
1723     bdrv_flush(bs);
1724     bdrv_drain_all(); /* in case flush left pending I/O */
1725     notifier_list_notify(&bs->close_notifiers, bs);
1726 
1727     if (bs->drv) {
1728         if (bs->backing_hd) {
1729             BlockDriverState *backing_hd = bs->backing_hd;
1730             bdrv_set_backing_hd(bs, NULL);
1731             bdrv_unref(backing_hd);
1732         }
1733         bs->drv->bdrv_close(bs);
1734         g_free(bs->opaque);
1735         bs->opaque = NULL;
1736         bs->drv = NULL;
1737         bs->copy_on_read = 0;
1738         bs->backing_file[0] = '\0';
1739         bs->backing_format[0] = '\0';
1740         bs->total_sectors = 0;
1741         bs->encrypted = 0;
1742         bs->valid_key = 0;
1743         bs->sg = 0;
1744         bs->zero_beyond_eof = false;
1745         QDECREF(bs->options);
1746         bs->options = NULL;
1747         QDECREF(bs->full_open_options);
1748         bs->full_open_options = NULL;
1749 
1750         if (bs->file != NULL) {
1751             bdrv_unref(bs->file);
1752             bs->file = NULL;
1753         }
1754     }
1755 
1756     if (bs->blk) {
1757         blk_dev_change_media_cb(bs->blk, false);
1758     }
1759 
1760     /*throttling disk I/O limits*/
1761     if (bs->io_limits_enabled) {
1762         bdrv_io_limits_disable(bs);
1763     }
1764 
1765     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766         g_free(ban);
1767     }
1768     QLIST_INIT(&bs->aio_notifiers);
1769 }
1770 
1771 void bdrv_close_all(void)
1772 {
1773     BlockDriverState *bs;
1774 
1775     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776         AioContext *aio_context = bdrv_get_aio_context(bs);
1777 
1778         aio_context_acquire(aio_context);
1779         bdrv_close(bs);
1780         aio_context_release(aio_context);
1781     }
1782 }
1783 
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785  * graph_bdrv_state list.
1786    Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789     /*
1790      * Take care to remove bs from bdrv_states only when it's actually
1791      * in it.  Note that bs->device_list.tqe_prev is initially null,
1792      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1793      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794      * resetting it to null on remove.
1795      */
1796     if (bs->device_list.tqe_prev) {
1797         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798         bs->device_list.tqe_prev = NULL;
1799     }
1800     if (bs->node_name[0] != '\0') {
1801         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802     }
1803     bs->node_name[0] = '\0';
1804 }
1805 
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808     if (bs->drv && bs->drv->bdrv_rebind) {
1809         bs->drv->bdrv_rebind(bs);
1810     }
1811 }
1812 
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814                                      BlockDriverState *bs_src)
1815 {
1816     /* move some fields that need to stay attached to the device */
1817 
1818     /* dev info */
1819     bs_dest->guest_block_size   = bs_src->guest_block_size;
1820     bs_dest->copy_on_read       = bs_src->copy_on_read;
1821 
1822     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823 
1824     /* i/o throttled req */
1825     memcpy(&bs_dest->throttle_state,
1826            &bs_src->throttle_state,
1827            sizeof(ThrottleState));
1828     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1829     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1830     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1831 
1832     /* r/w error */
1833     bs_dest->on_read_error      = bs_src->on_read_error;
1834     bs_dest->on_write_error     = bs_src->on_write_error;
1835 
1836     /* i/o status */
1837     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1838     bs_dest->iostatus           = bs_src->iostatus;
1839 
1840     /* dirty bitmap */
1841     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1842 
1843     /* reference count */
1844     bs_dest->refcnt             = bs_src->refcnt;
1845 
1846     /* job */
1847     bs_dest->job                = bs_src->job;
1848 
1849     /* keep the same entry in bdrv_states */
1850     bs_dest->device_list = bs_src->device_list;
1851     bs_dest->blk = bs_src->blk;
1852 
1853     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1854            sizeof(bs_dest->op_blockers));
1855 }
1856 
1857 /*
1858  * Swap bs contents for two image chains while they are live,
1859  * while keeping required fields on the BlockDriverState that is
1860  * actually attached to a device.
1861  *
1862  * This will modify the BlockDriverState fields, and swap contents
1863  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1864  *
1865  * bs_new must not be attached to a BlockBackend.
1866  *
1867  * This function does not create any image files.
1868  */
1869 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1870 {
1871     BlockDriverState tmp;
1872 
1873     /* The code needs to swap the node_name but simply swapping node_list won't
1874      * work so first remove the nodes from the graph list, do the swap then
1875      * insert them back if needed.
1876      */
1877     if (bs_new->node_name[0] != '\0') {
1878         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1879     }
1880     if (bs_old->node_name[0] != '\0') {
1881         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1882     }
1883 
1884     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1885     assert(!bs_new->blk);
1886     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1887     assert(bs_new->job == NULL);
1888     assert(bs_new->io_limits_enabled == false);
1889     assert(!throttle_have_timer(&bs_new->throttle_state));
1890 
1891     tmp = *bs_new;
1892     *bs_new = *bs_old;
1893     *bs_old = tmp;
1894 
1895     /* there are some fields that should not be swapped, move them back */
1896     bdrv_move_feature_fields(&tmp, bs_old);
1897     bdrv_move_feature_fields(bs_old, bs_new);
1898     bdrv_move_feature_fields(bs_new, &tmp);
1899 
1900     /* bs_new must remain unattached */
1901     assert(!bs_new->blk);
1902 
1903     /* Check a few fields that should remain attached to the device */
1904     assert(bs_new->job == NULL);
1905     assert(bs_new->io_limits_enabled == false);
1906     assert(!throttle_have_timer(&bs_new->throttle_state));
1907 
1908     /* insert the nodes back into the graph node list if needed */
1909     if (bs_new->node_name[0] != '\0') {
1910         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1911     }
1912     if (bs_old->node_name[0] != '\0') {
1913         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1914     }
1915 
1916     bdrv_rebind(bs_new);
1917     bdrv_rebind(bs_old);
1918 }
1919 
1920 /*
1921  * Add new bs contents at the top of an image chain while the chain is
1922  * live, while keeping required fields on the top layer.
1923  *
1924  * This will modify the BlockDriverState fields, and swap contents
1925  * between bs_new and bs_top. Both bs_new and bs_top are modified.
1926  *
1927  * bs_new must not be attached to a BlockBackend.
1928  *
1929  * This function does not create any image files.
1930  */
1931 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1932 {
1933     bdrv_swap(bs_new, bs_top);
1934 
1935     /* The contents of 'tmp' will become bs_top, as we are
1936      * swapping bs_new and bs_top contents. */
1937     bdrv_set_backing_hd(bs_top, bs_new);
1938 }
1939 
1940 static void bdrv_delete(BlockDriverState *bs)
1941 {
1942     assert(!bs->job);
1943     assert(bdrv_op_blocker_is_empty(bs));
1944     assert(!bs->refcnt);
1945     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1946 
1947     bdrv_close(bs);
1948 
1949     /* remove from list, if necessary */
1950     bdrv_make_anon(bs);
1951 
1952     g_free(bs);
1953 }
1954 
1955 /*
1956  * Run consistency checks on an image
1957  *
1958  * Returns 0 if the check could be completed (it doesn't mean that the image is
1959  * free of errors) or -errno when an internal error occurred. The results of the
1960  * check are stored in res.
1961  */
1962 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1963 {
1964     if (bs->drv == NULL) {
1965         return -ENOMEDIUM;
1966     }
1967     if (bs->drv->bdrv_check == NULL) {
1968         return -ENOTSUP;
1969     }
1970 
1971     memset(res, 0, sizeof(*res));
1972     return bs->drv->bdrv_check(bs, res, fix);
1973 }
1974 
1975 #define COMMIT_BUF_SECTORS 2048
1976 
1977 /* commit COW file into the raw image */
1978 int bdrv_commit(BlockDriverState *bs)
1979 {
1980     BlockDriver *drv = bs->drv;
1981     int64_t sector, total_sectors, length, backing_length;
1982     int n, ro, open_flags;
1983     int ret = 0;
1984     uint8_t *buf = NULL;
1985 
1986     if (!drv)
1987         return -ENOMEDIUM;
1988 
1989     if (!bs->backing_hd) {
1990         return -ENOTSUP;
1991     }
1992 
1993     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1994         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1995         return -EBUSY;
1996     }
1997 
1998     ro = bs->backing_hd->read_only;
1999     open_flags =  bs->backing_hd->open_flags;
2000 
2001     if (ro) {
2002         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2003             return -EACCES;
2004         }
2005     }
2006 
2007     length = bdrv_getlength(bs);
2008     if (length < 0) {
2009         ret = length;
2010         goto ro_cleanup;
2011     }
2012 
2013     backing_length = bdrv_getlength(bs->backing_hd);
2014     if (backing_length < 0) {
2015         ret = backing_length;
2016         goto ro_cleanup;
2017     }
2018 
2019     /* If our top snapshot is larger than the backing file image,
2020      * grow the backing file image if possible.  If not possible,
2021      * we must return an error */
2022     if (length > backing_length) {
2023         ret = bdrv_truncate(bs->backing_hd, length);
2024         if (ret < 0) {
2025             goto ro_cleanup;
2026         }
2027     }
2028 
2029     total_sectors = length >> BDRV_SECTOR_BITS;
2030 
2031     /* qemu_try_blockalign() for bs will choose an alignment that works for
2032      * bs->backing_hd as well, so no need to compare the alignment manually. */
2033     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2034     if (buf == NULL) {
2035         ret = -ENOMEM;
2036         goto ro_cleanup;
2037     }
2038 
2039     for (sector = 0; sector < total_sectors; sector += n) {
2040         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2041         if (ret < 0) {
2042             goto ro_cleanup;
2043         }
2044         if (ret) {
2045             ret = bdrv_read(bs, sector, buf, n);
2046             if (ret < 0) {
2047                 goto ro_cleanup;
2048             }
2049 
2050             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2051             if (ret < 0) {
2052                 goto ro_cleanup;
2053             }
2054         }
2055     }
2056 
2057     if (drv->bdrv_make_empty) {
2058         ret = drv->bdrv_make_empty(bs);
2059         if (ret < 0) {
2060             goto ro_cleanup;
2061         }
2062         bdrv_flush(bs);
2063     }
2064 
2065     /*
2066      * Make sure all data we wrote to the backing device is actually
2067      * stable on disk.
2068      */
2069     if (bs->backing_hd) {
2070         bdrv_flush(bs->backing_hd);
2071     }
2072 
2073     ret = 0;
2074 ro_cleanup:
2075     qemu_vfree(buf);
2076 
2077     if (ro) {
2078         /* ignoring error return here */
2079         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2080     }
2081 
2082     return ret;
2083 }
2084 
2085 int bdrv_commit_all(void)
2086 {
2087     BlockDriverState *bs;
2088 
2089     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2090         AioContext *aio_context = bdrv_get_aio_context(bs);
2091 
2092         aio_context_acquire(aio_context);
2093         if (bs->drv && bs->backing_hd) {
2094             int ret = bdrv_commit(bs);
2095             if (ret < 0) {
2096                 aio_context_release(aio_context);
2097                 return ret;
2098             }
2099         }
2100         aio_context_release(aio_context);
2101     }
2102     return 0;
2103 }
2104 
2105 /*
2106  * Return values:
2107  * 0        - success
2108  * -EINVAL  - backing format specified, but no file
2109  * -ENOSPC  - can't update the backing file because no space is left in the
2110  *            image file header
2111  * -ENOTSUP - format driver doesn't support changing the backing file
2112  */
2113 int bdrv_change_backing_file(BlockDriverState *bs,
2114     const char *backing_file, const char *backing_fmt)
2115 {
2116     BlockDriver *drv = bs->drv;
2117     int ret;
2118 
2119     /* Backing file format doesn't make sense without a backing file */
2120     if (backing_fmt && !backing_file) {
2121         return -EINVAL;
2122     }
2123 
2124     if (drv->bdrv_change_backing_file != NULL) {
2125         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2126     } else {
2127         ret = -ENOTSUP;
2128     }
2129 
2130     if (ret == 0) {
2131         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2132         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2133     }
2134     return ret;
2135 }
2136 
2137 /*
2138  * Finds the image layer in the chain that has 'bs' as its backing file.
2139  *
2140  * active is the current topmost image.
2141  *
2142  * Returns NULL if bs is not found in active's image chain,
2143  * or if active == bs.
2144  *
2145  * Returns the bottommost base image if bs == NULL.
2146  */
2147 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2148                                     BlockDriverState *bs)
2149 {
2150     while (active && bs != active->backing_hd) {
2151         active = active->backing_hd;
2152     }
2153 
2154     return active;
2155 }
2156 
2157 /* Given a BDS, searches for the base layer. */
2158 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2159 {
2160     return bdrv_find_overlay(bs, NULL);
2161 }
2162 
2163 typedef struct BlkIntermediateStates {
2164     BlockDriverState *bs;
2165     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2166 } BlkIntermediateStates;
2167 
2168 
2169 /*
2170  * Drops images above 'base' up to and including 'top', and sets the image
2171  * above 'top' to have base as its backing file.
2172  *
2173  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2174  * information in 'bs' can be properly updated.
2175  *
2176  * E.g., this will convert the following chain:
2177  * bottom <- base <- intermediate <- top <- active
2178  *
2179  * to
2180  *
2181  * bottom <- base <- active
2182  *
2183  * It is allowed for bottom==base, in which case it converts:
2184  *
2185  * base <- intermediate <- top <- active
2186  *
2187  * to
2188  *
2189  * base <- active
2190  *
2191  * If backing_file_str is non-NULL, it will be used when modifying top's
2192  * overlay image metadata.
2193  *
2194  * Error conditions:
2195  *  if active == top, that is considered an error
2196  *
2197  */
2198 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2199                            BlockDriverState *base, const char *backing_file_str)
2200 {
2201     BlockDriverState *intermediate;
2202     BlockDriverState *base_bs = NULL;
2203     BlockDriverState *new_top_bs = NULL;
2204     BlkIntermediateStates *intermediate_state, *next;
2205     int ret = -EIO;
2206 
2207     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2208     QSIMPLEQ_INIT(&states_to_delete);
2209 
2210     if (!top->drv || !base->drv) {
2211         goto exit;
2212     }
2213 
2214     new_top_bs = bdrv_find_overlay(active, top);
2215 
2216     if (new_top_bs == NULL) {
2217         /* we could not find the image above 'top', this is an error */
2218         goto exit;
2219     }
2220 
2221     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2222      * to do, no intermediate images */
2223     if (new_top_bs->backing_hd == base) {
2224         ret = 0;
2225         goto exit;
2226     }
2227 
2228     intermediate = top;
2229 
2230     /* now we will go down through the list, and add each BDS we find
2231      * into our deletion queue, until we hit the 'base'
2232      */
2233     while (intermediate) {
2234         intermediate_state = g_new0(BlkIntermediateStates, 1);
2235         intermediate_state->bs = intermediate;
2236         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2237 
2238         if (intermediate->backing_hd == base) {
2239             base_bs = intermediate->backing_hd;
2240             break;
2241         }
2242         intermediate = intermediate->backing_hd;
2243     }
2244     if (base_bs == NULL) {
2245         /* something went wrong, we did not end at the base. safely
2246          * unravel everything, and exit with error */
2247         goto exit;
2248     }
2249 
2250     /* success - we can delete the intermediate states, and link top->base */
2251     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2252     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2253                                    base_bs->drv ? base_bs->drv->format_name : "");
2254     if (ret) {
2255         goto exit;
2256     }
2257     bdrv_set_backing_hd(new_top_bs, base_bs);
2258 
2259     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2260         /* so that bdrv_close() does not recursively close the chain */
2261         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2262         bdrv_unref(intermediate_state->bs);
2263     }
2264     ret = 0;
2265 
2266 exit:
2267     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2268         g_free(intermediate_state);
2269     }
2270     return ret;
2271 }
2272 
2273 /**
2274  * Truncate file to 'offset' bytes (needed only for file protocols)
2275  */
2276 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2277 {
2278     BlockDriver *drv = bs->drv;
2279     int ret;
2280     if (!drv)
2281         return -ENOMEDIUM;
2282     if (!drv->bdrv_truncate)
2283         return -ENOTSUP;
2284     if (bs->read_only)
2285         return -EACCES;
2286 
2287     ret = drv->bdrv_truncate(bs, offset);
2288     if (ret == 0) {
2289         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2290         bdrv_dirty_bitmap_truncate(bs);
2291         if (bs->blk) {
2292             blk_dev_resize_cb(bs->blk);
2293         }
2294     }
2295     return ret;
2296 }
2297 
2298 /**
2299  * Length of a allocated file in bytes. Sparse files are counted by actual
2300  * allocated space. Return < 0 if error or unknown.
2301  */
2302 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2303 {
2304     BlockDriver *drv = bs->drv;
2305     if (!drv) {
2306         return -ENOMEDIUM;
2307     }
2308     if (drv->bdrv_get_allocated_file_size) {
2309         return drv->bdrv_get_allocated_file_size(bs);
2310     }
2311     if (bs->file) {
2312         return bdrv_get_allocated_file_size(bs->file);
2313     }
2314     return -ENOTSUP;
2315 }
2316 
2317 /**
2318  * Return number of sectors on success, -errno on error.
2319  */
2320 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2321 {
2322     BlockDriver *drv = bs->drv;
2323 
2324     if (!drv)
2325         return -ENOMEDIUM;
2326 
2327     if (drv->has_variable_length) {
2328         int ret = refresh_total_sectors(bs, bs->total_sectors);
2329         if (ret < 0) {
2330             return ret;
2331         }
2332     }
2333     return bs->total_sectors;
2334 }
2335 
2336 /**
2337  * Return length in bytes on success, -errno on error.
2338  * The length is always a multiple of BDRV_SECTOR_SIZE.
2339  */
2340 int64_t bdrv_getlength(BlockDriverState *bs)
2341 {
2342     int64_t ret = bdrv_nb_sectors(bs);
2343 
2344     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2345 }
2346 
2347 /* return 0 as number of sectors if no device present or error */
2348 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2349 {
2350     int64_t nb_sectors = bdrv_nb_sectors(bs);
2351 
2352     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2353 }
2354 
2355 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2356                        BlockdevOnError on_write_error)
2357 {
2358     bs->on_read_error = on_read_error;
2359     bs->on_write_error = on_write_error;
2360 }
2361 
2362 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2363 {
2364     return is_read ? bs->on_read_error : bs->on_write_error;
2365 }
2366 
2367 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2368 {
2369     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2370 
2371     switch (on_err) {
2372     case BLOCKDEV_ON_ERROR_ENOSPC:
2373         return (error == ENOSPC) ?
2374                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2375     case BLOCKDEV_ON_ERROR_STOP:
2376         return BLOCK_ERROR_ACTION_STOP;
2377     case BLOCKDEV_ON_ERROR_REPORT:
2378         return BLOCK_ERROR_ACTION_REPORT;
2379     case BLOCKDEV_ON_ERROR_IGNORE:
2380         return BLOCK_ERROR_ACTION_IGNORE;
2381     default:
2382         abort();
2383     }
2384 }
2385 
2386 static void send_qmp_error_event(BlockDriverState *bs,
2387                                  BlockErrorAction action,
2388                                  bool is_read, int error)
2389 {
2390     IoOperationType optype;
2391 
2392     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2393     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2394                                    bdrv_iostatus_is_enabled(bs),
2395                                    error == ENOSPC, strerror(error),
2396                                    &error_abort);
2397 }
2398 
2399 /* This is done by device models because, while the block layer knows
2400  * about the error, it does not know whether an operation comes from
2401  * the device or the block layer (from a job, for example).
2402  */
2403 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2404                        bool is_read, int error)
2405 {
2406     assert(error >= 0);
2407 
2408     if (action == BLOCK_ERROR_ACTION_STOP) {
2409         /* First set the iostatus, so that "info block" returns an iostatus
2410          * that matches the events raised so far (an additional error iostatus
2411          * is fine, but not a lost one).
2412          */
2413         bdrv_iostatus_set_err(bs, error);
2414 
2415         /* Then raise the request to stop the VM and the event.
2416          * qemu_system_vmstop_request_prepare has two effects.  First,
2417          * it ensures that the STOP event always comes after the
2418          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2419          * can observe the STOP event and do a "cont" before the STOP
2420          * event is issued, the VM will not stop.  In this case, vm_start()
2421          * also ensures that the STOP/RESUME pair of events is emitted.
2422          */
2423         qemu_system_vmstop_request_prepare();
2424         send_qmp_error_event(bs, action, is_read, error);
2425         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2426     } else {
2427         send_qmp_error_event(bs, action, is_read, error);
2428     }
2429 }
2430 
2431 int bdrv_is_read_only(BlockDriverState *bs)
2432 {
2433     return bs->read_only;
2434 }
2435 
2436 int bdrv_is_sg(BlockDriverState *bs)
2437 {
2438     return bs->sg;
2439 }
2440 
2441 int bdrv_enable_write_cache(BlockDriverState *bs)
2442 {
2443     return bs->enable_write_cache;
2444 }
2445 
2446 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2447 {
2448     bs->enable_write_cache = wce;
2449 
2450     /* so a reopen() will preserve wce */
2451     if (wce) {
2452         bs->open_flags |= BDRV_O_CACHE_WB;
2453     } else {
2454         bs->open_flags &= ~BDRV_O_CACHE_WB;
2455     }
2456 }
2457 
2458 int bdrv_is_encrypted(BlockDriverState *bs)
2459 {
2460     if (bs->backing_hd && bs->backing_hd->encrypted)
2461         return 1;
2462     return bs->encrypted;
2463 }
2464 
2465 int bdrv_key_required(BlockDriverState *bs)
2466 {
2467     BlockDriverState *backing_hd = bs->backing_hd;
2468 
2469     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2470         return 1;
2471     return (bs->encrypted && !bs->valid_key);
2472 }
2473 
2474 int bdrv_set_key(BlockDriverState *bs, const char *key)
2475 {
2476     int ret;
2477     if (bs->backing_hd && bs->backing_hd->encrypted) {
2478         ret = bdrv_set_key(bs->backing_hd, key);
2479         if (ret < 0)
2480             return ret;
2481         if (!bs->encrypted)
2482             return 0;
2483     }
2484     if (!bs->encrypted) {
2485         return -EINVAL;
2486     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2487         return -ENOMEDIUM;
2488     }
2489     ret = bs->drv->bdrv_set_key(bs, key);
2490     if (ret < 0) {
2491         bs->valid_key = 0;
2492     } else if (!bs->valid_key) {
2493         bs->valid_key = 1;
2494         if (bs->blk) {
2495             /* call the change callback now, we skipped it on open */
2496             blk_dev_change_media_cb(bs->blk, true);
2497         }
2498     }
2499     return ret;
2500 }
2501 
2502 /*
2503  * Provide an encryption key for @bs.
2504  * If @key is non-null:
2505  *     If @bs is not encrypted, fail.
2506  *     Else if the key is invalid, fail.
2507  *     Else set @bs's key to @key, replacing the existing key, if any.
2508  * If @key is null:
2509  *     If @bs is encrypted and still lacks a key, fail.
2510  *     Else do nothing.
2511  * On failure, store an error object through @errp if non-null.
2512  */
2513 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2514 {
2515     if (key) {
2516         if (!bdrv_is_encrypted(bs)) {
2517             error_setg(errp, "Node '%s' is not encrypted",
2518                       bdrv_get_device_or_node_name(bs));
2519         } else if (bdrv_set_key(bs, key) < 0) {
2520             error_set(errp, QERR_INVALID_PASSWORD);
2521         }
2522     } else {
2523         if (bdrv_key_required(bs)) {
2524             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2525                       "'%s' (%s) is encrypted",
2526                       bdrv_get_device_or_node_name(bs),
2527                       bdrv_get_encrypted_filename(bs));
2528         }
2529     }
2530 }
2531 
2532 const char *bdrv_get_format_name(BlockDriverState *bs)
2533 {
2534     return bs->drv ? bs->drv->format_name : NULL;
2535 }
2536 
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point
 * at a 'const char *' and have to be dereferenced once before the strings
 * can be compared.  Returns what strcmp() returns for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}
2541 
2542 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2543                          void *opaque)
2544 {
2545     BlockDriver *drv;
2546     int count = 0;
2547     int i;
2548     const char **formats = NULL;
2549 
2550     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2551         if (drv->format_name) {
2552             bool found = false;
2553             int i = count;
2554             while (formats && i && !found) {
2555                 found = !strcmp(formats[--i], drv->format_name);
2556             }
2557 
2558             if (!found) {
2559                 formats = g_renew(const char *, formats, count + 1);
2560                 formats[count++] = drv->format_name;
2561             }
2562         }
2563     }
2564 
2565     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2566 
2567     for (i = 0; i < count; i++) {
2568         it(opaque, formats[i]);
2569     }
2570 
2571     g_free(formats);
2572 }
2573 
2574 /* This function is to find a node in the bs graph */
2575 BlockDriverState *bdrv_find_node(const char *node_name)
2576 {
2577     BlockDriverState *bs;
2578 
2579     assert(node_name);
2580 
2581     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2582         if (!strcmp(node_name, bs->node_name)) {
2583             return bs;
2584         }
2585     }
2586     return NULL;
2587 }
2588 
2589 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2590 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2591 {
2592     BlockDeviceInfoList *list, *entry;
2593     BlockDriverState *bs;
2594 
2595     list = NULL;
2596     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2597         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2598         if (!info) {
2599             qapi_free_BlockDeviceInfoList(list);
2600             return NULL;
2601         }
2602         entry = g_malloc0(sizeof(*entry));
2603         entry->value = info;
2604         entry->next = list;
2605         list = entry;
2606     }
2607 
2608     return list;
2609 }
2610 
2611 BlockDriverState *bdrv_lookup_bs(const char *device,
2612                                  const char *node_name,
2613                                  Error **errp)
2614 {
2615     BlockBackend *blk;
2616     BlockDriverState *bs;
2617 
2618     if (device) {
2619         blk = blk_by_name(device);
2620 
2621         if (blk) {
2622             return blk_bs(blk);
2623         }
2624     }
2625 
2626     if (node_name) {
2627         bs = bdrv_find_node(node_name);
2628 
2629         if (bs) {
2630             return bs;
2631         }
2632     }
2633 
2634     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2635                      device ? device : "",
2636                      node_name ? node_name : "");
2637     return NULL;
2638 }
2639 
2640 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2641  * return false.  If either argument is NULL, return false. */
2642 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2643 {
2644     while (top && top != base) {
2645         top = top->backing_hd;
2646     }
2647 
2648     return top != NULL;
2649 }
2650 
2651 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2652 {
2653     if (!bs) {
2654         return QTAILQ_FIRST(&graph_bdrv_states);
2655     }
2656     return QTAILQ_NEXT(bs, node_list);
2657 }
2658 
2659 BlockDriverState *bdrv_next(BlockDriverState *bs)
2660 {
2661     if (!bs) {
2662         return QTAILQ_FIRST(&bdrv_states);
2663     }
2664     return QTAILQ_NEXT(bs, device_list);
2665 }
2666 
2667 const char *bdrv_get_node_name(const BlockDriverState *bs)
2668 {
2669     return bs->node_name;
2670 }
2671 
2672 /* TODO check what callers really want: bs->node_name or blk_name() */
2673 const char *bdrv_get_device_name(const BlockDriverState *bs)
2674 {
2675     return bs->blk ? blk_name(bs->blk) : "";
2676 }
2677 
2678 /* This can be used to identify nodes that might not have a device
2679  * name associated. Since node and device names live in the same
2680  * namespace, the result is unambiguous. The exception is if both are
2681  * absent, then this returns an empty (non-null) string. */
2682 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2683 {
2684     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2685 }
2686 
2687 int bdrv_get_flags(BlockDriverState *bs)
2688 {
2689     return bs->open_flags;
2690 }
2691 
2692 int bdrv_has_zero_init_1(BlockDriverState *bs)
2693 {
2694     return 1;
2695 }
2696 
2697 int bdrv_has_zero_init(BlockDriverState *bs)
2698 {
2699     assert(bs->drv);
2700 
2701     /* If BS is a copy on write image, it is initialized to
2702        the contents of the base image, which may not be zeroes.  */
2703     if (bs->backing_hd) {
2704         return 0;
2705     }
2706     if (bs->drv->bdrv_has_zero_init) {
2707         return bs->drv->bdrv_has_zero_init(bs);
2708     }
2709 
2710     /* safe default */
2711     return 0;
2712 }
2713 
2714 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2715 {
2716     BlockDriverInfo bdi;
2717 
2718     if (bs->backing_hd) {
2719         return false;
2720     }
2721 
2722     if (bdrv_get_info(bs, &bdi) == 0) {
2723         return bdi.unallocated_blocks_are_zero;
2724     }
2725 
2726     return false;
2727 }
2728 
2729 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2730 {
2731     BlockDriverInfo bdi;
2732 
2733     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2734         return false;
2735     }
2736 
2737     if (bdrv_get_info(bs, &bdi) == 0) {
2738         return bdi.can_write_zeroes_with_unmap;
2739     }
2740 
2741     return false;
2742 }
2743 
2744 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2745 {
2746     if (bs->backing_hd && bs->backing_hd->encrypted)
2747         return bs->backing_file;
2748     else if (bs->encrypted)
2749         return bs->filename;
2750     else
2751         return NULL;
2752 }
2753 
2754 void bdrv_get_backing_filename(BlockDriverState *bs,
2755                                char *filename, int filename_size)
2756 {
2757     pstrcpy(filename, filename_size, bs->backing_file);
2758 }
2759 
2760 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2761 {
2762     BlockDriver *drv = bs->drv;
2763     if (!drv)
2764         return -ENOMEDIUM;
2765     if (!drv->bdrv_get_info)
2766         return -ENOTSUP;
2767     memset(bdi, 0, sizeof(*bdi));
2768     return drv->bdrv_get_info(bs, bdi);
2769 }
2770 
2771 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2772 {
2773     BlockDriver *drv = bs->drv;
2774     if (drv && drv->bdrv_get_specific_info) {
2775         return drv->bdrv_get_specific_info(bs);
2776     }
2777     return NULL;
2778 }
2779 
2780 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2781 {
2782     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2783         return;
2784     }
2785 
2786     bs->drv->bdrv_debug_event(bs, event);
2787 }
2788 
2789 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2790                           const char *tag)
2791 {
2792     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2793         bs = bs->file;
2794     }
2795 
2796     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2797         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2798     }
2799 
2800     return -ENOTSUP;
2801 }
2802 
2803 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2804 {
2805     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2806         bs = bs->file;
2807     }
2808 
2809     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2810         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2811     }
2812 
2813     return -ENOTSUP;
2814 }
2815 
2816 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2817 {
2818     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2819         bs = bs->file;
2820     }
2821 
2822     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2823         return bs->drv->bdrv_debug_resume(bs, tag);
2824     }
2825 
2826     return -ENOTSUP;
2827 }
2828 
2829 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2830 {
2831     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2832         bs = bs->file;
2833     }
2834 
2835     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2836         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2837     }
2838 
2839     return false;
2840 }
2841 
2842 int bdrv_is_snapshot(BlockDriverState *bs)
2843 {
2844     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2845 }
2846 
2847 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2848  * relative, it must be relative to the chain.  So, passing in bs->filename
2849  * from a BDS as backing_file should not be done, as that may be relative to
2850  * the CWD rather than the chain. */
2851 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2852         const char *backing_file)
2853 {
2854     char *filename_full = NULL;
2855     char *backing_file_full = NULL;
2856     char *filename_tmp = NULL;
2857     int is_protocol = 0;
2858     BlockDriverState *curr_bs = NULL;
2859     BlockDriverState *retval = NULL;
2860 
2861     if (!bs || !bs->drv || !backing_file) {
2862         return NULL;
2863     }
2864 
2865     filename_full     = g_malloc(PATH_MAX);
2866     backing_file_full = g_malloc(PATH_MAX);
2867     filename_tmp      = g_malloc(PATH_MAX);
2868 
2869     is_protocol = path_has_protocol(backing_file);
2870 
2871     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2872 
2873         /* If either of the filename paths is actually a protocol, then
2874          * compare unmodified paths; otherwise make paths relative */
2875         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2876             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2877                 retval = curr_bs->backing_hd;
2878                 break;
2879             }
2880         } else {
2881             /* If not an absolute filename path, make it relative to the current
2882              * image's filename path */
2883             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2884                          backing_file);
2885 
2886             /* We are going to compare absolute pathnames */
2887             if (!realpath(filename_tmp, filename_full)) {
2888                 continue;
2889             }
2890 
2891             /* We need to make sure the backing filename we are comparing against
2892              * is relative to the current image filename (or absolute) */
2893             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2894                          curr_bs->backing_file);
2895 
2896             if (!realpath(filename_tmp, backing_file_full)) {
2897                 continue;
2898             }
2899 
2900             if (strcmp(backing_file_full, filename_full) == 0) {
2901                 retval = curr_bs->backing_hd;
2902                 break;
2903             }
2904         }
2905     }
2906 
2907     g_free(filename_full);
2908     g_free(backing_file_full);
2909     g_free(filename_tmp);
2910     return retval;
2911 }
2912 
2913 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2914 {
2915     if (!bs->drv) {
2916         return 0;
2917     }
2918 
2919     if (!bs->backing_hd) {
2920         return 0;
2921     }
2922 
2923     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2924 }
2925 
/* Run module_call_init() for the MODULE_INIT_BLOCK module class. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2930 
/* Set use_bdrv_whitelist to 1 and then perform the regular bdrv_init(). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2936 
2937 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2938 {
2939     Error *local_err = NULL;
2940     int ret;
2941 
2942     if (!bs->drv)  {
2943         return;
2944     }
2945 
2946     if (!(bs->open_flags & BDRV_O_INCOMING)) {
2947         return;
2948     }
2949     bs->open_flags &= ~BDRV_O_INCOMING;
2950 
2951     if (bs->drv->bdrv_invalidate_cache) {
2952         bs->drv->bdrv_invalidate_cache(bs, &local_err);
2953     } else if (bs->file) {
2954         bdrv_invalidate_cache(bs->file, &local_err);
2955     }
2956     if (local_err) {
2957         error_propagate(errp, local_err);
2958         return;
2959     }
2960 
2961     ret = refresh_total_sectors(bs, bs->total_sectors);
2962     if (ret < 0) {
2963         error_setg_errno(errp, -ret, "Could not refresh total sector count");
2964         return;
2965     }
2966 }
2967 
2968 void bdrv_invalidate_cache_all(Error **errp)
2969 {
2970     BlockDriverState *bs;
2971     Error *local_err = NULL;
2972 
2973     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2974         AioContext *aio_context = bdrv_get_aio_context(bs);
2975 
2976         aio_context_acquire(aio_context);
2977         bdrv_invalidate_cache(bs, &local_err);
2978         aio_context_release(aio_context);
2979         if (local_err) {
2980             error_propagate(errp, local_err);
2981             return;
2982         }
2983     }
2984 }
2985 
2986 /**************************************************************/
2987 /* removable device support */
2988 
2989 /**
2990  * Return TRUE if the media is present
2991  */
2992 int bdrv_is_inserted(BlockDriverState *bs)
2993 {
2994     BlockDriver *drv = bs->drv;
2995 
2996     if (!drv)
2997         return 0;
2998     if (!drv->bdrv_is_inserted)
2999         return 1;
3000     return drv->bdrv_is_inserted(bs);
3001 }
3002 
3003 /**
3004  * Return whether the media changed since the last call to this
3005  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3006  */
3007 int bdrv_media_changed(BlockDriverState *bs)
3008 {
3009     BlockDriver *drv = bs->drv;
3010 
3011     if (drv && drv->bdrv_media_changed) {
3012         return drv->bdrv_media_changed(bs);
3013     }
3014     return -ENOTSUP;
3015 }
3016 
3017 /**
3018  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3019  */
3020 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3021 {
3022     BlockDriver *drv = bs->drv;
3023     const char *device_name;
3024 
3025     if (drv && drv->bdrv_eject) {
3026         drv->bdrv_eject(bs, eject_flag);
3027     }
3028 
3029     device_name = bdrv_get_device_name(bs);
3030     if (device_name[0] != '\0') {
3031         qapi_event_send_device_tray_moved(device_name,
3032                                           eject_flag, &error_abort);
3033     }
3034 }
3035 
3036 /**
3037  * Lock or unlock the media (if it is locked, the user won't be able
3038  * to eject it manually).
3039  */
3040 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3041 {
3042     BlockDriver *drv = bs->drv;
3043 
3044     trace_bdrv_lock_medium(bs, locked);
3045 
3046     if (drv && drv->bdrv_lock_medium) {
3047         drv->bdrv_lock_medium(bs, locked);
3048     }
3049 }
3050 
/* Store align in bs->guest_block_size. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
3055 
3056 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3057 {
3058     BdrvDirtyBitmap *bm;
3059 
3060     assert(name);
3061     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3062         if (bm->name && !strcmp(name, bm->name)) {
3063             return bm;
3064         }
3065     }
3066     return NULL;
3067 }
3068 
/*
 * Turn bitmap into an anonymous bitmap by freeing its name and setting it to
 * NULL.  The bitmap must not be frozen.
 */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
3075 
3076 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3077                                           uint32_t granularity,
3078                                           const char *name,
3079                                           Error **errp)
3080 {
3081     int64_t bitmap_size;
3082     BdrvDirtyBitmap *bitmap;
3083     uint32_t sector_granularity;
3084 
3085     assert((granularity & (granularity - 1)) == 0);
3086 
3087     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3088         error_setg(errp, "Bitmap already exists: %s", name);
3089         return NULL;
3090     }
3091     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3092     assert(sector_granularity);
3093     bitmap_size = bdrv_nb_sectors(bs);
3094     if (bitmap_size < 0) {
3095         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3096         errno = -bitmap_size;
3097         return NULL;
3098     }
3099     bitmap = g_new0(BdrvDirtyBitmap, 1);
3100     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3101     bitmap->size = bitmap_size;
3102     bitmap->name = g_strdup(name);
3103     bitmap->disabled = false;
3104     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3105     return bitmap;
3106 }
3107 
3108 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3109 {
3110     return bitmap->successor;
3111 }
3112 
3113 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3114 {
3115     return !(bitmap->disabled || bitmap->successor);
3116 }
3117 
3118 /**
3119  * Create a successor bitmap destined to replace this bitmap after an operation.
3120  * Requires that the bitmap is not frozen and has no successor.
3121  */
3122 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3123                                        BdrvDirtyBitmap *bitmap, Error **errp)
3124 {
3125     uint64_t granularity;
3126     BdrvDirtyBitmap *child;
3127 
3128     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3129         error_setg(errp, "Cannot create a successor for a bitmap that is "
3130                    "currently frozen");
3131         return -1;
3132     }
3133     assert(!bitmap->successor);
3134 
3135     /* Create an anonymous successor */
3136     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3137     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3138     if (!child) {
3139         return -1;
3140     }
3141 
3142     /* Successor will be on or off based on our current state. */
3143     child->disabled = bitmap->disabled;
3144 
3145     /* Install the successor and freeze the parent */
3146     bitmap->successor = child;
3147     return 0;
3148 }
3149 
3150 /**
3151  * For a bitmap with a successor, yield our name to the successor,
3152  * delete the old bitmap, and return a handle to the new bitmap.
3153  */
3154 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3155                                             BdrvDirtyBitmap *bitmap,
3156                                             Error **errp)
3157 {
3158     char *name;
3159     BdrvDirtyBitmap *successor = bitmap->successor;
3160 
3161     if (successor == NULL) {
3162         error_setg(errp, "Cannot relinquish control if "
3163                    "there's no successor present");
3164         return NULL;
3165     }
3166 
3167     name = bitmap->name;
3168     bitmap->name = NULL;
3169     successor->name = name;
3170     bitmap->successor = NULL;
3171     bdrv_release_dirty_bitmap(bs, bitmap);
3172 
3173     return successor;
3174 }
3175 
3176 /**
3177  * In cases of failure where we can no longer safely delete the parent,
3178  * we may wish to re-join the parent and child/successor.
3179  * The merged parent will be un-frozen, but not explicitly re-enabled.
3180  */
3181 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3182                                            BdrvDirtyBitmap *parent,
3183                                            Error **errp)
3184 {
3185     BdrvDirtyBitmap *successor = parent->successor;
3186 
3187     if (!successor) {
3188         error_setg(errp, "Cannot reclaim a successor when none is present");
3189         return NULL;
3190     }
3191 
3192     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3193         error_setg(errp, "Merging of parent and successor bitmap failed");
3194         return NULL;
3195     }
3196     bdrv_release_dirty_bitmap(bs, successor);
3197     parent->successor = NULL;
3198 
3199     return parent;
3200 }
3201 
3202 /**
3203  * Truncates _all_ bitmaps attached to a BDS.
3204  */
3205 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3206 {
3207     BdrvDirtyBitmap *bitmap;
3208     uint64_t size = bdrv_nb_sectors(bs);
3209 
3210     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3211         if (bdrv_dirty_bitmap_frozen(bitmap)) {
3212             continue;
3213         }
3214         hbitmap_truncate(bitmap->bitmap, size);
3215     }
3216 }
3217 
3218 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3219 {
3220     BdrvDirtyBitmap *bm, *next;
3221     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3222         if (bm == bitmap) {
3223             assert(!bdrv_dirty_bitmap_frozen(bm));
3224             QLIST_REMOVE(bitmap, list);
3225             hbitmap_free(bitmap->bitmap);
3226             g_free(bitmap->name);
3227             g_free(bitmap);
3228             return;
3229         }
3230     }
3231 }
3232 
/* Mark bitmap as disabled.  The bitmap must not be frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
3238 
/* Clear the disabled flag of bitmap.  The bitmap must not be frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
3244 
3245 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3246 {
3247     BdrvDirtyBitmap *bm;
3248     BlockDirtyInfoList *list = NULL;
3249     BlockDirtyInfoList **plist = &list;
3250 
3251     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3252         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3253         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3254         info->count = bdrv_get_dirty_count(bm);
3255         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3256         info->has_name = !!bm->name;
3257         info->name = g_strdup(bm->name);
3258         info->frozen = bdrv_dirty_bitmap_frozen(bm);
3259         entry->value = info;
3260         *plist = entry;
3261         plist = &entry->next;
3262     }
3263 
3264     return list;
3265 }
3266 
3267 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3268 {
3269     if (bitmap) {
3270         return hbitmap_get(bitmap->bitmap, sector);
3271     } else {
3272         return 0;
3273     }
3274 }
3275 
3276 /**
3277  * Chooses a default granularity based on the existing cluster size,
3278  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3279  * is no cluster size information available.
3280  */
3281 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3282 {
3283     BlockDriverInfo bdi;
3284     uint32_t granularity;
3285 
3286     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3287         granularity = MAX(4096, bdi.cluster_size);
3288         granularity = MIN(65536, granularity);
3289     } else {
3290         granularity = 65536;
3291     }
3292 
3293     return granularity;
3294 }
3295 
/*
 * Return BDRV_SECTOR_SIZE shifted left by the HBitmap's granularity.
 * NOTE(review): presumably this is the bitmap granularity in bytes, i.e. the
 * inverse of the ctz32(granularity >> BDRV_SECTOR_BITS) conversion done in
 * bdrv_create_dirty_bitmap() - confirm against hbitmap_granularity().
 */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
3300 
/* Initialize hbi to iterate over bitmap's HBitmap, starting at offset 0. */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
3305 
/*
 * Set nr_sectors sectors starting at cur_sector in bitmap via hbitmap_set().
 * The bitmap must be enabled (asserted).
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
3312 
/*
 * Reset nr_sectors sectors starting at cur_sector in bitmap via
 * hbitmap_reset().  The bitmap must be enabled (asserted).
 */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
3319 
/*
 * Reset the whole bitmap, i.e. bitmap->size sectors starting at sector 0.
 * The bitmap must be enabled (asserted).
 */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
}
3325 
3326 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3327                     int nr_sectors)
3328 {
3329     BdrvDirtyBitmap *bitmap;
3330     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3331         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3332             continue;
3333         }
3334         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3335     }
3336 }
3337 
3338 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3339                       int nr_sectors)
3340 {
3341     BdrvDirtyBitmap *bitmap;
3342     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3343         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3344             continue;
3345         }
3346         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3347     }
3348 }
3349 
/**
 * Advance an HBitmapIter to an arbitrary offset.
 *
 * The iterator is re-initialized on the HBitmap it already refers to
 * (hbi->hb, which must be non-NULL), starting at @offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
3358 
/* Return hbitmap_count() of the HBitmap backing bitmap. */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
3363 
/* Take a reference to bs by incrementing its refcnt; bdrv_unref() is the
 * counterpart that drops it again. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3369 
3370 /* Release a previously grabbed reference to bs.
3371  * If after releasing, reference count is zero, the BlockDriverState is
3372  * deleted. */
3373 void bdrv_unref(BlockDriverState *bs)
3374 {
3375     if (!bs) {
3376         return;
3377     }
3378     assert(bs->refcnt > 0);
3379     if (--bs->refcnt == 0) {
3380         bdrv_delete(bs);
3381     }
3382 }
3383 
/* One entry of a BlockDriverState's per-operation op_blockers[] list. */
struct BdrvOpBlocker {
    /* Error supplied by the blocking party: its message is reported by
     * bdrv_op_is_blocked() and its pointer identifies the entry in
     * bdrv_op_unblock() */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3388 
3389 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3390 {
3391     BdrvOpBlocker *blocker;
3392     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3393     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3394         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3395         if (errp) {
3396             error_setg(errp, "Node '%s' is busy: %s",
3397                        bdrv_get_device_or_node_name(bs),
3398                        error_get_pretty(blocker->reason));
3399         }
3400         return true;
3401     }
3402     return false;
3403 }
3404 
/*
 * Register a blocker for operation op on bs.  reason is stored by pointer
 * (not copied); the same pointer must be passed to bdrv_op_unblock() to
 * remove the blocker again.
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}
3414 
3415 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3416 {
3417     BdrvOpBlocker *blocker, *next;
3418     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3419     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3420         if (blocker->reason == reason) {
3421             QLIST_REMOVE(blocker, list);
3422             g_free(blocker);
3423         }
3424     }
3425 }
3426 
3427 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3428 {
3429     int i;
3430     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3431         bdrv_op_block(bs, i, reason);
3432     }
3433 }
3434 
3435 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3436 {
3437     int i;
3438     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3439         bdrv_op_unblock(bs, i, reason);
3440     }
3441 }
3442 
3443 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3444 {
3445     int i;
3446 
3447     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3448         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3449             return false;
3450         }
3451     }
3452     return true;
3453 }
3454 
/* Switch on I/O status tracking for bs and start out from
 * BLOCK_DEVICE_IO_STATUS_OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3460 
3461 /* The I/O status is only enabled if the drive explicitly
3462  * enables it _and_ the VM is configured to stop on errors */
3463 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3464 {
3465     return (bs->iostatus_enabled &&
3466            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3467             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3468             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3469 }
3470 
/* Switch off I/O status tracking for bs; bs->iostatus itself is left as is. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3475 
3476 void bdrv_iostatus_reset(BlockDriverState *bs)
3477 {
3478     if (bdrv_iostatus_is_enabled(bs)) {
3479         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3480         if (bs->job) {
3481             block_job_iostatus_reset(bs->job);
3482         }
3483     }
3484 }
3485 
3486 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3487 {
3488     assert(bdrv_iostatus_is_enabled(bs));
3489     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3490         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3491                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3492     }
3493 }
3494 
3495 void bdrv_img_create(const char *filename, const char *fmt,
3496                      const char *base_filename, const char *base_fmt,
3497                      char *options, uint64_t img_size, int flags,
3498                      Error **errp, bool quiet)
3499 {
3500     QemuOptsList *create_opts = NULL;
3501     QemuOpts *opts = NULL;
3502     const char *backing_fmt, *backing_file;
3503     int64_t size;
3504     BlockDriver *drv, *proto_drv;
3505     BlockDriver *backing_drv = NULL;
3506     Error *local_err = NULL;
3507     int ret = 0;
3508 
3509     /* Find driver and parse its options */
3510     drv = bdrv_find_format(fmt);
3511     if (!drv) {
3512         error_setg(errp, "Unknown file format '%s'", fmt);
3513         return;
3514     }
3515 
3516     proto_drv = bdrv_find_protocol(filename, true, errp);
3517     if (!proto_drv) {
3518         return;
3519     }
3520 
3521     if (!drv->create_opts) {
3522         error_setg(errp, "Format driver '%s' does not support image creation",
3523                    drv->format_name);
3524         return;
3525     }
3526 
3527     if (!proto_drv->create_opts) {
3528         error_setg(errp, "Protocol driver '%s' does not support image creation",
3529                    proto_drv->format_name);
3530         return;
3531     }
3532 
3533     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3534     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3535 
3536     /* Create parameter list with default values */
3537     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3538     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3539 
3540     /* Parse -o options */
3541     if (options) {
3542         qemu_opts_do_parse(opts, options, NULL, &local_err);
3543         if (local_err) {
3544             error_report_err(local_err);
3545             local_err = NULL;
3546             error_setg(errp, "Invalid options for file format '%s'", fmt);
3547             goto out;
3548         }
3549     }
3550 
3551     if (base_filename) {
3552         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3553         if (local_err) {
3554             error_setg(errp, "Backing file not supported for file format '%s'",
3555                        fmt);
3556             goto out;
3557         }
3558     }
3559 
3560     if (base_fmt) {
3561         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3562         if (local_err) {
3563             error_setg(errp, "Backing file format not supported for file "
3564                              "format '%s'", fmt);
3565             goto out;
3566         }
3567     }
3568 
3569     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3570     if (backing_file) {
3571         if (!strcmp(filename, backing_file)) {
3572             error_setg(errp, "Error: Trying to create an image with the "
3573                              "same filename as the backing file");
3574             goto out;
3575         }
3576     }
3577 
3578     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3579     if (backing_fmt) {
3580         backing_drv = bdrv_find_format(backing_fmt);
3581         if (!backing_drv) {
3582             error_setg(errp, "Unknown backing file format '%s'",
3583                        backing_fmt);
3584             goto out;
3585         }
3586     }
3587 
3588     // The size for the image must always be specified, with one exception:
3589     // If we are using a backing file, we can obtain the size from there
3590     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3591     if (size == -1) {
3592         if (backing_file) {
3593             BlockDriverState *bs;
3594             char *full_backing = g_new0(char, PATH_MAX);
3595             int64_t size;
3596             int back_flags;
3597 
3598             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3599                                                          full_backing, PATH_MAX,
3600                                                          &local_err);
3601             if (local_err) {
3602                 g_free(full_backing);
3603                 goto out;
3604             }
3605 
3606             /* backing files always opened read-only */
3607             back_flags =
3608                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3609 
3610             bs = NULL;
3611             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3612                             backing_drv, &local_err);
3613             g_free(full_backing);
3614             if (ret < 0) {
3615                 goto out;
3616             }
3617             size = bdrv_getlength(bs);
3618             if (size < 0) {
3619                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3620                                  backing_file);
3621                 bdrv_unref(bs);
3622                 goto out;
3623             }
3624 
3625             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3626 
3627             bdrv_unref(bs);
3628         } else {
3629             error_setg(errp, "Image creation needs a size parameter");
3630             goto out;
3631         }
3632     }
3633 
3634     if (!quiet) {
3635         printf("Formatting '%s', fmt=%s", filename, fmt);
3636         qemu_opts_print(opts, " ");
3637         puts("");
3638     }
3639 
3640     ret = bdrv_create(drv, filename, opts, &local_err);
3641 
3642     if (ret == -EFBIG) {
3643         /* This is generally a better message than whatever the driver would
3644          * deliver (especially because of the cluster_size_hint), since that
3645          * is most probably not much different from "image too large". */
3646         const char *cluster_size_hint = "";
3647         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3648             cluster_size_hint = " (try using a larger cluster size)";
3649         }
3650         error_setg(errp, "The image size is too large for file format '%s'"
3651                    "%s", fmt, cluster_size_hint);
3652         error_free(local_err);
3653         local_err = NULL;
3654     }
3655 
3656 out:
3657     qemu_opts_del(opts);
3658     qemu_opts_free(create_opts);
3659     if (local_err) {
3660         error_propagate(errp, local_err);
3661     }
3662 }
3663 
/* Return the AioContext stored in bs->aio_context. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
3668 
3669 void bdrv_detach_aio_context(BlockDriverState *bs)
3670 {
3671     BdrvAioNotifier *baf;
3672 
3673     if (!bs->drv) {
3674         return;
3675     }
3676 
3677     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3678         baf->detach_aio_context(baf->opaque);
3679     }
3680 
3681     if (bs->io_limits_enabled) {
3682         throttle_detach_aio_context(&bs->throttle_state);
3683     }
3684     if (bs->drv->bdrv_detach_aio_context) {
3685         bs->drv->bdrv_detach_aio_context(bs);
3686     }
3687     if (bs->file) {
3688         bdrv_detach_aio_context(bs->file);
3689     }
3690     if (bs->backing_hd) {
3691         bdrv_detach_aio_context(bs->backing_hd);
3692     }
3693 
3694     bs->aio_context = NULL;
3695 }
3696 
3697 void bdrv_attach_aio_context(BlockDriverState *bs,
3698                              AioContext *new_context)
3699 {
3700     BdrvAioNotifier *ban;
3701 
3702     if (!bs->drv) {
3703         return;
3704     }
3705 
3706     bs->aio_context = new_context;
3707 
3708     if (bs->backing_hd) {
3709         bdrv_attach_aio_context(bs->backing_hd, new_context);
3710     }
3711     if (bs->file) {
3712         bdrv_attach_aio_context(bs->file, new_context);
3713     }
3714     if (bs->drv->bdrv_attach_aio_context) {
3715         bs->drv->bdrv_attach_aio_context(bs, new_context);
3716     }
3717     if (bs->io_limits_enabled) {
3718         throttle_attach_aio_context(&bs->throttle_state, new_context);
3719     }
3720 
3721     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3722         ban->attached_aio_context(new_context, ban->opaque);
3723     }
3724 }
3725 
/*
 * Move bs to new_context: drain all requests, detach bs from its current
 * AioContext and attach it to new_context while holding new_context.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
3739 
/*
 * Register a pair of AioContext change callbacks on bs.
 *
 * attached_aio_context is invoked by bdrv_attach_aio_context() with the new
 * context, detach_aio_context by bdrv_detach_aio_context(); both receive
 * opaque.  The registration is removed with
 * bdrv_remove_aio_context_notifier() using the same three values.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
    /* Assigning a compound literal also zeroes the members not named here */
    *ban = (BdrvAioNotifier){
        .attached_aio_context = attached_aio_context,
        .detach_aio_context   = detach_aio_context,
        .opaque               = opaque
    };

    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
}
3753 
3754 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3755                                       void (*attached_aio_context)(AioContext *,
3756                                                                    void *),
3757                                       void (*detach_aio_context)(void *),
3758                                       void *opaque)
3759 {
3760     BdrvAioNotifier *ban, *ban_next;
3761 
3762     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3763         if (ban->attached_aio_context == attached_aio_context &&
3764             ban->detach_aio_context   == detach_aio_context   &&
3765             ban->opaque               == opaque)
3766         {
3767             QLIST_REMOVE(ban, list);
3768             g_free(ban);
3769 
3770             return;
3771         }
3772     }
3773 
3774     abort();
3775 }
3776 
3777 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3778                        BlockDriverAmendStatusCB *status_cb)
3779 {
3780     if (!bs->drv->bdrv_amend_options) {
3781         return -ENOTSUP;
3782     }
3783     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3784 }
3785 
3786 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3787  * of block filter and by bdrv_is_first_non_filter.
3788  * It is used to test if the given bs is the candidate or recurse more in the
3789  * node graph.
3790  */
3791 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3792                                       BlockDriverState *candidate)
3793 {
3794     /* return false if basic checks fails */
3795     if (!bs || !bs->drv) {
3796         return false;
3797     }
3798 
3799     /* the code reached a non block filter driver -> check if the bs is
3800      * the same as the candidate. It's the recursion termination condition.
3801      */
3802     if (!bs->drv->is_filter) {
3803         return bs == candidate;
3804     }
3805     /* Down this path the driver is a block filter driver */
3806 
3807     /* If the block filter recursion method is defined use it to recurse down
3808      * the node graph.
3809      */
3810     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3811         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3812     }
3813 
3814     /* the driver is a block filter but don't allow to recurse -> return false
3815      */
3816     return false;
3817 }
3818 
3819 /* This function checks if the candidate is the first non filter bs down it's
3820  * bs chain. Since we don't have pointers to parents it explore all bs chains
3821  * from the top. Some filters can choose not to pass down the recursion.
3822  */
3823 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3824 {
3825     BlockDriverState *bs;
3826 
3827     /* walk down the bs forest recursively */
3828     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3829         bool perm;
3830 
3831         /* try to recurse in this top level bs */
3832         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3833 
3834         /* candidate is the first non filter */
3835         if (perm) {
3836             return true;
3837         }
3838     }
3839 
3840     return false;
3841 }
3842 
3843 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3844 {
3845     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3846     AioContext *aio_context;
3847 
3848     if (!to_replace_bs) {
3849         error_setg(errp, "Node name '%s' not found", node_name);
3850         return NULL;
3851     }
3852 
3853     aio_context = bdrv_get_aio_context(to_replace_bs);
3854     aio_context_acquire(aio_context);
3855 
3856     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3857         to_replace_bs = NULL;
3858         goto out;
3859     }
3860 
3861     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3862      * most non filter in order to prevent data corruption.
3863      * Another benefit is that this tests exclude backing files which are
3864      * blocked by the backing blockers.
3865      */
3866     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3867         error_setg(errp, "Only top most non filter can be replaced");
3868         to_replace_bs = NULL;
3869         goto out;
3870     }
3871 
3872 out:
3873     aio_context_release(aio_context);
3874     return to_replace_bs;
3875 }
3876 
3877 static bool append_open_options(QDict *d, BlockDriverState *bs)
3878 {
3879     const QDictEntry *entry;
3880     bool found_any = false;
3881 
3882     for (entry = qdict_first(bs->options); entry;
3883          entry = qdict_next(bs->options, entry))
3884     {
3885         /* Only take options for this level and exclude all non-driver-specific
3886          * options */
3887         if (!strchr(qdict_entry_key(entry), '.') &&
3888             strcmp(qdict_entry_key(entry), "node-name"))
3889         {
3890             qobject_incref(qdict_entry_value(entry));
3891             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3892             found_any = true;
3893         }
3894     }
3895 
3896     return found_any;
3897 }
3898 
3899 /* Updates the following BDS fields:
3900  *  - exact_filename: A filename which may be used for opening a block device
3901  *                    which (mostly) equals the given BDS (even without any
3902  *                    other options; so reading and writing must return the same
3903  *                    results, but caching etc. may be different)
3904  *  - full_open_options: Options which, when given when opening a block device
3905  *                       (without a filename), result in a BDS (mostly)
3906  *                       equalling the given one
3907  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3908  *              full_open_options is converted to a JSON object, prefixed with
3909  *              "json:" (for use through the JSON pseudo protocol) and put here.
3910  */
3911 void bdrv_refresh_filename(BlockDriverState *bs)
3912 {
3913     BlockDriver *drv = bs->drv;
3914     QDict *opts;
3915 
3916     if (!drv) {
3917         return;
3918     }
3919 
3920     /* This BDS's file name will most probably depend on its file's name, so
3921      * refresh that first */
3922     if (bs->file) {
3923         bdrv_refresh_filename(bs->file);
3924     }
3925 
3926     if (drv->bdrv_refresh_filename) {
3927         /* Obsolete information is of no use here, so drop the old file name
3928          * information before refreshing it */
3929         bs->exact_filename[0] = '\0';
3930         if (bs->full_open_options) {
3931             QDECREF(bs->full_open_options);
3932             bs->full_open_options = NULL;
3933         }
3934 
3935         drv->bdrv_refresh_filename(bs);
3936     } else if (bs->file) {
3937         /* Try to reconstruct valid information from the underlying file */
3938         bool has_open_options;
3939 
3940         bs->exact_filename[0] = '\0';
3941         if (bs->full_open_options) {
3942             QDECREF(bs->full_open_options);
3943             bs->full_open_options = NULL;
3944         }
3945 
3946         opts = qdict_new();
3947         has_open_options = append_open_options(opts, bs);
3948 
3949         /* If no specific options have been given for this BDS, the filename of
3950          * the underlying file should suffice for this one as well */
3951         if (bs->file->exact_filename[0] && !has_open_options) {
3952             strcpy(bs->exact_filename, bs->file->exact_filename);
3953         }
3954         /* Reconstructing the full options QDict is simple for most format block
3955          * drivers, as long as the full options are known for the underlying
3956          * file BDS. The full options QDict of that file BDS should somehow
3957          * contain a representation of the filename, therefore the following
3958          * suffices without querying the (exact_)filename of this BDS. */
3959         if (bs->file->full_open_options) {
3960             qdict_put_obj(opts, "driver",
3961                           QOBJECT(qstring_from_str(drv->format_name)));
3962             QINCREF(bs->file->full_open_options);
3963             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3964 
3965             bs->full_open_options = opts;
3966         } else {
3967             QDECREF(opts);
3968         }
3969     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3970         /* There is no underlying file BDS (at least referenced by BDS.file),
3971          * so the full options QDict should be equal to the options given
3972          * specifically for this block device when it was opened (plus the
3973          * driver specification).
3974          * Because those options don't change, there is no need to update
3975          * full_open_options when it's already set. */
3976 
3977         opts = qdict_new();
3978         append_open_options(opts, bs);
3979         qdict_put_obj(opts, "driver",
3980                       QOBJECT(qstring_from_str(drv->format_name)));
3981 
3982         if (bs->exact_filename[0]) {
3983             /* This may not work for all block protocol drivers (some may
3984              * require this filename to be parsed), but we have to find some
3985              * default solution here, so just include it. If some block driver
3986              * does not support pure options without any filename at all or
3987              * needs some special format of the options QDict, it needs to
3988              * implement the driver-specific bdrv_refresh_filename() function.
3989              */
3990             qdict_put_obj(opts, "filename",
3991                           QOBJECT(qstring_from_str(bs->exact_filename)));
3992         }
3993 
3994         bs->full_open_options = opts;
3995     }
3996 
3997     if (bs->exact_filename[0]) {
3998         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
3999     } else if (bs->full_open_options) {
4000         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4001         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4002                  qstring_get_str(json));
4003         QDECREF(json);
4004     }
4005 }
4006 
4007 /* This accessor function purpose is to allow the device models to access the
4008  * BlockAcctStats structure embedded inside a BlockDriverState without being
4009  * aware of the BlockDriverState structure layout.
4010  * It will go away when the BlockAcctStats structure will be moved inside
4011  * the device models.
4012  */
4013 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4014 {
4015     return &bs->stats;
4016 }
4017