xref: /openbmc/qemu/block.c (revision 489653b5)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
54 /**
55  * A BdrvDirtyBitmap can be in three possible states:
56  * (1) successor is NULL and disabled is false: full r/w mode
57  * (2) successor is NULL and disabled is true: read only mode ("disabled")
58  * (3) successor is set: frozen mode.
59  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
60  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
61  */
62 struct BdrvDirtyBitmap {
63     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
64     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
65     char *name;                 /* Optional non-empty unique ID */
66     int64_t size;               /* Size of the bitmap (Number of sectors) */
67     bool disabled;              /* Bitmap is read-only */
68     QLIST_ENTRY(BdrvDirtyBitmap) list;
69 };
70 
71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72 
73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
74     QTAILQ_HEAD_INITIALIZER(bdrv_states);
75 
76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
77     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
78 
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80     QLIST_HEAD_INITIALIZER(bdrv_drivers);
81 
82 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
83 /* If non-zero, use only whitelisted block drivers */
84 static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/*
 * Return non-zero if @filename starts with an ASCII letter immediately
 * followed by a colon (e.g. "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = letter >= 'a' && letter <= 'z';
    const int is_upper = letter >= 'A' && letter <= 'Z';

    if (!is_lower && !is_upper) {
        /* Also covers the empty string, so filename[1] is never read then */
        return 0;
    }
    return filename[1] == ':';
}
93 
/*
 * Return 1 if @filename is exactly a drive specification such as "d:", or
 * if it starts with the "\\.\" or "//./" prefix; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* A bare "<letter>:" with nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
104 #endif
105 
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108     if (!bs || !bs->drv) {
109         /* page size or 4k (hdd sector size) should be on the safe side */
110         return MAX(4096, getpagesize());
111     }
112 
113     return bs->bl.opt_mem_alignment;
114 }
115 
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118     if (!bs || !bs->drv) {
119         /* page size or 4k (hdd sector size) should be on the safe side */
120         return MAX(4096, getpagesize());
121     }
122 
123     return bs->bl.min_mem_alignment;
124 }
125 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * Returns 1 if a ':' appears before any path separator, 0 otherwise.
 * On Windows, drive specifications are never treated as protocols and
 * '\\' counts as a path separator too.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_chars = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_chars = ":/";
#endif

    /* Look at the first colon or separator, or the NUL if there is none */
    return path[strcspn(path, stop_chars)] == ':';
}
143 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows it is also
 * absolute when it starts with '\\' or is recognized by
 * is_windows_drive() / is_windows_drive_prefix().
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
156 
/*
 * Build in @dest (capacity @dest_size bytes) the location of @filename.
 *
 * If @filename is absolute it is copied to @dest unchanged. Otherwise it
 * is appended to a prefix taken from @base_path: everything up to and
 * including the last path separator, or up to and including the first ':'
 * when that comes later (this is what makes URLs work). Nothing is
 * written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator (or the start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* The prefix ends at whichever of the two positions is further along */
    if (after_sep > after_colon) {
        after_colon = after_sep;
    }

    prefix_len = after_colon - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
200 
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202                                                   const char *backing,
203                                                   char *dest, size_t sz,
204                                                   Error **errp)
205 {
206     if (backing[0] == '\0' || path_has_protocol(backing) ||
207         path_is_absolute(backing))
208     {
209         pstrcpy(dest, sz, backing);
210     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211         error_setg(errp, "Cannot use relative backing file names for '%s'",
212                    backed);
213     } else {
214         path_combine(dest, sz, backed, backing);
215     }
216 }
217 
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219                                     Error **errp)
220 {
221     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222 
223     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224                                                  dest, sz, errp);
225 }
226 
227 void bdrv_register(BlockDriver *bdrv)
228 {
229     bdrv_setup_io_funcs(bdrv);
230 
231     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
232 }
233 
234 BlockDriverState *bdrv_new_root(void)
235 {
236     BlockDriverState *bs = bdrv_new();
237 
238     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239     return bs;
240 }
241 
242 BlockDriverState *bdrv_new(void)
243 {
244     BlockDriverState *bs;
245     int i;
246 
247     bs = g_new0(BlockDriverState, 1);
248     QLIST_INIT(&bs->dirty_bitmaps);
249     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250         QLIST_INIT(&bs->op_blockers[i]);
251     }
252     bdrv_iostatus_disable(bs);
253     notifier_list_init(&bs->close_notifiers);
254     notifier_with_return_list_init(&bs->before_write_notifiers);
255     qemu_co_queue_init(&bs->throttled_reqs[0]);
256     qemu_co_queue_init(&bs->throttled_reqs[1]);
257     bs->refcnt = 1;
258     bs->aio_context = qemu_get_aio_context();
259 
260     return bs;
261 }
262 
263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
264 {
265     notifier_list_add(&bs->close_notifiers, notify);
266 }
267 
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270     BlockDriver *drv1;
271     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272         if (!strcmp(drv1->format_name, format_name)) {
273             return drv1;
274         }
275     }
276     return NULL;
277 }
278 
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281     static const char *whitelist_rw[] = {
282         CONFIG_BDRV_RW_WHITELIST
283     };
284     static const char *whitelist_ro[] = {
285         CONFIG_BDRV_RO_WHITELIST
286     };
287     const char **p;
288 
289     if (!whitelist_rw[0] && !whitelist_ro[0]) {
290         return 1;               /* no whitelist, anything goes */
291     }
292 
293     for (p = whitelist_rw; *p; p++) {
294         if (!strcmp(drv->format_name, *p)) {
295             return 1;
296         }
297     }
298     if (read_only) {
299         for (p = whitelist_ro; *p; p++) {
300             if (!strcmp(drv->format_name, *p)) {
301                 return 1;
302             }
303         }
304     }
305     return 0;
306 }
307 
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309                                           bool read_only)
310 {
311     BlockDriver *drv = bdrv_find_format(format_name);
312     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314 
315 typedef struct CreateCo {
316     BlockDriver *drv;
317     char *filename;
318     QemuOpts *opts;
319     int ret;
320     Error *err;
321 } CreateCo;
322 
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325     Error *local_err = NULL;
326     int ret;
327 
328     CreateCo *cco = opaque;
329     assert(cco->drv);
330 
331     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332     if (local_err) {
333         error_propagate(&cco->err, local_err);
334     }
335     cco->ret = ret;
336 }
337 
338 int bdrv_create(BlockDriver *drv, const char* filename,
339                 QemuOpts *opts, Error **errp)
340 {
341     int ret;
342 
343     Coroutine *co;
344     CreateCo cco = {
345         .drv = drv,
346         .filename = g_strdup(filename),
347         .opts = opts,
348         .ret = NOT_DONE,
349         .err = NULL,
350     };
351 
352     if (!drv->bdrv_create) {
353         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354         ret = -ENOTSUP;
355         goto out;
356     }
357 
358     if (qemu_in_coroutine()) {
359         /* Fast-path if already in coroutine context */
360         bdrv_create_co_entry(&cco);
361     } else {
362         co = qemu_coroutine_create(bdrv_create_co_entry);
363         qemu_coroutine_enter(co, &cco);
364         while (cco.ret == NOT_DONE) {
365             aio_poll(qemu_get_aio_context(), true);
366         }
367     }
368 
369     ret = cco.ret;
370     if (ret < 0) {
371         if (cco.err) {
372             error_propagate(errp, cco.err);
373         } else {
374             error_setg_errno(errp, -ret, "Could not create image");
375         }
376     }
377 
378 out:
379     g_free(cco.filename);
380     return ret;
381 }
382 
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385     BlockDriver *drv;
386     Error *local_err = NULL;
387     int ret;
388 
389     drv = bdrv_find_protocol(filename, true, errp);
390     if (drv == NULL) {
391         return -ENOENT;
392     }
393 
394     ret = bdrv_create(drv, filename, opts, &local_err);
395     if (local_err) {
396         error_propagate(errp, local_err);
397     }
398     return ret;
399 }
400 
401 /**
402  * Try to get @bs's logical and physical block size.
403  * On success, store them in @bsz struct and return 0.
404  * On failure return -errno.
405  * @bs must not be empty.
406  */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409     BlockDriver *drv = bs->drv;
410 
411     if (drv && drv->bdrv_probe_blocksizes) {
412         return drv->bdrv_probe_blocksizes(bs, bsz);
413     }
414 
415     return -ENOTSUP;
416 }
417 
418 /**
419  * Try to get @bs's geometry (cyls, heads, sectors).
420  * On success, store them in @geo struct and return 0.
421  * On failure return -errno.
422  * @bs must not be empty.
423  */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426     BlockDriver *drv = bs->drv;
427 
428     if (drv && drv->bdrv_probe_geometry) {
429         return drv->bdrv_probe_geometry(bs, geo);
430     }
431 
432     return -ENOTSUP;
433 }
434 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity
 * of that buffer in bytes.
 *
 * Return 0 upon success, otherwise a negative errno value. On non-Windows
 * hosts the file is created in $TMPDIR (or /var/tmp if unset), and
 * -EOVERFLOW is returned when @size is too small to hold the name.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir;
    int fd;
    int len;

    /* A non-positive size would turn into a huge size_t inside snprintf */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* Formatting failed or the template was truncated */
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }

    return 0;
#endif
}
470 
471 /*
472  * Detect host devices. By convention, /dev/cdrom[N] is always
473  * recognized as a host CDROM.
474  */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477     int score_max = 0, score;
478     BlockDriver *drv = NULL, *d;
479 
480     QLIST_FOREACH(d, &bdrv_drivers, list) {
481         if (d->bdrv_probe_device) {
482             score = d->bdrv_probe_device(filename);
483             if (score > score_max) {
484                 score_max = score;
485                 drv = d;
486             }
487         }
488     }
489 
490     return drv;
491 }
492 
493 BlockDriver *bdrv_find_protocol(const char *filename,
494                                 bool allow_protocol_prefix,
495                                 Error **errp)
496 {
497     BlockDriver *drv1;
498     char protocol[128];
499     int len;
500     const char *p;
501 
502     /* TODO Drivers without bdrv_file_open must be specified explicitly */
503 
504     /*
505      * XXX(hch): we really should not let host device detection
506      * override an explicit protocol specification, but moving this
507      * later breaks access to device names with colons in them.
508      * Thanks to the brain-dead persistent naming schemes on udev-
509      * based Linux systems those actually are quite common.
510      */
511     drv1 = find_hdev_driver(filename);
512     if (drv1) {
513         return drv1;
514     }
515 
516     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517         return &bdrv_file;
518     }
519 
520     p = strchr(filename, ':');
521     assert(p != NULL);
522     len = p - filename;
523     if (len > sizeof(protocol) - 1)
524         len = sizeof(protocol) - 1;
525     memcpy(protocol, filename, len);
526     protocol[len] = '\0';
527     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528         if (drv1->protocol_name &&
529             !strcmp(drv1->protocol_name, protocol)) {
530             return drv1;
531         }
532     }
533 
534     error_setg(errp, "Unknown protocol '%s'", protocol);
535     return NULL;
536 }
537 
538 /*
539  * Guess image format by probing its contents.
540  * This is not a good idea when your image is raw (CVE-2008-2004), but
541  * we do it anyway for backward compatibility.
542  *
543  * @buf         contains the image's first @buf_size bytes.
544  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545  *              but can be smaller if the image file is smaller)
546  * @filename    is its filename.
547  *
548  * For all block drivers, call the bdrv_probe() method to get its
549  * probing score.
550  * Return the first block driver with the highest probing score.
551  */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553                             const char *filename)
554 {
555     int score_max = 0, score;
556     BlockDriver *drv = NULL, *d;
557 
558     QLIST_FOREACH(d, &bdrv_drivers, list) {
559         if (d->bdrv_probe) {
560             score = d->bdrv_probe(buf, buf_size, filename);
561             if (score > score_max) {
562                 score_max = score;
563                 drv = d;
564             }
565         }
566     }
567 
568     return drv;
569 }
570 
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572                              BlockDriver **pdrv, Error **errp)
573 {
574     BlockDriver *drv;
575     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576     int ret = 0;
577 
578     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580         *pdrv = &bdrv_raw;
581         return ret;
582     }
583 
584     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585     if (ret < 0) {
586         error_setg_errno(errp, -ret, "Could not read image for determining its "
587                          "format");
588         *pdrv = NULL;
589         return ret;
590     }
591 
592     drv = bdrv_probe_all(buf, ret, filename);
593     if (!drv) {
594         error_setg(errp, "Could not determine image format: No compatible "
595                    "driver found");
596         ret = -ENOENT;
597     }
598     *pdrv = drv;
599     return ret;
600 }
601 
602 /**
603  * Set the current 'total_sectors' value
604  * Return 0 on success, -errno on error.
605  */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608     BlockDriver *drv = bs->drv;
609 
610     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611     if (bs->sg)
612         return 0;
613 
614     /* query actual device if possible, otherwise just trust the hint */
615     if (drv->bdrv_getlength) {
616         int64_t length = drv->bdrv_getlength(bs);
617         if (length < 0) {
618             return length;
619         }
620         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621     }
622 
623     bs->total_sectors = hint;
624     return 0;
625 }
626 
627 /**
628  * Set open flags for a given discard mode
629  *
630  * Return 0 on success, -1 if the discard mode was invalid.
631  */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634     *flags &= ~BDRV_O_UNMAP;
635 
636     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637         /* do nothing */
638     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639         *flags |= BDRV_O_UNMAP;
640     } else {
641         return -1;
642     }
643 
644     return 0;
645 }
646 
647 /**
648  * Set open flags for a given cache mode
649  *
650  * Return 0 on success, -1 if the cache mode was invalid.
651  */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654     *flags &= ~BDRV_O_CACHE_MASK;
655 
656     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658     } else if (!strcmp(mode, "directsync")) {
659         *flags |= BDRV_O_NOCACHE;
660     } else if (!strcmp(mode, "writeback")) {
661         *flags |= BDRV_O_CACHE_WB;
662     } else if (!strcmp(mode, "unsafe")) {
663         *flags |= BDRV_O_CACHE_WB;
664         *flags |= BDRV_O_NO_FLUSH;
665     } else if (!strcmp(mode, "writethrough")) {
666         /* this is the default */
667     } else {
668         return -1;
669     }
670 
671     return 0;
672 }
673 
674 /*
675  * Returns the flags that a temporary snapshot should get, based on the
676  * originally requested flags (the originally requested image will have flags
677  * like a backing file)
678  */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683 
684 /*
685  * Returns the flags that bs->file should get, based on the given flags for
686  * the parent BDS
687  */
688 static int bdrv_inherited_flags(int flags)
689 {
690     /* Enable protocol handling, disable format probing for bs->file */
691     flags |= BDRV_O_PROTOCOL;
692 
693     /* Our block drivers take care to send flushes and respect unmap policy,
694      * so we can enable both unconditionally on lower layers. */
695     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696 
697     /* Clear flags that only apply to the top layer */
698     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699 
700     return flags;
701 }
702 
703 /*
704  * Returns the flags that bs->backing_hd should get, based on the given flags
705  * for the parent BDS
706  */
707 static int bdrv_backing_flags(int flags)
708 {
709     /* backing files always opened read-only */
710     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711 
712     /* snapshot=on is handled on the top layer */
713     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714 
715     return flags;
716 }
717 
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720     int open_flags = flags | BDRV_O_CACHE_WB;
721 
722     /*
723      * Clear flags that are internal to the block layer before opening the
724      * image.
725      */
726     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727 
728     /*
729      * Snapshots should be writable.
730      */
731     if (flags & BDRV_O_TEMPORARY) {
732         open_flags |= BDRV_O_RDWR;
733     }
734 
735     return open_flags;
736 }
737 
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739                                   const char *node_name,
740                                   Error **errp)
741 {
742     if (!node_name) {
743         return;
744     }
745 
746     /* Check for empty string or invalid characters */
747     if (!id_wellformed(node_name)) {
748         error_setg(errp, "Invalid node name");
749         return;
750     }
751 
752     /* takes care of avoiding namespaces collisions */
753     if (blk_by_name(node_name)) {
754         error_setg(errp, "node-name=%s is conflicting with a device id",
755                    node_name);
756         return;
757     }
758 
759     /* takes care of avoiding duplicates node names */
760     if (bdrv_find_node(node_name)) {
761         error_setg(errp, "Duplicate node name");
762         return;
763     }
764 
765     /* copy node name into the bs and insert it into the graph list */
766     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769 
770 /*
771  * Common part for opening disk images and files
772  *
773  * Removes all processed options from *options.
774  */
775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
776     QDict *options, int flags, BlockDriver *drv, Error **errp)
777 {
778     int ret, open_flags;
779     const char *filename;
780     const char *node_name = NULL;
781     Error *local_err = NULL;
782 
783     assert(drv != NULL);
784     assert(bs->file == NULL);
785     assert(options != NULL && bs->options != options);
786 
787     if (file != NULL) {
788         filename = file->filename;
789     } else {
790         filename = qdict_get_try_str(options, "filename");
791     }
792 
793     if (drv->bdrv_needs_filename && !filename) {
794         error_setg(errp, "The '%s' block driver requires a file name",
795                    drv->format_name);
796         return -EINVAL;
797     }
798 
799     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
800 
801     node_name = qdict_get_try_str(options, "node-name");
802     bdrv_assign_node_name(bs, node_name, &local_err);
803     if (local_err) {
804         error_propagate(errp, local_err);
805         return -EINVAL;
806     }
807     qdict_del(options, "node-name");
808 
809     /* bdrv_open() with directly using a protocol as drv. This layer is already
810      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
811      * and return immediately. */
812     if (file != NULL && drv->bdrv_file_open) {
813         bdrv_swap(file, bs);
814         return 0;
815     }
816 
817     bs->open_flags = flags;
818     bs->guest_block_size = 512;
819     bs->request_alignment = 512;
820     bs->zero_beyond_eof = true;
821     open_flags = bdrv_open_flags(bs, flags);
822     bs->read_only = !(open_flags & BDRV_O_RDWR);
823 
824     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
825         error_setg(errp,
826                    !bs->read_only && bdrv_is_whitelisted(drv, true)
827                         ? "Driver '%s' can only be used for read-only devices"
828                         : "Driver '%s' is not whitelisted",
829                    drv->format_name);
830         return -ENOTSUP;
831     }
832 
833     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
834     if (flags & BDRV_O_COPY_ON_READ) {
835         if (!bs->read_only) {
836             bdrv_enable_copy_on_read(bs);
837         } else {
838             error_setg(errp, "Can't use copy-on-read on read-only device");
839             return -EINVAL;
840         }
841     }
842 
843     if (filename != NULL) {
844         pstrcpy(bs->filename, sizeof(bs->filename), filename);
845     } else {
846         bs->filename[0] = '\0';
847     }
848     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
849 
850     bs->drv = drv;
851     bs->opaque = g_malloc0(drv->instance_size);
852 
853     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
854 
855     /* Open the image, either directly or using a protocol */
856     if (drv->bdrv_file_open) {
857         assert(file == NULL);
858         assert(!drv->bdrv_needs_filename || filename != NULL);
859         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
860     } else {
861         if (file == NULL) {
862             error_setg(errp, "Can't use '%s' as a block driver for the "
863                        "protocol level", drv->format_name);
864             ret = -EINVAL;
865             goto free_and_fail;
866         }
867         bs->file = file;
868         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
869     }
870 
871     if (ret < 0) {
872         if (local_err) {
873             error_propagate(errp, local_err);
874         } else if (bs->filename[0]) {
875             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
876         } else {
877             error_setg_errno(errp, -ret, "Could not open image");
878         }
879         goto free_and_fail;
880     }
881 
882     if (bs->encrypted) {
883         error_report("Encrypted images are deprecated");
884         error_printf("Support for them will be removed in a future release.\n"
885                      "You can use 'qemu-img convert' to convert your image"
886                      " to an unencrypted one.\n");
887     }
888 
889     ret = refresh_total_sectors(bs, bs->total_sectors);
890     if (ret < 0) {
891         error_setg_errno(errp, -ret, "Could not refresh total sector count");
892         goto free_and_fail;
893     }
894 
895     bdrv_refresh_limits(bs, &local_err);
896     if (local_err) {
897         error_propagate(errp, local_err);
898         ret = -EINVAL;
899         goto free_and_fail;
900     }
901 
902     assert(bdrv_opt_mem_align(bs) != 0);
903     assert(bdrv_min_mem_align(bs) != 0);
904     assert((bs->request_alignment != 0) || bs->sg);
905     return 0;
906 
907 free_and_fail:
908     bs->file = NULL;
909     g_free(bs->opaque);
910     bs->opaque = NULL;
911     bs->drv = NULL;
912     return ret;
913 }
914 
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917     QObject *options_obj;
918     QDict *options;
919     int ret;
920 
921     ret = strstart(filename, "json:", &filename);
922     assert(ret);
923 
924     options_obj = qobject_from_json(filename);
925     if (!options_obj) {
926         error_setg(errp, "Could not parse the JSON options");
927         return NULL;
928     }
929 
930     if (qobject_type(options_obj) != QTYPE_QDICT) {
931         qobject_decref(options_obj);
932         error_setg(errp, "Invalid JSON object given");
933         return NULL;
934     }
935 
936     options = qobject_to_qdict(options_obj);
937     qdict_flatten(options);
938 
939     return options;
940 }
941 
942 /*
943  * Fills in default options for opening images and converts the legacy
944  * filename/flags pair to option QDict entries.
945  */
946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
947                              BlockDriver *drv, Error **errp)
948 {
949     const char *filename = *pfilename;
950     const char *drvname;
951     bool protocol = flags & BDRV_O_PROTOCOL;
952     bool parse_filename = false;
953     Error *local_err = NULL;
954 
955     /* Parse json: pseudo-protocol */
956     if (filename && g_str_has_prefix(filename, "json:")) {
957         QDict *json_options = parse_json_filename(filename, &local_err);
958         if (local_err) {
959             error_propagate(errp, local_err);
960             return -EINVAL;
961         }
962 
963         /* Options given in the filename have lower priority than options
964          * specified directly */
965         qdict_join(*options, json_options, false);
966         QDECREF(json_options);
967         *pfilename = filename = NULL;
968     }
969 
970     /* Fetch the file name from the options QDict if necessary */
971     if (protocol && filename) {
972         if (!qdict_haskey(*options, "filename")) {
973             qdict_put(*options, "filename", qstring_from_str(filename));
974             parse_filename = true;
975         } else {
976             error_setg(errp, "Can't specify 'file' and 'filename' options at "
977                              "the same time");
978             return -EINVAL;
979         }
980     }
981 
982     /* Find the right block driver */
983     filename = qdict_get_try_str(*options, "filename");
984     drvname = qdict_get_try_str(*options, "driver");
985 
986     if (drv) {
987         if (drvname) {
988             error_setg(errp, "Driver specified twice");
989             return -EINVAL;
990         }
991         drvname = drv->format_name;
992         qdict_put(*options, "driver", qstring_from_str(drvname));
993     } else {
994         if (!drvname && protocol) {
995             if (filename) {
996                 drv = bdrv_find_protocol(filename, parse_filename, errp);
997                 if (!drv) {
998                     return -EINVAL;
999                 }
1000 
1001                 drvname = drv->format_name;
1002                 qdict_put(*options, "driver", qstring_from_str(drvname));
1003             } else {
1004                 error_setg(errp, "Must specify either driver or file");
1005                 return -EINVAL;
1006             }
1007         } else if (drvname) {
1008             drv = bdrv_find_format(drvname);
1009             if (!drv) {
1010                 error_setg(errp, "Unknown driver '%s'", drvname);
1011                 return -ENOENT;
1012             }
1013         }
1014     }
1015 
1016     assert(drv || !protocol);
1017 
1018     /* Driver-specific filename parsing */
1019     if (drv && drv->bdrv_parse_filename && parse_filename) {
1020         drv->bdrv_parse_filename(filename, *options, &local_err);
1021         if (local_err) {
1022             error_propagate(errp, local_err);
1023             return -EINVAL;
1024         }
1025 
1026         if (!drv->bdrv_needs_filename) {
1027             qdict_del(*options, "filename");
1028         }
1029     }
1030 
1031     return 0;
1032 }
1033 
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036 
1037     if (bs->backing_hd) {
1038         assert(bs->backing_blocker);
1039         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040     } else if (backing_hd) {
1041         error_setg(&bs->backing_blocker,
1042                    "node is used as backing hd of '%s'",
1043                    bdrv_get_device_or_node_name(bs));
1044     }
1045 
1046     bs->backing_hd = backing_hd;
1047     if (!backing_hd) {
1048         error_free(bs->backing_blocker);
1049         bs->backing_blocker = NULL;
1050         goto out;
1051     }
1052     bs->open_flags &= ~BDRV_O_NO_BACKING;
1053     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055             backing_hd->drv ? backing_hd->drv->format_name : "");
1056 
1057     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060                     bs->backing_blocker);
1061 out:
1062     bdrv_refresh_limits(bs, NULL);
1063 }
1064 
1065 /*
1066  * Opens the backing file for a BlockDriverState if not yet open
1067  *
1068  * options is a QDict of options to pass to the block drivers, or NULL for an
1069  * empty set of options. The reference to the QDict is transferred to this
1070  * function (even on failure), so if the caller intends to reuse the dictionary,
1071  * it needs to use QINCREF() before calling bdrv_file_open.
1072  */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075     char *backing_filename = g_malloc0(PATH_MAX);
1076     int ret = 0;
1077     BlockDriverState *backing_hd;
1078     Error *local_err = NULL;
1079 
1080     if (bs->backing_hd != NULL) {
1081         QDECREF(options);
1082         goto free_exit;
1083     }
1084 
1085     /* NULL means an empty set of options */
1086     if (options == NULL) {
1087         options = qdict_new();
1088     }
1089 
1090     bs->open_flags &= ~BDRV_O_NO_BACKING;
1091     if (qdict_haskey(options, "file.filename")) {
1092         backing_filename[0] = '\0';
1093     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094         QDECREF(options);
1095         goto free_exit;
1096     } else {
1097         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098                                        &local_err);
1099         if (local_err) {
1100             ret = -EINVAL;
1101             error_propagate(errp, local_err);
1102             QDECREF(options);
1103             goto free_exit;
1104         }
1105     }
1106 
1107     if (!bs->drv || !bs->drv->supports_backing) {
1108         ret = -EINVAL;
1109         error_setg(errp, "Driver doesn't support backing files");
1110         QDECREF(options);
1111         goto free_exit;
1112     }
1113 
1114     backing_hd = bdrv_new();
1115 
1116     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118     }
1119 
1120     assert(bs->backing_hd == NULL);
1121     ret = bdrv_open(&backing_hd,
1122                     *backing_filename ? backing_filename : NULL, NULL, options,
1123                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124     if (ret < 0) {
1125         bdrv_unref(backing_hd);
1126         backing_hd = NULL;
1127         bs->open_flags |= BDRV_O_NO_BACKING;
1128         error_setg(errp, "Could not open backing file: %s",
1129                    error_get_pretty(local_err));
1130         error_free(local_err);
1131         goto free_exit;
1132     }
1133     bdrv_set_backing_hd(bs, backing_hd);
1134 
1135 free_exit:
1136     g_free(backing_filename);
1137     return ret;
1138 }
1139 
1140 /*
1141  * Opens a disk image whose options are given as BlockdevRef in another block
1142  * device's options.
1143  *
1144  * If allow_none is true, no image will be opened if filename is false and no
1145  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146  *
1147  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149  * itself, all options starting with "${bdref_key}." are considered part of the
1150  * BlockdevRef.
1151  *
1152  * The BlockdevRef will be removed from the options QDict.
1153  *
1154  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155  */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157                     QDict *options, const char *bdref_key, int flags,
1158                     bool allow_none, Error **errp)
1159 {
1160     QDict *image_options;
1161     int ret;
1162     char *bdref_key_dot;
1163     const char *reference;
1164 
1165     assert(pbs);
1166     assert(*pbs == NULL);
1167 
1168     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170     g_free(bdref_key_dot);
1171 
1172     reference = qdict_get_try_str(options, bdref_key);
1173     if (!filename && !reference && !qdict_size(image_options)) {
1174         if (allow_none) {
1175             ret = 0;
1176         } else {
1177             error_setg(errp, "A block device must be specified for \"%s\"",
1178                        bdref_key);
1179             ret = -EINVAL;
1180         }
1181         QDECREF(image_options);
1182         goto done;
1183     }
1184 
1185     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186 
1187 done:
1188     qdict_del(options, bdref_key);
1189     return ret;
1190 }
1191 
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196     int64_t total_size;
1197     QemuOpts *opts = NULL;
1198     QDict *snapshot_options;
1199     BlockDriverState *bs_snapshot;
1200     Error *local_err;
1201     int ret;
1202 
1203     /* if snapshot, we create a temporary backing file and open it
1204        instead of opening 'filename' directly */
1205 
1206     /* Get the required size from the image */
1207     total_size = bdrv_getlength(bs);
1208     if (total_size < 0) {
1209         ret = total_size;
1210         error_setg_errno(errp, -total_size, "Could not get image size");
1211         goto out;
1212     }
1213 
1214     /* Create the temporary image */
1215     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216     if (ret < 0) {
1217         error_setg_errno(errp, -ret, "Could not get temporary filename");
1218         goto out;
1219     }
1220 
1221     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222                             &error_abort);
1223     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225     qemu_opts_del(opts);
1226     if (ret < 0) {
1227         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228                          "'%s': %s", tmp_filename,
1229                          error_get_pretty(local_err));
1230         error_free(local_err);
1231         goto out;
1232     }
1233 
1234     /* Prepare a new options QDict for the temporary file */
1235     snapshot_options = qdict_new();
1236     qdict_put(snapshot_options, "file.driver",
1237               qstring_from_str("file"));
1238     qdict_put(snapshot_options, "file.filename",
1239               qstring_from_str(tmp_filename));
1240 
1241     bs_snapshot = bdrv_new();
1242 
1243     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244                     flags, &bdrv_qcow2, &local_err);
1245     if (ret < 0) {
1246         error_propagate(errp, local_err);
1247         goto out;
1248     }
1249 
1250     bdrv_append(bs_snapshot, bs);
1251 
1252 out:
1253     g_free(tmp_filename);
1254     return ret;
1255 }
1256 
1257 /*
1258  * Opens a disk image (raw, qcow2, vmdk, ...)
1259  *
1260  * options is a QDict of options to pass to the block drivers, or NULL for an
1261  * empty set of options. The reference to the QDict belongs to the block layer
1262  * after the call (even on failure), so if the caller intends to reuse the
1263  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264  *
1265  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266  * If it is not NULL, the referenced BDS will be reused.
1267  *
1268  * The reference parameter may be used to specify an existing block device which
1269  * should be opened. If specified, neither options nor a filename may be given,
1270  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271  */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273               const char *reference, QDict *options, int flags,
1274               BlockDriver *drv, Error **errp)
1275 {
1276     int ret;
1277     BlockDriverState *file = NULL, *bs;
1278     const char *drvname;
1279     Error *local_err = NULL;
1280     int snapshot_flags = 0;
1281 
1282     assert(pbs);
1283 
1284     if (reference) {
1285         bool options_non_empty = options ? qdict_size(options) : false;
1286         QDECREF(options);
1287 
1288         if (*pbs) {
1289             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290                        "another block device");
1291             return -EINVAL;
1292         }
1293 
1294         if (filename || options_non_empty) {
1295             error_setg(errp, "Cannot reference an existing block device with "
1296                        "additional options or a new filename");
1297             return -EINVAL;
1298         }
1299 
1300         bs = bdrv_lookup_bs(reference, reference, errp);
1301         if (!bs) {
1302             return -ENODEV;
1303         }
1304         bdrv_ref(bs);
1305         *pbs = bs;
1306         return 0;
1307     }
1308 
1309     if (*pbs) {
1310         bs = *pbs;
1311     } else {
1312         bs = bdrv_new();
1313     }
1314 
1315     /* NULL means an empty set of options */
1316     if (options == NULL) {
1317         options = qdict_new();
1318     }
1319 
1320     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321     if (local_err) {
1322         goto fail;
1323     }
1324 
1325     /* Find the right image format driver */
1326     drv = NULL;
1327     drvname = qdict_get_try_str(options, "driver");
1328     if (drvname) {
1329         drv = bdrv_find_format(drvname);
1330         qdict_del(options, "driver");
1331         if (!drv) {
1332             error_setg(errp, "Unknown driver: '%s'", drvname);
1333             ret = -EINVAL;
1334             goto fail;
1335         }
1336     }
1337 
1338     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339     if (drv && !drv->bdrv_file_open) {
1340         /* If the user explicitly wants a format driver here, we'll need to add
1341          * another layer for the protocol in bs->file */
1342         flags &= ~BDRV_O_PROTOCOL;
1343     }
1344 
1345     bs->options = options;
1346     options = qdict_clone_shallow(options);
1347 
1348     /* Open image file without format layer */
1349     if ((flags & BDRV_O_PROTOCOL) == 0) {
1350         if (flags & BDRV_O_RDWR) {
1351             flags |= BDRV_O_ALLOW_RDWR;
1352         }
1353         if (flags & BDRV_O_SNAPSHOT) {
1354             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355             flags = bdrv_backing_flags(flags);
1356         }
1357 
1358         assert(file == NULL);
1359         ret = bdrv_open_image(&file, filename, options, "file",
1360                               bdrv_inherited_flags(flags),
1361                               true, &local_err);
1362         if (ret < 0) {
1363             goto fail;
1364         }
1365     }
1366 
1367     /* Image format probing */
1368     bs->probed = !drv;
1369     if (!drv && file) {
1370         ret = find_image_format(file, filename, &drv, &local_err);
1371         if (ret < 0) {
1372             goto fail;
1373         }
1374     } else if (!drv) {
1375         error_setg(errp, "Must specify either driver or file");
1376         ret = -EINVAL;
1377         goto fail;
1378     }
1379 
1380     /* Open the image */
1381     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382     if (ret < 0) {
1383         goto fail;
1384     }
1385 
1386     if (file && (bs->file != file)) {
1387         bdrv_unref(file);
1388         file = NULL;
1389     }
1390 
1391     /* If there is a backing file, use it */
1392     if ((flags & BDRV_O_NO_BACKING) == 0) {
1393         QDict *backing_options;
1394 
1395         qdict_extract_subqdict(options, &backing_options, "backing.");
1396         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397         if (ret < 0) {
1398             goto close_and_fail;
1399         }
1400     }
1401 
1402     bdrv_refresh_filename(bs);
1403 
1404     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405      * temporary snapshot afterwards. */
1406     if (snapshot_flags) {
1407         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408         if (local_err) {
1409             goto close_and_fail;
1410         }
1411     }
1412 
1413     /* Check if any unknown options were used */
1414     if (options && (qdict_size(options) != 0)) {
1415         const QDictEntry *entry = qdict_first(options);
1416         if (flags & BDRV_O_PROTOCOL) {
1417             error_setg(errp, "Block protocol '%s' doesn't support the option "
1418                        "'%s'", drv->format_name, entry->key);
1419         } else {
1420             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421                        "support the option '%s'", drv->format_name,
1422                        bdrv_get_device_name(bs), entry->key);
1423         }
1424 
1425         ret = -EINVAL;
1426         goto close_and_fail;
1427     }
1428 
1429     if (!bdrv_key_required(bs)) {
1430         if (bs->blk) {
1431             blk_dev_change_media_cb(bs->blk, true);
1432         }
1433     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434                && !runstate_check(RUN_STATE_INMIGRATE)
1435                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436         error_setg(errp,
1437                    "Guest must be stopped for opening of encrypted image");
1438         ret = -EBUSY;
1439         goto close_and_fail;
1440     }
1441 
1442     QDECREF(options);
1443     *pbs = bs;
1444     return 0;
1445 
1446 fail:
1447     if (file != NULL) {
1448         bdrv_unref(file);
1449     }
1450     QDECREF(bs->options);
1451     QDECREF(options);
1452     bs->options = NULL;
1453     if (!*pbs) {
1454         /* If *pbs is NULL, a new BDS has been created in this function and
1455            needs to be freed now. Otherwise, it does not need to be closed,
1456            since it has not really been opened yet. */
1457         bdrv_unref(bs);
1458     }
1459     if (local_err) {
1460         error_propagate(errp, local_err);
1461     }
1462     return ret;
1463 
1464 close_and_fail:
1465     /* See fail path, but now the BDS has to be always closed */
1466     if (*pbs) {
1467         bdrv_close(bs);
1468     } else {
1469         bdrv_unref(bs);
1470     }
1471     QDECREF(options);
1472     if (local_err) {
1473         error_propagate(errp, local_err);
1474     }
1475     return ret;
1476 }
1477 
1478 typedef struct BlockReopenQueueEntry {
1479      bool prepared;
1480      BDRVReopenState state;
1481      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1482 } BlockReopenQueueEntry;
1483 
1484 /*
1485  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486  * reopen of multiple devices.
1487  *
1488  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490  * be created and initialized. This newly created BlockReopenQueue should be
1491  * passed back in for subsequent calls that are intended to be of the same
1492  * atomic 'set'.
1493  *
1494  * bs is the BlockDriverState to add to the reopen queue.
1495  *
1496  * flags contains the open flags for the associated bs
1497  *
1498  * returns a pointer to bs_queue, which is either the newly allocated
1499  * bs_queue, or the existing bs_queue being used.
1500  *
1501  */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503                                     BlockDriverState *bs, int flags)
1504 {
1505     assert(bs != NULL);
1506 
1507     BlockReopenQueueEntry *bs_entry;
1508     if (bs_queue == NULL) {
1509         bs_queue = g_new0(BlockReopenQueue, 1);
1510         QSIMPLEQ_INIT(bs_queue);
1511     }
1512 
1513     /* bdrv_open() masks this flag out */
1514     flags &= ~BDRV_O_PROTOCOL;
1515 
1516     if (bs->file) {
1517         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518     }
1519 
1520     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522 
1523     bs_entry->state.bs = bs;
1524     bs_entry->state.flags = flags;
1525 
1526     return bs_queue;
1527 }
1528 
1529 /*
1530  * Reopen multiple BlockDriverStates atomically & transactionally.
1531  *
1532  * The queue passed in (bs_queue) must have been built up previous
1533  * via bdrv_reopen_queue().
1534  *
1535  * Reopens all BDS specified in the queue, with the appropriate
1536  * flags.  All devices are prepared for reopen, and failure of any
1537  * device will cause all device changes to be abandonded, and intermediate
1538  * data cleaned up.
1539  *
1540  * If all devices prepare successfully, then the changes are committed
1541  * to all devices.
1542  *
1543  */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546     int ret = -1;
1547     BlockReopenQueueEntry *bs_entry, *next;
1548     Error *local_err = NULL;
1549 
1550     assert(bs_queue != NULL);
1551 
1552     bdrv_drain_all();
1553 
1554     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556             error_propagate(errp, local_err);
1557             goto cleanup;
1558         }
1559         bs_entry->prepared = true;
1560     }
1561 
1562     /* If we reach this point, we have success and just need to apply the
1563      * changes
1564      */
1565     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566         bdrv_reopen_commit(&bs_entry->state);
1567     }
1568 
1569     ret = 0;
1570 
1571 cleanup:
1572     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573         if (ret && bs_entry->prepared) {
1574             bdrv_reopen_abort(&bs_entry->state);
1575         }
1576         g_free(bs_entry);
1577     }
1578     g_free(bs_queue);
1579     return ret;
1580 }
1581 
1582 
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586     int ret = -1;
1587     Error *local_err = NULL;
1588     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589 
1590     ret = bdrv_reopen_multiple(queue, &local_err);
1591     if (local_err != NULL) {
1592         error_propagate(errp, local_err);
1593     }
1594     return ret;
1595 }
1596 
1597 
1598 /*
1599  * Prepares a BlockDriverState for reopen. All changes are staged in the
1600  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601  * the block driver layer .bdrv_reopen_prepare()
1602  *
1603  * bs is the BlockDriverState to reopen
1604  * flags are the new open flags
1605  * queue is the reopen queue
1606  *
1607  * Returns 0 on success, non-zero on error.  On error errp will be set
1608  * as well.
1609  *
1610  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611  * It is the responsibility of the caller to then call the abort() or
1612  * commit() for any other BDS that have been left in a prepare() state
1613  *
1614  */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616                         Error **errp)
1617 {
1618     int ret = -1;
1619     Error *local_err = NULL;
1620     BlockDriver *drv;
1621 
1622     assert(reopen_state != NULL);
1623     assert(reopen_state->bs->drv != NULL);
1624     drv = reopen_state->bs->drv;
1625 
1626     /* if we are to stay read-only, do not allow permission change
1627      * to r/w */
1628     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629         reopen_state->flags & BDRV_O_RDWR) {
1630         error_setg(errp, "Node '%s' is read only",
1631                    bdrv_get_device_or_node_name(reopen_state->bs));
1632         goto error;
1633     }
1634 
1635 
1636     ret = bdrv_flush(reopen_state->bs);
1637     if (ret) {
1638         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639                   strerror(-ret));
1640         goto error;
1641     }
1642 
1643     if (drv->bdrv_reopen_prepare) {
1644         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645         if (ret) {
1646             if (local_err != NULL) {
1647                 error_propagate(errp, local_err);
1648             } else {
1649                 error_setg(errp, "failed while preparing to reopen image '%s'",
1650                            reopen_state->bs->filename);
1651             }
1652             goto error;
1653         }
1654     } else {
1655         /* It is currently mandatory to have a bdrv_reopen_prepare()
1656          * handler for each supported drv. */
1657         error_setg(errp, "Block format '%s' used by node '%s' "
1658                    "does not support reopening files", drv->format_name,
1659                    bdrv_get_device_or_node_name(reopen_state->bs));
1660         ret = -1;
1661         goto error;
1662     }
1663 
1664     ret = 0;
1665 
1666 error:
1667     return ret;
1668 }
1669 
1670 /*
1671  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672  * makes them final by swapping the staging BlockDriverState contents into
1673  * the active BlockDriverState contents.
1674  */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677     BlockDriver *drv;
1678 
1679     assert(reopen_state != NULL);
1680     drv = reopen_state->bs->drv;
1681     assert(drv != NULL);
1682 
1683     /* If there are any driver level actions to take */
1684     if (drv->bdrv_reopen_commit) {
1685         drv->bdrv_reopen_commit(reopen_state);
1686     }
1687 
1688     /* set BDS specific flags now */
1689     reopen_state->bs->open_flags         = reopen_state->flags;
1690     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691                                               BDRV_O_CACHE_WB);
1692     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693 
1694     bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696 
1697 /*
1698  * Abort the reopen, and delete and free the staged changes in
1699  * reopen_state
1700  */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703     BlockDriver *drv;
1704 
1705     assert(reopen_state != NULL);
1706     drv = reopen_state->bs->drv;
1707     assert(drv != NULL);
1708 
1709     if (drv->bdrv_reopen_abort) {
1710         drv->bdrv_reopen_abort(reopen_state);
1711     }
1712 }
1713 
1714 
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717     BdrvAioNotifier *ban, *ban_next;
1718 
1719     if (bs->job) {
1720         block_job_cancel_sync(bs->job);
1721     }
1722     bdrv_drain_all(); /* complete I/O */
1723     bdrv_flush(bs);
1724     bdrv_drain_all(); /* in case flush left pending I/O */
1725     notifier_list_notify(&bs->close_notifiers, bs);
1726 
1727     if (bs->drv) {
1728         if (bs->backing_hd) {
1729             BlockDriverState *backing_hd = bs->backing_hd;
1730             bdrv_set_backing_hd(bs, NULL);
1731             bdrv_unref(backing_hd);
1732         }
1733         bs->drv->bdrv_close(bs);
1734         g_free(bs->opaque);
1735         bs->opaque = NULL;
1736         bs->drv = NULL;
1737         bs->copy_on_read = 0;
1738         bs->backing_file[0] = '\0';
1739         bs->backing_format[0] = '\0';
1740         bs->total_sectors = 0;
1741         bs->encrypted = 0;
1742         bs->valid_key = 0;
1743         bs->sg = 0;
1744         bs->zero_beyond_eof = false;
1745         QDECREF(bs->options);
1746         bs->options = NULL;
1747         QDECREF(bs->full_open_options);
1748         bs->full_open_options = NULL;
1749 
1750         if (bs->file != NULL) {
1751             bdrv_unref(bs->file);
1752             bs->file = NULL;
1753         }
1754     }
1755 
1756     if (bs->blk) {
1757         blk_dev_change_media_cb(bs->blk, false);
1758     }
1759 
1760     /*throttling disk I/O limits*/
1761     if (bs->io_limits_enabled) {
1762         bdrv_io_limits_disable(bs);
1763     }
1764 
1765     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766         g_free(ban);
1767     }
1768     QLIST_INIT(&bs->aio_notifiers);
1769 }
1770 
1771 void bdrv_close_all(void)
1772 {
1773     BlockDriverState *bs;
1774 
1775     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776         AioContext *aio_context = bdrv_get_aio_context(bs);
1777 
1778         aio_context_acquire(aio_context);
1779         bdrv_close(bs);
1780         aio_context_release(aio_context);
1781     }
1782 }
1783 
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785  * graph_bdrv_state list.
1786    Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789     /*
1790      * Take care to remove bs from bdrv_states only when it's actually
1791      * in it.  Note that bs->device_list.tqe_prev is initially null,
1792      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1793      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794      * resetting it to null on remove.
1795      */
1796     if (bs->device_list.tqe_prev) {
1797         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798         bs->device_list.tqe_prev = NULL;
1799     }
1800     if (bs->node_name[0] != '\0') {
1801         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802     }
1803     bs->node_name[0] = '\0';
1804 }
1805 
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808     if (bs->drv && bs->drv->bdrv_rebind) {
1809         bs->drv->bdrv_rebind(bs);
1810     }
1811 }
1812 
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814                                      BlockDriverState *bs_src)
1815 {
1816     /* move some fields that need to stay attached to the device */
1817 
1818     /* dev info */
1819     bs_dest->guest_block_size   = bs_src->guest_block_size;
1820     bs_dest->copy_on_read       = bs_src->copy_on_read;
1821 
1822     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823 
1824     /* i/o throttled req */
1825     memcpy(&bs_dest->throttle_state,
1826            &bs_src->throttle_state,
1827            sizeof(ThrottleState));
1828     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1829     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1830     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1831 
1832     /* r/w error */
1833     bs_dest->on_read_error      = bs_src->on_read_error;
1834     bs_dest->on_write_error     = bs_src->on_write_error;
1835 
1836     /* i/o status */
1837     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1838     bs_dest->iostatus           = bs_src->iostatus;
1839 
1840     /* dirty bitmap */
1841     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1842 
1843     /* reference count */
1844     bs_dest->refcnt             = bs_src->refcnt;
1845 
1846     /* job */
1847     bs_dest->job                = bs_src->job;
1848 
1849     /* keep the same entry in bdrv_states */
1850     bs_dest->device_list = bs_src->device_list;
1851     bs_dest->blk = bs_src->blk;
1852 
1853     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1854            sizeof(bs_dest->op_blockers));
1855 }
1856 
1857 /*
1858  * Swap bs contents for two image chains while they are live,
1859  * while keeping required fields on the BlockDriverState that is
1860  * actually attached to a device.
1861  *
1862  * This will modify the BlockDriverState fields, and swap contents
1863  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1864  *
1865  * bs_new must not be attached to a BlockBackend.
1866  *
1867  * This function does not create any image files.
1868  */
1869 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1870 {
1871     BlockDriverState tmp;
1872 
1873     /* The code needs to swap the node_name but simply swapping node_list won't
1874      * work so first remove the nodes from the graph list, do the swap then
1875      * insert them back if needed.
1876      */
1877     if (bs_new->node_name[0] != '\0') {
1878         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1879     }
1880     if (bs_old->node_name[0] != '\0') {
1881         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1882     }
1883 
1884     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1885     assert(!bs_new->blk);
1886     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1887     assert(bs_new->job == NULL);
1888     assert(bs_new->io_limits_enabled == false);
1889     assert(!throttle_have_timer(&bs_new->throttle_state));
1890 
1891     tmp = *bs_new;
1892     *bs_new = *bs_old;
1893     *bs_old = tmp;
1894 
1895     /* there are some fields that should not be swapped, move them back */
1896     bdrv_move_feature_fields(&tmp, bs_old);
1897     bdrv_move_feature_fields(bs_old, bs_new);
1898     bdrv_move_feature_fields(bs_new, &tmp);
1899 
1900     /* bs_new must remain unattached */
1901     assert(!bs_new->blk);
1902 
1903     /* Check a few fields that should remain attached to the device */
1904     assert(bs_new->job == NULL);
1905     assert(bs_new->io_limits_enabled == false);
1906     assert(!throttle_have_timer(&bs_new->throttle_state));
1907 
1908     /* insert the nodes back into the graph node list if needed */
1909     if (bs_new->node_name[0] != '\0') {
1910         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1911     }
1912     if (bs_old->node_name[0] != '\0') {
1913         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1914     }
1915 
1916     bdrv_rebind(bs_new);
1917     bdrv_rebind(bs_old);
1918 }
1919 
1920 /*
1921  * Add new bs contents at the top of an image chain while the chain is
1922  * live, while keeping required fields on the top layer.
1923  *
1924  * This will modify the BlockDriverState fields, and swap contents
1925  * between bs_new and bs_top. Both bs_new and bs_top are modified.
1926  *
1927  * bs_new must not be attached to a BlockBackend.
1928  *
1929  * This function does not create any image files.
1930  */
1931 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1932 {
1933     bdrv_swap(bs_new, bs_top);
1934 
1935     /* The contents of 'tmp' will become bs_top, as we are
1936      * swapping bs_new and bs_top contents. */
1937     bdrv_set_backing_hd(bs_top, bs_new);
1938 }
1939 
1940 static void bdrv_delete(BlockDriverState *bs)
1941 {
1942     assert(!bs->job);
1943     assert(bdrv_op_blocker_is_empty(bs));
1944     assert(!bs->refcnt);
1945     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1946 
1947     bdrv_close(bs);
1948 
1949     /* remove from list, if necessary */
1950     bdrv_make_anon(bs);
1951 
1952     g_free(bs);
1953 }
1954 
1955 /*
1956  * Run consistency checks on an image
1957  *
1958  * Returns 0 if the check could be completed (it doesn't mean that the image is
1959  * free of errors) or -errno when an internal error occurred. The results of the
1960  * check are stored in res.
1961  */
1962 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1963 {
1964     if (bs->drv == NULL) {
1965         return -ENOMEDIUM;
1966     }
1967     if (bs->drv->bdrv_check == NULL) {
1968         return -ENOTSUP;
1969     }
1970 
1971     memset(res, 0, sizeof(*res));
1972     return bs->drv->bdrv_check(bs, res, fix);
1973 }
1974 
1975 #define COMMIT_BUF_SECTORS 2048
1976 
1977 /* commit COW file into the raw image */
1978 int bdrv_commit(BlockDriverState *bs)
1979 {
1980     BlockDriver *drv = bs->drv;
1981     int64_t sector, total_sectors, length, backing_length;
1982     int n, ro, open_flags;
1983     int ret = 0;
1984     uint8_t *buf = NULL;
1985 
1986     if (!drv)
1987         return -ENOMEDIUM;
1988 
1989     if (!bs->backing_hd) {
1990         return -ENOTSUP;
1991     }
1992 
1993     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1994         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1995         return -EBUSY;
1996     }
1997 
1998     ro = bs->backing_hd->read_only;
1999     open_flags =  bs->backing_hd->open_flags;
2000 
2001     if (ro) {
2002         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2003             return -EACCES;
2004         }
2005     }
2006 
2007     length = bdrv_getlength(bs);
2008     if (length < 0) {
2009         ret = length;
2010         goto ro_cleanup;
2011     }
2012 
2013     backing_length = bdrv_getlength(bs->backing_hd);
2014     if (backing_length < 0) {
2015         ret = backing_length;
2016         goto ro_cleanup;
2017     }
2018 
2019     /* If our top snapshot is larger than the backing file image,
2020      * grow the backing file image if possible.  If not possible,
2021      * we must return an error */
2022     if (length > backing_length) {
2023         ret = bdrv_truncate(bs->backing_hd, length);
2024         if (ret < 0) {
2025             goto ro_cleanup;
2026         }
2027     }
2028 
2029     total_sectors = length >> BDRV_SECTOR_BITS;
2030 
2031     /* qemu_try_blockalign() for bs will choose an alignment that works for
2032      * bs->backing_hd as well, so no need to compare the alignment manually. */
2033     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2034     if (buf == NULL) {
2035         ret = -ENOMEM;
2036         goto ro_cleanup;
2037     }
2038 
2039     for (sector = 0; sector < total_sectors; sector += n) {
2040         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2041         if (ret < 0) {
2042             goto ro_cleanup;
2043         }
2044         if (ret) {
2045             ret = bdrv_read(bs, sector, buf, n);
2046             if (ret < 0) {
2047                 goto ro_cleanup;
2048             }
2049 
2050             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2051             if (ret < 0) {
2052                 goto ro_cleanup;
2053             }
2054         }
2055     }
2056 
2057     if (drv->bdrv_make_empty) {
2058         ret = drv->bdrv_make_empty(bs);
2059         if (ret < 0) {
2060             goto ro_cleanup;
2061         }
2062         bdrv_flush(bs);
2063     }
2064 
2065     /*
2066      * Make sure all data we wrote to the backing device is actually
2067      * stable on disk.
2068      */
2069     if (bs->backing_hd) {
2070         bdrv_flush(bs->backing_hd);
2071     }
2072 
2073     ret = 0;
2074 ro_cleanup:
2075     qemu_vfree(buf);
2076 
2077     if (ro) {
2078         /* ignoring error return here */
2079         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2080     }
2081 
2082     return ret;
2083 }
2084 
2085 int bdrv_commit_all(void)
2086 {
2087     BlockDriverState *bs;
2088 
2089     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2090         AioContext *aio_context = bdrv_get_aio_context(bs);
2091 
2092         aio_context_acquire(aio_context);
2093         if (bs->drv && bs->backing_hd) {
2094             int ret = bdrv_commit(bs);
2095             if (ret < 0) {
2096                 aio_context_release(aio_context);
2097                 return ret;
2098             }
2099         }
2100         aio_context_release(aio_context);
2101     }
2102     return 0;
2103 }
2104 
2105 /*
2106  * Return values:
2107  * 0        - success
2108  * -EINVAL  - backing format specified, but no file
2109  * -ENOSPC  - can't update the backing file because no space is left in the
2110  *            image file header
2111  * -ENOTSUP - format driver doesn't support changing the backing file
2112  */
2113 int bdrv_change_backing_file(BlockDriverState *bs,
2114     const char *backing_file, const char *backing_fmt)
2115 {
2116     BlockDriver *drv = bs->drv;
2117     int ret;
2118 
2119     /* Backing file format doesn't make sense without a backing file */
2120     if (backing_fmt && !backing_file) {
2121         return -EINVAL;
2122     }
2123 
2124     if (drv->bdrv_change_backing_file != NULL) {
2125         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2126     } else {
2127         ret = -ENOTSUP;
2128     }
2129 
2130     if (ret == 0) {
2131         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2132         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2133     }
2134     return ret;
2135 }
2136 
2137 /*
2138  * Finds the image layer in the chain that has 'bs' as its backing file.
2139  *
2140  * active is the current topmost image.
2141  *
2142  * Returns NULL if bs is not found in active's image chain,
2143  * or if active == bs.
2144  *
2145  * Returns the bottommost base image if bs == NULL.
2146  */
2147 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2148                                     BlockDriverState *bs)
2149 {
2150     while (active && bs != active->backing_hd) {
2151         active = active->backing_hd;
2152     }
2153 
2154     return active;
2155 }
2156 
2157 /* Given a BDS, searches for the base layer. */
2158 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2159 {
2160     return bdrv_find_overlay(bs, NULL);
2161 }
2162 
2163 typedef struct BlkIntermediateStates {
2164     BlockDriverState *bs;
2165     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2166 } BlkIntermediateStates;
2167 
2168 
2169 /*
2170  * Drops images above 'base' up to and including 'top', and sets the image
2171  * above 'top' to have base as its backing file.
2172  *
2173  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2174  * information in 'bs' can be properly updated.
2175  *
2176  * E.g., this will convert the following chain:
2177  * bottom <- base <- intermediate <- top <- active
2178  *
2179  * to
2180  *
2181  * bottom <- base <- active
2182  *
2183  * It is allowed for bottom==base, in which case it converts:
2184  *
2185  * base <- intermediate <- top <- active
2186  *
2187  * to
2188  *
2189  * base <- active
2190  *
2191  * If backing_file_str is non-NULL, it will be used when modifying top's
2192  * overlay image metadata.
2193  *
2194  * Error conditions:
2195  *  if active == top, that is considered an error
2196  *
2197  */
2198 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2199                            BlockDriverState *base, const char *backing_file_str)
2200 {
2201     BlockDriverState *intermediate;
2202     BlockDriverState *base_bs = NULL;
2203     BlockDriverState *new_top_bs = NULL;
2204     BlkIntermediateStates *intermediate_state, *next;
2205     int ret = -EIO;
2206 
2207     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2208     QSIMPLEQ_INIT(&states_to_delete);
2209 
2210     if (!top->drv || !base->drv) {
2211         goto exit;
2212     }
2213 
2214     new_top_bs = bdrv_find_overlay(active, top);
2215 
2216     if (new_top_bs == NULL) {
2217         /* we could not find the image above 'top', this is an error */
2218         goto exit;
2219     }
2220 
2221     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2222      * to do, no intermediate images */
2223     if (new_top_bs->backing_hd == base) {
2224         ret = 0;
2225         goto exit;
2226     }
2227 
2228     intermediate = top;
2229 
2230     /* now we will go down through the list, and add each BDS we find
2231      * into our deletion queue, until we hit the 'base'
2232      */
2233     while (intermediate) {
2234         intermediate_state = g_new0(BlkIntermediateStates, 1);
2235         intermediate_state->bs = intermediate;
2236         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2237 
2238         if (intermediate->backing_hd == base) {
2239             base_bs = intermediate->backing_hd;
2240             break;
2241         }
2242         intermediate = intermediate->backing_hd;
2243     }
2244     if (base_bs == NULL) {
2245         /* something went wrong, we did not end at the base. safely
2246          * unravel everything, and exit with error */
2247         goto exit;
2248     }
2249 
2250     /* success - we can delete the intermediate states, and link top->base */
2251     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2252     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2253                                    base_bs->drv ? base_bs->drv->format_name : "");
2254     if (ret) {
2255         goto exit;
2256     }
2257     bdrv_set_backing_hd(new_top_bs, base_bs);
2258 
2259     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2260         /* so that bdrv_close() does not recursively close the chain */
2261         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2262         bdrv_unref(intermediate_state->bs);
2263     }
2264     ret = 0;
2265 
2266 exit:
2267     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2268         g_free(intermediate_state);
2269     }
2270     return ret;
2271 }
2272 
2273 /**
2274  * Truncate file to 'offset' bytes (needed only for file protocols)
2275  */
2276 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2277 {
2278     BlockDriver *drv = bs->drv;
2279     int ret;
2280     if (!drv)
2281         return -ENOMEDIUM;
2282     if (!drv->bdrv_truncate)
2283         return -ENOTSUP;
2284     if (bs->read_only)
2285         return -EACCES;
2286 
2287     ret = drv->bdrv_truncate(bs, offset);
2288     if (ret == 0) {
2289         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2290         bdrv_dirty_bitmap_truncate(bs);
2291         if (bs->blk) {
2292             blk_dev_resize_cb(bs->blk);
2293         }
2294     }
2295     return ret;
2296 }
2297 
2298 /**
2299  * Length of a allocated file in bytes. Sparse files are counted by actual
2300  * allocated space. Return < 0 if error or unknown.
2301  */
2302 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2303 {
2304     BlockDriver *drv = bs->drv;
2305     if (!drv) {
2306         return -ENOMEDIUM;
2307     }
2308     if (drv->bdrv_get_allocated_file_size) {
2309         return drv->bdrv_get_allocated_file_size(bs);
2310     }
2311     if (bs->file) {
2312         return bdrv_get_allocated_file_size(bs->file);
2313     }
2314     return -ENOTSUP;
2315 }
2316 
2317 /**
2318  * Return number of sectors on success, -errno on error.
2319  */
2320 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2321 {
2322     BlockDriver *drv = bs->drv;
2323 
2324     if (!drv)
2325         return -ENOMEDIUM;
2326 
2327     if (drv->has_variable_length) {
2328         int ret = refresh_total_sectors(bs, bs->total_sectors);
2329         if (ret < 0) {
2330             return ret;
2331         }
2332     }
2333     return bs->total_sectors;
2334 }
2335 
2336 /**
2337  * Return length in bytes on success, -errno on error.
2338  * The length is always a multiple of BDRV_SECTOR_SIZE.
2339  */
2340 int64_t bdrv_getlength(BlockDriverState *bs)
2341 {
2342     int64_t ret = bdrv_nb_sectors(bs);
2343 
2344     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2345     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2346 }
2347 
2348 /* return 0 as number of sectors if no device present or error */
2349 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2350 {
2351     int64_t nb_sectors = bdrv_nb_sectors(bs);
2352 
2353     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2354 }
2355 
2356 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2357                        BlockdevOnError on_write_error)
2358 {
2359     bs->on_read_error = on_read_error;
2360     bs->on_write_error = on_write_error;
2361 }
2362 
2363 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2364 {
2365     return is_read ? bs->on_read_error : bs->on_write_error;
2366 }
2367 
2368 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2369 {
2370     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2371 
2372     switch (on_err) {
2373     case BLOCKDEV_ON_ERROR_ENOSPC:
2374         return (error == ENOSPC) ?
2375                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2376     case BLOCKDEV_ON_ERROR_STOP:
2377         return BLOCK_ERROR_ACTION_STOP;
2378     case BLOCKDEV_ON_ERROR_REPORT:
2379         return BLOCK_ERROR_ACTION_REPORT;
2380     case BLOCKDEV_ON_ERROR_IGNORE:
2381         return BLOCK_ERROR_ACTION_IGNORE;
2382     default:
2383         abort();
2384     }
2385 }
2386 
2387 static void send_qmp_error_event(BlockDriverState *bs,
2388                                  BlockErrorAction action,
2389                                  bool is_read, int error)
2390 {
2391     IoOperationType optype;
2392 
2393     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2394     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2395                                    bdrv_iostatus_is_enabled(bs),
2396                                    error == ENOSPC, strerror(error),
2397                                    &error_abort);
2398 }
2399 
2400 /* This is done by device models because, while the block layer knows
2401  * about the error, it does not know whether an operation comes from
2402  * the device or the block layer (from a job, for example).
2403  */
2404 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2405                        bool is_read, int error)
2406 {
2407     assert(error >= 0);
2408 
2409     if (action == BLOCK_ERROR_ACTION_STOP) {
2410         /* First set the iostatus, so that "info block" returns an iostatus
2411          * that matches the events raised so far (an additional error iostatus
2412          * is fine, but not a lost one).
2413          */
2414         bdrv_iostatus_set_err(bs, error);
2415 
2416         /* Then raise the request to stop the VM and the event.
2417          * qemu_system_vmstop_request_prepare has two effects.  First,
2418          * it ensures that the STOP event always comes after the
2419          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2420          * can observe the STOP event and do a "cont" before the STOP
2421          * event is issued, the VM will not stop.  In this case, vm_start()
2422          * also ensures that the STOP/RESUME pair of events is emitted.
2423          */
2424         qemu_system_vmstop_request_prepare();
2425         send_qmp_error_event(bs, action, is_read, error);
2426         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2427     } else {
2428         send_qmp_error_event(bs, action, is_read, error);
2429     }
2430 }
2431 
2432 int bdrv_is_read_only(BlockDriverState *bs)
2433 {
2434     return bs->read_only;
2435 }
2436 
2437 int bdrv_is_sg(BlockDriverState *bs)
2438 {
2439     return bs->sg;
2440 }
2441 
2442 int bdrv_enable_write_cache(BlockDriverState *bs)
2443 {
2444     return bs->enable_write_cache;
2445 }
2446 
2447 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2448 {
2449     bs->enable_write_cache = wce;
2450 
2451     /* so a reopen() will preserve wce */
2452     if (wce) {
2453         bs->open_flags |= BDRV_O_CACHE_WB;
2454     } else {
2455         bs->open_flags &= ~BDRV_O_CACHE_WB;
2456     }
2457 }
2458 
2459 int bdrv_is_encrypted(BlockDriverState *bs)
2460 {
2461     if (bs->backing_hd && bs->backing_hd->encrypted)
2462         return 1;
2463     return bs->encrypted;
2464 }
2465 
2466 int bdrv_key_required(BlockDriverState *bs)
2467 {
2468     BlockDriverState *backing_hd = bs->backing_hd;
2469 
2470     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2471         return 1;
2472     return (bs->encrypted && !bs->valid_key);
2473 }
2474 
2475 int bdrv_set_key(BlockDriverState *bs, const char *key)
2476 {
2477     int ret;
2478     if (bs->backing_hd && bs->backing_hd->encrypted) {
2479         ret = bdrv_set_key(bs->backing_hd, key);
2480         if (ret < 0)
2481             return ret;
2482         if (!bs->encrypted)
2483             return 0;
2484     }
2485     if (!bs->encrypted) {
2486         return -EINVAL;
2487     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2488         return -ENOMEDIUM;
2489     }
2490     ret = bs->drv->bdrv_set_key(bs, key);
2491     if (ret < 0) {
2492         bs->valid_key = 0;
2493     } else if (!bs->valid_key) {
2494         bs->valid_key = 1;
2495         if (bs->blk) {
2496             /* call the change callback now, we skipped it on open */
2497             blk_dev_change_media_cb(bs->blk, true);
2498         }
2499     }
2500     return ret;
2501 }
2502 
2503 /*
2504  * Provide an encryption key for @bs.
2505  * If @key is non-null:
2506  *     If @bs is not encrypted, fail.
2507  *     Else if the key is invalid, fail.
2508  *     Else set @bs's key to @key, replacing the existing key, if any.
2509  * If @key is null:
2510  *     If @bs is encrypted and still lacks a key, fail.
2511  *     Else do nothing.
2512  * On failure, store an error object through @errp if non-null.
2513  */
2514 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2515 {
2516     if (key) {
2517         if (!bdrv_is_encrypted(bs)) {
2518             error_setg(errp, "Node '%s' is not encrypted",
2519                       bdrv_get_device_or_node_name(bs));
2520         } else if (bdrv_set_key(bs, key) < 0) {
2521             error_set(errp, QERR_INVALID_PASSWORD);
2522         }
2523     } else {
2524         if (bdrv_key_required(bs)) {
2525             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2526                       "'%s' (%s) is encrypted",
2527                       bdrv_get_device_or_node_name(bs),
2528                       bdrv_get_encrypted_filename(bs));
2529         }
2530     }
2531 }
2532 
2533 const char *bdrv_get_format_name(BlockDriverState *bs)
2534 {
2535     return bs->drv ? bs->drv->format_name : NULL;
2536 }
2537 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() passes pointers to the array elements, so a and b are really
 * 'const char * const *' and have to be dereferenced once before they can
 * be given to strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2542 
2543 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2544                          void *opaque)
2545 {
2546     BlockDriver *drv;
2547     int count = 0;
2548     int i;
2549     const char **formats = NULL;
2550 
2551     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2552         if (drv->format_name) {
2553             bool found = false;
2554             int i = count;
2555             while (formats && i && !found) {
2556                 found = !strcmp(formats[--i], drv->format_name);
2557             }
2558 
2559             if (!found) {
2560                 formats = g_renew(const char *, formats, count + 1);
2561                 formats[count++] = drv->format_name;
2562             }
2563         }
2564     }
2565 
2566     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2567 
2568     for (i = 0; i < count; i++) {
2569         it(opaque, formats[i]);
2570     }
2571 
2572     g_free(formats);
2573 }
2574 
2575 /* This function is to find a node in the bs graph */
2576 BlockDriverState *bdrv_find_node(const char *node_name)
2577 {
2578     BlockDriverState *bs;
2579 
2580     assert(node_name);
2581 
2582     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2583         if (!strcmp(node_name, bs->node_name)) {
2584             return bs;
2585         }
2586     }
2587     return NULL;
2588 }
2589 
2590 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2591 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2592 {
2593     BlockDeviceInfoList *list, *entry;
2594     BlockDriverState *bs;
2595 
2596     list = NULL;
2597     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2598         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2599         if (!info) {
2600             qapi_free_BlockDeviceInfoList(list);
2601             return NULL;
2602         }
2603         entry = g_malloc0(sizeof(*entry));
2604         entry->value = info;
2605         entry->next = list;
2606         list = entry;
2607     }
2608 
2609     return list;
2610 }
2611 
2612 BlockDriverState *bdrv_lookup_bs(const char *device,
2613                                  const char *node_name,
2614                                  Error **errp)
2615 {
2616     BlockBackend *blk;
2617     BlockDriverState *bs;
2618 
2619     if (device) {
2620         blk = blk_by_name(device);
2621 
2622         if (blk) {
2623             return blk_bs(blk);
2624         }
2625     }
2626 
2627     if (node_name) {
2628         bs = bdrv_find_node(node_name);
2629 
2630         if (bs) {
2631             return bs;
2632         }
2633     }
2634 
2635     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2636                      device ? device : "",
2637                      node_name ? node_name : "");
2638     return NULL;
2639 }
2640 
2641 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2642  * return false.  If either argument is NULL, return false. */
2643 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2644 {
2645     while (top && top != base) {
2646         top = top->backing_hd;
2647     }
2648 
2649     return top != NULL;
2650 }
2651 
2652 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2653 {
2654     if (!bs) {
2655         return QTAILQ_FIRST(&graph_bdrv_states);
2656     }
2657     return QTAILQ_NEXT(bs, node_list);
2658 }
2659 
2660 BlockDriverState *bdrv_next(BlockDriverState *bs)
2661 {
2662     if (!bs) {
2663         return QTAILQ_FIRST(&bdrv_states);
2664     }
2665     return QTAILQ_NEXT(bs, device_list);
2666 }
2667 
2668 const char *bdrv_get_node_name(const BlockDriverState *bs)
2669 {
2670     return bs->node_name;
2671 }
2672 
2673 /* TODO check what callers really want: bs->node_name or blk_name() */
2674 const char *bdrv_get_device_name(const BlockDriverState *bs)
2675 {
2676     return bs->blk ? blk_name(bs->blk) : "";
2677 }
2678 
2679 /* This can be used to identify nodes that might not have a device
2680  * name associated. Since node and device names live in the same
2681  * namespace, the result is unambiguous. The exception is if both are
2682  * absent, then this returns an empty (non-null) string. */
2683 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2684 {
2685     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2686 }
2687 
2688 int bdrv_get_flags(BlockDriverState *bs)
2689 {
2690     return bs->open_flags;
2691 }
2692 
2693 int bdrv_has_zero_init_1(BlockDriverState *bs)
2694 {
2695     return 1;
2696 }
2697 
2698 int bdrv_has_zero_init(BlockDriverState *bs)
2699 {
2700     assert(bs->drv);
2701 
2702     /* If BS is a copy on write image, it is initialized to
2703        the contents of the base image, which may not be zeroes.  */
2704     if (bs->backing_hd) {
2705         return 0;
2706     }
2707     if (bs->drv->bdrv_has_zero_init) {
2708         return bs->drv->bdrv_has_zero_init(bs);
2709     }
2710 
2711     /* safe default */
2712     return 0;
2713 }
2714 
2715 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2716 {
2717     BlockDriverInfo bdi;
2718 
2719     if (bs->backing_hd) {
2720         return false;
2721     }
2722 
2723     if (bdrv_get_info(bs, &bdi) == 0) {
2724         return bdi.unallocated_blocks_are_zero;
2725     }
2726 
2727     return false;
2728 }
2729 
2730 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2731 {
2732     BlockDriverInfo bdi;
2733 
2734     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2735         return false;
2736     }
2737 
2738     if (bdrv_get_info(bs, &bdi) == 0) {
2739         return bdi.can_write_zeroes_with_unmap;
2740     }
2741 
2742     return false;
2743 }
2744 
2745 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2746 {
2747     if (bs->backing_hd && bs->backing_hd->encrypted)
2748         return bs->backing_file;
2749     else if (bs->encrypted)
2750         return bs->filename;
2751     else
2752         return NULL;
2753 }
2754 
2755 void bdrv_get_backing_filename(BlockDriverState *bs,
2756                                char *filename, int filename_size)
2757 {
2758     pstrcpy(filename, filename_size, bs->backing_file);
2759 }
2760 
2761 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2762 {
2763     BlockDriver *drv = bs->drv;
2764     if (!drv)
2765         return -ENOMEDIUM;
2766     if (!drv->bdrv_get_info)
2767         return -ENOTSUP;
2768     memset(bdi, 0, sizeof(*bdi));
2769     return drv->bdrv_get_info(bs, bdi);
2770 }
2771 
2772 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2773 {
2774     BlockDriver *drv = bs->drv;
2775     if (drv && drv->bdrv_get_specific_info) {
2776         return drv->bdrv_get_specific_info(bs);
2777     }
2778     return NULL;
2779 }
2780 
2781 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2782 {
2783     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2784         return;
2785     }
2786 
2787     bs->drv->bdrv_debug_event(bs, event);
2788 }
2789 
2790 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2791                           const char *tag)
2792 {
2793     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2794         bs = bs->file;
2795     }
2796 
2797     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2798         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2799     }
2800 
2801     return -ENOTSUP;
2802 }
2803 
2804 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2805 {
2806     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2807         bs = bs->file;
2808     }
2809 
2810     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2811         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2812     }
2813 
2814     return -ENOTSUP;
2815 }
2816 
2817 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2818 {
2819     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2820         bs = bs->file;
2821     }
2822 
2823     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2824         return bs->drv->bdrv_debug_resume(bs, tag);
2825     }
2826 
2827     return -ENOTSUP;
2828 }
2829 
2830 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2831 {
2832     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2833         bs = bs->file;
2834     }
2835 
2836     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2837         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2838     }
2839 
2840     return false;
2841 }
2842 
2843 int bdrv_is_snapshot(BlockDriverState *bs)
2844 {
2845     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2846 }
2847 
2848 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2849  * relative, it must be relative to the chain.  So, passing in bs->filename
2850  * from a BDS as backing_file should not be done, as that may be relative to
2851  * the CWD rather than the chain. */
2852 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2853         const char *backing_file)
2854 {
2855     char *filename_full = NULL;
2856     char *backing_file_full = NULL;
2857     char *filename_tmp = NULL;
2858     int is_protocol = 0;
2859     BlockDriverState *curr_bs = NULL;
2860     BlockDriverState *retval = NULL;
2861 
2862     if (!bs || !bs->drv || !backing_file) {
2863         return NULL;
2864     }
2865 
2866     filename_full     = g_malloc(PATH_MAX);
2867     backing_file_full = g_malloc(PATH_MAX);
2868     filename_tmp      = g_malloc(PATH_MAX);
2869 
2870     is_protocol = path_has_protocol(backing_file);
2871 
2872     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2873 
2874         /* If either of the filename paths is actually a protocol, then
2875          * compare unmodified paths; otherwise make paths relative */
2876         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2877             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2878                 retval = curr_bs->backing_hd;
2879                 break;
2880             }
2881         } else {
2882             /* If not an absolute filename path, make it relative to the current
2883              * image's filename path */
2884             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2885                          backing_file);
2886 
2887             /* We are going to compare absolute pathnames */
2888             if (!realpath(filename_tmp, filename_full)) {
2889                 continue;
2890             }
2891 
2892             /* We need to make sure the backing filename we are comparing against
2893              * is relative to the current image filename (or absolute) */
2894             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2895                          curr_bs->backing_file);
2896 
2897             if (!realpath(filename_tmp, backing_file_full)) {
2898                 continue;
2899             }
2900 
2901             if (strcmp(backing_file_full, filename_full) == 0) {
2902                 retval = curr_bs->backing_hd;
2903                 break;
2904             }
2905         }
2906     }
2907 
2908     g_free(filename_full);
2909     g_free(backing_file_full);
2910     g_free(filename_tmp);
2911     return retval;
2912 }
2913 
2914 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2915 {
2916     if (!bs->drv) {
2917         return 0;
2918     }
2919 
2920     if (!bs->backing_hd) {
2921         return 0;
2922     }
2923 
2924     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2925 }
2926 
2927 void bdrv_init(void)
2928 {
2929     module_call_init(MODULE_INIT_BLOCK);
2930 }
2931 
2932 void bdrv_init_with_whitelist(void)
2933 {
2934     use_bdrv_whitelist = 1;
2935     bdrv_init();
2936 }
2937 
2938 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2939 {
2940     Error *local_err = NULL;
2941     int ret;
2942 
2943     if (!bs->drv)  {
2944         return;
2945     }
2946 
2947     if (!(bs->open_flags & BDRV_O_INCOMING)) {
2948         return;
2949     }
2950     bs->open_flags &= ~BDRV_O_INCOMING;
2951 
2952     if (bs->drv->bdrv_invalidate_cache) {
2953         bs->drv->bdrv_invalidate_cache(bs, &local_err);
2954     } else if (bs->file) {
2955         bdrv_invalidate_cache(bs->file, &local_err);
2956     }
2957     if (local_err) {
2958         error_propagate(errp, local_err);
2959         return;
2960     }
2961 
2962     ret = refresh_total_sectors(bs, bs->total_sectors);
2963     if (ret < 0) {
2964         error_setg_errno(errp, -ret, "Could not refresh total sector count");
2965         return;
2966     }
2967 }
2968 
2969 void bdrv_invalidate_cache_all(Error **errp)
2970 {
2971     BlockDriverState *bs;
2972     Error *local_err = NULL;
2973 
2974     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2975         AioContext *aio_context = bdrv_get_aio_context(bs);
2976 
2977         aio_context_acquire(aio_context);
2978         bdrv_invalidate_cache(bs, &local_err);
2979         aio_context_release(aio_context);
2980         if (local_err) {
2981             error_propagate(errp, local_err);
2982             return;
2983         }
2984     }
2985 }
2986 
2987 /**************************************************************/
2988 /* removable device support */
2989 
2990 /**
2991  * Return TRUE if the media is present
2992  */
2993 int bdrv_is_inserted(BlockDriverState *bs)
2994 {
2995     BlockDriver *drv = bs->drv;
2996 
2997     if (!drv)
2998         return 0;
2999     if (!drv->bdrv_is_inserted)
3000         return 1;
3001     return drv->bdrv_is_inserted(bs);
3002 }
3003 
3004 /**
3005  * Return whether the media changed since the last call to this
3006  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3007  */
3008 int bdrv_media_changed(BlockDriverState *bs)
3009 {
3010     BlockDriver *drv = bs->drv;
3011 
3012     if (drv && drv->bdrv_media_changed) {
3013         return drv->bdrv_media_changed(bs);
3014     }
3015     return -ENOTSUP;
3016 }
3017 
3018 /**
3019  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3020  */
3021 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3022 {
3023     BlockDriver *drv = bs->drv;
3024     const char *device_name;
3025 
3026     if (drv && drv->bdrv_eject) {
3027         drv->bdrv_eject(bs, eject_flag);
3028     }
3029 
3030     device_name = bdrv_get_device_name(bs);
3031     if (device_name[0] != '\0') {
3032         qapi_event_send_device_tray_moved(device_name,
3033                                           eject_flag, &error_abort);
3034     }
3035 }
3036 
3037 /**
3038  * Lock or unlock the media (if it is locked, the user won't be able
3039  * to eject it manually).
3040  */
3041 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3042 {
3043     BlockDriver *drv = bs->drv;
3044 
3045     trace_bdrv_lock_medium(bs, locked);
3046 
3047     if (drv && drv->bdrv_lock_medium) {
3048         drv->bdrv_lock_medium(bs, locked);
3049     }
3050 }
3051 
3052 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3053 {
3054     bs->guest_block_size = align;
3055 }
3056 
3057 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3058 {
3059     BdrvDirtyBitmap *bm;
3060 
3061     assert(name);
3062     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3063         if (bm->name && !strcmp(name, bm->name)) {
3064             return bm;
3065         }
3066     }
3067     return NULL;
3068 }
3069 
3070 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3071 {
3072     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3073     g_free(bitmap->name);
3074     bitmap->name = NULL;
3075 }
3076 
3077 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3078                                           uint32_t granularity,
3079                                           const char *name,
3080                                           Error **errp)
3081 {
3082     int64_t bitmap_size;
3083     BdrvDirtyBitmap *bitmap;
3084     uint32_t sector_granularity;
3085 
3086     assert((granularity & (granularity - 1)) == 0);
3087 
3088     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3089         error_setg(errp, "Bitmap already exists: %s", name);
3090         return NULL;
3091     }
3092     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3093     assert(sector_granularity);
3094     bitmap_size = bdrv_nb_sectors(bs);
3095     if (bitmap_size < 0) {
3096         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3097         errno = -bitmap_size;
3098         return NULL;
3099     }
3100     bitmap = g_new0(BdrvDirtyBitmap, 1);
3101     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3102     bitmap->size = bitmap_size;
3103     bitmap->name = g_strdup(name);
3104     bitmap->disabled = false;
3105     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3106     return bitmap;
3107 }
3108 
3109 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3110 {
3111     return bitmap->successor;
3112 }
3113 
3114 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3115 {
3116     return !(bitmap->disabled || bitmap->successor);
3117 }
3118 
3119 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3120 {
3121     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3122         return DIRTY_BITMAP_STATUS_FROZEN;
3123     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3124         return DIRTY_BITMAP_STATUS_DISABLED;
3125     } else {
3126         return DIRTY_BITMAP_STATUS_ACTIVE;
3127     }
3128 }
3129 
3130 /**
3131  * Create a successor bitmap destined to replace this bitmap after an operation.
3132  * Requires that the bitmap is not frozen and has no successor.
3133  */
3134 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3135                                        BdrvDirtyBitmap *bitmap, Error **errp)
3136 {
3137     uint64_t granularity;
3138     BdrvDirtyBitmap *child;
3139 
3140     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3141         error_setg(errp, "Cannot create a successor for a bitmap that is "
3142                    "currently frozen");
3143         return -1;
3144     }
3145     assert(!bitmap->successor);
3146 
3147     /* Create an anonymous successor */
3148     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3149     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3150     if (!child) {
3151         return -1;
3152     }
3153 
3154     /* Successor will be on or off based on our current state. */
3155     child->disabled = bitmap->disabled;
3156 
3157     /* Install the successor and freeze the parent */
3158     bitmap->successor = child;
3159     return 0;
3160 }
3161 
3162 /**
3163  * For a bitmap with a successor, yield our name to the successor,
3164  * delete the old bitmap, and return a handle to the new bitmap.
3165  */
3166 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3167                                             BdrvDirtyBitmap *bitmap,
3168                                             Error **errp)
3169 {
3170     char *name;
3171     BdrvDirtyBitmap *successor = bitmap->successor;
3172 
3173     if (successor == NULL) {
3174         error_setg(errp, "Cannot relinquish control if "
3175                    "there's no successor present");
3176         return NULL;
3177     }
3178 
3179     name = bitmap->name;
3180     bitmap->name = NULL;
3181     successor->name = name;
3182     bitmap->successor = NULL;
3183     bdrv_release_dirty_bitmap(bs, bitmap);
3184 
3185     return successor;
3186 }
3187 
3188 /**
3189  * In cases of failure where we can no longer safely delete the parent,
3190  * we may wish to re-join the parent and child/successor.
3191  * The merged parent will be un-frozen, but not explicitly re-enabled.
3192  */
3193 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3194                                            BdrvDirtyBitmap *parent,
3195                                            Error **errp)
3196 {
3197     BdrvDirtyBitmap *successor = parent->successor;
3198 
3199     if (!successor) {
3200         error_setg(errp, "Cannot reclaim a successor when none is present");
3201         return NULL;
3202     }
3203 
3204     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3205         error_setg(errp, "Merging of parent and successor bitmap failed");
3206         return NULL;
3207     }
3208     bdrv_release_dirty_bitmap(bs, successor);
3209     parent->successor = NULL;
3210 
3211     return parent;
3212 }
3213 
3214 /**
3215  * Truncates _all_ bitmaps attached to a BDS.
3216  */
3217 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3218 {
3219     BdrvDirtyBitmap *bitmap;
3220     uint64_t size = bdrv_nb_sectors(bs);
3221 
3222     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3223         if (bdrv_dirty_bitmap_frozen(bitmap)) {
3224             continue;
3225         }
3226         hbitmap_truncate(bitmap->bitmap, size);
3227     }
3228 }
3229 
3230 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3231 {
3232     BdrvDirtyBitmap *bm, *next;
3233     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3234         if (bm == bitmap) {
3235             assert(!bdrv_dirty_bitmap_frozen(bm));
3236             QLIST_REMOVE(bitmap, list);
3237             hbitmap_free(bitmap->bitmap);
3238             g_free(bitmap->name);
3239             g_free(bitmap);
3240             return;
3241         }
3242     }
3243 }
3244 
3245 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3246 {
3247     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3248     bitmap->disabled = true;
3249 }
3250 
3251 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3252 {
3253     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3254     bitmap->disabled = false;
3255 }
3256 
3257 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3258 {
3259     BdrvDirtyBitmap *bm;
3260     BlockDirtyInfoList *list = NULL;
3261     BlockDirtyInfoList **plist = &list;
3262 
3263     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3264         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3265         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3266         info->count = bdrv_get_dirty_count(bm);
3267         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3268         info->has_name = !!bm->name;
3269         info->name = g_strdup(bm->name);
3270         info->status = bdrv_dirty_bitmap_status(bm);
3271         entry->value = info;
3272         *plist = entry;
3273         plist = &entry->next;
3274     }
3275 
3276     return list;
3277 }
3278 
3279 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3280 {
3281     if (bitmap) {
3282         return hbitmap_get(bitmap->bitmap, sector);
3283     } else {
3284         return 0;
3285     }
3286 }
3287 
3288 /**
3289  * Chooses a default granularity based on the existing cluster size,
3290  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3291  * is no cluster size information available.
3292  */
3293 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3294 {
3295     BlockDriverInfo bdi;
3296     uint32_t granularity;
3297 
3298     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3299         granularity = MAX(4096, bdi.cluster_size);
3300         granularity = MIN(65536, granularity);
3301     } else {
3302         granularity = 65536;
3303     }
3304 
3305     return granularity;
3306 }
3307 
3308 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3309 {
3310     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3311 }
3312 
3313 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3314 {
3315     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3316 }
3317 
3318 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3319                            int64_t cur_sector, int nr_sectors)
3320 {
3321     assert(bdrv_dirty_bitmap_enabled(bitmap));
3322     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3323 }
3324 
3325 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3326                              int64_t cur_sector, int nr_sectors)
3327 {
3328     assert(bdrv_dirty_bitmap_enabled(bitmap));
3329     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3330 }
3331 
3332 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3333 {
3334     assert(bdrv_dirty_bitmap_enabled(bitmap));
3335     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3336 }
3337 
3338 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3339                     int nr_sectors)
3340 {
3341     BdrvDirtyBitmap *bitmap;
3342     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3343         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3344             continue;
3345         }
3346         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3347     }
3348 }
3349 
3350 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3351                       int nr_sectors)
3352 {
3353     BdrvDirtyBitmap *bitmap;
3354     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3355         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3356             continue;
3357         }
3358         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3359     }
3360 }
3361 
3362 /**
3363  * Advance an HBitmapIter to an arbitrary offset.
3364  */
3365 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3366 {
3367     assert(hbi->hb);
3368     hbitmap_iter_init(hbi, hbi->hb, offset);
3369 }
3370 
3371 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3372 {
3373     return hbitmap_count(bitmap->bitmap);
3374 }
3375 
3376 /* Get a reference to bs */
3377 void bdrv_ref(BlockDriverState *bs)
3378 {
3379     bs->refcnt++;
3380 }
3381 
3382 /* Release a previously grabbed reference to bs.
3383  * If after releasing, reference count is zero, the BlockDriverState is
3384  * deleted. */
3385 void bdrv_unref(BlockDriverState *bs)
3386 {
3387     if (!bs) {
3388         return;
3389     }
3390     assert(bs->refcnt > 0);
3391     if (--bs->refcnt == 0) {
3392         bdrv_delete(bs);
3393     }
3394 }
3395 
3396 struct BdrvOpBlocker {
3397     Error *reason;
3398     QLIST_ENTRY(BdrvOpBlocker) list;
3399 };
3400 
3401 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3402 {
3403     BdrvOpBlocker *blocker;
3404     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3405     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3406         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3407         if (errp) {
3408             error_setg(errp, "Node '%s' is busy: %s",
3409                        bdrv_get_device_or_node_name(bs),
3410                        error_get_pretty(blocker->reason));
3411         }
3412         return true;
3413     }
3414     return false;
3415 }
3416 
3417 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3418 {
3419     BdrvOpBlocker *blocker;
3420     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3421 
3422     blocker = g_new0(BdrvOpBlocker, 1);
3423     blocker->reason = reason;
3424     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3425 }
3426 
3427 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3428 {
3429     BdrvOpBlocker *blocker, *next;
3430     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3431     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3432         if (blocker->reason == reason) {
3433             QLIST_REMOVE(blocker, list);
3434             g_free(blocker);
3435         }
3436     }
3437 }
3438 
3439 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3440 {
3441     int i;
3442     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3443         bdrv_op_block(bs, i, reason);
3444     }
3445 }
3446 
3447 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3448 {
3449     int i;
3450     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3451         bdrv_op_unblock(bs, i, reason);
3452     }
3453 }
3454 
3455 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3456 {
3457     int i;
3458 
3459     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3460         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3461             return false;
3462         }
3463     }
3464     return true;
3465 }
3466 
3467 void bdrv_iostatus_enable(BlockDriverState *bs)
3468 {
3469     bs->iostatus_enabled = true;
3470     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3471 }
3472 
3473 /* The I/O status is only enabled if the drive explicitly
3474  * enables it _and_ the VM is configured to stop on errors */
3475 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3476 {
3477     return (bs->iostatus_enabled &&
3478            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3479             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3480             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3481 }
3482 
3483 void bdrv_iostatus_disable(BlockDriverState *bs)
3484 {
3485     bs->iostatus_enabled = false;
3486 }
3487 
3488 void bdrv_iostatus_reset(BlockDriverState *bs)
3489 {
3490     if (bdrv_iostatus_is_enabled(bs)) {
3491         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3492         if (bs->job) {
3493             block_job_iostatus_reset(bs->job);
3494         }
3495     }
3496 }
3497 
3498 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3499 {
3500     assert(bdrv_iostatus_is_enabled(bs));
3501     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3502         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3503                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3504     }
3505 }
3506 
3507 void bdrv_img_create(const char *filename, const char *fmt,
3508                      const char *base_filename, const char *base_fmt,
3509                      char *options, uint64_t img_size, int flags,
3510                      Error **errp, bool quiet)
3511 {
3512     QemuOptsList *create_opts = NULL;
3513     QemuOpts *opts = NULL;
3514     const char *backing_fmt, *backing_file;
3515     int64_t size;
3516     BlockDriver *drv, *proto_drv;
3517     BlockDriver *backing_drv = NULL;
3518     Error *local_err = NULL;
3519     int ret = 0;
3520 
3521     /* Find driver and parse its options */
3522     drv = bdrv_find_format(fmt);
3523     if (!drv) {
3524         error_setg(errp, "Unknown file format '%s'", fmt);
3525         return;
3526     }
3527 
3528     proto_drv = bdrv_find_protocol(filename, true, errp);
3529     if (!proto_drv) {
3530         return;
3531     }
3532 
3533     if (!drv->create_opts) {
3534         error_setg(errp, "Format driver '%s' does not support image creation",
3535                    drv->format_name);
3536         return;
3537     }
3538 
3539     if (!proto_drv->create_opts) {
3540         error_setg(errp, "Protocol driver '%s' does not support image creation",
3541                    proto_drv->format_name);
3542         return;
3543     }
3544 
3545     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3546     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3547 
3548     /* Create parameter list with default values */
3549     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3550     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3551 
3552     /* Parse -o options */
3553     if (options) {
3554         qemu_opts_do_parse(opts, options, NULL, &local_err);
3555         if (local_err) {
3556             error_report_err(local_err);
3557             local_err = NULL;
3558             error_setg(errp, "Invalid options for file format '%s'", fmt);
3559             goto out;
3560         }
3561     }
3562 
3563     if (base_filename) {
3564         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3565         if (local_err) {
3566             error_setg(errp, "Backing file not supported for file format '%s'",
3567                        fmt);
3568             goto out;
3569         }
3570     }
3571 
3572     if (base_fmt) {
3573         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3574         if (local_err) {
3575             error_setg(errp, "Backing file format not supported for file "
3576                              "format '%s'", fmt);
3577             goto out;
3578         }
3579     }
3580 
3581     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3582     if (backing_file) {
3583         if (!strcmp(filename, backing_file)) {
3584             error_setg(errp, "Error: Trying to create an image with the "
3585                              "same filename as the backing file");
3586             goto out;
3587         }
3588     }
3589 
3590     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3591     if (backing_fmt) {
3592         backing_drv = bdrv_find_format(backing_fmt);
3593         if (!backing_drv) {
3594             error_setg(errp, "Unknown backing file format '%s'",
3595                        backing_fmt);
3596             goto out;
3597         }
3598     }
3599 
3600     // The size for the image must always be specified, with one exception:
3601     // If we are using a backing file, we can obtain the size from there
3602     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3603     if (size == -1) {
3604         if (backing_file) {
3605             BlockDriverState *bs;
3606             char *full_backing = g_new0(char, PATH_MAX);
3607             int64_t size;
3608             int back_flags;
3609 
3610             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3611                                                          full_backing, PATH_MAX,
3612                                                          &local_err);
3613             if (local_err) {
3614                 g_free(full_backing);
3615                 goto out;
3616             }
3617 
3618             /* backing files always opened read-only */
3619             back_flags =
3620                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3621 
3622             bs = NULL;
3623             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3624                             backing_drv, &local_err);
3625             g_free(full_backing);
3626             if (ret < 0) {
3627                 goto out;
3628             }
3629             size = bdrv_getlength(bs);
3630             if (size < 0) {
3631                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3632                                  backing_file);
3633                 bdrv_unref(bs);
3634                 goto out;
3635             }
3636 
3637             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3638 
3639             bdrv_unref(bs);
3640         } else {
3641             error_setg(errp, "Image creation needs a size parameter");
3642             goto out;
3643         }
3644     }
3645 
3646     if (!quiet) {
3647         printf("Formatting '%s', fmt=%s", filename, fmt);
3648         qemu_opts_print(opts, " ");
3649         puts("");
3650     }
3651 
3652     ret = bdrv_create(drv, filename, opts, &local_err);
3653 
3654     if (ret == -EFBIG) {
3655         /* This is generally a better message than whatever the driver would
3656          * deliver (especially because of the cluster_size_hint), since that
3657          * is most probably not much different from "image too large". */
3658         const char *cluster_size_hint = "";
3659         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3660             cluster_size_hint = " (try using a larger cluster size)";
3661         }
3662         error_setg(errp, "The image size is too large for file format '%s'"
3663                    "%s", fmt, cluster_size_hint);
3664         error_free(local_err);
3665         local_err = NULL;
3666     }
3667 
3668 out:
3669     qemu_opts_del(opts);
3670     qemu_opts_free(create_opts);
3671     if (local_err) {
3672         error_propagate(errp, local_err);
3673     }
3674 }
3675 
3676 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3677 {
3678     return bs->aio_context;
3679 }
3680 
3681 void bdrv_detach_aio_context(BlockDriverState *bs)
3682 {
3683     BdrvAioNotifier *baf;
3684 
3685     if (!bs->drv) {
3686         return;
3687     }
3688 
3689     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3690         baf->detach_aio_context(baf->opaque);
3691     }
3692 
3693     if (bs->io_limits_enabled) {
3694         throttle_detach_aio_context(&bs->throttle_state);
3695     }
3696     if (bs->drv->bdrv_detach_aio_context) {
3697         bs->drv->bdrv_detach_aio_context(bs);
3698     }
3699     if (bs->file) {
3700         bdrv_detach_aio_context(bs->file);
3701     }
3702     if (bs->backing_hd) {
3703         bdrv_detach_aio_context(bs->backing_hd);
3704     }
3705 
3706     bs->aio_context = NULL;
3707 }
3708 
3709 void bdrv_attach_aio_context(BlockDriverState *bs,
3710                              AioContext *new_context)
3711 {
3712     BdrvAioNotifier *ban;
3713 
3714     if (!bs->drv) {
3715         return;
3716     }
3717 
3718     bs->aio_context = new_context;
3719 
3720     if (bs->backing_hd) {
3721         bdrv_attach_aio_context(bs->backing_hd, new_context);
3722     }
3723     if (bs->file) {
3724         bdrv_attach_aio_context(bs->file, new_context);
3725     }
3726     if (bs->drv->bdrv_attach_aio_context) {
3727         bs->drv->bdrv_attach_aio_context(bs, new_context);
3728     }
3729     if (bs->io_limits_enabled) {
3730         throttle_attach_aio_context(&bs->throttle_state, new_context);
3731     }
3732 
3733     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3734         ban->attached_aio_context(new_context, ban->opaque);
3735     }
3736 }
3737 
3738 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3739 {
3740     bdrv_drain_all(); /* ensure there are no in-flight requests */
3741 
3742     bdrv_detach_aio_context(bs);
3743 
3744     /* This function executes in the old AioContext so acquire the new one in
3745      * case it runs in a different thread.
3746      */
3747     aio_context_acquire(new_context);
3748     bdrv_attach_aio_context(bs, new_context);
3749     aio_context_release(new_context);
3750 }
3751 
3752 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3753         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3754         void (*detach_aio_context)(void *opaque), void *opaque)
3755 {
3756     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3757     *ban = (BdrvAioNotifier){
3758         .attached_aio_context = attached_aio_context,
3759         .detach_aio_context   = detach_aio_context,
3760         .opaque               = opaque
3761     };
3762 
3763     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3764 }
3765 
3766 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3767                                       void (*attached_aio_context)(AioContext *,
3768                                                                    void *),
3769                                       void (*detach_aio_context)(void *),
3770                                       void *opaque)
3771 {
3772     BdrvAioNotifier *ban, *ban_next;
3773 
3774     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3775         if (ban->attached_aio_context == attached_aio_context &&
3776             ban->detach_aio_context   == detach_aio_context   &&
3777             ban->opaque               == opaque)
3778         {
3779             QLIST_REMOVE(ban, list);
3780             g_free(ban);
3781 
3782             return;
3783         }
3784     }
3785 
3786     abort();
3787 }
3788 
3789 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3790                        BlockDriverAmendStatusCB *status_cb)
3791 {
3792     if (!bs->drv->bdrv_amend_options) {
3793         return -ENOTSUP;
3794     }
3795     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3796 }
3797 
3798 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3799  * of block filter and by bdrv_is_first_non_filter.
3800  * It is used to test if the given bs is the candidate or recurse more in the
3801  * node graph.
3802  */
3803 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3804                                       BlockDriverState *candidate)
3805 {
3806     /* return false if basic checks fails */
3807     if (!bs || !bs->drv) {
3808         return false;
3809     }
3810 
3811     /* the code reached a non block filter driver -> check if the bs is
3812      * the same as the candidate. It's the recursion termination condition.
3813      */
3814     if (!bs->drv->is_filter) {
3815         return bs == candidate;
3816     }
3817     /* Down this path the driver is a block filter driver */
3818 
3819     /* If the block filter recursion method is defined use it to recurse down
3820      * the node graph.
3821      */
3822     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3823         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3824     }
3825 
3826     /* the driver is a block filter but don't allow to recurse -> return false
3827      */
3828     return false;
3829 }
3830 
3831 /* This function checks if the candidate is the first non filter bs down it's
3832  * bs chain. Since we don't have pointers to parents it explore all bs chains
3833  * from the top. Some filters can choose not to pass down the recursion.
3834  */
3835 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3836 {
3837     BlockDriverState *bs;
3838 
3839     /* walk down the bs forest recursively */
3840     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3841         bool perm;
3842 
3843         /* try to recurse in this top level bs */
3844         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3845 
3846         /* candidate is the first non filter */
3847         if (perm) {
3848             return true;
3849         }
3850     }
3851 
3852     return false;
3853 }
3854 
3855 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3856 {
3857     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3858     AioContext *aio_context;
3859 
3860     if (!to_replace_bs) {
3861         error_setg(errp, "Node name '%s' not found", node_name);
3862         return NULL;
3863     }
3864 
3865     aio_context = bdrv_get_aio_context(to_replace_bs);
3866     aio_context_acquire(aio_context);
3867 
3868     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3869         to_replace_bs = NULL;
3870         goto out;
3871     }
3872 
3873     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3874      * most non filter in order to prevent data corruption.
3875      * Another benefit is that this tests exclude backing files which are
3876      * blocked by the backing blockers.
3877      */
3878     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3879         error_setg(errp, "Only top most non filter can be replaced");
3880         to_replace_bs = NULL;
3881         goto out;
3882     }
3883 
3884 out:
3885     aio_context_release(aio_context);
3886     return to_replace_bs;
3887 }
3888 
3889 static bool append_open_options(QDict *d, BlockDriverState *bs)
3890 {
3891     const QDictEntry *entry;
3892     bool found_any = false;
3893 
3894     for (entry = qdict_first(bs->options); entry;
3895          entry = qdict_next(bs->options, entry))
3896     {
3897         /* Only take options for this level and exclude all non-driver-specific
3898          * options */
3899         if (!strchr(qdict_entry_key(entry), '.') &&
3900             strcmp(qdict_entry_key(entry), "node-name"))
3901         {
3902             qobject_incref(qdict_entry_value(entry));
3903             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3904             found_any = true;
3905         }
3906     }
3907 
3908     return found_any;
3909 }
3910 
3911 /* Updates the following BDS fields:
3912  *  - exact_filename: A filename which may be used for opening a block device
3913  *                    which (mostly) equals the given BDS (even without any
3914  *                    other options; so reading and writing must return the same
3915  *                    results, but caching etc. may be different)
3916  *  - full_open_options: Options which, when given when opening a block device
3917  *                       (without a filename), result in a BDS (mostly)
3918  *                       equalling the given one
3919  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3920  *              full_open_options is converted to a JSON object, prefixed with
3921  *              "json:" (for use through the JSON pseudo protocol) and put here.
3922  */
3923 void bdrv_refresh_filename(BlockDriverState *bs)
3924 {
3925     BlockDriver *drv = bs->drv;
3926     QDict *opts;
3927 
3928     if (!drv) {
3929         return;
3930     }
3931 
3932     /* This BDS's file name will most probably depend on its file's name, so
3933      * refresh that first */
3934     if (bs->file) {
3935         bdrv_refresh_filename(bs->file);
3936     }
3937 
3938     if (drv->bdrv_refresh_filename) {
3939         /* Obsolete information is of no use here, so drop the old file name
3940          * information before refreshing it */
3941         bs->exact_filename[0] = '\0';
3942         if (bs->full_open_options) {
3943             QDECREF(bs->full_open_options);
3944             bs->full_open_options = NULL;
3945         }
3946 
3947         drv->bdrv_refresh_filename(bs);
3948     } else if (bs->file) {
3949         /* Try to reconstruct valid information from the underlying file */
3950         bool has_open_options;
3951 
3952         bs->exact_filename[0] = '\0';
3953         if (bs->full_open_options) {
3954             QDECREF(bs->full_open_options);
3955             bs->full_open_options = NULL;
3956         }
3957 
3958         opts = qdict_new();
3959         has_open_options = append_open_options(opts, bs);
3960 
3961         /* If no specific options have been given for this BDS, the filename of
3962          * the underlying file should suffice for this one as well */
3963         if (bs->file->exact_filename[0] && !has_open_options) {
3964             strcpy(bs->exact_filename, bs->file->exact_filename);
3965         }
3966         /* Reconstructing the full options QDict is simple for most format block
3967          * drivers, as long as the full options are known for the underlying
3968          * file BDS. The full options QDict of that file BDS should somehow
3969          * contain a representation of the filename, therefore the following
3970          * suffices without querying the (exact_)filename of this BDS. */
3971         if (bs->file->full_open_options) {
3972             qdict_put_obj(opts, "driver",
3973                           QOBJECT(qstring_from_str(drv->format_name)));
3974             QINCREF(bs->file->full_open_options);
3975             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3976 
3977             bs->full_open_options = opts;
3978         } else {
3979             QDECREF(opts);
3980         }
3981     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3982         /* There is no underlying file BDS (at least referenced by BDS.file),
3983          * so the full options QDict should be equal to the options given
3984          * specifically for this block device when it was opened (plus the
3985          * driver specification).
3986          * Because those options don't change, there is no need to update
3987          * full_open_options when it's already set. */
3988 
3989         opts = qdict_new();
3990         append_open_options(opts, bs);
3991         qdict_put_obj(opts, "driver",
3992                       QOBJECT(qstring_from_str(drv->format_name)));
3993 
3994         if (bs->exact_filename[0]) {
3995             /* This may not work for all block protocol drivers (some may
3996              * require this filename to be parsed), but we have to find some
3997              * default solution here, so just include it. If some block driver
3998              * does not support pure options without any filename at all or
3999              * needs some special format of the options QDict, it needs to
4000              * implement the driver-specific bdrv_refresh_filename() function.
4001              */
4002             qdict_put_obj(opts, "filename",
4003                           QOBJECT(qstring_from_str(bs->exact_filename)));
4004         }
4005 
4006         bs->full_open_options = opts;
4007     }
4008 
4009     if (bs->exact_filename[0]) {
4010         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4011     } else if (bs->full_open_options) {
4012         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4013         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4014                  qstring_get_str(json));
4015         QDECREF(json);
4016     }
4017 }
4018 
4019 /* This accessor function purpose is to allow the device models to access the
4020  * BlockAcctStats structure embedded inside a BlockDriverState without being
4021  * aware of the BlockDriverState structure layout.
4022  * It will go away when the BlockAcctStats structure will be moved inside
4023  * the device models.
4024  */
4025 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4026 {
4027     return &bs->stats;
4028 }
4029