xref: /openbmc/qemu/block.c (revision cd2bc889e5b30c69926fc1511b6522e7cb4c705d)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into a BlockDriverState's
     * dirty_bitmaps list (initialized in bdrv_new()) -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
70 
/* Sentinel stored in a result field while an emulated synchronous operation
 * is still in progress; bdrv_create() polls until its result differs from
 * this value. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates created through bdrv_new_root() are appended here */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BlockDriverStates that were given a node name; bdrv_assign_node_name()
 * appends to this list */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition lives further down in this file */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/* Return 1 if @filename starts with "<ASCII letter>:", 0 otherwise */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_lower = (first >= 'a' && first <= 'z');
    const int is_upper = (first >= 'A' && first <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
93 
94 int is_windows_drive(const char *filename)
95 {
96     if (is_windows_drive_prefix(filename) &&
97         filename[2] == '\0')
98         return 1;
99     if (strstart(filename, "\\\\.\\", NULL) ||
100         strstart(filename, "//./", NULL))
101         return 1;
102     return 0;
103 }
104 #endif
105 
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108     if (!bs || !bs->drv) {
109         /* 4k should be on the safe side */
110         return 4096;
111     }
112 
113     return bs->bl.opt_mem_alignment;
114 }
115 
/*
 * Check if the path starts with "<protocol>:", i.e. whether a ':' shows up
 * before any path separator. Returns 1 if so, 0 otherwise.
 * On Windows, drive specifications are never considered a protocol.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Index of the first stop character, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
133 
/*
 * Return 1 if @path is absolute, 0 otherwise. A path is absolute when it
 * starts with '/'; on Windows a leading '\\' or a drive specification
 * counts as well.
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
146 
/*
 * Build in @dest (a buffer of @dest_size bytes) the path of @filename as
 * seen from @base_path. The result is truncated to fit and always
 * NUL-terminated; nothing is written if @dest_size is not positive.
 *
 * If @filename is absolute, it is just copied to @dest. Otherwise it
 * replaces the last path component of @base_path. URLs are supported: a
 * leading "<protocol>:" prefix of @base_path is preserved even when no '/'
 * follows it.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *keep_end = base_path; /* end of the part of base_path to keep */
    const char *sep;
    bool colon_is_prefix;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /*
     * A ':' only ends a prefix worth keeping when it is a protocol separator
     * (it comes before any path separator) or, on Windows, part of a drive
     * specification. A colon inside a later path component (as in
     * "dir/a:b") is an ordinary file name character and must not be kept.
     */
#ifdef _WIN32
    colon_is_prefix = is_windows_drive(base_path) ||
                      is_windows_drive_prefix(base_path) ||
                      path_has_protocol(base_path);
#else
    colon_is_prefix = path_has_protocol(base_path);
#endif
    if (colon_is_prefix) {
        sep = strchr(base_path, ':');
        if (sep) {
            keep_end = sep + 1;
        }
    }

    /* Locate the last directory separator */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (backslash && (!sep || backslash > sep)) {
            sep = backslash;
        }
    }
#endif
    /* Keep whichever prefix reaches further: protocol/drive or directory */
    if (sep && sep + 1 > keep_end) {
        keep_end = sep + 1;
    }

    len = keep_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
190 
191 void bdrv_get_full_backing_filename_from_filename(const char *backed,
192                                                   const char *backing,
193                                                   char *dest, size_t sz,
194                                                   Error **errp)
195 {
196     if (backing[0] == '\0' || path_has_protocol(backing) ||
197         path_is_absolute(backing))
198     {
199         pstrcpy(dest, sz, backing);
200     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
201         error_setg(errp, "Cannot use relative backing file names for '%s'",
202                    backed);
203     } else {
204         path_combine(dest, sz, backed, backing);
205     }
206 }
207 
208 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
209                                     Error **errp)
210 {
211     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
212 
213     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
214                                                  dest, sz, errp);
215 }
216 
217 void bdrv_register(BlockDriver *bdrv)
218 {
219     bdrv_setup_io_funcs(bdrv);
220 
221     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
222 }
223 
224 BlockDriverState *bdrv_new_root(void)
225 {
226     BlockDriverState *bs = bdrv_new();
227 
228     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
229     return bs;
230 }
231 
232 BlockDriverState *bdrv_new(void)
233 {
234     BlockDriverState *bs;
235     int i;
236 
237     bs = g_new0(BlockDriverState, 1);
238     QLIST_INIT(&bs->dirty_bitmaps);
239     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
240         QLIST_INIT(&bs->op_blockers[i]);
241     }
242     bdrv_iostatus_disable(bs);
243     notifier_list_init(&bs->close_notifiers);
244     notifier_with_return_list_init(&bs->before_write_notifiers);
245     qemu_co_queue_init(&bs->throttled_reqs[0]);
246     qemu_co_queue_init(&bs->throttled_reqs[1]);
247     bs->refcnt = 1;
248     bs->aio_context = qemu_get_aio_context();
249 
250     return bs;
251 }
252 
253 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
254 {
255     notifier_list_add(&bs->close_notifiers, notify);
256 }
257 
258 BlockDriver *bdrv_find_format(const char *format_name)
259 {
260     BlockDriver *drv1;
261     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
262         if (!strcmp(drv1->format_name, format_name)) {
263             return drv1;
264         }
265     }
266     return NULL;
267 }
268 
269 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
270 {
271     static const char *whitelist_rw[] = {
272         CONFIG_BDRV_RW_WHITELIST
273     };
274     static const char *whitelist_ro[] = {
275         CONFIG_BDRV_RO_WHITELIST
276     };
277     const char **p;
278 
279     if (!whitelist_rw[0] && !whitelist_ro[0]) {
280         return 1;               /* no whitelist, anything goes */
281     }
282 
283     for (p = whitelist_rw; *p; p++) {
284         if (!strcmp(drv->format_name, *p)) {
285             return 1;
286         }
287     }
288     if (read_only) {
289         for (p = whitelist_ro; *p; p++) {
290             if (!strcmp(drv->format_name, *p)) {
291                 return 1;
292             }
293         }
294     }
295     return 0;
296 }
297 
298 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
299                                           bool read_only)
300 {
301     BlockDriver *drv = bdrv_find_format(format_name);
302     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
303 }
304 
/* Arguments and results exchanged between bdrv_create() and the coroutine
 * entry point bdrv_create_co_entry() */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback is invoked */
    char *filename;     /* Copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* Creation options handed to the driver */
    int ret;            /* NOT_DONE until the driver callback has returned */
    Error *err;         /* Error reported by the driver, if any */
} CreateCo;
312 
313 static void coroutine_fn bdrv_create_co_entry(void *opaque)
314 {
315     Error *local_err = NULL;
316     int ret;
317 
318     CreateCo *cco = opaque;
319     assert(cco->drv);
320 
321     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
322     if (local_err) {
323         error_propagate(&cco->err, local_err);
324     }
325     cco->ret = ret;
326 }
327 
328 int bdrv_create(BlockDriver *drv, const char* filename,
329                 QemuOpts *opts, Error **errp)
330 {
331     int ret;
332 
333     Coroutine *co;
334     CreateCo cco = {
335         .drv = drv,
336         .filename = g_strdup(filename),
337         .opts = opts,
338         .ret = NOT_DONE,
339         .err = NULL,
340     };
341 
342     if (!drv->bdrv_create) {
343         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
344         ret = -ENOTSUP;
345         goto out;
346     }
347 
348     if (qemu_in_coroutine()) {
349         /* Fast-path if already in coroutine context */
350         bdrv_create_co_entry(&cco);
351     } else {
352         co = qemu_coroutine_create(bdrv_create_co_entry);
353         qemu_coroutine_enter(co, &cco);
354         while (cco.ret == NOT_DONE) {
355             aio_poll(qemu_get_aio_context(), true);
356         }
357     }
358 
359     ret = cco.ret;
360     if (ret < 0) {
361         if (cco.err) {
362             error_propagate(errp, cco.err);
363         } else {
364             error_setg_errno(errp, -ret, "Could not create image");
365         }
366     }
367 
368 out:
369     g_free(cco.filename);
370     return ret;
371 }
372 
373 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
374 {
375     BlockDriver *drv;
376     Error *local_err = NULL;
377     int ret;
378 
379     drv = bdrv_find_protocol(filename, true, errp);
380     if (drv == NULL) {
381         return -ENOENT;
382     }
383 
384     ret = bdrv_create(drv, filename, opts, &local_err);
385     if (local_err) {
386         error_propagate(errp, local_err);
387     }
388     return ret;
389 }
390 
391 /**
392  * Try to get @bs's logical and physical block size.
393  * On success, store them in @bsz struct and return 0.
394  * On failure return -errno.
395  * @bs must not be empty.
396  */
397 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
398 {
399     BlockDriver *drv = bs->drv;
400 
401     if (drv && drv->bdrv_probe_blocksizes) {
402         return drv->bdrv_probe_blocksizes(bs, bsz);
403     }
404 
405     return -ENOTSUP;
406 }
407 
408 /**
409  * Try to get @bs's geometry (cyls, heads, sectors).
410  * On success, store them in @geo struct and return 0.
411  * On failure return -errno.
412  * @bs must not be empty.
413  */
414 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
415 {
416     BlockDriver *drv = bs->drv;
417 
418     if (drv && drv->bdrv_probe_geometry) {
419         return drv->bdrv_probe_geometry(bs, geo);
420     }
421 
422     return -ENOTSUP;
423 }
424 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the file, @size is the size of that buffer
 * in bytes. On POSIX hosts the file is created in $TMPDIR, or in /var/tmp
 * if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename). On Windows the negated
 * GetLastError() code is returned on failure.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
460 
461 /*
462  * Detect host devices. By convention, /dev/cdrom[N] is always
463  * recognized as a host CDROM.
464  */
465 static BlockDriver *find_hdev_driver(const char *filename)
466 {
467     int score_max = 0, score;
468     BlockDriver *drv = NULL, *d;
469 
470     QLIST_FOREACH(d, &bdrv_drivers, list) {
471         if (d->bdrv_probe_device) {
472             score = d->bdrv_probe_device(filename);
473             if (score > score_max) {
474                 score_max = score;
475                 drv = d;
476             }
477         }
478     }
479 
480     return drv;
481 }
482 
483 BlockDriver *bdrv_find_protocol(const char *filename,
484                                 bool allow_protocol_prefix,
485                                 Error **errp)
486 {
487     BlockDriver *drv1;
488     char protocol[128];
489     int len;
490     const char *p;
491 
492     /* TODO Drivers without bdrv_file_open must be specified explicitly */
493 
494     /*
495      * XXX(hch): we really should not let host device detection
496      * override an explicit protocol specification, but moving this
497      * later breaks access to device names with colons in them.
498      * Thanks to the brain-dead persistent naming schemes on udev-
499      * based Linux systems those actually are quite common.
500      */
501     drv1 = find_hdev_driver(filename);
502     if (drv1) {
503         return drv1;
504     }
505 
506     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
507         return &bdrv_file;
508     }
509 
510     p = strchr(filename, ':');
511     assert(p != NULL);
512     len = p - filename;
513     if (len > sizeof(protocol) - 1)
514         len = sizeof(protocol) - 1;
515     memcpy(protocol, filename, len);
516     protocol[len] = '\0';
517     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
518         if (drv1->protocol_name &&
519             !strcmp(drv1->protocol_name, protocol)) {
520             return drv1;
521         }
522     }
523 
524     error_setg(errp, "Unknown protocol '%s'", protocol);
525     return NULL;
526 }
527 
528 /*
529  * Guess image format by probing its contents.
530  * This is not a good idea when your image is raw (CVE-2008-2004), but
531  * we do it anyway for backward compatibility.
532  *
533  * @buf         contains the image's first @buf_size bytes.
534  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
535  *              but can be smaller if the image file is smaller)
536  * @filename    is its filename.
537  *
538  * For all block drivers, call the bdrv_probe() method to get its
539  * probing score.
540  * Return the first block driver with the highest probing score.
541  */
542 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
543                             const char *filename)
544 {
545     int score_max = 0, score;
546     BlockDriver *drv = NULL, *d;
547 
548     QLIST_FOREACH(d, &bdrv_drivers, list) {
549         if (d->bdrv_probe) {
550             score = d->bdrv_probe(buf, buf_size, filename);
551             if (score > score_max) {
552                 score_max = score;
553                 drv = d;
554             }
555         }
556     }
557 
558     return drv;
559 }
560 
561 static int find_image_format(BlockDriverState *bs, const char *filename,
562                              BlockDriver **pdrv, Error **errp)
563 {
564     BlockDriver *drv;
565     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
566     int ret = 0;
567 
568     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
569     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
570         *pdrv = &bdrv_raw;
571         return ret;
572     }
573 
574     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
575     if (ret < 0) {
576         error_setg_errno(errp, -ret, "Could not read image for determining its "
577                          "format");
578         *pdrv = NULL;
579         return ret;
580     }
581 
582     drv = bdrv_probe_all(buf, ret, filename);
583     if (!drv) {
584         error_setg(errp, "Could not determine image format: No compatible "
585                    "driver found");
586         ret = -ENOENT;
587     }
588     *pdrv = drv;
589     return ret;
590 }
591 
592 /**
593  * Set the current 'total_sectors' value
594  * Return 0 on success, -errno on error.
595  */
596 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
597 {
598     BlockDriver *drv = bs->drv;
599 
600     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
601     if (bs->sg)
602         return 0;
603 
604     /* query actual device if possible, otherwise just trust the hint */
605     if (drv->bdrv_getlength) {
606         int64_t length = drv->bdrv_getlength(bs);
607         if (length < 0) {
608             return length;
609         }
610         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
611     }
612 
613     bs->total_sectors = hint;
614     return 0;
615 }
616 
617 /**
618  * Set open flags for a given discard mode
619  *
620  * Return 0 on success, -1 if the discard mode was invalid.
621  */
622 int bdrv_parse_discard_flags(const char *mode, int *flags)
623 {
624     *flags &= ~BDRV_O_UNMAP;
625 
626     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
627         /* do nothing */
628     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
629         *flags |= BDRV_O_UNMAP;
630     } else {
631         return -1;
632     }
633 
634     return 0;
635 }
636 
637 /**
638  * Set open flags for a given cache mode
639  *
640  * Return 0 on success, -1 if the cache mode was invalid.
641  */
642 int bdrv_parse_cache_flags(const char *mode, int *flags)
643 {
644     *flags &= ~BDRV_O_CACHE_MASK;
645 
646     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
647         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
648     } else if (!strcmp(mode, "directsync")) {
649         *flags |= BDRV_O_NOCACHE;
650     } else if (!strcmp(mode, "writeback")) {
651         *flags |= BDRV_O_CACHE_WB;
652     } else if (!strcmp(mode, "unsafe")) {
653         *flags |= BDRV_O_CACHE_WB;
654         *flags |= BDRV_O_NO_FLUSH;
655     } else if (!strcmp(mode, "writethrough")) {
656         /* this is the default */
657     } else {
658         return -1;
659     }
660 
661     return 0;
662 }
663 
664 /*
665  * Returns the flags that a temporary snapshot should get, based on the
666  * originally requested flags (the originally requested image will have flags
667  * like a backing file)
668  */
669 static int bdrv_temp_snapshot_flags(int flags)
670 {
671     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
672 }
673 
674 /*
675  * Returns the flags that bs->file should get, based on the given flags for
676  * the parent BDS
677  */
678 static int bdrv_inherited_flags(int flags)
679 {
680     /* Enable protocol handling, disable format probing for bs->file */
681     flags |= BDRV_O_PROTOCOL;
682 
683     /* Our block drivers take care to send flushes and respect unmap policy,
684      * so we can enable both unconditionally on lower layers. */
685     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
686 
687     /* Clear flags that only apply to the top layer */
688     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
689 
690     return flags;
691 }
692 
693 /*
694  * Returns the flags that bs->backing_hd should get, based on the given flags
695  * for the parent BDS
696  */
697 static int bdrv_backing_flags(int flags)
698 {
699     /* backing files always opened read-only */
700     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
701 
702     /* snapshot=on is handled on the top layer */
703     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
704 
705     return flags;
706 }
707 
708 static int bdrv_open_flags(BlockDriverState *bs, int flags)
709 {
710     int open_flags = flags | BDRV_O_CACHE_WB;
711 
712     /*
713      * Clear flags that are internal to the block layer before opening the
714      * image.
715      */
716     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
717 
718     /*
719      * Snapshots should be writable.
720      */
721     if (flags & BDRV_O_TEMPORARY) {
722         open_flags |= BDRV_O_RDWR;
723     }
724 
725     return open_flags;
726 }
727 
728 static void bdrv_assign_node_name(BlockDriverState *bs,
729                                   const char *node_name,
730                                   Error **errp)
731 {
732     if (!node_name) {
733         return;
734     }
735 
736     /* Check for empty string or invalid characters */
737     if (!id_wellformed(node_name)) {
738         error_setg(errp, "Invalid node name");
739         return;
740     }
741 
742     /* takes care of avoiding namespaces collisions */
743     if (blk_by_name(node_name)) {
744         error_setg(errp, "node-name=%s is conflicting with a device id",
745                    node_name);
746         return;
747     }
748 
749     /* takes care of avoiding duplicates node names */
750     if (bdrv_find_node(node_name)) {
751         error_setg(errp, "Duplicate node name");
752         return;
753     }
754 
755     /* copy node name into the bs and insert it into the graph list */
756     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
757     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
758 }
759 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      state to open; must not have bs->file set yet
 * @file:    already opened protocol layer, or NULL when @drv itself is
 *           opened as the protocol level
 * @options: options for the driver; must not be NULL and must not be
 *           bs->options
 * @flags:   BDRV_O_* flags requested for @bs
 * @drv:     driver to open @bs with; must not be NULL
 * @errp:    set on failure
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the protocol layer if there is one, and from
     * the options otherwise */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* "node-name" is consumed here and removed from the options */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults that are set before the driver's open function runs */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* With the whitelist in use, the error message tells a driver that is
     * read-only whitelisted apart from one that is not whitelisted at all */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures must go through free_and_fail to undo this */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's error; otherwise build one from the errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Detach the protocol layer and release the driver state again */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
903 
904 static QDict *parse_json_filename(const char *filename, Error **errp)
905 {
906     QObject *options_obj;
907     QDict *options;
908     int ret;
909 
910     ret = strstart(filename, "json:", &filename);
911     assert(ret);
912 
913     options_obj = qobject_from_json(filename);
914     if (!options_obj) {
915         error_setg(errp, "Could not parse the JSON options");
916         return NULL;
917     }
918 
919     if (qobject_type(options_obj) != QTYPE_QDICT) {
920         qobject_decref(options_obj);
921         error_setg(errp, "Invalid JSON object given");
922         return NULL;
923     }
924 
925     options = qobject_to_qdict(options_obj);
926     qdict_flatten(options);
927 
928     return options;
929 }
930 
931 /*
932  * Fills in default options for opening images and converts the legacy
933  * filename/flags pair to option QDict entries.
934  */
935 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
936                              BlockDriver *drv, Error **errp)
937 {
938     const char *filename = *pfilename;
939     const char *drvname;
940     bool protocol = flags & BDRV_O_PROTOCOL;
941     bool parse_filename = false;
942     Error *local_err = NULL;
943 
944     /* Parse json: pseudo-protocol */
945     if (filename && g_str_has_prefix(filename, "json:")) {
946         QDict *json_options = parse_json_filename(filename, &local_err);
947         if (local_err) {
948             error_propagate(errp, local_err);
949             return -EINVAL;
950         }
951 
952         /* Options given in the filename have lower priority than options
953          * specified directly */
954         qdict_join(*options, json_options, false);
955         QDECREF(json_options);
956         *pfilename = filename = NULL;
957     }
958 
959     /* Fetch the file name from the options QDict if necessary */
960     if (protocol && filename) {
961         if (!qdict_haskey(*options, "filename")) {
962             qdict_put(*options, "filename", qstring_from_str(filename));
963             parse_filename = true;
964         } else {
965             error_setg(errp, "Can't specify 'file' and 'filename' options at "
966                              "the same time");
967             return -EINVAL;
968         }
969     }
970 
971     /* Find the right block driver */
972     filename = qdict_get_try_str(*options, "filename");
973     drvname = qdict_get_try_str(*options, "driver");
974 
975     if (drv) {
976         if (drvname) {
977             error_setg(errp, "Driver specified twice");
978             return -EINVAL;
979         }
980         drvname = drv->format_name;
981         qdict_put(*options, "driver", qstring_from_str(drvname));
982     } else {
983         if (!drvname && protocol) {
984             if (filename) {
985                 drv = bdrv_find_protocol(filename, parse_filename, errp);
986                 if (!drv) {
987                     return -EINVAL;
988                 }
989 
990                 drvname = drv->format_name;
991                 qdict_put(*options, "driver", qstring_from_str(drvname));
992             } else {
993                 error_setg(errp, "Must specify either driver or file");
994                 return -EINVAL;
995             }
996         } else if (drvname) {
997             drv = bdrv_find_format(drvname);
998             if (!drv) {
999                 error_setg(errp, "Unknown driver '%s'", drvname);
1000                 return -ENOENT;
1001             }
1002         }
1003     }
1004 
1005     assert(drv || !protocol);
1006 
1007     /* Driver-specific filename parsing */
1008     if (drv && drv->bdrv_parse_filename && parse_filename) {
1009         drv->bdrv_parse_filename(filename, *options, &local_err);
1010         if (local_err) {
1011             error_propagate(errp, local_err);
1012             return -EINVAL;
1013         }
1014 
1015         if (!drv->bdrv_needs_filename) {
1016             qdict_del(*options, "filename");
1017         }
1018     }
1019 
1020     return 0;
1021 }
1022 
1023 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1024 {
1025 
1026     if (bs->backing_hd) {
1027         assert(bs->backing_blocker);
1028         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1029     } else if (backing_hd) {
1030         error_setg(&bs->backing_blocker,
1031                    "node is used as backing hd of '%s'",
1032                    bdrv_get_device_or_node_name(bs));
1033     }
1034 
1035     bs->backing_hd = backing_hd;
1036     if (!backing_hd) {
1037         error_free(bs->backing_blocker);
1038         bs->backing_blocker = NULL;
1039         goto out;
1040     }
1041     bs->open_flags &= ~BDRV_O_NO_BACKING;
1042     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1043     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1044             backing_hd->drv ? backing_hd->drv->format_name : "");
1045 
1046     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1047     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1048     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1049                     bs->backing_blocker);
1050 out:
1051     bdrv_refresh_limits(bs, NULL);
1052 }
1053 
1054 /*
1055  * Opens the backing file for a BlockDriverState if not yet open
1056  *
1057  * options is a QDict of options to pass to the block drivers, or NULL for an
1058  * empty set of options. The reference to the QDict is transferred to this
1059  * function (even on failure), so if the caller intends to reuse the dictionary,
1060  * it needs to use QINCREF() before calling bdrv_file_open.
1061  */
1062 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1063 {
1064     char *backing_filename = g_malloc0(PATH_MAX);
1065     int ret = 0;
1066     BlockDriverState *backing_hd;
1067     Error *local_err = NULL;
1068 
1069     if (bs->backing_hd != NULL) {
1070         QDECREF(options);
1071         goto free_exit;
1072     }
1073 
1074     /* NULL means an empty set of options */
1075     if (options == NULL) {
1076         options = qdict_new();
1077     }
1078 
1079     bs->open_flags &= ~BDRV_O_NO_BACKING;
1080     if (qdict_haskey(options, "file.filename")) {
1081         backing_filename[0] = '\0';
1082     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1083         QDECREF(options);
1084         goto free_exit;
1085     } else {
1086         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1087                                        &local_err);
1088         if (local_err) {
1089             ret = -EINVAL;
1090             error_propagate(errp, local_err);
1091             QDECREF(options);
1092             goto free_exit;
1093         }
1094     }
1095 
1096     if (!bs->drv || !bs->drv->supports_backing) {
1097         ret = -EINVAL;
1098         error_setg(errp, "Driver doesn't support backing files");
1099         QDECREF(options);
1100         goto free_exit;
1101     }
1102 
1103     backing_hd = bdrv_new();
1104 
1105     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1106         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1107     }
1108 
1109     assert(bs->backing_hd == NULL);
1110     ret = bdrv_open(&backing_hd,
1111                     *backing_filename ? backing_filename : NULL, NULL, options,
1112                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1113     if (ret < 0) {
1114         bdrv_unref(backing_hd);
1115         backing_hd = NULL;
1116         bs->open_flags |= BDRV_O_NO_BACKING;
1117         error_setg(errp, "Could not open backing file: %s",
1118                    error_get_pretty(local_err));
1119         error_free(local_err);
1120         goto free_exit;
1121     }
1122     bdrv_set_backing_hd(bs, backing_hd);
1123 
1124 free_exit:
1125     g_free(backing_filename);
1126     return ret;
1127 }
1128 
1129 /*
1130  * Opens a disk image whose options are given as BlockdevRef in another block
1131  * device's options.
1132  *
1133  * If allow_none is true, no image will be opened if filename is false and no
1134  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1135  *
1136  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1137  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1138  * itself, all options starting with "${bdref_key}." are considered part of the
1139  * BlockdevRef.
1140  *
1141  * The BlockdevRef will be removed from the options QDict.
1142  *
1143  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1144  */
1145 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1146                     QDict *options, const char *bdref_key, int flags,
1147                     bool allow_none, Error **errp)
1148 {
1149     QDict *image_options;
1150     int ret;
1151     char *bdref_key_dot;
1152     const char *reference;
1153 
1154     assert(pbs);
1155     assert(*pbs == NULL);
1156 
1157     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1158     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1159     g_free(bdref_key_dot);
1160 
1161     reference = qdict_get_try_str(options, bdref_key);
1162     if (!filename && !reference && !qdict_size(image_options)) {
1163         if (allow_none) {
1164             ret = 0;
1165         } else {
1166             error_setg(errp, "A block device must be specified for \"%s\"",
1167                        bdref_key);
1168             ret = -EINVAL;
1169         }
1170         QDECREF(image_options);
1171         goto done;
1172     }
1173 
1174     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1175 
1176 done:
1177     qdict_del(options, bdref_key);
1178     return ret;
1179 }
1180 
1181 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1182 {
1183     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1184     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1185     int64_t total_size;
1186     QemuOpts *opts = NULL;
1187     QDict *snapshot_options;
1188     BlockDriverState *bs_snapshot;
1189     Error *local_err;
1190     int ret;
1191 
1192     /* if snapshot, we create a temporary backing file and open it
1193        instead of opening 'filename' directly */
1194 
1195     /* Get the required size from the image */
1196     total_size = bdrv_getlength(bs);
1197     if (total_size < 0) {
1198         ret = total_size;
1199         error_setg_errno(errp, -total_size, "Could not get image size");
1200         goto out;
1201     }
1202 
1203     /* Create the temporary image */
1204     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1205     if (ret < 0) {
1206         error_setg_errno(errp, -ret, "Could not get temporary filename");
1207         goto out;
1208     }
1209 
1210     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1211                             &error_abort);
1212     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1213     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1214     qemu_opts_del(opts);
1215     if (ret < 0) {
1216         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1217                          "'%s': %s", tmp_filename,
1218                          error_get_pretty(local_err));
1219         error_free(local_err);
1220         goto out;
1221     }
1222 
1223     /* Prepare a new options QDict for the temporary file */
1224     snapshot_options = qdict_new();
1225     qdict_put(snapshot_options, "file.driver",
1226               qstring_from_str("file"));
1227     qdict_put(snapshot_options, "file.filename",
1228               qstring_from_str(tmp_filename));
1229 
1230     bs_snapshot = bdrv_new();
1231 
1232     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1233                     flags, &bdrv_qcow2, &local_err);
1234     if (ret < 0) {
1235         error_propagate(errp, local_err);
1236         goto out;
1237     }
1238 
1239     bdrv_append(bs_snapshot, bs);
1240 
1241 out:
1242     g_free(tmp_filename);
1243     return ret;
1244 }
1245 
1246 /*
1247  * Opens a disk image (raw, qcow2, vmdk, ...)
1248  *
1249  * options is a QDict of options to pass to the block drivers, or NULL for an
1250  * empty set of options. The reference to the QDict belongs to the block layer
1251  * after the call (even on failure), so if the caller intends to reuse the
1252  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1253  *
1254  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1255  * If it is not NULL, the referenced BDS will be reused.
1256  *
1257  * The reference parameter may be used to specify an existing block device which
1258  * should be opened. If specified, neither options nor a filename may be given,
1259  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1260  */
1261 int bdrv_open(BlockDriverState **pbs, const char *filename,
1262               const char *reference, QDict *options, int flags,
1263               BlockDriver *drv, Error **errp)
1264 {
1265     int ret;
1266     BlockDriverState *file = NULL, *bs;
1267     const char *drvname;
1268     Error *local_err = NULL;
1269     int snapshot_flags = 0;
1270 
1271     assert(pbs);
1272 
1273     if (reference) {
1274         bool options_non_empty = options ? qdict_size(options) : false;
1275         QDECREF(options);
1276 
1277         if (*pbs) {
1278             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1279                        "another block device");
1280             return -EINVAL;
1281         }
1282 
1283         if (filename || options_non_empty) {
1284             error_setg(errp, "Cannot reference an existing block device with "
1285                        "additional options or a new filename");
1286             return -EINVAL;
1287         }
1288 
1289         bs = bdrv_lookup_bs(reference, reference, errp);
1290         if (!bs) {
1291             return -ENODEV;
1292         }
1293         bdrv_ref(bs);
1294         *pbs = bs;
1295         return 0;
1296     }
1297 
1298     if (*pbs) {
1299         bs = *pbs;
1300     } else {
1301         bs = bdrv_new();
1302     }
1303 
1304     /* NULL means an empty set of options */
1305     if (options == NULL) {
1306         options = qdict_new();
1307     }
1308 
1309     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1310     if (local_err) {
1311         goto fail;
1312     }
1313 
1314     /* Find the right image format driver */
1315     drv = NULL;
1316     drvname = qdict_get_try_str(options, "driver");
1317     if (drvname) {
1318         drv = bdrv_find_format(drvname);
1319         qdict_del(options, "driver");
1320         if (!drv) {
1321             error_setg(errp, "Unknown driver: '%s'", drvname);
1322             ret = -EINVAL;
1323             goto fail;
1324         }
1325     }
1326 
1327     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1328     if (drv && !drv->bdrv_file_open) {
1329         /* If the user explicitly wants a format driver here, we'll need to add
1330          * another layer for the protocol in bs->file */
1331         flags &= ~BDRV_O_PROTOCOL;
1332     }
1333 
1334     bs->options = options;
1335     options = qdict_clone_shallow(options);
1336 
1337     /* Open image file without format layer */
1338     if ((flags & BDRV_O_PROTOCOL) == 0) {
1339         if (flags & BDRV_O_RDWR) {
1340             flags |= BDRV_O_ALLOW_RDWR;
1341         }
1342         if (flags & BDRV_O_SNAPSHOT) {
1343             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1344             flags = bdrv_backing_flags(flags);
1345         }
1346 
1347         assert(file == NULL);
1348         ret = bdrv_open_image(&file, filename, options, "file",
1349                               bdrv_inherited_flags(flags),
1350                               true, &local_err);
1351         if (ret < 0) {
1352             goto fail;
1353         }
1354     }
1355 
1356     /* Image format probing */
1357     bs->probed = !drv;
1358     if (!drv && file) {
1359         ret = find_image_format(file, filename, &drv, &local_err);
1360         if (ret < 0) {
1361             goto fail;
1362         }
1363     } else if (!drv) {
1364         error_setg(errp, "Must specify either driver or file");
1365         ret = -EINVAL;
1366         goto fail;
1367     }
1368 
1369     /* Open the image */
1370     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1371     if (ret < 0) {
1372         goto fail;
1373     }
1374 
1375     if (file && (bs->file != file)) {
1376         bdrv_unref(file);
1377         file = NULL;
1378     }
1379 
1380     /* If there is a backing file, use it */
1381     if ((flags & BDRV_O_NO_BACKING) == 0) {
1382         QDict *backing_options;
1383 
1384         qdict_extract_subqdict(options, &backing_options, "backing.");
1385         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1386         if (ret < 0) {
1387             goto close_and_fail;
1388         }
1389     }
1390 
1391     bdrv_refresh_filename(bs);
1392 
1393     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1394      * temporary snapshot afterwards. */
1395     if (snapshot_flags) {
1396         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1397         if (local_err) {
1398             goto close_and_fail;
1399         }
1400     }
1401 
1402     /* Check if any unknown options were used */
1403     if (options && (qdict_size(options) != 0)) {
1404         const QDictEntry *entry = qdict_first(options);
1405         if (flags & BDRV_O_PROTOCOL) {
1406             error_setg(errp, "Block protocol '%s' doesn't support the option "
1407                        "'%s'", drv->format_name, entry->key);
1408         } else {
1409             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1410                        "support the option '%s'", drv->format_name,
1411                        bdrv_get_device_name(bs), entry->key);
1412         }
1413 
1414         ret = -EINVAL;
1415         goto close_and_fail;
1416     }
1417 
1418     if (!bdrv_key_required(bs)) {
1419         if (bs->blk) {
1420             blk_dev_change_media_cb(bs->blk, true);
1421         }
1422     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1423                && !runstate_check(RUN_STATE_INMIGRATE)
1424                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1425         error_setg(errp,
1426                    "Guest must be stopped for opening of encrypted image");
1427         ret = -EBUSY;
1428         goto close_and_fail;
1429     }
1430 
1431     QDECREF(options);
1432     *pbs = bs;
1433     return 0;
1434 
1435 fail:
1436     if (file != NULL) {
1437         bdrv_unref(file);
1438     }
1439     QDECREF(bs->options);
1440     QDECREF(options);
1441     bs->options = NULL;
1442     if (!*pbs) {
1443         /* If *pbs is NULL, a new BDS has been created in this function and
1444            needs to be freed now. Otherwise, it does not need to be closed,
1445            since it has not really been opened yet. */
1446         bdrv_unref(bs);
1447     }
1448     if (local_err) {
1449         error_propagate(errp, local_err);
1450     }
1451     return ret;
1452 
1453 close_and_fail:
1454     /* See fail path, but now the BDS has to be always closed */
1455     if (*pbs) {
1456         bdrv_close(bs);
1457     } else {
1458         bdrv_unref(bs);
1459     }
1460     QDECREF(options);
1461     if (local_err) {
1462         error_propagate(errp, local_err);
1463     }
1464     return ret;
1465 }
1466 
1467 typedef struct BlockReopenQueueEntry {
1468      bool prepared;
1469      BDRVReopenState state;
1470      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1471 } BlockReopenQueueEntry;
1472 
1473 /*
1474  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1475  * reopen of multiple devices.
1476  *
1477  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1478  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1479  * be created and initialized. This newly created BlockReopenQueue should be
1480  * passed back in for subsequent calls that are intended to be of the same
1481  * atomic 'set'.
1482  *
1483  * bs is the BlockDriverState to add to the reopen queue.
1484  *
1485  * flags contains the open flags for the associated bs
1486  *
1487  * returns a pointer to bs_queue, which is either the newly allocated
1488  * bs_queue, or the existing bs_queue being used.
1489  *
1490  */
1491 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1492                                     BlockDriverState *bs, int flags)
1493 {
1494     assert(bs != NULL);
1495 
1496     BlockReopenQueueEntry *bs_entry;
1497     if (bs_queue == NULL) {
1498         bs_queue = g_new0(BlockReopenQueue, 1);
1499         QSIMPLEQ_INIT(bs_queue);
1500     }
1501 
1502     /* bdrv_open() masks this flag out */
1503     flags &= ~BDRV_O_PROTOCOL;
1504 
1505     if (bs->file) {
1506         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1507     }
1508 
1509     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1510     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1511 
1512     bs_entry->state.bs = bs;
1513     bs_entry->state.flags = flags;
1514 
1515     return bs_queue;
1516 }
1517 
1518 /*
1519  * Reopen multiple BlockDriverStates atomically & transactionally.
1520  *
1521  * The queue passed in (bs_queue) must have been built up previous
1522  * via bdrv_reopen_queue().
1523  *
1524  * Reopens all BDS specified in the queue, with the appropriate
1525  * flags.  All devices are prepared for reopen, and failure of any
1526  * device will cause all device changes to be abandonded, and intermediate
1527  * data cleaned up.
1528  *
1529  * If all devices prepare successfully, then the changes are committed
1530  * to all devices.
1531  *
1532  */
1533 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1534 {
1535     int ret = -1;
1536     BlockReopenQueueEntry *bs_entry, *next;
1537     Error *local_err = NULL;
1538 
1539     assert(bs_queue != NULL);
1540 
1541     bdrv_drain_all();
1542 
1543     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1544         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1545             error_propagate(errp, local_err);
1546             goto cleanup;
1547         }
1548         bs_entry->prepared = true;
1549     }
1550 
1551     /* If we reach this point, we have success and just need to apply the
1552      * changes
1553      */
1554     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555         bdrv_reopen_commit(&bs_entry->state);
1556     }
1557 
1558     ret = 0;
1559 
1560 cleanup:
1561     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1562         if (ret && bs_entry->prepared) {
1563             bdrv_reopen_abort(&bs_entry->state);
1564         }
1565         g_free(bs_entry);
1566     }
1567     g_free(bs_queue);
1568     return ret;
1569 }
1570 
1571 
1572 /* Reopen a single BlockDriverState with the specified flags. */
1573 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1574 {
1575     int ret = -1;
1576     Error *local_err = NULL;
1577     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1578 
1579     ret = bdrv_reopen_multiple(queue, &local_err);
1580     if (local_err != NULL) {
1581         error_propagate(errp, local_err);
1582     }
1583     return ret;
1584 }
1585 
1586 
1587 /*
1588  * Prepares a BlockDriverState for reopen. All changes are staged in the
1589  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1590  * the block driver layer .bdrv_reopen_prepare()
1591  *
1592  * bs is the BlockDriverState to reopen
1593  * flags are the new open flags
1594  * queue is the reopen queue
1595  *
1596  * Returns 0 on success, non-zero on error.  On error errp will be set
1597  * as well.
1598  *
1599  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1600  * It is the responsibility of the caller to then call the abort() or
1601  * commit() for any other BDS that have been left in a prepare() state
1602  *
1603  */
1604 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1605                         Error **errp)
1606 {
1607     int ret = -1;
1608     Error *local_err = NULL;
1609     BlockDriver *drv;
1610 
1611     assert(reopen_state != NULL);
1612     assert(reopen_state->bs->drv != NULL);
1613     drv = reopen_state->bs->drv;
1614 
1615     /* if we are to stay read-only, do not allow permission change
1616      * to r/w */
1617     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1618         reopen_state->flags & BDRV_O_RDWR) {
1619         error_setg(errp, "Node '%s' is read only",
1620                    bdrv_get_device_or_node_name(reopen_state->bs));
1621         goto error;
1622     }
1623 
1624 
1625     ret = bdrv_flush(reopen_state->bs);
1626     if (ret) {
1627         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1628                   strerror(-ret));
1629         goto error;
1630     }
1631 
1632     if (drv->bdrv_reopen_prepare) {
1633         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1634         if (ret) {
1635             if (local_err != NULL) {
1636                 error_propagate(errp, local_err);
1637             } else {
1638                 error_setg(errp, "failed while preparing to reopen image '%s'",
1639                            reopen_state->bs->filename);
1640             }
1641             goto error;
1642         }
1643     } else {
1644         /* It is currently mandatory to have a bdrv_reopen_prepare()
1645          * handler for each supported drv. */
1646         error_setg(errp, "Block format '%s' used by node '%s' "
1647                    "does not support reopening files", drv->format_name,
1648                    bdrv_get_device_or_node_name(reopen_state->bs));
1649         ret = -1;
1650         goto error;
1651     }
1652 
1653     ret = 0;
1654 
1655 error:
1656     return ret;
1657 }
1658 
1659 /*
1660  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1661  * makes them final by swapping the staging BlockDriverState contents into
1662  * the active BlockDriverState contents.
1663  */
1664 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1665 {
1666     BlockDriver *drv;
1667 
1668     assert(reopen_state != NULL);
1669     drv = reopen_state->bs->drv;
1670     assert(drv != NULL);
1671 
1672     /* If there are any driver level actions to take */
1673     if (drv->bdrv_reopen_commit) {
1674         drv->bdrv_reopen_commit(reopen_state);
1675     }
1676 
1677     /* set BDS specific flags now */
1678     reopen_state->bs->open_flags         = reopen_state->flags;
1679     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1680                                               BDRV_O_CACHE_WB);
1681     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1682 
1683     bdrv_refresh_limits(reopen_state->bs, NULL);
1684 }
1685 
1686 /*
1687  * Abort the reopen, and delete and free the staged changes in
1688  * reopen_state
1689  */
1690 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1691 {
1692     BlockDriver *drv;
1693 
1694     assert(reopen_state != NULL);
1695     drv = reopen_state->bs->drv;
1696     assert(drv != NULL);
1697 
1698     if (drv->bdrv_reopen_abort) {
1699         drv->bdrv_reopen_abort(reopen_state);
1700     }
1701 }
1702 
1703 
1704 void bdrv_close(BlockDriverState *bs)
1705 {
1706     BdrvAioNotifier *ban, *ban_next;
1707 
1708     if (bs->job) {
1709         block_job_cancel_sync(bs->job);
1710     }
1711     bdrv_drain_all(); /* complete I/O */
1712     bdrv_flush(bs);
1713     bdrv_drain_all(); /* in case flush left pending I/O */
1714     notifier_list_notify(&bs->close_notifiers, bs);
1715 
1716     if (bs->drv) {
1717         if (bs->backing_hd) {
1718             BlockDriverState *backing_hd = bs->backing_hd;
1719             bdrv_set_backing_hd(bs, NULL);
1720             bdrv_unref(backing_hd);
1721         }
1722         bs->drv->bdrv_close(bs);
1723         g_free(bs->opaque);
1724         bs->opaque = NULL;
1725         bs->drv = NULL;
1726         bs->copy_on_read = 0;
1727         bs->backing_file[0] = '\0';
1728         bs->backing_format[0] = '\0';
1729         bs->total_sectors = 0;
1730         bs->encrypted = 0;
1731         bs->valid_key = 0;
1732         bs->sg = 0;
1733         bs->zero_beyond_eof = false;
1734         QDECREF(bs->options);
1735         bs->options = NULL;
1736         QDECREF(bs->full_open_options);
1737         bs->full_open_options = NULL;
1738 
1739         if (bs->file != NULL) {
1740             bdrv_unref(bs->file);
1741             bs->file = NULL;
1742         }
1743     }
1744 
1745     if (bs->blk) {
1746         blk_dev_change_media_cb(bs->blk, false);
1747     }
1748 
1749     /*throttling disk I/O limits*/
1750     if (bs->io_limits_enabled) {
1751         bdrv_io_limits_disable(bs);
1752     }
1753 
1754     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1755         g_free(ban);
1756     }
1757     QLIST_INIT(&bs->aio_notifiers);
1758 }
1759 
1760 void bdrv_close_all(void)
1761 {
1762     BlockDriverState *bs;
1763 
1764     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1765         AioContext *aio_context = bdrv_get_aio_context(bs);
1766 
1767         aio_context_acquire(aio_context);
1768         bdrv_close(bs);
1769         aio_context_release(aio_context);
1770     }
1771 }
1772 
1773 /* make a BlockDriverState anonymous by removing from bdrv_state and
1774  * graph_bdrv_state list.
1775    Also, NULL terminate the device_name to prevent double remove */
1776 void bdrv_make_anon(BlockDriverState *bs)
1777 {
1778     /*
1779      * Take care to remove bs from bdrv_states only when it's actually
1780      * in it.  Note that bs->device_list.tqe_prev is initially null,
1781      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1782      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1783      * resetting it to null on remove.
1784      */
1785     if (bs->device_list.tqe_prev) {
1786         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1787         bs->device_list.tqe_prev = NULL;
1788     }
1789     if (bs->node_name[0] != '\0') {
1790         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1791     }
1792     bs->node_name[0] = '\0';
1793 }
1794 
1795 static void bdrv_rebind(BlockDriverState *bs)
1796 {
1797     if (bs->drv && bs->drv->bdrv_rebind) {
1798         bs->drv->bdrv_rebind(bs);
1799     }
1800 }
1801 
1802 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1803                                      BlockDriverState *bs_src)
1804 {
1805     /* move some fields that need to stay attached to the device */
1806 
1807     /* dev info */
1808     bs_dest->guest_block_size   = bs_src->guest_block_size;
1809     bs_dest->copy_on_read       = bs_src->copy_on_read;
1810 
1811     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1812 
1813     /* i/o throttled req */
1814     memcpy(&bs_dest->throttle_state,
1815            &bs_src->throttle_state,
1816            sizeof(ThrottleState));
1817     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1818     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1819     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1820 
1821     /* r/w error */
1822     bs_dest->on_read_error      = bs_src->on_read_error;
1823     bs_dest->on_write_error     = bs_src->on_write_error;
1824 
1825     /* i/o status */
1826     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1827     bs_dest->iostatus           = bs_src->iostatus;
1828 
1829     /* dirty bitmap */
1830     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1831 
1832     /* reference count */
1833     bs_dest->refcnt             = bs_src->refcnt;
1834 
1835     /* job */
1836     bs_dest->job                = bs_src->job;
1837 
1838     /* keep the same entry in bdrv_states */
1839     bs_dest->device_list = bs_src->device_list;
1840     bs_dest->blk = bs_src->blk;
1841 
1842     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1843            sizeof(bs_dest->op_blockers));
1844 }
1845 
1846 /*
1847  * Swap bs contents for two image chains while they are live,
1848  * while keeping required fields on the BlockDriverState that is
1849  * actually attached to a device.
1850  *
1851  * This will modify the BlockDriverState fields, and swap contents
1852  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1853  *
1854  * bs_new must not be attached to a BlockBackend.
1855  *
1856  * This function does not create any image files.
1857  */
1858 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1859 {
1860     BlockDriverState tmp;
1861 
1862     /* The code needs to swap the node_name but simply swapping node_list won't
1863      * work so first remove the nodes from the graph list, do the swap then
1864      * insert them back if needed.
1865      */
1866     if (bs_new->node_name[0] != '\0') {
1867         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1868     }
1869     if (bs_old->node_name[0] != '\0') {
1870         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1871     }
1872 
1873     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1874     assert(!bs_new->blk);
1875     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1876     assert(bs_new->job == NULL);
1877     assert(bs_new->io_limits_enabled == false);
1878     assert(!throttle_have_timer(&bs_new->throttle_state));
1879 
1880     tmp = *bs_new;
1881     *bs_new = *bs_old;
1882     *bs_old = tmp;
1883 
1884     /* there are some fields that should not be swapped, move them back */
1885     bdrv_move_feature_fields(&tmp, bs_old);
1886     bdrv_move_feature_fields(bs_old, bs_new);
1887     bdrv_move_feature_fields(bs_new, &tmp);
1888 
1889     /* bs_new must remain unattached */
1890     assert(!bs_new->blk);
1891 
1892     /* Check a few fields that should remain attached to the device */
1893     assert(bs_new->job == NULL);
1894     assert(bs_new->io_limits_enabled == false);
1895     assert(!throttle_have_timer(&bs_new->throttle_state));
1896 
1897     /* insert the nodes back into the graph node list if needed */
1898     if (bs_new->node_name[0] != '\0') {
1899         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1900     }
1901     if (bs_old->node_name[0] != '\0') {
1902         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1903     }
1904 
1905     bdrv_rebind(bs_new);
1906     bdrv_rebind(bs_old);
1907 }
1908 
1909 /*
1910  * Add new bs contents at the top of an image chain while the chain is
1911  * live, while keeping required fields on the top layer.
1912  *
1913  * This will modify the BlockDriverState fields, and swap contents
1914  * between bs_new and bs_top. Both bs_new and bs_top are modified.
1915  *
1916  * bs_new must not be attached to a BlockBackend.
1917  *
1918  * This function does not create any image files.
1919  */
1920 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1921 {
1922     bdrv_swap(bs_new, bs_top);
1923 
1924     /* The contents of 'tmp' will become bs_top, as we are
1925      * swapping bs_new and bs_top contents. */
1926     bdrv_set_backing_hd(bs_top, bs_new);
1927 }
1928 
1929 static void bdrv_delete(BlockDriverState *bs)
1930 {
1931     assert(!bs->job);
1932     assert(bdrv_op_blocker_is_empty(bs));
1933     assert(!bs->refcnt);
1934     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1935 
1936     bdrv_close(bs);
1937 
1938     /* remove from list, if necessary */
1939     bdrv_make_anon(bs);
1940 
1941     g_free(bs);
1942 }
1943 
1944 /*
1945  * Run consistency checks on an image
1946  *
1947  * Returns 0 if the check could be completed (it doesn't mean that the image is
1948  * free of errors) or -errno when an internal error occurred. The results of the
1949  * check are stored in res.
1950  */
1951 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1952 {
1953     if (bs->drv == NULL) {
1954         return -ENOMEDIUM;
1955     }
1956     if (bs->drv->bdrv_check == NULL) {
1957         return -ENOTSUP;
1958     }
1959 
1960     memset(res, 0, sizeof(*res));
1961     return bs->drv->bdrv_check(bs, res, fix);
1962 }
1963 
1964 #define COMMIT_BUF_SECTORS 2048
1965 
1966 /* commit COW file into the raw image */
1967 int bdrv_commit(BlockDriverState *bs)
1968 {
1969     BlockDriver *drv = bs->drv;
1970     int64_t sector, total_sectors, length, backing_length;
1971     int n, ro, open_flags;
1972     int ret = 0;
1973     uint8_t *buf = NULL;
1974 
1975     if (!drv)
1976         return -ENOMEDIUM;
1977 
1978     if (!bs->backing_hd) {
1979         return -ENOTSUP;
1980     }
1981 
1982     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
1983         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
1984         return -EBUSY;
1985     }
1986 
1987     ro = bs->backing_hd->read_only;
1988     open_flags =  bs->backing_hd->open_flags;
1989 
1990     if (ro) {
1991         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
1992             return -EACCES;
1993         }
1994     }
1995 
1996     length = bdrv_getlength(bs);
1997     if (length < 0) {
1998         ret = length;
1999         goto ro_cleanup;
2000     }
2001 
2002     backing_length = bdrv_getlength(bs->backing_hd);
2003     if (backing_length < 0) {
2004         ret = backing_length;
2005         goto ro_cleanup;
2006     }
2007 
2008     /* If our top snapshot is larger than the backing file image,
2009      * grow the backing file image if possible.  If not possible,
2010      * we must return an error */
2011     if (length > backing_length) {
2012         ret = bdrv_truncate(bs->backing_hd, length);
2013         if (ret < 0) {
2014             goto ro_cleanup;
2015         }
2016     }
2017 
2018     total_sectors = length >> BDRV_SECTOR_BITS;
2019 
2020     /* qemu_try_blockalign() for bs will choose an alignment that works for
2021      * bs->backing_hd as well, so no need to compare the alignment manually. */
2022     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2023     if (buf == NULL) {
2024         ret = -ENOMEM;
2025         goto ro_cleanup;
2026     }
2027 
2028     for (sector = 0; sector < total_sectors; sector += n) {
2029         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2030         if (ret < 0) {
2031             goto ro_cleanup;
2032         }
2033         if (ret) {
2034             ret = bdrv_read(bs, sector, buf, n);
2035             if (ret < 0) {
2036                 goto ro_cleanup;
2037             }
2038 
2039             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2040             if (ret < 0) {
2041                 goto ro_cleanup;
2042             }
2043         }
2044     }
2045 
2046     if (drv->bdrv_make_empty) {
2047         ret = drv->bdrv_make_empty(bs);
2048         if (ret < 0) {
2049             goto ro_cleanup;
2050         }
2051         bdrv_flush(bs);
2052     }
2053 
2054     /*
2055      * Make sure all data we wrote to the backing device is actually
2056      * stable on disk.
2057      */
2058     if (bs->backing_hd) {
2059         bdrv_flush(bs->backing_hd);
2060     }
2061 
2062     ret = 0;
2063 ro_cleanup:
2064     qemu_vfree(buf);
2065 
2066     if (ro) {
2067         /* ignoring error return here */
2068         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2069     }
2070 
2071     return ret;
2072 }
2073 
2074 int bdrv_commit_all(void)
2075 {
2076     BlockDriverState *bs;
2077 
2078     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2079         AioContext *aio_context = bdrv_get_aio_context(bs);
2080 
2081         aio_context_acquire(aio_context);
2082         if (bs->drv && bs->backing_hd) {
2083             int ret = bdrv_commit(bs);
2084             if (ret < 0) {
2085                 aio_context_release(aio_context);
2086                 return ret;
2087             }
2088         }
2089         aio_context_release(aio_context);
2090     }
2091     return 0;
2092 }
2093 
2094 /*
2095  * Return values:
2096  * 0        - success
2097  * -EINVAL  - backing format specified, but no file
2098  * -ENOSPC  - can't update the backing file because no space is left in the
2099  *            image file header
2100  * -ENOTSUP - format driver doesn't support changing the backing file
2101  */
2102 int bdrv_change_backing_file(BlockDriverState *bs,
2103     const char *backing_file, const char *backing_fmt)
2104 {
2105     BlockDriver *drv = bs->drv;
2106     int ret;
2107 
2108     /* Backing file format doesn't make sense without a backing file */
2109     if (backing_fmt && !backing_file) {
2110         return -EINVAL;
2111     }
2112 
2113     if (drv->bdrv_change_backing_file != NULL) {
2114         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2115     } else {
2116         ret = -ENOTSUP;
2117     }
2118 
2119     if (ret == 0) {
2120         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2121         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2122     }
2123     return ret;
2124 }
2125 
2126 /*
2127  * Finds the image layer in the chain that has 'bs' as its backing file.
2128  *
2129  * active is the current topmost image.
2130  *
2131  * Returns NULL if bs is not found in active's image chain,
2132  * or if active == bs.
2133  *
2134  * Returns the bottommost base image if bs == NULL.
2135  */
2136 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2137                                     BlockDriverState *bs)
2138 {
2139     while (active && bs != active->backing_hd) {
2140         active = active->backing_hd;
2141     }
2142 
2143     return active;
2144 }
2145 
2146 /* Given a BDS, searches for the base layer. */
2147 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2148 {
2149     return bdrv_find_overlay(bs, NULL);
2150 }
2151 
2152 typedef struct BlkIntermediateStates {
2153     BlockDriverState *bs;
2154     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2155 } BlkIntermediateStates;
2156 
2157 
2158 /*
2159  * Drops images above 'base' up to and including 'top', and sets the image
2160  * above 'top' to have base as its backing file.
2161  *
2162  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2163  * information in 'bs' can be properly updated.
2164  *
2165  * E.g., this will convert the following chain:
2166  * bottom <- base <- intermediate <- top <- active
2167  *
2168  * to
2169  *
2170  * bottom <- base <- active
2171  *
2172  * It is allowed for bottom==base, in which case it converts:
2173  *
2174  * base <- intermediate <- top <- active
2175  *
2176  * to
2177  *
2178  * base <- active
2179  *
2180  * If backing_file_str is non-NULL, it will be used when modifying top's
2181  * overlay image metadata.
2182  *
2183  * Error conditions:
2184  *  if active == top, that is considered an error
2185  *
2186  */
2187 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2188                            BlockDriverState *base, const char *backing_file_str)
2189 {
2190     BlockDriverState *intermediate;
2191     BlockDriverState *base_bs = NULL;
2192     BlockDriverState *new_top_bs = NULL;
2193     BlkIntermediateStates *intermediate_state, *next;
2194     int ret = -EIO;
2195 
2196     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2197     QSIMPLEQ_INIT(&states_to_delete);
2198 
2199     if (!top->drv || !base->drv) {
2200         goto exit;
2201     }
2202 
2203     new_top_bs = bdrv_find_overlay(active, top);
2204 
2205     if (new_top_bs == NULL) {
2206         /* we could not find the image above 'top', this is an error */
2207         goto exit;
2208     }
2209 
2210     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2211      * to do, no intermediate images */
2212     if (new_top_bs->backing_hd == base) {
2213         ret = 0;
2214         goto exit;
2215     }
2216 
2217     intermediate = top;
2218 
2219     /* now we will go down through the list, and add each BDS we find
2220      * into our deletion queue, until we hit the 'base'
2221      */
2222     while (intermediate) {
2223         intermediate_state = g_new0(BlkIntermediateStates, 1);
2224         intermediate_state->bs = intermediate;
2225         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2226 
2227         if (intermediate->backing_hd == base) {
2228             base_bs = intermediate->backing_hd;
2229             break;
2230         }
2231         intermediate = intermediate->backing_hd;
2232     }
2233     if (base_bs == NULL) {
2234         /* something went wrong, we did not end at the base. safely
2235          * unravel everything, and exit with error */
2236         goto exit;
2237     }
2238 
2239     /* success - we can delete the intermediate states, and link top->base */
2240     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2241     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2242                                    base_bs->drv ? base_bs->drv->format_name : "");
2243     if (ret) {
2244         goto exit;
2245     }
2246     bdrv_set_backing_hd(new_top_bs, base_bs);
2247 
2248     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2249         /* so that bdrv_close() does not recursively close the chain */
2250         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2251         bdrv_unref(intermediate_state->bs);
2252     }
2253     ret = 0;
2254 
2255 exit:
2256     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2257         g_free(intermediate_state);
2258     }
2259     return ret;
2260 }
2261 
2262 /**
2263  * Truncate file to 'offset' bytes (needed only for file protocols)
2264  */
2265 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2266 {
2267     BlockDriver *drv = bs->drv;
2268     int ret;
2269     if (!drv)
2270         return -ENOMEDIUM;
2271     if (!drv->bdrv_truncate)
2272         return -ENOTSUP;
2273     if (bs->read_only)
2274         return -EACCES;
2275 
2276     ret = drv->bdrv_truncate(bs, offset);
2277     if (ret == 0) {
2278         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2279         bdrv_dirty_bitmap_truncate(bs);
2280         if (bs->blk) {
2281             blk_dev_resize_cb(bs->blk);
2282         }
2283     }
2284     return ret;
2285 }
2286 
2287 /**
2288  * Length of a allocated file in bytes. Sparse files are counted by actual
2289  * allocated space. Return < 0 if error or unknown.
2290  */
2291 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2292 {
2293     BlockDriver *drv = bs->drv;
2294     if (!drv) {
2295         return -ENOMEDIUM;
2296     }
2297     if (drv->bdrv_get_allocated_file_size) {
2298         return drv->bdrv_get_allocated_file_size(bs);
2299     }
2300     if (bs->file) {
2301         return bdrv_get_allocated_file_size(bs->file);
2302     }
2303     return -ENOTSUP;
2304 }
2305 
2306 /**
2307  * Return number of sectors on success, -errno on error.
2308  */
2309 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2310 {
2311     BlockDriver *drv = bs->drv;
2312 
2313     if (!drv)
2314         return -ENOMEDIUM;
2315 
2316     if (drv->has_variable_length) {
2317         int ret = refresh_total_sectors(bs, bs->total_sectors);
2318         if (ret < 0) {
2319             return ret;
2320         }
2321     }
2322     return bs->total_sectors;
2323 }
2324 
2325 /**
2326  * Return length in bytes on success, -errno on error.
2327  * The length is always a multiple of BDRV_SECTOR_SIZE.
2328  */
2329 int64_t bdrv_getlength(BlockDriverState *bs)
2330 {
2331     int64_t ret = bdrv_nb_sectors(bs);
2332 
2333     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2334 }
2335 
2336 /* return 0 as number of sectors if no device present or error */
2337 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2338 {
2339     int64_t nb_sectors = bdrv_nb_sectors(bs);
2340 
2341     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2342 }
2343 
2344 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2345                        BlockdevOnError on_write_error)
2346 {
2347     bs->on_read_error = on_read_error;
2348     bs->on_write_error = on_write_error;
2349 }
2350 
2351 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2352 {
2353     return is_read ? bs->on_read_error : bs->on_write_error;
2354 }
2355 
2356 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2357 {
2358     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2359 
2360     switch (on_err) {
2361     case BLOCKDEV_ON_ERROR_ENOSPC:
2362         return (error == ENOSPC) ?
2363                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2364     case BLOCKDEV_ON_ERROR_STOP:
2365         return BLOCK_ERROR_ACTION_STOP;
2366     case BLOCKDEV_ON_ERROR_REPORT:
2367         return BLOCK_ERROR_ACTION_REPORT;
2368     case BLOCKDEV_ON_ERROR_IGNORE:
2369         return BLOCK_ERROR_ACTION_IGNORE;
2370     default:
2371         abort();
2372     }
2373 }
2374 
2375 static void send_qmp_error_event(BlockDriverState *bs,
2376                                  BlockErrorAction action,
2377                                  bool is_read, int error)
2378 {
2379     IoOperationType optype;
2380 
2381     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2382     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2383                                    bdrv_iostatus_is_enabled(bs),
2384                                    error == ENOSPC, strerror(error),
2385                                    &error_abort);
2386 }
2387 
2388 /* This is done by device models because, while the block layer knows
2389  * about the error, it does not know whether an operation comes from
2390  * the device or the block layer (from a job, for example).
2391  */
2392 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2393                        bool is_read, int error)
2394 {
2395     assert(error >= 0);
2396 
2397     if (action == BLOCK_ERROR_ACTION_STOP) {
2398         /* First set the iostatus, so that "info block" returns an iostatus
2399          * that matches the events raised so far (an additional error iostatus
2400          * is fine, but not a lost one).
2401          */
2402         bdrv_iostatus_set_err(bs, error);
2403 
2404         /* Then raise the request to stop the VM and the event.
2405          * qemu_system_vmstop_request_prepare has two effects.  First,
2406          * it ensures that the STOP event always comes after the
2407          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2408          * can observe the STOP event and do a "cont" before the STOP
2409          * event is issued, the VM will not stop.  In this case, vm_start()
2410          * also ensures that the STOP/RESUME pair of events is emitted.
2411          */
2412         qemu_system_vmstop_request_prepare();
2413         send_qmp_error_event(bs, action, is_read, error);
2414         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2415     } else {
2416         send_qmp_error_event(bs, action, is_read, error);
2417     }
2418 }
2419 
2420 int bdrv_is_read_only(BlockDriverState *bs)
2421 {
2422     return bs->read_only;
2423 }
2424 
2425 int bdrv_is_sg(BlockDriverState *bs)
2426 {
2427     return bs->sg;
2428 }
2429 
2430 int bdrv_enable_write_cache(BlockDriverState *bs)
2431 {
2432     return bs->enable_write_cache;
2433 }
2434 
2435 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2436 {
2437     bs->enable_write_cache = wce;
2438 
2439     /* so a reopen() will preserve wce */
2440     if (wce) {
2441         bs->open_flags |= BDRV_O_CACHE_WB;
2442     } else {
2443         bs->open_flags &= ~BDRV_O_CACHE_WB;
2444     }
2445 }
2446 
2447 int bdrv_is_encrypted(BlockDriverState *bs)
2448 {
2449     if (bs->backing_hd && bs->backing_hd->encrypted)
2450         return 1;
2451     return bs->encrypted;
2452 }
2453 
2454 int bdrv_key_required(BlockDriverState *bs)
2455 {
2456     BlockDriverState *backing_hd = bs->backing_hd;
2457 
2458     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2459         return 1;
2460     return (bs->encrypted && !bs->valid_key);
2461 }
2462 
2463 int bdrv_set_key(BlockDriverState *bs, const char *key)
2464 {
2465     int ret;
2466     if (bs->backing_hd && bs->backing_hd->encrypted) {
2467         ret = bdrv_set_key(bs->backing_hd, key);
2468         if (ret < 0)
2469             return ret;
2470         if (!bs->encrypted)
2471             return 0;
2472     }
2473     if (!bs->encrypted) {
2474         return -EINVAL;
2475     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2476         return -ENOMEDIUM;
2477     }
2478     ret = bs->drv->bdrv_set_key(bs, key);
2479     if (ret < 0) {
2480         bs->valid_key = 0;
2481     } else if (!bs->valid_key) {
2482         bs->valid_key = 1;
2483         if (bs->blk) {
2484             /* call the change callback now, we skipped it on open */
2485             blk_dev_change_media_cb(bs->blk, true);
2486         }
2487     }
2488     return ret;
2489 }
2490 
2491 /*
2492  * Provide an encryption key for @bs.
2493  * If @key is non-null:
2494  *     If @bs is not encrypted, fail.
2495  *     Else if the key is invalid, fail.
2496  *     Else set @bs's key to @key, replacing the existing key, if any.
2497  * If @key is null:
2498  *     If @bs is encrypted and still lacks a key, fail.
2499  *     Else do nothing.
2500  * On failure, store an error object through @errp if non-null.
2501  */
2502 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2503 {
2504     if (key) {
2505         if (!bdrv_is_encrypted(bs)) {
2506             error_setg(errp, "Node '%s' is not encrypted",
2507                       bdrv_get_device_or_node_name(bs));
2508         } else if (bdrv_set_key(bs, key) < 0) {
2509             error_set(errp, QERR_INVALID_PASSWORD);
2510         }
2511     } else {
2512         if (bdrv_key_required(bs)) {
2513             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2514                       "'%s' (%s) is encrypted",
2515                       bdrv_get_device_or_node_name(bs),
2516                       bdrv_get_encrypted_filename(bs));
2517         }
2518     }
2519 }
2520 
2521 const char *bdrv_get_format_name(BlockDriverState *bs)
2522 {
2523     return bs->drv ? bs->drv->format_name : NULL;
2524 }
2525 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() hands the comparator pointers to the array elements, i.e.
 * pointers to the string pointers, so they must be dereferenced once
 * before being passed to strcmp().
 *
 * Returns <0, 0 or >0 as strcmp() does for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2530 
2531 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2532                          void *opaque)
2533 {
2534     BlockDriver *drv;
2535     int count = 0;
2536     int i;
2537     const char **formats = NULL;
2538 
2539     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2540         if (drv->format_name) {
2541             bool found = false;
2542             int i = count;
2543             while (formats && i && !found) {
2544                 found = !strcmp(formats[--i], drv->format_name);
2545             }
2546 
2547             if (!found) {
2548                 formats = g_renew(const char *, formats, count + 1);
2549                 formats[count++] = drv->format_name;
2550             }
2551         }
2552     }
2553 
2554     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2555 
2556     for (i = 0; i < count; i++) {
2557         it(opaque, formats[i]);
2558     }
2559 
2560     g_free(formats);
2561 }
2562 
2563 /* This function is to find a node in the bs graph */
2564 BlockDriverState *bdrv_find_node(const char *node_name)
2565 {
2566     BlockDriverState *bs;
2567 
2568     assert(node_name);
2569 
2570     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2571         if (!strcmp(node_name, bs->node_name)) {
2572             return bs;
2573         }
2574     }
2575     return NULL;
2576 }
2577 
2578 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2579 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2580 {
2581     BlockDeviceInfoList *list, *entry;
2582     BlockDriverState *bs;
2583 
2584     list = NULL;
2585     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2586         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2587         if (!info) {
2588             qapi_free_BlockDeviceInfoList(list);
2589             return NULL;
2590         }
2591         entry = g_malloc0(sizeof(*entry));
2592         entry->value = info;
2593         entry->next = list;
2594         list = entry;
2595     }
2596 
2597     return list;
2598 }
2599 
2600 BlockDriverState *bdrv_lookup_bs(const char *device,
2601                                  const char *node_name,
2602                                  Error **errp)
2603 {
2604     BlockBackend *blk;
2605     BlockDriverState *bs;
2606 
2607     if (device) {
2608         blk = blk_by_name(device);
2609 
2610         if (blk) {
2611             return blk_bs(blk);
2612         }
2613     }
2614 
2615     if (node_name) {
2616         bs = bdrv_find_node(node_name);
2617 
2618         if (bs) {
2619             return bs;
2620         }
2621     }
2622 
2623     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2624                      device ? device : "",
2625                      node_name ? node_name : "");
2626     return NULL;
2627 }
2628 
2629 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2630  * return false.  If either argument is NULL, return false. */
2631 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2632 {
2633     while (top && top != base) {
2634         top = top->backing_hd;
2635     }
2636 
2637     return top != NULL;
2638 }
2639 
2640 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2641 {
2642     if (!bs) {
2643         return QTAILQ_FIRST(&graph_bdrv_states);
2644     }
2645     return QTAILQ_NEXT(bs, node_list);
2646 }
2647 
2648 BlockDriverState *bdrv_next(BlockDriverState *bs)
2649 {
2650     if (!bs) {
2651         return QTAILQ_FIRST(&bdrv_states);
2652     }
2653     return QTAILQ_NEXT(bs, device_list);
2654 }
2655 
2656 const char *bdrv_get_node_name(const BlockDriverState *bs)
2657 {
2658     return bs->node_name;
2659 }
2660 
2661 /* TODO check what callers really want: bs->node_name or blk_name() */
2662 const char *bdrv_get_device_name(const BlockDriverState *bs)
2663 {
2664     return bs->blk ? blk_name(bs->blk) : "";
2665 }
2666 
2667 /* This can be used to identify nodes that might not have a device
2668  * name associated. Since node and device names live in the same
2669  * namespace, the result is unambiguous. The exception is if both are
2670  * absent, then this returns an empty (non-null) string. */
2671 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2672 {
2673     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2674 }
2675 
2676 int bdrv_get_flags(BlockDriverState *bs)
2677 {
2678     return bs->open_flags;
2679 }
2680 
2681 int bdrv_has_zero_init_1(BlockDriverState *bs)
2682 {
2683     return 1;
2684 }
2685 
2686 int bdrv_has_zero_init(BlockDriverState *bs)
2687 {
2688     assert(bs->drv);
2689 
2690     /* If BS is a copy on write image, it is initialized to
2691        the contents of the base image, which may not be zeroes.  */
2692     if (bs->backing_hd) {
2693         return 0;
2694     }
2695     if (bs->drv->bdrv_has_zero_init) {
2696         return bs->drv->bdrv_has_zero_init(bs);
2697     }
2698 
2699     /* safe default */
2700     return 0;
2701 }
2702 
2703 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2704 {
2705     BlockDriverInfo bdi;
2706 
2707     if (bs->backing_hd) {
2708         return false;
2709     }
2710 
2711     if (bdrv_get_info(bs, &bdi) == 0) {
2712         return bdi.unallocated_blocks_are_zero;
2713     }
2714 
2715     return false;
2716 }
2717 
2718 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2719 {
2720     BlockDriverInfo bdi;
2721 
2722     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2723         return false;
2724     }
2725 
2726     if (bdrv_get_info(bs, &bdi) == 0) {
2727         return bdi.can_write_zeroes_with_unmap;
2728     }
2729 
2730     return false;
2731 }
2732 
2733 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2734 {
2735     if (bs->backing_hd && bs->backing_hd->encrypted)
2736         return bs->backing_file;
2737     else if (bs->encrypted)
2738         return bs->filename;
2739     else
2740         return NULL;
2741 }
2742 
2743 void bdrv_get_backing_filename(BlockDriverState *bs,
2744                                char *filename, int filename_size)
2745 {
2746     pstrcpy(filename, filename_size, bs->backing_file);
2747 }
2748 
2749 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2750 {
2751     BlockDriver *drv = bs->drv;
2752     if (!drv)
2753         return -ENOMEDIUM;
2754     if (!drv->bdrv_get_info)
2755         return -ENOTSUP;
2756     memset(bdi, 0, sizeof(*bdi));
2757     return drv->bdrv_get_info(bs, bdi);
2758 }
2759 
2760 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2761 {
2762     BlockDriver *drv = bs->drv;
2763     if (drv && drv->bdrv_get_specific_info) {
2764         return drv->bdrv_get_specific_info(bs);
2765     }
2766     return NULL;
2767 }
2768 
2769 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2770 {
2771     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2772         return;
2773     }
2774 
2775     bs->drv->bdrv_debug_event(bs, event);
2776 }
2777 
2778 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2779                           const char *tag)
2780 {
2781     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2782         bs = bs->file;
2783     }
2784 
2785     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2786         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2787     }
2788 
2789     return -ENOTSUP;
2790 }
2791 
2792 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2793 {
2794     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2795         bs = bs->file;
2796     }
2797 
2798     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2799         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2800     }
2801 
2802     return -ENOTSUP;
2803 }
2804 
2805 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2806 {
2807     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2808         bs = bs->file;
2809     }
2810 
2811     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2812         return bs->drv->bdrv_debug_resume(bs, tag);
2813     }
2814 
2815     return -ENOTSUP;
2816 }
2817 
2818 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2819 {
2820     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2821         bs = bs->file;
2822     }
2823 
2824     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2825         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2826     }
2827 
2828     return false;
2829 }
2830 
2831 int bdrv_is_snapshot(BlockDriverState *bs)
2832 {
2833     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2834 }
2835 
2836 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2837  * relative, it must be relative to the chain.  So, passing in bs->filename
2838  * from a BDS as backing_file should not be done, as that may be relative to
2839  * the CWD rather than the chain. */
2840 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2841         const char *backing_file)
2842 {
2843     char *filename_full = NULL;
2844     char *backing_file_full = NULL;
2845     char *filename_tmp = NULL;
2846     int is_protocol = 0;
2847     BlockDriverState *curr_bs = NULL;
2848     BlockDriverState *retval = NULL;
2849 
2850     if (!bs || !bs->drv || !backing_file) {
2851         return NULL;
2852     }
2853 
2854     filename_full     = g_malloc(PATH_MAX);
2855     backing_file_full = g_malloc(PATH_MAX);
2856     filename_tmp      = g_malloc(PATH_MAX);
2857 
2858     is_protocol = path_has_protocol(backing_file);
2859 
2860     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2861 
2862         /* If either of the filename paths is actually a protocol, then
2863          * compare unmodified paths; otherwise make paths relative */
2864         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2865             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2866                 retval = curr_bs->backing_hd;
2867                 break;
2868             }
2869         } else {
2870             /* If not an absolute filename path, make it relative to the current
2871              * image's filename path */
2872             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2873                          backing_file);
2874 
2875             /* We are going to compare absolute pathnames */
2876             if (!realpath(filename_tmp, filename_full)) {
2877                 continue;
2878             }
2879 
2880             /* We need to make sure the backing filename we are comparing against
2881              * is relative to the current image filename (or absolute) */
2882             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2883                          curr_bs->backing_file);
2884 
2885             if (!realpath(filename_tmp, backing_file_full)) {
2886                 continue;
2887             }
2888 
2889             if (strcmp(backing_file_full, filename_full) == 0) {
2890                 retval = curr_bs->backing_hd;
2891                 break;
2892             }
2893         }
2894     }
2895 
2896     g_free(filename_full);
2897     g_free(backing_file_full);
2898     g_free(filename_tmp);
2899     return retval;
2900 }
2901 
2902 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2903 {
2904     if (!bs->drv) {
2905         return 0;
2906     }
2907 
2908     if (!bs->backing_hd) {
2909         return 0;
2910     }
2911 
2912     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2913 }
2914 
2915 void bdrv_init(void)
2916 {
2917     module_call_init(MODULE_INIT_BLOCK);
2918 }
2919 
2920 void bdrv_init_with_whitelist(void)
2921 {
2922     use_bdrv_whitelist = 1;
2923     bdrv_init();
2924 }
2925 
2926 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2927 {
2928     Error *local_err = NULL;
2929     int ret;
2930 
2931     if (!bs->drv)  {
2932         return;
2933     }
2934 
2935     if (!(bs->open_flags & BDRV_O_INCOMING)) {
2936         return;
2937     }
2938     bs->open_flags &= ~BDRV_O_INCOMING;
2939 
2940     if (bs->drv->bdrv_invalidate_cache) {
2941         bs->drv->bdrv_invalidate_cache(bs, &local_err);
2942     } else if (bs->file) {
2943         bdrv_invalidate_cache(bs->file, &local_err);
2944     }
2945     if (local_err) {
2946         error_propagate(errp, local_err);
2947         return;
2948     }
2949 
2950     ret = refresh_total_sectors(bs, bs->total_sectors);
2951     if (ret < 0) {
2952         error_setg_errno(errp, -ret, "Could not refresh total sector count");
2953         return;
2954     }
2955 }
2956 
2957 void bdrv_invalidate_cache_all(Error **errp)
2958 {
2959     BlockDriverState *bs;
2960     Error *local_err = NULL;
2961 
2962     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2963         AioContext *aio_context = bdrv_get_aio_context(bs);
2964 
2965         aio_context_acquire(aio_context);
2966         bdrv_invalidate_cache(bs, &local_err);
2967         aio_context_release(aio_context);
2968         if (local_err) {
2969             error_propagate(errp, local_err);
2970             return;
2971         }
2972     }
2973 }
2974 
2975 /**************************************************************/
2976 /* removable device support */
2977 
2978 /**
2979  * Return TRUE if the media is present
2980  */
2981 int bdrv_is_inserted(BlockDriverState *bs)
2982 {
2983     BlockDriver *drv = bs->drv;
2984 
2985     if (!drv)
2986         return 0;
2987     if (!drv->bdrv_is_inserted)
2988         return 1;
2989     return drv->bdrv_is_inserted(bs);
2990 }
2991 
2992 /**
2993  * Return whether the media changed since the last call to this
2994  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
2995  */
2996 int bdrv_media_changed(BlockDriverState *bs)
2997 {
2998     BlockDriver *drv = bs->drv;
2999 
3000     if (drv && drv->bdrv_media_changed) {
3001         return drv->bdrv_media_changed(bs);
3002     }
3003     return -ENOTSUP;
3004 }
3005 
3006 /**
3007  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3008  */
3009 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3010 {
3011     BlockDriver *drv = bs->drv;
3012     const char *device_name;
3013 
3014     if (drv && drv->bdrv_eject) {
3015         drv->bdrv_eject(bs, eject_flag);
3016     }
3017 
3018     device_name = bdrv_get_device_name(bs);
3019     if (device_name[0] != '\0') {
3020         qapi_event_send_device_tray_moved(device_name,
3021                                           eject_flag, &error_abort);
3022     }
3023 }
3024 
3025 /**
3026  * Lock or unlock the media (if it is locked, the user won't be able
3027  * to eject it manually).
3028  */
3029 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3030 {
3031     BlockDriver *drv = bs->drv;
3032 
3033     trace_bdrv_lock_medium(bs, locked);
3034 
3035     if (drv && drv->bdrv_lock_medium) {
3036         drv->bdrv_lock_medium(bs, locked);
3037     }
3038 }
3039 
/* Record @align as the guest block size of @bs. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
3044 
3045 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3046 {
3047     BdrvDirtyBitmap *bm;
3048 
3049     assert(name);
3050     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3051         if (bm->name && !strcmp(name, bm->name)) {
3052             return bm;
3053         }
3054     }
3055     return NULL;
3056 }
3057 
/*
 * Turn @bitmap into an anonymous bitmap by freeing its name and setting
 * the name pointer to NULL.  The bitmap must not be frozen (asserted).
 */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
3064 
3065 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3066                                           uint32_t granularity,
3067                                           const char *name,
3068                                           Error **errp)
3069 {
3070     int64_t bitmap_size;
3071     BdrvDirtyBitmap *bitmap;
3072     uint32_t sector_granularity;
3073 
3074     assert((granularity & (granularity - 1)) == 0);
3075 
3076     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3077         error_setg(errp, "Bitmap already exists: %s", name);
3078         return NULL;
3079     }
3080     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3081     assert(sector_granularity);
3082     bitmap_size = bdrv_nb_sectors(bs);
3083     if (bitmap_size < 0) {
3084         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3085         errno = -bitmap_size;
3086         return NULL;
3087     }
3088     bitmap = g_new0(BdrvDirtyBitmap, 1);
3089     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3090     bitmap->size = bitmap_size;
3091     bitmap->name = g_strdup(name);
3092     bitmap->disabled = false;
3093     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3094     return bitmap;
3095 }
3096 
/* A bitmap counts as frozen exactly while it has a successor installed. */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
    return bitmap->successor;
}
3101 
3102 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3103 {
3104     return !(bitmap->disabled || bitmap->successor);
3105 }
3106 
3107 /**
3108  * Create a successor bitmap destined to replace this bitmap after an operation.
3109  * Requires that the bitmap is not frozen and has no successor.
3110  */
3111 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3112                                        BdrvDirtyBitmap *bitmap, Error **errp)
3113 {
3114     uint64_t granularity;
3115     BdrvDirtyBitmap *child;
3116 
3117     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3118         error_setg(errp, "Cannot create a successor for a bitmap that is "
3119                    "currently frozen");
3120         return -1;
3121     }
3122     assert(!bitmap->successor);
3123 
3124     /* Create an anonymous successor */
3125     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3126     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3127     if (!child) {
3128         return -1;
3129     }
3130 
3131     /* Successor will be on or off based on our current state. */
3132     child->disabled = bitmap->disabled;
3133 
3134     /* Install the successor and freeze the parent */
3135     bitmap->successor = child;
3136     return 0;
3137 }
3138 
3139 /**
3140  * For a bitmap with a successor, yield our name to the successor,
3141  * delete the old bitmap, and return a handle to the new bitmap.
3142  */
3143 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3144                                             BdrvDirtyBitmap *bitmap,
3145                                             Error **errp)
3146 {
3147     char *name;
3148     BdrvDirtyBitmap *successor = bitmap->successor;
3149 
3150     if (successor == NULL) {
3151         error_setg(errp, "Cannot relinquish control if "
3152                    "there's no successor present");
3153         return NULL;
3154     }
3155 
3156     name = bitmap->name;
3157     bitmap->name = NULL;
3158     successor->name = name;
3159     bitmap->successor = NULL;
3160     bdrv_release_dirty_bitmap(bs, bitmap);
3161 
3162     return successor;
3163 }
3164 
3165 /**
3166  * In cases of failure where we can no longer safely delete the parent,
3167  * we may wish to re-join the parent and child/successor.
3168  * The merged parent will be un-frozen, but not explicitly re-enabled.
3169  */
3170 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3171                                            BdrvDirtyBitmap *parent,
3172                                            Error **errp)
3173 {
3174     BdrvDirtyBitmap *successor = parent->successor;
3175 
3176     if (!successor) {
3177         error_setg(errp, "Cannot reclaim a successor when none is present");
3178         return NULL;
3179     }
3180 
3181     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3182         error_setg(errp, "Merging of parent and successor bitmap failed");
3183         return NULL;
3184     }
3185     bdrv_release_dirty_bitmap(bs, successor);
3186     parent->successor = NULL;
3187 
3188     return parent;
3189 }
3190 
3191 /**
3192  * Truncates _all_ bitmaps attached to a BDS.
3193  */
3194 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3195 {
3196     BdrvDirtyBitmap *bitmap;
3197     uint64_t size = bdrv_nb_sectors(bs);
3198 
3199     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3200         if (bdrv_dirty_bitmap_frozen(bitmap)) {
3201             continue;
3202         }
3203         hbitmap_truncate(bitmap->bitmap, size);
3204     }
3205 }
3206 
3207 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3208 {
3209     BdrvDirtyBitmap *bm, *next;
3210     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3211         if (bm == bitmap) {
3212             assert(!bdrv_dirty_bitmap_frozen(bm));
3213             QLIST_REMOVE(bitmap, list);
3214             hbitmap_free(bitmap->bitmap);
3215             g_free(bitmap->name);
3216             g_free(bitmap);
3217             return;
3218         }
3219     }
3220 }
3221 
/* Set the disabled flag of @bitmap.  The bitmap must not be frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
3227 
/* Clear the disabled flag of @bitmap.  The bitmap must not be frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
3233 
3234 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3235 {
3236     BdrvDirtyBitmap *bm;
3237     BlockDirtyInfoList *list = NULL;
3238     BlockDirtyInfoList **plist = &list;
3239 
3240     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3241         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3242         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3243         info->count = bdrv_get_dirty_count(bm);
3244         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3245         info->has_name = !!bm->name;
3246         info->name = g_strdup(bm->name);
3247         info->frozen = bdrv_dirty_bitmap_frozen(bm);
3248         entry->value = info;
3249         *plist = entry;
3250         plist = &entry->next;
3251     }
3252 
3253     return list;
3254 }
3255 
3256 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3257 {
3258     if (bitmap) {
3259         return hbitmap_get(bitmap->bitmap, sector);
3260     } else {
3261         return 0;
3262     }
3263 }
3264 
3265 /**
3266  * Chooses a default granularity based on the existing cluster size,
3267  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3268  * is no cluster size information available.
3269  */
3270 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3271 {
3272     BlockDriverInfo bdi;
3273     uint32_t granularity;
3274 
3275     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3276         granularity = MAX(4096, bdi.cluster_size);
3277         granularity = MIN(65536, granularity);
3278     } else {
3279         granularity = 65536;
3280     }
3281 
3282     return granularity;
3283 }
3284 
/*
 * Return the granularity of @bitmap in bytes: BDRV_SECTOR_SIZE shifted
 * left by the granularity value stored in the underlying hbitmap.
 */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
3289 
/* Initialise @hbi to iterate over @bitmap's hbitmap, starting at 0. */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
3294 
/*
 * Call hbitmap_set() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled (asserted).
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
3301 
/*
 * Call hbitmap_reset() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled (asserted).
 */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
3308 
/*
 * Reset the whole of @bitmap, i.e. the range [0, bitmap->size).
 * The bitmap must be enabled (asserted).
 */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
}
3314 
3315 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3316                     int nr_sectors)
3317 {
3318     BdrvDirtyBitmap *bitmap;
3319     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3320         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3321             continue;
3322         }
3323         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3324     }
3325 }
3326 
3327 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3328                       int nr_sectors)
3329 {
3330     BdrvDirtyBitmap *bitmap;
3331     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3332         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3333             continue;
3334         }
3335         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3336     }
3337 }
3338 
/**
 * Reposition an HBitmapIter at an arbitrary offset.
 *
 * The iterator is re-initialised on the hbitmap it already refers to, so
 * @hbi must have been initialised before (hbi->hb is asserted non-NULL).
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
3347 
/* Return hbitmap_count() of the hbitmap backing @bitmap. */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
3352 
/* Get a reference to bs by incrementing its reference count.
 * Each reference should be dropped again with bdrv_unref(). */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3358 
3359 /* Release a previously grabbed reference to bs.
3360  * If after releasing, reference count is zero, the BlockDriverState is
3361  * deleted. */
3362 void bdrv_unref(BlockDriverState *bs)
3363 {
3364     if (!bs) {
3365         return;
3366     }
3367     assert(bs->refcnt > 0);
3368     if (--bs->refcnt == 0) {
3369         bdrv_delete(bs);
3370     }
3371 }
3372 
/* One entry of a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error describing why the operation is blocked; bdrv_op_unblock()
     * identifies blockers by comparing this pointer. */
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3377 
3378 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3379 {
3380     BdrvOpBlocker *blocker;
3381     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3382     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3383         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3384         if (errp) {
3385             error_setg(errp, "Node '%s' is busy: %s",
3386                        bdrv_get_device_or_node_name(bs),
3387                        error_get_pretty(blocker->reason));
3388         }
3389         return true;
3390     }
3391     return false;
3392 }
3393 
/*
 * Add a blocker with the given @reason to the list for operation @op on
 * @bs.  Only the @reason pointer is stored, not a copy.
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}
3403 
3404 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3405 {
3406     BdrvOpBlocker *blocker, *next;
3407     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3408     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3409         if (blocker->reason == reason) {
3410             QLIST_REMOVE(blocker, list);
3411             g_free(blocker);
3412         }
3413     }
3414 }
3415 
/* Block every operation type on @bs, using the same @reason for each. */
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}
3423 
/* Remove the blockers carrying @reason from every operation type on @bs. */
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}
3431 
3432 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3433 {
3434     int i;
3435 
3436     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3437         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3438             return false;
3439         }
3440     }
3441     return true;
3442 }
3443 
/* Switch on I/O status tracking for @bs and start from the OK state. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3449 
3450 /* The I/O status is only enabled if the drive explicitly
3451  * enables it _and_ the VM is configured to stop on errors */
3452 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3453 {
3454     return (bs->iostatus_enabled &&
3455            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3456             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3457             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3458 }
3459 
/* Switch off I/O status tracking for @bs; the stored status is kept. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3464 
3465 void bdrv_iostatus_reset(BlockDriverState *bs)
3466 {
3467     if (bdrv_iostatus_is_enabled(bs)) {
3468         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3469         if (bs->job) {
3470             block_job_iostatus_reset(bs->job);
3471         }
3472     }
3473 }
3474 
3475 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3476 {
3477     assert(bdrv_iostatus_is_enabled(bs));
3478     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3479         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3480                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3481     }
3482 }
3483 
/*
 * Create a new image @filename in format @fmt.
 *
 * @base_filename: optional backing file, stored as BLOCK_OPT_BACKING_FILE
 * @base_fmt:      optional backing format, stored as BLOCK_OPT_BACKING_FMT
 * @options:       optional option string, parsed on top of the defaults
 * @img_size:      initial value for BLOCK_OPT_SIZE; when the resulting size
 *                 option reads back as -1, the size is taken from the
 *                 backing file instead (and it is an error if there is none)
 * @flags:         open flags used for probing the backing file, after
 *                 BDRV_O_RDWR, BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING have
 *                 been masked out
 * @errp:          receives the error on failure
 * @quiet:         when false, a "Formatting ..." line is printed to stdout
 *
 * The option list is the concatenation of the format driver's and the
 * protocol driver's create_opts; both drivers must provide one.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Nothing has been allocated up to here, so the early exits above may
     * return directly; from now on failures must go through 'out'. */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* Report the parser's own message, then hand the caller a
             * generic error instead. */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Read the backing file back from opts: it may come from either
     * @base_filename or the parsed @options. */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE: this declaration shadows the outer 'size' */
            int64_t size;
            int back_flags;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; a still-pending local error is handed to the caller */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3652 
/* Return the AioContext currently stored in @bs. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
3657 
/*
 * Detach @bs, and recursively its file and backing_hd children, from the
 * current AioContext.  Does nothing when @bs has no driver.
 *
 * Order: registered notifiers' detach callbacks, throttle state (only if
 * I/O limits are enabled), the driver's own hook, then the children.
 * bs->aio_context is set to NULL at the end.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    if (!bs->drv) {
        return;
    }

    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
3685 
/*
 * Attach @bs, and recursively its backing_hd and file children, to
 * @new_context.  Does nothing when @bs has no driver.
 *
 * Order: bs->aio_context is stored first, then the children are attached,
 * then the driver's own hook, then the throttle state (only if I/O limits
 * are enabled), and finally the registered notifiers' attach callbacks.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
3714 
/*
 * Move @bs to @new_context: drain all requests, detach from the current
 * context, then attach to @new_context while holding it.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
3728 
3729 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3730         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3731         void (*detach_aio_context)(void *opaque), void *opaque)
3732 {
3733     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3734     *ban = (BdrvAioNotifier){
3735         .attached_aio_context = attached_aio_context,
3736         .detach_aio_context   = detach_aio_context,
3737         .opaque               = opaque
3738     };
3739 
3740     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3741 }
3742 
3743 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3744                                       void (*attached_aio_context)(AioContext *,
3745                                                                    void *),
3746                                       void (*detach_aio_context)(void *),
3747                                       void *opaque)
3748 {
3749     BdrvAioNotifier *ban, *ban_next;
3750 
3751     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3752         if (ban->attached_aio_context == attached_aio_context &&
3753             ban->detach_aio_context   == detach_aio_context   &&
3754             ban->opaque               == opaque)
3755         {
3756             QLIST_REMOVE(ban, list);
3757             g_free(ban);
3758 
3759             return;
3760         }
3761     }
3762 
3763     abort();
3764 }
3765 
3766 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3767                        BlockDriverAmendStatusCB *status_cb)
3768 {
3769     if (!bs->drv->bdrv_amend_options) {
3770         return -ENOTSUP;
3771     }
3772     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3773 }
3774 
3775 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3776  * of block filter and by bdrv_is_first_non_filter.
3777  * It is used to test if the given bs is the candidate or recurse more in the
3778  * node graph.
3779  */
3780 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3781                                       BlockDriverState *candidate)
3782 {
3783     /* return false if basic checks fails */
3784     if (!bs || !bs->drv) {
3785         return false;
3786     }
3787 
3788     /* the code reached a non block filter driver -> check if the bs is
3789      * the same as the candidate. It's the recursion termination condition.
3790      */
3791     if (!bs->drv->is_filter) {
3792         return bs == candidate;
3793     }
3794     /* Down this path the driver is a block filter driver */
3795 
3796     /* If the block filter recursion method is defined use it to recurse down
3797      * the node graph.
3798      */
3799     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3800         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3801     }
3802 
3803     /* the driver is a block filter but don't allow to recurse -> return false
3804      */
3805     return false;
3806 }
3807 
3808 /* This function checks if the candidate is the first non filter bs down it's
3809  * bs chain. Since we don't have pointers to parents it explore all bs chains
3810  * from the top. Some filters can choose not to pass down the recursion.
3811  */
3812 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3813 {
3814     BlockDriverState *bs;
3815 
3816     /* walk down the bs forest recursively */
3817     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3818         bool perm;
3819 
3820         /* try to recurse in this top level bs */
3821         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3822 
3823         /* candidate is the first non filter */
3824         if (perm) {
3825             return true;
3826         }
3827     }
3828 
3829     return false;
3830 }
3831 
3832 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3833 {
3834     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3835     AioContext *aio_context;
3836 
3837     if (!to_replace_bs) {
3838         error_setg(errp, "Node name '%s' not found", node_name);
3839         return NULL;
3840     }
3841 
3842     aio_context = bdrv_get_aio_context(to_replace_bs);
3843     aio_context_acquire(aio_context);
3844 
3845     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3846         to_replace_bs = NULL;
3847         goto out;
3848     }
3849 
3850     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3851      * most non filter in order to prevent data corruption.
3852      * Another benefit is that this tests exclude backing files which are
3853      * blocked by the backing blockers.
3854      */
3855     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3856         error_setg(errp, "Only top most non filter can be replaced");
3857         to_replace_bs = NULL;
3858         goto out;
3859     }
3860 
3861 out:
3862     aio_context_release(aio_context);
3863     return to_replace_bs;
3864 }
3865 
3866 static bool append_open_options(QDict *d, BlockDriverState *bs)
3867 {
3868     const QDictEntry *entry;
3869     bool found_any = false;
3870 
3871     for (entry = qdict_first(bs->options); entry;
3872          entry = qdict_next(bs->options, entry))
3873     {
3874         /* Only take options for this level and exclude all non-driver-specific
3875          * options */
3876         if (!strchr(qdict_entry_key(entry), '.') &&
3877             strcmp(qdict_entry_key(entry), "node-name"))
3878         {
3879             qobject_incref(qdict_entry_value(entry));
3880             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3881             found_any = true;
3882         }
3883     }
3884 
3885     return found_any;
3886 }
3887 
3888 /* Updates the following BDS fields:
3889  *  - exact_filename: A filename which may be used for opening a block device
3890  *                    which (mostly) equals the given BDS (even without any
3891  *                    other options; so reading and writing must return the same
3892  *                    results, but caching etc. may be different)
3893  *  - full_open_options: Options which, when given when opening a block device
3894  *                       (without a filename), result in a BDS (mostly)
3895  *                       equalling the given one
3896  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3897  *              full_open_options is converted to a JSON object, prefixed with
3898  *              "json:" (for use through the JSON pseudo protocol) and put here.
3899  */
3900 void bdrv_refresh_filename(BlockDriverState *bs)
3901 {
3902     BlockDriver *drv = bs->drv;
3903     QDict *opts;
3904 
3905     if (!drv) {
3906         return;
3907     }
3908 
3909     /* This BDS's file name will most probably depend on its file's name, so
3910      * refresh that first */
3911     if (bs->file) {
3912         bdrv_refresh_filename(bs->file);
3913     }
3914 
3915     if (drv->bdrv_refresh_filename) {
3916         /* Obsolete information is of no use here, so drop the old file name
3917          * information before refreshing it */
3918         bs->exact_filename[0] = '\0';
3919         if (bs->full_open_options) {
3920             QDECREF(bs->full_open_options);
3921             bs->full_open_options = NULL;
3922         }
3923 
3924         drv->bdrv_refresh_filename(bs);
3925     } else if (bs->file) {
3926         /* Try to reconstruct valid information from the underlying file */
3927         bool has_open_options;
3928 
3929         bs->exact_filename[0] = '\0';
3930         if (bs->full_open_options) {
3931             QDECREF(bs->full_open_options);
3932             bs->full_open_options = NULL;
3933         }
3934 
3935         opts = qdict_new();
3936         has_open_options = append_open_options(opts, bs);
3937 
3938         /* If no specific options have been given for this BDS, the filename of
3939          * the underlying file should suffice for this one as well */
3940         if (bs->file->exact_filename[0] && !has_open_options) {
3941             strcpy(bs->exact_filename, bs->file->exact_filename);
3942         }
3943         /* Reconstructing the full options QDict is simple for most format block
3944          * drivers, as long as the full options are known for the underlying
3945          * file BDS. The full options QDict of that file BDS should somehow
3946          * contain a representation of the filename, therefore the following
3947          * suffices without querying the (exact_)filename of this BDS. */
3948         if (bs->file->full_open_options) {
3949             qdict_put_obj(opts, "driver",
3950                           QOBJECT(qstring_from_str(drv->format_name)));
3951             QINCREF(bs->file->full_open_options);
3952             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3953 
3954             bs->full_open_options = opts;
3955         } else {
3956             QDECREF(opts);
3957         }
3958     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3959         /* There is no underlying file BDS (at least referenced by BDS.file),
3960          * so the full options QDict should be equal to the options given
3961          * specifically for this block device when it was opened (plus the
3962          * driver specification).
3963          * Because those options don't change, there is no need to update
3964          * full_open_options when it's already set. */
3965 
3966         opts = qdict_new();
3967         append_open_options(opts, bs);
3968         qdict_put_obj(opts, "driver",
3969                       QOBJECT(qstring_from_str(drv->format_name)));
3970 
3971         if (bs->exact_filename[0]) {
3972             /* This may not work for all block protocol drivers (some may
3973              * require this filename to be parsed), but we have to find some
3974              * default solution here, so just include it. If some block driver
3975              * does not support pure options without any filename at all or
3976              * needs some special format of the options QDict, it needs to
3977              * implement the driver-specific bdrv_refresh_filename() function.
3978              */
3979             qdict_put_obj(opts, "filename",
3980                           QOBJECT(qstring_from_str(bs->exact_filename)));
3981         }
3982 
3983         bs->full_open_options = opts;
3984     }
3985 
3986     if (bs->exact_filename[0]) {
3987         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
3988     } else if (bs->full_open_options) {
3989         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
3990         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
3991                  qstring_get_str(json));
3992         QDECREF(json);
3993     }
3994 }
3995 
3996 /* This accessor function purpose is to allow the device models to access the
3997  * BlockAcctStats structure embedded inside a BlockDriverState without being
3998  * aware of the BlockDriverState structure layout.
3999  * It will go away when the BlockAcctStats structure will be moved inside
4000  * the device models.
4001  */
4002 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4003 {
4004     return &bs->stats;
4005 }
4006