xref: /openbmc/qemu/block.c (revision 91bfcdb0)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 
43 #ifdef CONFIG_BSD
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 *
 * The state is not stored explicitly; it is derived from the @successor
 * and @disabled fields as listed above.
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains into bs->dirty_bitmaps -- confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
73 
/*
 * Sentinel stored in a result field before a coroutine-backed operation is
 * started; the caller polls until the field no longer holds this value.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates created by bdrv_new_root(), linked via device_list */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BlockDriverStates that were given a node name, linked via node_list */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations for functions defined later in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
93 
94 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter followed by ':'
 * (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];

    /* Only look at the second character once the first one is a letter */
    if (!((drive >= 'a' && drive <= 'z') ||
          (drive >= 'A' && drive <= 'Z'))) {
        return 0;
    }
    return filename[1] == ':';
}
101 
/*
 * Return 1 if @filename is either a bare drive specification
 * ("<letter>:" and nothing else) or starts with one of the device
 * prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
112 #endif
113 
114 size_t bdrv_opt_mem_align(BlockDriverState *bs)
115 {
116     if (!bs || !bs->drv) {
117         /* page size or 4k (hdd sector size) should be on the safe side */
118         return MAX(4096, getpagesize());
119     }
120 
121     return bs->bl.opt_mem_alignment;
122 }
123 
124 size_t bdrv_min_mem_align(BlockDriverState *bs)
125 {
126     if (!bs || !bs->drv) {
127         /* page size or 4k (hdd sector size) should be on the safe side */
128         return MAX(4096, getpagesize());
129     }
130 
131     return bs->bl.min_mem_alignment;
132 }
133 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * This is the case when the first character out of ':' and the path
 * separators is a ':'.  On Windows, drive specifications are never treated
 * as protocols and '\' counts as a path separator too.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
151 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\'
 * and anything accepted by is_windows_drive() or
 * is_windows_drive_prefix() count as absolute as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
164 
/*
 * Build the path of @filename relative to @base_path in @dest.
 *
 * If @filename is absolute it is simply copied to @dest.  Otherwise the
 * leading part of @base_path, up to and including its last path separator
 * or its first ':' (whichever comes later), is copied to @dest and
 * @filename is appended.  URLs are supported as @base_path.
 *
 * At most @dest_size bytes are written, including the terminating NUL.
 * Nothing is written if @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol prefix), if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix is longer */
    cut = after_sep > after_colon ? after_sep : after_colon;

    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        /* Leave room for the terminating NUL */
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
208 
209 void bdrv_get_full_backing_filename_from_filename(const char *backed,
210                                                   const char *backing,
211                                                   char *dest, size_t sz,
212                                                   Error **errp)
213 {
214     if (backing[0] == '\0' || path_has_protocol(backing) ||
215         path_is_absolute(backing))
216     {
217         pstrcpy(dest, sz, backing);
218     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
219         error_setg(errp, "Cannot use relative backing file names for '%s'",
220                    backed);
221     } else {
222         path_combine(dest, sz, backed, backing);
223     }
224 }
225 
226 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
227                                     Error **errp)
228 {
229     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
230 
231     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
232                                                  dest, sz, errp);
233 }
234 
/*
 * Register the block driver @bdrv: let bdrv_setup_io_funcs() prepare its
 * I/O callbacks, then add it at the head of the global driver list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    /* Inserted at the head, so later registrations are found first */
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
241 
242 BlockDriverState *bdrv_new_root(void)
243 {
244     BlockDriverState *bs = bdrv_new();
245 
246     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
247     return bs;
248 }
249 
250 BlockDriverState *bdrv_new(void)
251 {
252     BlockDriverState *bs;
253     int i;
254 
255     bs = g_new0(BlockDriverState, 1);
256     QLIST_INIT(&bs->dirty_bitmaps);
257     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
258         QLIST_INIT(&bs->op_blockers[i]);
259     }
260     bdrv_iostatus_disable(bs);
261     notifier_list_init(&bs->close_notifiers);
262     notifier_with_return_list_init(&bs->before_write_notifiers);
263     qemu_co_queue_init(&bs->throttled_reqs[0]);
264     qemu_co_queue_init(&bs->throttled_reqs[1]);
265     bs->refcnt = 1;
266     bs->aio_context = qemu_get_aio_context();
267 
268     return bs;
269 }
270 
/*
 * Add @notify to the close_notifiers list of @bs.
 * Presumably the notifier fires when @bs is closed -- the firing site is
 * not visible in this part of the file.
 */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
275 
276 BlockDriver *bdrv_find_format(const char *format_name)
277 {
278     BlockDriver *drv1;
279     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280         if (!strcmp(drv1->format_name, format_name)) {
281             return drv1;
282         }
283     }
284     return NULL;
285 }
286 
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
288 {
289     static const char *whitelist_rw[] = {
290         CONFIG_BDRV_RW_WHITELIST
291     };
292     static const char *whitelist_ro[] = {
293         CONFIG_BDRV_RO_WHITELIST
294     };
295     const char **p;
296 
297     if (!whitelist_rw[0] && !whitelist_ro[0]) {
298         return 1;               /* no whitelist, anything goes */
299     }
300 
301     for (p = whitelist_rw; *p; p++) {
302         if (!strcmp(drv->format_name, *p)) {
303             return 1;
304         }
305     }
306     if (read_only) {
307         for (p = whitelist_ro; *p; p++) {
308             if (!strcmp(drv->format_name, *p)) {
309                 return 1;
310             }
311         }
312     }
313     return 0;
314 }
315 
/*
 * Argument/result bundle handed to bdrv_create_co_entry() so that image
 * creation can run inside a coroutine.
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback is invoked */
    char *filename;     /* Name of the image to create */
    QemuOpts *opts;     /* Creation options passed on to the driver */
    int ret;            /* Driver's return value; NOT_DONE until it finishes */
    Error *err;         /* Error reported by the driver, or NULL */
} CreateCo;
323 
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
325 {
326     Error *local_err = NULL;
327     int ret;
328 
329     CreateCo *cco = opaque;
330     assert(cco->drv);
331 
332     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333     if (local_err) {
334         error_propagate(&cco->err, local_err);
335     }
336     cco->ret = ret;
337 }
338 
339 int bdrv_create(BlockDriver *drv, const char* filename,
340                 QemuOpts *opts, Error **errp)
341 {
342     int ret;
343 
344     Coroutine *co;
345     CreateCo cco = {
346         .drv = drv,
347         .filename = g_strdup(filename),
348         .opts = opts,
349         .ret = NOT_DONE,
350         .err = NULL,
351     };
352 
353     if (!drv->bdrv_create) {
354         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355         ret = -ENOTSUP;
356         goto out;
357     }
358 
359     if (qemu_in_coroutine()) {
360         /* Fast-path if already in coroutine context */
361         bdrv_create_co_entry(&cco);
362     } else {
363         co = qemu_coroutine_create(bdrv_create_co_entry);
364         qemu_coroutine_enter(co, &cco);
365         while (cco.ret == NOT_DONE) {
366             aio_poll(qemu_get_aio_context(), true);
367         }
368     }
369 
370     ret = cco.ret;
371     if (ret < 0) {
372         if (cco.err) {
373             error_propagate(errp, cco.err);
374         } else {
375             error_setg_errno(errp, -ret, "Could not create image");
376         }
377     }
378 
379 out:
380     g_free(cco.filename);
381     return ret;
382 }
383 
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
385 {
386     BlockDriver *drv;
387     Error *local_err = NULL;
388     int ret;
389 
390     drv = bdrv_find_protocol(filename, true, errp);
391     if (drv == NULL) {
392         return -ENOENT;
393     }
394 
395     ret = bdrv_create(drv, filename, opts, &local_err);
396     if (local_err) {
397         error_propagate(errp, local_err);
398     }
399     return ret;
400 }
401 
402 /**
403  * Try to get @bs's logical and physical block size.
404  * On success, store them in @bsz struct and return 0.
405  * On failure return -errno.
406  * @bs must not be empty.
407  */
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
409 {
410     BlockDriver *drv = bs->drv;
411 
412     if (drv && drv->bdrv_probe_blocksizes) {
413         return drv->bdrv_probe_blocksizes(bs, bsz);
414     }
415 
416     return -ENOTSUP;
417 }
418 
419 /**
420  * Try to get @bs's geometry (cyls, heads, sectors).
421  * On success, store them in @geo struct and return 0.
422  * On failure return -errno.
423  * @bs must not be empty.
424  */
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
426 {
427     BlockDriver *drv = bs->drv;
428 
429     if (drv && drv->bdrv_probe_geometry) {
430         return drv->bdrv_probe_geometry(bs, geo);
431     }
432 
433     return -ENOTSUP;
434 }
435 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the size of
 * that buffer in bytes.  On POSIX hosts the file is created in $TMPDIR,
 * or in /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @size bytes).
 *
 * NOTE(review): the Windows branch returns a negated GetLastError() code,
 * which is a Windows error code rather than an errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so save close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
471 
472 /*
473  * Detect host devices. By convention, /dev/cdrom[N] is always
474  * recognized as a host CDROM.
475  */
476 static BlockDriver *find_hdev_driver(const char *filename)
477 {
478     int score_max = 0, score;
479     BlockDriver *drv = NULL, *d;
480 
481     QLIST_FOREACH(d, &bdrv_drivers, list) {
482         if (d->bdrv_probe_device) {
483             score = d->bdrv_probe_device(filename);
484             if (score > score_max) {
485                 score_max = score;
486                 drv = d;
487             }
488         }
489     }
490 
491     return drv;
492 }
493 
494 BlockDriver *bdrv_find_protocol(const char *filename,
495                                 bool allow_protocol_prefix,
496                                 Error **errp)
497 {
498     BlockDriver *drv1;
499     char protocol[128];
500     int len;
501     const char *p;
502 
503     /* TODO Drivers without bdrv_file_open must be specified explicitly */
504 
505     /*
506      * XXX(hch): we really should not let host device detection
507      * override an explicit protocol specification, but moving this
508      * later breaks access to device names with colons in them.
509      * Thanks to the brain-dead persistent naming schemes on udev-
510      * based Linux systems those actually are quite common.
511      */
512     drv1 = find_hdev_driver(filename);
513     if (drv1) {
514         return drv1;
515     }
516 
517     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518         return &bdrv_file;
519     }
520 
521     p = strchr(filename, ':');
522     assert(p != NULL);
523     len = p - filename;
524     if (len > sizeof(protocol) - 1)
525         len = sizeof(protocol) - 1;
526     memcpy(protocol, filename, len);
527     protocol[len] = '\0';
528     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529         if (drv1->protocol_name &&
530             !strcmp(drv1->protocol_name, protocol)) {
531             return drv1;
532         }
533     }
534 
535     error_setg(errp, "Unknown protocol '%s'", protocol);
536     return NULL;
537 }
538 
539 /*
540  * Guess image format by probing its contents.
541  * This is not a good idea when your image is raw (CVE-2008-2004), but
542  * we do it anyway for backward compatibility.
543  *
544  * @buf         contains the image's first @buf_size bytes.
545  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546  *              but can be smaller if the image file is smaller)
547  * @filename    is its filename.
548  *
549  * For all block drivers, call the bdrv_probe() method to get its
550  * probing score.
551  * Return the first block driver with the highest probing score.
552  */
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554                             const char *filename)
555 {
556     int score_max = 0, score;
557     BlockDriver *drv = NULL, *d;
558 
559     QLIST_FOREACH(d, &bdrv_drivers, list) {
560         if (d->bdrv_probe) {
561             score = d->bdrv_probe(buf, buf_size, filename);
562             if (score > score_max) {
563                 score_max = score;
564                 drv = d;
565             }
566         }
567     }
568 
569     return drv;
570 }
571 
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573                              BlockDriver **pdrv, Error **errp)
574 {
575     BlockDriver *drv;
576     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577     int ret = 0;
578 
579     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581         *pdrv = &bdrv_raw;
582         return ret;
583     }
584 
585     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586     if (ret < 0) {
587         error_setg_errno(errp, -ret, "Could not read image for determining its "
588                          "format");
589         *pdrv = NULL;
590         return ret;
591     }
592 
593     drv = bdrv_probe_all(buf, ret, filename);
594     if (!drv) {
595         error_setg(errp, "Could not determine image format: No compatible "
596                    "driver found");
597         ret = -ENOENT;
598     }
599     *pdrv = drv;
600     return ret;
601 }
602 
603 /**
604  * Set the current 'total_sectors' value
605  * Return 0 on success, -errno on error.
606  */
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
608 {
609     BlockDriver *drv = bs->drv;
610 
611     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612     if (bdrv_is_sg(bs))
613         return 0;
614 
615     /* query actual device if possible, otherwise just trust the hint */
616     if (drv->bdrv_getlength) {
617         int64_t length = drv->bdrv_getlength(bs);
618         if (length < 0) {
619             return length;
620         }
621         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
622     }
623 
624     bs->total_sectors = hint;
625     return 0;
626 }
627 
628 /**
629  * Set open flags for a given discard mode
630  *
631  * Return 0 on success, -1 if the discard mode was invalid.
632  */
633 int bdrv_parse_discard_flags(const char *mode, int *flags)
634 {
635     *flags &= ~BDRV_O_UNMAP;
636 
637     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
638         /* do nothing */
639     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
640         *flags |= BDRV_O_UNMAP;
641     } else {
642         return -1;
643     }
644 
645     return 0;
646 }
647 
648 /**
649  * Set open flags for a given cache mode
650  *
651  * Return 0 on success, -1 if the cache mode was invalid.
652  */
653 int bdrv_parse_cache_flags(const char *mode, int *flags)
654 {
655     *flags &= ~BDRV_O_CACHE_MASK;
656 
657     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
658         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
659     } else if (!strcmp(mode, "directsync")) {
660         *flags |= BDRV_O_NOCACHE;
661     } else if (!strcmp(mode, "writeback")) {
662         *flags |= BDRV_O_CACHE_WB;
663     } else if (!strcmp(mode, "unsafe")) {
664         *flags |= BDRV_O_CACHE_WB;
665         *flags |= BDRV_O_NO_FLUSH;
666     } else if (!strcmp(mode, "writethrough")) {
667         /* this is the default */
668     } else {
669         return -1;
670     }
671 
672     return 0;
673 }
674 
675 /*
676  * Returns the flags that a temporary snapshot should get, based on the
677  * originally requested flags (the originally requested image will have flags
678  * like a backing file)
679  */
680 static int bdrv_temp_snapshot_flags(int flags)
681 {
682     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
683 }
684 
685 /*
686  * Returns the flags that bs->file should get if a protocol driver is expected,
687  * based on the given flags for the parent BDS
688  */
689 static int bdrv_inherited_flags(int flags)
690 {
691     /* Enable protocol handling, disable format probing for bs->file */
692     flags |= BDRV_O_PROTOCOL;
693 
694     /* Our block drivers take care to send flushes and respect unmap policy,
695      * so we can enable both unconditionally on lower layers. */
696     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
697 
698     /* Clear flags that only apply to the top layer */
699     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
700 
701     return flags;
702 }
703 
/*
 * Child role whose flags are derived with bdrv_inherited_flags(), i.e. for
 * a bs->file child that is expected to be a protocol driver.
 */
const BdrvChildRole child_file = {
    .inherit_flags = bdrv_inherited_flags,
};
707 
708 /*
709  * Returns the flags that bs->file should get if the use of formats (and not
710  * only protocols) is permitted for it, based on the given flags for the parent
711  * BDS
712  */
713 static int bdrv_inherited_fmt_flags(int parent_flags)
714 {
715     int flags = child_file.inherit_flags(parent_flags);
716     return flags & ~BDRV_O_PROTOCOL;
717 }
718 
/*
 * Child role whose flags are derived with bdrv_inherited_fmt_flags(), i.e.
 * for a bs->file child that may use a format driver.
 */
const BdrvChildRole child_format = {
    .inherit_flags = bdrv_inherited_fmt_flags,
};
722 
723 /*
724  * Returns the flags that bs->backing should get, based on the given flags
725  * for the parent BDS
726  */
727 static int bdrv_backing_flags(int flags)
728 {
729     /* backing files always opened read-only */
730     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
731 
732     /* snapshot=on is handled on the top layer */
733     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
734 
735     return flags;
736 }
737 
/* Child role whose flags are derived with bdrv_backing_flags() */
static const BdrvChildRole child_backing = {
    .inherit_flags = bdrv_backing_flags,
};
741 
742 static int bdrv_open_flags(BlockDriverState *bs, int flags)
743 {
744     int open_flags = flags | BDRV_O_CACHE_WB;
745 
746     /*
747      * Clear flags that are internal to the block layer before opening the
748      * image.
749      */
750     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
751 
752     /*
753      * Snapshots should be writable.
754      */
755     if (flags & BDRV_O_TEMPORARY) {
756         open_flags |= BDRV_O_RDWR;
757     }
758 
759     return open_flags;
760 }
761 
762 static void bdrv_assign_node_name(BlockDriverState *bs,
763                                   const char *node_name,
764                                   Error **errp)
765 {
766     char *gen_node_name = NULL;
767 
768     if (!node_name) {
769         node_name = gen_node_name = id_generate(ID_BLOCK);
770     } else if (!id_wellformed(node_name)) {
771         /*
772          * Check for empty string or invalid characters, but not if it is
773          * generated (generated names use characters not available to the user)
774          */
775         error_setg(errp, "Invalid node name");
776         return;
777     }
778 
779     /* takes care of avoiding namespaces collisions */
780     if (blk_by_name(node_name)) {
781         error_setg(errp, "node-name=%s is conflicting with a device id",
782                    node_name);
783         goto out;
784     }
785 
786     /* takes care of avoiding duplicates node names */
787     if (bdrv_find_node(node_name)) {
788         error_setg(errp, "Duplicate node name");
789         goto out;
790     }
791 
792     /* copy node name into the bs and insert it into the graph list */
793     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
794     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
795 out:
796     g_free(gen_node_name);
797 }
798 
/*
 * Options that bdrv_open_common() absorbs from the options QDict itself,
 * before the remaining options are handed to the block driver.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
811 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs       the BlockDriverState to open; bs->file must still be NULL
 * @file     child to install as bs->file for drivers that use bdrv_open();
 *           must be NULL for drivers that implement bdrv_file_open()
 * @options  option QDict; must be non-NULL and must not be bs->options
 * @flags    BDRV_O_* flags requested for @bs
 * @drv      driver to open @bs with; must be non-NULL
 * @errp     receives the error on failure
 *
 * Returns 0 on success and a negative errno value on failure.  On failures
 * past the driver setup, bs->file, bs->opaque and bs->drv are reset to NULL.
 *
 * NOTE(review): a node name assigned here is not undone on later failure
 * paths in this function; presumably the caller cleans that up -- confirm.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the child if there is one, else from options */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Pull the options handled by this function out of the QDict */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* A missing "node-name" yields NULL, which requests a generated name */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that the driver's open function may override */
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Tell the user when the driver would be allowed read-only */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /*
     * Taken from the caller's flags rather than open_flags, because
     * bdrv_open_flags() always sets BDRV_O_CACHE_WB in the latter.
     */
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the driver setup; bs->file is only forgotten, not released here */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
962 
963 static QDict *parse_json_filename(const char *filename, Error **errp)
964 {
965     QObject *options_obj;
966     QDict *options;
967     int ret;
968 
969     ret = strstart(filename, "json:", &filename);
970     assert(ret);
971 
972     options_obj = qobject_from_json(filename);
973     if (!options_obj) {
974         error_setg(errp, "Could not parse the JSON options");
975         return NULL;
976     }
977 
978     if (qobject_type(options_obj) != QTYPE_QDICT) {
979         qobject_decref(options_obj);
980         error_setg(errp, "Invalid JSON object given");
981         return NULL;
982     }
983 
984     options = qobject_to_qdict(options_obj);
985     qdict_flatten(options);
986 
987     return options;
988 }
989 
990 /*
991  * Fills in default options for opening images and converts the legacy
992  * filename/flags pair to option QDict entries.
993  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
994  * block driver has been specified explicitly.
995  */
996 static int bdrv_fill_options(QDict **options, const char **pfilename,
997                              int *flags, Error **errp)
998 {
999     const char *filename = *pfilename;
1000     const char *drvname;
1001     bool protocol = *flags & BDRV_O_PROTOCOL;
1002     bool parse_filename = false;
1003     BlockDriver *drv = NULL;
1004     Error *local_err = NULL;
1005 
1006     /* Parse json: pseudo-protocol */
1007     if (filename && g_str_has_prefix(filename, "json:")) {
1008         QDict *json_options = parse_json_filename(filename, &local_err);
1009         if (local_err) {
1010             error_propagate(errp, local_err);
1011             return -EINVAL;
1012         }
1013 
1014         /* Options given in the filename have lower priority than options
1015          * specified directly */
1016         qdict_join(*options, json_options, false);
1017         QDECREF(json_options);
1018         *pfilename = filename = NULL;
1019     }
1020 
1021     drvname = qdict_get_try_str(*options, "driver");
1022     if (drvname) {
1023         drv = bdrv_find_format(drvname);
1024         if (!drv) {
1025             error_setg(errp, "Unknown driver '%s'", drvname);
1026             return -ENOENT;
1027         }
1028         /* If the user has explicitly specified the driver, this choice should
1029          * override the BDRV_O_PROTOCOL flag */
1030         protocol = drv->bdrv_file_open;
1031     }
1032 
1033     if (protocol) {
1034         *flags |= BDRV_O_PROTOCOL;
1035     } else {
1036         *flags &= ~BDRV_O_PROTOCOL;
1037     }
1038 
1039     /* Fetch the file name from the options QDict if necessary */
1040     if (protocol && filename) {
1041         if (!qdict_haskey(*options, "filename")) {
1042             qdict_put(*options, "filename", qstring_from_str(filename));
1043             parse_filename = true;
1044         } else {
1045             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1046                              "the same time");
1047             return -EINVAL;
1048         }
1049     }
1050 
1051     /* Find the right block driver */
1052     filename = qdict_get_try_str(*options, "filename");
1053 
1054     if (!drvname && protocol) {
1055         if (filename) {
1056             drv = bdrv_find_protocol(filename, parse_filename, errp);
1057             if (!drv) {
1058                 return -EINVAL;
1059             }
1060 
1061             drvname = drv->format_name;
1062             qdict_put(*options, "driver", qstring_from_str(drvname));
1063         } else {
1064             error_setg(errp, "Must specify either driver or file");
1065             return -EINVAL;
1066         }
1067     }
1068 
1069     assert(drv || !protocol);
1070 
1071     /* Driver-specific filename parsing */
1072     if (drv && drv->bdrv_parse_filename && parse_filename) {
1073         drv->bdrv_parse_filename(filename, *options, &local_err);
1074         if (local_err) {
1075             error_propagate(errp, local_err);
1076             return -EINVAL;
1077         }
1078 
1079         if (!drv->bdrv_needs_filename) {
1080             qdict_del(*options, "filename");
1081         }
1082     }
1083 
1084     return 0;
1085 }
1086 
1087 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1088                                     BlockDriverState *child_bs,
1089                                     const BdrvChildRole *child_role)
1090 {
1091     BdrvChild *child = g_new(BdrvChild, 1);
1092     *child = (BdrvChild) {
1093         .bs     = child_bs,
1094         .role   = child_role,
1095     };
1096 
1097     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1098     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1099 
1100     return child;
1101 }
1102 
1103 static void bdrv_detach_child(BdrvChild *child)
1104 {
1105     QLIST_REMOVE(child, next);
1106     QLIST_REMOVE(child, next_parent);
1107     g_free(child);
1108 }
1109 
1110 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1111 {
1112     BlockDriverState *child_bs;
1113 
1114     if (child == NULL) {
1115         return;
1116     }
1117 
1118     if (child->bs->inherits_from == parent) {
1119         child->bs->inherits_from = NULL;
1120     }
1121 
1122     child_bs = child->bs;
1123     bdrv_detach_child(child);
1124     bdrv_unref(child_bs);
1125 }
1126 
1127 /*
1128  * Sets the backing file link of a BDS. A new reference is created; callers
1129  * which don't need their own reference any more must call bdrv_unref().
1130  */
1131 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1132 {
1133     if (backing_hd) {
1134         bdrv_ref(backing_hd);
1135     }
1136 
1137     if (bs->backing) {
1138         assert(bs->backing_blocker);
1139         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1140         bdrv_unref_child(bs, bs->backing);
1141     } else if (backing_hd) {
1142         error_setg(&bs->backing_blocker,
1143                    "node is used as backing hd of '%s'",
1144                    bdrv_get_device_or_node_name(bs));
1145     }
1146 
1147     if (!backing_hd) {
1148         error_free(bs->backing_blocker);
1149         bs->backing_blocker = NULL;
1150         bs->backing = NULL;
1151         goto out;
1152     }
1153     bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
1154     bs->open_flags &= ~BDRV_O_NO_BACKING;
1155     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1156     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1157             backing_hd->drv ? backing_hd->drv->format_name : "");
1158 
1159     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1160     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1161     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1162                     bs->backing_blocker);
1163 out:
1164     bdrv_refresh_limits(bs, NULL);
1165 }
1166 
1167 /*
1168  * Opens the backing file for a BlockDriverState if not yet open
1169  *
1170  * options is a QDict of options to pass to the block drivers, or NULL for an
1171  * empty set of options. The reference to the QDict is transferred to this
1172  * function (even on failure), so if the caller intends to reuse the dictionary,
1173  * it needs to use QINCREF() before calling bdrv_file_open.
1174  */
1175 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1176 {
1177     char *backing_filename = g_malloc0(PATH_MAX);
1178     int ret = 0;
1179     BlockDriverState *backing_hd;
1180     Error *local_err = NULL;
1181 
1182     if (bs->backing != NULL) {
1183         QDECREF(options);
1184         goto free_exit;
1185     }
1186 
1187     /* NULL means an empty set of options */
1188     if (options == NULL) {
1189         options = qdict_new();
1190     }
1191 
1192     bs->open_flags &= ~BDRV_O_NO_BACKING;
1193     if (qdict_haskey(options, "file.filename")) {
1194         backing_filename[0] = '\0';
1195     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1196         QDECREF(options);
1197         goto free_exit;
1198     } else {
1199         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1200                                        &local_err);
1201         if (local_err) {
1202             ret = -EINVAL;
1203             error_propagate(errp, local_err);
1204             QDECREF(options);
1205             goto free_exit;
1206         }
1207     }
1208 
1209     if (!bs->drv || !bs->drv->supports_backing) {
1210         ret = -EINVAL;
1211         error_setg(errp, "Driver doesn't support backing files");
1212         QDECREF(options);
1213         goto free_exit;
1214     }
1215 
1216     backing_hd = bdrv_new();
1217 
1218     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1219         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1220     }
1221 
1222     assert(bs->backing == NULL);
1223     ret = bdrv_open_inherit(&backing_hd,
1224                             *backing_filename ? backing_filename : NULL,
1225                             NULL, options, 0, bs, &child_backing, &local_err);
1226     if (ret < 0) {
1227         bdrv_unref(backing_hd);
1228         backing_hd = NULL;
1229         bs->open_flags |= BDRV_O_NO_BACKING;
1230         error_setg(errp, "Could not open backing file: %s",
1231                    error_get_pretty(local_err));
1232         error_free(local_err);
1233         goto free_exit;
1234     }
1235 
1236     /* Hook up the backing file link; drop our reference, bs owns the
1237      * backing_hd reference now */
1238     bdrv_set_backing_hd(bs, backing_hd);
1239     bdrv_unref(backing_hd);
1240 
1241 free_exit:
1242     g_free(backing_filename);
1243     return ret;
1244 }
1245 
1246 /*
1247  * Opens a disk image whose options are given as BlockdevRef in another block
1248  * device's options.
1249  *
1250  * If allow_none is true, no image will be opened if filename is false and no
1251  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1252  *
1253  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1254  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1255  * itself, all options starting with "${bdref_key}." are considered part of the
1256  * BlockdevRef.
1257  *
1258  * The BlockdevRef will be removed from the options QDict.
1259  */
1260 BdrvChild *bdrv_open_child(const char *filename,
1261                            QDict *options, const char *bdref_key,
1262                            BlockDriverState* parent,
1263                            const BdrvChildRole *child_role,
1264                            bool allow_none, Error **errp)
1265 {
1266     BdrvChild *c = NULL;
1267     BlockDriverState *bs;
1268     QDict *image_options;
1269     int ret;
1270     char *bdref_key_dot;
1271     const char *reference;
1272 
1273     assert(child_role != NULL);
1274 
1275     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1276     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1277     g_free(bdref_key_dot);
1278 
1279     reference = qdict_get_try_str(options, bdref_key);
1280     if (!filename && !reference && !qdict_size(image_options)) {
1281         if (!allow_none) {
1282             error_setg(errp, "A block device must be specified for \"%s\"",
1283                        bdref_key);
1284         }
1285         QDECREF(image_options);
1286         goto done;
1287     }
1288 
1289     bs = NULL;
1290     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1291                             parent, child_role, errp);
1292     if (ret < 0) {
1293         goto done;
1294     }
1295 
1296     c = bdrv_attach_child(parent, bs, child_role);
1297 
1298 done:
1299     qdict_del(options, bdref_key);
1300     return c;
1301 }
1302 
1303 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1304 {
1305     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1306     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1307     int64_t total_size;
1308     QemuOpts *opts = NULL;
1309     QDict *snapshot_options;
1310     BlockDriverState *bs_snapshot;
1311     Error *local_err = NULL;
1312     int ret;
1313 
1314     /* if snapshot, we create a temporary backing file and open it
1315        instead of opening 'filename' directly */
1316 
1317     /* Get the required size from the image */
1318     total_size = bdrv_getlength(bs);
1319     if (total_size < 0) {
1320         ret = total_size;
1321         error_setg_errno(errp, -total_size, "Could not get image size");
1322         goto out;
1323     }
1324 
1325     /* Create the temporary image */
1326     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1327     if (ret < 0) {
1328         error_setg_errno(errp, -ret, "Could not get temporary filename");
1329         goto out;
1330     }
1331 
1332     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1333                             &error_abort);
1334     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1335     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1336     qemu_opts_del(opts);
1337     if (ret < 0) {
1338         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1339                          "'%s': %s", tmp_filename,
1340                          error_get_pretty(local_err));
1341         error_free(local_err);
1342         goto out;
1343     }
1344 
1345     /* Prepare a new options QDict for the temporary file */
1346     snapshot_options = qdict_new();
1347     qdict_put(snapshot_options, "file.driver",
1348               qstring_from_str("file"));
1349     qdict_put(snapshot_options, "file.filename",
1350               qstring_from_str(tmp_filename));
1351     qdict_put(snapshot_options, "driver",
1352               qstring_from_str("qcow2"));
1353 
1354     bs_snapshot = bdrv_new();
1355 
1356     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1357                     flags, &local_err);
1358     if (ret < 0) {
1359         error_propagate(errp, local_err);
1360         goto out;
1361     }
1362 
1363     bdrv_append(bs_snapshot, bs);
1364 
1365 out:
1366     g_free(tmp_filename);
1367     return ret;
1368 }
1369 
1370 /*
1371  * Opens a disk image (raw, qcow2, vmdk, ...)
1372  *
1373  * options is a QDict of options to pass to the block drivers, or NULL for an
1374  * empty set of options. The reference to the QDict belongs to the block layer
1375  * after the call (even on failure), so if the caller intends to reuse the
1376  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1377  *
1378  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1379  * If it is not NULL, the referenced BDS will be reused.
1380  *
1381  * The reference parameter may be used to specify an existing block device which
1382  * should be opened. If specified, neither options nor a filename may be given,
1383  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1384  */
1385 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1386                              const char *reference, QDict *options, int flags,
1387                              BlockDriverState *parent,
1388                              const BdrvChildRole *child_role, Error **errp)
1389 {
1390     int ret;
1391     BdrvChild *file = NULL;
1392     BlockDriverState *bs;
1393     BlockDriver *drv = NULL;
1394     const char *drvname;
1395     Error *local_err = NULL;
1396     int snapshot_flags = 0;
1397 
1398     assert(pbs);
1399     assert(!child_role || !flags);
1400     assert(!child_role == !parent);
1401 
1402     if (reference) {
1403         bool options_non_empty = options ? qdict_size(options) : false;
1404         QDECREF(options);
1405 
1406         if (*pbs) {
1407             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1408                        "another block device");
1409             return -EINVAL;
1410         }
1411 
1412         if (filename || options_non_empty) {
1413             error_setg(errp, "Cannot reference an existing block device with "
1414                        "additional options or a new filename");
1415             return -EINVAL;
1416         }
1417 
1418         bs = bdrv_lookup_bs(reference, reference, errp);
1419         if (!bs) {
1420             return -ENODEV;
1421         }
1422         bdrv_ref(bs);
1423         *pbs = bs;
1424         return 0;
1425     }
1426 
1427     if (*pbs) {
1428         bs = *pbs;
1429     } else {
1430         bs = bdrv_new();
1431     }
1432 
1433     /* NULL means an empty set of options */
1434     if (options == NULL) {
1435         options = qdict_new();
1436     }
1437 
1438     if (child_role) {
1439         bs->inherits_from = parent;
1440         flags = child_role->inherit_flags(parent->open_flags);
1441     }
1442 
1443     ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1444     if (local_err) {
1445         goto fail;
1446     }
1447 
1448     /* Find the right image format driver */
1449     drvname = qdict_get_try_str(options, "driver");
1450     if (drvname) {
1451         drv = bdrv_find_format(drvname);
1452         qdict_del(options, "driver");
1453         if (!drv) {
1454             error_setg(errp, "Unknown driver: '%s'", drvname);
1455             ret = -EINVAL;
1456             goto fail;
1457         }
1458     }
1459 
1460     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1461 
1462     bs->open_flags = flags;
1463     bs->options = options;
1464     options = qdict_clone_shallow(options);
1465 
1466     /* Open image file without format layer */
1467     if ((flags & BDRV_O_PROTOCOL) == 0) {
1468         if (flags & BDRV_O_RDWR) {
1469             flags |= BDRV_O_ALLOW_RDWR;
1470         }
1471         if (flags & BDRV_O_SNAPSHOT) {
1472             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1473             flags = bdrv_backing_flags(flags);
1474         }
1475 
1476         bs->open_flags = flags;
1477 
1478         file = bdrv_open_child(filename, options, "file", bs,
1479                                &child_file, true, &local_err);
1480         if (local_err) {
1481             ret = -EINVAL;
1482             goto fail;
1483         }
1484     }
1485 
1486     /* Image format probing */
1487     bs->probed = !drv;
1488     if (!drv && file) {
1489         ret = find_image_format(file->bs, filename, &drv, &local_err);
1490         if (ret < 0) {
1491             goto fail;
1492         }
1493     } else if (!drv) {
1494         error_setg(errp, "Must specify either driver or file");
1495         ret = -EINVAL;
1496         goto fail;
1497     }
1498 
1499     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1500     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1501     /* file must be NULL if a protocol BDS is about to be created
1502      * (the inverse results in an error message from bdrv_open_common()) */
1503     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1504 
1505     /* Open the image */
1506     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1507     if (ret < 0) {
1508         goto fail;
1509     }
1510 
1511     if (file && (bs->file != file)) {
1512         bdrv_unref_child(bs, file);
1513         file = NULL;
1514     }
1515 
1516     /* If there is a backing file, use it */
1517     if ((flags & BDRV_O_NO_BACKING) == 0) {
1518         QDict *backing_options;
1519 
1520         qdict_extract_subqdict(options, &backing_options, "backing.");
1521         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1522         if (ret < 0) {
1523             goto close_and_fail;
1524         }
1525     }
1526 
1527     bdrv_refresh_filename(bs);
1528 
1529     /* Check if any unknown options were used */
1530     if (options && (qdict_size(options) != 0)) {
1531         const QDictEntry *entry = qdict_first(options);
1532         if (flags & BDRV_O_PROTOCOL) {
1533             error_setg(errp, "Block protocol '%s' doesn't support the option "
1534                        "'%s'", drv->format_name, entry->key);
1535         } else {
1536             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1537                        "support the option '%s'", drv->format_name,
1538                        bdrv_get_device_name(bs), entry->key);
1539         }
1540 
1541         ret = -EINVAL;
1542         goto close_and_fail;
1543     }
1544 
1545     if (!bdrv_key_required(bs)) {
1546         if (bs->blk) {
1547             blk_dev_change_media_cb(bs->blk, true);
1548         }
1549     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1550                && !runstate_check(RUN_STATE_INMIGRATE)
1551                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1552         error_setg(errp,
1553                    "Guest must be stopped for opening of encrypted image");
1554         ret = -EBUSY;
1555         goto close_and_fail;
1556     }
1557 
1558     QDECREF(options);
1559     *pbs = bs;
1560 
1561     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1562      * temporary snapshot afterwards. */
1563     if (snapshot_flags) {
1564         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1565         if (local_err) {
1566             goto close_and_fail;
1567         }
1568     }
1569 
1570     return 0;
1571 
1572 fail:
1573     if (file != NULL) {
1574         bdrv_unref_child(bs, file);
1575     }
1576     QDECREF(bs->options);
1577     QDECREF(options);
1578     bs->options = NULL;
1579     if (!*pbs) {
1580         /* If *pbs is NULL, a new BDS has been created in this function and
1581            needs to be freed now. Otherwise, it does not need to be closed,
1582            since it has not really been opened yet. */
1583         bdrv_unref(bs);
1584     }
1585     if (local_err) {
1586         error_propagate(errp, local_err);
1587     }
1588     return ret;
1589 
1590 close_and_fail:
1591     /* See fail path, but now the BDS has to be always closed */
1592     if (*pbs) {
1593         bdrv_close(bs);
1594     } else {
1595         bdrv_unref(bs);
1596     }
1597     QDECREF(options);
1598     if (local_err) {
1599         error_propagate(errp, local_err);
1600     }
1601     return ret;
1602 }
1603 
1604 int bdrv_open(BlockDriverState **pbs, const char *filename,
1605               const char *reference, QDict *options, int flags, Error **errp)
1606 {
1607     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1608                              NULL, errp);
1609 }
1610 
1611 typedef struct BlockReopenQueueEntry {
1612      bool prepared;
1613      BDRVReopenState state;
1614      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1615 } BlockReopenQueueEntry;
1616 
1617 /*
1618  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1619  * reopen of multiple devices.
1620  *
1621  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1622  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1623  * be created and initialized. This newly created BlockReopenQueue should be
1624  * passed back in for subsequent calls that are intended to be of the same
1625  * atomic 'set'.
1626  *
1627  * bs is the BlockDriverState to add to the reopen queue.
1628  *
1629  * options contains the changed options for the associated bs
1630  * (the BlockReopenQueue takes ownership)
1631  *
1632  * flags contains the open flags for the associated bs
1633  *
1634  * returns a pointer to bs_queue, which is either the newly allocated
1635  * bs_queue, or the existing bs_queue being used.
1636  *
1637  */
1638 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1639                                     BlockDriverState *bs,
1640                                     QDict *options, int flags)
1641 {
1642     assert(bs != NULL);
1643 
1644     BlockReopenQueueEntry *bs_entry;
1645     BdrvChild *child;
1646     QDict *old_options;
1647 
1648     if (bs_queue == NULL) {
1649         bs_queue = g_new0(BlockReopenQueue, 1);
1650         QSIMPLEQ_INIT(bs_queue);
1651     }
1652 
1653     if (!options) {
1654         options = qdict_new();
1655     }
1656 
1657     old_options = qdict_clone_shallow(bs->options);
1658     qdict_join(options, old_options, false);
1659     QDECREF(old_options);
1660 
1661     /* bdrv_open() masks this flag out */
1662     flags &= ~BDRV_O_PROTOCOL;
1663 
1664     QLIST_FOREACH(child, &bs->children, next) {
1665         int child_flags;
1666 
1667         if (child->bs->inherits_from != bs) {
1668             continue;
1669         }
1670 
1671         child_flags = child->role->inherit_flags(flags);
1672         /* TODO Pass down child flags (backing.*, extents.*, ...) */
1673         bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1674     }
1675 
1676     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1677     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1678 
1679     bs_entry->state.bs = bs;
1680     bs_entry->state.options = options;
1681     bs_entry->state.flags = flags;
1682 
1683     return bs_queue;
1684 }
1685 
1686 /*
1687  * Reopen multiple BlockDriverStates atomically & transactionally.
1688  *
1689  * The queue passed in (bs_queue) must have been built up previous
1690  * via bdrv_reopen_queue().
1691  *
1692  * Reopens all BDS specified in the queue, with the appropriate
1693  * flags.  All devices are prepared for reopen, and failure of any
1694  * device will cause all device changes to be abandonded, and intermediate
1695  * data cleaned up.
1696  *
1697  * If all devices prepare successfully, then the changes are committed
1698  * to all devices.
1699  *
1700  */
1701 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1702 {
1703     int ret = -1;
1704     BlockReopenQueueEntry *bs_entry, *next;
1705     Error *local_err = NULL;
1706 
1707     assert(bs_queue != NULL);
1708 
1709     bdrv_drain_all();
1710 
1711     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1712         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1713             error_propagate(errp, local_err);
1714             goto cleanup;
1715         }
1716         bs_entry->prepared = true;
1717     }
1718 
1719     /* If we reach this point, we have success and just need to apply the
1720      * changes
1721      */
1722     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1723         bdrv_reopen_commit(&bs_entry->state);
1724     }
1725 
1726     ret = 0;
1727 
1728 cleanup:
1729     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1730         if (ret && bs_entry->prepared) {
1731             bdrv_reopen_abort(&bs_entry->state);
1732         }
1733         QDECREF(bs_entry->state.options);
1734         g_free(bs_entry);
1735     }
1736     g_free(bs_queue);
1737     return ret;
1738 }
1739 
1740 
1741 /* Reopen a single BlockDriverState with the specified flags. */
1742 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1743 {
1744     int ret = -1;
1745     Error *local_err = NULL;
1746     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1747 
1748     ret = bdrv_reopen_multiple(queue, &local_err);
1749     if (local_err != NULL) {
1750         error_propagate(errp, local_err);
1751     }
1752     return ret;
1753 }
1754 
1755 
1756 /*
1757  * Prepares a BlockDriverState for reopen. All changes are staged in the
1758  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1759  * the block driver layer .bdrv_reopen_prepare()
1760  *
1761  * bs is the BlockDriverState to reopen
1762  * flags are the new open flags
1763  * queue is the reopen queue
1764  *
1765  * Returns 0 on success, non-zero on error.  On error errp will be set
1766  * as well.
1767  *
1768  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1769  * It is the responsibility of the caller to then call the abort() or
1770  * commit() for any other BDS that have been left in a prepare() state
1771  *
1772  */
1773 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1774                         Error **errp)
1775 {
1776     int ret = -1;
1777     Error *local_err = NULL;
1778     BlockDriver *drv;
1779 
1780     assert(reopen_state != NULL);
1781     assert(reopen_state->bs->drv != NULL);
1782     drv = reopen_state->bs->drv;
1783 
1784     /* if we are to stay read-only, do not allow permission change
1785      * to r/w */
1786     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1787         reopen_state->flags & BDRV_O_RDWR) {
1788         error_setg(errp, "Node '%s' is read only",
1789                    bdrv_get_device_or_node_name(reopen_state->bs));
1790         goto error;
1791     }
1792 
1793 
1794     ret = bdrv_flush(reopen_state->bs);
1795     if (ret) {
1796         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1797                   strerror(-ret));
1798         goto error;
1799     }
1800 
1801     if (drv->bdrv_reopen_prepare) {
1802         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1803         if (ret) {
1804             if (local_err != NULL) {
1805                 error_propagate(errp, local_err);
1806             } else {
1807                 error_setg(errp, "failed while preparing to reopen image '%s'",
1808                            reopen_state->bs->filename);
1809             }
1810             goto error;
1811         }
1812     } else {
1813         /* It is currently mandatory to have a bdrv_reopen_prepare()
1814          * handler for each supported drv. */
1815         error_setg(errp, "Block format '%s' used by node '%s' "
1816                    "does not support reopening files", drv->format_name,
1817                    bdrv_get_device_or_node_name(reopen_state->bs));
1818         ret = -1;
1819         goto error;
1820     }
1821 
1822     /* Options that are not handled are only okay if they are unchanged
1823      * compared to the old state. It is expected that some options are only
1824      * used for the initial open, but not reopen (e.g. filename) */
1825     if (qdict_size(reopen_state->options)) {
1826         const QDictEntry *entry = qdict_first(reopen_state->options);
1827 
1828         do {
1829             QString *new_obj = qobject_to_qstring(entry->value);
1830             const char *new = qstring_get_str(new_obj);
1831             const char *old = qdict_get_try_str(reopen_state->bs->options,
1832                                                 entry->key);
1833 
1834             if (!old || strcmp(new, old)) {
1835                 error_setg(errp, "Cannot change the option '%s'", entry->key);
1836                 ret = -EINVAL;
1837                 goto error;
1838             }
1839         } while ((entry = qdict_next(reopen_state->options, entry)));
1840     }
1841 
1842     ret = 0;
1843 
1844 error:
1845     return ret;
1846 }
1847 
1848 /*
1849  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1850  * makes them final by swapping the staging BlockDriverState contents into
1851  * the active BlockDriverState contents.
1852  */
1853 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1854 {
1855     BlockDriver *drv;
1856 
1857     assert(reopen_state != NULL);
1858     drv = reopen_state->bs->drv;
1859     assert(drv != NULL);
1860 
1861     /* If there are any driver level actions to take */
1862     if (drv->bdrv_reopen_commit) {
1863         drv->bdrv_reopen_commit(reopen_state);
1864     }
1865 
1866     /* set BDS specific flags now */
1867     reopen_state->bs->open_flags         = reopen_state->flags;
1868     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1869                                               BDRV_O_CACHE_WB);
1870     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1871 
1872     bdrv_refresh_limits(reopen_state->bs, NULL);
1873 }
1874 
1875 /*
1876  * Abort the reopen, and delete and free the staged changes in
1877  * reopen_state
1878  */
1879 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1880 {
1881     BlockDriver *drv;
1882 
1883     assert(reopen_state != NULL);
1884     drv = reopen_state->bs->drv;
1885     assert(drv != NULL);
1886 
1887     if (drv->bdrv_reopen_abort) {
1888         drv->bdrv_reopen_abort(reopen_state);
1889     }
1890 }
1891 
1892 
1893 void bdrv_close(BlockDriverState *bs)
1894 {
1895     BdrvAioNotifier *ban, *ban_next;
1896 
1897     if (bs->job) {
1898         block_job_cancel_sync(bs->job);
1899     }
1900 
1901     /* Disable I/O limits and drain all pending throttled requests */
1902     if (bs->io_limits_enabled) {
1903         bdrv_io_limits_disable(bs);
1904     }
1905 
1906     bdrv_drain(bs); /* complete I/O */
1907     bdrv_flush(bs);
1908     bdrv_drain(bs); /* in case flush left pending I/O */
1909     notifier_list_notify(&bs->close_notifiers, bs);
1910 
1911     if (bs->drv) {
1912         BdrvChild *child, *next;
1913 
1914         bs->drv->bdrv_close(bs);
1915         bs->drv = NULL;
1916 
1917         bdrv_set_backing_hd(bs, NULL);
1918 
1919         if (bs->file != NULL) {
1920             bdrv_unref_child(bs, bs->file);
1921             bs->file = NULL;
1922         }
1923 
1924         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1925             /* TODO Remove bdrv_unref() from drivers' close function and use
1926              * bdrv_unref_child() here */
1927             if (child->bs->inherits_from == bs) {
1928                 child->bs->inherits_from = NULL;
1929             }
1930             bdrv_detach_child(child);
1931         }
1932 
1933         g_free(bs->opaque);
1934         bs->opaque = NULL;
1935         bs->copy_on_read = 0;
1936         bs->backing_file[0] = '\0';
1937         bs->backing_format[0] = '\0';
1938         bs->total_sectors = 0;
1939         bs->encrypted = 0;
1940         bs->valid_key = 0;
1941         bs->sg = 0;
1942         bs->zero_beyond_eof = false;
1943         QDECREF(bs->options);
1944         bs->options = NULL;
1945         QDECREF(bs->full_open_options);
1946         bs->full_open_options = NULL;
1947     }
1948 
1949     if (bs->blk) {
1950         blk_dev_change_media_cb(bs->blk, false);
1951     }
1952 
1953     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1954         g_free(ban);
1955     }
1956     QLIST_INIT(&bs->aio_notifiers);
1957 }
1958 
1959 void bdrv_close_all(void)
1960 {
1961     BlockDriverState *bs;
1962 
1963     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1964         AioContext *aio_context = bdrv_get_aio_context(bs);
1965 
1966         aio_context_acquire(aio_context);
1967         bdrv_close(bs);
1968         aio_context_release(aio_context);
1969     }
1970 }
1971 
1972 /* make a BlockDriverState anonymous by removing from bdrv_state and
1973  * graph_bdrv_state list.
1974    Also, NULL terminate the device_name to prevent double remove */
1975 void bdrv_make_anon(BlockDriverState *bs)
1976 {
1977     /*
1978      * Take care to remove bs from bdrv_states only when it's actually
1979      * in it.  Note that bs->device_list.tqe_prev is initially null,
1980      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1981      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1982      * resetting it to null on remove.
1983      */
1984     if (bs->device_list.tqe_prev) {
1985         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1986         bs->device_list.tqe_prev = NULL;
1987     }
1988     if (bs->node_name[0] != '\0') {
1989         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1990     }
1991     bs->node_name[0] = '\0';
1992 }
1993 
1994 /* Fields that need to stay with the top-level BDS */
1995 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1996                                      BlockDriverState *bs_src)
1997 {
1998     /* move some fields that need to stay attached to the device */
1999 
2000     /* dev info */
2001     bs_dest->guest_block_size   = bs_src->guest_block_size;
2002     bs_dest->copy_on_read       = bs_src->copy_on_read;
2003 
2004     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2005 
2006     /* r/w error */
2007     bs_dest->on_read_error      = bs_src->on_read_error;
2008     bs_dest->on_write_error     = bs_src->on_write_error;
2009 
2010     /* i/o status */
2011     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
2012     bs_dest->iostatus           = bs_src->iostatus;
2013 
2014     /* dirty bitmap */
2015     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2016 }
2017 
2018 static void change_parent_backing_link(BlockDriverState *from,
2019                                        BlockDriverState *to)
2020 {
2021     BdrvChild *c, *next;
2022 
2023     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2024         assert(c->role != &child_backing);
2025         c->bs = to;
2026         QLIST_REMOVE(c, next_parent);
2027         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2028         bdrv_ref(to);
2029         bdrv_unref(from);
2030     }
2031     if (from->blk) {
2032         blk_set_bs(from->blk, to);
2033         if (!to->device_list.tqe_prev) {
2034             QTAILQ_INSERT_BEFORE(from, to, device_list);
2035         }
2036         QTAILQ_REMOVE(&bdrv_states, from, device_list);
2037     }
2038 }
2039 
2040 static void swap_feature_fields(BlockDriverState *bs_top,
2041                                 BlockDriverState *bs_new)
2042 {
2043     BlockDriverState tmp;
2044 
2045     bdrv_move_feature_fields(&tmp, bs_top);
2046     bdrv_move_feature_fields(bs_top, bs_new);
2047     bdrv_move_feature_fields(bs_new, &tmp);
2048 
2049     assert(!bs_new->throttle_state);
2050     if (bs_top->throttle_state) {
2051         assert(bs_top->io_limits_enabled);
2052         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2053         bdrv_io_limits_disable(bs_top);
2054     }
2055 }
2056 
2057 /*
2058  * Add new bs contents at the top of an image chain while the chain is
2059  * live, while keeping required fields on the top layer.
2060  *
2061  * This will modify the BlockDriverState fields, and swap contents
2062  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2063  *
2064  * bs_new must not be attached to a BlockBackend.
2065  *
2066  * This function does not create any image files.
2067  *
2068  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2069  * that's what the callers commonly need. bs_new will be referenced by the old
2070  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2071  * reference of its own, it must call bdrv_ref().
2072  */
2073 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2074 {
2075     assert(!bdrv_requests_pending(bs_top));
2076     assert(!bdrv_requests_pending(bs_new));
2077 
2078     bdrv_ref(bs_top);
2079     change_parent_backing_link(bs_top, bs_new);
2080 
2081     /* Some fields always stay on top of the backing file chain */
2082     swap_feature_fields(bs_top, bs_new);
2083 
2084     bdrv_set_backing_hd(bs_new, bs_top);
2085     bdrv_unref(bs_top);
2086 
2087     /* bs_new is now referenced by its new parents, we don't need the
2088      * additional reference any more. */
2089     bdrv_unref(bs_new);
2090 }
2091 
2092 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2093 {
2094     assert(!bdrv_requests_pending(old));
2095     assert(!bdrv_requests_pending(new));
2096 
2097     bdrv_ref(old);
2098 
2099     if (old->blk) {
2100         /* As long as these fields aren't in BlockBackend, but in the top-level
2101          * BlockDriverState, it's not possible for a BDS to have two BBs.
2102          *
2103          * We really want to copy the fields from old to new, but we go for a
2104          * swap instead so that pointers aren't duplicated and cause trouble.
2105          * (Also, bdrv_swap() used to do the same.) */
2106         assert(!new->blk);
2107         swap_feature_fields(old, new);
2108     }
2109     change_parent_backing_link(old, new);
2110 
2111     /* Change backing files if a previously independent node is added to the
2112      * chain. For active commit, we replace top by its own (indirect) backing
2113      * file and don't do anything here so we don't build a loop. */
2114     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2115         bdrv_set_backing_hd(new, backing_bs(old));
2116         bdrv_set_backing_hd(old, NULL);
2117     }
2118 
2119     bdrv_unref(old);
2120 }
2121 
2122 static void bdrv_delete(BlockDriverState *bs)
2123 {
2124     assert(!bs->job);
2125     assert(bdrv_op_blocker_is_empty(bs));
2126     assert(!bs->refcnt);
2127     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2128 
2129     bdrv_close(bs);
2130 
2131     /* remove from list, if necessary */
2132     bdrv_make_anon(bs);
2133 
2134     g_free(bs);
2135 }
2136 
2137 /*
2138  * Run consistency checks on an image
2139  *
2140  * Returns 0 if the check could be completed (it doesn't mean that the image is
2141  * free of errors) or -errno when an internal error occurred. The results of the
2142  * check are stored in res.
2143  */
2144 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2145 {
2146     if (bs->drv == NULL) {
2147         return -ENOMEDIUM;
2148     }
2149     if (bs->drv->bdrv_check == NULL) {
2150         return -ENOTSUP;
2151     }
2152 
2153     memset(res, 0, sizeof(*res));
2154     return bs->drv->bdrv_check(bs, res, fix);
2155 }
2156 
2157 #define COMMIT_BUF_SECTORS 2048
2158 
2159 /* commit COW file into the raw image */
2160 int bdrv_commit(BlockDriverState *bs)
2161 {
2162     BlockDriver *drv = bs->drv;
2163     int64_t sector, total_sectors, length, backing_length;
2164     int n, ro, open_flags;
2165     int ret = 0;
2166     uint8_t *buf = NULL;
2167 
2168     if (!drv)
2169         return -ENOMEDIUM;
2170 
2171     if (!bs->backing) {
2172         return -ENOTSUP;
2173     }
2174 
2175     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2176         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2177         return -EBUSY;
2178     }
2179 
2180     ro = bs->backing->bs->read_only;
2181     open_flags =  bs->backing->bs->open_flags;
2182 
2183     if (ro) {
2184         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2185             return -EACCES;
2186         }
2187     }
2188 
2189     length = bdrv_getlength(bs);
2190     if (length < 0) {
2191         ret = length;
2192         goto ro_cleanup;
2193     }
2194 
2195     backing_length = bdrv_getlength(bs->backing->bs);
2196     if (backing_length < 0) {
2197         ret = backing_length;
2198         goto ro_cleanup;
2199     }
2200 
2201     /* If our top snapshot is larger than the backing file image,
2202      * grow the backing file image if possible.  If not possible,
2203      * we must return an error */
2204     if (length > backing_length) {
2205         ret = bdrv_truncate(bs->backing->bs, length);
2206         if (ret < 0) {
2207             goto ro_cleanup;
2208         }
2209     }
2210 
2211     total_sectors = length >> BDRV_SECTOR_BITS;
2212 
2213     /* qemu_try_blockalign() for bs will choose an alignment that works for
2214      * bs->backing->bs as well, so no need to compare the alignment manually. */
2215     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2216     if (buf == NULL) {
2217         ret = -ENOMEM;
2218         goto ro_cleanup;
2219     }
2220 
2221     for (sector = 0; sector < total_sectors; sector += n) {
2222         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2223         if (ret < 0) {
2224             goto ro_cleanup;
2225         }
2226         if (ret) {
2227             ret = bdrv_read(bs, sector, buf, n);
2228             if (ret < 0) {
2229                 goto ro_cleanup;
2230             }
2231 
2232             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2233             if (ret < 0) {
2234                 goto ro_cleanup;
2235             }
2236         }
2237     }
2238 
2239     if (drv->bdrv_make_empty) {
2240         ret = drv->bdrv_make_empty(bs);
2241         if (ret < 0) {
2242             goto ro_cleanup;
2243         }
2244         bdrv_flush(bs);
2245     }
2246 
2247     /*
2248      * Make sure all data we wrote to the backing device is actually
2249      * stable on disk.
2250      */
2251     if (bs->backing) {
2252         bdrv_flush(bs->backing->bs);
2253     }
2254 
2255     ret = 0;
2256 ro_cleanup:
2257     qemu_vfree(buf);
2258 
2259     if (ro) {
2260         /* ignoring error return here */
2261         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2262     }
2263 
2264     return ret;
2265 }
2266 
2267 int bdrv_commit_all(void)
2268 {
2269     BlockDriverState *bs;
2270 
2271     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2272         AioContext *aio_context = bdrv_get_aio_context(bs);
2273 
2274         aio_context_acquire(aio_context);
2275         if (bs->drv && bs->backing) {
2276             int ret = bdrv_commit(bs);
2277             if (ret < 0) {
2278                 aio_context_release(aio_context);
2279                 return ret;
2280             }
2281         }
2282         aio_context_release(aio_context);
2283     }
2284     return 0;
2285 }
2286 
2287 /*
2288  * Return values:
2289  * 0        - success
2290  * -EINVAL  - backing format specified, but no file
2291  * -ENOSPC  - can't update the backing file because no space is left in the
2292  *            image file header
2293  * -ENOTSUP - format driver doesn't support changing the backing file
2294  */
2295 int bdrv_change_backing_file(BlockDriverState *bs,
2296     const char *backing_file, const char *backing_fmt)
2297 {
2298     BlockDriver *drv = bs->drv;
2299     int ret;
2300 
2301     /* Backing file format doesn't make sense without a backing file */
2302     if (backing_fmt && !backing_file) {
2303         return -EINVAL;
2304     }
2305 
2306     if (drv->bdrv_change_backing_file != NULL) {
2307         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2308     } else {
2309         ret = -ENOTSUP;
2310     }
2311 
2312     if (ret == 0) {
2313         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2314         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2315     }
2316     return ret;
2317 }
2318 
2319 /*
2320  * Finds the image layer in the chain that has 'bs' as its backing file.
2321  *
2322  * active is the current topmost image.
2323  *
2324  * Returns NULL if bs is not found in active's image chain,
2325  * or if active == bs.
2326  *
2327  * Returns the bottommost base image if bs == NULL.
2328  */
2329 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2330                                     BlockDriverState *bs)
2331 {
2332     while (active && bs != backing_bs(active)) {
2333         active = backing_bs(active);
2334     }
2335 
2336     return active;
2337 }
2338 
2339 /* Given a BDS, searches for the base layer. */
2340 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2341 {
2342     return bdrv_find_overlay(bs, NULL);
2343 }
2344 
2345 /*
2346  * Drops images above 'base' up to and including 'top', and sets the image
2347  * above 'top' to have base as its backing file.
2348  *
2349  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2350  * information in 'bs' can be properly updated.
2351  *
2352  * E.g., this will convert the following chain:
2353  * bottom <- base <- intermediate <- top <- active
2354  *
2355  * to
2356  *
2357  * bottom <- base <- active
2358  *
2359  * It is allowed for bottom==base, in which case it converts:
2360  *
2361  * base <- intermediate <- top <- active
2362  *
2363  * to
2364  *
2365  * base <- active
2366  *
2367  * If backing_file_str is non-NULL, it will be used when modifying top's
2368  * overlay image metadata.
2369  *
2370  * Error conditions:
2371  *  if active == top, that is considered an error
2372  *
2373  */
2374 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2375                            BlockDriverState *base, const char *backing_file_str)
2376 {
2377     BlockDriverState *new_top_bs = NULL;
2378     int ret = -EIO;
2379 
2380     if (!top->drv || !base->drv) {
2381         goto exit;
2382     }
2383 
2384     new_top_bs = bdrv_find_overlay(active, top);
2385 
2386     if (new_top_bs == NULL) {
2387         /* we could not find the image above 'top', this is an error */
2388         goto exit;
2389     }
2390 
2391     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2392      * to do, no intermediate images */
2393     if (backing_bs(new_top_bs) == base) {
2394         ret = 0;
2395         goto exit;
2396     }
2397 
2398     /* Make sure that base is in the backing chain of top */
2399     if (!bdrv_chain_contains(top, base)) {
2400         goto exit;
2401     }
2402 
2403     /* success - we can delete the intermediate states, and link top->base */
2404     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2405     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2406                                    base->drv ? base->drv->format_name : "");
2407     if (ret) {
2408         goto exit;
2409     }
2410     bdrv_set_backing_hd(new_top_bs, base);
2411 
2412     ret = 0;
2413 exit:
2414     return ret;
2415 }
2416 
2417 /**
2418  * Truncate file to 'offset' bytes (needed only for file protocols)
2419  */
2420 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2421 {
2422     BlockDriver *drv = bs->drv;
2423     int ret;
2424     if (!drv)
2425         return -ENOMEDIUM;
2426     if (!drv->bdrv_truncate)
2427         return -ENOTSUP;
2428     if (bs->read_only)
2429         return -EACCES;
2430 
2431     ret = drv->bdrv_truncate(bs, offset);
2432     if (ret == 0) {
2433         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2434         bdrv_dirty_bitmap_truncate(bs);
2435         if (bs->blk) {
2436             blk_dev_resize_cb(bs->blk);
2437         }
2438     }
2439     return ret;
2440 }
2441 
2442 /**
2443  * Length of a allocated file in bytes. Sparse files are counted by actual
2444  * allocated space. Return < 0 if error or unknown.
2445  */
2446 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2447 {
2448     BlockDriver *drv = bs->drv;
2449     if (!drv) {
2450         return -ENOMEDIUM;
2451     }
2452     if (drv->bdrv_get_allocated_file_size) {
2453         return drv->bdrv_get_allocated_file_size(bs);
2454     }
2455     if (bs->file) {
2456         return bdrv_get_allocated_file_size(bs->file->bs);
2457     }
2458     return -ENOTSUP;
2459 }
2460 
2461 /**
2462  * Return number of sectors on success, -errno on error.
2463  */
2464 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2465 {
2466     BlockDriver *drv = bs->drv;
2467 
2468     if (!drv)
2469         return -ENOMEDIUM;
2470 
2471     if (drv->has_variable_length) {
2472         int ret = refresh_total_sectors(bs, bs->total_sectors);
2473         if (ret < 0) {
2474             return ret;
2475         }
2476     }
2477     return bs->total_sectors;
2478 }
2479 
2480 /**
2481  * Return length in bytes on success, -errno on error.
2482  * The length is always a multiple of BDRV_SECTOR_SIZE.
2483  */
2484 int64_t bdrv_getlength(BlockDriverState *bs)
2485 {
2486     int64_t ret = bdrv_nb_sectors(bs);
2487 
2488     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2489     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2490 }
2491 
2492 /* return 0 as number of sectors if no device present or error */
2493 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2494 {
2495     int64_t nb_sectors = bdrv_nb_sectors(bs);
2496 
2497     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2498 }
2499 
2500 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2501                        BlockdevOnError on_write_error)
2502 {
2503     bs->on_read_error = on_read_error;
2504     bs->on_write_error = on_write_error;
2505 }
2506 
2507 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2508 {
2509     return is_read ? bs->on_read_error : bs->on_write_error;
2510 }
2511 
2512 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2513 {
2514     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2515 
2516     switch (on_err) {
2517     case BLOCKDEV_ON_ERROR_ENOSPC:
2518         return (error == ENOSPC) ?
2519                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2520     case BLOCKDEV_ON_ERROR_STOP:
2521         return BLOCK_ERROR_ACTION_STOP;
2522     case BLOCKDEV_ON_ERROR_REPORT:
2523         return BLOCK_ERROR_ACTION_REPORT;
2524     case BLOCKDEV_ON_ERROR_IGNORE:
2525         return BLOCK_ERROR_ACTION_IGNORE;
2526     default:
2527         abort();
2528     }
2529 }
2530 
2531 static void send_qmp_error_event(BlockDriverState *bs,
2532                                  BlockErrorAction action,
2533                                  bool is_read, int error)
2534 {
2535     IoOperationType optype;
2536 
2537     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2538     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2539                                    bdrv_iostatus_is_enabled(bs),
2540                                    error == ENOSPC, strerror(error),
2541                                    &error_abort);
2542 }
2543 
2544 /* This is done by device models because, while the block layer knows
2545  * about the error, it does not know whether an operation comes from
2546  * the device or the block layer (from a job, for example).
2547  */
2548 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2549                        bool is_read, int error)
2550 {
2551     assert(error >= 0);
2552 
2553     if (action == BLOCK_ERROR_ACTION_STOP) {
2554         /* First set the iostatus, so that "info block" returns an iostatus
2555          * that matches the events raised so far (an additional error iostatus
2556          * is fine, but not a lost one).
2557          */
2558         bdrv_iostatus_set_err(bs, error);
2559 
2560         /* Then raise the request to stop the VM and the event.
2561          * qemu_system_vmstop_request_prepare has two effects.  First,
2562          * it ensures that the STOP event always comes after the
2563          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2564          * can observe the STOP event and do a "cont" before the STOP
2565          * event is issued, the VM will not stop.  In this case, vm_start()
2566          * also ensures that the STOP/RESUME pair of events is emitted.
2567          */
2568         qemu_system_vmstop_request_prepare();
2569         send_qmp_error_event(bs, action, is_read, error);
2570         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2571     } else {
2572         send_qmp_error_event(bs, action, is_read, error);
2573     }
2574 }
2575 
2576 int bdrv_is_read_only(BlockDriverState *bs)
2577 {
2578     return bs->read_only;
2579 }
2580 
2581 int bdrv_is_sg(BlockDriverState *bs)
2582 {
2583     return bs->sg;
2584 }
2585 
2586 int bdrv_enable_write_cache(BlockDriverState *bs)
2587 {
2588     return bs->enable_write_cache;
2589 }
2590 
2591 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2592 {
2593     bs->enable_write_cache = wce;
2594 
2595     /* so a reopen() will preserve wce */
2596     if (wce) {
2597         bs->open_flags |= BDRV_O_CACHE_WB;
2598     } else {
2599         bs->open_flags &= ~BDRV_O_CACHE_WB;
2600     }
2601 }
2602 
2603 int bdrv_is_encrypted(BlockDriverState *bs)
2604 {
2605     if (bs->backing && bs->backing->bs->encrypted) {
2606         return 1;
2607     }
2608     return bs->encrypted;
2609 }
2610 
2611 int bdrv_key_required(BlockDriverState *bs)
2612 {
2613     BdrvChild *backing = bs->backing;
2614 
2615     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2616         return 1;
2617     }
2618     return (bs->encrypted && !bs->valid_key);
2619 }
2620 
2621 int bdrv_set_key(BlockDriverState *bs, const char *key)
2622 {
2623     int ret;
2624     if (bs->backing && bs->backing->bs->encrypted) {
2625         ret = bdrv_set_key(bs->backing->bs, key);
2626         if (ret < 0)
2627             return ret;
2628         if (!bs->encrypted)
2629             return 0;
2630     }
2631     if (!bs->encrypted) {
2632         return -EINVAL;
2633     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2634         return -ENOMEDIUM;
2635     }
2636     ret = bs->drv->bdrv_set_key(bs, key);
2637     if (ret < 0) {
2638         bs->valid_key = 0;
2639     } else if (!bs->valid_key) {
2640         bs->valid_key = 1;
2641         if (bs->blk) {
2642             /* call the change callback now, we skipped it on open */
2643             blk_dev_change_media_cb(bs->blk, true);
2644         }
2645     }
2646     return ret;
2647 }
2648 
2649 /*
2650  * Provide an encryption key for @bs.
2651  * If @key is non-null:
2652  *     If @bs is not encrypted, fail.
2653  *     Else if the key is invalid, fail.
2654  *     Else set @bs's key to @key, replacing the existing key, if any.
2655  * If @key is null:
2656  *     If @bs is encrypted and still lacks a key, fail.
2657  *     Else do nothing.
2658  * On failure, store an error object through @errp if non-null.
2659  */
2660 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2661 {
2662     if (key) {
2663         if (!bdrv_is_encrypted(bs)) {
2664             error_setg(errp, "Node '%s' is not encrypted",
2665                       bdrv_get_device_or_node_name(bs));
2666         } else if (bdrv_set_key(bs, key) < 0) {
2667             error_setg(errp, QERR_INVALID_PASSWORD);
2668         }
2669     } else {
2670         if (bdrv_key_required(bs)) {
2671             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2672                       "'%s' (%s) is encrypted",
2673                       bdrv_get_device_or_node_name(bs),
2674                       bdrv_get_encrypted_filename(bs));
2675         }
2676     }
2677 }
2678 
2679 const char *bdrv_get_format_name(BlockDriverState *bs)
2680 {
2681     return bs->drv ? bs->drv->format_name : NULL;
2682 }
2683 
/*
 * qsort() comparison callback for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b point to
 * "const char *" slots and have to be dereferenced once before the strings
 * themselves can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2688 
2689 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2690                          void *opaque)
2691 {
2692     BlockDriver *drv;
2693     int count = 0;
2694     int i;
2695     const char **formats = NULL;
2696 
2697     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2698         if (drv->format_name) {
2699             bool found = false;
2700             int i = count;
2701             while (formats && i && !found) {
2702                 found = !strcmp(formats[--i], drv->format_name);
2703             }
2704 
2705             if (!found) {
2706                 formats = g_renew(const char *, formats, count + 1);
2707                 formats[count++] = drv->format_name;
2708             }
2709         }
2710     }
2711 
2712     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2713 
2714     for (i = 0; i < count; i++) {
2715         it(opaque, formats[i]);
2716     }
2717 
2718     g_free(formats);
2719 }
2720 
2721 /* This function is to find a node in the bs graph */
2722 BlockDriverState *bdrv_find_node(const char *node_name)
2723 {
2724     BlockDriverState *bs;
2725 
2726     assert(node_name);
2727 
2728     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2729         if (!strcmp(node_name, bs->node_name)) {
2730             return bs;
2731         }
2732     }
2733     return NULL;
2734 }
2735 
2736 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2737 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2738 {
2739     BlockDeviceInfoList *list, *entry;
2740     BlockDriverState *bs;
2741 
2742     list = NULL;
2743     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2744         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2745         if (!info) {
2746             qapi_free_BlockDeviceInfoList(list);
2747             return NULL;
2748         }
2749         entry = g_malloc0(sizeof(*entry));
2750         entry->value = info;
2751         entry->next = list;
2752         list = entry;
2753     }
2754 
2755     return list;
2756 }
2757 
2758 BlockDriverState *bdrv_lookup_bs(const char *device,
2759                                  const char *node_name,
2760                                  Error **errp)
2761 {
2762     BlockBackend *blk;
2763     BlockDriverState *bs;
2764 
2765     if (device) {
2766         blk = blk_by_name(device);
2767 
2768         if (blk) {
2769             return blk_bs(blk);
2770         }
2771     }
2772 
2773     if (node_name) {
2774         bs = bdrv_find_node(node_name);
2775 
2776         if (bs) {
2777             return bs;
2778         }
2779     }
2780 
2781     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2782                      device ? device : "",
2783                      node_name ? node_name : "");
2784     return NULL;
2785 }
2786 
2787 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2788  * return false.  If either argument is NULL, return false. */
2789 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2790 {
2791     while (top && top != base) {
2792         top = backing_bs(top);
2793     }
2794 
2795     return top != NULL;
2796 }
2797 
2798 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2799 {
2800     if (!bs) {
2801         return QTAILQ_FIRST(&graph_bdrv_states);
2802     }
2803     return QTAILQ_NEXT(bs, node_list);
2804 }
2805 
2806 BlockDriverState *bdrv_next(BlockDriverState *bs)
2807 {
2808     if (!bs) {
2809         return QTAILQ_FIRST(&bdrv_states);
2810     }
2811     return QTAILQ_NEXT(bs, device_list);
2812 }
2813 
2814 const char *bdrv_get_node_name(const BlockDriverState *bs)
2815 {
2816     return bs->node_name;
2817 }
2818 
2819 /* TODO check what callers really want: bs->node_name or blk_name() */
2820 const char *bdrv_get_device_name(const BlockDriverState *bs)
2821 {
2822     return bs->blk ? blk_name(bs->blk) : "";
2823 }
2824 
2825 /* This can be used to identify nodes that might not have a device
2826  * name associated. Since node and device names live in the same
2827  * namespace, the result is unambiguous. The exception is if both are
2828  * absent, then this returns an empty (non-null) string. */
2829 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2830 {
2831     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2832 }
2833 
2834 int bdrv_get_flags(BlockDriverState *bs)
2835 {
2836     return bs->open_flags;
2837 }
2838 
2839 int bdrv_has_zero_init_1(BlockDriverState *bs)
2840 {
2841     return 1;
2842 }
2843 
2844 int bdrv_has_zero_init(BlockDriverState *bs)
2845 {
2846     assert(bs->drv);
2847 
2848     /* If BS is a copy on write image, it is initialized to
2849        the contents of the base image, which may not be zeroes.  */
2850     if (bs->backing) {
2851         return 0;
2852     }
2853     if (bs->drv->bdrv_has_zero_init) {
2854         return bs->drv->bdrv_has_zero_init(bs);
2855     }
2856 
2857     /* safe default */
2858     return 0;
2859 }
2860 
2861 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2862 {
2863     BlockDriverInfo bdi;
2864 
2865     if (bs->backing) {
2866         return false;
2867     }
2868 
2869     if (bdrv_get_info(bs, &bdi) == 0) {
2870         return bdi.unallocated_blocks_are_zero;
2871     }
2872 
2873     return false;
2874 }
2875 
2876 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2877 {
2878     BlockDriverInfo bdi;
2879 
2880     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2881         return false;
2882     }
2883 
2884     if (bdrv_get_info(bs, &bdi) == 0) {
2885         return bdi.can_write_zeroes_with_unmap;
2886     }
2887 
2888     return false;
2889 }
2890 
2891 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2892 {
2893     if (bs->backing && bs->backing->bs->encrypted)
2894         return bs->backing_file;
2895     else if (bs->encrypted)
2896         return bs->filename;
2897     else
2898         return NULL;
2899 }
2900 
2901 void bdrv_get_backing_filename(BlockDriverState *bs,
2902                                char *filename, int filename_size)
2903 {
2904     pstrcpy(filename, filename_size, bs->backing_file);
2905 }
2906 
2907 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2908 {
2909     BlockDriver *drv = bs->drv;
2910     if (!drv)
2911         return -ENOMEDIUM;
2912     if (!drv->bdrv_get_info)
2913         return -ENOTSUP;
2914     memset(bdi, 0, sizeof(*bdi));
2915     return drv->bdrv_get_info(bs, bdi);
2916 }
2917 
2918 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2919 {
2920     BlockDriver *drv = bs->drv;
2921     if (drv && drv->bdrv_get_specific_info) {
2922         return drv->bdrv_get_specific_info(bs);
2923     }
2924     return NULL;
2925 }
2926 
2927 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2928 {
2929     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2930         return;
2931     }
2932 
2933     bs->drv->bdrv_debug_event(bs, event);
2934 }
2935 
2936 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2937                           const char *tag)
2938 {
2939     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2940         bs = bs->file ? bs->file->bs : NULL;
2941     }
2942 
2943     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2944         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2945     }
2946 
2947     return -ENOTSUP;
2948 }
2949 
2950 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2951 {
2952     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2953         bs = bs->file ? bs->file->bs : NULL;
2954     }
2955 
2956     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2957         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2958     }
2959 
2960     return -ENOTSUP;
2961 }
2962 
2963 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2964 {
2965     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2966         bs = bs->file ? bs->file->bs : NULL;
2967     }
2968 
2969     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2970         return bs->drv->bdrv_debug_resume(bs, tag);
2971     }
2972 
2973     return -ENOTSUP;
2974 }
2975 
2976 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2977 {
2978     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2979         bs = bs->file ? bs->file->bs : NULL;
2980     }
2981 
2982     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2983         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2984     }
2985 
2986     return false;
2987 }
2988 
2989 int bdrv_is_snapshot(BlockDriverState *bs)
2990 {
2991     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2992 }
2993 
2994 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2995  * relative, it must be relative to the chain.  So, passing in bs->filename
2996  * from a BDS as backing_file should not be done, as that may be relative to
2997  * the CWD rather than the chain. */
2998 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2999         const char *backing_file)
3000 {
3001     char *filename_full = NULL;
3002     char *backing_file_full = NULL;
3003     char *filename_tmp = NULL;
3004     int is_protocol = 0;
3005     BlockDriverState *curr_bs = NULL;
3006     BlockDriverState *retval = NULL;
3007 
3008     if (!bs || !bs->drv || !backing_file) {
3009         return NULL;
3010     }
3011 
3012     filename_full     = g_malloc(PATH_MAX);
3013     backing_file_full = g_malloc(PATH_MAX);
3014     filename_tmp      = g_malloc(PATH_MAX);
3015 
3016     is_protocol = path_has_protocol(backing_file);
3017 
3018     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3019 
3020         /* If either of the filename paths is actually a protocol, then
3021          * compare unmodified paths; otherwise make paths relative */
3022         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3023             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3024                 retval = curr_bs->backing->bs;
3025                 break;
3026             }
3027         } else {
3028             /* If not an absolute filename path, make it relative to the current
3029              * image's filename path */
3030             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3031                          backing_file);
3032 
3033             /* We are going to compare absolute pathnames */
3034             if (!realpath(filename_tmp, filename_full)) {
3035                 continue;
3036             }
3037 
3038             /* We need to make sure the backing filename we are comparing against
3039              * is relative to the current image filename (or absolute) */
3040             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3041                          curr_bs->backing_file);
3042 
3043             if (!realpath(filename_tmp, backing_file_full)) {
3044                 continue;
3045             }
3046 
3047             if (strcmp(backing_file_full, filename_full) == 0) {
3048                 retval = curr_bs->backing->bs;
3049                 break;
3050             }
3051         }
3052     }
3053 
3054     g_free(filename_full);
3055     g_free(backing_file_full);
3056     g_free(filename_tmp);
3057     return retval;
3058 }
3059 
3060 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3061 {
3062     if (!bs->drv) {
3063         return 0;
3064     }
3065 
3066     if (!bs->backing) {
3067         return 0;
3068     }
3069 
3070     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3071 }
3072 
3073 void bdrv_init(void)
3074 {
3075     module_call_init(MODULE_INIT_BLOCK);
3076 }
3077 
3078 void bdrv_init_with_whitelist(void)
3079 {
3080     use_bdrv_whitelist = 1;
3081     bdrv_init();
3082 }
3083 
3084 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3085 {
3086     Error *local_err = NULL;
3087     int ret;
3088 
3089     if (!bs->drv)  {
3090         return;
3091     }
3092 
3093     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3094         return;
3095     }
3096     bs->open_flags &= ~BDRV_O_INCOMING;
3097 
3098     if (bs->drv->bdrv_invalidate_cache) {
3099         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3100     } else if (bs->file) {
3101         bdrv_invalidate_cache(bs->file->bs, &local_err);
3102     }
3103     if (local_err) {
3104         error_propagate(errp, local_err);
3105         return;
3106     }
3107 
3108     ret = refresh_total_sectors(bs, bs->total_sectors);
3109     if (ret < 0) {
3110         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3111         return;
3112     }
3113 }
3114 
3115 void bdrv_invalidate_cache_all(Error **errp)
3116 {
3117     BlockDriverState *bs;
3118     Error *local_err = NULL;
3119 
3120     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3121         AioContext *aio_context = bdrv_get_aio_context(bs);
3122 
3123         aio_context_acquire(aio_context);
3124         bdrv_invalidate_cache(bs, &local_err);
3125         aio_context_release(aio_context);
3126         if (local_err) {
3127             error_propagate(errp, local_err);
3128             return;
3129         }
3130     }
3131 }
3132 
3133 /**************************************************************/
3134 /* removable device support */
3135 
3136 /**
3137  * Return TRUE if the media is present
3138  */
3139 int bdrv_is_inserted(BlockDriverState *bs)
3140 {
3141     BlockDriver *drv = bs->drv;
3142 
3143     if (!drv)
3144         return 0;
3145     if (!drv->bdrv_is_inserted)
3146         return 1;
3147     return drv->bdrv_is_inserted(bs);
3148 }
3149 
3150 /**
3151  * Return whether the media changed since the last call to this
3152  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3153  */
3154 int bdrv_media_changed(BlockDriverState *bs)
3155 {
3156     BlockDriver *drv = bs->drv;
3157 
3158     if (drv && drv->bdrv_media_changed) {
3159         return drv->bdrv_media_changed(bs);
3160     }
3161     return -ENOTSUP;
3162 }
3163 
3164 /**
3165  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3166  */
3167 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3168 {
3169     BlockDriver *drv = bs->drv;
3170     const char *device_name;
3171 
3172     if (drv && drv->bdrv_eject) {
3173         drv->bdrv_eject(bs, eject_flag);
3174     }
3175 
3176     device_name = bdrv_get_device_name(bs);
3177     if (device_name[0] != '\0') {
3178         qapi_event_send_device_tray_moved(device_name,
3179                                           eject_flag, &error_abort);
3180     }
3181 }
3182 
3183 /**
3184  * Lock or unlock the media (if it is locked, the user won't be able
3185  * to eject it manually).
3186  */
3187 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3188 {
3189     BlockDriver *drv = bs->drv;
3190 
3191     trace_bdrv_lock_medium(bs, locked);
3192 
3193     if (drv && drv->bdrv_lock_medium) {
3194         drv->bdrv_lock_medium(bs, locked);
3195     }
3196 }
3197 
3198 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3199 {
3200     bs->guest_block_size = align;
3201 }
3202 
3203 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3204 {
3205     BdrvDirtyBitmap *bm;
3206 
3207     assert(name);
3208     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3209         if (bm->name && !strcmp(name, bm->name)) {
3210             return bm;
3211         }
3212     }
3213     return NULL;
3214 }
3215 
3216 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3217 {
3218     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3219     g_free(bitmap->name);
3220     bitmap->name = NULL;
3221 }
3222 
3223 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3224                                           uint32_t granularity,
3225                                           const char *name,
3226                                           Error **errp)
3227 {
3228     int64_t bitmap_size;
3229     BdrvDirtyBitmap *bitmap;
3230     uint32_t sector_granularity;
3231 
3232     assert((granularity & (granularity - 1)) == 0);
3233 
3234     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3235         error_setg(errp, "Bitmap already exists: %s", name);
3236         return NULL;
3237     }
3238     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3239     assert(sector_granularity);
3240     bitmap_size = bdrv_nb_sectors(bs);
3241     if (bitmap_size < 0) {
3242         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3243         errno = -bitmap_size;
3244         return NULL;
3245     }
3246     bitmap = g_new0(BdrvDirtyBitmap, 1);
3247     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3248     bitmap->size = bitmap_size;
3249     bitmap->name = g_strdup(name);
3250     bitmap->disabled = false;
3251     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3252     return bitmap;
3253 }
3254 
3255 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3256 {
3257     return bitmap->successor;
3258 }
3259 
3260 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3261 {
3262     return !(bitmap->disabled || bitmap->successor);
3263 }
3264 
3265 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3266 {
3267     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3268         return DIRTY_BITMAP_STATUS_FROZEN;
3269     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3270         return DIRTY_BITMAP_STATUS_DISABLED;
3271     } else {
3272         return DIRTY_BITMAP_STATUS_ACTIVE;
3273     }
3274 }
3275 
3276 /**
3277  * Create a successor bitmap destined to replace this bitmap after an operation.
3278  * Requires that the bitmap is not frozen and has no successor.
3279  */
3280 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3281                                        BdrvDirtyBitmap *bitmap, Error **errp)
3282 {
3283     uint64_t granularity;
3284     BdrvDirtyBitmap *child;
3285 
3286     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3287         error_setg(errp, "Cannot create a successor for a bitmap that is "
3288                    "currently frozen");
3289         return -1;
3290     }
3291     assert(!bitmap->successor);
3292 
3293     /* Create an anonymous successor */
3294     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3295     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3296     if (!child) {
3297         return -1;
3298     }
3299 
3300     /* Successor will be on or off based on our current state. */
3301     child->disabled = bitmap->disabled;
3302 
3303     /* Install the successor and freeze the parent */
3304     bitmap->successor = child;
3305     return 0;
3306 }
3307 
3308 /**
3309  * For a bitmap with a successor, yield our name to the successor,
3310  * delete the old bitmap, and return a handle to the new bitmap.
3311  */
3312 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3313                                             BdrvDirtyBitmap *bitmap,
3314                                             Error **errp)
3315 {
3316     char *name;
3317     BdrvDirtyBitmap *successor = bitmap->successor;
3318 
3319     if (successor == NULL) {
3320         error_setg(errp, "Cannot relinquish control if "
3321                    "there's no successor present");
3322         return NULL;
3323     }
3324 
3325     name = bitmap->name;
3326     bitmap->name = NULL;
3327     successor->name = name;
3328     bitmap->successor = NULL;
3329     bdrv_release_dirty_bitmap(bs, bitmap);
3330 
3331     return successor;
3332 }
3333 
3334 /**
3335  * In cases of failure where we can no longer safely delete the parent,
3336  * we may wish to re-join the parent and child/successor.
3337  * The merged parent will be un-frozen, but not explicitly re-enabled.
3338  */
3339 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3340                                            BdrvDirtyBitmap *parent,
3341                                            Error **errp)
3342 {
3343     BdrvDirtyBitmap *successor = parent->successor;
3344 
3345     if (!successor) {
3346         error_setg(errp, "Cannot reclaim a successor when none is present");
3347         return NULL;
3348     }
3349 
3350     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3351         error_setg(errp, "Merging of parent and successor bitmap failed");
3352         return NULL;
3353     }
3354     bdrv_release_dirty_bitmap(bs, successor);
3355     parent->successor = NULL;
3356 
3357     return parent;
3358 }
3359 
3360 /**
3361  * Truncates _all_ bitmaps attached to a BDS.
3362  */
3363 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3364 {
3365     BdrvDirtyBitmap *bitmap;
3366     uint64_t size = bdrv_nb_sectors(bs);
3367 
3368     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3369         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3370         hbitmap_truncate(bitmap->bitmap, size);
3371         bitmap->size = size;
3372     }
3373 }
3374 
3375 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3376 {
3377     BdrvDirtyBitmap *bm, *next;
3378     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3379         if (bm == bitmap) {
3380             assert(!bdrv_dirty_bitmap_frozen(bm));
3381             QLIST_REMOVE(bitmap, list);
3382             hbitmap_free(bitmap->bitmap);
3383             g_free(bitmap->name);
3384             g_free(bitmap);
3385             return;
3386         }
3387     }
3388 }
3389 
3390 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3391 {
3392     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3393     bitmap->disabled = true;
3394 }
3395 
3396 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3397 {
3398     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3399     bitmap->disabled = false;
3400 }
3401 
3402 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3403 {
3404     BdrvDirtyBitmap *bm;
3405     BlockDirtyInfoList *list = NULL;
3406     BlockDirtyInfoList **plist = &list;
3407 
3408     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3409         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3410         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3411         info->count = bdrv_get_dirty_count(bm);
3412         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3413         info->has_name = !!bm->name;
3414         info->name = g_strdup(bm->name);
3415         info->status = bdrv_dirty_bitmap_status(bm);
3416         entry->value = info;
3417         *plist = entry;
3418         plist = &entry->next;
3419     }
3420 
3421     return list;
3422 }
3423 
3424 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3425 {
3426     if (bitmap) {
3427         return hbitmap_get(bitmap->bitmap, sector);
3428     } else {
3429         return 0;
3430     }
3431 }
3432 
3433 /**
3434  * Chooses a default granularity based on the existing cluster size,
3435  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3436  * is no cluster size information available.
3437  */
3438 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3439 {
3440     BlockDriverInfo bdi;
3441     uint32_t granularity;
3442 
3443     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3444         granularity = MAX(4096, bdi.cluster_size);
3445         granularity = MIN(65536, granularity);
3446     } else {
3447         granularity = 65536;
3448     }
3449 
3450     return granularity;
3451 }
3452 
3453 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3454 {
3455     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3456 }
3457 
3458 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3459 {
3460     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3461 }
3462 
3463 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3464                            int64_t cur_sector, int nr_sectors)
3465 {
3466     assert(bdrv_dirty_bitmap_enabled(bitmap));
3467     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3468 }
3469 
3470 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3471                              int64_t cur_sector, int nr_sectors)
3472 {
3473     assert(bdrv_dirty_bitmap_enabled(bitmap));
3474     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3475 }
3476 
3477 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3478 {
3479     assert(bdrv_dirty_bitmap_enabled(bitmap));
3480     hbitmap_reset_all(bitmap->bitmap);
3481 }
3482 
3483 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3484                     int nr_sectors)
3485 {
3486     BdrvDirtyBitmap *bitmap;
3487     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3488         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3489             continue;
3490         }
3491         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3492     }
3493 }
3494 
3495 /**
3496  * Advance an HBitmapIter to an arbitrary offset.
3497  */
3498 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3499 {
3500     assert(hbi->hb);
3501     hbitmap_iter_init(hbi, hbi->hb, offset);
3502 }
3503 
3504 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3505 {
3506     return hbitmap_count(bitmap->bitmap);
3507 }
3508 
3509 /* Get a reference to bs */
3510 void bdrv_ref(BlockDriverState *bs)
3511 {
3512     bs->refcnt++;
3513 }
3514 
3515 /* Release a previously grabbed reference to bs.
3516  * If after releasing, reference count is zero, the BlockDriverState is
3517  * deleted. */
3518 void bdrv_unref(BlockDriverState *bs)
3519 {
3520     if (!bs) {
3521         return;
3522     }
3523     assert(bs->refcnt > 0);
3524     if (--bs->refcnt == 0) {
3525         bdrv_delete(bs);
3526     }
3527 }
3528 
3529 struct BdrvOpBlocker {
3530     Error *reason;
3531     QLIST_ENTRY(BdrvOpBlocker) list;
3532 };
3533 
3534 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3535 {
3536     BdrvOpBlocker *blocker;
3537     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3538     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3539         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3540         if (errp) {
3541             error_setg(errp, "Node '%s' is busy: %s",
3542                        bdrv_get_device_or_node_name(bs),
3543                        error_get_pretty(blocker->reason));
3544         }
3545         return true;
3546     }
3547     return false;
3548 }
3549 
3550 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3551 {
3552     BdrvOpBlocker *blocker;
3553     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3554 
3555     blocker = g_new0(BdrvOpBlocker, 1);
3556     blocker->reason = reason;
3557     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3558 }
3559 
3560 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3561 {
3562     BdrvOpBlocker *blocker, *next;
3563     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3564     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3565         if (blocker->reason == reason) {
3566             QLIST_REMOVE(blocker, list);
3567             g_free(blocker);
3568         }
3569     }
3570 }
3571 
3572 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3573 {
3574     int i;
3575     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3576         bdrv_op_block(bs, i, reason);
3577     }
3578 }
3579 
3580 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3581 {
3582     int i;
3583     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3584         bdrv_op_unblock(bs, i, reason);
3585     }
3586 }
3587 
3588 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3589 {
3590     int i;
3591 
3592     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3593         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3594             return false;
3595         }
3596     }
3597     return true;
3598 }
3599 
3600 void bdrv_iostatus_enable(BlockDriverState *bs)
3601 {
3602     bs->iostatus_enabled = true;
3603     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3604 }
3605 
3606 /* The I/O status is only enabled if the drive explicitly
3607  * enables it _and_ the VM is configured to stop on errors */
3608 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3609 {
3610     return (bs->iostatus_enabled &&
3611            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3612             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3613             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3614 }
3615 
3616 void bdrv_iostatus_disable(BlockDriverState *bs)
3617 {
3618     bs->iostatus_enabled = false;
3619 }
3620 
3621 void bdrv_iostatus_reset(BlockDriverState *bs)
3622 {
3623     if (bdrv_iostatus_is_enabled(bs)) {
3624         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3625         if (bs->job) {
3626             block_job_iostatus_reset(bs->job);
3627         }
3628     }
3629 }
3630 
3631 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3632 {
3633     assert(bdrv_iostatus_is_enabled(bs));
3634     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3635         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3636                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3637     }
3638 }
3639 
3640 void bdrv_img_create(const char *filename, const char *fmt,
3641                      const char *base_filename, const char *base_fmt,
3642                      char *options, uint64_t img_size, int flags,
3643                      Error **errp, bool quiet)
3644 {
3645     QemuOptsList *create_opts = NULL;
3646     QemuOpts *opts = NULL;
3647     const char *backing_fmt, *backing_file;
3648     int64_t size;
3649     BlockDriver *drv, *proto_drv;
3650     Error *local_err = NULL;
3651     int ret = 0;
3652 
3653     /* Find driver and parse its options */
3654     drv = bdrv_find_format(fmt);
3655     if (!drv) {
3656         error_setg(errp, "Unknown file format '%s'", fmt);
3657         return;
3658     }
3659 
3660     proto_drv = bdrv_find_protocol(filename, true, errp);
3661     if (!proto_drv) {
3662         return;
3663     }
3664 
3665     if (!drv->create_opts) {
3666         error_setg(errp, "Format driver '%s' does not support image creation",
3667                    drv->format_name);
3668         return;
3669     }
3670 
3671     if (!proto_drv->create_opts) {
3672         error_setg(errp, "Protocol driver '%s' does not support image creation",
3673                    proto_drv->format_name);
3674         return;
3675     }
3676 
3677     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3678     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3679 
3680     /* Create parameter list with default values */
3681     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3682     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3683 
3684     /* Parse -o options */
3685     if (options) {
3686         qemu_opts_do_parse(opts, options, NULL, &local_err);
3687         if (local_err) {
3688             error_report_err(local_err);
3689             local_err = NULL;
3690             error_setg(errp, "Invalid options for file format '%s'", fmt);
3691             goto out;
3692         }
3693     }
3694 
3695     if (base_filename) {
3696         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3697         if (local_err) {
3698             error_setg(errp, "Backing file not supported for file format '%s'",
3699                        fmt);
3700             goto out;
3701         }
3702     }
3703 
3704     if (base_fmt) {
3705         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3706         if (local_err) {
3707             error_setg(errp, "Backing file format not supported for file "
3708                              "format '%s'", fmt);
3709             goto out;
3710         }
3711     }
3712 
3713     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3714     if (backing_file) {
3715         if (!strcmp(filename, backing_file)) {
3716             error_setg(errp, "Error: Trying to create an image with the "
3717                              "same filename as the backing file");
3718             goto out;
3719         }
3720     }
3721 
3722     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3723 
3724     // The size for the image must always be specified, with one exception:
3725     // If we are using a backing file, we can obtain the size from there
3726     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3727     if (size == -1) {
3728         if (backing_file) {
3729             BlockDriverState *bs;
3730             char *full_backing = g_new0(char, PATH_MAX);
3731             int64_t size;
3732             int back_flags;
3733             QDict *backing_options = NULL;
3734 
3735             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3736                                                          full_backing, PATH_MAX,
3737                                                          &local_err);
3738             if (local_err) {
3739                 g_free(full_backing);
3740                 goto out;
3741             }
3742 
3743             /* backing files always opened read-only */
3744             back_flags =
3745                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3746 
3747             if (backing_fmt) {
3748                 backing_options = qdict_new();
3749                 qdict_put(backing_options, "driver",
3750                           qstring_from_str(backing_fmt));
3751             }
3752 
3753             bs = NULL;
3754             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3755                             back_flags, &local_err);
3756             g_free(full_backing);
3757             if (ret < 0) {
3758                 goto out;
3759             }
3760             size = bdrv_getlength(bs);
3761             if (size < 0) {
3762                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3763                                  backing_file);
3764                 bdrv_unref(bs);
3765                 goto out;
3766             }
3767 
3768             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3769 
3770             bdrv_unref(bs);
3771         } else {
3772             error_setg(errp, "Image creation needs a size parameter");
3773             goto out;
3774         }
3775     }
3776 
3777     if (!quiet) {
3778         printf("Formatting '%s', fmt=%s ", filename, fmt);
3779         qemu_opts_print(opts, " ");
3780         puts("");
3781     }
3782 
3783     ret = bdrv_create(drv, filename, opts, &local_err);
3784 
3785     if (ret == -EFBIG) {
3786         /* This is generally a better message than whatever the driver would
3787          * deliver (especially because of the cluster_size_hint), since that
3788          * is most probably not much different from "image too large". */
3789         const char *cluster_size_hint = "";
3790         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3791             cluster_size_hint = " (try using a larger cluster size)";
3792         }
3793         error_setg(errp, "The image size is too large for file format '%s'"
3794                    "%s", fmt, cluster_size_hint);
3795         error_free(local_err);
3796         local_err = NULL;
3797     }
3798 
3799 out:
3800     qemu_opts_del(opts);
3801     qemu_opts_free(create_opts);
3802     if (local_err) {
3803         error_propagate(errp, local_err);
3804     }
3805 }
3806 
3807 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3808 {
3809     return bs->aio_context;
3810 }
3811 
3812 void bdrv_detach_aio_context(BlockDriverState *bs)
3813 {
3814     BdrvAioNotifier *baf;
3815 
3816     if (!bs->drv) {
3817         return;
3818     }
3819 
3820     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3821         baf->detach_aio_context(baf->opaque);
3822     }
3823 
3824     if (bs->io_limits_enabled) {
3825         throttle_timers_detach_aio_context(&bs->throttle_timers);
3826     }
3827     if (bs->drv->bdrv_detach_aio_context) {
3828         bs->drv->bdrv_detach_aio_context(bs);
3829     }
3830     if (bs->file) {
3831         bdrv_detach_aio_context(bs->file->bs);
3832     }
3833     if (bs->backing) {
3834         bdrv_detach_aio_context(bs->backing->bs);
3835     }
3836 
3837     bs->aio_context = NULL;
3838 }
3839 
3840 void bdrv_attach_aio_context(BlockDriverState *bs,
3841                              AioContext *new_context)
3842 {
3843     BdrvAioNotifier *ban;
3844 
3845     if (!bs->drv) {
3846         return;
3847     }
3848 
3849     bs->aio_context = new_context;
3850 
3851     if (bs->backing) {
3852         bdrv_attach_aio_context(bs->backing->bs, new_context);
3853     }
3854     if (bs->file) {
3855         bdrv_attach_aio_context(bs->file->bs, new_context);
3856     }
3857     if (bs->drv->bdrv_attach_aio_context) {
3858         bs->drv->bdrv_attach_aio_context(bs, new_context);
3859     }
3860     if (bs->io_limits_enabled) {
3861         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3862     }
3863 
3864     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3865         ban->attached_aio_context(new_context, ban->opaque);
3866     }
3867 }
3868 
3869 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3870 {
3871     bdrv_drain(bs); /* ensure there are no in-flight requests */
3872 
3873     bdrv_detach_aio_context(bs);
3874 
3875     /* This function executes in the old AioContext so acquire the new one in
3876      * case it runs in a different thread.
3877      */
3878     aio_context_acquire(new_context);
3879     bdrv_attach_aio_context(bs, new_context);
3880     aio_context_release(new_context);
3881 }
3882 
3883 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3884         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3885         void (*detach_aio_context)(void *opaque), void *opaque)
3886 {
3887     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3888     *ban = (BdrvAioNotifier){
3889         .attached_aio_context = attached_aio_context,
3890         .detach_aio_context   = detach_aio_context,
3891         .opaque               = opaque
3892     };
3893 
3894     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3895 }
3896 
3897 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3898                                       void (*attached_aio_context)(AioContext *,
3899                                                                    void *),
3900                                       void (*detach_aio_context)(void *),
3901                                       void *opaque)
3902 {
3903     BdrvAioNotifier *ban, *ban_next;
3904 
3905     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3906         if (ban->attached_aio_context == attached_aio_context &&
3907             ban->detach_aio_context   == detach_aio_context   &&
3908             ban->opaque               == opaque)
3909         {
3910             QLIST_REMOVE(ban, list);
3911             g_free(ban);
3912 
3913             return;
3914         }
3915     }
3916 
3917     abort();
3918 }
3919 
3920 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3921                        BlockDriverAmendStatusCB *status_cb)
3922 {
3923     if (!bs->drv->bdrv_amend_options) {
3924         return -ENOTSUP;
3925     }
3926     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3927 }
3928 
3929 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3930  * of block filter and by bdrv_is_first_non_filter.
3931  * It is used to test if the given bs is the candidate or recurse more in the
3932  * node graph.
3933  */
3934 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3935                                       BlockDriverState *candidate)
3936 {
3937     /* return false if basic checks fails */
3938     if (!bs || !bs->drv) {
3939         return false;
3940     }
3941 
3942     /* the code reached a non block filter driver -> check if the bs is
3943      * the same as the candidate. It's the recursion termination condition.
3944      */
3945     if (!bs->drv->is_filter) {
3946         return bs == candidate;
3947     }
3948     /* Down this path the driver is a block filter driver */
3949 
3950     /* If the block filter recursion method is defined use it to recurse down
3951      * the node graph.
3952      */
3953     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3954         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3955     }
3956 
3957     /* the driver is a block filter but don't allow to recurse -> return false
3958      */
3959     return false;
3960 }
3961 
3962 /* This function checks if the candidate is the first non filter bs down it's
3963  * bs chain. Since we don't have pointers to parents it explore all bs chains
3964  * from the top. Some filters can choose not to pass down the recursion.
3965  */
3966 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3967 {
3968     BlockDriverState *bs;
3969 
3970     /* walk down the bs forest recursively */
3971     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3972         bool perm;
3973 
3974         /* try to recurse in this top level bs */
3975         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3976 
3977         /* candidate is the first non filter */
3978         if (perm) {
3979             return true;
3980         }
3981     }
3982 
3983     return false;
3984 }
3985 
3986 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3987                                         const char *node_name, Error **errp)
3988 {
3989     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3990     AioContext *aio_context;
3991 
3992     if (!to_replace_bs) {
3993         error_setg(errp, "Node name '%s' not found", node_name);
3994         return NULL;
3995     }
3996 
3997     aio_context = bdrv_get_aio_context(to_replace_bs);
3998     aio_context_acquire(aio_context);
3999 
4000     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4001         to_replace_bs = NULL;
4002         goto out;
4003     }
4004 
4005     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4006      * most non filter in order to prevent data corruption.
4007      * Another benefit is that this tests exclude backing files which are
4008      * blocked by the backing blockers.
4009      */
4010     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4011         error_setg(errp, "Only top most non filter can be replaced");
4012         to_replace_bs = NULL;
4013         goto out;
4014     }
4015 
4016 out:
4017     aio_context_release(aio_context);
4018     return to_replace_bs;
4019 }
4020 
4021 static bool append_open_options(QDict *d, BlockDriverState *bs)
4022 {
4023     const QDictEntry *entry;
4024     bool found_any = false;
4025 
4026     for (entry = qdict_first(bs->options); entry;
4027          entry = qdict_next(bs->options, entry))
4028     {
4029         /* Only take options for this level and exclude all non-driver-specific
4030          * options */
4031         if (!strchr(qdict_entry_key(entry), '.') &&
4032             strcmp(qdict_entry_key(entry), "node-name"))
4033         {
4034             qobject_incref(qdict_entry_value(entry));
4035             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4036             found_any = true;
4037         }
4038     }
4039 
4040     return found_any;
4041 }
4042 
4043 /* Updates the following BDS fields:
4044  *  - exact_filename: A filename which may be used for opening a block device
4045  *                    which (mostly) equals the given BDS (even without any
4046  *                    other options; so reading and writing must return the same
4047  *                    results, but caching etc. may be different)
4048  *  - full_open_options: Options which, when given when opening a block device
4049  *                       (without a filename), result in a BDS (mostly)
4050  *                       equalling the given one
4051  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4052  *              full_open_options is converted to a JSON object, prefixed with
4053  *              "json:" (for use through the JSON pseudo protocol) and put here.
4054  */
4055 void bdrv_refresh_filename(BlockDriverState *bs)
4056 {
4057     BlockDriver *drv = bs->drv;
4058     QDict *opts;
4059 
4060     if (!drv) {
4061         return;
4062     }
4063 
4064     /* This BDS's file name will most probably depend on its file's name, so
4065      * refresh that first */
4066     if (bs->file) {
4067         bdrv_refresh_filename(bs->file->bs);
4068     }
4069 
4070     if (drv->bdrv_refresh_filename) {
4071         /* Obsolete information is of no use here, so drop the old file name
4072          * information before refreshing it */
4073         bs->exact_filename[0] = '\0';
4074         if (bs->full_open_options) {
4075             QDECREF(bs->full_open_options);
4076             bs->full_open_options = NULL;
4077         }
4078 
4079         drv->bdrv_refresh_filename(bs);
4080     } else if (bs->file) {
4081         /* Try to reconstruct valid information from the underlying file */
4082         bool has_open_options;
4083 
4084         bs->exact_filename[0] = '\0';
4085         if (bs->full_open_options) {
4086             QDECREF(bs->full_open_options);
4087             bs->full_open_options = NULL;
4088         }
4089 
4090         opts = qdict_new();
4091         has_open_options = append_open_options(opts, bs);
4092 
4093         /* If no specific options have been given for this BDS, the filename of
4094          * the underlying file should suffice for this one as well */
4095         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4096             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4097         }
4098         /* Reconstructing the full options QDict is simple for most format block
4099          * drivers, as long as the full options are known for the underlying
4100          * file BDS. The full options QDict of that file BDS should somehow
4101          * contain a representation of the filename, therefore the following
4102          * suffices without querying the (exact_)filename of this BDS. */
4103         if (bs->file->bs->full_open_options) {
4104             qdict_put_obj(opts, "driver",
4105                           QOBJECT(qstring_from_str(drv->format_name)));
4106             QINCREF(bs->file->bs->full_open_options);
4107             qdict_put_obj(opts, "file",
4108                           QOBJECT(bs->file->bs->full_open_options));
4109 
4110             bs->full_open_options = opts;
4111         } else {
4112             QDECREF(opts);
4113         }
4114     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4115         /* There is no underlying file BDS (at least referenced by BDS.file),
4116          * so the full options QDict should be equal to the options given
4117          * specifically for this block device when it was opened (plus the
4118          * driver specification).
4119          * Because those options don't change, there is no need to update
4120          * full_open_options when it's already set. */
4121 
4122         opts = qdict_new();
4123         append_open_options(opts, bs);
4124         qdict_put_obj(opts, "driver",
4125                       QOBJECT(qstring_from_str(drv->format_name)));
4126 
4127         if (bs->exact_filename[0]) {
4128             /* This may not work for all block protocol drivers (some may
4129              * require this filename to be parsed), but we have to find some
4130              * default solution here, so just include it. If some block driver
4131              * does not support pure options without any filename at all or
4132              * needs some special format of the options QDict, it needs to
4133              * implement the driver-specific bdrv_refresh_filename() function.
4134              */
4135             qdict_put_obj(opts, "filename",
4136                           QOBJECT(qstring_from_str(bs->exact_filename)));
4137         }
4138 
4139         bs->full_open_options = opts;
4140     }
4141 
4142     if (bs->exact_filename[0]) {
4143         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4144     } else if (bs->full_open_options) {
4145         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4146         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4147                  qstring_get_str(json));
4148         QDECREF(json);
4149     }
4150 }
4151 
4152 /* This accessor function purpose is to allow the device models to access the
4153  * BlockAcctStats structure embedded inside a BlockDriverState without being
4154  * aware of the BlockDriverState structure layout.
4155  * It will go away when the BlockAcctStats structure will be moved inside
4156  * the device models.
4157  */
4158 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4159 {
4160     return &bs->stats;
4161 }
4162