xref: /openbmc/qemu/block.c (revision 779020cbdc67011026c10fb620f701bfc6b85547)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "block/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 
43 #ifdef CONFIG_BSD
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/**
 * Dirty-tracking bitmap attached to a block device.
 *
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into its owner's
     * dirty_bitmaps list (initialized in bdrv_new()) -- TODO confirm. */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
73 
/* Placeholder result: bdrv_create() seeds CreateCo.ret with this value and
 * polls the AioContext until the coroutine overwrites it. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates created through bdrv_new_root(), linked via device_list */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BlockDriverStates that were given a node name (see
 * bdrv_assign_node_name()), linked via node_list */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All block drivers registered with bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition appears later in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* Forward declaration; the definition appears later in this file */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
93 
94 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed by
 * ':' (a drive specification such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    int is_letter = (drive >= 'a' && drive <= 'z') ||
                    (drive >= 'A' && drive <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
101 
102 int is_windows_drive(const char *filename)
103 {
104     if (is_windows_drive_prefix(filename) &&
105         filename[2] == '\0')
106         return 1;
107     if (strstart(filename, "\\\\.\\", NULL) ||
108         strstart(filename, "//./", NULL))
109         return 1;
110     return 0;
111 }
112 #endif
113 
114 size_t bdrv_opt_mem_align(BlockDriverState *bs)
115 {
116     if (!bs || !bs->drv) {
117         /* page size or 4k (hdd sector size) should be on the safe side */
118         return MAX(4096, getpagesize());
119     }
120 
121     return bs->bl.opt_mem_alignment;
122 }
123 
124 size_t bdrv_min_mem_align(BlockDriverState *bs)
125 {
126     if (!bs || !bs->drv) {
127         /* page size or 4k (hdd sector size) should be on the safe side */
128         return MAX(4096, getpagesize());
129     }
130 
131     return bs->bl.min_mem_alignment;
132 }
133 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any path separator. On Windows, drive specifications are never
 * treated as a protocol. Returns 1 if a protocol prefix is present, else 0.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    /* The first delimiter found decides: ':' means protocol */
    return path[prefix_len] == ':';
}
151 
/*
 * Return 1 if @path is absolute, 0 otherwise. On Windows, drive
 * specifications and a leading backslash also count as absolute.
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return first == '/' || first == '\\';
#else
    return first == '/';
#endif
}
164 
/*
 * Build the path of @filename in @dest (at most @dest_size bytes including
 * the terminating NUL). An absolute @filename is copied as is. Otherwise
 * the result is the leading part of @base_path -- up to and including its
 * last path separator or its first ':', whichever ends later -- followed by
 * @filename. URLs are supported. Nothing is written if @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator (or the start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!after_sep || last_backslash > after_sep) {
            after_sep = last_backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    prefix_end = after_sep > after_colon ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
208 
209 void bdrv_get_full_backing_filename_from_filename(const char *backed,
210                                                   const char *backing,
211                                                   char *dest, size_t sz,
212                                                   Error **errp)
213 {
214     if (backing[0] == '\0' || path_has_protocol(backing) ||
215         path_is_absolute(backing))
216     {
217         pstrcpy(dest, sz, backing);
218     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
219         error_setg(errp, "Cannot use relative backing file names for '%s'",
220                    backed);
221     } else {
222         path_combine(dest, sz, backed, backing);
223     }
224 }
225 
226 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
227                                     Error **errp)
228 {
229     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
230 
231     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
232                                                  dest, sz, errp);
233 }
234 
/*
 * Register the block driver @bdrv: set up its I/O functions via
 * bdrv_setup_io_funcs() and insert it at the head of the global driver
 * list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
241 
/*
 * Create a new BlockDriverState with bdrv_new() and append it to the
 * bdrv_states list. Returns the new BDS.
 */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
249 
250 BlockDriverState *bdrv_new(void)
251 {
252     BlockDriverState *bs;
253     int i;
254 
255     bs = g_new0(BlockDriverState, 1);
256     QLIST_INIT(&bs->dirty_bitmaps);
257     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
258         QLIST_INIT(&bs->op_blockers[i]);
259     }
260     bdrv_iostatus_disable(bs);
261     notifier_list_init(&bs->close_notifiers);
262     notifier_with_return_list_init(&bs->before_write_notifiers);
263     qemu_co_queue_init(&bs->throttled_reqs[0]);
264     qemu_co_queue_init(&bs->throttled_reqs[1]);
265     bs->refcnt = 1;
266     bs->aio_context = qemu_get_aio_context();
267 
268     return bs;
269 }
270 
/* Add @notify to the close_notifiers list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
275 
276 BlockDriver *bdrv_find_format(const char *format_name)
277 {
278     BlockDriver *drv1;
279     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280         if (!strcmp(drv1->format_name, format_name)) {
281             return drv1;
282         }
283     }
284     return NULL;
285 }
286 
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
288 {
289     static const char *whitelist_rw[] = {
290         CONFIG_BDRV_RW_WHITELIST
291     };
292     static const char *whitelist_ro[] = {
293         CONFIG_BDRV_RO_WHITELIST
294     };
295     const char **p;
296 
297     if (!whitelist_rw[0] && !whitelist_ro[0]) {
298         return 1;               /* no whitelist, anything goes */
299     }
300 
301     for (p = whitelist_rw; *p; p++) {
302         if (!strcmp(drv->format_name, *p)) {
303             return 1;
304         }
305     }
306     if (read_only) {
307         for (p = whitelist_ro; *p; p++) {
308             if (!strcmp(drv->format_name, *p)) {
309                 return 1;
310             }
311         }
312     }
313     return 0;
314 }
315 
/* Arguments and results passed between bdrv_create() and its coroutine
 * entry point bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name; freed by bdrv_create() */
    QemuOpts *opts;     /* options handed to the driver's bdrv_create */
    int ret;            /* NOT_DONE until the callback's result is stored */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
323 
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
325 {
326     Error *local_err = NULL;
327     int ret;
328 
329     CreateCo *cco = opaque;
330     assert(cco->drv);
331 
332     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333     if (local_err) {
334         error_propagate(&cco->err, local_err);
335     }
336     cco->ret = ret;
337 }
338 
339 int bdrv_create(BlockDriver *drv, const char* filename,
340                 QemuOpts *opts, Error **errp)
341 {
342     int ret;
343 
344     Coroutine *co;
345     CreateCo cco = {
346         .drv = drv,
347         .filename = g_strdup(filename),
348         .opts = opts,
349         .ret = NOT_DONE,
350         .err = NULL,
351     };
352 
353     if (!drv->bdrv_create) {
354         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355         ret = -ENOTSUP;
356         goto out;
357     }
358 
359     if (qemu_in_coroutine()) {
360         /* Fast-path if already in coroutine context */
361         bdrv_create_co_entry(&cco);
362     } else {
363         co = qemu_coroutine_create(bdrv_create_co_entry);
364         qemu_coroutine_enter(co, &cco);
365         while (cco.ret == NOT_DONE) {
366             aio_poll(qemu_get_aio_context(), true);
367         }
368     }
369 
370     ret = cco.ret;
371     if (ret < 0) {
372         if (cco.err) {
373             error_propagate(errp, cco.err);
374         } else {
375             error_setg_errno(errp, -ret, "Could not create image");
376         }
377     }
378 
379 out:
380     g_free(cco.filename);
381     return ret;
382 }
383 
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
385 {
386     BlockDriver *drv;
387     Error *local_err = NULL;
388     int ret;
389 
390     drv = bdrv_find_protocol(filename, true, errp);
391     if (drv == NULL) {
392         return -ENOENT;
393     }
394 
395     ret = bdrv_create(drv, filename, opts, &local_err);
396     if (local_err) {
397         error_propagate(errp, local_err);
398     }
399     return ret;
400 }
401 
402 /**
403  * Try to get @bs's logical and physical block size.
404  * On success, store them in @bsz struct and return 0.
405  * On failure return -errno.
406  * @bs must not be empty.
407  */
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
409 {
410     BlockDriver *drv = bs->drv;
411 
412     if (drv && drv->bdrv_probe_blocksizes) {
413         return drv->bdrv_probe_blocksizes(bs, bsz);
414     }
415 
416     return -ENOTSUP;
417 }
418 
419 /**
420  * Try to get @bs's geometry (cyls, heads, sectors).
421  * On success, store them in @geo struct and return 0.
422  * On failure return -errno.
423  * @bs must not be empty.
424  */
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
426 {
427     BlockDriver *drv = bs->drv;
428 
429     if (drv && drv->bdrv_probe_geometry) {
430         return drv->bdrv_probe_geometry(bs, geo);
431     }
432 
433     return -ENOTSUP;
434 }
435 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp if TMPDIR
 * is not set. On Windows @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename). On Windows the failure value is
 * the negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    int ret;
    const char *tmpdir;

    /* A non-positive size must never reach snprintf(): a negative value
     * would be converted to a huge size_t bound. */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* Save close()'s error first: unlink() may overwrite errno */
        ret = -errno;
        unlink(filename);
        return ret;
    }

    return 0;
#endif
}
471 
472 /*
473  * Detect host devices. By convention, /dev/cdrom[N] is always
474  * recognized as a host CDROM.
475  */
476 static BlockDriver *find_hdev_driver(const char *filename)
477 {
478     int score_max = 0, score;
479     BlockDriver *drv = NULL, *d;
480 
481     QLIST_FOREACH(d, &bdrv_drivers, list) {
482         if (d->bdrv_probe_device) {
483             score = d->bdrv_probe_device(filename);
484             if (score > score_max) {
485                 score_max = score;
486                 drv = d;
487             }
488         }
489     }
490 
491     return drv;
492 }
493 
494 BlockDriver *bdrv_find_protocol(const char *filename,
495                                 bool allow_protocol_prefix,
496                                 Error **errp)
497 {
498     BlockDriver *drv1;
499     char protocol[128];
500     int len;
501     const char *p;
502 
503     /* TODO Drivers without bdrv_file_open must be specified explicitly */
504 
505     /*
506      * XXX(hch): we really should not let host device detection
507      * override an explicit protocol specification, but moving this
508      * later breaks access to device names with colons in them.
509      * Thanks to the brain-dead persistent naming schemes on udev-
510      * based Linux systems those actually are quite common.
511      */
512     drv1 = find_hdev_driver(filename);
513     if (drv1) {
514         return drv1;
515     }
516 
517     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518         return &bdrv_file;
519     }
520 
521     p = strchr(filename, ':');
522     assert(p != NULL);
523     len = p - filename;
524     if (len > sizeof(protocol) - 1)
525         len = sizeof(protocol) - 1;
526     memcpy(protocol, filename, len);
527     protocol[len] = '\0';
528     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529         if (drv1->protocol_name &&
530             !strcmp(drv1->protocol_name, protocol)) {
531             return drv1;
532         }
533     }
534 
535     error_setg(errp, "Unknown protocol '%s'", protocol);
536     return NULL;
537 }
538 
539 /*
540  * Guess image format by probing its contents.
541  * This is not a good idea when your image is raw (CVE-2008-2004), but
542  * we do it anyway for backward compatibility.
543  *
544  * @buf         contains the image's first @buf_size bytes.
545  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546  *              but can be smaller if the image file is smaller)
547  * @filename    is its filename.
548  *
549  * For all block drivers, call the bdrv_probe() method to get its
550  * probing score.
551  * Return the first block driver with the highest probing score.
552  */
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554                             const char *filename)
555 {
556     int score_max = 0, score;
557     BlockDriver *drv = NULL, *d;
558 
559     QLIST_FOREACH(d, &bdrv_drivers, list) {
560         if (d->bdrv_probe) {
561             score = d->bdrv_probe(buf, buf_size, filename);
562             if (score > score_max) {
563                 score_max = score;
564                 drv = d;
565             }
566         }
567     }
568 
569     return drv;
570 }
571 
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573                              BlockDriver **pdrv, Error **errp)
574 {
575     BlockDriver *drv;
576     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577     int ret = 0;
578 
579     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581         *pdrv = &bdrv_raw;
582         return ret;
583     }
584 
585     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586     if (ret < 0) {
587         error_setg_errno(errp, -ret, "Could not read image for determining its "
588                          "format");
589         *pdrv = NULL;
590         return ret;
591     }
592 
593     drv = bdrv_probe_all(buf, ret, filename);
594     if (!drv) {
595         error_setg(errp, "Could not determine image format: No compatible "
596                    "driver found");
597         ret = -ENOENT;
598     }
599     *pdrv = drv;
600     return ret;
601 }
602 
603 /**
604  * Set the current 'total_sectors' value
605  * Return 0 on success, -errno on error.
606  */
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
608 {
609     BlockDriver *drv = bs->drv;
610 
611     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612     if (bdrv_is_sg(bs))
613         return 0;
614 
615     /* query actual device if possible, otherwise just trust the hint */
616     if (drv->bdrv_getlength) {
617         int64_t length = drv->bdrv_getlength(bs);
618         if (length < 0) {
619             return length;
620         }
621         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
622     }
623 
624     bs->total_sectors = hint;
625     return 0;
626 }
627 
628 /**
629  * Set open flags for a given discard mode
630  *
631  * Return 0 on success, -1 if the discard mode was invalid.
632  */
633 int bdrv_parse_discard_flags(const char *mode, int *flags)
634 {
635     *flags &= ~BDRV_O_UNMAP;
636 
637     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
638         /* do nothing */
639     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
640         *flags |= BDRV_O_UNMAP;
641     } else {
642         return -1;
643     }
644 
645     return 0;
646 }
647 
648 /**
649  * Set open flags for a given cache mode
650  *
651  * Return 0 on success, -1 if the cache mode was invalid.
652  */
653 int bdrv_parse_cache_flags(const char *mode, int *flags)
654 {
655     *flags &= ~BDRV_O_CACHE_MASK;
656 
657     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
658         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
659     } else if (!strcmp(mode, "directsync")) {
660         *flags |= BDRV_O_NOCACHE;
661     } else if (!strcmp(mode, "writeback")) {
662         *flags |= BDRV_O_CACHE_WB;
663     } else if (!strcmp(mode, "unsafe")) {
664         *flags |= BDRV_O_CACHE_WB;
665         *flags |= BDRV_O_NO_FLUSH;
666     } else if (!strcmp(mode, "writethrough")) {
667         /* this is the default */
668     } else {
669         return -1;
670     }
671 
672     return 0;
673 }
674 
675 /*
676  * Returns the flags that a temporary snapshot should get, based on the
677  * originally requested flags (the originally requested image will have flags
678  * like a backing file)
679  */
680 static int bdrv_temp_snapshot_flags(int flags)
681 {
682     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
683 }
684 
685 /*
686  * Returns the flags that bs->file should get if a protocol driver is expected,
687  * based on the given flags for the parent BDS
688  */
689 static int bdrv_inherited_flags(int flags)
690 {
691     /* Enable protocol handling, disable format probing for bs->file */
692     flags |= BDRV_O_PROTOCOL;
693 
694     /* Our block drivers take care to send flushes and respect unmap policy,
695      * so we can enable both unconditionally on lower layers. */
696     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
697 
698     /* Clear flags that only apply to the top layer */
699     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
700 
701     return flags;
702 }
703 
/* Child role whose flags are derived from the parent's flags with
 * bdrv_inherited_flags(). */
const BdrvChildRole child_file = {
    .inherit_flags = bdrv_inherited_flags,
};
707 
708 /*
709  * Returns the flags that bs->file should get if the use of formats (and not
710  * only protocols) is permitted for it, based on the given flags for the parent
711  * BDS
712  */
713 static int bdrv_inherited_fmt_flags(int parent_flags)
714 {
715     int flags = child_file.inherit_flags(parent_flags);
716     return flags & ~BDRV_O_PROTOCOL;
717 }
718 
/* Child role whose flags are derived from the parent's flags with
 * bdrv_inherited_fmt_flags(). */
const BdrvChildRole child_format = {
    .inherit_flags = bdrv_inherited_fmt_flags,
};
722 
723 /*
724  * Returns the flags that bs->backing should get, based on the given flags
725  * for the parent BDS
726  */
727 static int bdrv_backing_flags(int flags)
728 {
729     /* backing files always opened read-only */
730     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
731 
732     /* snapshot=on is handled on the top layer */
733     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
734 
735     return flags;
736 }
737 
/* Child role whose flags are derived from the parent's flags with
 * bdrv_backing_flags(). */
static const BdrvChildRole child_backing = {
    .inherit_flags = bdrv_backing_flags,
};
741 
742 static int bdrv_open_flags(BlockDriverState *bs, int flags)
743 {
744     int open_flags = flags | BDRV_O_CACHE_WB;
745 
746     /*
747      * Clear flags that are internal to the block layer before opening the
748      * image.
749      */
750     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
751 
752     /*
753      * Snapshots should be writable.
754      */
755     if (flags & BDRV_O_TEMPORARY) {
756         open_flags |= BDRV_O_RDWR;
757     }
758 
759     return open_flags;
760 }
761 
762 static void bdrv_assign_node_name(BlockDriverState *bs,
763                                   const char *node_name,
764                                   Error **errp)
765 {
766     if (!node_name) {
767         return;
768     }
769 
770     /* Check for empty string or invalid characters */
771     if (!id_wellformed(node_name)) {
772         error_setg(errp, "Invalid node name");
773         return;
774     }
775 
776     /* takes care of avoiding namespaces collisions */
777     if (blk_by_name(node_name)) {
778         error_setg(errp, "node-name=%s is conflicting with a device id",
779                    node_name);
780         return;
781     }
782 
783     /* takes care of avoiding duplicates node names */
784     if (bdrv_find_node(node_name)) {
785         error_setg(errp, "Duplicate node name");
786         return;
787     }
788 
789     /* copy node name into the bs and insert it into the graph list */
790     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
791     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
792 }
793 
/* Options that bdrv_open_common() absorbs from the options QDict itself,
 * before the remaining options are passed on to the block driver. */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
806 
/*
 * Common part for opening disk images and files
 *
 * Opens @bs with the block driver @drv. If @file is given, it becomes
 * bs->file and the driver's bdrv_open callback is used; drivers that
 * implement bdrv_file_open are opened directly and must not be given a
 * @file.
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success. On failure a negative errno value is returned,
 * @errp is set, and bs->file, bs->opaque and bs->drv are reset to NULL if
 * they had already been assigned.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the child if there is one, otherwise from
     * the "filename" option (which may be absent). */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Pull the options handled here (see bdrv_runtime_opts) out of the
     * QDict. From this point on, failures must release opts. */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults set before the driver's open callback runs */
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Distinguish "allowed read-only only" from "not allowed at all" in
     * the error message. */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures must also undo the driver assignment */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    /* Prefer the driver's own error; otherwise build a generic one from
     * the errno value it returned. */
    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the driver assignment; the caller keeps ownership of @file */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
957 
958 static QDict *parse_json_filename(const char *filename, Error **errp)
959 {
960     QObject *options_obj;
961     QDict *options;
962     int ret;
963 
964     ret = strstart(filename, "json:", &filename);
965     assert(ret);
966 
967     options_obj = qobject_from_json(filename);
968     if (!options_obj) {
969         error_setg(errp, "Could not parse the JSON options");
970         return NULL;
971     }
972 
973     if (qobject_type(options_obj) != QTYPE_QDICT) {
974         qobject_decref(options_obj);
975         error_setg(errp, "Invalid JSON object given");
976         return NULL;
977     }
978 
979     options = qobject_to_qdict(options_obj);
980     qdict_flatten(options);
981 
982     return options;
983 }
984 
985 /*
986  * Fills in default options for opening images and converts the legacy
987  * filename/flags pair to option QDict entries.
988  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
989  * block driver has been specified explicitly.
990  */
991 static int bdrv_fill_options(QDict **options, const char **pfilename,
992                              int *flags, Error **errp)
993 {
994     const char *filename = *pfilename;
995     const char *drvname;
996     bool protocol = *flags & BDRV_O_PROTOCOL;
997     bool parse_filename = false;
998     BlockDriver *drv = NULL;
999     Error *local_err = NULL;
1000 
1001     /* Parse json: pseudo-protocol */
1002     if (filename && g_str_has_prefix(filename, "json:")) {
1003         QDict *json_options = parse_json_filename(filename, &local_err);
1004         if (local_err) {
1005             error_propagate(errp, local_err);
1006             return -EINVAL;
1007         }
1008 
1009         /* Options given in the filename have lower priority than options
1010          * specified directly */
1011         qdict_join(*options, json_options, false);
1012         QDECREF(json_options);
1013         *pfilename = filename = NULL;
1014     }
1015 
1016     drvname = qdict_get_try_str(*options, "driver");
1017     if (drvname) {
1018         drv = bdrv_find_format(drvname);
1019         if (!drv) {
1020             error_setg(errp, "Unknown driver '%s'", drvname);
1021             return -ENOENT;
1022         }
1023         /* If the user has explicitly specified the driver, this choice should
1024          * override the BDRV_O_PROTOCOL flag */
1025         protocol = drv->bdrv_file_open;
1026     }
1027 
1028     if (protocol) {
1029         *flags |= BDRV_O_PROTOCOL;
1030     } else {
1031         *flags &= ~BDRV_O_PROTOCOL;
1032     }
1033 
1034     /* Fetch the file name from the options QDict if necessary */
1035     if (protocol && filename) {
1036         if (!qdict_haskey(*options, "filename")) {
1037             qdict_put(*options, "filename", qstring_from_str(filename));
1038             parse_filename = true;
1039         } else {
1040             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1041                              "the same time");
1042             return -EINVAL;
1043         }
1044     }
1045 
1046     /* Find the right block driver */
1047     filename = qdict_get_try_str(*options, "filename");
1048 
1049     if (!drvname && protocol) {
1050         if (filename) {
1051             drv = bdrv_find_protocol(filename, parse_filename, errp);
1052             if (!drv) {
1053                 return -EINVAL;
1054             }
1055 
1056             drvname = drv->format_name;
1057             qdict_put(*options, "driver", qstring_from_str(drvname));
1058         } else {
1059             error_setg(errp, "Must specify either driver or file");
1060             return -EINVAL;
1061         }
1062     }
1063 
1064     assert(drv || !protocol);
1065 
1066     /* Driver-specific filename parsing */
1067     if (drv && drv->bdrv_parse_filename && parse_filename) {
1068         drv->bdrv_parse_filename(filename, *options, &local_err);
1069         if (local_err) {
1070             error_propagate(errp, local_err);
1071             return -EINVAL;
1072         }
1073 
1074         if (!drv->bdrv_needs_filename) {
1075             qdict_del(*options, "filename");
1076         }
1077     }
1078 
1079     return 0;
1080 }
1081 
1082 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1083                                     BlockDriverState *child_bs,
1084                                     const BdrvChildRole *child_role)
1085 {
1086     BdrvChild *child = g_new(BdrvChild, 1);
1087     *child = (BdrvChild) {
1088         .bs     = child_bs,
1089         .role   = child_role,
1090     };
1091 
1092     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1093     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1094 
1095     return child;
1096 }
1097 
1098 static void bdrv_detach_child(BdrvChild *child)
1099 {
1100     QLIST_REMOVE(child, next);
1101     QLIST_REMOVE(child, next_parent);
1102     g_free(child);
1103 }
1104 
1105 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1106 {
1107     BlockDriverState *child_bs;
1108 
1109     if (child == NULL) {
1110         return;
1111     }
1112 
1113     if (child->bs->inherits_from == parent) {
1114         child->bs->inherits_from = NULL;
1115     }
1116 
1117     child_bs = child->bs;
1118     bdrv_detach_child(child);
1119     bdrv_unref(child_bs);
1120 }
1121 
1122 /*
1123  * Sets the backing file link of a BDS. A new reference is created; callers
1124  * which don't need their own reference any more must call bdrv_unref().
1125  */
1126 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1127 {
1128     if (backing_hd) {
1129         bdrv_ref(backing_hd);
1130     }
1131 
1132     if (bs->backing) {
1133         assert(bs->backing_blocker);
1134         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1135         bdrv_unref_child(bs, bs->backing);
1136     } else if (backing_hd) {
1137         error_setg(&bs->backing_blocker,
1138                    "node is used as backing hd of '%s'",
1139                    bdrv_get_device_or_node_name(bs));
1140     }
1141 
1142     if (!backing_hd) {
1143         error_free(bs->backing_blocker);
1144         bs->backing_blocker = NULL;
1145         bs->backing = NULL;
1146         goto out;
1147     }
1148     bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
1149     bs->open_flags &= ~BDRV_O_NO_BACKING;
1150     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1151     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1152             backing_hd->drv ? backing_hd->drv->format_name : "");
1153 
1154     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1155     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1156     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1157                     bs->backing_blocker);
1158 out:
1159     bdrv_refresh_limits(bs, NULL);
1160 }
1161 
1162 /*
1163  * Opens the backing file for a BlockDriverState if not yet open
1164  *
1165  * options is a QDict of options to pass to the block drivers, or NULL for an
1166  * empty set of options. The reference to the QDict is transferred to this
1167  * function (even on failure), so if the caller intends to reuse the dictionary,
1168  * it needs to use QINCREF() before calling bdrv_file_open.
1169  */
1170 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1171 {
1172     char *backing_filename = g_malloc0(PATH_MAX);
1173     int ret = 0;
1174     BlockDriverState *backing_hd;
1175     Error *local_err = NULL;
1176 
1177     if (bs->backing != NULL) {
1178         QDECREF(options);
1179         goto free_exit;
1180     }
1181 
1182     /* NULL means an empty set of options */
1183     if (options == NULL) {
1184         options = qdict_new();
1185     }
1186 
1187     bs->open_flags &= ~BDRV_O_NO_BACKING;
1188     if (qdict_haskey(options, "file.filename")) {
1189         backing_filename[0] = '\0';
1190     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1191         QDECREF(options);
1192         goto free_exit;
1193     } else {
1194         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1195                                        &local_err);
1196         if (local_err) {
1197             ret = -EINVAL;
1198             error_propagate(errp, local_err);
1199             QDECREF(options);
1200             goto free_exit;
1201         }
1202     }
1203 
1204     if (!bs->drv || !bs->drv->supports_backing) {
1205         ret = -EINVAL;
1206         error_setg(errp, "Driver doesn't support backing files");
1207         QDECREF(options);
1208         goto free_exit;
1209     }
1210 
1211     backing_hd = bdrv_new();
1212 
1213     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1214         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1215     }
1216 
1217     assert(bs->backing == NULL);
1218     ret = bdrv_open_inherit(&backing_hd,
1219                             *backing_filename ? backing_filename : NULL,
1220                             NULL, options, 0, bs, &child_backing, &local_err);
1221     if (ret < 0) {
1222         bdrv_unref(backing_hd);
1223         backing_hd = NULL;
1224         bs->open_flags |= BDRV_O_NO_BACKING;
1225         error_setg(errp, "Could not open backing file: %s",
1226                    error_get_pretty(local_err));
1227         error_free(local_err);
1228         goto free_exit;
1229     }
1230 
1231     /* Hook up the backing file link; drop our reference, bs owns the
1232      * backing_hd reference now */
1233     bdrv_set_backing_hd(bs, backing_hd);
1234     bdrv_unref(backing_hd);
1235 
1236 free_exit:
1237     g_free(backing_filename);
1238     return ret;
1239 }
1240 
1241 /*
1242  * Opens a disk image whose options are given as BlockdevRef in another block
1243  * device's options.
1244  *
1245  * If allow_none is true, no image will be opened if filename is false and no
1246  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1247  *
1248  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1249  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1250  * itself, all options starting with "${bdref_key}." are considered part of the
1251  * BlockdevRef.
1252  *
1253  * The BlockdevRef will be removed from the options QDict.
1254  */
1255 BdrvChild *bdrv_open_child(const char *filename,
1256                            QDict *options, const char *bdref_key,
1257                            BlockDriverState* parent,
1258                            const BdrvChildRole *child_role,
1259                            bool allow_none, Error **errp)
1260 {
1261     BdrvChild *c = NULL;
1262     BlockDriverState *bs;
1263     QDict *image_options;
1264     int ret;
1265     char *bdref_key_dot;
1266     const char *reference;
1267 
1268     assert(child_role != NULL);
1269 
1270     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1271     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1272     g_free(bdref_key_dot);
1273 
1274     reference = qdict_get_try_str(options, bdref_key);
1275     if (!filename && !reference && !qdict_size(image_options)) {
1276         if (!allow_none) {
1277             error_setg(errp, "A block device must be specified for \"%s\"",
1278                        bdref_key);
1279         }
1280         QDECREF(image_options);
1281         goto done;
1282     }
1283 
1284     bs = NULL;
1285     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1286                             parent, child_role, errp);
1287     if (ret < 0) {
1288         goto done;
1289     }
1290 
1291     c = bdrv_attach_child(parent, bs, child_role);
1292 
1293 done:
1294     qdict_del(options, bdref_key);
1295     return c;
1296 }
1297 
1298 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1299 {
1300     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1301     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1302     int64_t total_size;
1303     QemuOpts *opts = NULL;
1304     QDict *snapshot_options;
1305     BlockDriverState *bs_snapshot;
1306     Error *local_err = NULL;
1307     int ret;
1308 
1309     /* if snapshot, we create a temporary backing file and open it
1310        instead of opening 'filename' directly */
1311 
1312     /* Get the required size from the image */
1313     total_size = bdrv_getlength(bs);
1314     if (total_size < 0) {
1315         ret = total_size;
1316         error_setg_errno(errp, -total_size, "Could not get image size");
1317         goto out;
1318     }
1319 
1320     /* Create the temporary image */
1321     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1322     if (ret < 0) {
1323         error_setg_errno(errp, -ret, "Could not get temporary filename");
1324         goto out;
1325     }
1326 
1327     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1328                             &error_abort);
1329     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1330     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1331     qemu_opts_del(opts);
1332     if (ret < 0) {
1333         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1334                          "'%s': %s", tmp_filename,
1335                          error_get_pretty(local_err));
1336         error_free(local_err);
1337         goto out;
1338     }
1339 
1340     /* Prepare a new options QDict for the temporary file */
1341     snapshot_options = qdict_new();
1342     qdict_put(snapshot_options, "file.driver",
1343               qstring_from_str("file"));
1344     qdict_put(snapshot_options, "file.filename",
1345               qstring_from_str(tmp_filename));
1346     qdict_put(snapshot_options, "driver",
1347               qstring_from_str("qcow2"));
1348 
1349     bs_snapshot = bdrv_new();
1350 
1351     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1352                     flags, &local_err);
1353     if (ret < 0) {
1354         error_propagate(errp, local_err);
1355         goto out;
1356     }
1357 
1358     bdrv_append(bs_snapshot, bs);
1359 
1360 out:
1361     g_free(tmp_filename);
1362     return ret;
1363 }
1364 
1365 /*
1366  * Opens a disk image (raw, qcow2, vmdk, ...)
1367  *
1368  * options is a QDict of options to pass to the block drivers, or NULL for an
1369  * empty set of options. The reference to the QDict belongs to the block layer
1370  * after the call (even on failure), so if the caller intends to reuse the
1371  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1372  *
1373  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1374  * If it is not NULL, the referenced BDS will be reused.
1375  *
1376  * The reference parameter may be used to specify an existing block device which
1377  * should be opened. If specified, neither options nor a filename may be given,
1378  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1379  */
1380 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1381                              const char *reference, QDict *options, int flags,
1382                              BlockDriverState *parent,
1383                              const BdrvChildRole *child_role, Error **errp)
1384 {
1385     int ret;
1386     BdrvChild *file = NULL;
1387     BlockDriverState *bs;
1388     BlockDriver *drv = NULL;
1389     const char *drvname;
1390     Error *local_err = NULL;
1391     int snapshot_flags = 0;
1392 
1393     assert(pbs);
1394     assert(!child_role || !flags);
1395     assert(!child_role == !parent);
1396 
1397     if (reference) {
1398         bool options_non_empty = options ? qdict_size(options) : false;
1399         QDECREF(options);
1400 
1401         if (*pbs) {
1402             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1403                        "another block device");
1404             return -EINVAL;
1405         }
1406 
1407         if (filename || options_non_empty) {
1408             error_setg(errp, "Cannot reference an existing block device with "
1409                        "additional options or a new filename");
1410             return -EINVAL;
1411         }
1412 
1413         bs = bdrv_lookup_bs(reference, reference, errp);
1414         if (!bs) {
1415             return -ENODEV;
1416         }
1417         bdrv_ref(bs);
1418         *pbs = bs;
1419         return 0;
1420     }
1421 
1422     if (*pbs) {
1423         bs = *pbs;
1424     } else {
1425         bs = bdrv_new();
1426     }
1427 
1428     /* NULL means an empty set of options */
1429     if (options == NULL) {
1430         options = qdict_new();
1431     }
1432 
1433     if (child_role) {
1434         bs->inherits_from = parent;
1435         flags = child_role->inherit_flags(parent->open_flags);
1436     }
1437 
1438     ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1439     if (local_err) {
1440         goto fail;
1441     }
1442 
1443     /* Find the right image format driver */
1444     drvname = qdict_get_try_str(options, "driver");
1445     if (drvname) {
1446         drv = bdrv_find_format(drvname);
1447         qdict_del(options, "driver");
1448         if (!drv) {
1449             error_setg(errp, "Unknown driver: '%s'", drvname);
1450             ret = -EINVAL;
1451             goto fail;
1452         }
1453     }
1454 
1455     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1456 
1457     bs->open_flags = flags;
1458     bs->options = options;
1459     options = qdict_clone_shallow(options);
1460 
1461     /* Open image file without format layer */
1462     if ((flags & BDRV_O_PROTOCOL) == 0) {
1463         if (flags & BDRV_O_RDWR) {
1464             flags |= BDRV_O_ALLOW_RDWR;
1465         }
1466         if (flags & BDRV_O_SNAPSHOT) {
1467             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1468             flags = bdrv_backing_flags(flags);
1469         }
1470 
1471         bs->open_flags = flags;
1472 
1473         file = bdrv_open_child(filename, options, "file", bs,
1474                                &child_file, true, &local_err);
1475         if (local_err) {
1476             ret = -EINVAL;
1477             goto fail;
1478         }
1479     }
1480 
1481     /* Image format probing */
1482     bs->probed = !drv;
1483     if (!drv && file) {
1484         ret = find_image_format(file->bs, filename, &drv, &local_err);
1485         if (ret < 0) {
1486             goto fail;
1487         }
1488     } else if (!drv) {
1489         error_setg(errp, "Must specify either driver or file");
1490         ret = -EINVAL;
1491         goto fail;
1492     }
1493 
1494     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1495     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1496     /* file must be NULL if a protocol BDS is about to be created
1497      * (the inverse results in an error message from bdrv_open_common()) */
1498     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1499 
1500     /* Open the image */
1501     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1502     if (ret < 0) {
1503         goto fail;
1504     }
1505 
1506     if (file && (bs->file != file)) {
1507         bdrv_unref_child(bs, file);
1508         file = NULL;
1509     }
1510 
1511     /* If there is a backing file, use it */
1512     if ((flags & BDRV_O_NO_BACKING) == 0) {
1513         QDict *backing_options;
1514 
1515         qdict_extract_subqdict(options, &backing_options, "backing.");
1516         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1517         if (ret < 0) {
1518             goto close_and_fail;
1519         }
1520     }
1521 
1522     bdrv_refresh_filename(bs);
1523 
1524     /* Check if any unknown options were used */
1525     if (options && (qdict_size(options) != 0)) {
1526         const QDictEntry *entry = qdict_first(options);
1527         if (flags & BDRV_O_PROTOCOL) {
1528             error_setg(errp, "Block protocol '%s' doesn't support the option "
1529                        "'%s'", drv->format_name, entry->key);
1530         } else {
1531             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1532                        "support the option '%s'", drv->format_name,
1533                        bdrv_get_device_name(bs), entry->key);
1534         }
1535 
1536         ret = -EINVAL;
1537         goto close_and_fail;
1538     }
1539 
1540     if (!bdrv_key_required(bs)) {
1541         if (bs->blk) {
1542             blk_dev_change_media_cb(bs->blk, true);
1543         }
1544     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1545                && !runstate_check(RUN_STATE_INMIGRATE)
1546                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1547         error_setg(errp,
1548                    "Guest must be stopped for opening of encrypted image");
1549         ret = -EBUSY;
1550         goto close_and_fail;
1551     }
1552 
1553     QDECREF(options);
1554     *pbs = bs;
1555 
1556     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1557      * temporary snapshot afterwards. */
1558     if (snapshot_flags) {
1559         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1560         if (local_err) {
1561             goto close_and_fail;
1562         }
1563     }
1564 
1565     return 0;
1566 
1567 fail:
1568     if (file != NULL) {
1569         bdrv_unref_child(bs, file);
1570     }
1571     QDECREF(bs->options);
1572     QDECREF(options);
1573     bs->options = NULL;
1574     if (!*pbs) {
1575         /* If *pbs is NULL, a new BDS has been created in this function and
1576            needs to be freed now. Otherwise, it does not need to be closed,
1577            since it has not really been opened yet. */
1578         bdrv_unref(bs);
1579     }
1580     if (local_err) {
1581         error_propagate(errp, local_err);
1582     }
1583     return ret;
1584 
1585 close_and_fail:
1586     /* See fail path, but now the BDS has to be always closed */
1587     if (*pbs) {
1588         bdrv_close(bs);
1589     } else {
1590         bdrv_unref(bs);
1591     }
1592     QDECREF(options);
1593     if (local_err) {
1594         error_propagate(errp, local_err);
1595     }
1596     return ret;
1597 }
1598 
1599 int bdrv_open(BlockDriverState **pbs, const char *filename,
1600               const char *reference, QDict *options, int flags, Error **errp)
1601 {
1602     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1603                              NULL, errp);
1604 }
1605 
1606 typedef struct BlockReopenQueueEntry {
1607      bool prepared;
1608      BDRVReopenState state;
1609      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1610 } BlockReopenQueueEntry;
1611 
1612 /*
1613  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1614  * reopen of multiple devices.
1615  *
1616  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1617  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1618  * be created and initialized. This newly created BlockReopenQueue should be
1619  * passed back in for subsequent calls that are intended to be of the same
1620  * atomic 'set'.
1621  *
1622  * bs is the BlockDriverState to add to the reopen queue.
1623  *
1624  * options contains the changed options for the associated bs
1625  * (the BlockReopenQueue takes ownership)
1626  *
1627  * flags contains the open flags for the associated bs
1628  *
1629  * returns a pointer to bs_queue, which is either the newly allocated
1630  * bs_queue, or the existing bs_queue being used.
1631  *
1632  */
1633 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1634                                     BlockDriverState *bs,
1635                                     QDict *options, int flags)
1636 {
1637     assert(bs != NULL);
1638 
1639     BlockReopenQueueEntry *bs_entry;
1640     BdrvChild *child;
1641     QDict *old_options;
1642 
1643     if (bs_queue == NULL) {
1644         bs_queue = g_new0(BlockReopenQueue, 1);
1645         QSIMPLEQ_INIT(bs_queue);
1646     }
1647 
1648     if (!options) {
1649         options = qdict_new();
1650     }
1651 
1652     old_options = qdict_clone_shallow(bs->options);
1653     qdict_join(options, old_options, false);
1654     QDECREF(old_options);
1655 
1656     /* bdrv_open() masks this flag out */
1657     flags &= ~BDRV_O_PROTOCOL;
1658 
1659     QLIST_FOREACH(child, &bs->children, next) {
1660         int child_flags;
1661 
1662         if (child->bs->inherits_from != bs) {
1663             continue;
1664         }
1665 
1666         child_flags = child->role->inherit_flags(flags);
1667         /* TODO Pass down child flags (backing.*, extents.*, ...) */
1668         bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1669     }
1670 
1671     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1672     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1673 
1674     bs_entry->state.bs = bs;
1675     bs_entry->state.options = options;
1676     bs_entry->state.flags = flags;
1677 
1678     return bs_queue;
1679 }
1680 
1681 /*
1682  * Reopen multiple BlockDriverStates atomically & transactionally.
1683  *
1684  * The queue passed in (bs_queue) must have been built up previous
1685  * via bdrv_reopen_queue().
1686  *
1687  * Reopens all BDS specified in the queue, with the appropriate
1688  * flags.  All devices are prepared for reopen, and failure of any
1689  * device will cause all device changes to be abandonded, and intermediate
1690  * data cleaned up.
1691  *
1692  * If all devices prepare successfully, then the changes are committed
1693  * to all devices.
1694  *
1695  */
1696 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1697 {
1698     int ret = -1;
1699     BlockReopenQueueEntry *bs_entry, *next;
1700     Error *local_err = NULL;
1701 
1702     assert(bs_queue != NULL);
1703 
1704     bdrv_drain_all();
1705 
1706     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1707         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1708             error_propagate(errp, local_err);
1709             goto cleanup;
1710         }
1711         bs_entry->prepared = true;
1712     }
1713 
1714     /* If we reach this point, we have success and just need to apply the
1715      * changes
1716      */
1717     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1718         bdrv_reopen_commit(&bs_entry->state);
1719     }
1720 
1721     ret = 0;
1722 
1723 cleanup:
1724     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1725         if (ret && bs_entry->prepared) {
1726             bdrv_reopen_abort(&bs_entry->state);
1727         }
1728         QDECREF(bs_entry->state.options);
1729         g_free(bs_entry);
1730     }
1731     g_free(bs_queue);
1732     return ret;
1733 }
1734 
1735 
1736 /* Reopen a single BlockDriverState with the specified flags. */
1737 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1738 {
1739     int ret = -1;
1740     Error *local_err = NULL;
1741     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1742 
1743     ret = bdrv_reopen_multiple(queue, &local_err);
1744     if (local_err != NULL) {
1745         error_propagate(errp, local_err);
1746     }
1747     return ret;
1748 }
1749 
1750 
1751 /*
1752  * Prepares a BlockDriverState for reopen. All changes are staged in the
1753  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1754  * the block driver layer .bdrv_reopen_prepare()
1755  *
1756  * bs is the BlockDriverState to reopen
1757  * flags are the new open flags
1758  * queue is the reopen queue
1759  *
1760  * Returns 0 on success, non-zero on error.  On error errp will be set
1761  * as well.
1762  *
1763  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1764  * It is the responsibility of the caller to then call the abort() or
1765  * commit() for any other BDS that have been left in a prepare() state
1766  *
1767  */
1768 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1769                         Error **errp)
1770 {
1771     int ret = -1;
1772     Error *local_err = NULL;
1773     BlockDriver *drv;
1774 
1775     assert(reopen_state != NULL);
1776     assert(reopen_state->bs->drv != NULL);
1777     drv = reopen_state->bs->drv;
1778 
1779     /* if we are to stay read-only, do not allow permission change
1780      * to r/w */
1781     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1782         reopen_state->flags & BDRV_O_RDWR) {
1783         error_setg(errp, "Node '%s' is read only",
1784                    bdrv_get_device_or_node_name(reopen_state->bs));
1785         goto error;
1786     }
1787 
1788 
1789     ret = bdrv_flush(reopen_state->bs);
1790     if (ret) {
1791         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1792                   strerror(-ret));
1793         goto error;
1794     }
1795 
1796     if (drv->bdrv_reopen_prepare) {
1797         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1798         if (ret) {
1799             if (local_err != NULL) {
1800                 error_propagate(errp, local_err);
1801             } else {
1802                 error_setg(errp, "failed while preparing to reopen image '%s'",
1803                            reopen_state->bs->filename);
1804             }
1805             goto error;
1806         }
1807     } else {
1808         /* It is currently mandatory to have a bdrv_reopen_prepare()
1809          * handler for each supported drv. */
1810         error_setg(errp, "Block format '%s' used by node '%s' "
1811                    "does not support reopening files", drv->format_name,
1812                    bdrv_get_device_or_node_name(reopen_state->bs));
1813         ret = -1;
1814         goto error;
1815     }
1816 
1817     /* Options that are not handled are only okay if they are unchanged
1818      * compared to the old state. It is expected that some options are only
1819      * used for the initial open, but not reopen (e.g. filename) */
1820     if (qdict_size(reopen_state->options)) {
1821         const QDictEntry *entry = qdict_first(reopen_state->options);
1822 
1823         do {
1824             QString *new_obj = qobject_to_qstring(entry->value);
1825             const char *new = qstring_get_str(new_obj);
1826             const char *old = qdict_get_try_str(reopen_state->bs->options,
1827                                                 entry->key);
1828 
1829             if (!old || strcmp(new, old)) {
1830                 error_setg(errp, "Cannot change the option '%s'", entry->key);
1831                 ret = -EINVAL;
1832                 goto error;
1833             }
1834         } while ((entry = qdict_next(reopen_state->options, entry)));
1835     }
1836 
1837     ret = 0;
1838 
1839 error:
1840     return ret;
1841 }
1842 
1843 /*
1844  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1845  * makes them final by swapping the staging BlockDriverState contents into
1846  * the active BlockDriverState contents.
1847  */
1848 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1849 {
1850     BlockDriver *drv;
1851 
1852     assert(reopen_state != NULL);
1853     drv = reopen_state->bs->drv;
1854     assert(drv != NULL);
1855 
1856     /* If there are any driver level actions to take */
1857     if (drv->bdrv_reopen_commit) {
1858         drv->bdrv_reopen_commit(reopen_state);
1859     }
1860 
1861     /* set BDS specific flags now */
1862     reopen_state->bs->open_flags         = reopen_state->flags;
1863     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1864                                               BDRV_O_CACHE_WB);
1865     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1866 
1867     bdrv_refresh_limits(reopen_state->bs, NULL);
1868 }
1869 
1870 /*
1871  * Abort the reopen, and delete and free the staged changes in
1872  * reopen_state
1873  */
1874 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1875 {
1876     BlockDriver *drv;
1877 
1878     assert(reopen_state != NULL);
1879     drv = reopen_state->bs->drv;
1880     assert(drv != NULL);
1881 
1882     if (drv->bdrv_reopen_abort) {
1883         drv->bdrv_reopen_abort(reopen_state);
1884     }
1885 }
1886 
1887 
1888 void bdrv_close(BlockDriverState *bs)
1889 {
1890     BdrvAioNotifier *ban, *ban_next;
1891 
1892     if (bs->job) {
1893         block_job_cancel_sync(bs->job);
1894     }
1895 
1896     /* Disable I/O limits and drain all pending throttled requests */
1897     if (bs->io_limits_enabled) {
1898         bdrv_io_limits_disable(bs);
1899     }
1900 
1901     bdrv_drain(bs); /* complete I/O */
1902     bdrv_flush(bs);
1903     bdrv_drain(bs); /* in case flush left pending I/O */
1904     notifier_list_notify(&bs->close_notifiers, bs);
1905 
1906     if (bs->drv) {
1907         BdrvChild *child, *next;
1908 
1909         bs->drv->bdrv_close(bs);
1910         bs->drv = NULL;
1911 
1912         bdrv_set_backing_hd(bs, NULL);
1913 
1914         if (bs->file != NULL) {
1915             bdrv_unref_child(bs, bs->file);
1916             bs->file = NULL;
1917         }
1918 
1919         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1920             /* TODO Remove bdrv_unref() from drivers' close function and use
1921              * bdrv_unref_child() here */
1922             if (child->bs->inherits_from == bs) {
1923                 child->bs->inherits_from = NULL;
1924             }
1925             bdrv_detach_child(child);
1926         }
1927 
1928         g_free(bs->opaque);
1929         bs->opaque = NULL;
1930         bs->copy_on_read = 0;
1931         bs->backing_file[0] = '\0';
1932         bs->backing_format[0] = '\0';
1933         bs->total_sectors = 0;
1934         bs->encrypted = 0;
1935         bs->valid_key = 0;
1936         bs->sg = 0;
1937         bs->zero_beyond_eof = false;
1938         QDECREF(bs->options);
1939         bs->options = NULL;
1940         QDECREF(bs->full_open_options);
1941         bs->full_open_options = NULL;
1942     }
1943 
1944     if (bs->blk) {
1945         blk_dev_change_media_cb(bs->blk, false);
1946     }
1947 
1948     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1949         g_free(ban);
1950     }
1951     QLIST_INIT(&bs->aio_notifiers);
1952 }
1953 
1954 void bdrv_close_all(void)
1955 {
1956     BlockDriverState *bs;
1957 
1958     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1959         AioContext *aio_context = bdrv_get_aio_context(bs);
1960 
1961         aio_context_acquire(aio_context);
1962         bdrv_close(bs);
1963         aio_context_release(aio_context);
1964     }
1965 }
1966 
1967 /* make a BlockDriverState anonymous by removing from bdrv_state and
1968  * graph_bdrv_state list.
1969    Also, NULL terminate the device_name to prevent double remove */
1970 void bdrv_make_anon(BlockDriverState *bs)
1971 {
1972     /*
1973      * Take care to remove bs from bdrv_states only when it's actually
1974      * in it.  Note that bs->device_list.tqe_prev is initially null,
1975      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1976      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1977      * resetting it to null on remove.
1978      */
1979     if (bs->device_list.tqe_prev) {
1980         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1981         bs->device_list.tqe_prev = NULL;
1982     }
1983     if (bs->node_name[0] != '\0') {
1984         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1985     }
1986     bs->node_name[0] = '\0';
1987 }
1988 
1989 /* Fields that need to stay with the top-level BDS */
1990 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1991                                      BlockDriverState *bs_src)
1992 {
1993     /* move some fields that need to stay attached to the device */
1994 
1995     /* dev info */
1996     bs_dest->guest_block_size   = bs_src->guest_block_size;
1997     bs_dest->copy_on_read       = bs_src->copy_on_read;
1998 
1999     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2000 
2001     /* r/w error */
2002     bs_dest->on_read_error      = bs_src->on_read_error;
2003     bs_dest->on_write_error     = bs_src->on_write_error;
2004 
2005     /* i/o status */
2006     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
2007     bs_dest->iostatus           = bs_src->iostatus;
2008 
2009     /* dirty bitmap */
2010     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2011 }
2012 
2013 static void change_parent_backing_link(BlockDriverState *from,
2014                                        BlockDriverState *to)
2015 {
2016     BdrvChild *c, *next;
2017 
2018     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2019         assert(c->role != &child_backing);
2020         c->bs = to;
2021         QLIST_REMOVE(c, next_parent);
2022         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2023         bdrv_ref(to);
2024         bdrv_unref(from);
2025     }
2026     if (from->blk) {
2027         blk_set_bs(from->blk, to);
2028         if (!to->device_list.tqe_prev) {
2029             QTAILQ_INSERT_BEFORE(from, to, device_list);
2030         }
2031         QTAILQ_REMOVE(&bdrv_states, from, device_list);
2032     }
2033 }
2034 
2035 static void swap_feature_fields(BlockDriverState *bs_top,
2036                                 BlockDriverState *bs_new)
2037 {
2038     BlockDriverState tmp;
2039 
2040     bdrv_move_feature_fields(&tmp, bs_top);
2041     bdrv_move_feature_fields(bs_top, bs_new);
2042     bdrv_move_feature_fields(bs_new, &tmp);
2043 
2044     assert(!bs_new->throttle_state);
2045     if (bs_top->throttle_state) {
2046         assert(bs_top->io_limits_enabled);
2047         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2048         bdrv_io_limits_disable(bs_top);
2049     }
2050 }
2051 
2052 /*
2053  * Add new bs contents at the top of an image chain while the chain is
2054  * live, while keeping required fields on the top layer.
2055  *
2056  * This will modify the BlockDriverState fields, and swap contents
2057  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2058  *
2059  * bs_new must not be attached to a BlockBackend.
2060  *
2061  * This function does not create any image files.
2062  *
2063  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2064  * that's what the callers commonly need. bs_new will be referenced by the old
2065  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2066  * reference of its own, it must call bdrv_ref().
2067  */
2068 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2069 {
2070     assert(!bdrv_requests_pending(bs_top));
2071     assert(!bdrv_requests_pending(bs_new));
2072 
2073     bdrv_ref(bs_top);
2074     change_parent_backing_link(bs_top, bs_new);
2075 
2076     /* Some fields always stay on top of the backing file chain */
2077     swap_feature_fields(bs_top, bs_new);
2078 
2079     bdrv_set_backing_hd(bs_new, bs_top);
2080     bdrv_unref(bs_top);
2081 
2082     /* bs_new is now referenced by its new parents, we don't need the
2083      * additional reference any more. */
2084     bdrv_unref(bs_new);
2085 }
2086 
2087 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2088 {
2089     assert(!bdrv_requests_pending(old));
2090     assert(!bdrv_requests_pending(new));
2091 
2092     bdrv_ref(old);
2093 
2094     if (old->blk) {
2095         /* As long as these fields aren't in BlockBackend, but in the top-level
2096          * BlockDriverState, it's not possible for a BDS to have two BBs.
2097          *
2098          * We really want to copy the fields from old to new, but we go for a
2099          * swap instead so that pointers aren't duplicated and cause trouble.
2100          * (Also, bdrv_swap() used to do the same.) */
2101         assert(!new->blk);
2102         swap_feature_fields(old, new);
2103     }
2104     change_parent_backing_link(old, new);
2105 
2106     /* Change backing files if a previously independent node is added to the
2107      * chain. For active commit, we replace top by its own (indirect) backing
2108      * file and don't do anything here so we don't build a loop. */
2109     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2110         bdrv_set_backing_hd(new, backing_bs(old));
2111         bdrv_set_backing_hd(old, NULL);
2112     }
2113 
2114     bdrv_unref(old);
2115 }
2116 
2117 static void bdrv_delete(BlockDriverState *bs)
2118 {
2119     assert(!bs->job);
2120     assert(bdrv_op_blocker_is_empty(bs));
2121     assert(!bs->refcnt);
2122     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2123 
2124     bdrv_close(bs);
2125 
2126     /* remove from list, if necessary */
2127     bdrv_make_anon(bs);
2128 
2129     g_free(bs);
2130 }
2131 
2132 /*
2133  * Run consistency checks on an image
2134  *
2135  * Returns 0 if the check could be completed (it doesn't mean that the image is
2136  * free of errors) or -errno when an internal error occurred. The results of the
2137  * check are stored in res.
2138  */
2139 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2140 {
2141     if (bs->drv == NULL) {
2142         return -ENOMEDIUM;
2143     }
2144     if (bs->drv->bdrv_check == NULL) {
2145         return -ENOTSUP;
2146     }
2147 
2148     memset(res, 0, sizeof(*res));
2149     return bs->drv->bdrv_check(bs, res, fix);
2150 }
2151 
2152 #define COMMIT_BUF_SECTORS 2048
2153 
2154 /* commit COW file into the raw image */
2155 int bdrv_commit(BlockDriverState *bs)
2156 {
2157     BlockDriver *drv = bs->drv;
2158     int64_t sector, total_sectors, length, backing_length;
2159     int n, ro, open_flags;
2160     int ret = 0;
2161     uint8_t *buf = NULL;
2162 
2163     if (!drv)
2164         return -ENOMEDIUM;
2165 
2166     if (!bs->backing) {
2167         return -ENOTSUP;
2168     }
2169 
2170     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2171         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2172         return -EBUSY;
2173     }
2174 
2175     ro = bs->backing->bs->read_only;
2176     open_flags =  bs->backing->bs->open_flags;
2177 
2178     if (ro) {
2179         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2180             return -EACCES;
2181         }
2182     }
2183 
2184     length = bdrv_getlength(bs);
2185     if (length < 0) {
2186         ret = length;
2187         goto ro_cleanup;
2188     }
2189 
2190     backing_length = bdrv_getlength(bs->backing->bs);
2191     if (backing_length < 0) {
2192         ret = backing_length;
2193         goto ro_cleanup;
2194     }
2195 
2196     /* If our top snapshot is larger than the backing file image,
2197      * grow the backing file image if possible.  If not possible,
2198      * we must return an error */
2199     if (length > backing_length) {
2200         ret = bdrv_truncate(bs->backing->bs, length);
2201         if (ret < 0) {
2202             goto ro_cleanup;
2203         }
2204     }
2205 
2206     total_sectors = length >> BDRV_SECTOR_BITS;
2207 
2208     /* qemu_try_blockalign() for bs will choose an alignment that works for
2209      * bs->backing->bs as well, so no need to compare the alignment manually. */
2210     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2211     if (buf == NULL) {
2212         ret = -ENOMEM;
2213         goto ro_cleanup;
2214     }
2215 
2216     for (sector = 0; sector < total_sectors; sector += n) {
2217         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2218         if (ret < 0) {
2219             goto ro_cleanup;
2220         }
2221         if (ret) {
2222             ret = bdrv_read(bs, sector, buf, n);
2223             if (ret < 0) {
2224                 goto ro_cleanup;
2225             }
2226 
2227             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2228             if (ret < 0) {
2229                 goto ro_cleanup;
2230             }
2231         }
2232     }
2233 
2234     if (drv->bdrv_make_empty) {
2235         ret = drv->bdrv_make_empty(bs);
2236         if (ret < 0) {
2237             goto ro_cleanup;
2238         }
2239         bdrv_flush(bs);
2240     }
2241 
2242     /*
2243      * Make sure all data we wrote to the backing device is actually
2244      * stable on disk.
2245      */
2246     if (bs->backing) {
2247         bdrv_flush(bs->backing->bs);
2248     }
2249 
2250     ret = 0;
2251 ro_cleanup:
2252     qemu_vfree(buf);
2253 
2254     if (ro) {
2255         /* ignoring error return here */
2256         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2257     }
2258 
2259     return ret;
2260 }
2261 
2262 int bdrv_commit_all(void)
2263 {
2264     BlockDriverState *bs;
2265 
2266     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2267         AioContext *aio_context = bdrv_get_aio_context(bs);
2268 
2269         aio_context_acquire(aio_context);
2270         if (bs->drv && bs->backing) {
2271             int ret = bdrv_commit(bs);
2272             if (ret < 0) {
2273                 aio_context_release(aio_context);
2274                 return ret;
2275             }
2276         }
2277         aio_context_release(aio_context);
2278     }
2279     return 0;
2280 }
2281 
2282 /*
2283  * Return values:
2284  * 0        - success
2285  * -EINVAL  - backing format specified, but no file
2286  * -ENOSPC  - can't update the backing file because no space is left in the
2287  *            image file header
2288  * -ENOTSUP - format driver doesn't support changing the backing file
2289  */
2290 int bdrv_change_backing_file(BlockDriverState *bs,
2291     const char *backing_file, const char *backing_fmt)
2292 {
2293     BlockDriver *drv = bs->drv;
2294     int ret;
2295 
2296     /* Backing file format doesn't make sense without a backing file */
2297     if (backing_fmt && !backing_file) {
2298         return -EINVAL;
2299     }
2300 
2301     if (drv->bdrv_change_backing_file != NULL) {
2302         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2303     } else {
2304         ret = -ENOTSUP;
2305     }
2306 
2307     if (ret == 0) {
2308         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2309         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2310     }
2311     return ret;
2312 }
2313 
2314 /*
2315  * Finds the image layer in the chain that has 'bs' as its backing file.
2316  *
2317  * active is the current topmost image.
2318  *
2319  * Returns NULL if bs is not found in active's image chain,
2320  * or if active == bs.
2321  *
2322  * Returns the bottommost base image if bs == NULL.
2323  */
2324 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2325                                     BlockDriverState *bs)
2326 {
2327     while (active && bs != backing_bs(active)) {
2328         active = backing_bs(active);
2329     }
2330 
2331     return active;
2332 }
2333 
2334 /* Given a BDS, searches for the base layer. */
2335 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2336 {
2337     return bdrv_find_overlay(bs, NULL);
2338 }
2339 
2340 /*
2341  * Drops images above 'base' up to and including 'top', and sets the image
2342  * above 'top' to have base as its backing file.
2343  *
2344  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2345  * information in 'bs' can be properly updated.
2346  *
2347  * E.g., this will convert the following chain:
2348  * bottom <- base <- intermediate <- top <- active
2349  *
2350  * to
2351  *
2352  * bottom <- base <- active
2353  *
2354  * It is allowed for bottom==base, in which case it converts:
2355  *
2356  * base <- intermediate <- top <- active
2357  *
2358  * to
2359  *
2360  * base <- active
2361  *
2362  * If backing_file_str is non-NULL, it will be used when modifying top's
2363  * overlay image metadata.
2364  *
2365  * Error conditions:
2366  *  if active == top, that is considered an error
2367  *
2368  */
2369 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2370                            BlockDriverState *base, const char *backing_file_str)
2371 {
2372     BlockDriverState *new_top_bs = NULL;
2373     int ret = -EIO;
2374 
2375     if (!top->drv || !base->drv) {
2376         goto exit;
2377     }
2378 
2379     new_top_bs = bdrv_find_overlay(active, top);
2380 
2381     if (new_top_bs == NULL) {
2382         /* we could not find the image above 'top', this is an error */
2383         goto exit;
2384     }
2385 
2386     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2387      * to do, no intermediate images */
2388     if (backing_bs(new_top_bs) == base) {
2389         ret = 0;
2390         goto exit;
2391     }
2392 
2393     /* Make sure that base is in the backing chain of top */
2394     if (!bdrv_chain_contains(top, base)) {
2395         goto exit;
2396     }
2397 
2398     /* success - we can delete the intermediate states, and link top->base */
2399     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2400     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2401                                    base->drv ? base->drv->format_name : "");
2402     if (ret) {
2403         goto exit;
2404     }
2405     bdrv_set_backing_hd(new_top_bs, base);
2406 
2407     ret = 0;
2408 exit:
2409     return ret;
2410 }
2411 
2412 /**
2413  * Truncate file to 'offset' bytes (needed only for file protocols)
2414  */
2415 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2416 {
2417     BlockDriver *drv = bs->drv;
2418     int ret;
2419     if (!drv)
2420         return -ENOMEDIUM;
2421     if (!drv->bdrv_truncate)
2422         return -ENOTSUP;
2423     if (bs->read_only)
2424         return -EACCES;
2425 
2426     ret = drv->bdrv_truncate(bs, offset);
2427     if (ret == 0) {
2428         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2429         bdrv_dirty_bitmap_truncate(bs);
2430         if (bs->blk) {
2431             blk_dev_resize_cb(bs->blk);
2432         }
2433     }
2434     return ret;
2435 }
2436 
2437 /**
2438  * Length of a allocated file in bytes. Sparse files are counted by actual
2439  * allocated space. Return < 0 if error or unknown.
2440  */
2441 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2442 {
2443     BlockDriver *drv = bs->drv;
2444     if (!drv) {
2445         return -ENOMEDIUM;
2446     }
2447     if (drv->bdrv_get_allocated_file_size) {
2448         return drv->bdrv_get_allocated_file_size(bs);
2449     }
2450     if (bs->file) {
2451         return bdrv_get_allocated_file_size(bs->file->bs);
2452     }
2453     return -ENOTSUP;
2454 }
2455 
2456 /**
2457  * Return number of sectors on success, -errno on error.
2458  */
2459 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2460 {
2461     BlockDriver *drv = bs->drv;
2462 
2463     if (!drv)
2464         return -ENOMEDIUM;
2465 
2466     if (drv->has_variable_length) {
2467         int ret = refresh_total_sectors(bs, bs->total_sectors);
2468         if (ret < 0) {
2469             return ret;
2470         }
2471     }
2472     return bs->total_sectors;
2473 }
2474 
2475 /**
2476  * Return length in bytes on success, -errno on error.
2477  * The length is always a multiple of BDRV_SECTOR_SIZE.
2478  */
2479 int64_t bdrv_getlength(BlockDriverState *bs)
2480 {
2481     int64_t ret = bdrv_nb_sectors(bs);
2482 
2483     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2484     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2485 }
2486 
2487 /* return 0 as number of sectors if no device present or error */
2488 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2489 {
2490     int64_t nb_sectors = bdrv_nb_sectors(bs);
2491 
2492     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2493 }
2494 
2495 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2496                        BlockdevOnError on_write_error)
2497 {
2498     bs->on_read_error = on_read_error;
2499     bs->on_write_error = on_write_error;
2500 }
2501 
2502 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2503 {
2504     return is_read ? bs->on_read_error : bs->on_write_error;
2505 }
2506 
2507 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2508 {
2509     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2510 
2511     switch (on_err) {
2512     case BLOCKDEV_ON_ERROR_ENOSPC:
2513         return (error == ENOSPC) ?
2514                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2515     case BLOCKDEV_ON_ERROR_STOP:
2516         return BLOCK_ERROR_ACTION_STOP;
2517     case BLOCKDEV_ON_ERROR_REPORT:
2518         return BLOCK_ERROR_ACTION_REPORT;
2519     case BLOCKDEV_ON_ERROR_IGNORE:
2520         return BLOCK_ERROR_ACTION_IGNORE;
2521     default:
2522         abort();
2523     }
2524 }
2525 
2526 static void send_qmp_error_event(BlockDriverState *bs,
2527                                  BlockErrorAction action,
2528                                  bool is_read, int error)
2529 {
2530     IoOperationType optype;
2531 
2532     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2533     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2534                                    bdrv_iostatus_is_enabled(bs),
2535                                    error == ENOSPC, strerror(error),
2536                                    &error_abort);
2537 }
2538 
2539 /* This is done by device models because, while the block layer knows
2540  * about the error, it does not know whether an operation comes from
2541  * the device or the block layer (from a job, for example).
2542  */
2543 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2544                        bool is_read, int error)
2545 {
2546     assert(error >= 0);
2547 
2548     if (action == BLOCK_ERROR_ACTION_STOP) {
2549         /* First set the iostatus, so that "info block" returns an iostatus
2550          * that matches the events raised so far (an additional error iostatus
2551          * is fine, but not a lost one).
2552          */
2553         bdrv_iostatus_set_err(bs, error);
2554 
2555         /* Then raise the request to stop the VM and the event.
2556          * qemu_system_vmstop_request_prepare has two effects.  First,
2557          * it ensures that the STOP event always comes after the
2558          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2559          * can observe the STOP event and do a "cont" before the STOP
2560          * event is issued, the VM will not stop.  In this case, vm_start()
2561          * also ensures that the STOP/RESUME pair of events is emitted.
2562          */
2563         qemu_system_vmstop_request_prepare();
2564         send_qmp_error_event(bs, action, is_read, error);
2565         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2566     } else {
2567         send_qmp_error_event(bs, action, is_read, error);
2568     }
2569 }
2570 
2571 int bdrv_is_read_only(BlockDriverState *bs)
2572 {
2573     return bs->read_only;
2574 }
2575 
2576 int bdrv_is_sg(BlockDriverState *bs)
2577 {
2578     return bs->sg;
2579 }
2580 
2581 int bdrv_enable_write_cache(BlockDriverState *bs)
2582 {
2583     return bs->enable_write_cache;
2584 }
2585 
2586 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2587 {
2588     bs->enable_write_cache = wce;
2589 
2590     /* so a reopen() will preserve wce */
2591     if (wce) {
2592         bs->open_flags |= BDRV_O_CACHE_WB;
2593     } else {
2594         bs->open_flags &= ~BDRV_O_CACHE_WB;
2595     }
2596 }
2597 
2598 int bdrv_is_encrypted(BlockDriverState *bs)
2599 {
2600     if (bs->backing && bs->backing->bs->encrypted) {
2601         return 1;
2602     }
2603     return bs->encrypted;
2604 }
2605 
2606 int bdrv_key_required(BlockDriverState *bs)
2607 {
2608     BdrvChild *backing = bs->backing;
2609 
2610     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2611         return 1;
2612     }
2613     return (bs->encrypted && !bs->valid_key);
2614 }
2615 
2616 int bdrv_set_key(BlockDriverState *bs, const char *key)
2617 {
2618     int ret;
2619     if (bs->backing && bs->backing->bs->encrypted) {
2620         ret = bdrv_set_key(bs->backing->bs, key);
2621         if (ret < 0)
2622             return ret;
2623         if (!bs->encrypted)
2624             return 0;
2625     }
2626     if (!bs->encrypted) {
2627         return -EINVAL;
2628     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2629         return -ENOMEDIUM;
2630     }
2631     ret = bs->drv->bdrv_set_key(bs, key);
2632     if (ret < 0) {
2633         bs->valid_key = 0;
2634     } else if (!bs->valid_key) {
2635         bs->valid_key = 1;
2636         if (bs->blk) {
2637             /* call the change callback now, we skipped it on open */
2638             blk_dev_change_media_cb(bs->blk, true);
2639         }
2640     }
2641     return ret;
2642 }
2643 
2644 /*
2645  * Provide an encryption key for @bs.
2646  * If @key is non-null:
2647  *     If @bs is not encrypted, fail.
2648  *     Else if the key is invalid, fail.
2649  *     Else set @bs's key to @key, replacing the existing key, if any.
2650  * If @key is null:
2651  *     If @bs is encrypted and still lacks a key, fail.
2652  *     Else do nothing.
2653  * On failure, store an error object through @errp if non-null.
2654  */
2655 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2656 {
2657     if (key) {
2658         if (!bdrv_is_encrypted(bs)) {
2659             error_setg(errp, "Node '%s' is not encrypted",
2660                       bdrv_get_device_or_node_name(bs));
2661         } else if (bdrv_set_key(bs, key) < 0) {
2662             error_setg(errp, QERR_INVALID_PASSWORD);
2663         }
2664     } else {
2665         if (bdrv_key_required(bs)) {
2666             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2667                       "'%s' (%s) is encrypted",
2668                       bdrv_get_device_or_node_name(bs),
2669                       bdrv_get_encrypted_filename(bs));
2670         }
2671     }
2672 }
2673 
2674 const char *bdrv_get_format_name(BlockDriverState *bs)
2675 {
2676     return bs->drv ? bs->drv->format_name : NULL;
2677 }
2678 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() hands the comparator pointers to the array elements, i.e.
 * pointers to the string pointers, so they have to be dereferenced once
 * before the strings themselves can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2683 
2684 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2685                          void *opaque)
2686 {
2687     BlockDriver *drv;
2688     int count = 0;
2689     int i;
2690     const char **formats = NULL;
2691 
2692     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2693         if (drv->format_name) {
2694             bool found = false;
2695             int i = count;
2696             while (formats && i && !found) {
2697                 found = !strcmp(formats[--i], drv->format_name);
2698             }
2699 
2700             if (!found) {
2701                 formats = g_renew(const char *, formats, count + 1);
2702                 formats[count++] = drv->format_name;
2703             }
2704         }
2705     }
2706 
2707     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2708 
2709     for (i = 0; i < count; i++) {
2710         it(opaque, formats[i]);
2711     }
2712 
2713     g_free(formats);
2714 }
2715 
2716 /* This function is to find a node in the bs graph */
2717 BlockDriverState *bdrv_find_node(const char *node_name)
2718 {
2719     BlockDriverState *bs;
2720 
2721     assert(node_name);
2722 
2723     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2724         if (!strcmp(node_name, bs->node_name)) {
2725             return bs;
2726         }
2727     }
2728     return NULL;
2729 }
2730 
2731 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2732 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2733 {
2734     BlockDeviceInfoList *list, *entry;
2735     BlockDriverState *bs;
2736 
2737     list = NULL;
2738     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2739         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2740         if (!info) {
2741             qapi_free_BlockDeviceInfoList(list);
2742             return NULL;
2743         }
2744         entry = g_malloc0(sizeof(*entry));
2745         entry->value = info;
2746         entry->next = list;
2747         list = entry;
2748     }
2749 
2750     return list;
2751 }
2752 
2753 BlockDriverState *bdrv_lookup_bs(const char *device,
2754                                  const char *node_name,
2755                                  Error **errp)
2756 {
2757     BlockBackend *blk;
2758     BlockDriverState *bs;
2759 
2760     if (device) {
2761         blk = blk_by_name(device);
2762 
2763         if (blk) {
2764             return blk_bs(blk);
2765         }
2766     }
2767 
2768     if (node_name) {
2769         bs = bdrv_find_node(node_name);
2770 
2771         if (bs) {
2772             return bs;
2773         }
2774     }
2775 
2776     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2777                      device ? device : "",
2778                      node_name ? node_name : "");
2779     return NULL;
2780 }
2781 
2782 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2783  * return false.  If either argument is NULL, return false. */
2784 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2785 {
2786     while (top && top != base) {
2787         top = backing_bs(top);
2788     }
2789 
2790     return top != NULL;
2791 }
2792 
2793 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2794 {
2795     if (!bs) {
2796         return QTAILQ_FIRST(&graph_bdrv_states);
2797     }
2798     return QTAILQ_NEXT(bs, node_list);
2799 }
2800 
2801 BlockDriverState *bdrv_next(BlockDriverState *bs)
2802 {
2803     if (!bs) {
2804         return QTAILQ_FIRST(&bdrv_states);
2805     }
2806     return QTAILQ_NEXT(bs, device_list);
2807 }
2808 
2809 const char *bdrv_get_node_name(const BlockDriverState *bs)
2810 {
2811     return bs->node_name;
2812 }
2813 
2814 /* TODO check what callers really want: bs->node_name or blk_name() */
2815 const char *bdrv_get_device_name(const BlockDriverState *bs)
2816 {
2817     return bs->blk ? blk_name(bs->blk) : "";
2818 }
2819 
2820 /* This can be used to identify nodes that might not have a device
2821  * name associated. Since node and device names live in the same
2822  * namespace, the result is unambiguous. The exception is if both are
2823  * absent, then this returns an empty (non-null) string. */
2824 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2825 {
2826     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2827 }
2828 
2829 int bdrv_get_flags(BlockDriverState *bs)
2830 {
2831     return bs->open_flags;
2832 }
2833 
2834 int bdrv_has_zero_init_1(BlockDriverState *bs)
2835 {
2836     return 1;
2837 }
2838 
2839 int bdrv_has_zero_init(BlockDriverState *bs)
2840 {
2841     assert(bs->drv);
2842 
2843     /* If BS is a copy on write image, it is initialized to
2844        the contents of the base image, which may not be zeroes.  */
2845     if (bs->backing) {
2846         return 0;
2847     }
2848     if (bs->drv->bdrv_has_zero_init) {
2849         return bs->drv->bdrv_has_zero_init(bs);
2850     }
2851 
2852     /* safe default */
2853     return 0;
2854 }
2855 
2856 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2857 {
2858     BlockDriverInfo bdi;
2859 
2860     if (bs->backing) {
2861         return false;
2862     }
2863 
2864     if (bdrv_get_info(bs, &bdi) == 0) {
2865         return bdi.unallocated_blocks_are_zero;
2866     }
2867 
2868     return false;
2869 }
2870 
2871 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2872 {
2873     BlockDriverInfo bdi;
2874 
2875     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2876         return false;
2877     }
2878 
2879     if (bdrv_get_info(bs, &bdi) == 0) {
2880         return bdi.can_write_zeroes_with_unmap;
2881     }
2882 
2883     return false;
2884 }
2885 
2886 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2887 {
2888     if (bs->backing && bs->backing->bs->encrypted)
2889         return bs->backing_file;
2890     else if (bs->encrypted)
2891         return bs->filename;
2892     else
2893         return NULL;
2894 }
2895 
2896 void bdrv_get_backing_filename(BlockDriverState *bs,
2897                                char *filename, int filename_size)
2898 {
2899     pstrcpy(filename, filename_size, bs->backing_file);
2900 }
2901 
2902 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2903 {
2904     BlockDriver *drv = bs->drv;
2905     if (!drv)
2906         return -ENOMEDIUM;
2907     if (!drv->bdrv_get_info)
2908         return -ENOTSUP;
2909     memset(bdi, 0, sizeof(*bdi));
2910     return drv->bdrv_get_info(bs, bdi);
2911 }
2912 
2913 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2914 {
2915     BlockDriver *drv = bs->drv;
2916     if (drv && drv->bdrv_get_specific_info) {
2917         return drv->bdrv_get_specific_info(bs);
2918     }
2919     return NULL;
2920 }
2921 
2922 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2923 {
2924     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2925         return;
2926     }
2927 
2928     bs->drv->bdrv_debug_event(bs, event);
2929 }
2930 
2931 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2932                           const char *tag)
2933 {
2934     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2935         bs = bs->file ? bs->file->bs : NULL;
2936     }
2937 
2938     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2939         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2940     }
2941 
2942     return -ENOTSUP;
2943 }
2944 
2945 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2946 {
2947     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2948         bs = bs->file ? bs->file->bs : NULL;
2949     }
2950 
2951     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2952         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2953     }
2954 
2955     return -ENOTSUP;
2956 }
2957 
2958 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2959 {
2960     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2961         bs = bs->file ? bs->file->bs : NULL;
2962     }
2963 
2964     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2965         return bs->drv->bdrv_debug_resume(bs, tag);
2966     }
2967 
2968     return -ENOTSUP;
2969 }
2970 
2971 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2972 {
2973     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2974         bs = bs->file ? bs->file->bs : NULL;
2975     }
2976 
2977     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2978         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2979     }
2980 
2981     return false;
2982 }
2983 
2984 int bdrv_is_snapshot(BlockDriverState *bs)
2985 {
2986     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2987 }
2988 
2989 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2990  * relative, it must be relative to the chain.  So, passing in bs->filename
2991  * from a BDS as backing_file should not be done, as that may be relative to
2992  * the CWD rather than the chain. */
2993 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2994         const char *backing_file)
2995 {
2996     char *filename_full = NULL;
2997     char *backing_file_full = NULL;
2998     char *filename_tmp = NULL;
2999     int is_protocol = 0;
3000     BlockDriverState *curr_bs = NULL;
3001     BlockDriverState *retval = NULL;
3002 
3003     if (!bs || !bs->drv || !backing_file) {
3004         return NULL;
3005     }
3006 
3007     filename_full     = g_malloc(PATH_MAX);
3008     backing_file_full = g_malloc(PATH_MAX);
3009     filename_tmp      = g_malloc(PATH_MAX);
3010 
3011     is_protocol = path_has_protocol(backing_file);
3012 
3013     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3014 
3015         /* If either of the filename paths is actually a protocol, then
3016          * compare unmodified paths; otherwise make paths relative */
3017         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3018             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3019                 retval = curr_bs->backing->bs;
3020                 break;
3021             }
3022         } else {
3023             /* If not an absolute filename path, make it relative to the current
3024              * image's filename path */
3025             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3026                          backing_file);
3027 
3028             /* We are going to compare absolute pathnames */
3029             if (!realpath(filename_tmp, filename_full)) {
3030                 continue;
3031             }
3032 
3033             /* We need to make sure the backing filename we are comparing against
3034              * is relative to the current image filename (or absolute) */
3035             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3036                          curr_bs->backing_file);
3037 
3038             if (!realpath(filename_tmp, backing_file_full)) {
3039                 continue;
3040             }
3041 
3042             if (strcmp(backing_file_full, filename_full) == 0) {
3043                 retval = curr_bs->backing->bs;
3044                 break;
3045             }
3046         }
3047     }
3048 
3049     g_free(filename_full);
3050     g_free(backing_file_full);
3051     g_free(filename_tmp);
3052     return retval;
3053 }
3054 
3055 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3056 {
3057     if (!bs->drv) {
3058         return 0;
3059     }
3060 
3061     if (!bs->backing) {
3062         return 0;
3063     }
3064 
3065     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3066 }
3067 
/*
 * Run the module initialisation hooks registered for MODULE_INIT_BLOCK.
 */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3072 
/*
 * Same as bdrv_init(), but sets use_bdrv_whitelist to 1 first.
 */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3078 
3079 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3080 {
3081     Error *local_err = NULL;
3082     int ret;
3083 
3084     if (!bs->drv)  {
3085         return;
3086     }
3087 
3088     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3089         return;
3090     }
3091     bs->open_flags &= ~BDRV_O_INCOMING;
3092 
3093     if (bs->drv->bdrv_invalidate_cache) {
3094         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3095     } else if (bs->file) {
3096         bdrv_invalidate_cache(bs->file->bs, &local_err);
3097     }
3098     if (local_err) {
3099         error_propagate(errp, local_err);
3100         return;
3101     }
3102 
3103     ret = refresh_total_sectors(bs, bs->total_sectors);
3104     if (ret < 0) {
3105         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3106         return;
3107     }
3108 }
3109 
3110 void bdrv_invalidate_cache_all(Error **errp)
3111 {
3112     BlockDriverState *bs;
3113     Error *local_err = NULL;
3114 
3115     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3116         AioContext *aio_context = bdrv_get_aio_context(bs);
3117 
3118         aio_context_acquire(aio_context);
3119         bdrv_invalidate_cache(bs, &local_err);
3120         aio_context_release(aio_context);
3121         if (local_err) {
3122             error_propagate(errp, local_err);
3123             return;
3124         }
3125     }
3126 }
3127 
3128 /**************************************************************/
3129 /* removable device support */
3130 
3131 /**
3132  * Return TRUE if the media is present
3133  */
3134 int bdrv_is_inserted(BlockDriverState *bs)
3135 {
3136     BlockDriver *drv = bs->drv;
3137 
3138     if (!drv)
3139         return 0;
3140     if (!drv->bdrv_is_inserted)
3141         return 1;
3142     return drv->bdrv_is_inserted(bs);
3143 }
3144 
3145 /**
3146  * Return whether the media changed since the last call to this
3147  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3148  */
3149 int bdrv_media_changed(BlockDriverState *bs)
3150 {
3151     BlockDriver *drv = bs->drv;
3152 
3153     if (drv && drv->bdrv_media_changed) {
3154         return drv->bdrv_media_changed(bs);
3155     }
3156     return -ENOTSUP;
3157 }
3158 
3159 /**
3160  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3161  */
3162 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3163 {
3164     BlockDriver *drv = bs->drv;
3165     const char *device_name;
3166 
3167     if (drv && drv->bdrv_eject) {
3168         drv->bdrv_eject(bs, eject_flag);
3169     }
3170 
3171     device_name = bdrv_get_device_name(bs);
3172     if (device_name[0] != '\0') {
3173         qapi_event_send_device_tray_moved(device_name,
3174                                           eject_flag, &error_abort);
3175     }
3176 }
3177 
3178 /**
3179  * Lock or unlock the media (if it is locked, the user won't be able
3180  * to eject it manually).
3181  */
3182 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3183 {
3184     BlockDriver *drv = bs->drv;
3185 
3186     trace_bdrv_lock_medium(bs, locked);
3187 
3188     if (drv && drv->bdrv_lock_medium) {
3189         drv->bdrv_lock_medium(bs, locked);
3190     }
3191 }
3192 
/*
 * Store @align in bs->guest_block_size.
 */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
3197 
3198 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3199 {
3200     BdrvDirtyBitmap *bm;
3201 
3202     assert(name);
3203     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3204         if (bm->name && !strcmp(name, bm->name)) {
3205             return bm;
3206         }
3207     }
3208     return NULL;
3209 }
3210 
/*
 * Drop the name of @bitmap: the name string is freed and the pointer reset
 * to NULL.  The bitmap must not be frozen.
 */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
3217 
3218 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3219                                           uint32_t granularity,
3220                                           const char *name,
3221                                           Error **errp)
3222 {
3223     int64_t bitmap_size;
3224     BdrvDirtyBitmap *bitmap;
3225     uint32_t sector_granularity;
3226 
3227     assert((granularity & (granularity - 1)) == 0);
3228 
3229     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3230         error_setg(errp, "Bitmap already exists: %s", name);
3231         return NULL;
3232     }
3233     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3234     assert(sector_granularity);
3235     bitmap_size = bdrv_nb_sectors(bs);
3236     if (bitmap_size < 0) {
3237         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3238         errno = -bitmap_size;
3239         return NULL;
3240     }
3241     bitmap = g_new0(BdrvDirtyBitmap, 1);
3242     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3243     bitmap->size = bitmap_size;
3244     bitmap->name = g_strdup(name);
3245     bitmap->disabled = false;
3246     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3247     return bitmap;
3248 }
3249 
3250 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3251 {
3252     return bitmap->successor;
3253 }
3254 
3255 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3256 {
3257     return !(bitmap->disabled || bitmap->successor);
3258 }
3259 
3260 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3261 {
3262     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3263         return DIRTY_BITMAP_STATUS_FROZEN;
3264     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3265         return DIRTY_BITMAP_STATUS_DISABLED;
3266     } else {
3267         return DIRTY_BITMAP_STATUS_ACTIVE;
3268     }
3269 }
3270 
3271 /**
3272  * Create a successor bitmap destined to replace this bitmap after an operation.
3273  * Requires that the bitmap is not frozen and has no successor.
3274  */
3275 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3276                                        BdrvDirtyBitmap *bitmap, Error **errp)
3277 {
3278     uint64_t granularity;
3279     BdrvDirtyBitmap *child;
3280 
3281     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3282         error_setg(errp, "Cannot create a successor for a bitmap that is "
3283                    "currently frozen");
3284         return -1;
3285     }
3286     assert(!bitmap->successor);
3287 
3288     /* Create an anonymous successor */
3289     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3290     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3291     if (!child) {
3292         return -1;
3293     }
3294 
3295     /* Successor will be on or off based on our current state. */
3296     child->disabled = bitmap->disabled;
3297 
3298     /* Install the successor and freeze the parent */
3299     bitmap->successor = child;
3300     return 0;
3301 }
3302 
3303 /**
3304  * For a bitmap with a successor, yield our name to the successor,
3305  * delete the old bitmap, and return a handle to the new bitmap.
3306  */
3307 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3308                                             BdrvDirtyBitmap *bitmap,
3309                                             Error **errp)
3310 {
3311     char *name;
3312     BdrvDirtyBitmap *successor = bitmap->successor;
3313 
3314     if (successor == NULL) {
3315         error_setg(errp, "Cannot relinquish control if "
3316                    "there's no successor present");
3317         return NULL;
3318     }
3319 
3320     name = bitmap->name;
3321     bitmap->name = NULL;
3322     successor->name = name;
3323     bitmap->successor = NULL;
3324     bdrv_release_dirty_bitmap(bs, bitmap);
3325 
3326     return successor;
3327 }
3328 
3329 /**
3330  * In cases of failure where we can no longer safely delete the parent,
3331  * we may wish to re-join the parent and child/successor.
3332  * The merged parent will be un-frozen, but not explicitly re-enabled.
3333  */
3334 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3335                                            BdrvDirtyBitmap *parent,
3336                                            Error **errp)
3337 {
3338     BdrvDirtyBitmap *successor = parent->successor;
3339 
3340     if (!successor) {
3341         error_setg(errp, "Cannot reclaim a successor when none is present");
3342         return NULL;
3343     }
3344 
3345     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3346         error_setg(errp, "Merging of parent and successor bitmap failed");
3347         return NULL;
3348     }
3349     bdrv_release_dirty_bitmap(bs, successor);
3350     parent->successor = NULL;
3351 
3352     return parent;
3353 }
3354 
3355 /**
3356  * Truncates _all_ bitmaps attached to a BDS.
3357  */
3358 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3359 {
3360     BdrvDirtyBitmap *bitmap;
3361     uint64_t size = bdrv_nb_sectors(bs);
3362 
3363     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3364         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3365         hbitmap_truncate(bitmap->bitmap, size);
3366         bitmap->size = size;
3367     }
3368 }
3369 
3370 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3371 {
3372     BdrvDirtyBitmap *bm, *next;
3373     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3374         if (bm == bitmap) {
3375             assert(!bdrv_dirty_bitmap_frozen(bm));
3376             QLIST_REMOVE(bitmap, list);
3377             hbitmap_free(bitmap->bitmap);
3378             g_free(bitmap->name);
3379             g_free(bitmap);
3380             return;
3381         }
3382     }
3383 }
3384 
/*
 * Mark @bitmap as disabled.  The bitmap must not be frozen.
 */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
3390 
/*
 * Clear the disabled mark of @bitmap.  The bitmap must not be frozen.
 */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
3396 
3397 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3398 {
3399     BdrvDirtyBitmap *bm;
3400     BlockDirtyInfoList *list = NULL;
3401     BlockDirtyInfoList **plist = &list;
3402 
3403     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3404         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3405         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3406         info->count = bdrv_get_dirty_count(bm);
3407         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3408         info->has_name = !!bm->name;
3409         info->name = g_strdup(bm->name);
3410         info->status = bdrv_dirty_bitmap_status(bm);
3411         entry->value = info;
3412         *plist = entry;
3413         plist = &entry->next;
3414     }
3415 
3416     return list;
3417 }
3418 
3419 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3420 {
3421     if (bitmap) {
3422         return hbitmap_get(bitmap->bitmap, sector);
3423     } else {
3424         return 0;
3425     }
3426 }
3427 
3428 /**
3429  * Chooses a default granularity based on the existing cluster size,
3430  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3431  * is no cluster size information available.
3432  */
3433 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3434 {
3435     BlockDriverInfo bdi;
3436     uint32_t granularity;
3437 
3438     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3439         granularity = MAX(4096, bdi.cluster_size);
3440         granularity = MIN(65536, granularity);
3441     } else {
3442         granularity = 65536;
3443     }
3444 
3445     return granularity;
3446 }
3447 
/*
 * Return BDRV_SECTOR_SIZE shifted left by the granularity that the
 * underlying hbitmap reports for @bitmap.
 */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
3452 
/*
 * Initialise @hbi to iterate over the hbitmap of @bitmap, starting at 0.
 */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
3457 
/*
 * Call hbitmap_set() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled.
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
3464 
/*
 * Call hbitmap_reset() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled.
 */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
3471 
/*
 * Call hbitmap_reset_all() on @bitmap.  The bitmap must be enabled.
 */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset_all(bitmap->bitmap);
}
3477 
3478 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3479                     int nr_sectors)
3480 {
3481     BdrvDirtyBitmap *bitmap;
3482     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3483         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3484             continue;
3485         }
3486         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3487     }
3488 }
3489 
/**
 * Advance an HBitmapIter to an arbitrary offset.
 *
 * The iterator is re-initialised on the hbitmap it already refers to
 * (hbi->hb, which must be set), starting at @offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
3498 
/*
 * Return hbitmap_count() of the hbitmap behind @bitmap.
 */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
3503 
/* Get a reference to bs by incrementing its reference count.  Unlike
 * bdrv_unref(), this function does not accept a NULL @bs. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3509 
3510 /* Release a previously grabbed reference to bs.
3511  * If after releasing, reference count is zero, the BlockDriverState is
3512  * deleted. */
3513 void bdrv_unref(BlockDriverState *bs)
3514 {
3515     if (!bs) {
3516         return;
3517     }
3518     assert(bs->refcnt > 0);
3519     if (--bs->refcnt == 0) {
3520         bdrv_delete(bs);
3521     }
3522 }
3523 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error passed to bdrv_op_block(); bdrv_op_unblock() matches entries by
     * this pointer, and its message is quoted by bdrv_op_is_blocked() */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3528 
3529 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3530 {
3531     BdrvOpBlocker *blocker;
3532     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3533     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3534         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3535         if (errp) {
3536             error_setg(errp, "Node '%s' is busy: %s",
3537                        bdrv_get_device_or_node_name(bs),
3538                        error_get_pretty(blocker->reason));
3539         }
3540         return true;
3541     }
3542     return false;
3543 }
3544 
3545 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3546 {
3547     BdrvOpBlocker *blocker;
3548     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3549 
3550     blocker = g_new0(BdrvOpBlocker, 1);
3551     blocker->reason = reason;
3552     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3553 }
3554 
3555 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3556 {
3557     BdrvOpBlocker *blocker, *next;
3558     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3559     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3560         if (blocker->reason == reason) {
3561             QLIST_REMOVE(blocker, list);
3562             g_free(blocker);
3563         }
3564     }
3565 }
3566 
3567 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3568 {
3569     int i;
3570     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3571         bdrv_op_block(bs, i, reason);
3572     }
3573 }
3574 
3575 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3576 {
3577     int i;
3578     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3579         bdrv_op_unblock(bs, i, reason);
3580     }
3581 }
3582 
3583 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3584 {
3585     int i;
3586 
3587     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3588         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3589             return false;
3590         }
3591     }
3592     return true;
3593 }
3594 
/*
 * Switch on I/O status tracking for @bs and reset the recorded status to
 * BLOCK_DEVICE_IO_STATUS_OK.
 */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3600 
3601 /* The I/O status is only enabled if the drive explicitly
3602  * enables it _and_ the VM is configured to stop on errors */
3603 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3604 {
3605     return (bs->iostatus_enabled &&
3606            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3607             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3608             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3609 }
3610 
/*
 * Switch off I/O status tracking for @bs.  The recorded bs->iostatus value
 * is left as it is.
 */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3615 
3616 void bdrv_iostatus_reset(BlockDriverState *bs)
3617 {
3618     if (bdrv_iostatus_is_enabled(bs)) {
3619         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3620         if (bs->job) {
3621             block_job_iostatus_reset(bs->job);
3622         }
3623     }
3624 }
3625 
3626 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3627 {
3628     assert(bdrv_iostatus_is_enabled(bs));
3629     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3630         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3631                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3632     }
3633 }
3634 
/*
 * Create the image @filename in format @fmt.
 *
 * @filename:      name of the image to create; also used to look up the
 *                 protocol driver
 * @fmt:           name of the format driver
 * @base_filename: if non-NULL, stored as the backing file option
 * @base_fmt:      if non-NULL, stored as the backing format option
 * @options:       if non-NULL, option string parsed into the creation options
 * @img_size:      value stored as the size option before @options is parsed
 * @flags:         open flags used when the backing file has to be opened to
 *                 find out its size (BDRV_O_RDWR, BDRV_O_SNAPSHOT and
 *                 BDRV_O_NO_BACKING are masked out for that)
 * @errp:          set on failure
 * @quiet:         if true, the "Formatting ..." line is not printed
 *
 * Both the format driver and the protocol driver must provide create_opts.
 * If the size option ends up as -1, the size is taken from the backing file;
 * without a backing file that is an error.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* The option list is the format driver's options followed by the
     * protocol driver's options */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The detailed parse error is reported right here; the caller
             * only gets the generic message below */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    /* base_filename and base_fmt are applied after the -o options */
    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Refuse to create an image whose backing file name is its own name */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE: this declaration shadows the outer 'size' */
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            /* Pass an explicit backing format on as the "driver" option */
            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error is replaced by the message above */
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common exit: release the options and hand over any pending error */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3801 
/*
 * Return the AioContext currently recorded in @bs (bs->aio_context).
 */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
3806 
3807 void bdrv_detach_aio_context(BlockDriverState *bs)
3808 {
3809     BdrvAioNotifier *baf;
3810 
3811     if (!bs->drv) {
3812         return;
3813     }
3814 
3815     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3816         baf->detach_aio_context(baf->opaque);
3817     }
3818 
3819     if (bs->io_limits_enabled) {
3820         throttle_timers_detach_aio_context(&bs->throttle_timers);
3821     }
3822     if (bs->drv->bdrv_detach_aio_context) {
3823         bs->drv->bdrv_detach_aio_context(bs);
3824     }
3825     if (bs->file) {
3826         bdrv_detach_aio_context(bs->file->bs);
3827     }
3828     if (bs->backing) {
3829         bdrv_detach_aio_context(bs->backing->bs);
3830     }
3831 
3832     bs->aio_context = NULL;
3833 }
3834 
3835 void bdrv_attach_aio_context(BlockDriverState *bs,
3836                              AioContext *new_context)
3837 {
3838     BdrvAioNotifier *ban;
3839 
3840     if (!bs->drv) {
3841         return;
3842     }
3843 
3844     bs->aio_context = new_context;
3845 
3846     if (bs->backing) {
3847         bdrv_attach_aio_context(bs->backing->bs, new_context);
3848     }
3849     if (bs->file) {
3850         bdrv_attach_aio_context(bs->file->bs, new_context);
3851     }
3852     if (bs->drv->bdrv_attach_aio_context) {
3853         bs->drv->bdrv_attach_aio_context(bs, new_context);
3854     }
3855     if (bs->io_limits_enabled) {
3856         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3857     }
3858 
3859     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3860         ban->attached_aio_context(new_context, ban->opaque);
3861     }
3862 }
3863 
/*
 * Move @bs from its current AioContext to @new_context.
 *
 * The node is drained, detached from the old context, and then attached to
 * @new_context while @new_context is held.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain(bs); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
3877 
3878 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3879         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3880         void (*detach_aio_context)(void *opaque), void *opaque)
3881 {
3882     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3883     *ban = (BdrvAioNotifier){
3884         .attached_aio_context = attached_aio_context,
3885         .detach_aio_context   = detach_aio_context,
3886         .opaque               = opaque
3887     };
3888 
3889     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3890 }
3891 
3892 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3893                                       void (*attached_aio_context)(AioContext *,
3894                                                                    void *),
3895                                       void (*detach_aio_context)(void *),
3896                                       void *opaque)
3897 {
3898     BdrvAioNotifier *ban, *ban_next;
3899 
3900     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3901         if (ban->attached_aio_context == attached_aio_context &&
3902             ban->detach_aio_context   == detach_aio_context   &&
3903             ban->opaque               == opaque)
3904         {
3905             QLIST_REMOVE(ban, list);
3906             g_free(ban);
3907 
3908             return;
3909         }
3910     }
3911 
3912     abort();
3913 }
3914 
3915 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3916                        BlockDriverAmendStatusCB *status_cb)
3917 {
3918     if (!bs->drv->bdrv_amend_options) {
3919         return -ENOTSUP;
3920     }
3921     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3922 }
3923 
3924 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3925  * of block filter and by bdrv_is_first_non_filter.
3926  * It is used to test if the given bs is the candidate or recurse more in the
3927  * node graph.
3928  */
3929 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3930                                       BlockDriverState *candidate)
3931 {
3932     /* return false if basic checks fails */
3933     if (!bs || !bs->drv) {
3934         return false;
3935     }
3936 
3937     /* the code reached a non block filter driver -> check if the bs is
3938      * the same as the candidate. It's the recursion termination condition.
3939      */
3940     if (!bs->drv->is_filter) {
3941         return bs == candidate;
3942     }
3943     /* Down this path the driver is a block filter driver */
3944 
3945     /* If the block filter recursion method is defined use it to recurse down
3946      * the node graph.
3947      */
3948     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3949         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3950     }
3951 
3952     /* the driver is a block filter but don't allow to recurse -> return false
3953      */
3954     return false;
3955 }
3956 
3957 /* This function checks if the candidate is the first non filter bs down it's
3958  * bs chain. Since we don't have pointers to parents it explore all bs chains
3959  * from the top. Some filters can choose not to pass down the recursion.
3960  */
3961 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3962 {
3963     BlockDriverState *bs;
3964 
3965     /* walk down the bs forest recursively */
3966     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3967         bool perm;
3968 
3969         /* try to recurse in this top level bs */
3970         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3971 
3972         /* candidate is the first non filter */
3973         if (perm) {
3974             return true;
3975         }
3976     }
3977 
3978     return false;
3979 }
3980 
3981 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3982                                         const char *node_name, Error **errp)
3983 {
3984     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3985     AioContext *aio_context;
3986 
3987     if (!to_replace_bs) {
3988         error_setg(errp, "Node name '%s' not found", node_name);
3989         return NULL;
3990     }
3991 
3992     aio_context = bdrv_get_aio_context(to_replace_bs);
3993     aio_context_acquire(aio_context);
3994 
3995     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3996         to_replace_bs = NULL;
3997         goto out;
3998     }
3999 
4000     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4001      * most non filter in order to prevent data corruption.
4002      * Another benefit is that this tests exclude backing files which are
4003      * blocked by the backing blockers.
4004      */
4005     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4006         error_setg(errp, "Only top most non filter can be replaced");
4007         to_replace_bs = NULL;
4008         goto out;
4009     }
4010 
4011 out:
4012     aio_context_release(aio_context);
4013     return to_replace_bs;
4014 }
4015 
4016 static bool append_open_options(QDict *d, BlockDriverState *bs)
4017 {
4018     const QDictEntry *entry;
4019     bool found_any = false;
4020 
4021     for (entry = qdict_first(bs->options); entry;
4022          entry = qdict_next(bs->options, entry))
4023     {
4024         /* Only take options for this level and exclude all non-driver-specific
4025          * options */
4026         if (!strchr(qdict_entry_key(entry), '.') &&
4027             strcmp(qdict_entry_key(entry), "node-name"))
4028         {
4029             qobject_incref(qdict_entry_value(entry));
4030             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4031             found_any = true;
4032         }
4033     }
4034 
4035     return found_any;
4036 }
4037 
4038 /* Updates the following BDS fields:
4039  *  - exact_filename: A filename which may be used for opening a block device
4040  *                    which (mostly) equals the given BDS (even without any
4041  *                    other options; so reading and writing must return the same
4042  *                    results, but caching etc. may be different)
4043  *  - full_open_options: Options which, when given when opening a block device
4044  *                       (without a filename), result in a BDS (mostly)
4045  *                       equalling the given one
4046  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4047  *              full_open_options is converted to a JSON object, prefixed with
4048  *              "json:" (for use through the JSON pseudo protocol) and put here.
4049  */
4050 void bdrv_refresh_filename(BlockDriverState *bs)
4051 {
4052     BlockDriver *drv = bs->drv;
4053     QDict *opts;
4054 
4055     if (!drv) {
4056         return;
4057     }
4058 
4059     /* This BDS's file name will most probably depend on its file's name, so
4060      * refresh that first */
4061     if (bs->file) {
4062         bdrv_refresh_filename(bs->file->bs);
4063     }
4064 
4065     if (drv->bdrv_refresh_filename) {
4066         /* Obsolete information is of no use here, so drop the old file name
4067          * information before refreshing it */
4068         bs->exact_filename[0] = '\0';
4069         if (bs->full_open_options) {
4070             QDECREF(bs->full_open_options);
4071             bs->full_open_options = NULL;
4072         }
4073 
4074         drv->bdrv_refresh_filename(bs);
4075     } else if (bs->file) {
4076         /* Try to reconstruct valid information from the underlying file */
4077         bool has_open_options;
4078 
4079         bs->exact_filename[0] = '\0';
4080         if (bs->full_open_options) {
4081             QDECREF(bs->full_open_options);
4082             bs->full_open_options = NULL;
4083         }
4084 
4085         opts = qdict_new();
4086         has_open_options = append_open_options(opts, bs);
4087 
4088         /* If no specific options have been given for this BDS, the filename of
4089          * the underlying file should suffice for this one as well */
4090         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4091             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4092         }
4093         /* Reconstructing the full options QDict is simple for most format block
4094          * drivers, as long as the full options are known for the underlying
4095          * file BDS. The full options QDict of that file BDS should somehow
4096          * contain a representation of the filename, therefore the following
4097          * suffices without querying the (exact_)filename of this BDS. */
4098         if (bs->file->bs->full_open_options) {
4099             qdict_put_obj(opts, "driver",
4100                           QOBJECT(qstring_from_str(drv->format_name)));
4101             QINCREF(bs->file->bs->full_open_options);
4102             qdict_put_obj(opts, "file",
4103                           QOBJECT(bs->file->bs->full_open_options));
4104 
4105             bs->full_open_options = opts;
4106         } else {
4107             QDECREF(opts);
4108         }
4109     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4110         /* There is no underlying file BDS (at least referenced by BDS.file),
4111          * so the full options QDict should be equal to the options given
4112          * specifically for this block device when it was opened (plus the
4113          * driver specification).
4114          * Because those options don't change, there is no need to update
4115          * full_open_options when it's already set. */
4116 
4117         opts = qdict_new();
4118         append_open_options(opts, bs);
4119         qdict_put_obj(opts, "driver",
4120                       QOBJECT(qstring_from_str(drv->format_name)));
4121 
4122         if (bs->exact_filename[0]) {
4123             /* This may not work for all block protocol drivers (some may
4124              * require this filename to be parsed), but we have to find some
4125              * default solution here, so just include it. If some block driver
4126              * does not support pure options without any filename at all or
4127              * needs some special format of the options QDict, it needs to
4128              * implement the driver-specific bdrv_refresh_filename() function.
4129              */
4130             qdict_put_obj(opts, "filename",
4131                           QOBJECT(qstring_from_str(bs->exact_filename)));
4132         }
4133 
4134         bs->full_open_options = opts;
4135     }
4136 
4137     if (bs->exact_filename[0]) {
4138         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4139     } else if (bs->full_open_options) {
4140         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4141         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4142                  qstring_get_str(json));
4143         QDECREF(json);
4144     }
4145 }
4146 
4147 /* This accessor function purpose is to allow the device models to access the
4148  * BlockAcctStats structure embedded inside a BlockDriverState without being
4149  * aware of the BlockDriverState structure layout.
4150  * It will go away when the BlockAcctStats structure will be moved inside
4151  * the device models.
4152  */
4153 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4154 {
4155     return &bs->stats;
4156 }
4157