xref: /openbmc/qemu/block.c (revision f3930ed0bb1945b59da8e591072b5c79606d0760)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
/**
 * Bookkeeping for one dirty bitmap.
 *
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably BlockDriverState.dirty_bitmaps - TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
70 
/*
 * Sentinel result value.  bdrv_create() seeds CreateCo.ret with it and keeps
 * polling until the creation coroutine has replaced it with a real result.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates created through bdrv_new_root(), linked by device_list */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/*
 * BlockDriverStates that were given a node name; bdrv_assign_node_name()
 * appends to this list through the node_list linkage.
 */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Registered block drivers; bdrv_register() inserts new ones at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations for functions defined further down in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role,
                             BlockDriver *drv, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
91 
92 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter followed by ':'
 * (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Not a drive letter; filename[1] is deliberately left unread */
        return 0;
    }
    return filename[1] == ':';
}
99 
/*
 * Return 1 if @filename is exactly a drive specification ("d:") or starts
 * with the "\\.\" prefix (also accepted as "//./"); 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Bare drive specification: letter, colon, end of string */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* "\\.\" prefix, spelled with either kind of slash */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    return strstart(filename, "//./", NULL) ? 1 : 0;
}
110 #endif
111 
112 size_t bdrv_opt_mem_align(BlockDriverState *bs)
113 {
114     if (!bs || !bs->drv) {
115         /* page size or 4k (hdd sector size) should be on the safe side */
116         return MAX(4096, getpagesize());
117     }
118 
119     return bs->bl.opt_mem_alignment;
120 }
121 
122 size_t bdrv_min_mem_align(BlockDriverState *bs)
123 {
124     if (!bs || !bs->drv) {
125         /* page size or 4k (hdd sector size) should be on the safe side */
126         return MAX(4096, getpagesize());
127     }
128 
129     return bs->bl.min_mem_alignment;
130 }
131 
/*
 * Return non-zero if @path starts with "<protocol>:", i.e. a ':' occurs
 * before any directory separator.  On Windows, drive specifications are
 * never treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Inspect the first stop character (or the terminating NUL) */
    return path[strcspn(path, stop_chars)] == ':';
}
149 
/*
 * Return non-zero if @path is absolute: it starts with '/', or on Windows
 * it starts with '\' or is recognized as a drive specification.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
162 
/*
 * Build in @dest (capacity @dest_size bytes, always NUL-terminated when
 * @dest_size > 0) the location of @filename interpreted relative to
 * @base_path.
 *
 * An absolute @filename is copied unchanged.  Otherwise the result is the
 * leading part of @base_path up to and including its last directory
 * separator, or up to and including its first ':' if that comes later
 * (which is how URLs are supported), followed by @filename.
 * Nothing is written if @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_dir, *sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of the base, if there is one */
    sep = strchr(base_path, ':');
    after_proto = sep ? sep + 1 : base_path;

    /* Position just past the last directory separator of the base */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!sep || backslash > sep) {
            sep = backslash;
        }
    }
#endif
    after_dir = sep ? sep + 1 : base_path;

    /* Keep whichever prefix is longer, clamped to the buffer size */
    prefix_len = (after_dir > after_proto ? after_dir : after_proto)
                 - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
206 
207 void bdrv_get_full_backing_filename_from_filename(const char *backed,
208                                                   const char *backing,
209                                                   char *dest, size_t sz,
210                                                   Error **errp)
211 {
212     if (backing[0] == '\0' || path_has_protocol(backing) ||
213         path_is_absolute(backing))
214     {
215         pstrcpy(dest, sz, backing);
216     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
217         error_setg(errp, "Cannot use relative backing file names for '%s'",
218                    backed);
219     } else {
220         path_combine(dest, sz, backed, backing);
221     }
222 }
223 
224 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
225                                     Error **errp)
226 {
227     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
228 
229     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
230                                                  dest, sz, errp);
231 }
232 
233 void bdrv_register(BlockDriver *bdrv)
234 {
235     bdrv_setup_io_funcs(bdrv);
236 
237     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
238 }
239 
240 BlockDriverState *bdrv_new_root(void)
241 {
242     BlockDriverState *bs = bdrv_new();
243 
244     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
245     return bs;
246 }
247 
248 BlockDriverState *bdrv_new(void)
249 {
250     BlockDriverState *bs;
251     int i;
252 
253     bs = g_new0(BlockDriverState, 1);
254     QLIST_INIT(&bs->dirty_bitmaps);
255     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
256         QLIST_INIT(&bs->op_blockers[i]);
257     }
258     bdrv_iostatus_disable(bs);
259     notifier_list_init(&bs->close_notifiers);
260     notifier_with_return_list_init(&bs->before_write_notifiers);
261     qemu_co_queue_init(&bs->throttled_reqs[0]);
262     qemu_co_queue_init(&bs->throttled_reqs[1]);
263     bs->refcnt = 1;
264     bs->aio_context = qemu_get_aio_context();
265 
266     return bs;
267 }
268 
269 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
270 {
271     notifier_list_add(&bs->close_notifiers, notify);
272 }
273 
274 BlockDriver *bdrv_find_format(const char *format_name)
275 {
276     BlockDriver *drv1;
277     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
278         if (!strcmp(drv1->format_name, format_name)) {
279             return drv1;
280         }
281     }
282     return NULL;
283 }
284 
285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
286 {
287     static const char *whitelist_rw[] = {
288         CONFIG_BDRV_RW_WHITELIST
289     };
290     static const char *whitelist_ro[] = {
291         CONFIG_BDRV_RO_WHITELIST
292     };
293     const char **p;
294 
295     if (!whitelist_rw[0] && !whitelist_ro[0]) {
296         return 1;               /* no whitelist, anything goes */
297     }
298 
299     for (p = whitelist_rw; *p; p++) {
300         if (!strcmp(drv->format_name, *p)) {
301             return 1;
302         }
303     }
304     if (read_only) {
305         for (p = whitelist_ro; *p; p++) {
306             if (!strcmp(drv->format_name, *p)) {
307                 return 1;
308             }
309         }
310     }
311     return 0;
312 }
313 
314 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
315                                           bool read_only)
316 {
317     BlockDriver *drv = bdrv_find_format(format_name);
318     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
319 }
320 
/* Arguments and results exchanged between bdrv_create() and its coroutine */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback gets called */
    char *filename;     /* Copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* Options handed through to the driver callback */
    int ret;            /* NOT_DONE until the callback result is stored */
    Error *err;         /* Error set by the driver callback, if any */
} CreateCo;
328 
329 static void coroutine_fn bdrv_create_co_entry(void *opaque)
330 {
331     Error *local_err = NULL;
332     int ret;
333 
334     CreateCo *cco = opaque;
335     assert(cco->drv);
336 
337     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
338     if (local_err) {
339         error_propagate(&cco->err, local_err);
340     }
341     cco->ret = ret;
342 }
343 
344 int bdrv_create(BlockDriver *drv, const char* filename,
345                 QemuOpts *opts, Error **errp)
346 {
347     int ret;
348 
349     Coroutine *co;
350     CreateCo cco = {
351         .drv = drv,
352         .filename = g_strdup(filename),
353         .opts = opts,
354         .ret = NOT_DONE,
355         .err = NULL,
356     };
357 
358     if (!drv->bdrv_create) {
359         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
360         ret = -ENOTSUP;
361         goto out;
362     }
363 
364     if (qemu_in_coroutine()) {
365         /* Fast-path if already in coroutine context */
366         bdrv_create_co_entry(&cco);
367     } else {
368         co = qemu_coroutine_create(bdrv_create_co_entry);
369         qemu_coroutine_enter(co, &cco);
370         while (cco.ret == NOT_DONE) {
371             aio_poll(qemu_get_aio_context(), true);
372         }
373     }
374 
375     ret = cco.ret;
376     if (ret < 0) {
377         if (cco.err) {
378             error_propagate(errp, cco.err);
379         } else {
380             error_setg_errno(errp, -ret, "Could not create image");
381         }
382     }
383 
384 out:
385     g_free(cco.filename);
386     return ret;
387 }
388 
389 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
390 {
391     BlockDriver *drv;
392     Error *local_err = NULL;
393     int ret;
394 
395     drv = bdrv_find_protocol(filename, true, errp);
396     if (drv == NULL) {
397         return -ENOENT;
398     }
399 
400     ret = bdrv_create(drv, filename, opts, &local_err);
401     if (local_err) {
402         error_propagate(errp, local_err);
403     }
404     return ret;
405 }
406 
407 /**
408  * Try to get @bs's logical and physical block size.
409  * On success, store them in @bsz struct and return 0.
410  * On failure return -errno.
411  * @bs must not be empty.
412  */
413 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
414 {
415     BlockDriver *drv = bs->drv;
416 
417     if (drv && drv->bdrv_probe_blocksizes) {
418         return drv->bdrv_probe_blocksizes(bs, bsz);
419     }
420 
421     return -ENOTSUP;
422 }
423 
424 /**
425  * Try to get @bs's geometry (cyls, heads, sectors).
426  * On success, store them in @geo struct and return 0.
427  * On failure return -errno.
428  * @bs must not be empty.
429  */
430 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
431 {
432     BlockDriver *drv = bs->drv;
433 
434     if (drv && drv->bdrv_probe_geometry) {
435         return drv->bdrv_probe_geometry(bs, geo);
436     }
437 
438     return -ENOTSUP;
439 }
440 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the capacity of
 * that buffer in bytes.  On Windows the buffer must hold at least MAX_PATH
 * bytes (asserted).  Elsewhere the file is created with mkstemp() in
 * $TMPDIR, or in /var/tmp if TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows: the
 * negated GetLastError() code).  -EOVERFLOW means the name does not fit
 * into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int ret;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* A negative result means nothing usable was formatted; treat it like
     * truncation instead of handing the buffer to mkstemp() */
    ret = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (ret < 0 || ret >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save the close() error first: unlink() may overwrite errno */
        ret = -errno;
        unlink(filename);
        return ret;
    }
    return 0;
#endif
}
476 
477 /*
478  * Detect host devices. By convention, /dev/cdrom[N] is always
479  * recognized as a host CDROM.
480  */
481 static BlockDriver *find_hdev_driver(const char *filename)
482 {
483     int score_max = 0, score;
484     BlockDriver *drv = NULL, *d;
485 
486     QLIST_FOREACH(d, &bdrv_drivers, list) {
487         if (d->bdrv_probe_device) {
488             score = d->bdrv_probe_device(filename);
489             if (score > score_max) {
490                 score_max = score;
491                 drv = d;
492             }
493         }
494     }
495 
496     return drv;
497 }
498 
499 BlockDriver *bdrv_find_protocol(const char *filename,
500                                 bool allow_protocol_prefix,
501                                 Error **errp)
502 {
503     BlockDriver *drv1;
504     char protocol[128];
505     int len;
506     const char *p;
507 
508     /* TODO Drivers without bdrv_file_open must be specified explicitly */
509 
510     /*
511      * XXX(hch): we really should not let host device detection
512      * override an explicit protocol specification, but moving this
513      * later breaks access to device names with colons in them.
514      * Thanks to the brain-dead persistent naming schemes on udev-
515      * based Linux systems those actually are quite common.
516      */
517     drv1 = find_hdev_driver(filename);
518     if (drv1) {
519         return drv1;
520     }
521 
522     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
523         return &bdrv_file;
524     }
525 
526     p = strchr(filename, ':');
527     assert(p != NULL);
528     len = p - filename;
529     if (len > sizeof(protocol) - 1)
530         len = sizeof(protocol) - 1;
531     memcpy(protocol, filename, len);
532     protocol[len] = '\0';
533     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
534         if (drv1->protocol_name &&
535             !strcmp(drv1->protocol_name, protocol)) {
536             return drv1;
537         }
538     }
539 
540     error_setg(errp, "Unknown protocol '%s'", protocol);
541     return NULL;
542 }
543 
544 /*
545  * Guess image format by probing its contents.
546  * This is not a good idea when your image is raw (CVE-2008-2004), but
547  * we do it anyway for backward compatibility.
548  *
549  * @buf         contains the image's first @buf_size bytes.
550  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
551  *              but can be smaller if the image file is smaller)
552  * @filename    is its filename.
553  *
554  * For all block drivers, call the bdrv_probe() method to get its
555  * probing score.
556  * Return the first block driver with the highest probing score.
557  */
558 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
559                             const char *filename)
560 {
561     int score_max = 0, score;
562     BlockDriver *drv = NULL, *d;
563 
564     QLIST_FOREACH(d, &bdrv_drivers, list) {
565         if (d->bdrv_probe) {
566             score = d->bdrv_probe(buf, buf_size, filename);
567             if (score > score_max) {
568                 score_max = score;
569                 drv = d;
570             }
571         }
572     }
573 
574     return drv;
575 }
576 
577 static int find_image_format(BlockDriverState *bs, const char *filename,
578                              BlockDriver **pdrv, Error **errp)
579 {
580     BlockDriver *drv;
581     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
582     int ret = 0;
583 
584     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
585     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
586         *pdrv = &bdrv_raw;
587         return ret;
588     }
589 
590     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
591     if (ret < 0) {
592         error_setg_errno(errp, -ret, "Could not read image for determining its "
593                          "format");
594         *pdrv = NULL;
595         return ret;
596     }
597 
598     drv = bdrv_probe_all(buf, ret, filename);
599     if (!drv) {
600         error_setg(errp, "Could not determine image format: No compatible "
601                    "driver found");
602         ret = -ENOENT;
603     }
604     *pdrv = drv;
605     return ret;
606 }
607 
608 /**
609  * Set the current 'total_sectors' value
610  * Return 0 on success, -errno on error.
611  */
612 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
613 {
614     BlockDriver *drv = bs->drv;
615 
616     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
617     if (bs->sg)
618         return 0;
619 
620     /* query actual device if possible, otherwise just trust the hint */
621     if (drv->bdrv_getlength) {
622         int64_t length = drv->bdrv_getlength(bs);
623         if (length < 0) {
624             return length;
625         }
626         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
627     }
628 
629     bs->total_sectors = hint;
630     return 0;
631 }
632 
633 /**
634  * Set open flags for a given discard mode
635  *
636  * Return 0 on success, -1 if the discard mode was invalid.
637  */
638 int bdrv_parse_discard_flags(const char *mode, int *flags)
639 {
640     *flags &= ~BDRV_O_UNMAP;
641 
642     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
643         /* do nothing */
644     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
645         *flags |= BDRV_O_UNMAP;
646     } else {
647         return -1;
648     }
649 
650     return 0;
651 }
652 
653 /**
654  * Set open flags for a given cache mode
655  *
656  * Return 0 on success, -1 if the cache mode was invalid.
657  */
658 int bdrv_parse_cache_flags(const char *mode, int *flags)
659 {
660     *flags &= ~BDRV_O_CACHE_MASK;
661 
662     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
663         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
664     } else if (!strcmp(mode, "directsync")) {
665         *flags |= BDRV_O_NOCACHE;
666     } else if (!strcmp(mode, "writeback")) {
667         *flags |= BDRV_O_CACHE_WB;
668     } else if (!strcmp(mode, "unsafe")) {
669         *flags |= BDRV_O_CACHE_WB;
670         *flags |= BDRV_O_NO_FLUSH;
671     } else if (!strcmp(mode, "writethrough")) {
672         /* this is the default */
673     } else {
674         return -1;
675     }
676 
677     return 0;
678 }
679 
680 /*
681  * Returns the flags that a temporary snapshot should get, based on the
682  * originally requested flags (the originally requested image will have flags
683  * like a backing file)
684  */
685 static int bdrv_temp_snapshot_flags(int flags)
686 {
687     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
688 }
689 
690 /*
691  * Returns the flags that bs->file should get if a protocol driver is expected,
692  * based on the given flags for the parent BDS
693  */
694 static int bdrv_inherited_flags(int flags)
695 {
696     /* Enable protocol handling, disable format probing for bs->file */
697     flags |= BDRV_O_PROTOCOL;
698 
699     /* Our block drivers take care to send flushes and respect unmap policy,
700      * so we can enable both unconditionally on lower layers. */
701     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
702 
703     /* Clear flags that only apply to the top layer */
704     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
705 
706     return flags;
707 }
708 
709 const BdrvChildRole child_file = {
710     .inherit_flags = bdrv_inherited_flags,
711 };
712 
713 /*
714  * Returns the flags that bs->file should get if the use of formats (and not
715  * only protocols) is permitted for it, based on the given flags for the parent
716  * BDS
717  */
718 static int bdrv_inherited_fmt_flags(int parent_flags)
719 {
720     int flags = child_file.inherit_flags(parent_flags);
721     return flags & ~BDRV_O_PROTOCOL;
722 }
723 
724 const BdrvChildRole child_format = {
725     .inherit_flags = bdrv_inherited_fmt_flags,
726 };
727 
728 /*
729  * Returns the flags that bs->backing_hd should get, based on the given flags
730  * for the parent BDS
731  */
732 static int bdrv_backing_flags(int flags)
733 {
734     /* backing files always opened read-only */
735     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
736 
737     /* snapshot=on is handled on the top layer */
738     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
739 
740     return flags;
741 }
742 
743 static const BdrvChildRole child_backing = {
744     .inherit_flags = bdrv_backing_flags,
745 };
746 
747 static int bdrv_open_flags(BlockDriverState *bs, int flags)
748 {
749     int open_flags = flags | BDRV_O_CACHE_WB;
750 
751     /*
752      * Clear flags that are internal to the block layer before opening the
753      * image.
754      */
755     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
756 
757     /*
758      * Snapshots should be writable.
759      */
760     if (flags & BDRV_O_TEMPORARY) {
761         open_flags |= BDRV_O_RDWR;
762     }
763 
764     return open_flags;
765 }
766 
767 static void bdrv_assign_node_name(BlockDriverState *bs,
768                                   const char *node_name,
769                                   Error **errp)
770 {
771     if (!node_name) {
772         return;
773     }
774 
775     /* Check for empty string or invalid characters */
776     if (!id_wellformed(node_name)) {
777         error_setg(errp, "Invalid node name");
778         return;
779     }
780 
781     /* takes care of avoiding namespaces collisions */
782     if (blk_by_name(node_name)) {
783         error_setg(errp, "node-name=%s is conflicting with a device id",
784                    node_name);
785         return;
786     }
787 
788     /* takes care of avoiding duplicates node names */
789     if (bdrv_find_node(node_name)) {
790         error_setg(errp, "Duplicate node name");
791         return;
792     }
793 
794     /* copy node name into the bs and insert it into the graph list */
795     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
796     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
797 }
798 
/*
 * Options that bdrv_open_common() handles itself (currently only
 * "node-name").  They are absorbed from the options QDict before the
 * driver's open callback is called.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
811 
/*
 * Common part for opening disk images and files
 *
 * Prepares @bs for driver @drv and calls the driver's open callback:
 * bdrv_file_open() if the driver has one (@file must then be NULL),
 * otherwise bdrv_open() with @file installed as bs->file.
 *
 * Removes all processed options from *options.
 *
 * Returns 0 on success.  On failure a negative errno value is returned and
 * an error is set in @errp; failures that happen after the driver has been
 * attached also reset bs->file, bs->opaque and bs->drv.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The name of an already opened @file takes precedence over the
     * "filename" option */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Move the options listed in bdrv_runtime_opts out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults that are in place before the driver's open callback runs */
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Use the more specific message if the driver would have been
         * acceptable for a read-only device */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Attach the driver; failures from here on must use free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Taken from the caller's @flags, not from open_flags, because
     * bdrv_open_flags() sets BDRV_O_CACHE_WB unconditionally */
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from the
         * returned errno value */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    /* Sanity-check the alignment values now in effect for @bs */
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);

    qemu_opts_del(opts);
    return 0;

    /* Detach the driver again, then fall through to release the options */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
962 
963 static QDict *parse_json_filename(const char *filename, Error **errp)
964 {
965     QObject *options_obj;
966     QDict *options;
967     int ret;
968 
969     ret = strstart(filename, "json:", &filename);
970     assert(ret);
971 
972     options_obj = qobject_from_json(filename);
973     if (!options_obj) {
974         error_setg(errp, "Could not parse the JSON options");
975         return NULL;
976     }
977 
978     if (qobject_type(options_obj) != QTYPE_QDICT) {
979         qobject_decref(options_obj);
980         error_setg(errp, "Invalid JSON object given");
981         return NULL;
982     }
983 
984     options = qobject_to_qdict(options_obj);
985     qdict_flatten(options);
986 
987     return options;
988 }
989 
990 /*
991  * Fills in default options for opening images and converts the legacy
992  * filename/flags pair to option QDict entries.
993  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
994  * block driver has been specified explicitly.
995  */
996 static int bdrv_fill_options(QDict **options, const char **pfilename,
997                              int *flags, BlockDriver *drv, Error **errp)
998 {
999     const char *filename = *pfilename;
1000     const char *drvname;
1001     bool protocol = *flags & BDRV_O_PROTOCOL;
1002     bool parse_filename = false;
1003     BlockDriver *tmp_drv;
1004     Error *local_err = NULL;
1005 
1006     /* Parse json: pseudo-protocol */
1007     if (filename && g_str_has_prefix(filename, "json:")) {
1008         QDict *json_options = parse_json_filename(filename, &local_err);
1009         if (local_err) {
1010             error_propagate(errp, local_err);
1011             return -EINVAL;
1012         }
1013 
1014         /* Options given in the filename have lower priority than options
1015          * specified directly */
1016         qdict_join(*options, json_options, false);
1017         QDECREF(json_options);
1018         *pfilename = filename = NULL;
1019     }
1020 
1021     drvname = qdict_get_try_str(*options, "driver");
1022 
1023     /* If the user has explicitly specified the driver, this choice should
1024      * override the BDRV_O_PROTOCOL flag */
1025     tmp_drv = drv;
1026     if (!tmp_drv && drvname) {
1027         tmp_drv = bdrv_find_format(drvname);
1028     }
1029     if (tmp_drv) {
1030         protocol = tmp_drv->bdrv_file_open;
1031     }
1032 
1033     if (protocol) {
1034         *flags |= BDRV_O_PROTOCOL;
1035     } else {
1036         *flags &= ~BDRV_O_PROTOCOL;
1037     }
1038 
1039     /* Fetch the file name from the options QDict if necessary */
1040     if (protocol && filename) {
1041         if (!qdict_haskey(*options, "filename")) {
1042             qdict_put(*options, "filename", qstring_from_str(filename));
1043             parse_filename = true;
1044         } else {
1045             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1046                              "the same time");
1047             return -EINVAL;
1048         }
1049     }
1050 
1051     /* Find the right block driver */
1052     filename = qdict_get_try_str(*options, "filename");
1053 
1054     if (drv) {
1055         if (drvname) {
1056             error_setg(errp, "Driver specified twice");
1057             return -EINVAL;
1058         }
1059         drvname = drv->format_name;
1060         qdict_put(*options, "driver", qstring_from_str(drvname));
1061     } else {
1062         if (!drvname && protocol) {
1063             if (filename) {
1064                 drv = bdrv_find_protocol(filename, parse_filename, errp);
1065                 if (!drv) {
1066                     return -EINVAL;
1067                 }
1068 
1069                 drvname = drv->format_name;
1070                 qdict_put(*options, "driver", qstring_from_str(drvname));
1071             } else {
1072                 error_setg(errp, "Must specify either driver or file");
1073                 return -EINVAL;
1074             }
1075         } else if (drvname) {
1076             drv = bdrv_find_format(drvname);
1077             if (!drv) {
1078                 error_setg(errp, "Unknown driver '%s'", drvname);
1079                 return -ENOENT;
1080             }
1081         }
1082     }
1083 
1084     assert(drv || !protocol);
1085 
1086     /* Driver-specific filename parsing */
1087     if (drv && drv->bdrv_parse_filename && parse_filename) {
1088         drv->bdrv_parse_filename(filename, *options, &local_err);
1089         if (local_err) {
1090             error_propagate(errp, local_err);
1091             return -EINVAL;
1092         }
1093 
1094         if (!drv->bdrv_needs_filename) {
1095             qdict_del(*options, "filename");
1096         }
1097     }
1098 
1099     return 0;
1100 }
1101 
1102 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1103 {
1104 
1105     if (bs->backing_hd) {
1106         assert(bs->backing_blocker);
1107         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1108     } else if (backing_hd) {
1109         error_setg(&bs->backing_blocker,
1110                    "node is used as backing hd of '%s'",
1111                    bdrv_get_device_or_node_name(bs));
1112     }
1113 
1114     bs->backing_hd = backing_hd;
1115     if (!backing_hd) {
1116         error_free(bs->backing_blocker);
1117         bs->backing_blocker = NULL;
1118         goto out;
1119     }
1120     bs->open_flags &= ~BDRV_O_NO_BACKING;
1121     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1122     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1123             backing_hd->drv ? backing_hd->drv->format_name : "");
1124 
1125     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1126     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1127     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1128                     bs->backing_blocker);
1129 out:
1130     bdrv_refresh_limits(bs, NULL);
1131 }
1132 
1133 /*
1134  * Opens the backing file for a BlockDriverState if not yet open
1135  *
1136  * options is a QDict of options to pass to the block drivers, or NULL for an
1137  * empty set of options. The reference to the QDict is transferred to this
1138  * function (even on failure), so if the caller intends to reuse the dictionary,
1139  * it needs to use QINCREF() before calling bdrv_file_open.
1140  */
1141 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1142 {
1143     char *backing_filename = g_malloc0(PATH_MAX);
1144     int ret = 0;
1145     BlockDriverState *backing_hd;
1146     Error *local_err = NULL;
1147 
1148     if (bs->backing_hd != NULL) {
1149         QDECREF(options);
1150         goto free_exit;
1151     }
1152 
1153     /* NULL means an empty set of options */
1154     if (options == NULL) {
1155         options = qdict_new();
1156     }
1157 
1158     bs->open_flags &= ~BDRV_O_NO_BACKING;
1159     if (qdict_haskey(options, "file.filename")) {
1160         backing_filename[0] = '\0';
1161     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1162         QDECREF(options);
1163         goto free_exit;
1164     } else {
1165         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1166                                        &local_err);
1167         if (local_err) {
1168             ret = -EINVAL;
1169             error_propagate(errp, local_err);
1170             QDECREF(options);
1171             goto free_exit;
1172         }
1173     }
1174 
1175     if (!bs->drv || !bs->drv->supports_backing) {
1176         ret = -EINVAL;
1177         error_setg(errp, "Driver doesn't support backing files");
1178         QDECREF(options);
1179         goto free_exit;
1180     }
1181 
1182     backing_hd = bdrv_new();
1183 
1184     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1185         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1186     }
1187 
1188     assert(bs->backing_hd == NULL);
1189     ret = bdrv_open_inherit(&backing_hd,
1190                             *backing_filename ? backing_filename : NULL,
1191                             NULL, options, 0, bs, &child_backing,
1192                             NULL, &local_err);
1193     if (ret < 0) {
1194         bdrv_unref(backing_hd);
1195         backing_hd = NULL;
1196         bs->open_flags |= BDRV_O_NO_BACKING;
1197         error_setg(errp, "Could not open backing file: %s",
1198                    error_get_pretty(local_err));
1199         error_free(local_err);
1200         goto free_exit;
1201     }
1202     bdrv_set_backing_hd(bs, backing_hd);
1203 
1204 free_exit:
1205     g_free(backing_filename);
1206     return ret;
1207 }
1208 
1209 /*
1210  * Opens a disk image whose options are given as BlockdevRef in another block
1211  * device's options.
1212  *
1213  * If allow_none is true, no image will be opened if filename is false and no
1214  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1215  *
1216  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1217  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1218  * itself, all options starting with "${bdref_key}." are considered part of the
1219  * BlockdevRef.
1220  *
1221  * The BlockdevRef will be removed from the options QDict.
1222  *
1223  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1224  */
1225 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1226                     QDict *options, const char *bdref_key,
1227                     BlockDriverState* parent, const BdrvChildRole *child_role,
1228                     bool allow_none, Error **errp)
1229 {
1230     QDict *image_options;
1231     int ret;
1232     char *bdref_key_dot;
1233     const char *reference;
1234 
1235     assert(pbs);
1236     assert(*pbs == NULL);
1237 
1238     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1239     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1240     g_free(bdref_key_dot);
1241 
1242     reference = qdict_get_try_str(options, bdref_key);
1243     if (!filename && !reference && !qdict_size(image_options)) {
1244         if (allow_none) {
1245             ret = 0;
1246         } else {
1247             error_setg(errp, "A block device must be specified for \"%s\"",
1248                        bdref_key);
1249             ret = -EINVAL;
1250         }
1251         QDECREF(image_options);
1252         goto done;
1253     }
1254 
1255     ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
1256                             parent, child_role, NULL, errp);
1257 
1258 done:
1259     qdict_del(options, bdref_key);
1260     return ret;
1261 }
1262 
1263 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1264 {
1265     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1266     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1267     int64_t total_size;
1268     QemuOpts *opts = NULL;
1269     QDict *snapshot_options;
1270     BlockDriverState *bs_snapshot;
1271     Error *local_err;
1272     int ret;
1273 
1274     /* if snapshot, we create a temporary backing file and open it
1275        instead of opening 'filename' directly */
1276 
1277     /* Get the required size from the image */
1278     total_size = bdrv_getlength(bs);
1279     if (total_size < 0) {
1280         ret = total_size;
1281         error_setg_errno(errp, -total_size, "Could not get image size");
1282         goto out;
1283     }
1284 
1285     /* Create the temporary image */
1286     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1287     if (ret < 0) {
1288         error_setg_errno(errp, -ret, "Could not get temporary filename");
1289         goto out;
1290     }
1291 
1292     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1293                             &error_abort);
1294     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1295     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1296     qemu_opts_del(opts);
1297     if (ret < 0) {
1298         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1299                          "'%s': %s", tmp_filename,
1300                          error_get_pretty(local_err));
1301         error_free(local_err);
1302         goto out;
1303     }
1304 
1305     /* Prepare a new options QDict for the temporary file */
1306     snapshot_options = qdict_new();
1307     qdict_put(snapshot_options, "file.driver",
1308               qstring_from_str("file"));
1309     qdict_put(snapshot_options, "file.filename",
1310               qstring_from_str(tmp_filename));
1311 
1312     bs_snapshot = bdrv_new();
1313 
1314     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1315                     flags, &bdrv_qcow2, &local_err);
1316     if (ret < 0) {
1317         error_propagate(errp, local_err);
1318         goto out;
1319     }
1320 
1321     bdrv_append(bs_snapshot, bs);
1322 
1323 out:
1324     g_free(tmp_filename);
1325     return ret;
1326 }
1327 
1328 /*
1329  * Opens a disk image (raw, qcow2, vmdk, ...)
1330  *
1331  * options is a QDict of options to pass to the block drivers, or NULL for an
1332  * empty set of options. The reference to the QDict belongs to the block layer
1333  * after the call (even on failure), so if the caller intends to reuse the
1334  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1335  *
1336  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1337  * If it is not NULL, the referenced BDS will be reused.
1338  *
1339  * The reference parameter may be used to specify an existing block device which
1340  * should be opened. If specified, neither options nor a filename may be given,
1341  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1342  */
1343 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1344                              const char *reference, QDict *options, int flags,
1345                              BlockDriverState *parent,
1346                              const BdrvChildRole *child_role,
1347                              BlockDriver *drv, Error **errp)
1348 {
1349     int ret;
1350     BlockDriverState *file = NULL, *bs;
1351     const char *drvname;
1352     Error *local_err = NULL;
1353     int snapshot_flags = 0;
1354 
1355     assert(pbs);
1356     assert(!child_role || !flags);
1357     assert(!child_role == !parent);
1358 
1359     if (reference) {
1360         bool options_non_empty = options ? qdict_size(options) : false;
1361         QDECREF(options);
1362 
1363         if (*pbs) {
1364             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1365                        "another block device");
1366             return -EINVAL;
1367         }
1368 
1369         if (filename || options_non_empty) {
1370             error_setg(errp, "Cannot reference an existing block device with "
1371                        "additional options or a new filename");
1372             return -EINVAL;
1373         }
1374 
1375         bs = bdrv_lookup_bs(reference, reference, errp);
1376         if (!bs) {
1377             return -ENODEV;
1378         }
1379         bdrv_ref(bs);
1380         *pbs = bs;
1381         return 0;
1382     }
1383 
1384     if (*pbs) {
1385         bs = *pbs;
1386     } else {
1387         bs = bdrv_new();
1388     }
1389 
1390     /* NULL means an empty set of options */
1391     if (options == NULL) {
1392         options = qdict_new();
1393     }
1394 
1395     if (child_role) {
1396         flags = child_role->inherit_flags(parent->open_flags);
1397     }
1398 
1399     ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
1400     if (local_err) {
1401         goto fail;
1402     }
1403 
1404     /* Find the right image format driver */
1405     drv = NULL;
1406     drvname = qdict_get_try_str(options, "driver");
1407     if (drvname) {
1408         drv = bdrv_find_format(drvname);
1409         qdict_del(options, "driver");
1410         if (!drv) {
1411             error_setg(errp, "Unknown driver: '%s'", drvname);
1412             ret = -EINVAL;
1413             goto fail;
1414         }
1415     }
1416 
1417     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1418 
1419     bs->open_flags = flags;
1420     bs->options = options;
1421     options = qdict_clone_shallow(options);
1422 
1423     /* Open image file without format layer */
1424     if ((flags & BDRV_O_PROTOCOL) == 0) {
1425         if (flags & BDRV_O_RDWR) {
1426             flags |= BDRV_O_ALLOW_RDWR;
1427         }
1428         if (flags & BDRV_O_SNAPSHOT) {
1429             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1430             flags = bdrv_backing_flags(flags);
1431         }
1432 
1433         assert(file == NULL);
1434         bs->open_flags = flags;
1435         ret = bdrv_open_image(&file, filename, options, "file",
1436                               bs, &child_file, true, &local_err);
1437         if (ret < 0) {
1438             goto fail;
1439         }
1440     }
1441 
1442     /* Image format probing */
1443     bs->probed = !drv;
1444     if (!drv && file) {
1445         ret = find_image_format(file, filename, &drv, &local_err);
1446         if (ret < 0) {
1447             goto fail;
1448         }
1449     } else if (!drv) {
1450         error_setg(errp, "Must specify either driver or file");
1451         ret = -EINVAL;
1452         goto fail;
1453     }
1454 
1455     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1456     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1457     /* file must be NULL if a protocol BDS is about to be created
1458      * (the inverse results in an error message from bdrv_open_common()) */
1459     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1460 
1461     /* Open the image */
1462     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1463     if (ret < 0) {
1464         goto fail;
1465     }
1466 
1467     if (file && (bs->file != file)) {
1468         bdrv_unref(file);
1469         file = NULL;
1470     }
1471 
1472     /* If there is a backing file, use it */
1473     if ((flags & BDRV_O_NO_BACKING) == 0) {
1474         QDict *backing_options;
1475 
1476         qdict_extract_subqdict(options, &backing_options, "backing.");
1477         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1478         if (ret < 0) {
1479             goto close_and_fail;
1480         }
1481     }
1482 
1483     bdrv_refresh_filename(bs);
1484 
1485     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1486      * temporary snapshot afterwards. */
1487     if (snapshot_flags) {
1488         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1489         if (local_err) {
1490             goto close_and_fail;
1491         }
1492     }
1493 
1494     /* Check if any unknown options were used */
1495     if (options && (qdict_size(options) != 0)) {
1496         const QDictEntry *entry = qdict_first(options);
1497         if (flags & BDRV_O_PROTOCOL) {
1498             error_setg(errp, "Block protocol '%s' doesn't support the option "
1499                        "'%s'", drv->format_name, entry->key);
1500         } else {
1501             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1502                        "support the option '%s'", drv->format_name,
1503                        bdrv_get_device_name(bs), entry->key);
1504         }
1505 
1506         ret = -EINVAL;
1507         goto close_and_fail;
1508     }
1509 
1510     if (!bdrv_key_required(bs)) {
1511         if (bs->blk) {
1512             blk_dev_change_media_cb(bs->blk, true);
1513         }
1514     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1515                && !runstate_check(RUN_STATE_INMIGRATE)
1516                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1517         error_setg(errp,
1518                    "Guest must be stopped for opening of encrypted image");
1519         ret = -EBUSY;
1520         goto close_and_fail;
1521     }
1522 
1523     QDECREF(options);
1524     *pbs = bs;
1525     return 0;
1526 
1527 fail:
1528     if (file != NULL) {
1529         bdrv_unref(file);
1530     }
1531     QDECREF(bs->options);
1532     QDECREF(options);
1533     bs->options = NULL;
1534     if (!*pbs) {
1535         /* If *pbs is NULL, a new BDS has been created in this function and
1536            needs to be freed now. Otherwise, it does not need to be closed,
1537            since it has not really been opened yet. */
1538         bdrv_unref(bs);
1539     }
1540     if (local_err) {
1541         error_propagate(errp, local_err);
1542     }
1543     return ret;
1544 
1545 close_and_fail:
1546     /* See fail path, but now the BDS has to be always closed */
1547     if (*pbs) {
1548         bdrv_close(bs);
1549     } else {
1550         bdrv_unref(bs);
1551     }
1552     QDECREF(options);
1553     if (local_err) {
1554         error_propagate(errp, local_err);
1555     }
1556     return ret;
1557 }
1558 
1559 int bdrv_open(BlockDriverState **pbs, const char *filename,
1560               const char *reference, QDict *options, int flags,
1561               BlockDriver *drv, Error **errp)
1562 {
1563     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1564                              NULL, drv, errp);
1565 }
1566 
1567 typedef struct BlockReopenQueueEntry {
1568      bool prepared;
1569      BDRVReopenState state;
1570      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1571 } BlockReopenQueueEntry;
1572 
1573 /*
1574  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1575  * reopen of multiple devices.
1576  *
1577  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1578  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1579  * be created and initialized. This newly created BlockReopenQueue should be
1580  * passed back in for subsequent calls that are intended to be of the same
1581  * atomic 'set'.
1582  *
1583  * bs is the BlockDriverState to add to the reopen queue.
1584  *
1585  * flags contains the open flags for the associated bs
1586  *
1587  * returns a pointer to bs_queue, which is either the newly allocated
1588  * bs_queue, or the existing bs_queue being used.
1589  *
1590  */
1591 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1592                                     BlockDriverState *bs, int flags)
1593 {
1594     assert(bs != NULL);
1595 
1596     BlockReopenQueueEntry *bs_entry;
1597     if (bs_queue == NULL) {
1598         bs_queue = g_new0(BlockReopenQueue, 1);
1599         QSIMPLEQ_INIT(bs_queue);
1600     }
1601 
1602     /* bdrv_open() masks this flag out */
1603     flags &= ~BDRV_O_PROTOCOL;
1604 
1605     if (bs->file) {
1606         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1607     }
1608 
1609     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1610     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1611 
1612     bs_entry->state.bs = bs;
1613     bs_entry->state.flags = flags;
1614 
1615     return bs_queue;
1616 }
1617 
1618 /*
1619  * Reopen multiple BlockDriverStates atomically & transactionally.
1620  *
1621  * The queue passed in (bs_queue) must have been built up previous
1622  * via bdrv_reopen_queue().
1623  *
1624  * Reopens all BDS specified in the queue, with the appropriate
1625  * flags.  All devices are prepared for reopen, and failure of any
1626  * device will cause all device changes to be abandonded, and intermediate
1627  * data cleaned up.
1628  *
1629  * If all devices prepare successfully, then the changes are committed
1630  * to all devices.
1631  *
1632  */
1633 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1634 {
1635     int ret = -1;
1636     BlockReopenQueueEntry *bs_entry, *next;
1637     Error *local_err = NULL;
1638 
1639     assert(bs_queue != NULL);
1640 
1641     bdrv_drain_all();
1642 
1643     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1644         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1645             error_propagate(errp, local_err);
1646             goto cleanup;
1647         }
1648         bs_entry->prepared = true;
1649     }
1650 
1651     /* If we reach this point, we have success and just need to apply the
1652      * changes
1653      */
1654     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1655         bdrv_reopen_commit(&bs_entry->state);
1656     }
1657 
1658     ret = 0;
1659 
1660 cleanup:
1661     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1662         if (ret && bs_entry->prepared) {
1663             bdrv_reopen_abort(&bs_entry->state);
1664         }
1665         g_free(bs_entry);
1666     }
1667     g_free(bs_queue);
1668     return ret;
1669 }
1670 
1671 
1672 /* Reopen a single BlockDriverState with the specified flags. */
1673 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1674 {
1675     int ret = -1;
1676     Error *local_err = NULL;
1677     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1678 
1679     ret = bdrv_reopen_multiple(queue, &local_err);
1680     if (local_err != NULL) {
1681         error_propagate(errp, local_err);
1682     }
1683     return ret;
1684 }
1685 
1686 
1687 /*
1688  * Prepares a BlockDriverState for reopen. All changes are staged in the
1689  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1690  * the block driver layer .bdrv_reopen_prepare()
1691  *
1692  * bs is the BlockDriverState to reopen
1693  * flags are the new open flags
1694  * queue is the reopen queue
1695  *
1696  * Returns 0 on success, non-zero on error.  On error errp will be set
1697  * as well.
1698  *
1699  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1700  * It is the responsibility of the caller to then call the abort() or
1701  * commit() for any other BDS that have been left in a prepare() state
1702  *
1703  */
1704 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1705                         Error **errp)
1706 {
1707     int ret = -1;
1708     Error *local_err = NULL;
1709     BlockDriver *drv;
1710 
1711     assert(reopen_state != NULL);
1712     assert(reopen_state->bs->drv != NULL);
1713     drv = reopen_state->bs->drv;
1714 
1715     /* if we are to stay read-only, do not allow permission change
1716      * to r/w */
1717     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1718         reopen_state->flags & BDRV_O_RDWR) {
1719         error_setg(errp, "Node '%s' is read only",
1720                    bdrv_get_device_or_node_name(reopen_state->bs));
1721         goto error;
1722     }
1723 
1724 
1725     ret = bdrv_flush(reopen_state->bs);
1726     if (ret) {
1727         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1728                   strerror(-ret));
1729         goto error;
1730     }
1731 
1732     if (drv->bdrv_reopen_prepare) {
1733         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1734         if (ret) {
1735             if (local_err != NULL) {
1736                 error_propagate(errp, local_err);
1737             } else {
1738                 error_setg(errp, "failed while preparing to reopen image '%s'",
1739                            reopen_state->bs->filename);
1740             }
1741             goto error;
1742         }
1743     } else {
1744         /* It is currently mandatory to have a bdrv_reopen_prepare()
1745          * handler for each supported drv. */
1746         error_setg(errp, "Block format '%s' used by node '%s' "
1747                    "does not support reopening files", drv->format_name,
1748                    bdrv_get_device_or_node_name(reopen_state->bs));
1749         ret = -1;
1750         goto error;
1751     }
1752 
1753     ret = 0;
1754 
1755 error:
1756     return ret;
1757 }
1758 
1759 /*
1760  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1761  * makes them final by swapping the staging BlockDriverState contents into
1762  * the active BlockDriverState contents.
1763  */
1764 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1765 {
1766     BlockDriver *drv;
1767 
1768     assert(reopen_state != NULL);
1769     drv = reopen_state->bs->drv;
1770     assert(drv != NULL);
1771 
1772     /* If there are any driver level actions to take */
1773     if (drv->bdrv_reopen_commit) {
1774         drv->bdrv_reopen_commit(reopen_state);
1775     }
1776 
1777     /* set BDS specific flags now */
1778     reopen_state->bs->open_flags         = reopen_state->flags;
1779     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1780                                               BDRV_O_CACHE_WB);
1781     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1782 
1783     bdrv_refresh_limits(reopen_state->bs, NULL);
1784 }
1785 
1786 /*
1787  * Abort the reopen, and delete and free the staged changes in
1788  * reopen_state
1789  */
1790 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1791 {
1792     BlockDriver *drv;
1793 
1794     assert(reopen_state != NULL);
1795     drv = reopen_state->bs->drv;
1796     assert(drv != NULL);
1797 
1798     if (drv->bdrv_reopen_abort) {
1799         drv->bdrv_reopen_abort(reopen_state);
1800     }
1801 }
1802 
1803 
1804 void bdrv_close(BlockDriverState *bs)
1805 {
1806     BdrvAioNotifier *ban, *ban_next;
1807 
1808     if (bs->job) {
1809         block_job_cancel_sync(bs->job);
1810     }
1811     bdrv_drain_all(); /* complete I/O */
1812     bdrv_flush(bs);
1813     bdrv_drain_all(); /* in case flush left pending I/O */
1814     notifier_list_notify(&bs->close_notifiers, bs);
1815 
1816     if (bs->drv) {
1817         if (bs->backing_hd) {
1818             BlockDriverState *backing_hd = bs->backing_hd;
1819             bdrv_set_backing_hd(bs, NULL);
1820             bdrv_unref(backing_hd);
1821         }
1822         bs->drv->bdrv_close(bs);
1823         g_free(bs->opaque);
1824         bs->opaque = NULL;
1825         bs->drv = NULL;
1826         bs->copy_on_read = 0;
1827         bs->backing_file[0] = '\0';
1828         bs->backing_format[0] = '\0';
1829         bs->total_sectors = 0;
1830         bs->encrypted = 0;
1831         bs->valid_key = 0;
1832         bs->sg = 0;
1833         bs->zero_beyond_eof = false;
1834         QDECREF(bs->options);
1835         bs->options = NULL;
1836         QDECREF(bs->full_open_options);
1837         bs->full_open_options = NULL;
1838 
1839         if (bs->file != NULL) {
1840             bdrv_unref(bs->file);
1841             bs->file = NULL;
1842         }
1843     }
1844 
1845     if (bs->blk) {
1846         blk_dev_change_media_cb(bs->blk, false);
1847     }
1848 
1849     /*throttling disk I/O limits*/
1850     if (bs->io_limits_enabled) {
1851         bdrv_io_limits_disable(bs);
1852     }
1853 
1854     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1855         g_free(ban);
1856     }
1857     QLIST_INIT(&bs->aio_notifiers);
1858 }
1859 
1860 void bdrv_close_all(void)
1861 {
1862     BlockDriverState *bs;
1863 
1864     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1865         AioContext *aio_context = bdrv_get_aio_context(bs);
1866 
1867         aio_context_acquire(aio_context);
1868         bdrv_close(bs);
1869         aio_context_release(aio_context);
1870     }
1871 }
1872 
1873 /* make a BlockDriverState anonymous by removing from bdrv_state and
1874  * graph_bdrv_state list.
1875    Also, NULL terminate the device_name to prevent double remove */
1876 void bdrv_make_anon(BlockDriverState *bs)
1877 {
1878     /*
1879      * Take care to remove bs from bdrv_states only when it's actually
1880      * in it.  Note that bs->device_list.tqe_prev is initially null,
1881      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1882      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1883      * resetting it to null on remove.
1884      */
1885     if (bs->device_list.tqe_prev) {
1886         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1887         bs->device_list.tqe_prev = NULL;
1888     }
1889     if (bs->node_name[0] != '\0') {
1890         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1891     }
1892     bs->node_name[0] = '\0';
1893 }
1894 
1895 static void bdrv_rebind(BlockDriverState *bs)
1896 {
1897     if (bs->drv && bs->drv->bdrv_rebind) {
1898         bs->drv->bdrv_rebind(bs);
1899     }
1900 }
1901 
1902 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1903                                      BlockDriverState *bs_src)
1904 {
1905     /* move some fields that need to stay attached to the device */
1906 
1907     /* dev info */
1908     bs_dest->guest_block_size   = bs_src->guest_block_size;
1909     bs_dest->copy_on_read       = bs_src->copy_on_read;
1910 
1911     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1912 
1913     /* i/o throttled req */
1914     memcpy(&bs_dest->throttle_state,
1915            &bs_src->throttle_state,
1916            sizeof(ThrottleState));
1917     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1918     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1919     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1920 
1921     /* r/w error */
1922     bs_dest->on_read_error      = bs_src->on_read_error;
1923     bs_dest->on_write_error     = bs_src->on_write_error;
1924 
1925     /* i/o status */
1926     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1927     bs_dest->iostatus           = bs_src->iostatus;
1928 
1929     /* dirty bitmap */
1930     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1931 
1932     /* reference count */
1933     bs_dest->refcnt             = bs_src->refcnt;
1934 
1935     /* job */
1936     bs_dest->job                = bs_src->job;
1937 
1938     /* keep the same entry in bdrv_states */
1939     bs_dest->device_list = bs_src->device_list;
1940     bs_dest->blk = bs_src->blk;
1941 
1942     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1943            sizeof(bs_dest->op_blockers));
1944 }
1945 
1946 /*
1947  * Swap bs contents for two image chains while they are live,
1948  * while keeping required fields on the BlockDriverState that is
1949  * actually attached to a device.
1950  *
1951  * This will modify the BlockDriverState fields, and swap contents
1952  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1953  *
1954  * bs_new must not be attached to a BlockBackend.
1955  *
1956  * This function does not create any image files.
1957  */
1958 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1959 {
1960     BlockDriverState tmp;
1961 
1962     /* The code needs to swap the node_name but simply swapping node_list won't
1963      * work so first remove the nodes from the graph list, do the swap then
1964      * insert them back if needed.
1965      */
1966     if (bs_new->node_name[0] != '\0') {
1967         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1968     }
1969     if (bs_old->node_name[0] != '\0') {
1970         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1971     }
1972 
1973     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1974     assert(!bs_new->blk);
1975     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1976     assert(bs_new->job == NULL);
1977     assert(bs_new->io_limits_enabled == false);
1978     assert(!throttle_have_timer(&bs_new->throttle_state));
1979 
1980     tmp = *bs_new;
1981     *bs_new = *bs_old;
1982     *bs_old = tmp;
1983 
1984     /* there are some fields that should not be swapped, move them back */
1985     bdrv_move_feature_fields(&tmp, bs_old);
1986     bdrv_move_feature_fields(bs_old, bs_new);
1987     bdrv_move_feature_fields(bs_new, &tmp);
1988 
1989     /* bs_new must remain unattached */
1990     assert(!bs_new->blk);
1991 
1992     /* Check a few fields that should remain attached to the device */
1993     assert(bs_new->job == NULL);
1994     assert(bs_new->io_limits_enabled == false);
1995     assert(!throttle_have_timer(&bs_new->throttle_state));
1996 
1997     /* insert the nodes back into the graph node list if needed */
1998     if (bs_new->node_name[0] != '\0') {
1999         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2000     }
2001     if (bs_old->node_name[0] != '\0') {
2002         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2003     }
2004 
2005     bdrv_rebind(bs_new);
2006     bdrv_rebind(bs_old);
2007 }
2008 
2009 /*
2010  * Add new bs contents at the top of an image chain while the chain is
2011  * live, while keeping required fields on the top layer.
2012  *
2013  * This will modify the BlockDriverState fields, and swap contents
2014  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2015  *
2016  * bs_new must not be attached to a BlockBackend.
2017  *
2018  * This function does not create any image files.
2019  */
2020 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2021 {
2022     bdrv_swap(bs_new, bs_top);
2023 
2024     /* The contents of 'tmp' will become bs_top, as we are
2025      * swapping bs_new and bs_top contents. */
2026     bdrv_set_backing_hd(bs_top, bs_new);
2027 }
2028 
2029 static void bdrv_delete(BlockDriverState *bs)
2030 {
2031     assert(!bs->job);
2032     assert(bdrv_op_blocker_is_empty(bs));
2033     assert(!bs->refcnt);
2034     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2035 
2036     bdrv_close(bs);
2037 
2038     /* remove from list, if necessary */
2039     bdrv_make_anon(bs);
2040 
2041     g_free(bs);
2042 }
2043 
2044 /*
2045  * Run consistency checks on an image
2046  *
2047  * Returns 0 if the check could be completed (it doesn't mean that the image is
2048  * free of errors) or -errno when an internal error occurred. The results of the
2049  * check are stored in res.
2050  */
2051 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2052 {
2053     if (bs->drv == NULL) {
2054         return -ENOMEDIUM;
2055     }
2056     if (bs->drv->bdrv_check == NULL) {
2057         return -ENOTSUP;
2058     }
2059 
2060     memset(res, 0, sizeof(*res));
2061     return bs->drv->bdrv_check(bs, res, fix);
2062 }
2063 
2064 #define COMMIT_BUF_SECTORS 2048
2065 
2066 /* commit COW file into the raw image */
2067 int bdrv_commit(BlockDriverState *bs)
2068 {
2069     BlockDriver *drv = bs->drv;
2070     int64_t sector, total_sectors, length, backing_length;
2071     int n, ro, open_flags;
2072     int ret = 0;
2073     uint8_t *buf = NULL;
2074 
2075     if (!drv)
2076         return -ENOMEDIUM;
2077 
2078     if (!bs->backing_hd) {
2079         return -ENOTSUP;
2080     }
2081 
2082     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2083         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2084         return -EBUSY;
2085     }
2086 
2087     ro = bs->backing_hd->read_only;
2088     open_flags =  bs->backing_hd->open_flags;
2089 
2090     if (ro) {
2091         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2092             return -EACCES;
2093         }
2094     }
2095 
2096     length = bdrv_getlength(bs);
2097     if (length < 0) {
2098         ret = length;
2099         goto ro_cleanup;
2100     }
2101 
2102     backing_length = bdrv_getlength(bs->backing_hd);
2103     if (backing_length < 0) {
2104         ret = backing_length;
2105         goto ro_cleanup;
2106     }
2107 
2108     /* If our top snapshot is larger than the backing file image,
2109      * grow the backing file image if possible.  If not possible,
2110      * we must return an error */
2111     if (length > backing_length) {
2112         ret = bdrv_truncate(bs->backing_hd, length);
2113         if (ret < 0) {
2114             goto ro_cleanup;
2115         }
2116     }
2117 
2118     total_sectors = length >> BDRV_SECTOR_BITS;
2119 
2120     /* qemu_try_blockalign() for bs will choose an alignment that works for
2121      * bs->backing_hd as well, so no need to compare the alignment manually. */
2122     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2123     if (buf == NULL) {
2124         ret = -ENOMEM;
2125         goto ro_cleanup;
2126     }
2127 
2128     for (sector = 0; sector < total_sectors; sector += n) {
2129         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2130         if (ret < 0) {
2131             goto ro_cleanup;
2132         }
2133         if (ret) {
2134             ret = bdrv_read(bs, sector, buf, n);
2135             if (ret < 0) {
2136                 goto ro_cleanup;
2137             }
2138 
2139             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2140             if (ret < 0) {
2141                 goto ro_cleanup;
2142             }
2143         }
2144     }
2145 
2146     if (drv->bdrv_make_empty) {
2147         ret = drv->bdrv_make_empty(bs);
2148         if (ret < 0) {
2149             goto ro_cleanup;
2150         }
2151         bdrv_flush(bs);
2152     }
2153 
2154     /*
2155      * Make sure all data we wrote to the backing device is actually
2156      * stable on disk.
2157      */
2158     if (bs->backing_hd) {
2159         bdrv_flush(bs->backing_hd);
2160     }
2161 
2162     ret = 0;
2163 ro_cleanup:
2164     qemu_vfree(buf);
2165 
2166     if (ro) {
2167         /* ignoring error return here */
2168         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2169     }
2170 
2171     return ret;
2172 }
2173 
2174 int bdrv_commit_all(void)
2175 {
2176     BlockDriverState *bs;
2177 
2178     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2179         AioContext *aio_context = bdrv_get_aio_context(bs);
2180 
2181         aio_context_acquire(aio_context);
2182         if (bs->drv && bs->backing_hd) {
2183             int ret = bdrv_commit(bs);
2184             if (ret < 0) {
2185                 aio_context_release(aio_context);
2186                 return ret;
2187             }
2188         }
2189         aio_context_release(aio_context);
2190     }
2191     return 0;
2192 }
2193 
2194 /*
2195  * Return values:
2196  * 0        - success
2197  * -EINVAL  - backing format specified, but no file
2198  * -ENOSPC  - can't update the backing file because no space is left in the
2199  *            image file header
2200  * -ENOTSUP - format driver doesn't support changing the backing file
2201  */
2202 int bdrv_change_backing_file(BlockDriverState *bs,
2203     const char *backing_file, const char *backing_fmt)
2204 {
2205     BlockDriver *drv = bs->drv;
2206     int ret;
2207 
2208     /* Backing file format doesn't make sense without a backing file */
2209     if (backing_fmt && !backing_file) {
2210         return -EINVAL;
2211     }
2212 
2213     if (drv->bdrv_change_backing_file != NULL) {
2214         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2215     } else {
2216         ret = -ENOTSUP;
2217     }
2218 
2219     if (ret == 0) {
2220         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2221         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2222     }
2223     return ret;
2224 }
2225 
2226 /*
2227  * Finds the image layer in the chain that has 'bs' as its backing file.
2228  *
2229  * active is the current topmost image.
2230  *
2231  * Returns NULL if bs is not found in active's image chain,
2232  * or if active == bs.
2233  *
2234  * Returns the bottommost base image if bs == NULL.
2235  */
2236 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2237                                     BlockDriverState *bs)
2238 {
2239     while (active && bs != active->backing_hd) {
2240         active = active->backing_hd;
2241     }
2242 
2243     return active;
2244 }
2245 
2246 /* Given a BDS, searches for the base layer. */
2247 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2248 {
2249     return bdrv_find_overlay(bs, NULL);
2250 }
2251 
2252 typedef struct BlkIntermediateStates {
2253     BlockDriverState *bs;
2254     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2255 } BlkIntermediateStates;
2256 
2257 
2258 /*
2259  * Drops images above 'base' up to and including 'top', and sets the image
2260  * above 'top' to have base as its backing file.
2261  *
2262  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2263  * information in 'bs' can be properly updated.
2264  *
2265  * E.g., this will convert the following chain:
2266  * bottom <- base <- intermediate <- top <- active
2267  *
2268  * to
2269  *
2270  * bottom <- base <- active
2271  *
2272  * It is allowed for bottom==base, in which case it converts:
2273  *
2274  * base <- intermediate <- top <- active
2275  *
2276  * to
2277  *
2278  * base <- active
2279  *
2280  * If backing_file_str is non-NULL, it will be used when modifying top's
2281  * overlay image metadata.
2282  *
2283  * Error conditions:
2284  *  if active == top, that is considered an error
2285  *
2286  */
2287 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2288                            BlockDriverState *base, const char *backing_file_str)
2289 {
2290     BlockDriverState *intermediate;
2291     BlockDriverState *base_bs = NULL;
2292     BlockDriverState *new_top_bs = NULL;
2293     BlkIntermediateStates *intermediate_state, *next;
2294     int ret = -EIO;
2295 
2296     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2297     QSIMPLEQ_INIT(&states_to_delete);
2298 
2299     if (!top->drv || !base->drv) {
2300         goto exit;
2301     }
2302 
2303     new_top_bs = bdrv_find_overlay(active, top);
2304 
2305     if (new_top_bs == NULL) {
2306         /* we could not find the image above 'top', this is an error */
2307         goto exit;
2308     }
2309 
2310     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2311      * to do, no intermediate images */
2312     if (new_top_bs->backing_hd == base) {
2313         ret = 0;
2314         goto exit;
2315     }
2316 
2317     intermediate = top;
2318 
2319     /* now we will go down through the list, and add each BDS we find
2320      * into our deletion queue, until we hit the 'base'
2321      */
2322     while (intermediate) {
2323         intermediate_state = g_new0(BlkIntermediateStates, 1);
2324         intermediate_state->bs = intermediate;
2325         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2326 
2327         if (intermediate->backing_hd == base) {
2328             base_bs = intermediate->backing_hd;
2329             break;
2330         }
2331         intermediate = intermediate->backing_hd;
2332     }
2333     if (base_bs == NULL) {
2334         /* something went wrong, we did not end at the base. safely
2335          * unravel everything, and exit with error */
2336         goto exit;
2337     }
2338 
2339     /* success - we can delete the intermediate states, and link top->base */
2340     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2341     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2342                                    base_bs->drv ? base_bs->drv->format_name : "");
2343     if (ret) {
2344         goto exit;
2345     }
2346     bdrv_set_backing_hd(new_top_bs, base_bs);
2347 
2348     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2349         /* so that bdrv_close() does not recursively close the chain */
2350         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2351         bdrv_unref(intermediate_state->bs);
2352     }
2353     ret = 0;
2354 
2355 exit:
2356     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2357         g_free(intermediate_state);
2358     }
2359     return ret;
2360 }
2361 
2362 /**
2363  * Truncate file to 'offset' bytes (needed only for file protocols)
2364  */
2365 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2366 {
2367     BlockDriver *drv = bs->drv;
2368     int ret;
2369     if (!drv)
2370         return -ENOMEDIUM;
2371     if (!drv->bdrv_truncate)
2372         return -ENOTSUP;
2373     if (bs->read_only)
2374         return -EACCES;
2375 
2376     ret = drv->bdrv_truncate(bs, offset);
2377     if (ret == 0) {
2378         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2379         bdrv_dirty_bitmap_truncate(bs);
2380         if (bs->blk) {
2381             blk_dev_resize_cb(bs->blk);
2382         }
2383     }
2384     return ret;
2385 }
2386 
2387 /**
2388  * Length of a allocated file in bytes. Sparse files are counted by actual
2389  * allocated space. Return < 0 if error or unknown.
2390  */
2391 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2392 {
2393     BlockDriver *drv = bs->drv;
2394     if (!drv) {
2395         return -ENOMEDIUM;
2396     }
2397     if (drv->bdrv_get_allocated_file_size) {
2398         return drv->bdrv_get_allocated_file_size(bs);
2399     }
2400     if (bs->file) {
2401         return bdrv_get_allocated_file_size(bs->file);
2402     }
2403     return -ENOTSUP;
2404 }
2405 
2406 /**
2407  * Return number of sectors on success, -errno on error.
2408  */
2409 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2410 {
2411     BlockDriver *drv = bs->drv;
2412 
2413     if (!drv)
2414         return -ENOMEDIUM;
2415 
2416     if (drv->has_variable_length) {
2417         int ret = refresh_total_sectors(bs, bs->total_sectors);
2418         if (ret < 0) {
2419             return ret;
2420         }
2421     }
2422     return bs->total_sectors;
2423 }
2424 
2425 /**
2426  * Return length in bytes on success, -errno on error.
2427  * The length is always a multiple of BDRV_SECTOR_SIZE.
2428  */
2429 int64_t bdrv_getlength(BlockDriverState *bs)
2430 {
2431     int64_t ret = bdrv_nb_sectors(bs);
2432 
2433     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2434     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2435 }
2436 
2437 /* return 0 as number of sectors if no device present or error */
2438 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2439 {
2440     int64_t nb_sectors = bdrv_nb_sectors(bs);
2441 
2442     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2443 }
2444 
2445 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2446                        BlockdevOnError on_write_error)
2447 {
2448     bs->on_read_error = on_read_error;
2449     bs->on_write_error = on_write_error;
2450 }
2451 
2452 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2453 {
2454     return is_read ? bs->on_read_error : bs->on_write_error;
2455 }
2456 
2457 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2458 {
2459     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2460 
2461     switch (on_err) {
2462     case BLOCKDEV_ON_ERROR_ENOSPC:
2463         return (error == ENOSPC) ?
2464                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2465     case BLOCKDEV_ON_ERROR_STOP:
2466         return BLOCK_ERROR_ACTION_STOP;
2467     case BLOCKDEV_ON_ERROR_REPORT:
2468         return BLOCK_ERROR_ACTION_REPORT;
2469     case BLOCKDEV_ON_ERROR_IGNORE:
2470         return BLOCK_ERROR_ACTION_IGNORE;
2471     default:
2472         abort();
2473     }
2474 }
2475 
2476 static void send_qmp_error_event(BlockDriverState *bs,
2477                                  BlockErrorAction action,
2478                                  bool is_read, int error)
2479 {
2480     IoOperationType optype;
2481 
2482     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2483     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2484                                    bdrv_iostatus_is_enabled(bs),
2485                                    error == ENOSPC, strerror(error),
2486                                    &error_abort);
2487 }
2488 
2489 /* This is done by device models because, while the block layer knows
2490  * about the error, it does not know whether an operation comes from
2491  * the device or the block layer (from a job, for example).
2492  */
2493 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2494                        bool is_read, int error)
2495 {
2496     assert(error >= 0);
2497 
2498     if (action == BLOCK_ERROR_ACTION_STOP) {
2499         /* First set the iostatus, so that "info block" returns an iostatus
2500          * that matches the events raised so far (an additional error iostatus
2501          * is fine, but not a lost one).
2502          */
2503         bdrv_iostatus_set_err(bs, error);
2504 
2505         /* Then raise the request to stop the VM and the event.
2506          * qemu_system_vmstop_request_prepare has two effects.  First,
2507          * it ensures that the STOP event always comes after the
2508          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2509          * can observe the STOP event and do a "cont" before the STOP
2510          * event is issued, the VM will not stop.  In this case, vm_start()
2511          * also ensures that the STOP/RESUME pair of events is emitted.
2512          */
2513         qemu_system_vmstop_request_prepare();
2514         send_qmp_error_event(bs, action, is_read, error);
2515         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2516     } else {
2517         send_qmp_error_event(bs, action, is_read, error);
2518     }
2519 }
2520 
2521 int bdrv_is_read_only(BlockDriverState *bs)
2522 {
2523     return bs->read_only;
2524 }
2525 
2526 int bdrv_is_sg(BlockDriverState *bs)
2527 {
2528     return bs->sg;
2529 }
2530 
2531 int bdrv_enable_write_cache(BlockDriverState *bs)
2532 {
2533     return bs->enable_write_cache;
2534 }
2535 
2536 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2537 {
2538     bs->enable_write_cache = wce;
2539 
2540     /* so a reopen() will preserve wce */
2541     if (wce) {
2542         bs->open_flags |= BDRV_O_CACHE_WB;
2543     } else {
2544         bs->open_flags &= ~BDRV_O_CACHE_WB;
2545     }
2546 }
2547 
2548 int bdrv_is_encrypted(BlockDriverState *bs)
2549 {
2550     if (bs->backing_hd && bs->backing_hd->encrypted)
2551         return 1;
2552     return bs->encrypted;
2553 }
2554 
2555 int bdrv_key_required(BlockDriverState *bs)
2556 {
2557     BlockDriverState *backing_hd = bs->backing_hd;
2558 
2559     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2560         return 1;
2561     return (bs->encrypted && !bs->valid_key);
2562 }
2563 
2564 int bdrv_set_key(BlockDriverState *bs, const char *key)
2565 {
2566     int ret;
2567     if (bs->backing_hd && bs->backing_hd->encrypted) {
2568         ret = bdrv_set_key(bs->backing_hd, key);
2569         if (ret < 0)
2570             return ret;
2571         if (!bs->encrypted)
2572             return 0;
2573     }
2574     if (!bs->encrypted) {
2575         return -EINVAL;
2576     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2577         return -ENOMEDIUM;
2578     }
2579     ret = bs->drv->bdrv_set_key(bs, key);
2580     if (ret < 0) {
2581         bs->valid_key = 0;
2582     } else if (!bs->valid_key) {
2583         bs->valid_key = 1;
2584         if (bs->blk) {
2585             /* call the change callback now, we skipped it on open */
2586             blk_dev_change_media_cb(bs->blk, true);
2587         }
2588     }
2589     return ret;
2590 }
2591 
2592 /*
2593  * Provide an encryption key for @bs.
2594  * If @key is non-null:
2595  *     If @bs is not encrypted, fail.
2596  *     Else if the key is invalid, fail.
2597  *     Else set @bs's key to @key, replacing the existing key, if any.
2598  * If @key is null:
2599  *     If @bs is encrypted and still lacks a key, fail.
2600  *     Else do nothing.
2601  * On failure, store an error object through @errp if non-null.
2602  */
2603 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2604 {
2605     if (key) {
2606         if (!bdrv_is_encrypted(bs)) {
2607             error_setg(errp, "Node '%s' is not encrypted",
2608                       bdrv_get_device_or_node_name(bs));
2609         } else if (bdrv_set_key(bs, key) < 0) {
2610             error_set(errp, QERR_INVALID_PASSWORD);
2611         }
2612     } else {
2613         if (bdrv_key_required(bs)) {
2614             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2615                       "'%s' (%s) is encrypted",
2616                       bdrv_get_device_or_node_name(bs),
2617                       bdrv_get_encrypted_filename(bs));
2618         }
2619     }
2620 }
2621 
2622 const char *bdrv_get_format_name(BlockDriverState *bs)
2623 {
2624     return bs->drv ? bs->drv->format_name : NULL;
2625 }
2626 
/*
 * qsort() comparator for an array of C strings (const char * elements).
 *
 * qsort() hands the comparator pointers to the array elements, so a and b
 * are really 'const char *const *' and must be dereferenced once before
 * the strings can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2631 
2632 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2633                          void *opaque)
2634 {
2635     BlockDriver *drv;
2636     int count = 0;
2637     int i;
2638     const char **formats = NULL;
2639 
2640     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2641         if (drv->format_name) {
2642             bool found = false;
2643             int i = count;
2644             while (formats && i && !found) {
2645                 found = !strcmp(formats[--i], drv->format_name);
2646             }
2647 
2648             if (!found) {
2649                 formats = g_renew(const char *, formats, count + 1);
2650                 formats[count++] = drv->format_name;
2651             }
2652         }
2653     }
2654 
2655     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2656 
2657     for (i = 0; i < count; i++) {
2658         it(opaque, formats[i]);
2659     }
2660 
2661     g_free(formats);
2662 }
2663 
2664 /* This function is to find a node in the bs graph */
2665 BlockDriverState *bdrv_find_node(const char *node_name)
2666 {
2667     BlockDriverState *bs;
2668 
2669     assert(node_name);
2670 
2671     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2672         if (!strcmp(node_name, bs->node_name)) {
2673             return bs;
2674         }
2675     }
2676     return NULL;
2677 }
2678 
2679 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2680 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2681 {
2682     BlockDeviceInfoList *list, *entry;
2683     BlockDriverState *bs;
2684 
2685     list = NULL;
2686     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2687         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2688         if (!info) {
2689             qapi_free_BlockDeviceInfoList(list);
2690             return NULL;
2691         }
2692         entry = g_malloc0(sizeof(*entry));
2693         entry->value = info;
2694         entry->next = list;
2695         list = entry;
2696     }
2697 
2698     return list;
2699 }
2700 
2701 BlockDriverState *bdrv_lookup_bs(const char *device,
2702                                  const char *node_name,
2703                                  Error **errp)
2704 {
2705     BlockBackend *blk;
2706     BlockDriverState *bs;
2707 
2708     if (device) {
2709         blk = blk_by_name(device);
2710 
2711         if (blk) {
2712             return blk_bs(blk);
2713         }
2714     }
2715 
2716     if (node_name) {
2717         bs = bdrv_find_node(node_name);
2718 
2719         if (bs) {
2720             return bs;
2721         }
2722     }
2723 
2724     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2725                      device ? device : "",
2726                      node_name ? node_name : "");
2727     return NULL;
2728 }
2729 
2730 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2731  * return false.  If either argument is NULL, return false. */
2732 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2733 {
2734     while (top && top != base) {
2735         top = top->backing_hd;
2736     }
2737 
2738     return top != NULL;
2739 }
2740 
2741 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2742 {
2743     if (!bs) {
2744         return QTAILQ_FIRST(&graph_bdrv_states);
2745     }
2746     return QTAILQ_NEXT(bs, node_list);
2747 }
2748 
2749 BlockDriverState *bdrv_next(BlockDriverState *bs)
2750 {
2751     if (!bs) {
2752         return QTAILQ_FIRST(&bdrv_states);
2753     }
2754     return QTAILQ_NEXT(bs, device_list);
2755 }
2756 
2757 const char *bdrv_get_node_name(const BlockDriverState *bs)
2758 {
2759     return bs->node_name;
2760 }
2761 
2762 /* TODO check what callers really want: bs->node_name or blk_name() */
2763 const char *bdrv_get_device_name(const BlockDriverState *bs)
2764 {
2765     return bs->blk ? blk_name(bs->blk) : "";
2766 }
2767 
2768 /* This can be used to identify nodes that might not have a device
2769  * name associated. Since node and device names live in the same
2770  * namespace, the result is unambiguous. The exception is if both are
2771  * absent, then this returns an empty (non-null) string. */
2772 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2773 {
2774     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2775 }
2776 
2777 int bdrv_get_flags(BlockDriverState *bs)
2778 {
2779     return bs->open_flags;
2780 }
2781 
2782 int bdrv_has_zero_init_1(BlockDriverState *bs)
2783 {
2784     return 1;
2785 }
2786 
2787 int bdrv_has_zero_init(BlockDriverState *bs)
2788 {
2789     assert(bs->drv);
2790 
2791     /* If BS is a copy on write image, it is initialized to
2792        the contents of the base image, which may not be zeroes.  */
2793     if (bs->backing_hd) {
2794         return 0;
2795     }
2796     if (bs->drv->bdrv_has_zero_init) {
2797         return bs->drv->bdrv_has_zero_init(bs);
2798     }
2799 
2800     /* safe default */
2801     return 0;
2802 }
2803 
2804 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2805 {
2806     BlockDriverInfo bdi;
2807 
2808     if (bs->backing_hd) {
2809         return false;
2810     }
2811 
2812     if (bdrv_get_info(bs, &bdi) == 0) {
2813         return bdi.unallocated_blocks_are_zero;
2814     }
2815 
2816     return false;
2817 }
2818 
2819 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2820 {
2821     BlockDriverInfo bdi;
2822 
2823     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2824         return false;
2825     }
2826 
2827     if (bdrv_get_info(bs, &bdi) == 0) {
2828         return bdi.can_write_zeroes_with_unmap;
2829     }
2830 
2831     return false;
2832 }
2833 
2834 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2835 {
2836     if (bs->backing_hd && bs->backing_hd->encrypted)
2837         return bs->backing_file;
2838     else if (bs->encrypted)
2839         return bs->filename;
2840     else
2841         return NULL;
2842 }
2843 
2844 void bdrv_get_backing_filename(BlockDriverState *bs,
2845                                char *filename, int filename_size)
2846 {
2847     pstrcpy(filename, filename_size, bs->backing_file);
2848 }
2849 
2850 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2851 {
2852     BlockDriver *drv = bs->drv;
2853     if (!drv)
2854         return -ENOMEDIUM;
2855     if (!drv->bdrv_get_info)
2856         return -ENOTSUP;
2857     memset(bdi, 0, sizeof(*bdi));
2858     return drv->bdrv_get_info(bs, bdi);
2859 }
2860 
2861 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2862 {
2863     BlockDriver *drv = bs->drv;
2864     if (drv && drv->bdrv_get_specific_info) {
2865         return drv->bdrv_get_specific_info(bs);
2866     }
2867     return NULL;
2868 }
2869 
2870 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2871 {
2872     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2873         return;
2874     }
2875 
2876     bs->drv->bdrv_debug_event(bs, event);
2877 }
2878 
2879 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2880                           const char *tag)
2881 {
2882     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2883         bs = bs->file;
2884     }
2885 
2886     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2887         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2888     }
2889 
2890     return -ENOTSUP;
2891 }
2892 
2893 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2894 {
2895     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2896         bs = bs->file;
2897     }
2898 
2899     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2900         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2901     }
2902 
2903     return -ENOTSUP;
2904 }
2905 
2906 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2907 {
2908     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2909         bs = bs->file;
2910     }
2911 
2912     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2913         return bs->drv->bdrv_debug_resume(bs, tag);
2914     }
2915 
2916     return -ENOTSUP;
2917 }
2918 
2919 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2920 {
2921     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2922         bs = bs->file;
2923     }
2924 
2925     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2926         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2927     }
2928 
2929     return false;
2930 }
2931 
2932 int bdrv_is_snapshot(BlockDriverState *bs)
2933 {
2934     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2935 }
2936 
2937 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2938  * relative, it must be relative to the chain.  So, passing in bs->filename
2939  * from a BDS as backing_file should not be done, as that may be relative to
2940  * the CWD rather than the chain. */
2941 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2942         const char *backing_file)
2943 {
2944     char *filename_full = NULL;
2945     char *backing_file_full = NULL;
2946     char *filename_tmp = NULL;
2947     int is_protocol = 0;
2948     BlockDriverState *curr_bs = NULL;
2949     BlockDriverState *retval = NULL;
2950 
2951     if (!bs || !bs->drv || !backing_file) {
2952         return NULL;
2953     }
2954 
2955     filename_full     = g_malloc(PATH_MAX);
2956     backing_file_full = g_malloc(PATH_MAX);
2957     filename_tmp      = g_malloc(PATH_MAX);
2958 
2959     is_protocol = path_has_protocol(backing_file);
2960 
2961     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2962 
2963         /* If either of the filename paths is actually a protocol, then
2964          * compare unmodified paths; otherwise make paths relative */
2965         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2966             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2967                 retval = curr_bs->backing_hd;
2968                 break;
2969             }
2970         } else {
2971             /* If not an absolute filename path, make it relative to the current
2972              * image's filename path */
2973             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2974                          backing_file);
2975 
2976             /* We are going to compare absolute pathnames */
2977             if (!realpath(filename_tmp, filename_full)) {
2978                 continue;
2979             }
2980 
2981             /* We need to make sure the backing filename we are comparing against
2982              * is relative to the current image filename (or absolute) */
2983             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2984                          curr_bs->backing_file);
2985 
2986             if (!realpath(filename_tmp, backing_file_full)) {
2987                 continue;
2988             }
2989 
2990             if (strcmp(backing_file_full, filename_full) == 0) {
2991                 retval = curr_bs->backing_hd;
2992                 break;
2993             }
2994         }
2995     }
2996 
2997     g_free(filename_full);
2998     g_free(backing_file_full);
2999     g_free(filename_tmp);
3000     return retval;
3001 }
3002 
3003 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3004 {
3005     if (!bs->drv) {
3006         return 0;
3007     }
3008 
3009     if (!bs->backing_hd) {
3010         return 0;
3011     }
3012 
3013     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3014 }
3015 
3016 void bdrv_init(void)
3017 {
3018     module_call_init(MODULE_INIT_BLOCK);
3019 }
3020 
3021 void bdrv_init_with_whitelist(void)
3022 {
3023     use_bdrv_whitelist = 1;
3024     bdrv_init();
3025 }
3026 
3027 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3028 {
3029     Error *local_err = NULL;
3030     int ret;
3031 
3032     if (!bs->drv)  {
3033         return;
3034     }
3035 
3036     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3037         return;
3038     }
3039     bs->open_flags &= ~BDRV_O_INCOMING;
3040 
3041     if (bs->drv->bdrv_invalidate_cache) {
3042         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3043     } else if (bs->file) {
3044         bdrv_invalidate_cache(bs->file, &local_err);
3045     }
3046     if (local_err) {
3047         error_propagate(errp, local_err);
3048         return;
3049     }
3050 
3051     ret = refresh_total_sectors(bs, bs->total_sectors);
3052     if (ret < 0) {
3053         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3054         return;
3055     }
3056 }
3057 
3058 void bdrv_invalidate_cache_all(Error **errp)
3059 {
3060     BlockDriverState *bs;
3061     Error *local_err = NULL;
3062 
3063     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3064         AioContext *aio_context = bdrv_get_aio_context(bs);
3065 
3066         aio_context_acquire(aio_context);
3067         bdrv_invalidate_cache(bs, &local_err);
3068         aio_context_release(aio_context);
3069         if (local_err) {
3070             error_propagate(errp, local_err);
3071             return;
3072         }
3073     }
3074 }
3075 
3076 /**************************************************************/
3077 /* removable device support */
3078 
3079 /**
3080  * Return TRUE if the media is present
3081  */
3082 int bdrv_is_inserted(BlockDriverState *bs)
3083 {
3084     BlockDriver *drv = bs->drv;
3085 
3086     if (!drv)
3087         return 0;
3088     if (!drv->bdrv_is_inserted)
3089         return 1;
3090     return drv->bdrv_is_inserted(bs);
3091 }
3092 
3093 /**
3094  * Return whether the media changed since the last call to this
3095  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3096  */
3097 int bdrv_media_changed(BlockDriverState *bs)
3098 {
3099     BlockDriver *drv = bs->drv;
3100 
3101     if (drv && drv->bdrv_media_changed) {
3102         return drv->bdrv_media_changed(bs);
3103     }
3104     return -ENOTSUP;
3105 }
3106 
3107 /**
3108  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3109  */
3110 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3111 {
3112     BlockDriver *drv = bs->drv;
3113     const char *device_name;
3114 
3115     if (drv && drv->bdrv_eject) {
3116         drv->bdrv_eject(bs, eject_flag);
3117     }
3118 
3119     device_name = bdrv_get_device_name(bs);
3120     if (device_name[0] != '\0') {
3121         qapi_event_send_device_tray_moved(device_name,
3122                                           eject_flag, &error_abort);
3123     }
3124 }
3125 
3126 /**
3127  * Lock or unlock the media (if it is locked, the user won't be able
3128  * to eject it manually).
3129  */
3130 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3131 {
3132     BlockDriver *drv = bs->drv;
3133 
3134     trace_bdrv_lock_medium(bs, locked);
3135 
3136     if (drv && drv->bdrv_lock_medium) {
3137         drv->bdrv_lock_medium(bs, locked);
3138     }
3139 }
3140 
3141 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3142 {
3143     bs->guest_block_size = align;
3144 }
3145 
3146 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3147 {
3148     BdrvDirtyBitmap *bm;
3149 
3150     assert(name);
3151     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3152         if (bm->name && !strcmp(name, bm->name)) {
3153             return bm;
3154         }
3155     }
3156     return NULL;
3157 }
3158 
3159 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3160 {
3161     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3162     g_free(bitmap->name);
3163     bitmap->name = NULL;
3164 }
3165 
3166 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3167                                           uint32_t granularity,
3168                                           const char *name,
3169                                           Error **errp)
3170 {
3171     int64_t bitmap_size;
3172     BdrvDirtyBitmap *bitmap;
3173     uint32_t sector_granularity;
3174 
3175     assert((granularity & (granularity - 1)) == 0);
3176 
3177     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3178         error_setg(errp, "Bitmap already exists: %s", name);
3179         return NULL;
3180     }
3181     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3182     assert(sector_granularity);
3183     bitmap_size = bdrv_nb_sectors(bs);
3184     if (bitmap_size < 0) {
3185         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3186         errno = -bitmap_size;
3187         return NULL;
3188     }
3189     bitmap = g_new0(BdrvDirtyBitmap, 1);
3190     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3191     bitmap->size = bitmap_size;
3192     bitmap->name = g_strdup(name);
3193     bitmap->disabled = false;
3194     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3195     return bitmap;
3196 }
3197 
3198 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3199 {
3200     return bitmap->successor;
3201 }
3202 
3203 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3204 {
3205     return !(bitmap->disabled || bitmap->successor);
3206 }
3207 
3208 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3209 {
3210     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3211         return DIRTY_BITMAP_STATUS_FROZEN;
3212     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3213         return DIRTY_BITMAP_STATUS_DISABLED;
3214     } else {
3215         return DIRTY_BITMAP_STATUS_ACTIVE;
3216     }
3217 }
3218 
3219 /**
3220  * Create a successor bitmap destined to replace this bitmap after an operation.
3221  * Requires that the bitmap is not frozen and has no successor.
3222  */
3223 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3224                                        BdrvDirtyBitmap *bitmap, Error **errp)
3225 {
3226     uint64_t granularity;
3227     BdrvDirtyBitmap *child;
3228 
3229     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3230         error_setg(errp, "Cannot create a successor for a bitmap that is "
3231                    "currently frozen");
3232         return -1;
3233     }
3234     assert(!bitmap->successor);
3235 
3236     /* Create an anonymous successor */
3237     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3238     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3239     if (!child) {
3240         return -1;
3241     }
3242 
3243     /* Successor will be on or off based on our current state. */
3244     child->disabled = bitmap->disabled;
3245 
3246     /* Install the successor and freeze the parent */
3247     bitmap->successor = child;
3248     return 0;
3249 }
3250 
3251 /**
3252  * For a bitmap with a successor, yield our name to the successor,
3253  * delete the old bitmap, and return a handle to the new bitmap.
3254  */
3255 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3256                                             BdrvDirtyBitmap *bitmap,
3257                                             Error **errp)
3258 {
3259     char *name;
3260     BdrvDirtyBitmap *successor = bitmap->successor;
3261 
3262     if (successor == NULL) {
3263         error_setg(errp, "Cannot relinquish control if "
3264                    "there's no successor present");
3265         return NULL;
3266     }
3267 
3268     name = bitmap->name;
3269     bitmap->name = NULL;
3270     successor->name = name;
3271     bitmap->successor = NULL;
3272     bdrv_release_dirty_bitmap(bs, bitmap);
3273 
3274     return successor;
3275 }
3276 
3277 /**
3278  * In cases of failure where we can no longer safely delete the parent,
3279  * we may wish to re-join the parent and child/successor.
3280  * The merged parent will be un-frozen, but not explicitly re-enabled.
3281  */
3282 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3283                                            BdrvDirtyBitmap *parent,
3284                                            Error **errp)
3285 {
3286     BdrvDirtyBitmap *successor = parent->successor;
3287 
3288     if (!successor) {
3289         error_setg(errp, "Cannot reclaim a successor when none is present");
3290         return NULL;
3291     }
3292 
3293     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3294         error_setg(errp, "Merging of parent and successor bitmap failed");
3295         return NULL;
3296     }
3297     bdrv_release_dirty_bitmap(bs, successor);
3298     parent->successor = NULL;
3299 
3300     return parent;
3301 }
3302 
3303 /**
3304  * Truncates _all_ bitmaps attached to a BDS.
3305  */
3306 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3307 {
3308     BdrvDirtyBitmap *bitmap;
3309     uint64_t size = bdrv_nb_sectors(bs);
3310 
3311     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3312         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3313         hbitmap_truncate(bitmap->bitmap, size);
3314         bitmap->size = size;
3315     }
3316 }
3317 
3318 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3319 {
3320     BdrvDirtyBitmap *bm, *next;
3321     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3322         if (bm == bitmap) {
3323             assert(!bdrv_dirty_bitmap_frozen(bm));
3324             QLIST_REMOVE(bitmap, list);
3325             hbitmap_free(bitmap->bitmap);
3326             g_free(bitmap->name);
3327             g_free(bitmap);
3328             return;
3329         }
3330     }
3331 }
3332 
3333 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3334 {
3335     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3336     bitmap->disabled = true;
3337 }
3338 
3339 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3340 {
3341     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3342     bitmap->disabled = false;
3343 }
3344 
3345 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3346 {
3347     BdrvDirtyBitmap *bm;
3348     BlockDirtyInfoList *list = NULL;
3349     BlockDirtyInfoList **plist = &list;
3350 
3351     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3352         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3353         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3354         info->count = bdrv_get_dirty_count(bm);
3355         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3356         info->has_name = !!bm->name;
3357         info->name = g_strdup(bm->name);
3358         info->status = bdrv_dirty_bitmap_status(bm);
3359         entry->value = info;
3360         *plist = entry;
3361         plist = &entry->next;
3362     }
3363 
3364     return list;
3365 }
3366 
3367 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3368 {
3369     if (bitmap) {
3370         return hbitmap_get(bitmap->bitmap, sector);
3371     } else {
3372         return 0;
3373     }
3374 }
3375 
3376 /**
3377  * Chooses a default granularity based on the existing cluster size,
3378  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3379  * is no cluster size information available.
3380  */
3381 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3382 {
3383     BlockDriverInfo bdi;
3384     uint32_t granularity;
3385 
3386     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3387         granularity = MAX(4096, bdi.cluster_size);
3388         granularity = MIN(65536, granularity);
3389     } else {
3390         granularity = 65536;
3391     }
3392 
3393     return granularity;
3394 }
3395 
3396 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3397 {
3398     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3399 }
3400 
3401 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3402 {
3403     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3404 }
3405 
3406 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3407                            int64_t cur_sector, int nr_sectors)
3408 {
3409     assert(bdrv_dirty_bitmap_enabled(bitmap));
3410     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3411 }
3412 
3413 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3414                              int64_t cur_sector, int nr_sectors)
3415 {
3416     assert(bdrv_dirty_bitmap_enabled(bitmap));
3417     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3418 }
3419 
3420 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3421 {
3422     assert(bdrv_dirty_bitmap_enabled(bitmap));
3423     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3424 }
3425 
3426 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3427                     int nr_sectors)
3428 {
3429     BdrvDirtyBitmap *bitmap;
3430     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3431         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3432             continue;
3433         }
3434         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3435     }
3436 }
3437 
3438 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3439                       int nr_sectors)
3440 {
3441     BdrvDirtyBitmap *bitmap;
3442     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3443         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3444             continue;
3445         }
3446         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3447     }
3448 }
3449 
3450 /**
3451  * Advance an HBitmapIter to an arbitrary offset.
3452  */
3453 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3454 {
3455     assert(hbi->hb);
3456     hbitmap_iter_init(hbi, hbi->hb, offset);
3457 }
3458 
3459 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3460 {
3461     return hbitmap_count(bitmap->bitmap);
3462 }
3463 
3464 /* Get a reference to bs */
3465 void bdrv_ref(BlockDriverState *bs)
3466 {
3467     bs->refcnt++;
3468 }
3469 
3470 /* Release a previously grabbed reference to bs.
3471  * If after releasing, reference count is zero, the BlockDriverState is
3472  * deleted. */
3473 void bdrv_unref(BlockDriverState *bs)
3474 {
3475     if (!bs) {
3476         return;
3477     }
3478     assert(bs->refcnt > 0);
3479     if (--bs->refcnt == 0) {
3480         bdrv_delete(bs);
3481     }
3482 }
3483 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error given to bdrv_op_block(); bdrv_op_unblock() matches entries by
     * this pointer and bdrv_op_is_blocked() reports its message. */
    Error *reason;
    /* Linkage within the op_blockers list. */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3488 
3489 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3490 {
3491     BdrvOpBlocker *blocker;
3492     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3493     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3494         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3495         if (errp) {
3496             error_setg(errp, "Node '%s' is busy: %s",
3497                        bdrv_get_device_or_node_name(bs),
3498                        error_get_pretty(blocker->reason));
3499         }
3500         return true;
3501     }
3502     return false;
3503 }
3504 
3505 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3506 {
3507     BdrvOpBlocker *blocker;
3508     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3509 
3510     blocker = g_new0(BdrvOpBlocker, 1);
3511     blocker->reason = reason;
3512     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3513 }
3514 
3515 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3516 {
3517     BdrvOpBlocker *blocker, *next;
3518     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3519     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3520         if (blocker->reason == reason) {
3521             QLIST_REMOVE(blocker, list);
3522             g_free(blocker);
3523         }
3524     }
3525 }
3526 
3527 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3528 {
3529     int i;
3530     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3531         bdrv_op_block(bs, i, reason);
3532     }
3533 }
3534 
3535 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3536 {
3537     int i;
3538     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3539         bdrv_op_unblock(bs, i, reason);
3540     }
3541 }
3542 
3543 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3544 {
3545     int i;
3546 
3547     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3548         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3549             return false;
3550         }
3551     }
3552     return true;
3553 }
3554 
3555 void bdrv_iostatus_enable(BlockDriverState *bs)
3556 {
3557     bs->iostatus_enabled = true;
3558     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3559 }
3560 
3561 /* The I/O status is only enabled if the drive explicitly
3562  * enables it _and_ the VM is configured to stop on errors */
3563 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3564 {
3565     return (bs->iostatus_enabled &&
3566            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3567             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3568             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3569 }
3570 
3571 void bdrv_iostatus_disable(BlockDriverState *bs)
3572 {
3573     bs->iostatus_enabled = false;
3574 }
3575 
3576 void bdrv_iostatus_reset(BlockDriverState *bs)
3577 {
3578     if (bdrv_iostatus_is_enabled(bs)) {
3579         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3580         if (bs->job) {
3581             block_job_iostatus_reset(bs->job);
3582         }
3583     }
3584 }
3585 
3586 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3587 {
3588     assert(bdrv_iostatus_is_enabled(bs));
3589     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3590         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3591                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3592     }
3593 }
3594 
3595 void bdrv_img_create(const char *filename, const char *fmt,
3596                      const char *base_filename, const char *base_fmt,
3597                      char *options, uint64_t img_size, int flags,
3598                      Error **errp, bool quiet)
3599 {
3600     QemuOptsList *create_opts = NULL;
3601     QemuOpts *opts = NULL;
3602     const char *backing_fmt, *backing_file;
3603     int64_t size;
3604     BlockDriver *drv, *proto_drv;
3605     BlockDriver *backing_drv = NULL;
3606     Error *local_err = NULL;
3607     int ret = 0;
3608 
3609     /* Find driver and parse its options */
3610     drv = bdrv_find_format(fmt);
3611     if (!drv) {
3612         error_setg(errp, "Unknown file format '%s'", fmt);
3613         return;
3614     }
3615 
3616     proto_drv = bdrv_find_protocol(filename, true, errp);
3617     if (!proto_drv) {
3618         return;
3619     }
3620 
3621     if (!drv->create_opts) {
3622         error_setg(errp, "Format driver '%s' does not support image creation",
3623                    drv->format_name);
3624         return;
3625     }
3626 
3627     if (!proto_drv->create_opts) {
3628         error_setg(errp, "Protocol driver '%s' does not support image creation",
3629                    proto_drv->format_name);
3630         return;
3631     }
3632 
3633     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3634     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3635 
3636     /* Create parameter list with default values */
3637     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3638     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3639 
3640     /* Parse -o options */
3641     if (options) {
3642         qemu_opts_do_parse(opts, options, NULL, &local_err);
3643         if (local_err) {
3644             error_report_err(local_err);
3645             local_err = NULL;
3646             error_setg(errp, "Invalid options for file format '%s'", fmt);
3647             goto out;
3648         }
3649     }
3650 
3651     if (base_filename) {
3652         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3653         if (local_err) {
3654             error_setg(errp, "Backing file not supported for file format '%s'",
3655                        fmt);
3656             goto out;
3657         }
3658     }
3659 
3660     if (base_fmt) {
3661         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3662         if (local_err) {
3663             error_setg(errp, "Backing file format not supported for file "
3664                              "format '%s'", fmt);
3665             goto out;
3666         }
3667     }
3668 
3669     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3670     if (backing_file) {
3671         if (!strcmp(filename, backing_file)) {
3672             error_setg(errp, "Error: Trying to create an image with the "
3673                              "same filename as the backing file");
3674             goto out;
3675         }
3676     }
3677 
3678     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3679     if (backing_fmt) {
3680         backing_drv = bdrv_find_format(backing_fmt);
3681         if (!backing_drv) {
3682             error_setg(errp, "Unknown backing file format '%s'",
3683                        backing_fmt);
3684             goto out;
3685         }
3686     }
3687 
3688     // The size for the image must always be specified, with one exception:
3689     // If we are using a backing file, we can obtain the size from there
3690     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3691     if (size == -1) {
3692         if (backing_file) {
3693             BlockDriverState *bs;
3694             char *full_backing = g_new0(char, PATH_MAX);
3695             int64_t size;
3696             int back_flags;
3697 
3698             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3699                                                          full_backing, PATH_MAX,
3700                                                          &local_err);
3701             if (local_err) {
3702                 g_free(full_backing);
3703                 goto out;
3704             }
3705 
3706             /* backing files always opened read-only */
3707             back_flags =
3708                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3709 
3710             bs = NULL;
3711             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3712                             backing_drv, &local_err);
3713             g_free(full_backing);
3714             if (ret < 0) {
3715                 goto out;
3716             }
3717             size = bdrv_getlength(bs);
3718             if (size < 0) {
3719                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3720                                  backing_file);
3721                 bdrv_unref(bs);
3722                 goto out;
3723             }
3724 
3725             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3726 
3727             bdrv_unref(bs);
3728         } else {
3729             error_setg(errp, "Image creation needs a size parameter");
3730             goto out;
3731         }
3732     }
3733 
3734     if (!quiet) {
3735         printf("Formatting '%s', fmt=%s", filename, fmt);
3736         qemu_opts_print(opts, " ");
3737         puts("");
3738     }
3739 
3740     ret = bdrv_create(drv, filename, opts, &local_err);
3741 
3742     if (ret == -EFBIG) {
3743         /* This is generally a better message than whatever the driver would
3744          * deliver (especially because of the cluster_size_hint), since that
3745          * is most probably not much different from "image too large". */
3746         const char *cluster_size_hint = "";
3747         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3748             cluster_size_hint = " (try using a larger cluster size)";
3749         }
3750         error_setg(errp, "The image size is too large for file format '%s'"
3751                    "%s", fmt, cluster_size_hint);
3752         error_free(local_err);
3753         local_err = NULL;
3754     }
3755 
3756 out:
3757     qemu_opts_del(opts);
3758     qemu_opts_free(create_opts);
3759     if (local_err) {
3760         error_propagate(errp, local_err);
3761     }
3762 }
3763 
3764 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3765 {
3766     return bs->aio_context;
3767 }
3768 
3769 void bdrv_detach_aio_context(BlockDriverState *bs)
3770 {
3771     BdrvAioNotifier *baf;
3772 
3773     if (!bs->drv) {
3774         return;
3775     }
3776 
3777     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3778         baf->detach_aio_context(baf->opaque);
3779     }
3780 
3781     if (bs->io_limits_enabled) {
3782         throttle_detach_aio_context(&bs->throttle_state);
3783     }
3784     if (bs->drv->bdrv_detach_aio_context) {
3785         bs->drv->bdrv_detach_aio_context(bs);
3786     }
3787     if (bs->file) {
3788         bdrv_detach_aio_context(bs->file);
3789     }
3790     if (bs->backing_hd) {
3791         bdrv_detach_aio_context(bs->backing_hd);
3792     }
3793 
3794     bs->aio_context = NULL;
3795 }
3796 
3797 void bdrv_attach_aio_context(BlockDriverState *bs,
3798                              AioContext *new_context)
3799 {
3800     BdrvAioNotifier *ban;
3801 
3802     if (!bs->drv) {
3803         return;
3804     }
3805 
3806     bs->aio_context = new_context;
3807 
3808     if (bs->backing_hd) {
3809         bdrv_attach_aio_context(bs->backing_hd, new_context);
3810     }
3811     if (bs->file) {
3812         bdrv_attach_aio_context(bs->file, new_context);
3813     }
3814     if (bs->drv->bdrv_attach_aio_context) {
3815         bs->drv->bdrv_attach_aio_context(bs, new_context);
3816     }
3817     if (bs->io_limits_enabled) {
3818         throttle_attach_aio_context(&bs->throttle_state, new_context);
3819     }
3820 
3821     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3822         ban->attached_aio_context(new_context, ban->opaque);
3823     }
3824 }
3825 
3826 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3827 {
3828     bdrv_drain_all(); /* ensure there are no in-flight requests */
3829 
3830     bdrv_detach_aio_context(bs);
3831 
3832     /* This function executes in the old AioContext so acquire the new one in
3833      * case it runs in a different thread.
3834      */
3835     aio_context_acquire(new_context);
3836     bdrv_attach_aio_context(bs, new_context);
3837     aio_context_release(new_context);
3838 }
3839 
3840 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3841         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3842         void (*detach_aio_context)(void *opaque), void *opaque)
3843 {
3844     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3845     *ban = (BdrvAioNotifier){
3846         .attached_aio_context = attached_aio_context,
3847         .detach_aio_context   = detach_aio_context,
3848         .opaque               = opaque
3849     };
3850 
3851     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3852 }
3853 
3854 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3855                                       void (*attached_aio_context)(AioContext *,
3856                                                                    void *),
3857                                       void (*detach_aio_context)(void *),
3858                                       void *opaque)
3859 {
3860     BdrvAioNotifier *ban, *ban_next;
3861 
3862     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3863         if (ban->attached_aio_context == attached_aio_context &&
3864             ban->detach_aio_context   == detach_aio_context   &&
3865             ban->opaque               == opaque)
3866         {
3867             QLIST_REMOVE(ban, list);
3868             g_free(ban);
3869 
3870             return;
3871         }
3872     }
3873 
3874     abort();
3875 }
3876 
3877 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3878                        BlockDriverAmendStatusCB *status_cb)
3879 {
3880     if (!bs->drv->bdrv_amend_options) {
3881         return -ENOTSUP;
3882     }
3883     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3884 }
3885 
3886 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3887  * of block filter and by bdrv_is_first_non_filter.
3888  * It is used to test if the given bs is the candidate or recurse more in the
3889  * node graph.
3890  */
3891 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3892                                       BlockDriverState *candidate)
3893 {
3894     /* return false if basic checks fails */
3895     if (!bs || !bs->drv) {
3896         return false;
3897     }
3898 
3899     /* the code reached a non block filter driver -> check if the bs is
3900      * the same as the candidate. It's the recursion termination condition.
3901      */
3902     if (!bs->drv->is_filter) {
3903         return bs == candidate;
3904     }
3905     /* Down this path the driver is a block filter driver */
3906 
3907     /* If the block filter recursion method is defined use it to recurse down
3908      * the node graph.
3909      */
3910     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3911         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3912     }
3913 
3914     /* the driver is a block filter but don't allow to recurse -> return false
3915      */
3916     return false;
3917 }
3918 
3919 /* This function checks if the candidate is the first non filter bs down it's
3920  * bs chain. Since we don't have pointers to parents it explore all bs chains
3921  * from the top. Some filters can choose not to pass down the recursion.
3922  */
3923 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3924 {
3925     BlockDriverState *bs;
3926 
3927     /* walk down the bs forest recursively */
3928     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3929         bool perm;
3930 
3931         /* try to recurse in this top level bs */
3932         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3933 
3934         /* candidate is the first non filter */
3935         if (perm) {
3936             return true;
3937         }
3938     }
3939 
3940     return false;
3941 }
3942 
3943 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3944 {
3945     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3946     AioContext *aio_context;
3947 
3948     if (!to_replace_bs) {
3949         error_setg(errp, "Node name '%s' not found", node_name);
3950         return NULL;
3951     }
3952 
3953     aio_context = bdrv_get_aio_context(to_replace_bs);
3954     aio_context_acquire(aio_context);
3955 
3956     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3957         to_replace_bs = NULL;
3958         goto out;
3959     }
3960 
3961     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3962      * most non filter in order to prevent data corruption.
3963      * Another benefit is that this tests exclude backing files which are
3964      * blocked by the backing blockers.
3965      */
3966     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3967         error_setg(errp, "Only top most non filter can be replaced");
3968         to_replace_bs = NULL;
3969         goto out;
3970     }
3971 
3972 out:
3973     aio_context_release(aio_context);
3974     return to_replace_bs;
3975 }
3976 
3977 static bool append_open_options(QDict *d, BlockDriverState *bs)
3978 {
3979     const QDictEntry *entry;
3980     bool found_any = false;
3981 
3982     for (entry = qdict_first(bs->options); entry;
3983          entry = qdict_next(bs->options, entry))
3984     {
3985         /* Only take options for this level and exclude all non-driver-specific
3986          * options */
3987         if (!strchr(qdict_entry_key(entry), '.') &&
3988             strcmp(qdict_entry_key(entry), "node-name"))
3989         {
3990             qobject_incref(qdict_entry_value(entry));
3991             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3992             found_any = true;
3993         }
3994     }
3995 
3996     return found_any;
3997 }
3998 
3999 /* Updates the following BDS fields:
4000  *  - exact_filename: A filename which may be used for opening a block device
4001  *                    which (mostly) equals the given BDS (even without any
4002  *                    other options; so reading and writing must return the same
4003  *                    results, but caching etc. may be different)
4004  *  - full_open_options: Options which, when given when opening a block device
4005  *                       (without a filename), result in a BDS (mostly)
4006  *                       equalling the given one
4007  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4008  *              full_open_options is converted to a JSON object, prefixed with
4009  *              "json:" (for use through the JSON pseudo protocol) and put here.
4010  */
4011 void bdrv_refresh_filename(BlockDriverState *bs)
4012 {
4013     BlockDriver *drv = bs->drv;
4014     QDict *opts;
4015 
4016     if (!drv) {
4017         return;
4018     }
4019 
4020     /* This BDS's file name will most probably depend on its file's name, so
4021      * refresh that first */
4022     if (bs->file) {
4023         bdrv_refresh_filename(bs->file);
4024     }
4025 
4026     if (drv->bdrv_refresh_filename) {
4027         /* Obsolete information is of no use here, so drop the old file name
4028          * information before refreshing it */
4029         bs->exact_filename[0] = '\0';
4030         if (bs->full_open_options) {
4031             QDECREF(bs->full_open_options);
4032             bs->full_open_options = NULL;
4033         }
4034 
4035         drv->bdrv_refresh_filename(bs);
4036     } else if (bs->file) {
4037         /* Try to reconstruct valid information from the underlying file */
4038         bool has_open_options;
4039 
4040         bs->exact_filename[0] = '\0';
4041         if (bs->full_open_options) {
4042             QDECREF(bs->full_open_options);
4043             bs->full_open_options = NULL;
4044         }
4045 
4046         opts = qdict_new();
4047         has_open_options = append_open_options(opts, bs);
4048 
4049         /* If no specific options have been given for this BDS, the filename of
4050          * the underlying file should suffice for this one as well */
4051         if (bs->file->exact_filename[0] && !has_open_options) {
4052             strcpy(bs->exact_filename, bs->file->exact_filename);
4053         }
4054         /* Reconstructing the full options QDict is simple for most format block
4055          * drivers, as long as the full options are known for the underlying
4056          * file BDS. The full options QDict of that file BDS should somehow
4057          * contain a representation of the filename, therefore the following
4058          * suffices without querying the (exact_)filename of this BDS. */
4059         if (bs->file->full_open_options) {
4060             qdict_put_obj(opts, "driver",
4061                           QOBJECT(qstring_from_str(drv->format_name)));
4062             QINCREF(bs->file->full_open_options);
4063             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4064 
4065             bs->full_open_options = opts;
4066         } else {
4067             QDECREF(opts);
4068         }
4069     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4070         /* There is no underlying file BDS (at least referenced by BDS.file),
4071          * so the full options QDict should be equal to the options given
4072          * specifically for this block device when it was opened (plus the
4073          * driver specification).
4074          * Because those options don't change, there is no need to update
4075          * full_open_options when it's already set. */
4076 
4077         opts = qdict_new();
4078         append_open_options(opts, bs);
4079         qdict_put_obj(opts, "driver",
4080                       QOBJECT(qstring_from_str(drv->format_name)));
4081 
4082         if (bs->exact_filename[0]) {
4083             /* This may not work for all block protocol drivers (some may
4084              * require this filename to be parsed), but we have to find some
4085              * default solution here, so just include it. If some block driver
4086              * does not support pure options without any filename at all or
4087              * needs some special format of the options QDict, it needs to
4088              * implement the driver-specific bdrv_refresh_filename() function.
4089              */
4090             qdict_put_obj(opts, "filename",
4091                           QOBJECT(qstring_from_str(bs->exact_filename)));
4092         }
4093 
4094         bs->full_open_options = opts;
4095     }
4096 
4097     if (bs->exact_filename[0]) {
4098         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4099     } else if (bs->full_open_options) {
4100         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4101         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4102                  qstring_get_str(json));
4103         QDECREF(json);
4104     }
4105 }
4106 
4107 /* This accessor function purpose is to allow the device models to access the
4108  * BlockAcctStats structure embedded inside a BlockDriverState without being
4109  * aware of the BlockDriverState structure layout.
4110  * It will go away when the BlockAcctStats structure will be moved inside
4111  * the device models.
4112  */
4113 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4114 {
4115     return &bs->stats;
4116 }
4117