xref: /openbmc/qemu/block.c (revision 6ee4ce1ee75a651c246d926c2302281b51981f6d)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 *
 * The state is therefore fully determined by the @successor and @disabled
 * fields below; there is no separate state variable.
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably chains the bitmap into the owning
     * BlockDriverState's dirty_bitmaps list -- confirm against users */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
70 
/*
 * Sentinel stored in a result field while an emulated synchronous operation
 * is still running; bdrv_create() polls until CreateCo.ret differs from it.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BlockDriverStates created through bdrv_new_root(), linked by device_list */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BlockDriverStates that were given a node name (see
 * bdrv_assign_node_name()), linked by node_list */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is further down in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role,
                             BlockDriver *drv, Error **errp);

/* Forward declaration; the definition is further down in this file */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
91 
92 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter followed by ':'
 * (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first byte is a letter */
    return is_letter && filename[1] == ':';
}
99 
100 int is_windows_drive(const char *filename)
101 {
102     if (is_windows_drive_prefix(filename) &&
103         filename[2] == '\0')
104         return 1;
105     if (strstart(filename, "\\\\.\\", NULL) ||
106         strstart(filename, "//./", NULL))
107         return 1;
108     return 0;
109 }
110 #endif
111 
112 size_t bdrv_opt_mem_align(BlockDriverState *bs)
113 {
114     if (!bs || !bs->drv) {
115         /* page size or 4k (hdd sector size) should be on the safe side */
116         return MAX(4096, getpagesize());
117     }
118 
119     return bs->bl.opt_mem_alignment;
120 }
121 
122 size_t bdrv_min_mem_align(BlockDriverState *bs)
123 {
124     if (!bs || !bs->drv) {
125         /* page size or 4k (hdd sector size) should be on the safe side */
126         return MAX(4096, getpagesize());
127     }
128 
129     return bs->bl.min_mem_alignment;
130 }
131 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * The first ':' or path separator decides: a ':' seen before any separator
 * means a protocol prefix. On Windows, drive specifications are never
 * treated as protocols and '\\' counts as a separator too.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
149 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows a leading '\\',
 * a drive prefix, or a drive name such as "\\.\d:" also counts.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
162 
/*
 * Build in @dest (capacity @dest_size bytes, including the NUL) the path of
 * @filename interpreted relative to @base_path.
 *
 * If @filename is absolute it is copied unchanged. Otherwise the prefix of
 * @base_path up to and including its last directory separator -- or up to
 * and including its first ':' if that lies further right, which is how URLs
 * are supported -- is kept and @filename is appended to it.
 *
 * Nothing is written when @dest_size is not positive. The prefix is cut
 * short if it does not fit in @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *keep_end;
    int keep_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of the base, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever of the two prefixes is longer */
    keep_end = after_sep > after_colon ? after_sep : after_colon;

    keep_len = keep_end - base_path;
    if (keep_len > dest_size - 1) {
        keep_len = dest_size - 1;
    }
    memcpy(dest, base_path, keep_len);
    dest[keep_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
206 
207 void bdrv_get_full_backing_filename_from_filename(const char *backed,
208                                                   const char *backing,
209                                                   char *dest, size_t sz,
210                                                   Error **errp)
211 {
212     if (backing[0] == '\0' || path_has_protocol(backing) ||
213         path_is_absolute(backing))
214     {
215         pstrcpy(dest, sz, backing);
216     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
217         error_setg(errp, "Cannot use relative backing file names for '%s'",
218                    backed);
219     } else {
220         path_combine(dest, sz, backed, backing);
221     }
222 }
223 
224 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
225                                     Error **errp)
226 {
227     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
228 
229     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
230                                                  dest, sz, errp);
231 }
232 
/*
 * Register block driver @bdrv: run bdrv_setup_io_funcs() on it, then put it
 * at the head of the bdrv_drivers list that the lookup and probing
 * functions in this file iterate over.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
239 
240 BlockDriverState *bdrv_new_root(void)
241 {
242     BlockDriverState *bs = bdrv_new();
243 
244     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
245     return bs;
246 }
247 
248 BlockDriverState *bdrv_new(void)
249 {
250     BlockDriverState *bs;
251     int i;
252 
253     bs = g_new0(BlockDriverState, 1);
254     QLIST_INIT(&bs->dirty_bitmaps);
255     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
256         QLIST_INIT(&bs->op_blockers[i]);
257     }
258     bdrv_iostatus_disable(bs);
259     notifier_list_init(&bs->close_notifiers);
260     notifier_with_return_list_init(&bs->before_write_notifiers);
261     qemu_co_queue_init(&bs->throttled_reqs[0]);
262     qemu_co_queue_init(&bs->throttled_reqs[1]);
263     bs->refcnt = 1;
264     bs->aio_context = qemu_get_aio_context();
265 
266     return bs;
267 }
268 
/*
 * Add @notify to @bs's close_notifiers list.
 */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
273 
274 BlockDriver *bdrv_find_format(const char *format_name)
275 {
276     BlockDriver *drv1;
277     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
278         if (!strcmp(drv1->format_name, format_name)) {
279             return drv1;
280         }
281     }
282     return NULL;
283 }
284 
285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
286 {
287     static const char *whitelist_rw[] = {
288         CONFIG_BDRV_RW_WHITELIST
289     };
290     static const char *whitelist_ro[] = {
291         CONFIG_BDRV_RO_WHITELIST
292     };
293     const char **p;
294 
295     if (!whitelist_rw[0] && !whitelist_ro[0]) {
296         return 1;               /* no whitelist, anything goes */
297     }
298 
299     for (p = whitelist_rw; *p; p++) {
300         if (!strcmp(drv->format_name, *p)) {
301             return 1;
302         }
303     }
304     if (read_only) {
305         for (p = whitelist_ro; *p; p++) {
306             if (!strcmp(drv->format_name, *p)) {
307                 return 1;
308             }
309         }
310     }
311     return 0;
312 }
313 
314 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
315                                           bool read_only)
316 {
317     BlockDriver *drv = bdrv_find_format(format_name);
318     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
319 }
320 
/*
 * Argument/result block handed from bdrv_create() to
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create() callback is invoked */
    char *filename;     /* Copy of the image name; freed by bdrv_create() */
    QemuOpts *opts;     /* Creation options, passed through to the driver */
    int ret;            /* NOT_DONE until the entry function stores a result */
    Error *err;         /* Error reported by the driver callback, if any */
} CreateCo;
328 
329 static void coroutine_fn bdrv_create_co_entry(void *opaque)
330 {
331     Error *local_err = NULL;
332     int ret;
333 
334     CreateCo *cco = opaque;
335     assert(cco->drv);
336 
337     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
338     if (local_err) {
339         error_propagate(&cco->err, local_err);
340     }
341     cco->ret = ret;
342 }
343 
344 int bdrv_create(BlockDriver *drv, const char* filename,
345                 QemuOpts *opts, Error **errp)
346 {
347     int ret;
348 
349     Coroutine *co;
350     CreateCo cco = {
351         .drv = drv,
352         .filename = g_strdup(filename),
353         .opts = opts,
354         .ret = NOT_DONE,
355         .err = NULL,
356     };
357 
358     if (!drv->bdrv_create) {
359         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
360         ret = -ENOTSUP;
361         goto out;
362     }
363 
364     if (qemu_in_coroutine()) {
365         /* Fast-path if already in coroutine context */
366         bdrv_create_co_entry(&cco);
367     } else {
368         co = qemu_coroutine_create(bdrv_create_co_entry);
369         qemu_coroutine_enter(co, &cco);
370         while (cco.ret == NOT_DONE) {
371             aio_poll(qemu_get_aio_context(), true);
372         }
373     }
374 
375     ret = cco.ret;
376     if (ret < 0) {
377         if (cco.err) {
378             error_propagate(errp, cco.err);
379         } else {
380             error_setg_errno(errp, -ret, "Could not create image");
381         }
382     }
383 
384 out:
385     g_free(cco.filename);
386     return ret;
387 }
388 
389 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
390 {
391     BlockDriver *drv;
392     Error *local_err = NULL;
393     int ret;
394 
395     drv = bdrv_find_protocol(filename, true, errp);
396     if (drv == NULL) {
397         return -ENOENT;
398     }
399 
400     ret = bdrv_create(drv, filename, opts, &local_err);
401     if (local_err) {
402         error_propagate(errp, local_err);
403     }
404     return ret;
405 }
406 
407 /**
408  * Try to get @bs's logical and physical block size.
409  * On success, store them in @bsz struct and return 0.
410  * On failure return -errno.
411  * @bs must not be empty.
412  */
413 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
414 {
415     BlockDriver *drv = bs->drv;
416 
417     if (drv && drv->bdrv_probe_blocksizes) {
418         return drv->bdrv_probe_blocksizes(bs, bsz);
419     }
420 
421     return -ENOTSUP;
422 }
423 
424 /**
425  * Try to get @bs's geometry (cyls, heads, sectors).
426  * On success, store them in @geo struct and return 0.
427  * On failure return -errno.
428  * @bs must not be empty.
429  */
430 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
431 {
432     BlockDriver *drv = bs->drv;
433 
434     if (drv && drv->bdrv_probe_geometry) {
435         return drv->bdrv_probe_geometry(bs, geo);
436     }
437 
438     return -ENOTSUP;
439 }
440 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the file; @size is the capacity of that
 * buffer in bytes. On POSIX hosts the file is created in $TMPDIR, or in
 * /var/tmp when TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit in @filename). On Windows the negated
 * GetLastError() code is returned on failure instead.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may fail too and overwrite errno, so remember the
         * close() error first; that is the one to report */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
476 
477 /*
478  * Detect host devices. By convention, /dev/cdrom[N] is always
479  * recognized as a host CDROM.
480  */
481 static BlockDriver *find_hdev_driver(const char *filename)
482 {
483     int score_max = 0, score;
484     BlockDriver *drv = NULL, *d;
485 
486     QLIST_FOREACH(d, &bdrv_drivers, list) {
487         if (d->bdrv_probe_device) {
488             score = d->bdrv_probe_device(filename);
489             if (score > score_max) {
490                 score_max = score;
491                 drv = d;
492             }
493         }
494     }
495 
496     return drv;
497 }
498 
499 BlockDriver *bdrv_find_protocol(const char *filename,
500                                 bool allow_protocol_prefix,
501                                 Error **errp)
502 {
503     BlockDriver *drv1;
504     char protocol[128];
505     int len;
506     const char *p;
507 
508     /* TODO Drivers without bdrv_file_open must be specified explicitly */
509 
510     /*
511      * XXX(hch): we really should not let host device detection
512      * override an explicit protocol specification, but moving this
513      * later breaks access to device names with colons in them.
514      * Thanks to the brain-dead persistent naming schemes on udev-
515      * based Linux systems those actually are quite common.
516      */
517     drv1 = find_hdev_driver(filename);
518     if (drv1) {
519         return drv1;
520     }
521 
522     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
523         return &bdrv_file;
524     }
525 
526     p = strchr(filename, ':');
527     assert(p != NULL);
528     len = p - filename;
529     if (len > sizeof(protocol) - 1)
530         len = sizeof(protocol) - 1;
531     memcpy(protocol, filename, len);
532     protocol[len] = '\0';
533     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
534         if (drv1->protocol_name &&
535             !strcmp(drv1->protocol_name, protocol)) {
536             return drv1;
537         }
538     }
539 
540     error_setg(errp, "Unknown protocol '%s'", protocol);
541     return NULL;
542 }
543 
544 /*
545  * Guess image format by probing its contents.
546  * This is not a good idea when your image is raw (CVE-2008-2004), but
547  * we do it anyway for backward compatibility.
548  *
549  * @buf         contains the image's first @buf_size bytes.
550  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
551  *              but can be smaller if the image file is smaller)
552  * @filename    is its filename.
553  *
554  * For all block drivers, call the bdrv_probe() method to get its
555  * probing score.
556  * Return the first block driver with the highest probing score.
557  */
558 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
559                             const char *filename)
560 {
561     int score_max = 0, score;
562     BlockDriver *drv = NULL, *d;
563 
564     QLIST_FOREACH(d, &bdrv_drivers, list) {
565         if (d->bdrv_probe) {
566             score = d->bdrv_probe(buf, buf_size, filename);
567             if (score > score_max) {
568                 score_max = score;
569                 drv = d;
570             }
571         }
572     }
573 
574     return drv;
575 }
576 
577 static int find_image_format(BlockDriverState *bs, const char *filename,
578                              BlockDriver **pdrv, Error **errp)
579 {
580     BlockDriver *drv;
581     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
582     int ret = 0;
583 
584     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
585     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
586         *pdrv = &bdrv_raw;
587         return ret;
588     }
589 
590     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
591     if (ret < 0) {
592         error_setg_errno(errp, -ret, "Could not read image for determining its "
593                          "format");
594         *pdrv = NULL;
595         return ret;
596     }
597 
598     drv = bdrv_probe_all(buf, ret, filename);
599     if (!drv) {
600         error_setg(errp, "Could not determine image format: No compatible "
601                    "driver found");
602         ret = -ENOENT;
603     }
604     *pdrv = drv;
605     return ret;
606 }
607 
608 /**
609  * Set the current 'total_sectors' value
610  * Return 0 on success, -errno on error.
611  */
612 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
613 {
614     BlockDriver *drv = bs->drv;
615 
616     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
617     if (bs->sg)
618         return 0;
619 
620     /* query actual device if possible, otherwise just trust the hint */
621     if (drv->bdrv_getlength) {
622         int64_t length = drv->bdrv_getlength(bs);
623         if (length < 0) {
624             return length;
625         }
626         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
627     }
628 
629     bs->total_sectors = hint;
630     return 0;
631 }
632 
633 /**
634  * Set open flags for a given discard mode
635  *
636  * Return 0 on success, -1 if the discard mode was invalid.
637  */
638 int bdrv_parse_discard_flags(const char *mode, int *flags)
639 {
640     *flags &= ~BDRV_O_UNMAP;
641 
642     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
643         /* do nothing */
644     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
645         *flags |= BDRV_O_UNMAP;
646     } else {
647         return -1;
648     }
649 
650     return 0;
651 }
652 
653 /**
654  * Set open flags for a given cache mode
655  *
656  * Return 0 on success, -1 if the cache mode was invalid.
657  */
658 int bdrv_parse_cache_flags(const char *mode, int *flags)
659 {
660     *flags &= ~BDRV_O_CACHE_MASK;
661 
662     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
663         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
664     } else if (!strcmp(mode, "directsync")) {
665         *flags |= BDRV_O_NOCACHE;
666     } else if (!strcmp(mode, "writeback")) {
667         *flags |= BDRV_O_CACHE_WB;
668     } else if (!strcmp(mode, "unsafe")) {
669         *flags |= BDRV_O_CACHE_WB;
670         *flags |= BDRV_O_NO_FLUSH;
671     } else if (!strcmp(mode, "writethrough")) {
672         /* this is the default */
673     } else {
674         return -1;
675     }
676 
677     return 0;
678 }
679 
680 /*
681  * Returns the flags that a temporary snapshot should get, based on the
682  * originally requested flags (the originally requested image will have flags
683  * like a backing file)
684  */
685 static int bdrv_temp_snapshot_flags(int flags)
686 {
687     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
688 }
689 
690 /*
691  * Returns the flags that bs->file should get if a protocol driver is expected,
692  * based on the given flags for the parent BDS
693  */
694 static int bdrv_inherited_flags(int flags)
695 {
696     /* Enable protocol handling, disable format probing for bs->file */
697     flags |= BDRV_O_PROTOCOL;
698 
699     /* Our block drivers take care to send flushes and respect unmap policy,
700      * so we can enable both unconditionally on lower layers. */
701     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
702 
703     /* Clear flags that only apply to the top layer */
704     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
705 
706     return flags;
707 }
708 
709 const BdrvChildRole child_file = {
710     .inherit_flags = bdrv_inherited_flags,
711 };
712 
713 /*
714  * Returns the flags that bs->file should get if the use of formats (and not
715  * only protocols) is permitted for it, based on the given flags for the parent
716  * BDS
717  */
718 static int bdrv_inherited_fmt_flags(int parent_flags)
719 {
720     int flags = child_file.inherit_flags(parent_flags);
721     return flags & ~BDRV_O_PROTOCOL;
722 }
723 
724 const BdrvChildRole child_format = {
725     .inherit_flags = bdrv_inherited_fmt_flags,
726 };
727 
728 /*
729  * Returns the flags that bs->backing_hd should get, based on the given flags
730  * for the parent BDS
731  */
732 static int bdrv_backing_flags(int flags)
733 {
734     /* backing files always opened read-only */
735     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
736 
737     /* snapshot=on is handled on the top layer */
738     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
739 
740     return flags;
741 }
742 
743 static const BdrvChildRole child_backing = {
744     .inherit_flags = bdrv_backing_flags,
745 };
746 
747 static int bdrv_open_flags(BlockDriverState *bs, int flags)
748 {
749     int open_flags = flags | BDRV_O_CACHE_WB;
750 
751     /*
752      * Clear flags that are internal to the block layer before opening the
753      * image.
754      */
755     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
756 
757     /*
758      * Snapshots should be writable.
759      */
760     if (flags & BDRV_O_TEMPORARY) {
761         open_flags |= BDRV_O_RDWR;
762     }
763 
764     return open_flags;
765 }
766 
767 static void bdrv_assign_node_name(BlockDriverState *bs,
768                                   const char *node_name,
769                                   Error **errp)
770 {
771     if (!node_name) {
772         return;
773     }
774 
775     /* Check for empty string or invalid characters */
776     if (!id_wellformed(node_name)) {
777         error_setg(errp, "Invalid node name");
778         return;
779     }
780 
781     /* takes care of avoiding namespaces collisions */
782     if (blk_by_name(node_name)) {
783         error_setg(errp, "node-name=%s is conflicting with a device id",
784                    node_name);
785         return;
786     }
787 
788     /* takes care of avoiding duplicates node names */
789     if (bdrv_find_node(node_name)) {
790         error_setg(errp, "Duplicate node name");
791         return;
792     }
793 
794     /* copy node name into the bs and insert it into the graph list */
795     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
796     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
797 }
798 
/*
 * Options that bdrv_open_common() itself takes out of the options QDict
 * before the driver's open callback sees it. Currently only "node-name".
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
811 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs      state to open; must not have bs->file set yet
 * @file    already opened protocol layer for format drivers, or NULL when
 *          @drv opens the file itself through bdrv_file_open
 * @options option dictionary; must not be NULL nor the same as bs->options
 * @flags   requested BDRV_O_* flags
 * @drv     driver to open @bs with; must not be NULL
 * @errp    set on failure
 *
 * Returns 0 on success, a negative errno value on failure. On failure after
 * the driver was attached, bs->file, bs->opaque and bs->drv are reset.
 *
 * NOTE(review): the failure paths do not undo the node-name registration or
 * the copy-on-read enable done earlier in this function -- presumably the
 * caller tears @bs down; confirm against the callers.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The name comes from the protocol layer if there is one, otherwise
     * from the "filename" option (which may be absent) */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Take the options described by bdrv_runtime_opts out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* A missing node-name yields NULL, which bdrv_assign_node_name()
     * treats as a no-op */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Defaults set before the driver's open callback runs */
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* With whitelisting enabled, reject drivers that are not allowed for
     * this access mode; the message says whether read-only would work */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Attach the driver and allocate its zero-filled private state */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Taken from the requested flags, not from open_flags (which always
     * has BDRV_O_CACHE_WB set) */
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build one from errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    /* The current total_sectors value serves as the hint */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);

    qemu_opts_del(opts);
    return 0;

free_and_fail:
    /* Detach the driver again; @file is only forgotten here, not released */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
962 
963 static QDict *parse_json_filename(const char *filename, Error **errp)
964 {
965     QObject *options_obj;
966     QDict *options;
967     int ret;
968 
969     ret = strstart(filename, "json:", &filename);
970     assert(ret);
971 
972     options_obj = qobject_from_json(filename);
973     if (!options_obj) {
974         error_setg(errp, "Could not parse the JSON options");
975         return NULL;
976     }
977 
978     if (qobject_type(options_obj) != QTYPE_QDICT) {
979         qobject_decref(options_obj);
980         error_setg(errp, "Invalid JSON object given");
981         return NULL;
982     }
983 
984     options = qobject_to_qdict(options_obj);
985     qdict_flatten(options);
986 
987     return options;
988 }
989 
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 * block driver has been specified explicitly.
 *
 * @options:   the options QDict; entries ("filename", "driver", and anything
 *             given through a json: filename) are added to it in place
 * @pfilename: the legacy filename; *pfilename is reset to NULL when it was a
 *             json: pseudo-filename that has been merged into @options
 * @flags:     the open flags; only BDRV_O_PROTOCOL is modified
 * @drv:       a driver requested by the caller, or NULL
 * @errp:      set on failure
 *
 * Returns 0 on success, -EINVAL or -ENOENT on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename,
                             int *flags, BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = *flags & BDRV_O_PROTOCOL;
    /* true only if the "filename" option was filled in from @pfilename */
    bool parse_filename = false;
    BlockDriver *tmp_drv;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        /* The json: string has been consumed; no legacy filename is left */
        *pfilename = filename = NULL;
    }

    drvname = qdict_get_try_str(*options, "driver");

    /* If the user has explicitly specified the driver, this choice should
     * override the BDRV_O_PROTOCOL flag */
    tmp_drv = drv;
    if (!tmp_drv && drvname) {
        tmp_drv = bdrv_find_format(drvname);
    }
    if (tmp_drv) {
        /* A driver counts as protocol driver iff it has .bdrv_file_open */
        protocol = tmp_drv->bdrv_file_open;
    }

    if (protocol) {
        *flags |= BDRV_O_PROTOCOL;
    } else {
        *flags &= ~BDRV_O_PROTOCOL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    /* From here on, filename refers to the "filename" option (may be NULL) */
    filename = qdict_get_try_str(*options, "filename");

    if (drv) {
        /* The caller passed a driver; a "driver" option would be redundant */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            /* No driver named: derive the protocol driver from the filename */
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* drv may only remain NULL for the non-protocol case */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Drop "filename" after parsing unless the driver asks to keep it */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1101 
1102 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1103 {
1104 
1105     if (bs->backing_hd) {
1106         assert(bs->backing_blocker);
1107         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1108     } else if (backing_hd) {
1109         error_setg(&bs->backing_blocker,
1110                    "node is used as backing hd of '%s'",
1111                    bdrv_get_device_or_node_name(bs));
1112     }
1113 
1114     bs->backing_hd = backing_hd;
1115     if (!backing_hd) {
1116         error_free(bs->backing_blocker);
1117         bs->backing_blocker = NULL;
1118         goto out;
1119     }
1120     bs->open_flags &= ~BDRV_O_NO_BACKING;
1121     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1122     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1123             backing_hd->drv ? backing_hd->drv->format_name : "");
1124 
1125     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1126     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1127     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1128                     bs->backing_blocker);
1129 out:
1130     bdrv_refresh_limits(bs, NULL);
1131 }
1132 
1133 /*
1134  * Opens the backing file for a BlockDriverState if not yet open
1135  *
1136  * options is a QDict of options to pass to the block drivers, or NULL for an
1137  * empty set of options. The reference to the QDict is transferred to this
1138  * function (even on failure), so if the caller intends to reuse the dictionary,
1139  * it needs to use QINCREF() before calling bdrv_file_open.
1140  */
1141 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1142 {
1143     char *backing_filename = g_malloc0(PATH_MAX);
1144     int ret = 0;
1145     BlockDriverState *backing_hd;
1146     Error *local_err = NULL;
1147 
1148     if (bs->backing_hd != NULL) {
1149         QDECREF(options);
1150         goto free_exit;
1151     }
1152 
1153     /* NULL means an empty set of options */
1154     if (options == NULL) {
1155         options = qdict_new();
1156     }
1157 
1158     bs->open_flags &= ~BDRV_O_NO_BACKING;
1159     if (qdict_haskey(options, "file.filename")) {
1160         backing_filename[0] = '\0';
1161     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1162         QDECREF(options);
1163         goto free_exit;
1164     } else {
1165         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1166                                        &local_err);
1167         if (local_err) {
1168             ret = -EINVAL;
1169             error_propagate(errp, local_err);
1170             QDECREF(options);
1171             goto free_exit;
1172         }
1173     }
1174 
1175     if (!bs->drv || !bs->drv->supports_backing) {
1176         ret = -EINVAL;
1177         error_setg(errp, "Driver doesn't support backing files");
1178         QDECREF(options);
1179         goto free_exit;
1180     }
1181 
1182     backing_hd = bdrv_new();
1183 
1184     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1185         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1186     }
1187 
1188     assert(bs->backing_hd == NULL);
1189     ret = bdrv_open_inherit(&backing_hd,
1190                             *backing_filename ? backing_filename : NULL,
1191                             NULL, options, 0, bs, &child_backing,
1192                             NULL, &local_err);
1193     if (ret < 0) {
1194         bdrv_unref(backing_hd);
1195         backing_hd = NULL;
1196         bs->open_flags |= BDRV_O_NO_BACKING;
1197         error_setg(errp, "Could not open backing file: %s",
1198                    error_get_pretty(local_err));
1199         error_free(local_err);
1200         goto free_exit;
1201     }
1202     bdrv_set_backing_hd(bs, backing_hd);
1203 
1204 free_exit:
1205     g_free(backing_filename);
1206     return ret;
1207 }
1208 
1209 /*
1210  * Opens a disk image whose options are given as BlockdevRef in another block
1211  * device's options.
1212  *
1213  * If allow_none is true, no image will be opened if filename is false and no
1214  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1215  *
1216  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1217  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1218  * itself, all options starting with "${bdref_key}." are considered part of the
1219  * BlockdevRef.
1220  *
1221  * The BlockdevRef will be removed from the options QDict.
1222  *
1223  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1224  */
1225 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1226                     QDict *options, const char *bdref_key,
1227                     BlockDriverState* parent, const BdrvChildRole *child_role,
1228                     bool allow_none, Error **errp)
1229 {
1230     QDict *image_options;
1231     int ret;
1232     char *bdref_key_dot;
1233     const char *reference;
1234 
1235     assert(pbs);
1236     assert(*pbs == NULL);
1237 
1238     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1239     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1240     g_free(bdref_key_dot);
1241 
1242     reference = qdict_get_try_str(options, bdref_key);
1243     if (!filename && !reference && !qdict_size(image_options)) {
1244         if (allow_none) {
1245             ret = 0;
1246         } else {
1247             error_setg(errp, "A block device must be specified for \"%s\"",
1248                        bdref_key);
1249             ret = -EINVAL;
1250         }
1251         QDECREF(image_options);
1252         goto done;
1253     }
1254 
1255     ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
1256                             parent, child_role, NULL, errp);
1257 
1258 done:
1259     qdict_del(options, bdref_key);
1260     return ret;
1261 }
1262 
1263 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1264 {
1265     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1266     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1267     int64_t total_size;
1268     QemuOpts *opts = NULL;
1269     QDict *snapshot_options;
1270     BlockDriverState *bs_snapshot;
1271     Error *local_err;
1272     int ret;
1273 
1274     /* if snapshot, we create a temporary backing file and open it
1275        instead of opening 'filename' directly */
1276 
1277     /* Get the required size from the image */
1278     total_size = bdrv_getlength(bs);
1279     if (total_size < 0) {
1280         ret = total_size;
1281         error_setg_errno(errp, -total_size, "Could not get image size");
1282         goto out;
1283     }
1284 
1285     /* Create the temporary image */
1286     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1287     if (ret < 0) {
1288         error_setg_errno(errp, -ret, "Could not get temporary filename");
1289         goto out;
1290     }
1291 
1292     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1293                             &error_abort);
1294     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1295     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1296     qemu_opts_del(opts);
1297     if (ret < 0) {
1298         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1299                          "'%s': %s", tmp_filename,
1300                          error_get_pretty(local_err));
1301         error_free(local_err);
1302         goto out;
1303     }
1304 
1305     /* Prepare a new options QDict for the temporary file */
1306     snapshot_options = qdict_new();
1307     qdict_put(snapshot_options, "file.driver",
1308               qstring_from_str("file"));
1309     qdict_put(snapshot_options, "file.filename",
1310               qstring_from_str(tmp_filename));
1311 
1312     bs_snapshot = bdrv_new();
1313 
1314     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1315                     flags, &bdrv_qcow2, &local_err);
1316     if (ret < 0) {
1317         error_propagate(errp, local_err);
1318         goto out;
1319     }
1320 
1321     bdrv_append(bs_snapshot, bs);
1322 
1323 out:
1324     g_free(tmp_filename);
1325     return ret;
1326 }
1327 
1328 /*
1329  * Opens a disk image (raw, qcow2, vmdk, ...)
1330  *
1331  * options is a QDict of options to pass to the block drivers, or NULL for an
1332  * empty set of options. The reference to the QDict belongs to the block layer
1333  * after the call (even on failure), so if the caller intends to reuse the
1334  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1335  *
1336  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1337  * If it is not NULL, the referenced BDS will be reused.
1338  *
1339  * The reference parameter may be used to specify an existing block device which
1340  * should be opened. If specified, neither options nor a filename may be given,
1341  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1342  */
1343 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1344                              const char *reference, QDict *options, int flags,
1345                              BlockDriverState *parent,
1346                              const BdrvChildRole *child_role,
1347                              BlockDriver *drv, Error **errp)
1348 {
1349     int ret;
1350     BlockDriverState *file = NULL, *bs;
1351     const char *drvname;
1352     Error *local_err = NULL;
1353     int snapshot_flags = 0;
1354 
1355     assert(pbs);
1356     assert(!child_role || !flags);
1357     assert(!child_role == !parent);
1358 
1359     if (reference) {
1360         bool options_non_empty = options ? qdict_size(options) : false;
1361         QDECREF(options);
1362 
1363         if (*pbs) {
1364             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1365                        "another block device");
1366             return -EINVAL;
1367         }
1368 
1369         if (filename || options_non_empty) {
1370             error_setg(errp, "Cannot reference an existing block device with "
1371                        "additional options or a new filename");
1372             return -EINVAL;
1373         }
1374 
1375         bs = bdrv_lookup_bs(reference, reference, errp);
1376         if (!bs) {
1377             return -ENODEV;
1378         }
1379         bdrv_ref(bs);
1380         *pbs = bs;
1381         return 0;
1382     }
1383 
1384     if (*pbs) {
1385         bs = *pbs;
1386     } else {
1387         bs = bdrv_new();
1388     }
1389 
1390     /* NULL means an empty set of options */
1391     if (options == NULL) {
1392         options = qdict_new();
1393     }
1394 
1395     if (child_role) {
1396         flags = child_role->inherit_flags(parent->open_flags);
1397     }
1398 
1399     ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
1400     if (local_err) {
1401         goto fail;
1402     }
1403 
1404     /* Find the right image format driver */
1405     drv = NULL;
1406     drvname = qdict_get_try_str(options, "driver");
1407     if (drvname) {
1408         drv = bdrv_find_format(drvname);
1409         qdict_del(options, "driver");
1410         if (!drv) {
1411             error_setg(errp, "Unknown driver: '%s'", drvname);
1412             ret = -EINVAL;
1413             goto fail;
1414         }
1415     }
1416 
1417     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1418 
1419     bs->open_flags = flags;
1420     bs->options = options;
1421     options = qdict_clone_shallow(options);
1422 
1423     /* Open image file without format layer */
1424     if ((flags & BDRV_O_PROTOCOL) == 0) {
1425         if (flags & BDRV_O_RDWR) {
1426             flags |= BDRV_O_ALLOW_RDWR;
1427         }
1428         if (flags & BDRV_O_SNAPSHOT) {
1429             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1430             flags = bdrv_backing_flags(flags);
1431         }
1432 
1433         assert(file == NULL);
1434         bs->open_flags = flags;
1435         ret = bdrv_open_image(&file, filename, options, "file",
1436                               bs, &child_file, true, &local_err);
1437         if (ret < 0) {
1438             goto fail;
1439         }
1440     }
1441 
1442     /* Image format probing */
1443     bs->probed = !drv;
1444     if (!drv && file) {
1445         ret = find_image_format(file, filename, &drv, &local_err);
1446         if (ret < 0) {
1447             goto fail;
1448         }
1449     } else if (!drv) {
1450         error_setg(errp, "Must specify either driver or file");
1451         ret = -EINVAL;
1452         goto fail;
1453     }
1454 
1455     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1456     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1457     /* file must be NULL if a protocol BDS is about to be created
1458      * (the inverse results in an error message from bdrv_open_common()) */
1459     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1460 
1461     /* Open the image */
1462     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1463     if (ret < 0) {
1464         goto fail;
1465     }
1466 
1467     if (file && (bs->file != file)) {
1468         bdrv_unref(file);
1469         file = NULL;
1470     }
1471 
1472     /* If there is a backing file, use it */
1473     if ((flags & BDRV_O_NO_BACKING) == 0) {
1474         QDict *backing_options;
1475 
1476         qdict_extract_subqdict(options, &backing_options, "backing.");
1477         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1478         if (ret < 0) {
1479             goto close_and_fail;
1480         }
1481     }
1482 
1483     bdrv_refresh_filename(bs);
1484 
1485     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1486      * temporary snapshot afterwards. */
1487     if (snapshot_flags) {
1488         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1489         if (local_err) {
1490             goto close_and_fail;
1491         }
1492     }
1493 
1494     /* Check if any unknown options were used */
1495     if (options && (qdict_size(options) != 0)) {
1496         const QDictEntry *entry = qdict_first(options);
1497         if (flags & BDRV_O_PROTOCOL) {
1498             error_setg(errp, "Block protocol '%s' doesn't support the option "
1499                        "'%s'", drv->format_name, entry->key);
1500         } else {
1501             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1502                        "support the option '%s'", drv->format_name,
1503                        bdrv_get_device_name(bs), entry->key);
1504         }
1505 
1506         ret = -EINVAL;
1507         goto close_and_fail;
1508     }
1509 
1510     if (!bdrv_key_required(bs)) {
1511         if (bs->blk) {
1512             blk_dev_change_media_cb(bs->blk, true);
1513         }
1514     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1515                && !runstate_check(RUN_STATE_INMIGRATE)
1516                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1517         error_setg(errp,
1518                    "Guest must be stopped for opening of encrypted image");
1519         ret = -EBUSY;
1520         goto close_and_fail;
1521     }
1522 
1523     QDECREF(options);
1524     *pbs = bs;
1525     return 0;
1526 
1527 fail:
1528     if (file != NULL) {
1529         bdrv_unref(file);
1530     }
1531     QDECREF(bs->options);
1532     QDECREF(options);
1533     bs->options = NULL;
1534     if (!*pbs) {
1535         /* If *pbs is NULL, a new BDS has been created in this function and
1536            needs to be freed now. Otherwise, it does not need to be closed,
1537            since it has not really been opened yet. */
1538         bdrv_unref(bs);
1539     }
1540     if (local_err) {
1541         error_propagate(errp, local_err);
1542     }
1543     return ret;
1544 
1545 close_and_fail:
1546     /* See fail path, but now the BDS has to be always closed */
1547     if (*pbs) {
1548         bdrv_close(bs);
1549     } else {
1550         bdrv_unref(bs);
1551     }
1552     QDECREF(options);
1553     if (local_err) {
1554         error_propagate(errp, local_err);
1555     }
1556     return ret;
1557 }
1558 
1559 int bdrv_open(BlockDriverState **pbs, const char *filename,
1560               const char *reference, QDict *options, int flags,
1561               BlockDriver *drv, Error **errp)
1562 {
1563     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1564                              NULL, drv, errp);
1565 }
1566 
/* One element of a BlockReopenQueue, as appended by bdrv_reopen_queue() */
typedef struct BlockReopenQueueEntry {
     /* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded
      * for this entry; only prepared entries are aborted on failure */
     bool prepared;
     /* The BDS to reopen and the flags requested for it */
     BDRVReopenState state;
     /* Linkage within the queue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1572 
1573 /*
1574  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1575  * reopen of multiple devices.
1576  *
1577  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1578  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1579  * be created and initialized. This newly created BlockReopenQueue should be
1580  * passed back in for subsequent calls that are intended to be of the same
1581  * atomic 'set'.
1582  *
1583  * bs is the BlockDriverState to add to the reopen queue.
1584  *
1585  * flags contains the open flags for the associated bs
1586  *
1587  * returns a pointer to bs_queue, which is either the newly allocated
1588  * bs_queue, or the existing bs_queue being used.
1589  *
1590  */
1591 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1592                                     BlockDriverState *bs, int flags)
1593 {
1594     assert(bs != NULL);
1595 
1596     BlockReopenQueueEntry *bs_entry;
1597     if (bs_queue == NULL) {
1598         bs_queue = g_new0(BlockReopenQueue, 1);
1599         QSIMPLEQ_INIT(bs_queue);
1600     }
1601 
1602     /* bdrv_open() masks this flag out */
1603     flags &= ~BDRV_O_PROTOCOL;
1604 
1605     if (bs->file) {
1606         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1607     }
1608 
1609     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1610     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1611 
1612     bs_entry->state.bs = bs;
1613     bs_entry->state.flags = flags;
1614 
1615     return bs_queue;
1616 }
1617 
1618 /*
1619  * Reopen multiple BlockDriverStates atomically & transactionally.
1620  *
1621  * The queue passed in (bs_queue) must have been built up previous
1622  * via bdrv_reopen_queue().
1623  *
1624  * Reopens all BDS specified in the queue, with the appropriate
1625  * flags.  All devices are prepared for reopen, and failure of any
1626  * device will cause all device changes to be abandonded, and intermediate
1627  * data cleaned up.
1628  *
1629  * If all devices prepare successfully, then the changes are committed
1630  * to all devices.
1631  *
1632  */
1633 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1634 {
1635     int ret = -1;
1636     BlockReopenQueueEntry *bs_entry, *next;
1637     Error *local_err = NULL;
1638 
1639     assert(bs_queue != NULL);
1640 
1641     bdrv_drain_all();
1642 
1643     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1644         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1645             error_propagate(errp, local_err);
1646             goto cleanup;
1647         }
1648         bs_entry->prepared = true;
1649     }
1650 
1651     /* If we reach this point, we have success and just need to apply the
1652      * changes
1653      */
1654     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1655         bdrv_reopen_commit(&bs_entry->state);
1656     }
1657 
1658     ret = 0;
1659 
1660 cleanup:
1661     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1662         if (ret && bs_entry->prepared) {
1663             bdrv_reopen_abort(&bs_entry->state);
1664         }
1665         g_free(bs_entry);
1666     }
1667     g_free(bs_queue);
1668     return ret;
1669 }
1670 
1671 
1672 /* Reopen a single BlockDriverState with the specified flags. */
1673 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1674 {
1675     int ret = -1;
1676     Error *local_err = NULL;
1677     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1678 
1679     ret = bdrv_reopen_multiple(queue, &local_err);
1680     if (local_err != NULL) {
1681         error_propagate(errp, local_err);
1682     }
1683     return ret;
1684 }
1685 
1686 
1687 /*
1688  * Prepares a BlockDriverState for reopen. All changes are staged in the
1689  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1690  * the block driver layer .bdrv_reopen_prepare()
1691  *
1692  * bs is the BlockDriverState to reopen
1693  * flags are the new open flags
1694  * queue is the reopen queue
1695  *
1696  * Returns 0 on success, non-zero on error.  On error errp will be set
1697  * as well.
1698  *
1699  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1700  * It is the responsibility of the caller to then call the abort() or
1701  * commit() for any other BDS that have been left in a prepare() state
1702  *
1703  */
1704 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1705                         Error **errp)
1706 {
1707     int ret = -1;
1708     Error *local_err = NULL;
1709     BlockDriver *drv;
1710 
1711     assert(reopen_state != NULL);
1712     assert(reopen_state->bs->drv != NULL);
1713     drv = reopen_state->bs->drv;
1714 
1715     /* if we are to stay read-only, do not allow permission change
1716      * to r/w */
1717     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1718         reopen_state->flags & BDRV_O_RDWR) {
1719         error_setg(errp, "Node '%s' is read only",
1720                    bdrv_get_device_or_node_name(reopen_state->bs));
1721         goto error;
1722     }
1723 
1724 
1725     ret = bdrv_flush(reopen_state->bs);
1726     if (ret) {
1727         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1728                   strerror(-ret));
1729         goto error;
1730     }
1731 
1732     if (drv->bdrv_reopen_prepare) {
1733         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1734         if (ret) {
1735             if (local_err != NULL) {
1736                 error_propagate(errp, local_err);
1737             } else {
1738                 error_setg(errp, "failed while preparing to reopen image '%s'",
1739                            reopen_state->bs->filename);
1740             }
1741             goto error;
1742         }
1743     } else {
1744         /* It is currently mandatory to have a bdrv_reopen_prepare()
1745          * handler for each supported drv. */
1746         error_setg(errp, "Block format '%s' used by node '%s' "
1747                    "does not support reopening files", drv->format_name,
1748                    bdrv_get_device_or_node_name(reopen_state->bs));
1749         ret = -1;
1750         goto error;
1751     }
1752 
1753     ret = 0;
1754 
1755 error:
1756     return ret;
1757 }
1758 
1759 /*
1760  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1761  * makes them final by swapping the staging BlockDriverState contents into
1762  * the active BlockDriverState contents.
1763  */
1764 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1765 {
1766     BlockDriver *drv;
1767 
1768     assert(reopen_state != NULL);
1769     drv = reopen_state->bs->drv;
1770     assert(drv != NULL);
1771 
1772     /* If there are any driver level actions to take */
1773     if (drv->bdrv_reopen_commit) {
1774         drv->bdrv_reopen_commit(reopen_state);
1775     }
1776 
1777     /* set BDS specific flags now */
1778     reopen_state->bs->open_flags         = reopen_state->flags;
1779     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1780                                               BDRV_O_CACHE_WB);
1781     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1782 
1783     bdrv_refresh_limits(reopen_state->bs, NULL);
1784 }
1785 
1786 /*
1787  * Abort the reopen, and delete and free the staged changes in
1788  * reopen_state
1789  */
1790 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1791 {
1792     BlockDriver *drv;
1793 
1794     assert(reopen_state != NULL);
1795     drv = reopen_state->bs->drv;
1796     assert(drv != NULL);
1797 
1798     if (drv->bdrv_reopen_abort) {
1799         drv->bdrv_reopen_abort(reopen_state);
1800     }
1801 }
1802 
1803 
/*
 * Closes @bs: cancels its block job, drains and flushes I/O, notifies the
 * close notifiers, detaches and unreferences the backing BDS, calls the
 * driver's .bdrv_close(), resets the per-image state and unreferences
 * bs->file. The BlockDriverState itself is not freed here.
 *
 * If @bs has no driver, only the notifications, the throttling teardown and
 * the AIO notifier cleanup take place.
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            /* Remember the pointer: bdrv_set_backing_hd() clears
             * bs->backing_hd before the reference can be dropped */
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset the state that described the image just closed */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* Tell an attached BlockBackend that the medium is gone */
    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free all registered AIO notifiers and leave an empty list behind */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1859 
1860 void bdrv_close_all(void)
1861 {
1862     BlockDriverState *bs;
1863 
1864     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1865         AioContext *aio_context = bdrv_get_aio_context(bs);
1866 
1867         aio_context_acquire(aio_context);
1868         bdrv_close(bs);
1869         aio_context_release(aio_context);
1870     }
1871 }
1872 
1873 /* make a BlockDriverState anonymous by removing from bdrv_state and
1874  * graph_bdrv_state list.
1875    Also, NULL terminate the device_name to prevent double remove */
1876 void bdrv_make_anon(BlockDriverState *bs)
1877 {
1878     /*
1879      * Take care to remove bs from bdrv_states only when it's actually
1880      * in it.  Note that bs->device_list.tqe_prev is initially null,
1881      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1882      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1883      * resetting it to null on remove.
1884      */
1885     if (bs->device_list.tqe_prev) {
1886         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1887         bs->device_list.tqe_prev = NULL;
1888     }
1889     if (bs->node_name[0] != '\0') {
1890         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1891     }
1892     bs->node_name[0] = '\0';
1893 }
1894 
1895 static void bdrv_rebind(BlockDriverState *bs)
1896 {
1897     if (bs->drv && bs->drv->bdrv_rebind) {
1898         bs->drv->bdrv_rebind(bs);
1899     }
1900 }
1901 
1902 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1903                                      BlockDriverState *bs_src)
1904 {
1905     /* move some fields that need to stay attached to the device */
1906 
1907     /* dev info */
1908     bs_dest->guest_block_size   = bs_src->guest_block_size;
1909     bs_dest->copy_on_read       = bs_src->copy_on_read;
1910 
1911     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1912 
1913     /* i/o throttled req */
1914     memcpy(&bs_dest->throttle_state,
1915            &bs_src->throttle_state,
1916            sizeof(ThrottleState));
1917     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1918     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1919     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1920 
1921     /* r/w error */
1922     bs_dest->on_read_error      = bs_src->on_read_error;
1923     bs_dest->on_write_error     = bs_src->on_write_error;
1924 
1925     /* i/o status */
1926     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1927     bs_dest->iostatus           = bs_src->iostatus;
1928 
1929     /* dirty bitmap */
1930     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1931 
1932     /* reference count */
1933     bs_dest->refcnt             = bs_src->refcnt;
1934 
1935     /* job */
1936     bs_dest->job                = bs_src->job;
1937 
1938     /* keep the same entry in bdrv_states */
1939     bs_dest->device_list = bs_src->device_list;
1940     bs_dest->blk = bs_src->blk;
1941 
1942     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1943            sizeof(bs_dest->op_blockers));
1944 }
1945 
1946 /*
1947  * Swap bs contents for two image chains while they are live,
1948  * while keeping required fields on the BlockDriverState that is
1949  * actually attached to a device.
1950  *
1951  * This will modify the BlockDriverState fields, and swap contents
1952  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1953  *
1954  * bs_new must not be attached to a BlockBackend.
1955  *
1956  * This function does not create any image files.
1957  */
1958 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1959 {
1960     BlockDriverState tmp;
1961 
1962     bdrv_drain(bs_new);
1963     bdrv_drain(bs_old);
1964 
1965     /* The code needs to swap the node_name but simply swapping node_list won't
1966      * work so first remove the nodes from the graph list, do the swap then
1967      * insert them back if needed.
1968      */
1969     if (bs_new->node_name[0] != '\0') {
1970         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1971     }
1972     if (bs_old->node_name[0] != '\0') {
1973         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1974     }
1975 
1976     /* bs_new must be unattached and shouldn't have anything fancy enabled */
1977     assert(!bs_new->blk);
1978     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1979     assert(bs_new->job == NULL);
1980     assert(bs_new->io_limits_enabled == false);
1981     assert(!throttle_have_timer(&bs_new->throttle_state));
1982 
1983     tmp = *bs_new;
1984     *bs_new = *bs_old;
1985     *bs_old = tmp;
1986 
1987     /* there are some fields that should not be swapped, move them back */
1988     bdrv_move_feature_fields(&tmp, bs_old);
1989     bdrv_move_feature_fields(bs_old, bs_new);
1990     bdrv_move_feature_fields(bs_new, &tmp);
1991 
1992     /* bs_new must remain unattached */
1993     assert(!bs_new->blk);
1994 
1995     /* Check a few fields that should remain attached to the device */
1996     assert(bs_new->job == NULL);
1997     assert(bs_new->io_limits_enabled == false);
1998     assert(!throttle_have_timer(&bs_new->throttle_state));
1999 
2000     /* insert the nodes back into the graph node list if needed */
2001     if (bs_new->node_name[0] != '\0') {
2002         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2003     }
2004     if (bs_old->node_name[0] != '\0') {
2005         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2006     }
2007 
2008     assert(QLIST_EMPTY(&bs_old->tracked_requests));
2009     assert(QLIST_EMPTY(&bs_new->tracked_requests));
2010 
2011     bdrv_rebind(bs_new);
2012     bdrv_rebind(bs_old);
2013 }
2014 
2015 /*
2016  * Add new bs contents at the top of an image chain while the chain is
2017  * live, while keeping required fields on the top layer.
2018  *
2019  * This will modify the BlockDriverState fields, and swap contents
2020  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2021  *
2022  * bs_new must not be attached to a BlockBackend.
2023  *
2024  * This function does not create any image files.
2025  */
2026 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2027 {
2028     bdrv_swap(bs_new, bs_top);
2029 
2030     /* The contents of 'tmp' will become bs_top, as we are
2031      * swapping bs_new and bs_top contents. */
2032     bdrv_set_backing_hd(bs_top, bs_new);
2033 }
2034 
2035 static void bdrv_delete(BlockDriverState *bs)
2036 {
2037     assert(!bs->job);
2038     assert(bdrv_op_blocker_is_empty(bs));
2039     assert(!bs->refcnt);
2040     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2041 
2042     bdrv_close(bs);
2043 
2044     /* remove from list, if necessary */
2045     bdrv_make_anon(bs);
2046 
2047     g_free(bs);
2048 }
2049 
2050 /*
2051  * Run consistency checks on an image
2052  *
2053  * Returns 0 if the check could be completed (it doesn't mean that the image is
2054  * free of errors) or -errno when an internal error occurred. The results of the
2055  * check are stored in res.
2056  */
2057 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2058 {
2059     if (bs->drv == NULL) {
2060         return -ENOMEDIUM;
2061     }
2062     if (bs->drv->bdrv_check == NULL) {
2063         return -ENOTSUP;
2064     }
2065 
2066     memset(res, 0, sizeof(*res));
2067     return bs->drv->bdrv_check(bs, res, fix);
2068 }
2069 
2070 #define COMMIT_BUF_SECTORS 2048
2071 
2072 /* commit COW file into the raw image */
2073 int bdrv_commit(BlockDriverState *bs)
2074 {
2075     BlockDriver *drv = bs->drv;
2076     int64_t sector, total_sectors, length, backing_length;
2077     int n, ro, open_flags;
2078     int ret = 0;
2079     uint8_t *buf = NULL;
2080 
2081     if (!drv)
2082         return -ENOMEDIUM;
2083 
2084     if (!bs->backing_hd) {
2085         return -ENOTSUP;
2086     }
2087 
2088     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2089         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2090         return -EBUSY;
2091     }
2092 
2093     ro = bs->backing_hd->read_only;
2094     open_flags =  bs->backing_hd->open_flags;
2095 
2096     if (ro) {
2097         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2098             return -EACCES;
2099         }
2100     }
2101 
2102     length = bdrv_getlength(bs);
2103     if (length < 0) {
2104         ret = length;
2105         goto ro_cleanup;
2106     }
2107 
2108     backing_length = bdrv_getlength(bs->backing_hd);
2109     if (backing_length < 0) {
2110         ret = backing_length;
2111         goto ro_cleanup;
2112     }
2113 
2114     /* If our top snapshot is larger than the backing file image,
2115      * grow the backing file image if possible.  If not possible,
2116      * we must return an error */
2117     if (length > backing_length) {
2118         ret = bdrv_truncate(bs->backing_hd, length);
2119         if (ret < 0) {
2120             goto ro_cleanup;
2121         }
2122     }
2123 
2124     total_sectors = length >> BDRV_SECTOR_BITS;
2125 
2126     /* qemu_try_blockalign() for bs will choose an alignment that works for
2127      * bs->backing_hd as well, so no need to compare the alignment manually. */
2128     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2129     if (buf == NULL) {
2130         ret = -ENOMEM;
2131         goto ro_cleanup;
2132     }
2133 
2134     for (sector = 0; sector < total_sectors; sector += n) {
2135         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2136         if (ret < 0) {
2137             goto ro_cleanup;
2138         }
2139         if (ret) {
2140             ret = bdrv_read(bs, sector, buf, n);
2141             if (ret < 0) {
2142                 goto ro_cleanup;
2143             }
2144 
2145             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2146             if (ret < 0) {
2147                 goto ro_cleanup;
2148             }
2149         }
2150     }
2151 
2152     if (drv->bdrv_make_empty) {
2153         ret = drv->bdrv_make_empty(bs);
2154         if (ret < 0) {
2155             goto ro_cleanup;
2156         }
2157         bdrv_flush(bs);
2158     }
2159 
2160     /*
2161      * Make sure all data we wrote to the backing device is actually
2162      * stable on disk.
2163      */
2164     if (bs->backing_hd) {
2165         bdrv_flush(bs->backing_hd);
2166     }
2167 
2168     ret = 0;
2169 ro_cleanup:
2170     qemu_vfree(buf);
2171 
2172     if (ro) {
2173         /* ignoring error return here */
2174         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2175     }
2176 
2177     return ret;
2178 }
2179 
2180 int bdrv_commit_all(void)
2181 {
2182     BlockDriverState *bs;
2183 
2184     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2185         AioContext *aio_context = bdrv_get_aio_context(bs);
2186 
2187         aio_context_acquire(aio_context);
2188         if (bs->drv && bs->backing_hd) {
2189             int ret = bdrv_commit(bs);
2190             if (ret < 0) {
2191                 aio_context_release(aio_context);
2192                 return ret;
2193             }
2194         }
2195         aio_context_release(aio_context);
2196     }
2197     return 0;
2198 }
2199 
2200 /*
2201  * Return values:
2202  * 0        - success
2203  * -EINVAL  - backing format specified, but no file
2204  * -ENOSPC  - can't update the backing file because no space is left in the
2205  *            image file header
2206  * -ENOTSUP - format driver doesn't support changing the backing file
2207  */
2208 int bdrv_change_backing_file(BlockDriverState *bs,
2209     const char *backing_file, const char *backing_fmt)
2210 {
2211     BlockDriver *drv = bs->drv;
2212     int ret;
2213 
2214     /* Backing file format doesn't make sense without a backing file */
2215     if (backing_fmt && !backing_file) {
2216         return -EINVAL;
2217     }
2218 
2219     if (drv->bdrv_change_backing_file != NULL) {
2220         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2221     } else {
2222         ret = -ENOTSUP;
2223     }
2224 
2225     if (ret == 0) {
2226         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2227         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2228     }
2229     return ret;
2230 }
2231 
2232 /*
2233  * Finds the image layer in the chain that has 'bs' as its backing file.
2234  *
2235  * active is the current topmost image.
2236  *
2237  * Returns NULL if bs is not found in active's image chain,
2238  * or if active == bs.
2239  *
2240  * Returns the bottommost base image if bs == NULL.
2241  */
2242 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2243                                     BlockDriverState *bs)
2244 {
2245     while (active && bs != active->backing_hd) {
2246         active = active->backing_hd;
2247     }
2248 
2249     return active;
2250 }
2251 
2252 /* Given a BDS, searches for the base layer. */
2253 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2254 {
2255     return bdrv_find_overlay(bs, NULL);
2256 }
2257 
2258 typedef struct BlkIntermediateStates {
2259     BlockDriverState *bs;
2260     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2261 } BlkIntermediateStates;
2262 
2263 
2264 /*
2265  * Drops images above 'base' up to and including 'top', and sets the image
2266  * above 'top' to have base as its backing file.
2267  *
2268  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2269  * information in 'bs' can be properly updated.
2270  *
2271  * E.g., this will convert the following chain:
2272  * bottom <- base <- intermediate <- top <- active
2273  *
2274  * to
2275  *
2276  * bottom <- base <- active
2277  *
2278  * It is allowed for bottom==base, in which case it converts:
2279  *
2280  * base <- intermediate <- top <- active
2281  *
2282  * to
2283  *
2284  * base <- active
2285  *
2286  * If backing_file_str is non-NULL, it will be used when modifying top's
2287  * overlay image metadata.
2288  *
2289  * Error conditions:
2290  *  if active == top, that is considered an error
2291  *
2292  */
2293 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2294                            BlockDriverState *base, const char *backing_file_str)
2295 {
2296     BlockDriverState *intermediate;
2297     BlockDriverState *base_bs = NULL;
2298     BlockDriverState *new_top_bs = NULL;
2299     BlkIntermediateStates *intermediate_state, *next;
2300     int ret = -EIO;
2301 
2302     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2303     QSIMPLEQ_INIT(&states_to_delete);
2304 
2305     if (!top->drv || !base->drv) {
2306         goto exit;
2307     }
2308 
2309     new_top_bs = bdrv_find_overlay(active, top);
2310 
2311     if (new_top_bs == NULL) {
2312         /* we could not find the image above 'top', this is an error */
2313         goto exit;
2314     }
2315 
2316     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2317      * to do, no intermediate images */
2318     if (new_top_bs->backing_hd == base) {
2319         ret = 0;
2320         goto exit;
2321     }
2322 
2323     intermediate = top;
2324 
2325     /* now we will go down through the list, and add each BDS we find
2326      * into our deletion queue, until we hit the 'base'
2327      */
2328     while (intermediate) {
2329         intermediate_state = g_new0(BlkIntermediateStates, 1);
2330         intermediate_state->bs = intermediate;
2331         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2332 
2333         if (intermediate->backing_hd == base) {
2334             base_bs = intermediate->backing_hd;
2335             break;
2336         }
2337         intermediate = intermediate->backing_hd;
2338     }
2339     if (base_bs == NULL) {
2340         /* something went wrong, we did not end at the base. safely
2341          * unravel everything, and exit with error */
2342         goto exit;
2343     }
2344 
2345     /* success - we can delete the intermediate states, and link top->base */
2346     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2347     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2348                                    base_bs->drv ? base_bs->drv->format_name : "");
2349     if (ret) {
2350         goto exit;
2351     }
2352     bdrv_set_backing_hd(new_top_bs, base_bs);
2353 
2354     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2355         /* so that bdrv_close() does not recursively close the chain */
2356         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2357         bdrv_unref(intermediate_state->bs);
2358     }
2359     ret = 0;
2360 
2361 exit:
2362     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2363         g_free(intermediate_state);
2364     }
2365     return ret;
2366 }
2367 
2368 /**
2369  * Truncate file to 'offset' bytes (needed only for file protocols)
2370  */
2371 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2372 {
2373     BlockDriver *drv = bs->drv;
2374     int ret;
2375     if (!drv)
2376         return -ENOMEDIUM;
2377     if (!drv->bdrv_truncate)
2378         return -ENOTSUP;
2379     if (bs->read_only)
2380         return -EACCES;
2381 
2382     ret = drv->bdrv_truncate(bs, offset);
2383     if (ret == 0) {
2384         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2385         bdrv_dirty_bitmap_truncate(bs);
2386         if (bs->blk) {
2387             blk_dev_resize_cb(bs->blk);
2388         }
2389     }
2390     return ret;
2391 }
2392 
2393 /**
2394  * Length of a allocated file in bytes. Sparse files are counted by actual
2395  * allocated space. Return < 0 if error or unknown.
2396  */
2397 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2398 {
2399     BlockDriver *drv = bs->drv;
2400     if (!drv) {
2401         return -ENOMEDIUM;
2402     }
2403     if (drv->bdrv_get_allocated_file_size) {
2404         return drv->bdrv_get_allocated_file_size(bs);
2405     }
2406     if (bs->file) {
2407         return bdrv_get_allocated_file_size(bs->file);
2408     }
2409     return -ENOTSUP;
2410 }
2411 
2412 /**
2413  * Return number of sectors on success, -errno on error.
2414  */
2415 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2416 {
2417     BlockDriver *drv = bs->drv;
2418 
2419     if (!drv)
2420         return -ENOMEDIUM;
2421 
2422     if (drv->has_variable_length) {
2423         int ret = refresh_total_sectors(bs, bs->total_sectors);
2424         if (ret < 0) {
2425             return ret;
2426         }
2427     }
2428     return bs->total_sectors;
2429 }
2430 
2431 /**
2432  * Return length in bytes on success, -errno on error.
2433  * The length is always a multiple of BDRV_SECTOR_SIZE.
2434  */
2435 int64_t bdrv_getlength(BlockDriverState *bs)
2436 {
2437     int64_t ret = bdrv_nb_sectors(bs);
2438 
2439     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2440     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2441 }
2442 
2443 /* return 0 as number of sectors if no device present or error */
2444 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2445 {
2446     int64_t nb_sectors = bdrv_nb_sectors(bs);
2447 
2448     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2449 }
2450 
2451 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2452                        BlockdevOnError on_write_error)
2453 {
2454     bs->on_read_error = on_read_error;
2455     bs->on_write_error = on_write_error;
2456 }
2457 
2458 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2459 {
2460     return is_read ? bs->on_read_error : bs->on_write_error;
2461 }
2462 
2463 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2464 {
2465     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2466 
2467     switch (on_err) {
2468     case BLOCKDEV_ON_ERROR_ENOSPC:
2469         return (error == ENOSPC) ?
2470                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2471     case BLOCKDEV_ON_ERROR_STOP:
2472         return BLOCK_ERROR_ACTION_STOP;
2473     case BLOCKDEV_ON_ERROR_REPORT:
2474         return BLOCK_ERROR_ACTION_REPORT;
2475     case BLOCKDEV_ON_ERROR_IGNORE:
2476         return BLOCK_ERROR_ACTION_IGNORE;
2477     default:
2478         abort();
2479     }
2480 }
2481 
2482 static void send_qmp_error_event(BlockDriverState *bs,
2483                                  BlockErrorAction action,
2484                                  bool is_read, int error)
2485 {
2486     IoOperationType optype;
2487 
2488     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2489     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2490                                    bdrv_iostatus_is_enabled(bs),
2491                                    error == ENOSPC, strerror(error),
2492                                    &error_abort);
2493 }
2494 
2495 /* This is done by device models because, while the block layer knows
2496  * about the error, it does not know whether an operation comes from
2497  * the device or the block layer (from a job, for example).
2498  */
2499 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2500                        bool is_read, int error)
2501 {
2502     assert(error >= 0);
2503 
2504     if (action == BLOCK_ERROR_ACTION_STOP) {
2505         /* First set the iostatus, so that "info block" returns an iostatus
2506          * that matches the events raised so far (an additional error iostatus
2507          * is fine, but not a lost one).
2508          */
2509         bdrv_iostatus_set_err(bs, error);
2510 
2511         /* Then raise the request to stop the VM and the event.
2512          * qemu_system_vmstop_request_prepare has two effects.  First,
2513          * it ensures that the STOP event always comes after the
2514          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2515          * can observe the STOP event and do a "cont" before the STOP
2516          * event is issued, the VM will not stop.  In this case, vm_start()
2517          * also ensures that the STOP/RESUME pair of events is emitted.
2518          */
2519         qemu_system_vmstop_request_prepare();
2520         send_qmp_error_event(bs, action, is_read, error);
2521         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2522     } else {
2523         send_qmp_error_event(bs, action, is_read, error);
2524     }
2525 }
2526 
2527 int bdrv_is_read_only(BlockDriverState *bs)
2528 {
2529     return bs->read_only;
2530 }
2531 
2532 int bdrv_is_sg(BlockDriverState *bs)
2533 {
2534     return bs->sg;
2535 }
2536 
2537 int bdrv_enable_write_cache(BlockDriverState *bs)
2538 {
2539     return bs->enable_write_cache;
2540 }
2541 
2542 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2543 {
2544     bs->enable_write_cache = wce;
2545 
2546     /* so a reopen() will preserve wce */
2547     if (wce) {
2548         bs->open_flags |= BDRV_O_CACHE_WB;
2549     } else {
2550         bs->open_flags &= ~BDRV_O_CACHE_WB;
2551     }
2552 }
2553 
2554 int bdrv_is_encrypted(BlockDriverState *bs)
2555 {
2556     if (bs->backing_hd && bs->backing_hd->encrypted)
2557         return 1;
2558     return bs->encrypted;
2559 }
2560 
2561 int bdrv_key_required(BlockDriverState *bs)
2562 {
2563     BlockDriverState *backing_hd = bs->backing_hd;
2564 
2565     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2566         return 1;
2567     return (bs->encrypted && !bs->valid_key);
2568 }
2569 
2570 int bdrv_set_key(BlockDriverState *bs, const char *key)
2571 {
2572     int ret;
2573     if (bs->backing_hd && bs->backing_hd->encrypted) {
2574         ret = bdrv_set_key(bs->backing_hd, key);
2575         if (ret < 0)
2576             return ret;
2577         if (!bs->encrypted)
2578             return 0;
2579     }
2580     if (!bs->encrypted) {
2581         return -EINVAL;
2582     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2583         return -ENOMEDIUM;
2584     }
2585     ret = bs->drv->bdrv_set_key(bs, key);
2586     if (ret < 0) {
2587         bs->valid_key = 0;
2588     } else if (!bs->valid_key) {
2589         bs->valid_key = 1;
2590         if (bs->blk) {
2591             /* call the change callback now, we skipped it on open */
2592             blk_dev_change_media_cb(bs->blk, true);
2593         }
2594     }
2595     return ret;
2596 }
2597 
2598 /*
2599  * Provide an encryption key for @bs.
2600  * If @key is non-null:
2601  *     If @bs is not encrypted, fail.
2602  *     Else if the key is invalid, fail.
2603  *     Else set @bs's key to @key, replacing the existing key, if any.
2604  * If @key is null:
2605  *     If @bs is encrypted and still lacks a key, fail.
2606  *     Else do nothing.
2607  * On failure, store an error object through @errp if non-null.
2608  */
2609 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2610 {
2611     if (key) {
2612         if (!bdrv_is_encrypted(bs)) {
2613             error_setg(errp, "Node '%s' is not encrypted",
2614                       bdrv_get_device_or_node_name(bs));
2615         } else if (bdrv_set_key(bs, key) < 0) {
2616             error_set(errp, QERR_INVALID_PASSWORD);
2617         }
2618     } else {
2619         if (bdrv_key_required(bs)) {
2620             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2621                       "'%s' (%s) is encrypted",
2622                       bdrv_get_device_or_node_name(bs),
2623                       bdrv_get_encrypted_filename(bs));
2624         }
2625     }
2626 }
2627 
2628 const char *bdrv_get_format_name(BlockDriverState *bs)
2629 {
2630     return bs->drv ? bs->drv->format_name : NULL;
2631 }
2632 
/*
 * qsort() comparator for an array of C strings (const char * elements).
 *
 * qsort() passes pointers to the array elements, so @a and @b point to
 * string pointers and must be dereferenced once before comparing.
 *
 * Returns <0, 0 or >0 as strcmp() does for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2637 
2638 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2639                          void *opaque)
2640 {
2641     BlockDriver *drv;
2642     int count = 0;
2643     int i;
2644     const char **formats = NULL;
2645 
2646     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2647         if (drv->format_name) {
2648             bool found = false;
2649             int i = count;
2650             while (formats && i && !found) {
2651                 found = !strcmp(formats[--i], drv->format_name);
2652             }
2653 
2654             if (!found) {
2655                 formats = g_renew(const char *, formats, count + 1);
2656                 formats[count++] = drv->format_name;
2657             }
2658         }
2659     }
2660 
2661     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2662 
2663     for (i = 0; i < count; i++) {
2664         it(opaque, formats[i]);
2665     }
2666 
2667     g_free(formats);
2668 }
2669 
2670 /* This function is to find a node in the bs graph */
2671 BlockDriverState *bdrv_find_node(const char *node_name)
2672 {
2673     BlockDriverState *bs;
2674 
2675     assert(node_name);
2676 
2677     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2678         if (!strcmp(node_name, bs->node_name)) {
2679             return bs;
2680         }
2681     }
2682     return NULL;
2683 }
2684 
2685 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2686 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2687 {
2688     BlockDeviceInfoList *list, *entry;
2689     BlockDriverState *bs;
2690 
2691     list = NULL;
2692     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2693         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2694         if (!info) {
2695             qapi_free_BlockDeviceInfoList(list);
2696             return NULL;
2697         }
2698         entry = g_malloc0(sizeof(*entry));
2699         entry->value = info;
2700         entry->next = list;
2701         list = entry;
2702     }
2703 
2704     return list;
2705 }
2706 
2707 BlockDriverState *bdrv_lookup_bs(const char *device,
2708                                  const char *node_name,
2709                                  Error **errp)
2710 {
2711     BlockBackend *blk;
2712     BlockDriverState *bs;
2713 
2714     if (device) {
2715         blk = blk_by_name(device);
2716 
2717         if (blk) {
2718             return blk_bs(blk);
2719         }
2720     }
2721 
2722     if (node_name) {
2723         bs = bdrv_find_node(node_name);
2724 
2725         if (bs) {
2726             return bs;
2727         }
2728     }
2729 
2730     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2731                      device ? device : "",
2732                      node_name ? node_name : "");
2733     return NULL;
2734 }
2735 
2736 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2737  * return false.  If either argument is NULL, return false. */
2738 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2739 {
2740     while (top && top != base) {
2741         top = top->backing_hd;
2742     }
2743 
2744     return top != NULL;
2745 }
2746 
2747 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2748 {
2749     if (!bs) {
2750         return QTAILQ_FIRST(&graph_bdrv_states);
2751     }
2752     return QTAILQ_NEXT(bs, node_list);
2753 }
2754 
2755 BlockDriverState *bdrv_next(BlockDriverState *bs)
2756 {
2757     if (!bs) {
2758         return QTAILQ_FIRST(&bdrv_states);
2759     }
2760     return QTAILQ_NEXT(bs, device_list);
2761 }
2762 
2763 const char *bdrv_get_node_name(const BlockDriverState *bs)
2764 {
2765     return bs->node_name;
2766 }
2767 
2768 /* TODO check what callers really want: bs->node_name or blk_name() */
2769 const char *bdrv_get_device_name(const BlockDriverState *bs)
2770 {
2771     return bs->blk ? blk_name(bs->blk) : "";
2772 }
2773 
2774 /* This can be used to identify nodes that might not have a device
2775  * name associated. Since node and device names live in the same
2776  * namespace, the result is unambiguous. The exception is if both are
2777  * absent, then this returns an empty (non-null) string. */
2778 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2779 {
2780     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2781 }
2782 
2783 int bdrv_get_flags(BlockDriverState *bs)
2784 {
2785     return bs->open_flags;
2786 }
2787 
2788 int bdrv_has_zero_init_1(BlockDriverState *bs)
2789 {
2790     return 1;
2791 }
2792 
2793 int bdrv_has_zero_init(BlockDriverState *bs)
2794 {
2795     assert(bs->drv);
2796 
2797     /* If BS is a copy on write image, it is initialized to
2798        the contents of the base image, which may not be zeroes.  */
2799     if (bs->backing_hd) {
2800         return 0;
2801     }
2802     if (bs->drv->bdrv_has_zero_init) {
2803         return bs->drv->bdrv_has_zero_init(bs);
2804     }
2805 
2806     /* safe default */
2807     return 0;
2808 }
2809 
2810 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2811 {
2812     BlockDriverInfo bdi;
2813 
2814     if (bs->backing_hd) {
2815         return false;
2816     }
2817 
2818     if (bdrv_get_info(bs, &bdi) == 0) {
2819         return bdi.unallocated_blocks_are_zero;
2820     }
2821 
2822     return false;
2823 }
2824 
2825 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2826 {
2827     BlockDriverInfo bdi;
2828 
2829     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2830         return false;
2831     }
2832 
2833     if (bdrv_get_info(bs, &bdi) == 0) {
2834         return bdi.can_write_zeroes_with_unmap;
2835     }
2836 
2837     return false;
2838 }
2839 
2840 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2841 {
2842     if (bs->backing_hd && bs->backing_hd->encrypted)
2843         return bs->backing_file;
2844     else if (bs->encrypted)
2845         return bs->filename;
2846     else
2847         return NULL;
2848 }
2849 
2850 void bdrv_get_backing_filename(BlockDriverState *bs,
2851                                char *filename, int filename_size)
2852 {
2853     pstrcpy(filename, filename_size, bs->backing_file);
2854 }
2855 
2856 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2857 {
2858     BlockDriver *drv = bs->drv;
2859     if (!drv)
2860         return -ENOMEDIUM;
2861     if (!drv->bdrv_get_info)
2862         return -ENOTSUP;
2863     memset(bdi, 0, sizeof(*bdi));
2864     return drv->bdrv_get_info(bs, bdi);
2865 }
2866 
2867 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2868 {
2869     BlockDriver *drv = bs->drv;
2870     if (drv && drv->bdrv_get_specific_info) {
2871         return drv->bdrv_get_specific_info(bs);
2872     }
2873     return NULL;
2874 }
2875 
2876 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2877 {
2878     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2879         return;
2880     }
2881 
2882     bs->drv->bdrv_debug_event(bs, event);
2883 }
2884 
2885 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2886                           const char *tag)
2887 {
2888     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2889         bs = bs->file;
2890     }
2891 
2892     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2893         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2894     }
2895 
2896     return -ENOTSUP;
2897 }
2898 
2899 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2900 {
2901     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2902         bs = bs->file;
2903     }
2904 
2905     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2906         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2907     }
2908 
2909     return -ENOTSUP;
2910 }
2911 
2912 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2913 {
2914     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2915         bs = bs->file;
2916     }
2917 
2918     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2919         return bs->drv->bdrv_debug_resume(bs, tag);
2920     }
2921 
2922     return -ENOTSUP;
2923 }
2924 
2925 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2926 {
2927     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2928         bs = bs->file;
2929     }
2930 
2931     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2932         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2933     }
2934 
2935     return false;
2936 }
2937 
2938 int bdrv_is_snapshot(BlockDriverState *bs)
2939 {
2940     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2941 }
2942 
2943 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2944  * relative, it must be relative to the chain.  So, passing in bs->filename
2945  * from a BDS as backing_file should not be done, as that may be relative to
2946  * the CWD rather than the chain. */
2947 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2948         const char *backing_file)
2949 {
2950     char *filename_full = NULL;
2951     char *backing_file_full = NULL;
2952     char *filename_tmp = NULL;
2953     int is_protocol = 0;
2954     BlockDriverState *curr_bs = NULL;
2955     BlockDriverState *retval = NULL;
2956 
2957     if (!bs || !bs->drv || !backing_file) {
2958         return NULL;
2959     }
2960 
2961     filename_full     = g_malloc(PATH_MAX);
2962     backing_file_full = g_malloc(PATH_MAX);
2963     filename_tmp      = g_malloc(PATH_MAX);
2964 
2965     is_protocol = path_has_protocol(backing_file);
2966 
2967     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2968 
2969         /* If either of the filename paths is actually a protocol, then
2970          * compare unmodified paths; otherwise make paths relative */
2971         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2972             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2973                 retval = curr_bs->backing_hd;
2974                 break;
2975             }
2976         } else {
2977             /* If not an absolute filename path, make it relative to the current
2978              * image's filename path */
2979             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2980                          backing_file);
2981 
2982             /* We are going to compare absolute pathnames */
2983             if (!realpath(filename_tmp, filename_full)) {
2984                 continue;
2985             }
2986 
2987             /* We need to make sure the backing filename we are comparing against
2988              * is relative to the current image filename (or absolute) */
2989             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2990                          curr_bs->backing_file);
2991 
2992             if (!realpath(filename_tmp, backing_file_full)) {
2993                 continue;
2994             }
2995 
2996             if (strcmp(backing_file_full, filename_full) == 0) {
2997                 retval = curr_bs->backing_hd;
2998                 break;
2999             }
3000         }
3001     }
3002 
3003     g_free(filename_full);
3004     g_free(backing_file_full);
3005     g_free(filename_tmp);
3006     return retval;
3007 }
3008 
3009 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3010 {
3011     if (!bs->drv) {
3012         return 0;
3013     }
3014 
3015     if (!bs->backing_hd) {
3016         return 0;
3017     }
3018 
3019     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3020 }
3021 
3022 void bdrv_init(void)
3023 {
3024     module_call_init(MODULE_INIT_BLOCK);
3025 }
3026 
3027 void bdrv_init_with_whitelist(void)
3028 {
3029     use_bdrv_whitelist = 1;
3030     bdrv_init();
3031 }
3032 
3033 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3034 {
3035     Error *local_err = NULL;
3036     int ret;
3037 
3038     if (!bs->drv)  {
3039         return;
3040     }
3041 
3042     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3043         return;
3044     }
3045     bs->open_flags &= ~BDRV_O_INCOMING;
3046 
3047     if (bs->drv->bdrv_invalidate_cache) {
3048         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3049     } else if (bs->file) {
3050         bdrv_invalidate_cache(bs->file, &local_err);
3051     }
3052     if (local_err) {
3053         error_propagate(errp, local_err);
3054         return;
3055     }
3056 
3057     ret = refresh_total_sectors(bs, bs->total_sectors);
3058     if (ret < 0) {
3059         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3060         return;
3061     }
3062 }
3063 
3064 void bdrv_invalidate_cache_all(Error **errp)
3065 {
3066     BlockDriverState *bs;
3067     Error *local_err = NULL;
3068 
3069     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3070         AioContext *aio_context = bdrv_get_aio_context(bs);
3071 
3072         aio_context_acquire(aio_context);
3073         bdrv_invalidate_cache(bs, &local_err);
3074         aio_context_release(aio_context);
3075         if (local_err) {
3076             error_propagate(errp, local_err);
3077             return;
3078         }
3079     }
3080 }
3081 
3082 /**************************************************************/
3083 /* removable device support */
3084 
3085 /**
3086  * Return TRUE if the media is present
3087  */
3088 int bdrv_is_inserted(BlockDriverState *bs)
3089 {
3090     BlockDriver *drv = bs->drv;
3091 
3092     if (!drv)
3093         return 0;
3094     if (!drv->bdrv_is_inserted)
3095         return 1;
3096     return drv->bdrv_is_inserted(bs);
3097 }
3098 
3099 /**
3100  * Return whether the media changed since the last call to this
3101  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3102  */
3103 int bdrv_media_changed(BlockDriverState *bs)
3104 {
3105     BlockDriver *drv = bs->drv;
3106 
3107     if (drv && drv->bdrv_media_changed) {
3108         return drv->bdrv_media_changed(bs);
3109     }
3110     return -ENOTSUP;
3111 }
3112 
3113 /**
3114  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3115  */
3116 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3117 {
3118     BlockDriver *drv = bs->drv;
3119     const char *device_name;
3120 
3121     if (drv && drv->bdrv_eject) {
3122         drv->bdrv_eject(bs, eject_flag);
3123     }
3124 
3125     device_name = bdrv_get_device_name(bs);
3126     if (device_name[0] != '\0') {
3127         qapi_event_send_device_tray_moved(device_name,
3128                                           eject_flag, &error_abort);
3129     }
3130 }
3131 
3132 /**
3133  * Lock or unlock the media (if it is locked, the user won't be able
3134  * to eject it manually).
3135  */
3136 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3137 {
3138     BlockDriver *drv = bs->drv;
3139 
3140     trace_bdrv_lock_medium(bs, locked);
3141 
3142     if (drv && drv->bdrv_lock_medium) {
3143         drv->bdrv_lock_medium(bs, locked);
3144     }
3145 }
3146 
3147 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3148 {
3149     bs->guest_block_size = align;
3150 }
3151 
3152 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3153 {
3154     BdrvDirtyBitmap *bm;
3155 
3156     assert(name);
3157     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3158         if (bm->name && !strcmp(name, bm->name)) {
3159             return bm;
3160         }
3161     }
3162     return NULL;
3163 }
3164 
3165 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3166 {
3167     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3168     g_free(bitmap->name);
3169     bitmap->name = NULL;
3170 }
3171 
3172 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3173                                           uint32_t granularity,
3174                                           const char *name,
3175                                           Error **errp)
3176 {
3177     int64_t bitmap_size;
3178     BdrvDirtyBitmap *bitmap;
3179     uint32_t sector_granularity;
3180 
3181     assert((granularity & (granularity - 1)) == 0);
3182 
3183     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3184         error_setg(errp, "Bitmap already exists: %s", name);
3185         return NULL;
3186     }
3187     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3188     assert(sector_granularity);
3189     bitmap_size = bdrv_nb_sectors(bs);
3190     if (bitmap_size < 0) {
3191         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3192         errno = -bitmap_size;
3193         return NULL;
3194     }
3195     bitmap = g_new0(BdrvDirtyBitmap, 1);
3196     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3197     bitmap->size = bitmap_size;
3198     bitmap->name = g_strdup(name);
3199     bitmap->disabled = false;
3200     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3201     return bitmap;
3202 }
3203 
3204 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3205 {
3206     return bitmap->successor;
3207 }
3208 
3209 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3210 {
3211     return !(bitmap->disabled || bitmap->successor);
3212 }
3213 
3214 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3215 {
3216     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3217         return DIRTY_BITMAP_STATUS_FROZEN;
3218     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3219         return DIRTY_BITMAP_STATUS_DISABLED;
3220     } else {
3221         return DIRTY_BITMAP_STATUS_ACTIVE;
3222     }
3223 }
3224 
3225 /**
3226  * Create a successor bitmap destined to replace this bitmap after an operation.
3227  * Requires that the bitmap is not frozen and has no successor.
3228  */
3229 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3230                                        BdrvDirtyBitmap *bitmap, Error **errp)
3231 {
3232     uint64_t granularity;
3233     BdrvDirtyBitmap *child;
3234 
3235     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3236         error_setg(errp, "Cannot create a successor for a bitmap that is "
3237                    "currently frozen");
3238         return -1;
3239     }
3240     assert(!bitmap->successor);
3241 
3242     /* Create an anonymous successor */
3243     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3244     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3245     if (!child) {
3246         return -1;
3247     }
3248 
3249     /* Successor will be on or off based on our current state. */
3250     child->disabled = bitmap->disabled;
3251 
3252     /* Install the successor and freeze the parent */
3253     bitmap->successor = child;
3254     return 0;
3255 }
3256 
3257 /**
3258  * For a bitmap with a successor, yield our name to the successor,
3259  * delete the old bitmap, and return a handle to the new bitmap.
3260  */
3261 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3262                                             BdrvDirtyBitmap *bitmap,
3263                                             Error **errp)
3264 {
3265     char *name;
3266     BdrvDirtyBitmap *successor = bitmap->successor;
3267 
3268     if (successor == NULL) {
3269         error_setg(errp, "Cannot relinquish control if "
3270                    "there's no successor present");
3271         return NULL;
3272     }
3273 
3274     name = bitmap->name;
3275     bitmap->name = NULL;
3276     successor->name = name;
3277     bitmap->successor = NULL;
3278     bdrv_release_dirty_bitmap(bs, bitmap);
3279 
3280     return successor;
3281 }
3282 
3283 /**
3284  * In cases of failure where we can no longer safely delete the parent,
3285  * we may wish to re-join the parent and child/successor.
3286  * The merged parent will be un-frozen, but not explicitly re-enabled.
3287  */
3288 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3289                                            BdrvDirtyBitmap *parent,
3290                                            Error **errp)
3291 {
3292     BdrvDirtyBitmap *successor = parent->successor;
3293 
3294     if (!successor) {
3295         error_setg(errp, "Cannot reclaim a successor when none is present");
3296         return NULL;
3297     }
3298 
3299     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3300         error_setg(errp, "Merging of parent and successor bitmap failed");
3301         return NULL;
3302     }
3303     bdrv_release_dirty_bitmap(bs, successor);
3304     parent->successor = NULL;
3305 
3306     return parent;
3307 }
3308 
3309 /**
3310  * Truncates _all_ bitmaps attached to a BDS.
3311  */
3312 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3313 {
3314     BdrvDirtyBitmap *bitmap;
3315     uint64_t size = bdrv_nb_sectors(bs);
3316 
3317     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3318         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3319         hbitmap_truncate(bitmap->bitmap, size);
3320         bitmap->size = size;
3321     }
3322 }
3323 
3324 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3325 {
3326     BdrvDirtyBitmap *bm, *next;
3327     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3328         if (bm == bitmap) {
3329             assert(!bdrv_dirty_bitmap_frozen(bm));
3330             QLIST_REMOVE(bitmap, list);
3331             hbitmap_free(bitmap->bitmap);
3332             g_free(bitmap->name);
3333             g_free(bitmap);
3334             return;
3335         }
3336     }
3337 }
3338 
3339 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3340 {
3341     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3342     bitmap->disabled = true;
3343 }
3344 
3345 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3346 {
3347     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3348     bitmap->disabled = false;
3349 }
3350 
3351 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3352 {
3353     BdrvDirtyBitmap *bm;
3354     BlockDirtyInfoList *list = NULL;
3355     BlockDirtyInfoList **plist = &list;
3356 
3357     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3358         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3359         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3360         info->count = bdrv_get_dirty_count(bm);
3361         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3362         info->has_name = !!bm->name;
3363         info->name = g_strdup(bm->name);
3364         info->status = bdrv_dirty_bitmap_status(bm);
3365         entry->value = info;
3366         *plist = entry;
3367         plist = &entry->next;
3368     }
3369 
3370     return list;
3371 }
3372 
3373 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3374 {
3375     if (bitmap) {
3376         return hbitmap_get(bitmap->bitmap, sector);
3377     } else {
3378         return 0;
3379     }
3380 }
3381 
3382 /**
3383  * Chooses a default granularity based on the existing cluster size,
3384  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3385  * is no cluster size information available.
3386  */
3387 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3388 {
3389     BlockDriverInfo bdi;
3390     uint32_t granularity;
3391 
3392     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3393         granularity = MAX(4096, bdi.cluster_size);
3394         granularity = MIN(65536, granularity);
3395     } else {
3396         granularity = 65536;
3397     }
3398 
3399     return granularity;
3400 }
3401 
3402 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3403 {
3404     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3405 }
3406 
3407 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3408 {
3409     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3410 }
3411 
3412 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3413                            int64_t cur_sector, int nr_sectors)
3414 {
3415     assert(bdrv_dirty_bitmap_enabled(bitmap));
3416     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3417 }
3418 
3419 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3420                              int64_t cur_sector, int nr_sectors)
3421 {
3422     assert(bdrv_dirty_bitmap_enabled(bitmap));
3423     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3424 }
3425 
3426 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3427 {
3428     assert(bdrv_dirty_bitmap_enabled(bitmap));
3429     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3430 }
3431 
3432 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3433                     int nr_sectors)
3434 {
3435     BdrvDirtyBitmap *bitmap;
3436     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3437         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3438             continue;
3439         }
3440         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3441     }
3442 }
3443 
3444 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3445                       int nr_sectors)
3446 {
3447     BdrvDirtyBitmap *bitmap;
3448     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3449         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3450             continue;
3451         }
3452         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3453     }
3454 }
3455 
3456 /**
3457  * Advance an HBitmapIter to an arbitrary offset.
3458  */
3459 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3460 {
3461     assert(hbi->hb);
3462     hbitmap_iter_init(hbi, hbi->hb, offset);
3463 }
3464 
3465 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3466 {
3467     return hbitmap_count(bitmap->bitmap);
3468 }
3469 
3470 /* Get a reference to bs */
3471 void bdrv_ref(BlockDriverState *bs)
3472 {
3473     bs->refcnt++;
3474 }
3475 
3476 /* Release a previously grabbed reference to bs.
3477  * If after releasing, reference count is zero, the BlockDriverState is
3478  * deleted. */
3479 void bdrv_unref(BlockDriverState *bs)
3480 {
3481     if (!bs) {
3482         return;
3483     }
3484     assert(bs->refcnt > 0);
3485     if (--bs->refcnt == 0) {
3486         bdrv_delete(bs);
3487     }
3488 }
3489 
/* One entry on a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error describing why the operation is blocked; also used as the key
     * when unblocking.  Stored as passed in, not copied. */
    Error *reason;
    /* Linkage within the op_blockers list. */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3494 
3495 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3496 {
3497     BdrvOpBlocker *blocker;
3498     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3499     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3500         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3501         if (errp) {
3502             error_setg(errp, "Node '%s' is busy: %s",
3503                        bdrv_get_device_or_node_name(bs),
3504                        error_get_pretty(blocker->reason));
3505         }
3506         return true;
3507     }
3508     return false;
3509 }
3510 
3511 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3512 {
3513     BdrvOpBlocker *blocker;
3514     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3515 
3516     blocker = g_new0(BdrvOpBlocker, 1);
3517     blocker->reason = reason;
3518     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3519 }
3520 
3521 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3522 {
3523     BdrvOpBlocker *blocker, *next;
3524     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3525     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3526         if (blocker->reason == reason) {
3527             QLIST_REMOVE(blocker, list);
3528             g_free(blocker);
3529         }
3530     }
3531 }
3532 
3533 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3534 {
3535     int i;
3536     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3537         bdrv_op_block(bs, i, reason);
3538     }
3539 }
3540 
3541 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3542 {
3543     int i;
3544     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3545         bdrv_op_unblock(bs, i, reason);
3546     }
3547 }
3548 
3549 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3550 {
3551     int i;
3552 
3553     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3554         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3555             return false;
3556         }
3557     }
3558     return true;
3559 }
3560 
3561 void bdrv_iostatus_enable(BlockDriverState *bs)
3562 {
3563     bs->iostatus_enabled = true;
3564     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3565 }
3566 
3567 /* The I/O status is only enabled if the drive explicitly
3568  * enables it _and_ the VM is configured to stop on errors */
3569 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3570 {
3571     return (bs->iostatus_enabled &&
3572            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3573             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3574             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3575 }
3576 
3577 void bdrv_iostatus_disable(BlockDriverState *bs)
3578 {
3579     bs->iostatus_enabled = false;
3580 }
3581 
3582 void bdrv_iostatus_reset(BlockDriverState *bs)
3583 {
3584     if (bdrv_iostatus_is_enabled(bs)) {
3585         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3586         if (bs->job) {
3587             block_job_iostatus_reset(bs->job);
3588         }
3589     }
3590 }
3591 
3592 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3593 {
3594     assert(bdrv_iostatus_is_enabled(bs));
3595     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3596         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3597                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3598     }
3599 }
3600 
3601 void bdrv_img_create(const char *filename, const char *fmt,
3602                      const char *base_filename, const char *base_fmt,
3603                      char *options, uint64_t img_size, int flags,
3604                      Error **errp, bool quiet)
3605 {
3606     QemuOptsList *create_opts = NULL;
3607     QemuOpts *opts = NULL;
3608     const char *backing_fmt, *backing_file;
3609     int64_t size;
3610     BlockDriver *drv, *proto_drv;
3611     BlockDriver *backing_drv = NULL;
3612     Error *local_err = NULL;
3613     int ret = 0;
3614 
3615     /* Find driver and parse its options */
3616     drv = bdrv_find_format(fmt);
3617     if (!drv) {
3618         error_setg(errp, "Unknown file format '%s'", fmt);
3619         return;
3620     }
3621 
3622     proto_drv = bdrv_find_protocol(filename, true, errp);
3623     if (!proto_drv) {
3624         return;
3625     }
3626 
3627     if (!drv->create_opts) {
3628         error_setg(errp, "Format driver '%s' does not support image creation",
3629                    drv->format_name);
3630         return;
3631     }
3632 
3633     if (!proto_drv->create_opts) {
3634         error_setg(errp, "Protocol driver '%s' does not support image creation",
3635                    proto_drv->format_name);
3636         return;
3637     }
3638 
3639     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3640     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3641 
3642     /* Create parameter list with default values */
3643     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3644     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3645 
3646     /* Parse -o options */
3647     if (options) {
3648         qemu_opts_do_parse(opts, options, NULL, &local_err);
3649         if (local_err) {
3650             error_report_err(local_err);
3651             local_err = NULL;
3652             error_setg(errp, "Invalid options for file format '%s'", fmt);
3653             goto out;
3654         }
3655     }
3656 
3657     if (base_filename) {
3658         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3659         if (local_err) {
3660             error_setg(errp, "Backing file not supported for file format '%s'",
3661                        fmt);
3662             goto out;
3663         }
3664     }
3665 
3666     if (base_fmt) {
3667         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3668         if (local_err) {
3669             error_setg(errp, "Backing file format not supported for file "
3670                              "format '%s'", fmt);
3671             goto out;
3672         }
3673     }
3674 
3675     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3676     if (backing_file) {
3677         if (!strcmp(filename, backing_file)) {
3678             error_setg(errp, "Error: Trying to create an image with the "
3679                              "same filename as the backing file");
3680             goto out;
3681         }
3682     }
3683 
3684     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3685     if (backing_fmt) {
3686         backing_drv = bdrv_find_format(backing_fmt);
3687         if (!backing_drv) {
3688             error_setg(errp, "Unknown backing file format '%s'",
3689                        backing_fmt);
3690             goto out;
3691         }
3692     }
3693 
3694     // The size for the image must always be specified, with one exception:
3695     // If we are using a backing file, we can obtain the size from there
3696     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3697     if (size == -1) {
3698         if (backing_file) {
3699             BlockDriverState *bs;
3700             char *full_backing = g_new0(char, PATH_MAX);
3701             int64_t size;
3702             int back_flags;
3703 
3704             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3705                                                          full_backing, PATH_MAX,
3706                                                          &local_err);
3707             if (local_err) {
3708                 g_free(full_backing);
3709                 goto out;
3710             }
3711 
3712             /* backing files always opened read-only */
3713             back_flags =
3714                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3715 
3716             bs = NULL;
3717             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3718                             backing_drv, &local_err);
3719             g_free(full_backing);
3720             if (ret < 0) {
3721                 goto out;
3722             }
3723             size = bdrv_getlength(bs);
3724             if (size < 0) {
3725                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3726                                  backing_file);
3727                 bdrv_unref(bs);
3728                 goto out;
3729             }
3730 
3731             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3732 
3733             bdrv_unref(bs);
3734         } else {
3735             error_setg(errp, "Image creation needs a size parameter");
3736             goto out;
3737         }
3738     }
3739 
3740     if (!quiet) {
3741         printf("Formatting '%s', fmt=%s", filename, fmt);
3742         qemu_opts_print(opts, " ");
3743         puts("");
3744     }
3745 
3746     ret = bdrv_create(drv, filename, opts, &local_err);
3747 
3748     if (ret == -EFBIG) {
3749         /* This is generally a better message than whatever the driver would
3750          * deliver (especially because of the cluster_size_hint), since that
3751          * is most probably not much different from "image too large". */
3752         const char *cluster_size_hint = "";
3753         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3754             cluster_size_hint = " (try using a larger cluster size)";
3755         }
3756         error_setg(errp, "The image size is too large for file format '%s'"
3757                    "%s", fmt, cluster_size_hint);
3758         error_free(local_err);
3759         local_err = NULL;
3760     }
3761 
3762 out:
3763     qemu_opts_del(opts);
3764     qemu_opts_free(create_opts);
3765     if (local_err) {
3766         error_propagate(errp, local_err);
3767     }
3768 }
3769 
3770 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3771 {
3772     return bs->aio_context;
3773 }
3774 
3775 void bdrv_detach_aio_context(BlockDriverState *bs)
3776 {
3777     BdrvAioNotifier *baf;
3778 
3779     if (!bs->drv) {
3780         return;
3781     }
3782 
3783     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3784         baf->detach_aio_context(baf->opaque);
3785     }
3786 
3787     if (bs->io_limits_enabled) {
3788         throttle_detach_aio_context(&bs->throttle_state);
3789     }
3790     if (bs->drv->bdrv_detach_aio_context) {
3791         bs->drv->bdrv_detach_aio_context(bs);
3792     }
3793     if (bs->file) {
3794         bdrv_detach_aio_context(bs->file);
3795     }
3796     if (bs->backing_hd) {
3797         bdrv_detach_aio_context(bs->backing_hd);
3798     }
3799 
3800     bs->aio_context = NULL;
3801 }
3802 
3803 void bdrv_attach_aio_context(BlockDriverState *bs,
3804                              AioContext *new_context)
3805 {
3806     BdrvAioNotifier *ban;
3807 
3808     if (!bs->drv) {
3809         return;
3810     }
3811 
3812     bs->aio_context = new_context;
3813 
3814     if (bs->backing_hd) {
3815         bdrv_attach_aio_context(bs->backing_hd, new_context);
3816     }
3817     if (bs->file) {
3818         bdrv_attach_aio_context(bs->file, new_context);
3819     }
3820     if (bs->drv->bdrv_attach_aio_context) {
3821         bs->drv->bdrv_attach_aio_context(bs, new_context);
3822     }
3823     if (bs->io_limits_enabled) {
3824         throttle_attach_aio_context(&bs->throttle_state, new_context);
3825     }
3826 
3827     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3828         ban->attached_aio_context(new_context, ban->opaque);
3829     }
3830 }
3831 
3832 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3833 {
3834     bdrv_drain_all(); /* ensure there are no in-flight requests */
3835 
3836     bdrv_detach_aio_context(bs);
3837 
3838     /* This function executes in the old AioContext so acquire the new one in
3839      * case it runs in a different thread.
3840      */
3841     aio_context_acquire(new_context);
3842     bdrv_attach_aio_context(bs, new_context);
3843     aio_context_release(new_context);
3844 }
3845 
3846 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3847         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3848         void (*detach_aio_context)(void *opaque), void *opaque)
3849 {
3850     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3851     *ban = (BdrvAioNotifier){
3852         .attached_aio_context = attached_aio_context,
3853         .detach_aio_context   = detach_aio_context,
3854         .opaque               = opaque
3855     };
3856 
3857     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3858 }
3859 
3860 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3861                                       void (*attached_aio_context)(AioContext *,
3862                                                                    void *),
3863                                       void (*detach_aio_context)(void *),
3864                                       void *opaque)
3865 {
3866     BdrvAioNotifier *ban, *ban_next;
3867 
3868     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3869         if (ban->attached_aio_context == attached_aio_context &&
3870             ban->detach_aio_context   == detach_aio_context   &&
3871             ban->opaque               == opaque)
3872         {
3873             QLIST_REMOVE(ban, list);
3874             g_free(ban);
3875 
3876             return;
3877         }
3878     }
3879 
3880     abort();
3881 }
3882 
3883 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3884                        BlockDriverAmendStatusCB *status_cb)
3885 {
3886     if (!bs->drv->bdrv_amend_options) {
3887         return -ENOTSUP;
3888     }
3889     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3890 }
3891 
3892 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3893  * of block filter and by bdrv_is_first_non_filter.
3894  * It is used to test if the given bs is the candidate or recurse more in the
3895  * node graph.
3896  */
3897 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3898                                       BlockDriverState *candidate)
3899 {
3900     /* return false if basic checks fails */
3901     if (!bs || !bs->drv) {
3902         return false;
3903     }
3904 
3905     /* the code reached a non block filter driver -> check if the bs is
3906      * the same as the candidate. It's the recursion termination condition.
3907      */
3908     if (!bs->drv->is_filter) {
3909         return bs == candidate;
3910     }
3911     /* Down this path the driver is a block filter driver */
3912 
3913     /* If the block filter recursion method is defined use it to recurse down
3914      * the node graph.
3915      */
3916     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3917         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3918     }
3919 
3920     /* the driver is a block filter but don't allow to recurse -> return false
3921      */
3922     return false;
3923 }
3924 
3925 /* This function checks if the candidate is the first non filter bs down it's
3926  * bs chain. Since we don't have pointers to parents it explore all bs chains
3927  * from the top. Some filters can choose not to pass down the recursion.
3928  */
3929 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3930 {
3931     BlockDriverState *bs;
3932 
3933     /* walk down the bs forest recursively */
3934     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3935         bool perm;
3936 
3937         /* try to recurse in this top level bs */
3938         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3939 
3940         /* candidate is the first non filter */
3941         if (perm) {
3942             return true;
3943         }
3944     }
3945 
3946     return false;
3947 }
3948 
3949 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3950 {
3951     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3952     AioContext *aio_context;
3953 
3954     if (!to_replace_bs) {
3955         error_setg(errp, "Node name '%s' not found", node_name);
3956         return NULL;
3957     }
3958 
3959     aio_context = bdrv_get_aio_context(to_replace_bs);
3960     aio_context_acquire(aio_context);
3961 
3962     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3963         to_replace_bs = NULL;
3964         goto out;
3965     }
3966 
3967     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3968      * most non filter in order to prevent data corruption.
3969      * Another benefit is that this tests exclude backing files which are
3970      * blocked by the backing blockers.
3971      */
3972     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3973         error_setg(errp, "Only top most non filter can be replaced");
3974         to_replace_bs = NULL;
3975         goto out;
3976     }
3977 
3978 out:
3979     aio_context_release(aio_context);
3980     return to_replace_bs;
3981 }
3982 
3983 static bool append_open_options(QDict *d, BlockDriverState *bs)
3984 {
3985     const QDictEntry *entry;
3986     bool found_any = false;
3987 
3988     for (entry = qdict_first(bs->options); entry;
3989          entry = qdict_next(bs->options, entry))
3990     {
3991         /* Only take options for this level and exclude all non-driver-specific
3992          * options */
3993         if (!strchr(qdict_entry_key(entry), '.') &&
3994             strcmp(qdict_entry_key(entry), "node-name"))
3995         {
3996             qobject_incref(qdict_entry_value(entry));
3997             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3998             found_any = true;
3999         }
4000     }
4001 
4002     return found_any;
4003 }
4004 
4005 /* Updates the following BDS fields:
4006  *  - exact_filename: A filename which may be used for opening a block device
4007  *                    which (mostly) equals the given BDS (even without any
4008  *                    other options; so reading and writing must return the same
4009  *                    results, but caching etc. may be different)
4010  *  - full_open_options: Options which, when given when opening a block device
4011  *                       (without a filename), result in a BDS (mostly)
4012  *                       equalling the given one
4013  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4014  *              full_open_options is converted to a JSON object, prefixed with
4015  *              "json:" (for use through the JSON pseudo protocol) and put here.
4016  */
4017 void bdrv_refresh_filename(BlockDriverState *bs)
4018 {
4019     BlockDriver *drv = bs->drv;
4020     QDict *opts;
4021 
4022     if (!drv) {
4023         return;
4024     }
4025 
4026     /* This BDS's file name will most probably depend on its file's name, so
4027      * refresh that first */
4028     if (bs->file) {
4029         bdrv_refresh_filename(bs->file);
4030     }
4031 
4032     if (drv->bdrv_refresh_filename) {
4033         /* Obsolete information is of no use here, so drop the old file name
4034          * information before refreshing it */
4035         bs->exact_filename[0] = '\0';
4036         if (bs->full_open_options) {
4037             QDECREF(bs->full_open_options);
4038             bs->full_open_options = NULL;
4039         }
4040 
4041         drv->bdrv_refresh_filename(bs);
4042     } else if (bs->file) {
4043         /* Try to reconstruct valid information from the underlying file */
4044         bool has_open_options;
4045 
4046         bs->exact_filename[0] = '\0';
4047         if (bs->full_open_options) {
4048             QDECREF(bs->full_open_options);
4049             bs->full_open_options = NULL;
4050         }
4051 
4052         opts = qdict_new();
4053         has_open_options = append_open_options(opts, bs);
4054 
4055         /* If no specific options have been given for this BDS, the filename of
4056          * the underlying file should suffice for this one as well */
4057         if (bs->file->exact_filename[0] && !has_open_options) {
4058             strcpy(bs->exact_filename, bs->file->exact_filename);
4059         }
4060         /* Reconstructing the full options QDict is simple for most format block
4061          * drivers, as long as the full options are known for the underlying
4062          * file BDS. The full options QDict of that file BDS should somehow
4063          * contain a representation of the filename, therefore the following
4064          * suffices without querying the (exact_)filename of this BDS. */
4065         if (bs->file->full_open_options) {
4066             qdict_put_obj(opts, "driver",
4067                           QOBJECT(qstring_from_str(drv->format_name)));
4068             QINCREF(bs->file->full_open_options);
4069             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4070 
4071             bs->full_open_options = opts;
4072         } else {
4073             QDECREF(opts);
4074         }
4075     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4076         /* There is no underlying file BDS (at least referenced by BDS.file),
4077          * so the full options QDict should be equal to the options given
4078          * specifically for this block device when it was opened (plus the
4079          * driver specification).
4080          * Because those options don't change, there is no need to update
4081          * full_open_options when it's already set. */
4082 
4083         opts = qdict_new();
4084         append_open_options(opts, bs);
4085         qdict_put_obj(opts, "driver",
4086                       QOBJECT(qstring_from_str(drv->format_name)));
4087 
4088         if (bs->exact_filename[0]) {
4089             /* This may not work for all block protocol drivers (some may
4090              * require this filename to be parsed), but we have to find some
4091              * default solution here, so just include it. If some block driver
4092              * does not support pure options without any filename at all or
4093              * needs some special format of the options QDict, it needs to
4094              * implement the driver-specific bdrv_refresh_filename() function.
4095              */
4096             qdict_put_obj(opts, "filename",
4097                           QOBJECT(qstring_from_str(bs->exact_filename)));
4098         }
4099 
4100         bs->full_open_options = opts;
4101     }
4102 
4103     if (bs->exact_filename[0]) {
4104         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4105     } else if (bs->full_open_options) {
4106         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4107         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4108                  qstring_get_str(json));
4109         QDECREF(json);
4110     }
4111 }
4112 
4113 /* This accessor function purpose is to allow the device models to access the
4114  * BlockAcctStats structure embedded inside a BlockDriverState without being
4115  * aware of the BlockDriverState structure layout.
4116  * It will go away when the BlockAcctStats structure will be moved inside
4117  * the device models.
4118  */
4119 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4120 {
4121     return &bs->stats;
4122 }
4123