xref: /openbmc/qemu/block.c (revision 62392ebb09fc6e87626aa151a616f24c4e921493)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 
43 #ifdef CONFIG_BSD
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
57 /**
58  * A BdrvDirtyBitmap can be in three possible states:
59  * (1) successor is NULL and disabled is false: full r/w mode
60  * (2) successor is NULL and disabled is true: read only mode ("disabled")
61  * (3) successor is set: frozen mode.
62  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
64  */
65 struct BdrvDirtyBitmap {
66     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
67     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68     char *name;                 /* Optional non-empty unique ID */
69     int64_t size;               /* Size of the bitmap (Number of sectors) */
70     bool disabled;              /* Bitmap is read-only */
71     QLIST_ENTRY(BdrvDirtyBitmap) list;
72 };
73 
74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
75 
76 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
77 
78 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
79     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
80 
81 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
82     QLIST_HEAD_INITIALIZER(bdrv_drivers);
83 
84 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
85                              const char *reference, QDict *options, int flags,
86                              BlockDriverState *parent,
87                              const BdrvChildRole *child_role, Error **errp);
88 
89 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
90 /* If non-zero, use only whitelisted block drivers */
91 static int use_bdrv_whitelist;
92 
93 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a letter */
    return (is_lower || is_upper) && filename[1] == ':';
}
100 
101 int is_windows_drive(const char *filename)
102 {
103     if (is_windows_drive_prefix(filename) &&
104         filename[2] == '\0')
105         return 1;
106     if (strstart(filename, "\\\\.\\", NULL) ||
107         strstart(filename, "//./", NULL))
108         return 1;
109     return 0;
110 }
111 #endif
112 
113 size_t bdrv_opt_mem_align(BlockDriverState *bs)
114 {
115     if (!bs || !bs->drv) {
116         /* page size or 4k (hdd sector size) should be on the safe side */
117         return MAX(4096, getpagesize());
118     }
119 
120     return bs->bl.opt_mem_alignment;
121 }
122 
123 size_t bdrv_min_mem_align(BlockDriverState *bs)
124 {
125     if (!bs || !bs->drv) {
126         /* page size or 4k (hdd sector size) should be on the safe side */
127         return MAX(4096, getpagesize());
128     }
129 
130     return bs->bl.min_mem_alignment;
131 }
132 
/*
 * Return 1 if @path starts with "<protocol>:", i.e. the first ':' occurs
 * before any path separator; return 0 otherwise.
 *
 * On Windows, drive names are never treated as a protocol and '\\' counts
 * as a path separator too.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *delimiters = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *delimiters = ":/";
#endif

    /* Look at the first delimiter (or the terminating NUL if there is none) */
    return path[strcspn(path, delimiters)] == ':';
}
150 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows, drive names
 * (including names like "\\.\d:") and paths starting with '\\' are
 * absolute as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
163 
/*
 * Build the path of @filename in @dest (at most @dest_size bytes, always
 * NUL-terminated when @dest_size > 0).
 *
 * If @filename is absolute it is copied as is. Otherwise it is taken to be
 * relative to @base_path: everything in @base_path up to and including the
 * last path separator is kept, and @filename is appended to it. URLs are
 * supported: if @base_path has a "<protocol>:" prefix, at least that prefix
 * is kept even when no path separator follows it.
 *
 * A ':' in @base_path is only treated as the end of a prefix when
 * @base_path really has a protocol (or, on Windows, is a drive name).
 * Otherwise a colon inside a plain file name (e.g. "./a:b") would wrongly
 * be kept as part of the directory.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    bool skip_colon;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    skip_colon = path_has_protocol(base_path);
#ifdef _WIN32
    /* path_has_protocol() rejects drive names; keep skipping their colon */
    skip_colon = skip_colon || is_windows_drive(base_path) ||
                 is_windows_drive_prefix(base_path);
#endif

    /* End of the "<protocol>:" (or drive) prefix, if there is one */
    prefix_end = base_path;
    if (skip_colon) {
        const char *colon = strchr(base_path, ':');
        if (colon) {
            prefix_end = colon + 1;
        }
    }

    /* Last path separator of base_path, if any */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');
        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif

    /* Keep whichever reaches further: the prefix or the directory part */
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
207 
208 void bdrv_get_full_backing_filename_from_filename(const char *backed,
209                                                   const char *backing,
210                                                   char *dest, size_t sz,
211                                                   Error **errp)
212 {
213     if (backing[0] == '\0' || path_has_protocol(backing) ||
214         path_is_absolute(backing))
215     {
216         pstrcpy(dest, sz, backing);
217     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
218         error_setg(errp, "Cannot use relative backing file names for '%s'",
219                    backed);
220     } else {
221         path_combine(dest, sz, backed, backing);
222     }
223 }
224 
225 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
226                                     Error **errp)
227 {
228     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
229 
230     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
231                                                  dest, sz, errp);
232 }
233 
234 void bdrv_register(BlockDriver *bdrv)
235 {
236     bdrv_setup_io_funcs(bdrv);
237 
238     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
239 }
240 
241 BlockDriverState *bdrv_new_root(void)
242 {
243     BlockDriverState *bs = bdrv_new();
244 
245     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
246     return bs;
247 }
248 
249 BlockDriverState *bdrv_new(void)
250 {
251     BlockDriverState *bs;
252     int i;
253 
254     bs = g_new0(BlockDriverState, 1);
255     QLIST_INIT(&bs->dirty_bitmaps);
256     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
257         QLIST_INIT(&bs->op_blockers[i]);
258     }
259     notifier_list_init(&bs->close_notifiers);
260     notifier_with_return_list_init(&bs->before_write_notifiers);
261     qemu_co_queue_init(&bs->throttled_reqs[0]);
262     qemu_co_queue_init(&bs->throttled_reqs[1]);
263     bs->refcnt = 1;
264     bs->aio_context = qemu_get_aio_context();
265 
266     return bs;
267 }
268 
269 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
270 {
271     notifier_list_add(&bs->close_notifiers, notify);
272 }
273 
274 BlockDriver *bdrv_find_format(const char *format_name)
275 {
276     BlockDriver *drv1;
277     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
278         if (!strcmp(drv1->format_name, format_name)) {
279             return drv1;
280         }
281     }
282     return NULL;
283 }
284 
285 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
286 {
287     static const char *whitelist_rw[] = {
288         CONFIG_BDRV_RW_WHITELIST
289     };
290     static const char *whitelist_ro[] = {
291         CONFIG_BDRV_RO_WHITELIST
292     };
293     const char **p;
294 
295     if (!whitelist_rw[0] && !whitelist_ro[0]) {
296         return 1;               /* no whitelist, anything goes */
297     }
298 
299     for (p = whitelist_rw; *p; p++) {
300         if (!strcmp(drv->format_name, *p)) {
301             return 1;
302         }
303     }
304     if (read_only) {
305         for (p = whitelist_ro; *p; p++) {
306             if (!strcmp(drv->format_name, *p)) {
307                 return 1;
308             }
309         }
310     }
311     return 0;
312 }
313 
314 typedef struct CreateCo {
315     BlockDriver *drv;
316     char *filename;
317     QemuOpts *opts;
318     int ret;
319     Error *err;
320 } CreateCo;
321 
322 static void coroutine_fn bdrv_create_co_entry(void *opaque)
323 {
324     Error *local_err = NULL;
325     int ret;
326 
327     CreateCo *cco = opaque;
328     assert(cco->drv);
329 
330     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
331     if (local_err) {
332         error_propagate(&cco->err, local_err);
333     }
334     cco->ret = ret;
335 }
336 
337 int bdrv_create(BlockDriver *drv, const char* filename,
338                 QemuOpts *opts, Error **errp)
339 {
340     int ret;
341 
342     Coroutine *co;
343     CreateCo cco = {
344         .drv = drv,
345         .filename = g_strdup(filename),
346         .opts = opts,
347         .ret = NOT_DONE,
348         .err = NULL,
349     };
350 
351     if (!drv->bdrv_create) {
352         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
353         ret = -ENOTSUP;
354         goto out;
355     }
356 
357     if (qemu_in_coroutine()) {
358         /* Fast-path if already in coroutine context */
359         bdrv_create_co_entry(&cco);
360     } else {
361         co = qemu_coroutine_create(bdrv_create_co_entry);
362         qemu_coroutine_enter(co, &cco);
363         while (cco.ret == NOT_DONE) {
364             aio_poll(qemu_get_aio_context(), true);
365         }
366     }
367 
368     ret = cco.ret;
369     if (ret < 0) {
370         if (cco.err) {
371             error_propagate(errp, cco.err);
372         } else {
373             error_setg_errno(errp, -ret, "Could not create image");
374         }
375     }
376 
377 out:
378     g_free(cco.filename);
379     return ret;
380 }
381 
382 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
383 {
384     BlockDriver *drv;
385     Error *local_err = NULL;
386     int ret;
387 
388     drv = bdrv_find_protocol(filename, true, errp);
389     if (drv == NULL) {
390         return -ENOENT;
391     }
392 
393     ret = bdrv_create(drv, filename, opts, &local_err);
394     if (local_err) {
395         error_propagate(errp, local_err);
396     }
397     return ret;
398 }
399 
400 /**
401  * Try to get @bs's logical and physical block size.
402  * On success, store them in @bsz struct and return 0.
403  * On failure return -errno.
404  * @bs must not be empty.
405  */
406 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
407 {
408     BlockDriver *drv = bs->drv;
409 
410     if (drv && drv->bdrv_probe_blocksizes) {
411         return drv->bdrv_probe_blocksizes(bs, bsz);
412     }
413 
414     return -ENOTSUP;
415 }
416 
417 /**
418  * Try to get @bs's geometry (cyls, heads, sectors).
419  * On success, store them in @geo struct and return 0.
420  * On failure return -errno.
421  * @bs must not be empty.
422  */
423 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
424 {
425     BlockDriver *drv = bs->drv;
426 
427     if (drv && drv->bdrv_probe_geometry) {
428         return drv->bdrv_probe_geometry(bs, geo);
429     }
430 
431     return -ENOTSUP;
432 }
433 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the size of
 * that buffer in bytes. On POSIX hosts the file is created in $TMPDIR, or
 * in /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename). On Windows the negated
 * GetLastError() code is returned on failure.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
469 
470 /*
471  * Detect host devices. By convention, /dev/cdrom[N] is always
472  * recognized as a host CDROM.
473  */
474 static BlockDriver *find_hdev_driver(const char *filename)
475 {
476     int score_max = 0, score;
477     BlockDriver *drv = NULL, *d;
478 
479     QLIST_FOREACH(d, &bdrv_drivers, list) {
480         if (d->bdrv_probe_device) {
481             score = d->bdrv_probe_device(filename);
482             if (score > score_max) {
483                 score_max = score;
484                 drv = d;
485             }
486         }
487     }
488 
489     return drv;
490 }
491 
492 BlockDriver *bdrv_find_protocol(const char *filename,
493                                 bool allow_protocol_prefix,
494                                 Error **errp)
495 {
496     BlockDriver *drv1;
497     char protocol[128];
498     int len;
499     const char *p;
500 
501     /* TODO Drivers without bdrv_file_open must be specified explicitly */
502 
503     /*
504      * XXX(hch): we really should not let host device detection
505      * override an explicit protocol specification, but moving this
506      * later breaks access to device names with colons in them.
507      * Thanks to the brain-dead persistent naming schemes on udev-
508      * based Linux systems those actually are quite common.
509      */
510     drv1 = find_hdev_driver(filename);
511     if (drv1) {
512         return drv1;
513     }
514 
515     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
516         return &bdrv_file;
517     }
518 
519     p = strchr(filename, ':');
520     assert(p != NULL);
521     len = p - filename;
522     if (len > sizeof(protocol) - 1)
523         len = sizeof(protocol) - 1;
524     memcpy(protocol, filename, len);
525     protocol[len] = '\0';
526     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
527         if (drv1->protocol_name &&
528             !strcmp(drv1->protocol_name, protocol)) {
529             return drv1;
530         }
531     }
532 
533     error_setg(errp, "Unknown protocol '%s'", protocol);
534     return NULL;
535 }
536 
537 /*
538  * Guess image format by probing its contents.
539  * This is not a good idea when your image is raw (CVE-2008-2004), but
540  * we do it anyway for backward compatibility.
541  *
542  * @buf         contains the image's first @buf_size bytes.
543  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
544  *              but can be smaller if the image file is smaller)
545  * @filename    is its filename.
546  *
547  * For all block drivers, call the bdrv_probe() method to get its
548  * probing score.
549  * Return the first block driver with the highest probing score.
550  */
551 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
552                             const char *filename)
553 {
554     int score_max = 0, score;
555     BlockDriver *drv = NULL, *d;
556 
557     QLIST_FOREACH(d, &bdrv_drivers, list) {
558         if (d->bdrv_probe) {
559             score = d->bdrv_probe(buf, buf_size, filename);
560             if (score > score_max) {
561                 score_max = score;
562                 drv = d;
563             }
564         }
565     }
566 
567     return drv;
568 }
569 
570 static int find_image_format(BlockDriverState *bs, const char *filename,
571                              BlockDriver **pdrv, Error **errp)
572 {
573     BlockDriver *drv;
574     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
575     int ret = 0;
576 
577     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
578     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
579         *pdrv = &bdrv_raw;
580         return ret;
581     }
582 
583     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
584     if (ret < 0) {
585         error_setg_errno(errp, -ret, "Could not read image for determining its "
586                          "format");
587         *pdrv = NULL;
588         return ret;
589     }
590 
591     drv = bdrv_probe_all(buf, ret, filename);
592     if (!drv) {
593         error_setg(errp, "Could not determine image format: No compatible "
594                    "driver found");
595         ret = -ENOENT;
596     }
597     *pdrv = drv;
598     return ret;
599 }
600 
601 /**
602  * Set the current 'total_sectors' value
603  * Return 0 on success, -errno on error.
604  */
605 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
606 {
607     BlockDriver *drv = bs->drv;
608 
609     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
610     if (bdrv_is_sg(bs))
611         return 0;
612 
613     /* query actual device if possible, otherwise just trust the hint */
614     if (drv->bdrv_getlength) {
615         int64_t length = drv->bdrv_getlength(bs);
616         if (length < 0) {
617             return length;
618         }
619         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
620     }
621 
622     bs->total_sectors = hint;
623     return 0;
624 }
625 
626 /**
627  * Combines a QDict of new block driver @options with any missing options taken
628  * from @old_options, so that leaving out an option defaults to its old value.
629  */
630 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
631                               QDict *old_options)
632 {
633     if (bs->drv && bs->drv->bdrv_join_options) {
634         bs->drv->bdrv_join_options(options, old_options);
635     } else {
636         qdict_join(options, old_options, false);
637     }
638 }
639 
640 /**
641  * Set open flags for a given discard mode
642  *
643  * Return 0 on success, -1 if the discard mode was invalid.
644  */
645 int bdrv_parse_discard_flags(const char *mode, int *flags)
646 {
647     *flags &= ~BDRV_O_UNMAP;
648 
649     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
650         /* do nothing */
651     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
652         *flags |= BDRV_O_UNMAP;
653     } else {
654         return -1;
655     }
656 
657     return 0;
658 }
659 
660 /**
661  * Set open flags for a given cache mode
662  *
663  * Return 0 on success, -1 if the cache mode was invalid.
664  */
665 int bdrv_parse_cache_flags(const char *mode, int *flags)
666 {
667     *flags &= ~BDRV_O_CACHE_MASK;
668 
669     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
670         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
671     } else if (!strcmp(mode, "directsync")) {
672         *flags |= BDRV_O_NOCACHE;
673     } else if (!strcmp(mode, "writeback")) {
674         *flags |= BDRV_O_CACHE_WB;
675     } else if (!strcmp(mode, "unsafe")) {
676         *flags |= BDRV_O_CACHE_WB;
677         *flags |= BDRV_O_NO_FLUSH;
678     } else if (!strcmp(mode, "writethrough")) {
679         /* this is the default */
680     } else {
681         return -1;
682     }
683 
684     return 0;
685 }
686 
687 /*
688  * Returns the flags that a temporary snapshot should get, based on the
689  * originally requested flags (the originally requested image will have flags
690  * like a backing file)
691  */
692 static int bdrv_temp_snapshot_flags(int flags)
693 {
694     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
695 }
696 
697 /*
698  * Returns the flags that bs->file should get if a protocol driver is expected,
699  * based on the given flags for the parent BDS
700  */
701 static int bdrv_inherited_flags(int flags)
702 {
703     /* Enable protocol handling, disable format probing for bs->file */
704     flags |= BDRV_O_PROTOCOL;
705 
706     /* Our block drivers take care to send flushes and respect unmap policy,
707      * so we can enable both unconditionally on lower layers. */
708     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
709 
710     /* Clear flags that only apply to the top layer */
711     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
712 
713     return flags;
714 }
715 
716 const BdrvChildRole child_file = {
717     .inherit_flags = bdrv_inherited_flags,
718 };
719 
720 /*
721  * Returns the flags that bs->file should get if the use of formats (and not
722  * only protocols) is permitted for it, based on the given flags for the parent
723  * BDS
724  */
725 static int bdrv_inherited_fmt_flags(int parent_flags)
726 {
727     int flags = child_file.inherit_flags(parent_flags);
728     return flags & ~BDRV_O_PROTOCOL;
729 }
730 
731 const BdrvChildRole child_format = {
732     .inherit_flags = bdrv_inherited_fmt_flags,
733 };
734 
735 /*
736  * Returns the flags that bs->backing should get, based on the given flags
737  * for the parent BDS
738  */
739 static int bdrv_backing_flags(int flags)
740 {
741     /* backing files always opened read-only */
742     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
743 
744     /* snapshot=on is handled on the top layer */
745     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
746 
747     return flags;
748 }
749 
750 static const BdrvChildRole child_backing = {
751     .inherit_flags = bdrv_backing_flags,
752 };
753 
754 static int bdrv_open_flags(BlockDriverState *bs, int flags)
755 {
756     int open_flags = flags | BDRV_O_CACHE_WB;
757 
758     /*
759      * Clear flags that are internal to the block layer before opening the
760      * image.
761      */
762     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
763 
764     /*
765      * Snapshots should be writable.
766      */
767     if (flags & BDRV_O_TEMPORARY) {
768         open_flags |= BDRV_O_RDWR;
769     }
770 
771     return open_flags;
772 }
773 
774 static void bdrv_assign_node_name(BlockDriverState *bs,
775                                   const char *node_name,
776                                   Error **errp)
777 {
778     char *gen_node_name = NULL;
779 
780     if (!node_name) {
781         node_name = gen_node_name = id_generate(ID_BLOCK);
782     } else if (!id_wellformed(node_name)) {
783         /*
784          * Check for empty string or invalid characters, but not if it is
785          * generated (generated names use characters not available to the user)
786          */
787         error_setg(errp, "Invalid node name");
788         return;
789     }
790 
791     /* takes care of avoiding namespaces collisions */
792     if (blk_by_name(node_name)) {
793         error_setg(errp, "node-name=%s is conflicting with a device id",
794                    node_name);
795         goto out;
796     }
797 
798     /* takes care of avoiding duplicates node names */
799     if (bdrv_find_node(node_name)) {
800         error_setg(errp, "Duplicate node name");
801         goto out;
802     }
803 
804     /* copy node name into the bs and insert it into the graph list */
805     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
806     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
807 out:
808     g_free(gen_node_name);
809 }
810 
811 static QemuOptsList bdrv_runtime_opts = {
812     .name = "bdrv_common",
813     .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
814     .desc = {
815         {
816             .name = "node-name",
817             .type = QEMU_OPT_STRING,
818             .help = "Node name of the block device node",
819         },
820         {
821             .name = "driver",
822             .type = QEMU_OPT_STRING,
823             .help = "Block driver to use for the node",
824         },
825         { /* end of list */ }
826     },
827 };
828 
829 /*
830  * Common part for opening disk images and files
831  *
832  * Removes all processed options from *options.
833  */
834 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
835                             QDict *options, int flags, Error **errp)
836 {
837     int ret, open_flags;
838     const char *filename;
839     const char *driver_name = NULL;
840     const char *node_name = NULL;
841     QemuOpts *opts;
842     BlockDriver *drv;
843     Error *local_err = NULL;
844 
845     assert(bs->file == NULL);
846     assert(options != NULL && bs->options != options);
847 
848     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
849     qemu_opts_absorb_qdict(opts, options, &local_err);
850     if (local_err) {
851         error_propagate(errp, local_err);
852         ret = -EINVAL;
853         goto fail_opts;
854     }
855 
856     driver_name = qemu_opt_get(opts, "driver");
857     drv = bdrv_find_format(driver_name);
858     assert(drv != NULL);
859 
860     if (file != NULL) {
861         filename = file->bs->filename;
862     } else {
863         filename = qdict_get_try_str(options, "filename");
864     }
865 
866     if (drv->bdrv_needs_filename && !filename) {
867         error_setg(errp, "The '%s' block driver requires a file name",
868                    drv->format_name);
869         ret = -EINVAL;
870         goto fail_opts;
871     }
872 
873     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
874 
875     node_name = qemu_opt_get(opts, "node-name");
876     bdrv_assign_node_name(bs, node_name, &local_err);
877     if (local_err) {
878         error_propagate(errp, local_err);
879         ret = -EINVAL;
880         goto fail_opts;
881     }
882 
883     bs->request_alignment = 512;
884     bs->zero_beyond_eof = true;
885     open_flags = bdrv_open_flags(bs, flags);
886     bs->read_only = !(open_flags & BDRV_O_RDWR);
887 
888     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
889         error_setg(errp,
890                    !bs->read_only && bdrv_is_whitelisted(drv, true)
891                         ? "Driver '%s' can only be used for read-only devices"
892                         : "Driver '%s' is not whitelisted",
893                    drv->format_name);
894         ret = -ENOTSUP;
895         goto fail_opts;
896     }
897 
898     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
899     if (flags & BDRV_O_COPY_ON_READ) {
900         if (!bs->read_only) {
901             bdrv_enable_copy_on_read(bs);
902         } else {
903             error_setg(errp, "Can't use copy-on-read on read-only device");
904             ret = -EINVAL;
905             goto fail_opts;
906         }
907     }
908 
909     if (filename != NULL) {
910         pstrcpy(bs->filename, sizeof(bs->filename), filename);
911     } else {
912         bs->filename[0] = '\0';
913     }
914     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
915 
916     bs->drv = drv;
917     bs->opaque = g_malloc0(drv->instance_size);
918 
919     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
920 
921     /* Open the image, either directly or using a protocol */
922     if (drv->bdrv_file_open) {
923         assert(file == NULL);
924         assert(!drv->bdrv_needs_filename || filename != NULL);
925         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
926     } else {
927         if (file == NULL) {
928             error_setg(errp, "Can't use '%s' as a block driver for the "
929                        "protocol level", drv->format_name);
930             ret = -EINVAL;
931             goto free_and_fail;
932         }
933         bs->file = file;
934         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
935     }
936 
937     if (ret < 0) {
938         if (local_err) {
939             error_propagate(errp, local_err);
940         } else if (bs->filename[0]) {
941             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
942         } else {
943             error_setg_errno(errp, -ret, "Could not open image");
944         }
945         goto free_and_fail;
946     }
947 
948     if (bs->encrypted) {
949         error_report("Encrypted images are deprecated");
950         error_printf("Support for them will be removed in a future release.\n"
951                      "You can use 'qemu-img convert' to convert your image"
952                      " to an unencrypted one.\n");
953     }
954 
955     ret = refresh_total_sectors(bs, bs->total_sectors);
956     if (ret < 0) {
957         error_setg_errno(errp, -ret, "Could not refresh total sector count");
958         goto free_and_fail;
959     }
960 
961     bdrv_refresh_limits(bs, &local_err);
962     if (local_err) {
963         error_propagate(errp, local_err);
964         ret = -EINVAL;
965         goto free_and_fail;
966     }
967 
968     assert(bdrv_opt_mem_align(bs) != 0);
969     assert(bdrv_min_mem_align(bs) != 0);
970     assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
971 
972     qemu_opts_del(opts);
973     return 0;
974 
975 free_and_fail:
976     bs->file = NULL;
977     g_free(bs->opaque);
978     bs->opaque = NULL;
979     bs->drv = NULL;
980 fail_opts:
981     qemu_opts_del(opts);
982     return ret;
983 }
984 
985 static QDict *parse_json_filename(const char *filename, Error **errp)
986 {
987     QObject *options_obj;
988     QDict *options;
989     int ret;
990 
991     ret = strstart(filename, "json:", &filename);
992     assert(ret);
993 
994     options_obj = qobject_from_json(filename);
995     if (!options_obj) {
996         error_setg(errp, "Could not parse the JSON options");
997         return NULL;
998     }
999 
1000     if (qobject_type(options_obj) != QTYPE_QDICT) {
1001         qobject_decref(options_obj);
1002         error_setg(errp, "Invalid JSON object given");
1003         return NULL;
1004     }
1005 
1006     options = qobject_to_qdict(options_obj);
1007     qdict_flatten(options);
1008 
1009     return options;
1010 }
1011 
1012 /*
1013  * Fills in default options for opening images and converts the legacy
1014  * filename/flags pair to option QDict entries.
1015  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1016  * block driver has been specified explicitly.
1017  */
1018 static int bdrv_fill_options(QDict **options, const char **pfilename,
1019                              int *flags, Error **errp)
1020 {
1021     const char *filename = *pfilename;
1022     const char *drvname;
1023     bool protocol = *flags & BDRV_O_PROTOCOL;
1024     bool parse_filename = false;
1025     BlockDriver *drv = NULL;
1026     Error *local_err = NULL;
1027 
1028     /* Parse json: pseudo-protocol */
1029     if (filename && g_str_has_prefix(filename, "json:")) {
1030         QDict *json_options = parse_json_filename(filename, &local_err);
1031         if (local_err) {
1032             error_propagate(errp, local_err);
1033             return -EINVAL;
1034         }
1035 
1036         /* Options given in the filename have lower priority than options
1037          * specified directly */
1038         qdict_join(*options, json_options, false);
1039         QDECREF(json_options);
1040         *pfilename = filename = NULL;
1041     }
1042 
1043     drvname = qdict_get_try_str(*options, "driver");
1044     if (drvname) {
1045         drv = bdrv_find_format(drvname);
1046         if (!drv) {
1047             error_setg(errp, "Unknown driver '%s'", drvname);
1048             return -ENOENT;
1049         }
1050         /* If the user has explicitly specified the driver, this choice should
1051          * override the BDRV_O_PROTOCOL flag */
1052         protocol = drv->bdrv_file_open;
1053     }
1054 
1055     if (protocol) {
1056         *flags |= BDRV_O_PROTOCOL;
1057     } else {
1058         *flags &= ~BDRV_O_PROTOCOL;
1059     }
1060 
1061     /* Fetch the file name from the options QDict if necessary */
1062     if (protocol && filename) {
1063         if (!qdict_haskey(*options, "filename")) {
1064             qdict_put(*options, "filename", qstring_from_str(filename));
1065             parse_filename = true;
1066         } else {
1067             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1068                              "the same time");
1069             return -EINVAL;
1070         }
1071     }
1072 
1073     /* Find the right block driver */
1074     filename = qdict_get_try_str(*options, "filename");
1075 
1076     if (!drvname && protocol) {
1077         if (filename) {
1078             drv = bdrv_find_protocol(filename, parse_filename, errp);
1079             if (!drv) {
1080                 return -EINVAL;
1081             }
1082 
1083             drvname = drv->format_name;
1084             qdict_put(*options, "driver", qstring_from_str(drvname));
1085         } else {
1086             error_setg(errp, "Must specify either driver or file");
1087             return -EINVAL;
1088         }
1089     }
1090 
1091     assert(drv || !protocol);
1092 
1093     /* Driver-specific filename parsing */
1094     if (drv && drv->bdrv_parse_filename && parse_filename) {
1095         drv->bdrv_parse_filename(filename, *options, &local_err);
1096         if (local_err) {
1097             error_propagate(errp, local_err);
1098             return -EINVAL;
1099         }
1100 
1101         if (!drv->bdrv_needs_filename) {
1102             qdict_del(*options, "filename");
1103         }
1104     }
1105 
1106     if (runstate_check(RUN_STATE_INMIGRATE)) {
1107         *flags |= BDRV_O_INCOMING;
1108     }
1109 
1110     return 0;
1111 }
1112 
1113 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1114                                     BlockDriverState *child_bs,
1115                                     const char *child_name,
1116                                     const BdrvChildRole *child_role)
1117 {
1118     BdrvChild *child = g_new(BdrvChild, 1);
1119     *child = (BdrvChild) {
1120         .bs     = child_bs,
1121         .name   = g_strdup(child_name),
1122         .role   = child_role,
1123     };
1124 
1125     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1126     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1127 
1128     return child;
1129 }
1130 
1131 static void bdrv_detach_child(BdrvChild *child)
1132 {
1133     QLIST_REMOVE(child, next);
1134     QLIST_REMOVE(child, next_parent);
1135     g_free(child->name);
1136     g_free(child);
1137 }
1138 
1139 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1140 {
1141     BlockDriverState *child_bs;
1142 
1143     if (child == NULL) {
1144         return;
1145     }
1146 
1147     if (child->bs->inherits_from == parent) {
1148         child->bs->inherits_from = NULL;
1149     }
1150 
1151     child_bs = child->bs;
1152     bdrv_detach_child(child);
1153     bdrv_unref(child_bs);
1154 }
1155 
1156 /*
1157  * Sets the backing file link of a BDS. A new reference is created; callers
1158  * which don't need their own reference any more must call bdrv_unref().
1159  */
1160 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1161 {
1162     if (backing_hd) {
1163         bdrv_ref(backing_hd);
1164     }
1165 
1166     if (bs->backing) {
1167         assert(bs->backing_blocker);
1168         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1169         bdrv_unref_child(bs, bs->backing);
1170     } else if (backing_hd) {
1171         error_setg(&bs->backing_blocker,
1172                    "node is used as backing hd of '%s'",
1173                    bdrv_get_device_or_node_name(bs));
1174     }
1175 
1176     if (!backing_hd) {
1177         error_free(bs->backing_blocker);
1178         bs->backing_blocker = NULL;
1179         bs->backing = NULL;
1180         goto out;
1181     }
1182     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1183     bs->open_flags &= ~BDRV_O_NO_BACKING;
1184     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1185     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1186             backing_hd->drv ? backing_hd->drv->format_name : "");
1187 
1188     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1189     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1190     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1191                     bs->backing_blocker);
1192 out:
1193     bdrv_refresh_limits(bs, NULL);
1194 }
1195 
1196 /*
1197  * Opens the backing file for a BlockDriverState if not yet open
1198  *
1199  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1200  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1201  * itself, all options starting with "${bdref_key}." are considered part of the
1202  * BlockdevRef.
1203  *
1204  * TODO Can this be unified with bdrv_open_image()?
1205  */
1206 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1207                            const char *bdref_key, Error **errp)
1208 {
1209     char *backing_filename = g_malloc0(PATH_MAX);
1210     char *bdref_key_dot;
1211     const char *reference = NULL;
1212     int ret = 0;
1213     BlockDriverState *backing_hd;
1214     QDict *options;
1215     QDict *tmp_parent_options = NULL;
1216     Error *local_err = NULL;
1217 
1218     if (bs->backing != NULL) {
1219         goto free_exit;
1220     }
1221 
1222     /* NULL means an empty set of options */
1223     if (parent_options == NULL) {
1224         tmp_parent_options = qdict_new();
1225         parent_options = tmp_parent_options;
1226     }
1227 
1228     bs->open_flags &= ~BDRV_O_NO_BACKING;
1229 
1230     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1231     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1232     g_free(bdref_key_dot);
1233 
1234     reference = qdict_get_try_str(parent_options, bdref_key);
1235     if (reference || qdict_haskey(options, "file.filename")) {
1236         backing_filename[0] = '\0';
1237     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1238         QDECREF(options);
1239         goto free_exit;
1240     } else {
1241         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1242                                        &local_err);
1243         if (local_err) {
1244             ret = -EINVAL;
1245             error_propagate(errp, local_err);
1246             QDECREF(options);
1247             goto free_exit;
1248         }
1249     }
1250 
1251     if (!bs->drv || !bs->drv->supports_backing) {
1252         ret = -EINVAL;
1253         error_setg(errp, "Driver doesn't support backing files");
1254         QDECREF(options);
1255         goto free_exit;
1256     }
1257 
1258     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1259         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1260     }
1261 
1262     backing_hd = NULL;
1263     ret = bdrv_open_inherit(&backing_hd,
1264                             *backing_filename ? backing_filename : NULL,
1265                             reference, options, 0, bs, &child_backing,
1266                             &local_err);
1267     if (ret < 0) {
1268         bs->open_flags |= BDRV_O_NO_BACKING;
1269         error_setg(errp, "Could not open backing file: %s",
1270                    error_get_pretty(local_err));
1271         error_free(local_err);
1272         goto free_exit;
1273     }
1274 
1275     /* Hook up the backing file link; drop our reference, bs owns the
1276      * backing_hd reference now */
1277     bdrv_set_backing_hd(bs, backing_hd);
1278     bdrv_unref(backing_hd);
1279 
1280     qdict_del(parent_options, bdref_key);
1281 
1282 free_exit:
1283     g_free(backing_filename);
1284     QDECREF(tmp_parent_options);
1285     return ret;
1286 }
1287 
1288 /*
1289  * Opens a disk image whose options are given as BlockdevRef in another block
1290  * device's options.
1291  *
1292  * If allow_none is true, no image will be opened if filename is false and no
1293  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1294  *
1295  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1296  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1297  * itself, all options starting with "${bdref_key}." are considered part of the
1298  * BlockdevRef.
1299  *
1300  * The BlockdevRef will be removed from the options QDict.
1301  */
1302 BdrvChild *bdrv_open_child(const char *filename,
1303                            QDict *options, const char *bdref_key,
1304                            BlockDriverState* parent,
1305                            const BdrvChildRole *child_role,
1306                            bool allow_none, Error **errp)
1307 {
1308     BdrvChild *c = NULL;
1309     BlockDriverState *bs;
1310     QDict *image_options;
1311     int ret;
1312     char *bdref_key_dot;
1313     const char *reference;
1314 
1315     assert(child_role != NULL);
1316 
1317     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1318     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1319     g_free(bdref_key_dot);
1320 
1321     reference = qdict_get_try_str(options, bdref_key);
1322     if (!filename && !reference && !qdict_size(image_options)) {
1323         if (!allow_none) {
1324             error_setg(errp, "A block device must be specified for \"%s\"",
1325                        bdref_key);
1326         }
1327         QDECREF(image_options);
1328         goto done;
1329     }
1330 
1331     bs = NULL;
1332     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1333                             parent, child_role, errp);
1334     if (ret < 0) {
1335         goto done;
1336     }
1337 
1338     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1339 
1340 done:
1341     qdict_del(options, bdref_key);
1342     return c;
1343 }
1344 
1345 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1346 {
1347     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1348     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1349     int64_t total_size;
1350     QemuOpts *opts = NULL;
1351     QDict *snapshot_options;
1352     BlockDriverState *bs_snapshot;
1353     Error *local_err = NULL;
1354     int ret;
1355 
1356     /* if snapshot, we create a temporary backing file and open it
1357        instead of opening 'filename' directly */
1358 
1359     /* Get the required size from the image */
1360     total_size = bdrv_getlength(bs);
1361     if (total_size < 0) {
1362         ret = total_size;
1363         error_setg_errno(errp, -total_size, "Could not get image size");
1364         goto out;
1365     }
1366 
1367     /* Create the temporary image */
1368     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1369     if (ret < 0) {
1370         error_setg_errno(errp, -ret, "Could not get temporary filename");
1371         goto out;
1372     }
1373 
1374     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1375                             &error_abort);
1376     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1377     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1378     qemu_opts_del(opts);
1379     if (ret < 0) {
1380         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1381                          "'%s': %s", tmp_filename,
1382                          error_get_pretty(local_err));
1383         error_free(local_err);
1384         goto out;
1385     }
1386 
1387     /* Prepare a new options QDict for the temporary file */
1388     snapshot_options = qdict_new();
1389     qdict_put(snapshot_options, "file.driver",
1390               qstring_from_str("file"));
1391     qdict_put(snapshot_options, "file.filename",
1392               qstring_from_str(tmp_filename));
1393     qdict_put(snapshot_options, "driver",
1394               qstring_from_str("qcow2"));
1395 
1396     bs_snapshot = bdrv_new();
1397 
1398     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1399                     flags, &local_err);
1400     if (ret < 0) {
1401         error_propagate(errp, local_err);
1402         goto out;
1403     }
1404 
1405     bdrv_append(bs_snapshot, bs);
1406 
1407 out:
1408     g_free(tmp_filename);
1409     return ret;
1410 }
1411 
1412 /*
1413  * Opens a disk image (raw, qcow2, vmdk, ...)
1414  *
1415  * options is a QDict of options to pass to the block drivers, or NULL for an
1416  * empty set of options. The reference to the QDict belongs to the block layer
1417  * after the call (even on failure), so if the caller intends to reuse the
1418  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1419  *
1420  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1421  * If it is not NULL, the referenced BDS will be reused.
1422  *
1423  * The reference parameter may be used to specify an existing block device which
1424  * should be opened. If specified, neither options nor a filename may be given,
1425  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1426  */
1427 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1428                              const char *reference, QDict *options, int flags,
1429                              BlockDriverState *parent,
1430                              const BdrvChildRole *child_role, Error **errp)
1431 {
1432     int ret;
1433     BdrvChild *file = NULL;
1434     BlockDriverState *bs;
1435     BlockDriver *drv = NULL;
1436     const char *drvname;
1437     const char *backing;
1438     Error *local_err = NULL;
1439     int snapshot_flags = 0;
1440 
1441     assert(pbs);
1442     assert(!child_role || !flags);
1443     assert(!child_role == !parent);
1444 
1445     if (reference) {
1446         bool options_non_empty = options ? qdict_size(options) : false;
1447         QDECREF(options);
1448 
1449         if (*pbs) {
1450             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1451                        "another block device");
1452             return -EINVAL;
1453         }
1454 
1455         if (filename || options_non_empty) {
1456             error_setg(errp, "Cannot reference an existing block device with "
1457                        "additional options or a new filename");
1458             return -EINVAL;
1459         }
1460 
1461         bs = bdrv_lookup_bs(reference, reference, errp);
1462         if (!bs) {
1463             return -ENODEV;
1464         }
1465         bdrv_ref(bs);
1466         *pbs = bs;
1467         return 0;
1468     }
1469 
1470     if (*pbs) {
1471         bs = *pbs;
1472     } else {
1473         bs = bdrv_new();
1474     }
1475 
1476     /* NULL means an empty set of options */
1477     if (options == NULL) {
1478         options = qdict_new();
1479     }
1480 
1481     if (child_role) {
1482         bs->inherits_from = parent;
1483         flags = child_role->inherit_flags(parent->open_flags);
1484     }
1485 
1486     ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1487     if (local_err) {
1488         goto fail;
1489     }
1490 
1491     bs->open_flags = flags;
1492     bs->options = options;
1493     options = qdict_clone_shallow(options);
1494 
1495     /* Find the right image format driver */
1496     drvname = qdict_get_try_str(options, "driver");
1497     if (drvname) {
1498         drv = bdrv_find_format(drvname);
1499         if (!drv) {
1500             error_setg(errp, "Unknown driver: '%s'", drvname);
1501             ret = -EINVAL;
1502             goto fail;
1503         }
1504     }
1505 
1506     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1507 
1508     backing = qdict_get_try_str(options, "backing");
1509     if (backing && *backing == '\0') {
1510         flags |= BDRV_O_NO_BACKING;
1511         qdict_del(options, "backing");
1512     }
1513 
1514     /* Open image file without format layer */
1515     if ((flags & BDRV_O_PROTOCOL) == 0) {
1516         if (flags & BDRV_O_RDWR) {
1517             flags |= BDRV_O_ALLOW_RDWR;
1518         }
1519         if (flags & BDRV_O_SNAPSHOT) {
1520             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1521             flags = bdrv_backing_flags(flags);
1522         }
1523 
1524         bs->open_flags = flags;
1525 
1526         file = bdrv_open_child(filename, options, "file", bs,
1527                                &child_file, true, &local_err);
1528         if (local_err) {
1529             ret = -EINVAL;
1530             goto fail;
1531         }
1532     }
1533 
1534     /* Image format probing */
1535     bs->probed = !drv;
1536     if (!drv && file) {
1537         ret = find_image_format(file->bs, filename, &drv, &local_err);
1538         if (ret < 0) {
1539             goto fail;
1540         }
1541         /*
1542          * This option update would logically belong in bdrv_fill_options(),
1543          * but we first need to open bs->file for the probing to work, while
1544          * opening bs->file already requires the (mostly) final set of options
1545          * so that cache mode etc. can be inherited.
1546          *
1547          * Adding the driver later is somewhat ugly, but it's not an option
1548          * that would ever be inherited, so it's correct. We just need to make
1549          * sure to update both bs->options (which has the full effective
1550          * options for bs) and options (which has file.* already removed).
1551          */
1552         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1553         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1554     } else if (!drv) {
1555         error_setg(errp, "Must specify either driver or file");
1556         ret = -EINVAL;
1557         goto fail;
1558     }
1559 
1560     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1561     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1562     /* file must be NULL if a protocol BDS is about to be created
1563      * (the inverse results in an error message from bdrv_open_common()) */
1564     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1565 
1566     /* Open the image */
1567     ret = bdrv_open_common(bs, file, options, flags, &local_err);
1568     if (ret < 0) {
1569         goto fail;
1570     }
1571 
1572     if (file && (bs->file != file)) {
1573         bdrv_unref_child(bs, file);
1574         file = NULL;
1575     }
1576 
1577     /* If there is a backing file, use it */
1578     if ((flags & BDRV_O_NO_BACKING) == 0) {
1579         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1580         if (ret < 0) {
1581             goto close_and_fail;
1582         }
1583     }
1584 
1585     bdrv_refresh_filename(bs);
1586 
1587     /* Check if any unknown options were used */
1588     if (options && (qdict_size(options) != 0)) {
1589         const QDictEntry *entry = qdict_first(options);
1590         if (flags & BDRV_O_PROTOCOL) {
1591             error_setg(errp, "Block protocol '%s' doesn't support the option "
1592                        "'%s'", drv->format_name, entry->key);
1593         } else {
1594             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1595                        "support the option '%s'", drv->format_name,
1596                        bdrv_get_device_name(bs), entry->key);
1597         }
1598 
1599         ret = -EINVAL;
1600         goto close_and_fail;
1601     }
1602 
1603     if (!bdrv_key_required(bs)) {
1604         if (bs->blk) {
1605             blk_dev_change_media_cb(bs->blk, true);
1606         }
1607     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1608                && !runstate_check(RUN_STATE_INMIGRATE)
1609                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1610         error_setg(errp,
1611                    "Guest must be stopped for opening of encrypted image");
1612         ret = -EBUSY;
1613         goto close_and_fail;
1614     }
1615 
1616     QDECREF(options);
1617     *pbs = bs;
1618 
1619     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1620      * temporary snapshot afterwards. */
1621     if (snapshot_flags) {
1622         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1623         if (local_err) {
1624             goto close_and_fail;
1625         }
1626     }
1627 
1628     return 0;
1629 
1630 fail:
1631     if (file != NULL) {
1632         bdrv_unref_child(bs, file);
1633     }
1634     QDECREF(bs->options);
1635     QDECREF(options);
1636     bs->options = NULL;
1637     if (!*pbs) {
1638         /* If *pbs is NULL, a new BDS has been created in this function and
1639            needs to be freed now. Otherwise, it does not need to be closed,
1640            since it has not really been opened yet. */
1641         bdrv_unref(bs);
1642     }
1643     if (local_err) {
1644         error_propagate(errp, local_err);
1645     }
1646     return ret;
1647 
1648 close_and_fail:
1649     /* See fail path, but now the BDS has to be always closed */
1650     if (*pbs) {
1651         bdrv_close(bs);
1652     } else {
1653         bdrv_unref(bs);
1654     }
1655     QDECREF(options);
1656     if (local_err) {
1657         error_propagate(errp, local_err);
1658     }
1659     return ret;
1660 }
1661 
1662 int bdrv_open(BlockDriverState **pbs, const char *filename,
1663               const char *reference, QDict *options, int flags, Error **errp)
1664 {
1665     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1666                              NULL, errp);
1667 }
1668 
1669 typedef struct BlockReopenQueueEntry {
1670      bool prepared;
1671      BDRVReopenState state;
1672      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1673 } BlockReopenQueueEntry;
1674 
1675 /*
1676  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1677  * reopen of multiple devices.
1678  *
1679  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1680  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1681  * be created and initialized. This newly created BlockReopenQueue should be
1682  * passed back in for subsequent calls that are intended to be of the same
1683  * atomic 'set'.
1684  *
1685  * bs is the BlockDriverState to add to the reopen queue.
1686  *
1687  * options contains the changed options for the associated bs
1688  * (the BlockReopenQueue takes ownership)
1689  *
1690  * flags contains the open flags for the associated bs
1691  *
1692  * returns a pointer to bs_queue, which is either the newly allocated
1693  * bs_queue, or the existing bs_queue being used.
1694  *
1695  */
1696 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1697                                     BlockDriverState *bs,
1698                                     QDict *options, int flags)
1699 {
1700     assert(bs != NULL);
1701 
1702     BlockReopenQueueEntry *bs_entry;
1703     BdrvChild *child;
1704     QDict *old_options;
1705 
1706     if (bs_queue == NULL) {
1707         bs_queue = g_new0(BlockReopenQueue, 1);
1708         QSIMPLEQ_INIT(bs_queue);
1709     }
1710 
1711     if (!options) {
1712         options = qdict_new();
1713     }
1714 
1715     old_options = qdict_clone_shallow(bs->options);
1716     bdrv_join_options(bs, options, old_options);
1717     QDECREF(old_options);
1718 
1719     /* bdrv_open() masks this flag out */
1720     flags &= ~BDRV_O_PROTOCOL;
1721 
1722     QLIST_FOREACH(child, &bs->children, next) {
1723         int child_flags;
1724 
1725         if (child->bs->inherits_from != bs) {
1726             continue;
1727         }
1728 
1729         child_flags = child->role->inherit_flags(flags);
1730         /* TODO Pass down child flags (backing.*, extents.*, ...) */
1731         bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1732     }
1733 
1734     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1735     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1736 
1737     bs_entry->state.bs = bs;
1738     bs_entry->state.options = options;
1739     bs_entry->state.flags = flags;
1740 
1741     return bs_queue;
1742 }
1743 
1744 /*
1745  * Reopen multiple BlockDriverStates atomically & transactionally.
1746  *
1747  * The queue passed in (bs_queue) must have been built up previous
1748  * via bdrv_reopen_queue().
1749  *
1750  * Reopens all BDS specified in the queue, with the appropriate
1751  * flags.  All devices are prepared for reopen, and failure of any
1752  * device will cause all device changes to be abandonded, and intermediate
1753  * data cleaned up.
1754  *
1755  * If all devices prepare successfully, then the changes are committed
1756  * to all devices.
1757  *
1758  */
1759 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1760 {
1761     int ret = -1;
1762     BlockReopenQueueEntry *bs_entry, *next;
1763     Error *local_err = NULL;
1764 
1765     assert(bs_queue != NULL);
1766 
1767     bdrv_drain_all();
1768 
1769     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1770         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1771             error_propagate(errp, local_err);
1772             goto cleanup;
1773         }
1774         bs_entry->prepared = true;
1775     }
1776 
1777     /* If we reach this point, we have success and just need to apply the
1778      * changes
1779      */
1780     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1781         bdrv_reopen_commit(&bs_entry->state);
1782     }
1783 
1784     ret = 0;
1785 
1786 cleanup:
1787     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1788         if (ret && bs_entry->prepared) {
1789             bdrv_reopen_abort(&bs_entry->state);
1790         }
1791         QDECREF(bs_entry->state.options);
1792         g_free(bs_entry);
1793     }
1794     g_free(bs_queue);
1795     return ret;
1796 }
1797 
1798 
1799 /* Reopen a single BlockDriverState with the specified flags. */
1800 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1801 {
1802     int ret = -1;
1803     Error *local_err = NULL;
1804     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1805 
1806     ret = bdrv_reopen_multiple(queue, &local_err);
1807     if (local_err != NULL) {
1808         error_propagate(errp, local_err);
1809     }
1810     return ret;
1811 }
1812 
1813 
1814 /*
1815  * Prepares a BlockDriverState for reopen. All changes are staged in the
1816  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1817  * the block driver layer .bdrv_reopen_prepare()
1818  *
1819  * bs is the BlockDriverState to reopen
1820  * flags are the new open flags
1821  * queue is the reopen queue
1822  *
1823  * Returns 0 on success, non-zero on error.  On error errp will be set
1824  * as well.
1825  *
1826  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1827  * It is the responsibility of the caller to then call the abort() or
1828  * commit() for any other BDS that have been left in a prepare() state
1829  *
1830  */
1831 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1832                         Error **errp)
1833 {
1834     int ret = -1;
1835     Error *local_err = NULL;
1836     BlockDriver *drv;
1837 
1838     assert(reopen_state != NULL);
1839     assert(reopen_state->bs->drv != NULL);
1840     drv = reopen_state->bs->drv;
1841 
1842     /* if we are to stay read-only, do not allow permission change
1843      * to r/w */
1844     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1845         reopen_state->flags & BDRV_O_RDWR) {
1846         error_setg(errp, "Node '%s' is read only",
1847                    bdrv_get_device_or_node_name(reopen_state->bs));
1848         goto error;
1849     }
1850 
1851 
1852     ret = bdrv_flush(reopen_state->bs);
1853     if (ret) {
1854         error_setg_errno(errp, -ret, "Error flushing drive");
1855         goto error;
1856     }
1857 
1858     if (drv->bdrv_reopen_prepare) {
1859         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1860         if (ret) {
1861             if (local_err != NULL) {
1862                 error_propagate(errp, local_err);
1863             } else {
1864                 error_setg(errp, "failed while preparing to reopen image '%s'",
1865                            reopen_state->bs->filename);
1866             }
1867             goto error;
1868         }
1869     } else {
1870         /* It is currently mandatory to have a bdrv_reopen_prepare()
1871          * handler for each supported drv. */
1872         error_setg(errp, "Block format '%s' used by node '%s' "
1873                    "does not support reopening files", drv->format_name,
1874                    bdrv_get_device_or_node_name(reopen_state->bs));
1875         ret = -1;
1876         goto error;
1877     }
1878 
1879     /* Options that are not handled are only okay if they are unchanged
1880      * compared to the old state. It is expected that some options are only
1881      * used for the initial open, but not reopen (e.g. filename) */
1882     if (qdict_size(reopen_state->options)) {
1883         const QDictEntry *entry = qdict_first(reopen_state->options);
1884 
1885         do {
1886             QString *new_obj = qobject_to_qstring(entry->value);
1887             const char *new = qstring_get_str(new_obj);
1888             const char *old = qdict_get_try_str(reopen_state->bs->options,
1889                                                 entry->key);
1890 
1891             if (!old || strcmp(new, old)) {
1892                 error_setg(errp, "Cannot change the option '%s'", entry->key);
1893                 ret = -EINVAL;
1894                 goto error;
1895             }
1896         } while ((entry = qdict_next(reopen_state->options, entry)));
1897     }
1898 
1899     ret = 0;
1900 
1901 error:
1902     return ret;
1903 }
1904 
1905 /*
1906  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1907  * makes them final by swapping the staging BlockDriverState contents into
1908  * the active BlockDriverState contents.
1909  */
1910 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1911 {
1912     BlockDriver *drv;
1913 
1914     assert(reopen_state != NULL);
1915     drv = reopen_state->bs->drv;
1916     assert(drv != NULL);
1917 
1918     /* If there are any driver level actions to take */
1919     if (drv->bdrv_reopen_commit) {
1920         drv->bdrv_reopen_commit(reopen_state);
1921     }
1922 
1923     /* set BDS specific flags now */
1924     reopen_state->bs->open_flags         = reopen_state->flags;
1925     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1926                                               BDRV_O_CACHE_WB);
1927     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1928 
1929     bdrv_refresh_limits(reopen_state->bs, NULL);
1930 }
1931 
1932 /*
1933  * Abort the reopen, and delete and free the staged changes in
1934  * reopen_state
1935  */
1936 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1937 {
1938     BlockDriver *drv;
1939 
1940     assert(reopen_state != NULL);
1941     drv = reopen_state->bs->drv;
1942     assert(drv != NULL);
1943 
1944     if (drv->bdrv_reopen_abort) {
1945         drv->bdrv_reopen_abort(reopen_state);
1946     }
1947 }
1948 
1949 
/*
 * Close the image attached to @bs and reset @bs to an empty state.
 *
 * Any block job is cancelled synchronously, I/O throttling is disabled,
 * pending I/O is drained and flushed, and the close notifiers are called.
 * If a driver is attached, its bdrv_close() callback runs, the backing and
 * file children are dropped, remaining children are detached, and the
 * per-image fields are cleared.  All registered AIO notifiers are freed.
 *
 * The BlockDriverState object itself is not freed here.
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }

    /* Disable I/O limits and drain all pending throttled requests */
    if (bs->throttle_state) {
        bdrv_io_limits_disable(bs);
    }

    bdrv_drain(bs); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    /* Report the medium change to the attached BlockBackend, if any */
    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    if (bs->drv) {
        BdrvChild *child, *next;

        /* Let the driver close first; from here on @bs has no driver */
        bs->drv->bdrv_close(bs);
        bs->drv = NULL;

        bdrv_set_backing_hd(bs, NULL);

        if (bs->file != NULL) {
            bdrv_unref_child(bs, bs->file);
            bs->file = NULL;
        }

        /* _SAFE variant: each child is detached while iterating */
        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
            /* TODO Remove bdrv_unref() from drivers' close function and use
             * bdrv_unref_child() here */
            if (child->bs->inherits_from == bs) {
                child->bs->inherits_from = NULL;
            }
            bdrv_detach_child(child);
        }

        /* Release driver-private data and reset the per-image state */
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;
    }

    /* Free every AIO notifier and leave the list empty */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
2015 
2016 void bdrv_close_all(void)
2017 {
2018     BlockDriverState *bs;
2019 
2020     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2021         AioContext *aio_context = bdrv_get_aio_context(bs);
2022 
2023         aio_context_acquire(aio_context);
2024         bdrv_close(bs);
2025         aio_context_release(aio_context);
2026     }
2027 }
2028 
2029 /* make a BlockDriverState anonymous by removing from bdrv_state and
2030  * graph_bdrv_state list.
2031    Also, NULL terminate the device_name to prevent double remove */
2032 void bdrv_make_anon(BlockDriverState *bs)
2033 {
2034     /*
2035      * Take care to remove bs from bdrv_states only when it's actually
2036      * in it.  Note that bs->device_list.tqe_prev is initially null,
2037      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2038      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2039      * resetting it to null on remove.
2040      */
2041     if (bs->device_list.tqe_prev) {
2042         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2043         bs->device_list.tqe_prev = NULL;
2044     }
2045     if (bs->node_name[0] != '\0') {
2046         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2047     }
2048     bs->node_name[0] = '\0';
2049 }
2050 
2051 /* Fields that need to stay with the top-level BDS */
2052 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2053                                      BlockDriverState *bs_src)
2054 {
2055     /* move some fields that need to stay attached to the device */
2056 
2057     /* dev info */
2058     bs_dest->copy_on_read       = bs_src->copy_on_read;
2059 
2060     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2061 
2062     /* dirty bitmap */
2063     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2064 }
2065 
/*
 * Redirect everything that points at @from so that it points at @to.
 *
 * Every BdrvChild on @from's parent list is retargeted to @to and moved to
 * @to's parent list; one reference is taken on @to and one dropped on @from
 * per moved link.  None of those links may have the child_backing role.
 *
 * If @from has a BlockBackend, the backend is switched to @to and @to takes
 * @from's place in bdrv_states (being inserted only if not already listed).
 */
static void change_parent_backing_link(BlockDriverState *from,
                                       BlockDriverState *to)
{
    BdrvChild *c, *next;

    /* _SAFE variant: c is unlinked from the list being walked */
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
        QLIST_REMOVE(c, next_parent);
        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
        /* The reference held through this link moves from 'from' to 'to' */
        bdrv_ref(to);
        bdrv_unref(from);
    }
    if (from->blk) {
        blk_set_bs(from->blk, to);
        /* A null tqe_prev means 'to' is not on bdrv_states yet */
        if (!to->device_list.tqe_prev) {
            QTAILQ_INSERT_BEFORE(from, to, device_list);
        }
        QTAILQ_REMOVE(&bdrv_states, from, device_list);
    }
}
2087 
2088 static void swap_feature_fields(BlockDriverState *bs_top,
2089                                 BlockDriverState *bs_new)
2090 {
2091     BlockDriverState tmp;
2092 
2093     bdrv_move_feature_fields(&tmp, bs_top);
2094     bdrv_move_feature_fields(bs_top, bs_new);
2095     bdrv_move_feature_fields(bs_new, &tmp);
2096 
2097     assert(!bs_new->throttle_state);
2098     if (bs_top->throttle_state) {
2099         assert(bs_top->io_limits_enabled);
2100         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2101         bdrv_io_limits_disable(bs_top);
2102     }
2103 }
2104 
/*
 * Add bs_new at the top of an image chain while the chain is live, while
 * keeping required fields on the top layer.
 *
 * The parents of bs_top are redirected to bs_new, the top-level feature
 * fields are swapped between the two nodes, and bs_top becomes the backing
 * file of bs_new. Both bs_new and bs_top are modified.
 *
 * Neither node may have requests pending.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 *
 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
 * that's what the callers commonly need. bs_new will be referenced by the old
 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
 * reference of its own, it must call bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold a temporary reference on bs_top while its parent links move */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    bdrv_set_backing_hd(bs_new, bs_top);
    /* Drop the temporary reference taken above */
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2139 
/*
 * Make @new take the place of @old: all parent links (and the BlockBackend,
 * if any) that pointed at @old are redirected to @new.
 *
 * If @old has a BlockBackend, the top-level feature fields are swapped
 * between the two nodes first; @new must not have a BlockBackend then.
 * If @new has no backing file and is not already part of @old's backing
 * chain, @old's backing file is moved over to @new.
 *
 * Neither node may have requests pending.
 */
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
    assert(!bdrv_requests_pending(old));
    assert(!bdrv_requests_pending(new));

    /* Hold a temporary reference on old while its parent links move */
    bdrv_ref(old);

    if (old->blk) {
        /* As long as these fields aren't in BlockBackend, but in the top-level
         * BlockDriverState, it's not possible for a BDS to have two BBs.
         *
         * We really want to copy the fields from old to new, but we go for a
         * swap instead so that pointers aren't duplicated and cause trouble.
         * (Also, bdrv_swap() used to do the same.) */
        assert(!new->blk);
        swap_feature_fields(old, new);
    }
    change_parent_backing_link(old, new);

    /* Change backing files if a previously independent node is added to the
     * chain. For active commit, we replace top by its own (indirect) backing
     * file and don't do anything here so we don't build a loop. */
    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
        bdrv_set_backing_hd(new, backing_bs(old));
        bdrv_set_backing_hd(old, NULL);
    }

    /* Drop the temporary reference taken above */
    bdrv_unref(old);
}
2169 
/*
 * Destroy @bs: close it, remove it from the global lists and free it.
 *
 * @bs must have no block job, no op blockers, a reference count of zero and
 * an empty dirty bitmap list.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2184 
2185 /*
2186  * Run consistency checks on an image
2187  *
2188  * Returns 0 if the check could be completed (it doesn't mean that the image is
2189  * free of errors) or -errno when an internal error occurred. The results of the
2190  * check are stored in res.
2191  */
2192 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2193 {
2194     if (bs->drv == NULL) {
2195         return -ENOMEDIUM;
2196     }
2197     if (bs->drv->bdrv_check == NULL) {
2198         return -ENOTSUP;
2199     }
2200 
2201     memset(res, 0, sizeof(*res));
2202     return bs->drv->bdrv_check(bs, res, fix);
2203 }
2204 
/* Number of sectors copied per iteration by bdrv_commit() */
#define COMMIT_BUF_SECTORS 2048

/*
 * Commit COW file into the raw image: copy every allocated sector of @bs
 * into its backing file.
 *
 * A read-only backing file is temporarily reopened read-write and switched
 * back before returning.  If @bs is larger than its backing file, the
 * backing file is grown first.  After a successful copy the driver's
 * bdrv_make_empty() callback, if present, is invoked on @bs.
 *
 * Returns 0 on success, or a negative errno:
 *   -ENOMEDIUM  @bs has no driver
 *   -ENOTSUP    @bs has no backing file
 *   -EBUSY      the commit is blocked on @bs or on its backing file
 *   -EACCES     the backing file could not be reopened read-write
 *   -ENOMEM     the bounce buffer could not be allocated
 *   other       error passed through from a length query, truncate,
 *               allocation query, read, write or make_empty call
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's original state for ro_cleanup */
    ro = bs->backing->bs->read_only;
    open_flags =  bs->backing->bs->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* n is filled in by bdrv_is_allocated() and is also the loop step */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* Only runs reported as allocated in bs are copied down */
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing->bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* NOTE(review): the flush result is ignored here — confirm intended */
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing) {
        /* NOTE(review): the flush result is ignored here — confirm intended */
        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
ro_cleanup:
    /* Reached on success and on every failure after the optional reopen */
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2314 
2315 int bdrv_commit_all(void)
2316 {
2317     BlockDriverState *bs;
2318 
2319     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2320         AioContext *aio_context = bdrv_get_aio_context(bs);
2321 
2322         aio_context_acquire(aio_context);
2323         if (bs->drv && bs->backing) {
2324             int ret = bdrv_commit(bs);
2325             if (ret < 0) {
2326                 aio_context_release(aio_context);
2327                 return ret;
2328             }
2329         }
2330         aio_context_release(aio_context);
2331     }
2332     return 0;
2333 }
2334 
2335 /*
2336  * Return values:
2337  * 0        - success
2338  * -EINVAL  - backing format specified, but no file
2339  * -ENOSPC  - can't update the backing file because no space is left in the
2340  *            image file header
2341  * -ENOTSUP - format driver doesn't support changing the backing file
2342  */
2343 int bdrv_change_backing_file(BlockDriverState *bs,
2344     const char *backing_file, const char *backing_fmt)
2345 {
2346     BlockDriver *drv = bs->drv;
2347     int ret;
2348 
2349     /* Backing file format doesn't make sense without a backing file */
2350     if (backing_fmt && !backing_file) {
2351         return -EINVAL;
2352     }
2353 
2354     if (drv->bdrv_change_backing_file != NULL) {
2355         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2356     } else {
2357         ret = -ENOTSUP;
2358     }
2359 
2360     if (ret == 0) {
2361         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2362         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2363     }
2364     return ret;
2365 }
2366 
2367 /*
2368  * Finds the image layer in the chain that has 'bs' as its backing file.
2369  *
2370  * active is the current topmost image.
2371  *
2372  * Returns NULL if bs is not found in active's image chain,
2373  * or if active == bs.
2374  *
2375  * Returns the bottommost base image if bs == NULL.
2376  */
2377 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2378                                     BlockDriverState *bs)
2379 {
2380     while (active && bs != backing_bs(active)) {
2381         active = backing_bs(active);
2382     }
2383 
2384     return active;
2385 }
2386 
2387 /* Given a BDS, searches for the base layer. */
2388 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2389 {
2390     return bdrv_find_overlay(bs, NULL);
2391 }
2392 
2393 /*
2394  * Drops images above 'base' up to and including 'top', and sets the image
2395  * above 'top' to have base as its backing file.
2396  *
2397  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2398  * information in 'bs' can be properly updated.
2399  *
2400  * E.g., this will convert the following chain:
2401  * bottom <- base <- intermediate <- top <- active
2402  *
2403  * to
2404  *
2405  * bottom <- base <- active
2406  *
2407  * It is allowed for bottom==base, in which case it converts:
2408  *
2409  * base <- intermediate <- top <- active
2410  *
2411  * to
2412  *
2413  * base <- active
2414  *
2415  * If backing_file_str is non-NULL, it will be used when modifying top's
2416  * overlay image metadata.
2417  *
2418  * Error conditions:
2419  *  if active == top, that is considered an error
2420  *
2421  */
2422 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2423                            BlockDriverState *base, const char *backing_file_str)
2424 {
2425     BlockDriverState *new_top_bs = NULL;
2426     int ret = -EIO;
2427 
2428     if (!top->drv || !base->drv) {
2429         goto exit;
2430     }
2431 
2432     new_top_bs = bdrv_find_overlay(active, top);
2433 
2434     if (new_top_bs == NULL) {
2435         /* we could not find the image above 'top', this is an error */
2436         goto exit;
2437     }
2438 
2439     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2440      * to do, no intermediate images */
2441     if (backing_bs(new_top_bs) == base) {
2442         ret = 0;
2443         goto exit;
2444     }
2445 
2446     /* Make sure that base is in the backing chain of top */
2447     if (!bdrv_chain_contains(top, base)) {
2448         goto exit;
2449     }
2450 
2451     /* success - we can delete the intermediate states, and link top->base */
2452     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2453     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2454                                    base->drv ? base->drv->format_name : "");
2455     if (ret) {
2456         goto exit;
2457     }
2458     bdrv_set_backing_hd(new_top_bs, base);
2459 
2460     ret = 0;
2461 exit:
2462     return ret;
2463 }
2464 
2465 /**
2466  * Truncate file to 'offset' bytes (needed only for file protocols)
2467  */
2468 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2469 {
2470     BlockDriver *drv = bs->drv;
2471     int ret;
2472     if (!drv)
2473         return -ENOMEDIUM;
2474     if (!drv->bdrv_truncate)
2475         return -ENOTSUP;
2476     if (bs->read_only)
2477         return -EACCES;
2478 
2479     ret = drv->bdrv_truncate(bs, offset);
2480     if (ret == 0) {
2481         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2482         bdrv_dirty_bitmap_truncate(bs);
2483         if (bs->blk) {
2484             blk_dev_resize_cb(bs->blk);
2485         }
2486     }
2487     return ret;
2488 }
2489 
2490 /**
2491  * Length of a allocated file in bytes. Sparse files are counted by actual
2492  * allocated space. Return < 0 if error or unknown.
2493  */
2494 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2495 {
2496     BlockDriver *drv = bs->drv;
2497     if (!drv) {
2498         return -ENOMEDIUM;
2499     }
2500     if (drv->bdrv_get_allocated_file_size) {
2501         return drv->bdrv_get_allocated_file_size(bs);
2502     }
2503     if (bs->file) {
2504         return bdrv_get_allocated_file_size(bs->file->bs);
2505     }
2506     return -ENOTSUP;
2507 }
2508 
2509 /**
2510  * Return number of sectors on success, -errno on error.
2511  */
2512 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2513 {
2514     BlockDriver *drv = bs->drv;
2515 
2516     if (!drv)
2517         return -ENOMEDIUM;
2518 
2519     if (drv->has_variable_length) {
2520         int ret = refresh_total_sectors(bs, bs->total_sectors);
2521         if (ret < 0) {
2522             return ret;
2523         }
2524     }
2525     return bs->total_sectors;
2526 }
2527 
2528 /**
2529  * Return length in bytes on success, -errno on error.
2530  * The length is always a multiple of BDRV_SECTOR_SIZE.
2531  */
2532 int64_t bdrv_getlength(BlockDriverState *bs)
2533 {
2534     int64_t ret = bdrv_nb_sectors(bs);
2535 
2536     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2537     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2538 }
2539 
2540 /* return 0 as number of sectors if no device present or error */
2541 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2542 {
2543     int64_t nb_sectors = bdrv_nb_sectors(bs);
2544 
2545     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2546 }
2547 
/* Return the read_only field of @bs. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2552 
/* Return the sg field of @bs.
 * NOTE(review): presumably "sg" marks a SCSI generic device — confirm. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2557 
/* Return the current enable_write_cache setting of @bs.  Despite the name
 * this is a getter; bdrv_set_enable_write_cache() is the setter. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2562 
2563 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2564 {
2565     bs->enable_write_cache = wce;
2566 
2567     /* so a reopen() will preserve wce */
2568     if (wce) {
2569         bs->open_flags |= BDRV_O_CACHE_WB;
2570     } else {
2571         bs->open_flags &= ~BDRV_O_CACHE_WB;
2572     }
2573 }
2574 
2575 int bdrv_is_encrypted(BlockDriverState *bs)
2576 {
2577     if (bs->backing && bs->backing->bs->encrypted) {
2578         return 1;
2579     }
2580     return bs->encrypted;
2581 }
2582 
2583 int bdrv_key_required(BlockDriverState *bs)
2584 {
2585     BdrvChild *backing = bs->backing;
2586 
2587     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2588         return 1;
2589     }
2590     return (bs->encrypted && !bs->valid_key);
2591 }
2592 
2593 int bdrv_set_key(BlockDriverState *bs, const char *key)
2594 {
2595     int ret;
2596     if (bs->backing && bs->backing->bs->encrypted) {
2597         ret = bdrv_set_key(bs->backing->bs, key);
2598         if (ret < 0)
2599             return ret;
2600         if (!bs->encrypted)
2601             return 0;
2602     }
2603     if (!bs->encrypted) {
2604         return -EINVAL;
2605     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2606         return -ENOMEDIUM;
2607     }
2608     ret = bs->drv->bdrv_set_key(bs, key);
2609     if (ret < 0) {
2610         bs->valid_key = 0;
2611     } else if (!bs->valid_key) {
2612         bs->valid_key = 1;
2613         if (bs->blk) {
2614             /* call the change callback now, we skipped it on open */
2615             blk_dev_change_media_cb(bs->blk, true);
2616         }
2617     }
2618     return ret;
2619 }
2620 
2621 /*
2622  * Provide an encryption key for @bs.
2623  * If @key is non-null:
2624  *     If @bs is not encrypted, fail.
2625  *     Else if the key is invalid, fail.
2626  *     Else set @bs's key to @key, replacing the existing key, if any.
2627  * If @key is null:
2628  *     If @bs is encrypted and still lacks a key, fail.
2629  *     Else do nothing.
2630  * On failure, store an error object through @errp if non-null.
2631  */
2632 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2633 {
2634     if (key) {
2635         if (!bdrv_is_encrypted(bs)) {
2636             error_setg(errp, "Node '%s' is not encrypted",
2637                       bdrv_get_device_or_node_name(bs));
2638         } else if (bdrv_set_key(bs, key) < 0) {
2639             error_setg(errp, QERR_INVALID_PASSWORD);
2640         }
2641     } else {
2642         if (bdrv_key_required(bs)) {
2643             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2644                       "'%s' (%s) is encrypted",
2645                       bdrv_get_device_or_node_name(bs),
2646                       bdrv_get_encrypted_filename(bs));
2647         }
2648     }
2649 }
2650 
2651 const char *bdrv_get_format_name(BlockDriverState *bs)
2652 {
2653     return bs->drv ? bs->drv->format_name : NULL;
2654 }
2655 
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point at
 * a 'const char *' slot.  The strings have to be fetched through them;
 * comparing @a and @b directly would compare the bytes of the pointers
 * stored in the array rather than the text they point to.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char *const *)a, *(const char *const *)b);
}
2660 
2661 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2662                          void *opaque)
2663 {
2664     BlockDriver *drv;
2665     int count = 0;
2666     int i;
2667     const char **formats = NULL;
2668 
2669     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2670         if (drv->format_name) {
2671             bool found = false;
2672             int i = count;
2673             while (formats && i && !found) {
2674                 found = !strcmp(formats[--i], drv->format_name);
2675             }
2676 
2677             if (!found) {
2678                 formats = g_renew(const char *, formats, count + 1);
2679                 formats[count++] = drv->format_name;
2680             }
2681         }
2682     }
2683 
2684     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2685 
2686     for (i = 0; i < count; i++) {
2687         it(opaque, formats[i]);
2688     }
2689 
2690     g_free(formats);
2691 }
2692 
2693 /* This function is to find a node in the bs graph */
2694 BlockDriverState *bdrv_find_node(const char *node_name)
2695 {
2696     BlockDriverState *bs;
2697 
2698     assert(node_name);
2699 
2700     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2701         if (!strcmp(node_name, bs->node_name)) {
2702             return bs;
2703         }
2704     }
2705     return NULL;
2706 }
2707 
2708 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2709 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2710 {
2711     BlockDeviceInfoList *list, *entry;
2712     BlockDriverState *bs;
2713 
2714     list = NULL;
2715     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2716         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2717         if (!info) {
2718             qapi_free_BlockDeviceInfoList(list);
2719             return NULL;
2720         }
2721         entry = g_malloc0(sizeof(*entry));
2722         entry->value = info;
2723         entry->next = list;
2724         list = entry;
2725     }
2726 
2727     return list;
2728 }
2729 
2730 BlockDriverState *bdrv_lookup_bs(const char *device,
2731                                  const char *node_name,
2732                                  Error **errp)
2733 {
2734     BlockBackend *blk;
2735     BlockDriverState *bs;
2736 
2737     if (device) {
2738         blk = blk_by_name(device);
2739 
2740         if (blk) {
2741             bs = blk_bs(blk);
2742             if (!bs) {
2743                 error_setg(errp, "Device '%s' has no medium", device);
2744             }
2745 
2746             return bs;
2747         }
2748     }
2749 
2750     if (node_name) {
2751         bs = bdrv_find_node(node_name);
2752 
2753         if (bs) {
2754             return bs;
2755         }
2756     }
2757 
2758     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2759                      device ? device : "",
2760                      node_name ? node_name : "");
2761     return NULL;
2762 }
2763 
2764 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2765  * return false.  If either argument is NULL, return false. */
2766 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2767 {
2768     while (top && top != base) {
2769         top = backing_bs(top);
2770     }
2771 
2772     return top != NULL;
2773 }
2774 
2775 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2776 {
2777     if (!bs) {
2778         return QTAILQ_FIRST(&graph_bdrv_states);
2779     }
2780     return QTAILQ_NEXT(bs, node_list);
2781 }
2782 
2783 BlockDriverState *bdrv_next(BlockDriverState *bs)
2784 {
2785     if (!bs) {
2786         return QTAILQ_FIRST(&bdrv_states);
2787     }
2788     return QTAILQ_NEXT(bs, device_list);
2789 }
2790 
/* Return the node name stored in @bs.  The returned pointer refers to
 * storage inside @bs. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
2795 
2796 /* TODO check what callers really want: bs->node_name or blk_name() */
2797 const char *bdrv_get_device_name(const BlockDriverState *bs)
2798 {
2799     return bs->blk ? blk_name(bs->blk) : "";
2800 }
2801 
2802 /* This can be used to identify nodes that might not have a device
2803  * name associated. Since node and device names live in the same
2804  * namespace, the result is unambiguous. The exception is if both are
2805  * absent, then this returns an empty (non-null) string. */
2806 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2807 {
2808     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2809 }
2810 
/* Return the current open flags (BDRV_O_* bits) of @bs. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
2815 
/* Always returns 1; @bs is not examined.
 * NOTE(review): presumably meant as a ready-made bdrv_has_zero_init
 * callback for drivers — confirm. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
2820 
2821 int bdrv_has_zero_init(BlockDriverState *bs)
2822 {
2823     assert(bs->drv);
2824 
2825     /* If BS is a copy on write image, it is initialized to
2826        the contents of the base image, which may not be zeroes.  */
2827     if (bs->backing) {
2828         return 0;
2829     }
2830     if (bs->drv->bdrv_has_zero_init) {
2831         return bs->drv->bdrv_has_zero_init(bs);
2832     }
2833 
2834     /* safe default */
2835     return 0;
2836 }
2837 
2838 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2839 {
2840     BlockDriverInfo bdi;
2841 
2842     if (bs->backing) {
2843         return false;
2844     }
2845 
2846     if (bdrv_get_info(bs, &bdi) == 0) {
2847         return bdi.unallocated_blocks_are_zero;
2848     }
2849 
2850     return false;
2851 }
2852 
2853 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2854 {
2855     BlockDriverInfo bdi;
2856 
2857     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2858         return false;
2859     }
2860 
2861     if (bdrv_get_info(bs, &bdi) == 0) {
2862         return bdi.can_write_zeroes_with_unmap;
2863     }
2864 
2865     return false;
2866 }
2867 
2868 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2869 {
2870     if (bs->backing && bs->backing->bs->encrypted)
2871         return bs->backing_file;
2872     else if (bs->encrypted)
2873         return bs->filename;
2874     else
2875         return NULL;
2876 }
2877 
/* Copy the backing file name of @bs into @filename, a caller-provided
 * buffer of @filename_size bytes. */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
2883 
2884 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2885 {
2886     BlockDriver *drv = bs->drv;
2887     if (!drv)
2888         return -ENOMEDIUM;
2889     if (!drv->bdrv_get_info)
2890         return -ENOTSUP;
2891     memset(bdi, 0, sizeof(*bdi));
2892     return drv->bdrv_get_info(bs, bdi);
2893 }
2894 
2895 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2896 {
2897     BlockDriver *drv = bs->drv;
2898     if (drv && drv->bdrv_get_specific_info) {
2899         return drv->bdrv_get_specific_info(bs);
2900     }
2901     return NULL;
2902 }
2903 
2904 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
2905 {
2906     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2907         return;
2908     }
2909 
2910     bs->drv->bdrv_debug_event(bs, event);
2911 }
2912 
2913 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2914                           const char *tag)
2915 {
2916     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2917         bs = bs->file ? bs->file->bs : NULL;
2918     }
2919 
2920     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2921         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2922     }
2923 
2924     return -ENOTSUP;
2925 }
2926 
2927 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2928 {
2929     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2930         bs = bs->file ? bs->file->bs : NULL;
2931     }
2932 
2933     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2934         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2935     }
2936 
2937     return -ENOTSUP;
2938 }
2939 
2940 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2941 {
2942     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2943         bs = bs->file ? bs->file->bs : NULL;
2944     }
2945 
2946     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2947         return bs->drv->bdrv_debug_resume(bs, tag);
2948     }
2949 
2950     return -ENOTSUP;
2951 }
2952 
2953 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2954 {
2955     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2956         bs = bs->file ? bs->file->bs : NULL;
2957     }
2958 
2959     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2960         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2961     }
2962 
2963     return false;
2964 }
2965 
2966 int bdrv_is_snapshot(BlockDriverState *bs)
2967 {
2968     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2969 }
2970 
2971 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2972  * relative, it must be relative to the chain.  So, passing in bs->filename
2973  * from a BDS as backing_file should not be done, as that may be relative to
2974  * the CWD rather than the chain. */
2975 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2976         const char *backing_file)
2977 {
2978     char *filename_full = NULL;
2979     char *backing_file_full = NULL;
2980     char *filename_tmp = NULL;
2981     int is_protocol = 0;
2982     BlockDriverState *curr_bs = NULL;
2983     BlockDriverState *retval = NULL;
2984 
2985     if (!bs || !bs->drv || !backing_file) {
2986         return NULL;
2987     }
2988 
2989     filename_full     = g_malloc(PATH_MAX);
2990     backing_file_full = g_malloc(PATH_MAX);
2991     filename_tmp      = g_malloc(PATH_MAX);
2992 
2993     is_protocol = path_has_protocol(backing_file);
2994 
2995     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
2996 
2997         /* If either of the filename paths is actually a protocol, then
2998          * compare unmodified paths; otherwise make paths relative */
2999         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3000             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3001                 retval = curr_bs->backing->bs;
3002                 break;
3003             }
3004         } else {
3005             /* If not an absolute filename path, make it relative to the current
3006              * image's filename path */
3007             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3008                          backing_file);
3009 
3010             /* We are going to compare absolute pathnames */
3011             if (!realpath(filename_tmp, filename_full)) {
3012                 continue;
3013             }
3014 
3015             /* We need to make sure the backing filename we are comparing against
3016              * is relative to the current image filename (or absolute) */
3017             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3018                          curr_bs->backing_file);
3019 
3020             if (!realpath(filename_tmp, backing_file_full)) {
3021                 continue;
3022             }
3023 
3024             if (strcmp(backing_file_full, filename_full) == 0) {
3025                 retval = curr_bs->backing->bs;
3026                 break;
3027             }
3028         }
3029     }
3030 
3031     g_free(filename_full);
3032     g_free(backing_file_full);
3033     g_free(filename_tmp);
3034     return retval;
3035 }
3036 
3037 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3038 {
3039     if (!bs->drv) {
3040         return 0;
3041     }
3042 
3043     if (!bs->backing) {
3044         return 0;
3045     }
3046 
3047     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3048 }
3049 
3050 void bdrv_init(void)
3051 {
3052     module_call_init(MODULE_INIT_BLOCK);
3053 }
3054 
3055 void bdrv_init_with_whitelist(void)
3056 {
3057     use_bdrv_whitelist = 1;
3058     bdrv_init();
3059 }
3060 
3061 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3062 {
3063     Error *local_err = NULL;
3064     int ret;
3065 
3066     if (!bs->drv)  {
3067         return;
3068     }
3069 
3070     if (!(bs->open_flags & BDRV_O_INCOMING)) {
3071         return;
3072     }
3073     bs->open_flags &= ~BDRV_O_INCOMING;
3074 
3075     if (bs->drv->bdrv_invalidate_cache) {
3076         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3077     } else if (bs->file) {
3078         bdrv_invalidate_cache(bs->file->bs, &local_err);
3079     }
3080     if (local_err) {
3081         error_propagate(errp, local_err);
3082         return;
3083     }
3084 
3085     ret = refresh_total_sectors(bs, bs->total_sectors);
3086     if (ret < 0) {
3087         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3088         return;
3089     }
3090 }
3091 
3092 void bdrv_invalidate_cache_all(Error **errp)
3093 {
3094     BlockDriverState *bs;
3095     Error *local_err = NULL;
3096 
3097     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3098         AioContext *aio_context = bdrv_get_aio_context(bs);
3099 
3100         aio_context_acquire(aio_context);
3101         bdrv_invalidate_cache(bs, &local_err);
3102         aio_context_release(aio_context);
3103         if (local_err) {
3104             error_propagate(errp, local_err);
3105             return;
3106         }
3107     }
3108 }
3109 
3110 /**************************************************************/
3111 /* removable device support */
3112 
3113 /**
3114  * Return TRUE if the media is present
3115  */
3116 bool bdrv_is_inserted(BlockDriverState *bs)
3117 {
3118     BlockDriver *drv = bs->drv;
3119     BdrvChild *child;
3120 
3121     if (!drv) {
3122         return false;
3123     }
3124     if (drv->bdrv_is_inserted) {
3125         return drv->bdrv_is_inserted(bs);
3126     }
3127     QLIST_FOREACH(child, &bs->children, next) {
3128         if (!bdrv_is_inserted(child->bs)) {
3129             return false;
3130         }
3131     }
3132     return true;
3133 }
3134 
3135 /**
3136  * Return whether the media changed since the last call to this
3137  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3138  */
3139 int bdrv_media_changed(BlockDriverState *bs)
3140 {
3141     BlockDriver *drv = bs->drv;
3142 
3143     if (drv && drv->bdrv_media_changed) {
3144         return drv->bdrv_media_changed(bs);
3145     }
3146     return -ENOTSUP;
3147 }
3148 
3149 /**
3150  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3151  */
3152 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3153 {
3154     BlockDriver *drv = bs->drv;
3155     const char *device_name;
3156 
3157     if (drv && drv->bdrv_eject) {
3158         drv->bdrv_eject(bs, eject_flag);
3159     }
3160 
3161     device_name = bdrv_get_device_name(bs);
3162     if (device_name[0] != '\0') {
3163         qapi_event_send_device_tray_moved(device_name,
3164                                           eject_flag, &error_abort);
3165     }
3166 }
3167 
3168 /**
3169  * Lock or unlock the media (if it is locked, the user won't be able
3170  * to eject it manually).
3171  */
3172 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3173 {
3174     BlockDriver *drv = bs->drv;
3175 
3176     trace_bdrv_lock_medium(bs, locked);
3177 
3178     if (drv && drv->bdrv_lock_medium) {
3179         drv->bdrv_lock_medium(bs, locked);
3180     }
3181 }
3182 
3183 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3184 {
3185     BdrvDirtyBitmap *bm;
3186 
3187     assert(name);
3188     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3189         if (bm->name && !strcmp(name, bm->name)) {
3190             return bm;
3191         }
3192     }
3193     return NULL;
3194 }
3195 
3196 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3197 {
3198     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3199     g_free(bitmap->name);
3200     bitmap->name = NULL;
3201 }
3202 
3203 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3204                                           uint32_t granularity,
3205                                           const char *name,
3206                                           Error **errp)
3207 {
3208     int64_t bitmap_size;
3209     BdrvDirtyBitmap *bitmap;
3210     uint32_t sector_granularity;
3211 
3212     assert((granularity & (granularity - 1)) == 0);
3213 
3214     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3215         error_setg(errp, "Bitmap already exists: %s", name);
3216         return NULL;
3217     }
3218     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3219     assert(sector_granularity);
3220     bitmap_size = bdrv_nb_sectors(bs);
3221     if (bitmap_size < 0) {
3222         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3223         errno = -bitmap_size;
3224         return NULL;
3225     }
3226     bitmap = g_new0(BdrvDirtyBitmap, 1);
3227     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3228     bitmap->size = bitmap_size;
3229     bitmap->name = g_strdup(name);
3230     bitmap->disabled = false;
3231     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3232     return bitmap;
3233 }
3234 
3235 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3236 {
3237     return bitmap->successor;
3238 }
3239 
3240 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3241 {
3242     return !(bitmap->disabled || bitmap->successor);
3243 }
3244 
3245 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3246 {
3247     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3248         return DIRTY_BITMAP_STATUS_FROZEN;
3249     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3250         return DIRTY_BITMAP_STATUS_DISABLED;
3251     } else {
3252         return DIRTY_BITMAP_STATUS_ACTIVE;
3253     }
3254 }
3255 
3256 /**
3257  * Create a successor bitmap destined to replace this bitmap after an operation.
3258  * Requires that the bitmap is not frozen and has no successor.
3259  */
3260 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3261                                        BdrvDirtyBitmap *bitmap, Error **errp)
3262 {
3263     uint64_t granularity;
3264     BdrvDirtyBitmap *child;
3265 
3266     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3267         error_setg(errp, "Cannot create a successor for a bitmap that is "
3268                    "currently frozen");
3269         return -1;
3270     }
3271     assert(!bitmap->successor);
3272 
3273     /* Create an anonymous successor */
3274     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3275     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3276     if (!child) {
3277         return -1;
3278     }
3279 
3280     /* Successor will be on or off based on our current state. */
3281     child->disabled = bitmap->disabled;
3282 
3283     /* Install the successor and freeze the parent */
3284     bitmap->successor = child;
3285     return 0;
3286 }
3287 
3288 /**
3289  * For a bitmap with a successor, yield our name to the successor,
3290  * delete the old bitmap, and return a handle to the new bitmap.
3291  */
3292 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3293                                             BdrvDirtyBitmap *bitmap,
3294                                             Error **errp)
3295 {
3296     char *name;
3297     BdrvDirtyBitmap *successor = bitmap->successor;
3298 
3299     if (successor == NULL) {
3300         error_setg(errp, "Cannot relinquish control if "
3301                    "there's no successor present");
3302         return NULL;
3303     }
3304 
3305     name = bitmap->name;
3306     bitmap->name = NULL;
3307     successor->name = name;
3308     bitmap->successor = NULL;
3309     bdrv_release_dirty_bitmap(bs, bitmap);
3310 
3311     return successor;
3312 }
3313 
3314 /**
3315  * In cases of failure where we can no longer safely delete the parent,
3316  * we may wish to re-join the parent and child/successor.
3317  * The merged parent will be un-frozen, but not explicitly re-enabled.
3318  */
3319 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3320                                            BdrvDirtyBitmap *parent,
3321                                            Error **errp)
3322 {
3323     BdrvDirtyBitmap *successor = parent->successor;
3324 
3325     if (!successor) {
3326         error_setg(errp, "Cannot reclaim a successor when none is present");
3327         return NULL;
3328     }
3329 
3330     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3331         error_setg(errp, "Merging of parent and successor bitmap failed");
3332         return NULL;
3333     }
3334     bdrv_release_dirty_bitmap(bs, successor);
3335     parent->successor = NULL;
3336 
3337     return parent;
3338 }
3339 
3340 /**
3341  * Truncates _all_ bitmaps attached to a BDS.
3342  */
3343 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3344 {
3345     BdrvDirtyBitmap *bitmap;
3346     uint64_t size = bdrv_nb_sectors(bs);
3347 
3348     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3349         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3350         hbitmap_truncate(bitmap->bitmap, size);
3351         bitmap->size = size;
3352     }
3353 }
3354 
3355 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3356 {
3357     BdrvDirtyBitmap *bm, *next;
3358     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3359         if (bm == bitmap) {
3360             assert(!bdrv_dirty_bitmap_frozen(bm));
3361             QLIST_REMOVE(bitmap, list);
3362             hbitmap_free(bitmap->bitmap);
3363             g_free(bitmap->name);
3364             g_free(bitmap);
3365             return;
3366         }
3367     }
3368 }
3369 
3370 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3371 {
3372     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3373     bitmap->disabled = true;
3374 }
3375 
3376 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3377 {
3378     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3379     bitmap->disabled = false;
3380 }
3381 
3382 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3383 {
3384     BdrvDirtyBitmap *bm;
3385     BlockDirtyInfoList *list = NULL;
3386     BlockDirtyInfoList **plist = &list;
3387 
3388     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3389         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3390         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3391         info->count = bdrv_get_dirty_count(bm);
3392         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3393         info->has_name = !!bm->name;
3394         info->name = g_strdup(bm->name);
3395         info->status = bdrv_dirty_bitmap_status(bm);
3396         entry->value = info;
3397         *plist = entry;
3398         plist = &entry->next;
3399     }
3400 
3401     return list;
3402 }
3403 
3404 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3405 {
3406     if (bitmap) {
3407         return hbitmap_get(bitmap->bitmap, sector);
3408     } else {
3409         return 0;
3410     }
3411 }
3412 
3413 /**
3414  * Chooses a default granularity based on the existing cluster size,
3415  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3416  * is no cluster size information available.
3417  */
3418 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3419 {
3420     BlockDriverInfo bdi;
3421     uint32_t granularity;
3422 
3423     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3424         granularity = MAX(4096, bdi.cluster_size);
3425         granularity = MIN(65536, granularity);
3426     } else {
3427         granularity = 65536;
3428     }
3429 
3430     return granularity;
3431 }
3432 
3433 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3434 {
3435     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3436 }
3437 
3438 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3439 {
3440     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3441 }
3442 
3443 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3444                            int64_t cur_sector, int nr_sectors)
3445 {
3446     assert(bdrv_dirty_bitmap_enabled(bitmap));
3447     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3448 }
3449 
3450 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3451                              int64_t cur_sector, int nr_sectors)
3452 {
3453     assert(bdrv_dirty_bitmap_enabled(bitmap));
3454     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3455 }
3456 
3457 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3458 {
3459     assert(bdrv_dirty_bitmap_enabled(bitmap));
3460     if (!out) {
3461         hbitmap_reset_all(bitmap->bitmap);
3462     } else {
3463         HBitmap *backup = bitmap->bitmap;
3464         bitmap->bitmap = hbitmap_alloc(bitmap->size,
3465                                        hbitmap_granularity(backup));
3466         *out = backup;
3467     }
3468 }
3469 
3470 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3471 {
3472     HBitmap *tmp = bitmap->bitmap;
3473     assert(bdrv_dirty_bitmap_enabled(bitmap));
3474     bitmap->bitmap = in;
3475     hbitmap_free(tmp);
3476 }
3477 
3478 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3479                     int nr_sectors)
3480 {
3481     BdrvDirtyBitmap *bitmap;
3482     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3483         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3484             continue;
3485         }
3486         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3487     }
3488 }
3489 
3490 /**
3491  * Advance an HBitmapIter to an arbitrary offset.
3492  */
3493 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3494 {
3495     assert(hbi->hb);
3496     hbitmap_iter_init(hbi, hbi->hb, offset);
3497 }
3498 
3499 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3500 {
3501     return hbitmap_count(bitmap->bitmap);
3502 }
3503 
3504 /* Get a reference to bs */
3505 void bdrv_ref(BlockDriverState *bs)
3506 {
3507     bs->refcnt++;
3508 }
3509 
3510 /* Release a previously grabbed reference to bs.
3511  * If after releasing, reference count is zero, the BlockDriverState is
3512  * deleted. */
3513 void bdrv_unref(BlockDriverState *bs)
3514 {
3515     if (!bs) {
3516         return;
3517     }
3518     assert(bs->refcnt > 0);
3519     if (--bs->refcnt == 0) {
3520         bdrv_delete(bs);
3521     }
3522 }
3523 
/* One entry in a node's per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Identifies the blocker: bdrv_op_unblock() matches entries by this
     * pointer, and bdrv_op_is_blocked() reports its message. */
    Error *reason;
    /* Linkage within the blocker list */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3528 
3529 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3530 {
3531     BdrvOpBlocker *blocker;
3532     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3533     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3534         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3535         if (errp) {
3536             error_setg(errp, "Node '%s' is busy: %s",
3537                        bdrv_get_device_or_node_name(bs),
3538                        error_get_pretty(blocker->reason));
3539         }
3540         return true;
3541     }
3542     return false;
3543 }
3544 
3545 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3546 {
3547     BdrvOpBlocker *blocker;
3548     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3549 
3550     blocker = g_new0(BdrvOpBlocker, 1);
3551     blocker->reason = reason;
3552     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3553 }
3554 
3555 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3556 {
3557     BdrvOpBlocker *blocker, *next;
3558     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3559     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3560         if (blocker->reason == reason) {
3561             QLIST_REMOVE(blocker, list);
3562             g_free(blocker);
3563         }
3564     }
3565 }
3566 
3567 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3568 {
3569     int i;
3570     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3571         bdrv_op_block(bs, i, reason);
3572     }
3573 }
3574 
3575 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3576 {
3577     int i;
3578     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3579         bdrv_op_unblock(bs, i, reason);
3580     }
3581 }
3582 
3583 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3584 {
3585     int i;
3586 
3587     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3588         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3589             return false;
3590         }
3591     }
3592     return true;
3593 }
3594 
3595 void bdrv_img_create(const char *filename, const char *fmt,
3596                      const char *base_filename, const char *base_fmt,
3597                      char *options, uint64_t img_size, int flags,
3598                      Error **errp, bool quiet)
3599 {
3600     QemuOptsList *create_opts = NULL;
3601     QemuOpts *opts = NULL;
3602     const char *backing_fmt, *backing_file;
3603     int64_t size;
3604     BlockDriver *drv, *proto_drv;
3605     Error *local_err = NULL;
3606     int ret = 0;
3607 
3608     /* Find driver and parse its options */
3609     drv = bdrv_find_format(fmt);
3610     if (!drv) {
3611         error_setg(errp, "Unknown file format '%s'", fmt);
3612         return;
3613     }
3614 
3615     proto_drv = bdrv_find_protocol(filename, true, errp);
3616     if (!proto_drv) {
3617         return;
3618     }
3619 
3620     if (!drv->create_opts) {
3621         error_setg(errp, "Format driver '%s' does not support image creation",
3622                    drv->format_name);
3623         return;
3624     }
3625 
3626     if (!proto_drv->create_opts) {
3627         error_setg(errp, "Protocol driver '%s' does not support image creation",
3628                    proto_drv->format_name);
3629         return;
3630     }
3631 
3632     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3633     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3634 
3635     /* Create parameter list with default values */
3636     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3637     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3638 
3639     /* Parse -o options */
3640     if (options) {
3641         qemu_opts_do_parse(opts, options, NULL, &local_err);
3642         if (local_err) {
3643             error_report_err(local_err);
3644             local_err = NULL;
3645             error_setg(errp, "Invalid options for file format '%s'", fmt);
3646             goto out;
3647         }
3648     }
3649 
3650     if (base_filename) {
3651         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3652         if (local_err) {
3653             error_setg(errp, "Backing file not supported for file format '%s'",
3654                        fmt);
3655             goto out;
3656         }
3657     }
3658 
3659     if (base_fmt) {
3660         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3661         if (local_err) {
3662             error_setg(errp, "Backing file format not supported for file "
3663                              "format '%s'", fmt);
3664             goto out;
3665         }
3666     }
3667 
3668     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3669     if (backing_file) {
3670         if (!strcmp(filename, backing_file)) {
3671             error_setg(errp, "Error: Trying to create an image with the "
3672                              "same filename as the backing file");
3673             goto out;
3674         }
3675     }
3676 
3677     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3678 
3679     // The size for the image must always be specified, with one exception:
3680     // If we are using a backing file, we can obtain the size from there
3681     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3682     if (size == -1) {
3683         if (backing_file) {
3684             BlockDriverState *bs;
3685             char *full_backing = g_new0(char, PATH_MAX);
3686             int64_t size;
3687             int back_flags;
3688             QDict *backing_options = NULL;
3689 
3690             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3691                                                          full_backing, PATH_MAX,
3692                                                          &local_err);
3693             if (local_err) {
3694                 g_free(full_backing);
3695                 goto out;
3696             }
3697 
3698             /* backing files always opened read-only */
3699             back_flags =
3700                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3701 
3702             if (backing_fmt) {
3703                 backing_options = qdict_new();
3704                 qdict_put(backing_options, "driver",
3705                           qstring_from_str(backing_fmt));
3706             }
3707 
3708             bs = NULL;
3709             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3710                             back_flags, &local_err);
3711             g_free(full_backing);
3712             if (ret < 0) {
3713                 goto out;
3714             }
3715             size = bdrv_getlength(bs);
3716             if (size < 0) {
3717                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3718                                  backing_file);
3719                 bdrv_unref(bs);
3720                 goto out;
3721             }
3722 
3723             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3724 
3725             bdrv_unref(bs);
3726         } else {
3727             error_setg(errp, "Image creation needs a size parameter");
3728             goto out;
3729         }
3730     }
3731 
3732     if (!quiet) {
3733         printf("Formatting '%s', fmt=%s ", filename, fmt);
3734         qemu_opts_print(opts, " ");
3735         puts("");
3736     }
3737 
3738     ret = bdrv_create(drv, filename, opts, &local_err);
3739 
3740     if (ret == -EFBIG) {
3741         /* This is generally a better message than whatever the driver would
3742          * deliver (especially because of the cluster_size_hint), since that
3743          * is most probably not much different from "image too large". */
3744         const char *cluster_size_hint = "";
3745         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3746             cluster_size_hint = " (try using a larger cluster size)";
3747         }
3748         error_setg(errp, "The image size is too large for file format '%s'"
3749                    "%s", fmt, cluster_size_hint);
3750         error_free(local_err);
3751         local_err = NULL;
3752     }
3753 
3754 out:
3755     qemu_opts_del(opts);
3756     qemu_opts_free(create_opts);
3757     if (local_err) {
3758         error_propagate(errp, local_err);
3759     }
3760 }
3761 
3762 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3763 {
3764     return bs->aio_context;
3765 }
3766 
3767 void bdrv_detach_aio_context(BlockDriverState *bs)
3768 {
3769     BdrvAioNotifier *baf;
3770 
3771     if (!bs->drv) {
3772         return;
3773     }
3774 
3775     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3776         baf->detach_aio_context(baf->opaque);
3777     }
3778 
3779     if (bs->throttle_state) {
3780         throttle_timers_detach_aio_context(&bs->throttle_timers);
3781     }
3782     if (bs->drv->bdrv_detach_aio_context) {
3783         bs->drv->bdrv_detach_aio_context(bs);
3784     }
3785     if (bs->file) {
3786         bdrv_detach_aio_context(bs->file->bs);
3787     }
3788     if (bs->backing) {
3789         bdrv_detach_aio_context(bs->backing->bs);
3790     }
3791 
3792     bs->aio_context = NULL;
3793 }
3794 
3795 void bdrv_attach_aio_context(BlockDriverState *bs,
3796                              AioContext *new_context)
3797 {
3798     BdrvAioNotifier *ban;
3799 
3800     if (!bs->drv) {
3801         return;
3802     }
3803 
3804     bs->aio_context = new_context;
3805 
3806     if (bs->backing) {
3807         bdrv_attach_aio_context(bs->backing->bs, new_context);
3808     }
3809     if (bs->file) {
3810         bdrv_attach_aio_context(bs->file->bs, new_context);
3811     }
3812     if (bs->drv->bdrv_attach_aio_context) {
3813         bs->drv->bdrv_attach_aio_context(bs, new_context);
3814     }
3815     if (bs->throttle_state) {
3816         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3817     }
3818 
3819     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3820         ban->attached_aio_context(new_context, ban->opaque);
3821     }
3822 }
3823 
3824 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3825 {
3826     bdrv_drain(bs); /* ensure there are no in-flight requests */
3827 
3828     bdrv_detach_aio_context(bs);
3829 
3830     /* This function executes in the old AioContext so acquire the new one in
3831      * case it runs in a different thread.
3832      */
3833     aio_context_acquire(new_context);
3834     bdrv_attach_aio_context(bs, new_context);
3835     aio_context_release(new_context);
3836 }
3837 
3838 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3839         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3840         void (*detach_aio_context)(void *opaque), void *opaque)
3841 {
3842     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3843     *ban = (BdrvAioNotifier){
3844         .attached_aio_context = attached_aio_context,
3845         .detach_aio_context   = detach_aio_context,
3846         .opaque               = opaque
3847     };
3848 
3849     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3850 }
3851 
3852 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3853                                       void (*attached_aio_context)(AioContext *,
3854                                                                    void *),
3855                                       void (*detach_aio_context)(void *),
3856                                       void *opaque)
3857 {
3858     BdrvAioNotifier *ban, *ban_next;
3859 
3860     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3861         if (ban->attached_aio_context == attached_aio_context &&
3862             ban->detach_aio_context   == detach_aio_context   &&
3863             ban->opaque               == opaque)
3864         {
3865             QLIST_REMOVE(ban, list);
3866             g_free(ban);
3867 
3868             return;
3869         }
3870     }
3871 
3872     abort();
3873 }
3874 
3875 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3876                        BlockDriverAmendStatusCB *status_cb)
3877 {
3878     if (!bs->drv->bdrv_amend_options) {
3879         return -ENOTSUP;
3880     }
3881     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3882 }
3883 
3884 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3885  * of block filter and by bdrv_is_first_non_filter.
3886  * It is used to test if the given bs is the candidate or recurse more in the
3887  * node graph.
3888  */
3889 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3890                                       BlockDriverState *candidate)
3891 {
3892     /* return false if basic checks fails */
3893     if (!bs || !bs->drv) {
3894         return false;
3895     }
3896 
3897     /* the code reached a non block filter driver -> check if the bs is
3898      * the same as the candidate. It's the recursion termination condition.
3899      */
3900     if (!bs->drv->is_filter) {
3901         return bs == candidate;
3902     }
3903     /* Down this path the driver is a block filter driver */
3904 
3905     /* If the block filter recursion method is defined use it to recurse down
3906      * the node graph.
3907      */
3908     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3909         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3910     }
3911 
3912     /* the driver is a block filter but don't allow to recurse -> return false
3913      */
3914     return false;
3915 }
3916 
3917 /* This function checks if the candidate is the first non filter bs down it's
3918  * bs chain. Since we don't have pointers to parents it explore all bs chains
3919  * from the top. Some filters can choose not to pass down the recursion.
3920  */
3921 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3922 {
3923     BlockDriverState *bs;
3924 
3925     /* walk down the bs forest recursively */
3926     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3927         bool perm;
3928 
3929         /* try to recurse in this top level bs */
3930         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3931 
3932         /* candidate is the first non filter */
3933         if (perm) {
3934             return true;
3935         }
3936     }
3937 
3938     return false;
3939 }
3940 
3941 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3942                                         const char *node_name, Error **errp)
3943 {
3944     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3945     AioContext *aio_context;
3946 
3947     if (!to_replace_bs) {
3948         error_setg(errp, "Node name '%s' not found", node_name);
3949         return NULL;
3950     }
3951 
3952     aio_context = bdrv_get_aio_context(to_replace_bs);
3953     aio_context_acquire(aio_context);
3954 
3955     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3956         to_replace_bs = NULL;
3957         goto out;
3958     }
3959 
3960     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3961      * most non filter in order to prevent data corruption.
3962      * Another benefit is that this tests exclude backing files which are
3963      * blocked by the backing blockers.
3964      */
3965     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3966         error_setg(errp, "Only top most non filter can be replaced");
3967         to_replace_bs = NULL;
3968         goto out;
3969     }
3970 
3971 out:
3972     aio_context_release(aio_context);
3973     return to_replace_bs;
3974 }
3975 
3976 static bool append_open_options(QDict *d, BlockDriverState *bs)
3977 {
3978     const QDictEntry *entry;
3979     QemuOptDesc *desc;
3980     BdrvChild *child;
3981     bool found_any = false;
3982     const char *p;
3983 
3984     for (entry = qdict_first(bs->options); entry;
3985          entry = qdict_next(bs->options, entry))
3986     {
3987         /* Exclude options for children */
3988         QLIST_FOREACH(child, &bs->children, next) {
3989             if (strstart(qdict_entry_key(entry), child->name, &p)
3990                 && (!*p || *p == '.'))
3991             {
3992                 break;
3993             }
3994         }
3995         if (child) {
3996             continue;
3997         }
3998 
3999         /* And exclude all non-driver-specific options */
4000         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4001             if (!strcmp(qdict_entry_key(entry), desc->name)) {
4002                 break;
4003             }
4004         }
4005         if (desc->name) {
4006             continue;
4007         }
4008 
4009         qobject_incref(qdict_entry_value(entry));
4010         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4011         found_any = true;
4012     }
4013 
4014     return found_any;
4015 }
4016 
4017 /* Updates the following BDS fields:
4018  *  - exact_filename: A filename which may be used for opening a block device
4019  *                    which (mostly) equals the given BDS (even without any
4020  *                    other options; so reading and writing must return the same
4021  *                    results, but caching etc. may be different)
4022  *  - full_open_options: Options which, when given when opening a block device
4023  *                       (without a filename), result in a BDS (mostly)
4024  *                       equalling the given one
4025  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4026  *              full_open_options is converted to a JSON object, prefixed with
4027  *              "json:" (for use through the JSON pseudo protocol) and put here.
4028  */
4029 void bdrv_refresh_filename(BlockDriverState *bs)
4030 {
4031     BlockDriver *drv = bs->drv;
4032     QDict *opts;
4033 
4034     if (!drv) {
4035         return;
4036     }
4037 
4038     /* This BDS's file name will most probably depend on its file's name, so
4039      * refresh that first */
4040     if (bs->file) {
4041         bdrv_refresh_filename(bs->file->bs);
4042     }
4043 
4044     if (drv->bdrv_refresh_filename) {
4045         /* Obsolete information is of no use here, so drop the old file name
4046          * information before refreshing it */
4047         bs->exact_filename[0] = '\0';
4048         if (bs->full_open_options) {
4049             QDECREF(bs->full_open_options);
4050             bs->full_open_options = NULL;
4051         }
4052 
4053         opts = qdict_new();
4054         append_open_options(opts, bs);
4055         drv->bdrv_refresh_filename(bs, opts);
4056         QDECREF(opts);
4057     } else if (bs->file) {
4058         /* Try to reconstruct valid information from the underlying file */
4059         bool has_open_options;
4060 
4061         bs->exact_filename[0] = '\0';
4062         if (bs->full_open_options) {
4063             QDECREF(bs->full_open_options);
4064             bs->full_open_options = NULL;
4065         }
4066 
4067         opts = qdict_new();
4068         has_open_options = append_open_options(opts, bs);
4069 
4070         /* If no specific options have been given for this BDS, the filename of
4071          * the underlying file should suffice for this one as well */
4072         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4073             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4074         }
4075         /* Reconstructing the full options QDict is simple for most format block
4076          * drivers, as long as the full options are known for the underlying
4077          * file BDS. The full options QDict of that file BDS should somehow
4078          * contain a representation of the filename, therefore the following
4079          * suffices without querying the (exact_)filename of this BDS. */
4080         if (bs->file->bs->full_open_options) {
4081             qdict_put_obj(opts, "driver",
4082                           QOBJECT(qstring_from_str(drv->format_name)));
4083             QINCREF(bs->file->bs->full_open_options);
4084             qdict_put_obj(opts, "file",
4085                           QOBJECT(bs->file->bs->full_open_options));
4086 
4087             bs->full_open_options = opts;
4088         } else {
4089             QDECREF(opts);
4090         }
4091     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4092         /* There is no underlying file BDS (at least referenced by BDS.file),
4093          * so the full options QDict should be equal to the options given
4094          * specifically for this block device when it was opened (plus the
4095          * driver specification).
4096          * Because those options don't change, there is no need to update
4097          * full_open_options when it's already set. */
4098 
4099         opts = qdict_new();
4100         append_open_options(opts, bs);
4101         qdict_put_obj(opts, "driver",
4102                       QOBJECT(qstring_from_str(drv->format_name)));
4103 
4104         if (bs->exact_filename[0]) {
4105             /* This may not work for all block protocol drivers (some may
4106              * require this filename to be parsed), but we have to find some
4107              * default solution here, so just include it. If some block driver
4108              * does not support pure options without any filename at all or
4109              * needs some special format of the options QDict, it needs to
4110              * implement the driver-specific bdrv_refresh_filename() function.
4111              */
4112             qdict_put_obj(opts, "filename",
4113                           QOBJECT(qstring_from_str(bs->exact_filename)));
4114         }
4115 
4116         bs->full_open_options = opts;
4117     }
4118 
4119     if (bs->exact_filename[0]) {
4120         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4121     } else if (bs->full_open_options) {
4122         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4123         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4124                  qstring_get_str(json));
4125         QDECREF(json);
4126     }
4127 }
4128