xref: /openbmc/qemu/block.c (revision 76f4afb4)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
39 
40 #ifdef CONFIG_BSD
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
45 #ifndef __DragonFly__
46 #include <sys/disk.h>
47 #endif
48 #endif
49 
50 #ifdef _WIN32
51 #include <windows.h>
52 #endif
53 
54 /**
55  * A BdrvDirtyBitmap can be in three possible states:
56  * (1) successor is NULL and disabled is false: full r/w mode
57  * (2) successor is NULL and disabled is true: read only mode ("disabled")
58  * (3) successor is set: frozen mode.
59  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
60  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
61  */
62 struct BdrvDirtyBitmap {
63     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
64     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
65     char *name;                 /* Optional non-empty unique ID */
66     int64_t size;               /* Size of the bitmap (Number of sectors) */
67     bool disabled;              /* Bitmap is read-only */
68     QLIST_ENTRY(BdrvDirtyBitmap) list;
69 };
70 
71 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72 
73 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
74     QTAILQ_HEAD_INITIALIZER(bdrv_states);
75 
76 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
77     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
78 
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80     QLIST_HEAD_INITIALIZER(bdrv_drivers);
81 
82 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
83 /* If non-zero, use only whitelisted block drivers */
84 static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/*
 * Return non-zero when @filename starts with an ASCII letter immediately
 * followed by a colon (a drive prefix such as "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
93 
/*
 * Return 1 when @filename is either a bare drive specification (a drive
 * prefix with nothing after the colon) or starts with one of the two
 * four-character device prefixes tested below; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Exactly "<letter>:" and nothing else */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Device prefix, spelled with backslashes or with forward slashes */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
104 #endif
105 
106 size_t bdrv_opt_mem_align(BlockDriverState *bs)
107 {
108     if (!bs || !bs->drv) {
109         /* page size or 4k (hdd sector size) should be on the safe side */
110         return MAX(4096, getpagesize());
111     }
112 
113     return bs->bl.opt_mem_alignment;
114 }
115 
116 size_t bdrv_min_mem_align(BlockDriverState *bs)
117 {
118     if (!bs || !bs->drv) {
119         /* page size or 4k (hdd sector size) should be on the safe side */
120         return MAX(4096, getpagesize());
121     }
122 
123     return bs->bl.min_mem_alignment;
124 }
125 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * The path is scanned up to the first ':' or path separator; the result is
 * non-zero only when that first stop character is a ':'.  On Windows, drive
 * specifications are never treated as protocols and '\\' also counts as a
 * path separator.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    return path[prefix_len] == ':';
}
143 
/*
 * Return non-zero when @path is absolute: it starts with '/', or, on
 * Windows, it starts with '\\' or is recognised as a drive specification.
 */
int path_is_absolute(const char *path)
{
    char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
156 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename as seen
 * from @base_path.
 *
 * If @filename is absolute it is simply copied.  Otherwise the directory
 * part of @base_path is kept and @filename is appended to it.  The directory
 * part ends after the last path separator or after the first ':' of
 * @base_path, whichever comes later, so URLs are supported.
 *
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* First character after the protocol prefix, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* First character after the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    dir_end = after_sep > after_colon ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        /* Leave room for the terminating NUL */
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
200 
201 void bdrv_get_full_backing_filename_from_filename(const char *backed,
202                                                   const char *backing,
203                                                   char *dest, size_t sz,
204                                                   Error **errp)
205 {
206     if (backing[0] == '\0' || path_has_protocol(backing) ||
207         path_is_absolute(backing))
208     {
209         pstrcpy(dest, sz, backing);
210     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
211         error_setg(errp, "Cannot use relative backing file names for '%s'",
212                    backed);
213     } else {
214         path_combine(dest, sz, backed, backing);
215     }
216 }
217 
218 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
219                                     Error **errp)
220 {
221     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
222 
223     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
224                                                  dest, sz, errp);
225 }
226 
227 void bdrv_register(BlockDriver *bdrv)
228 {
229     bdrv_setup_io_funcs(bdrv);
230 
231     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
232 }
233 
234 BlockDriverState *bdrv_new_root(void)
235 {
236     BlockDriverState *bs = bdrv_new();
237 
238     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
239     return bs;
240 }
241 
242 BlockDriverState *bdrv_new(void)
243 {
244     BlockDriverState *bs;
245     int i;
246 
247     bs = g_new0(BlockDriverState, 1);
248     QLIST_INIT(&bs->dirty_bitmaps);
249     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
250         QLIST_INIT(&bs->op_blockers[i]);
251     }
252     bdrv_iostatus_disable(bs);
253     notifier_list_init(&bs->close_notifiers);
254     notifier_with_return_list_init(&bs->before_write_notifiers);
255     qemu_co_queue_init(&bs->throttled_reqs[0]);
256     qemu_co_queue_init(&bs->throttled_reqs[1]);
257     bs->refcnt = 1;
258     bs->aio_context = qemu_get_aio_context();
259 
260     return bs;
261 }
262 
263 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
264 {
265     notifier_list_add(&bs->close_notifiers, notify);
266 }
267 
268 BlockDriver *bdrv_find_format(const char *format_name)
269 {
270     BlockDriver *drv1;
271     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
272         if (!strcmp(drv1->format_name, format_name)) {
273             return drv1;
274         }
275     }
276     return NULL;
277 }
278 
279 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
280 {
281     static const char *whitelist_rw[] = {
282         CONFIG_BDRV_RW_WHITELIST
283     };
284     static const char *whitelist_ro[] = {
285         CONFIG_BDRV_RO_WHITELIST
286     };
287     const char **p;
288 
289     if (!whitelist_rw[0] && !whitelist_ro[0]) {
290         return 1;               /* no whitelist, anything goes */
291     }
292 
293     for (p = whitelist_rw; *p; p++) {
294         if (!strcmp(drv->format_name, *p)) {
295             return 1;
296         }
297     }
298     if (read_only) {
299         for (p = whitelist_ro; *p; p++) {
300             if (!strcmp(drv->format_name, *p)) {
301                 return 1;
302             }
303         }
304     }
305     return 0;
306 }
307 
308 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
309                                           bool read_only)
310 {
311     BlockDriver *drv = bdrv_find_format(format_name);
312     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
313 }
314 
315 typedef struct CreateCo {
316     BlockDriver *drv;
317     char *filename;
318     QemuOpts *opts;
319     int ret;
320     Error *err;
321 } CreateCo;
322 
323 static void coroutine_fn bdrv_create_co_entry(void *opaque)
324 {
325     Error *local_err = NULL;
326     int ret;
327 
328     CreateCo *cco = opaque;
329     assert(cco->drv);
330 
331     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
332     if (local_err) {
333         error_propagate(&cco->err, local_err);
334     }
335     cco->ret = ret;
336 }
337 
338 int bdrv_create(BlockDriver *drv, const char* filename,
339                 QemuOpts *opts, Error **errp)
340 {
341     int ret;
342 
343     Coroutine *co;
344     CreateCo cco = {
345         .drv = drv,
346         .filename = g_strdup(filename),
347         .opts = opts,
348         .ret = NOT_DONE,
349         .err = NULL,
350     };
351 
352     if (!drv->bdrv_create) {
353         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
354         ret = -ENOTSUP;
355         goto out;
356     }
357 
358     if (qemu_in_coroutine()) {
359         /* Fast-path if already in coroutine context */
360         bdrv_create_co_entry(&cco);
361     } else {
362         co = qemu_coroutine_create(bdrv_create_co_entry);
363         qemu_coroutine_enter(co, &cco);
364         while (cco.ret == NOT_DONE) {
365             aio_poll(qemu_get_aio_context(), true);
366         }
367     }
368 
369     ret = cco.ret;
370     if (ret < 0) {
371         if (cco.err) {
372             error_propagate(errp, cco.err);
373         } else {
374             error_setg_errno(errp, -ret, "Could not create image");
375         }
376     }
377 
378 out:
379     g_free(cco.filename);
380     return ret;
381 }
382 
383 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
384 {
385     BlockDriver *drv;
386     Error *local_err = NULL;
387     int ret;
388 
389     drv = bdrv_find_protocol(filename, true, errp);
390     if (drv == NULL) {
391         return -ENOENT;
392     }
393 
394     ret = bdrv_create(drv, filename, opts, &local_err);
395     if (local_err) {
396         error_propagate(errp, local_err);
397     }
398     return ret;
399 }
400 
401 /**
402  * Try to get @bs's logical and physical block size.
403  * On success, store them in @bsz struct and return 0.
404  * On failure return -errno.
405  * @bs must not be empty.
406  */
407 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
408 {
409     BlockDriver *drv = bs->drv;
410 
411     if (drv && drv->bdrv_probe_blocksizes) {
412         return drv->bdrv_probe_blocksizes(bs, bsz);
413     }
414 
415     return -ENOTSUP;
416 }
417 
418 /**
419  * Try to get @bs's geometry (cyls, heads, sectors).
420  * On success, store them in @geo struct and return 0.
421  * On failure return -errno.
422  * @bs must not be empty.
423  */
424 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
425 {
426     BlockDriver *drv = bs->drv;
427 
428     if (drv && drv->bdrv_probe_geometry) {
429         return drv->bdrv_probe_geometry(bs, geo);
430     }
431 
432     return -ENOTSUP;
433 }
434 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename  buffer that receives the path of the new file
 * @size      size of @filename in bytes; on Windows it must be at least
 *            MAX_PATH (asserted)
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when that
 * variable is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows, the
 * negated GetLastError() code).  -EOVERFLOW is returned when the file name
 * template does not fit into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so capture close()'s error first */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
470 
471 /*
472  * Detect host devices. By convention, /dev/cdrom[N] is always
473  * recognized as a host CDROM.
474  */
475 static BlockDriver *find_hdev_driver(const char *filename)
476 {
477     int score_max = 0, score;
478     BlockDriver *drv = NULL, *d;
479 
480     QLIST_FOREACH(d, &bdrv_drivers, list) {
481         if (d->bdrv_probe_device) {
482             score = d->bdrv_probe_device(filename);
483             if (score > score_max) {
484                 score_max = score;
485                 drv = d;
486             }
487         }
488     }
489 
490     return drv;
491 }
492 
493 BlockDriver *bdrv_find_protocol(const char *filename,
494                                 bool allow_protocol_prefix,
495                                 Error **errp)
496 {
497     BlockDriver *drv1;
498     char protocol[128];
499     int len;
500     const char *p;
501 
502     /* TODO Drivers without bdrv_file_open must be specified explicitly */
503 
504     /*
505      * XXX(hch): we really should not let host device detection
506      * override an explicit protocol specification, but moving this
507      * later breaks access to device names with colons in them.
508      * Thanks to the brain-dead persistent naming schemes on udev-
509      * based Linux systems those actually are quite common.
510      */
511     drv1 = find_hdev_driver(filename);
512     if (drv1) {
513         return drv1;
514     }
515 
516     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
517         return &bdrv_file;
518     }
519 
520     p = strchr(filename, ':');
521     assert(p != NULL);
522     len = p - filename;
523     if (len > sizeof(protocol) - 1)
524         len = sizeof(protocol) - 1;
525     memcpy(protocol, filename, len);
526     protocol[len] = '\0';
527     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
528         if (drv1->protocol_name &&
529             !strcmp(drv1->protocol_name, protocol)) {
530             return drv1;
531         }
532     }
533 
534     error_setg(errp, "Unknown protocol '%s'", protocol);
535     return NULL;
536 }
537 
538 /*
539  * Guess image format by probing its contents.
540  * This is not a good idea when your image is raw (CVE-2008-2004), but
541  * we do it anyway for backward compatibility.
542  *
543  * @buf         contains the image's first @buf_size bytes.
544  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
545  *              but can be smaller if the image file is smaller)
546  * @filename    is its filename.
547  *
548  * For all block drivers, call the bdrv_probe() method to get its
549  * probing score.
550  * Return the first block driver with the highest probing score.
551  */
552 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
553                             const char *filename)
554 {
555     int score_max = 0, score;
556     BlockDriver *drv = NULL, *d;
557 
558     QLIST_FOREACH(d, &bdrv_drivers, list) {
559         if (d->bdrv_probe) {
560             score = d->bdrv_probe(buf, buf_size, filename);
561             if (score > score_max) {
562                 score_max = score;
563                 drv = d;
564             }
565         }
566     }
567 
568     return drv;
569 }
570 
571 static int find_image_format(BlockDriverState *bs, const char *filename,
572                              BlockDriver **pdrv, Error **errp)
573 {
574     BlockDriver *drv;
575     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
576     int ret = 0;
577 
578     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
579     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
580         *pdrv = &bdrv_raw;
581         return ret;
582     }
583 
584     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
585     if (ret < 0) {
586         error_setg_errno(errp, -ret, "Could not read image for determining its "
587                          "format");
588         *pdrv = NULL;
589         return ret;
590     }
591 
592     drv = bdrv_probe_all(buf, ret, filename);
593     if (!drv) {
594         error_setg(errp, "Could not determine image format: No compatible "
595                    "driver found");
596         ret = -ENOENT;
597     }
598     *pdrv = drv;
599     return ret;
600 }
601 
602 /**
603  * Set the current 'total_sectors' value
604  * Return 0 on success, -errno on error.
605  */
606 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
607 {
608     BlockDriver *drv = bs->drv;
609 
610     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
611     if (bs->sg)
612         return 0;
613 
614     /* query actual device if possible, otherwise just trust the hint */
615     if (drv->bdrv_getlength) {
616         int64_t length = drv->bdrv_getlength(bs);
617         if (length < 0) {
618             return length;
619         }
620         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
621     }
622 
623     bs->total_sectors = hint;
624     return 0;
625 }
626 
627 /**
628  * Set open flags for a given discard mode
629  *
630  * Return 0 on success, -1 if the discard mode was invalid.
631  */
632 int bdrv_parse_discard_flags(const char *mode, int *flags)
633 {
634     *flags &= ~BDRV_O_UNMAP;
635 
636     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
637         /* do nothing */
638     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
639         *flags |= BDRV_O_UNMAP;
640     } else {
641         return -1;
642     }
643 
644     return 0;
645 }
646 
647 /**
648  * Set open flags for a given cache mode
649  *
650  * Return 0 on success, -1 if the cache mode was invalid.
651  */
652 int bdrv_parse_cache_flags(const char *mode, int *flags)
653 {
654     *flags &= ~BDRV_O_CACHE_MASK;
655 
656     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
657         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
658     } else if (!strcmp(mode, "directsync")) {
659         *flags |= BDRV_O_NOCACHE;
660     } else if (!strcmp(mode, "writeback")) {
661         *flags |= BDRV_O_CACHE_WB;
662     } else if (!strcmp(mode, "unsafe")) {
663         *flags |= BDRV_O_CACHE_WB;
664         *flags |= BDRV_O_NO_FLUSH;
665     } else if (!strcmp(mode, "writethrough")) {
666         /* this is the default */
667     } else {
668         return -1;
669     }
670 
671     return 0;
672 }
673 
674 /*
675  * Returns the flags that a temporary snapshot should get, based on the
676  * originally requested flags (the originally requested image will have flags
677  * like a backing file)
678  */
679 static int bdrv_temp_snapshot_flags(int flags)
680 {
681     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
682 }
683 
684 /*
685  * Returns the flags that bs->file should get, based on the given flags for
686  * the parent BDS
687  */
688 static int bdrv_inherited_flags(int flags)
689 {
690     /* Enable protocol handling, disable format probing for bs->file */
691     flags |= BDRV_O_PROTOCOL;
692 
693     /* Our block drivers take care to send flushes and respect unmap policy,
694      * so we can enable both unconditionally on lower layers. */
695     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
696 
697     /* Clear flags that only apply to the top layer */
698     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
699 
700     return flags;
701 }
702 
703 /*
704  * Returns the flags that bs->backing_hd should get, based on the given flags
705  * for the parent BDS
706  */
707 static int bdrv_backing_flags(int flags)
708 {
709     /* backing files always opened read-only */
710     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
711 
712     /* snapshot=on is handled on the top layer */
713     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
714 
715     return flags;
716 }
717 
718 static int bdrv_open_flags(BlockDriverState *bs, int flags)
719 {
720     int open_flags = flags | BDRV_O_CACHE_WB;
721 
722     /*
723      * Clear flags that are internal to the block layer before opening the
724      * image.
725      */
726     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
727 
728     /*
729      * Snapshots should be writable.
730      */
731     if (flags & BDRV_O_TEMPORARY) {
732         open_flags |= BDRV_O_RDWR;
733     }
734 
735     return open_flags;
736 }
737 
738 static void bdrv_assign_node_name(BlockDriverState *bs,
739                                   const char *node_name,
740                                   Error **errp)
741 {
742     if (!node_name) {
743         return;
744     }
745 
746     /* Check for empty string or invalid characters */
747     if (!id_wellformed(node_name)) {
748         error_setg(errp, "Invalid node name");
749         return;
750     }
751 
752     /* takes care of avoiding namespaces collisions */
753     if (blk_by_name(node_name)) {
754         error_setg(errp, "node-name=%s is conflicting with a device id",
755                    node_name);
756         return;
757     }
758 
759     /* takes care of avoiding duplicates node names */
760     if (bdrv_find_node(node_name)) {
761         error_setg(errp, "Duplicate node name");
762         return;
763     }
764 
765     /* copy node name into the bs and insert it into the graph list */
766     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
767     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
768 }
769 
770 /*
771  * Common part for opening disk images and files
772  *
773  * Removes all processed options from *options.
774  */
775 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
776     QDict *options, int flags, BlockDriver *drv, Error **errp)
777 {
778     int ret, open_flags;
779     const char *filename;
780     const char *node_name = NULL;
781     Error *local_err = NULL;
782 
783     assert(drv != NULL);
784     assert(bs->file == NULL);
785     assert(options != NULL && bs->options != options);
786 
787     if (file != NULL) {
788         filename = file->filename;
789     } else {
790         filename = qdict_get_try_str(options, "filename");
791     }
792 
793     if (drv->bdrv_needs_filename && !filename) {
794         error_setg(errp, "The '%s' block driver requires a file name",
795                    drv->format_name);
796         return -EINVAL;
797     }
798 
799     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
800 
801     node_name = qdict_get_try_str(options, "node-name");
802     bdrv_assign_node_name(bs, node_name, &local_err);
803     if (local_err) {
804         error_propagate(errp, local_err);
805         return -EINVAL;
806     }
807     qdict_del(options, "node-name");
808 
809     /* bdrv_open() with directly using a protocol as drv. This layer is already
810      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
811      * and return immediately. */
812     if (file != NULL && drv->bdrv_file_open) {
813         bdrv_swap(file, bs);
814         return 0;
815     }
816 
817     bs->open_flags = flags;
818     bs->guest_block_size = 512;
819     bs->request_alignment = 512;
820     bs->zero_beyond_eof = true;
821     open_flags = bdrv_open_flags(bs, flags);
822     bs->read_only = !(open_flags & BDRV_O_RDWR);
823 
824     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
825         error_setg(errp,
826                    !bs->read_only && bdrv_is_whitelisted(drv, true)
827                         ? "Driver '%s' can only be used for read-only devices"
828                         : "Driver '%s' is not whitelisted",
829                    drv->format_name);
830         return -ENOTSUP;
831     }
832 
833     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
834     if (flags & BDRV_O_COPY_ON_READ) {
835         if (!bs->read_only) {
836             bdrv_enable_copy_on_read(bs);
837         } else {
838             error_setg(errp, "Can't use copy-on-read on read-only device");
839             return -EINVAL;
840         }
841     }
842 
843     if (filename != NULL) {
844         pstrcpy(bs->filename, sizeof(bs->filename), filename);
845     } else {
846         bs->filename[0] = '\0';
847     }
848     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
849 
850     bs->drv = drv;
851     bs->opaque = g_malloc0(drv->instance_size);
852 
853     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
854 
855     /* Open the image, either directly or using a protocol */
856     if (drv->bdrv_file_open) {
857         assert(file == NULL);
858         assert(!drv->bdrv_needs_filename || filename != NULL);
859         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
860     } else {
861         if (file == NULL) {
862             error_setg(errp, "Can't use '%s' as a block driver for the "
863                        "protocol level", drv->format_name);
864             ret = -EINVAL;
865             goto free_and_fail;
866         }
867         bs->file = file;
868         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
869     }
870 
871     if (ret < 0) {
872         if (local_err) {
873             error_propagate(errp, local_err);
874         } else if (bs->filename[0]) {
875             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
876         } else {
877             error_setg_errno(errp, -ret, "Could not open image");
878         }
879         goto free_and_fail;
880     }
881 
882     if (bs->encrypted) {
883         error_report("Encrypted images are deprecated");
884         error_printf("Support for them will be removed in a future release.\n"
885                      "You can use 'qemu-img convert' to convert your image"
886                      " to an unencrypted one.\n");
887     }
888 
889     ret = refresh_total_sectors(bs, bs->total_sectors);
890     if (ret < 0) {
891         error_setg_errno(errp, -ret, "Could not refresh total sector count");
892         goto free_and_fail;
893     }
894 
895     bdrv_refresh_limits(bs, &local_err);
896     if (local_err) {
897         error_propagate(errp, local_err);
898         ret = -EINVAL;
899         goto free_and_fail;
900     }
901 
902     assert(bdrv_opt_mem_align(bs) != 0);
903     assert(bdrv_min_mem_align(bs) != 0);
904     assert((bs->request_alignment != 0) || bs->sg);
905     return 0;
906 
907 free_and_fail:
908     bs->file = NULL;
909     g_free(bs->opaque);
910     bs->opaque = NULL;
911     bs->drv = NULL;
912     return ret;
913 }
914 
915 static QDict *parse_json_filename(const char *filename, Error **errp)
916 {
917     QObject *options_obj;
918     QDict *options;
919     int ret;
920 
921     ret = strstart(filename, "json:", &filename);
922     assert(ret);
923 
924     options_obj = qobject_from_json(filename);
925     if (!options_obj) {
926         error_setg(errp, "Could not parse the JSON options");
927         return NULL;
928     }
929 
930     if (qobject_type(options_obj) != QTYPE_QDICT) {
931         qobject_decref(options_obj);
932         error_setg(errp, "Invalid JSON object given");
933         return NULL;
934     }
935 
936     options = qobject_to_qdict(options_obj);
937     qdict_flatten(options);
938 
939     return options;
940 }
941 
942 /*
943  * Fills in default options for opening images and converts the legacy
944  * filename/flags pair to option QDict entries.
945  */
946 static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
947                              BlockDriver *drv, Error **errp)
948 {
949     const char *filename = *pfilename;
950     const char *drvname;
951     bool protocol = flags & BDRV_O_PROTOCOL;
952     bool parse_filename = false;
953     Error *local_err = NULL;
954 
955     /* Parse json: pseudo-protocol */
956     if (filename && g_str_has_prefix(filename, "json:")) {
957         QDict *json_options = parse_json_filename(filename, &local_err);
958         if (local_err) {
959             error_propagate(errp, local_err);
960             return -EINVAL;
961         }
962 
963         /* Options given in the filename have lower priority than options
964          * specified directly */
965         qdict_join(*options, json_options, false);
966         QDECREF(json_options);
967         *pfilename = filename = NULL;
968     }
969 
970     /* Fetch the file name from the options QDict if necessary */
971     if (protocol && filename) {
972         if (!qdict_haskey(*options, "filename")) {
973             qdict_put(*options, "filename", qstring_from_str(filename));
974             parse_filename = true;
975         } else {
976             error_setg(errp, "Can't specify 'file' and 'filename' options at "
977                              "the same time");
978             return -EINVAL;
979         }
980     }
981 
982     /* Find the right block driver */
983     filename = qdict_get_try_str(*options, "filename");
984     drvname = qdict_get_try_str(*options, "driver");
985 
986     if (drv) {
987         if (drvname) {
988             error_setg(errp, "Driver specified twice");
989             return -EINVAL;
990         }
991         drvname = drv->format_name;
992         qdict_put(*options, "driver", qstring_from_str(drvname));
993     } else {
994         if (!drvname && protocol) {
995             if (filename) {
996                 drv = bdrv_find_protocol(filename, parse_filename, errp);
997                 if (!drv) {
998                     return -EINVAL;
999                 }
1000 
1001                 drvname = drv->format_name;
1002                 qdict_put(*options, "driver", qstring_from_str(drvname));
1003             } else {
1004                 error_setg(errp, "Must specify either driver or file");
1005                 return -EINVAL;
1006             }
1007         } else if (drvname) {
1008             drv = bdrv_find_format(drvname);
1009             if (!drv) {
1010                 error_setg(errp, "Unknown driver '%s'", drvname);
1011                 return -ENOENT;
1012             }
1013         }
1014     }
1015 
1016     assert(drv || !protocol);
1017 
1018     /* Driver-specific filename parsing */
1019     if (drv && drv->bdrv_parse_filename && parse_filename) {
1020         drv->bdrv_parse_filename(filename, *options, &local_err);
1021         if (local_err) {
1022             error_propagate(errp, local_err);
1023             return -EINVAL;
1024         }
1025 
1026         if (!drv->bdrv_needs_filename) {
1027             qdict_del(*options, "filename");
1028         }
1029     }
1030 
1031     return 0;
1032 }
1033 
1034 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1035 {
1036 
1037     if (bs->backing_hd) {
1038         assert(bs->backing_blocker);
1039         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1040     } else if (backing_hd) {
1041         error_setg(&bs->backing_blocker,
1042                    "node is used as backing hd of '%s'",
1043                    bdrv_get_device_or_node_name(bs));
1044     }
1045 
1046     bs->backing_hd = backing_hd;
1047     if (!backing_hd) {
1048         error_free(bs->backing_blocker);
1049         bs->backing_blocker = NULL;
1050         goto out;
1051     }
1052     bs->open_flags &= ~BDRV_O_NO_BACKING;
1053     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1054     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1055             backing_hd->drv ? backing_hd->drv->format_name : "");
1056 
1057     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1058     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1059     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1060                     bs->backing_blocker);
1061 out:
1062     bdrv_refresh_limits(bs, NULL);
1063 }
1064 
1065 /*
1066  * Opens the backing file for a BlockDriverState if not yet open
1067  *
1068  * options is a QDict of options to pass to the block drivers, or NULL for an
1069  * empty set of options. The reference to the QDict is transferred to this
1070  * function (even on failure), so if the caller intends to reuse the dictionary,
1071  * it needs to use QINCREF() before calling bdrv_file_open.
1072  */
1073 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1074 {
1075     char *backing_filename = g_malloc0(PATH_MAX);
1076     int ret = 0;
1077     BlockDriverState *backing_hd;
1078     Error *local_err = NULL;
1079 
1080     if (bs->backing_hd != NULL) {
1081         QDECREF(options);
1082         goto free_exit;
1083     }
1084 
1085     /* NULL means an empty set of options */
1086     if (options == NULL) {
1087         options = qdict_new();
1088     }
1089 
1090     bs->open_flags &= ~BDRV_O_NO_BACKING;
1091     if (qdict_haskey(options, "file.filename")) {
1092         backing_filename[0] = '\0';
1093     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1094         QDECREF(options);
1095         goto free_exit;
1096     } else {
1097         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1098                                        &local_err);
1099         if (local_err) {
1100             ret = -EINVAL;
1101             error_propagate(errp, local_err);
1102             QDECREF(options);
1103             goto free_exit;
1104         }
1105     }
1106 
1107     if (!bs->drv || !bs->drv->supports_backing) {
1108         ret = -EINVAL;
1109         error_setg(errp, "Driver doesn't support backing files");
1110         QDECREF(options);
1111         goto free_exit;
1112     }
1113 
1114     backing_hd = bdrv_new();
1115 
1116     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1117         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1118     }
1119 
1120     assert(bs->backing_hd == NULL);
1121     ret = bdrv_open(&backing_hd,
1122                     *backing_filename ? backing_filename : NULL, NULL, options,
1123                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
1124     if (ret < 0) {
1125         bdrv_unref(backing_hd);
1126         backing_hd = NULL;
1127         bs->open_flags |= BDRV_O_NO_BACKING;
1128         error_setg(errp, "Could not open backing file: %s",
1129                    error_get_pretty(local_err));
1130         error_free(local_err);
1131         goto free_exit;
1132     }
1133     bdrv_set_backing_hd(bs, backing_hd);
1134 
1135 free_exit:
1136     g_free(backing_filename);
1137     return ret;
1138 }
1139 
1140 /*
1141  * Opens a disk image whose options are given as BlockdevRef in another block
1142  * device's options.
1143  *
1144  * If allow_none is true, no image will be opened if filename is false and no
1145  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1146  *
1147  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1148  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1149  * itself, all options starting with "${bdref_key}." are considered part of the
1150  * BlockdevRef.
1151  *
1152  * The BlockdevRef will be removed from the options QDict.
1153  *
1154  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1155  */
1156 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1157                     QDict *options, const char *bdref_key, int flags,
1158                     bool allow_none, Error **errp)
1159 {
1160     QDict *image_options;
1161     int ret;
1162     char *bdref_key_dot;
1163     const char *reference;
1164 
1165     assert(pbs);
1166     assert(*pbs == NULL);
1167 
1168     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1169     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1170     g_free(bdref_key_dot);
1171 
1172     reference = qdict_get_try_str(options, bdref_key);
1173     if (!filename && !reference && !qdict_size(image_options)) {
1174         if (allow_none) {
1175             ret = 0;
1176         } else {
1177             error_setg(errp, "A block device must be specified for \"%s\"",
1178                        bdref_key);
1179             ret = -EINVAL;
1180         }
1181         QDECREF(image_options);
1182         goto done;
1183     }
1184 
1185     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1186 
1187 done:
1188     qdict_del(options, bdref_key);
1189     return ret;
1190 }
1191 
1192 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1193 {
1194     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1195     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1196     int64_t total_size;
1197     QemuOpts *opts = NULL;
1198     QDict *snapshot_options;
1199     BlockDriverState *bs_snapshot;
1200     Error *local_err;
1201     int ret;
1202 
1203     /* if snapshot, we create a temporary backing file and open it
1204        instead of opening 'filename' directly */
1205 
1206     /* Get the required size from the image */
1207     total_size = bdrv_getlength(bs);
1208     if (total_size < 0) {
1209         ret = total_size;
1210         error_setg_errno(errp, -total_size, "Could not get image size");
1211         goto out;
1212     }
1213 
1214     /* Create the temporary image */
1215     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1216     if (ret < 0) {
1217         error_setg_errno(errp, -ret, "Could not get temporary filename");
1218         goto out;
1219     }
1220 
1221     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1222                             &error_abort);
1223     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1224     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1225     qemu_opts_del(opts);
1226     if (ret < 0) {
1227         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1228                          "'%s': %s", tmp_filename,
1229                          error_get_pretty(local_err));
1230         error_free(local_err);
1231         goto out;
1232     }
1233 
1234     /* Prepare a new options QDict for the temporary file */
1235     snapshot_options = qdict_new();
1236     qdict_put(snapshot_options, "file.driver",
1237               qstring_from_str("file"));
1238     qdict_put(snapshot_options, "file.filename",
1239               qstring_from_str(tmp_filename));
1240 
1241     bs_snapshot = bdrv_new();
1242 
1243     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1244                     flags, &bdrv_qcow2, &local_err);
1245     if (ret < 0) {
1246         error_propagate(errp, local_err);
1247         goto out;
1248     }
1249 
1250     bdrv_append(bs_snapshot, bs);
1251 
1252 out:
1253     g_free(tmp_filename);
1254     return ret;
1255 }
1256 
1257 /*
1258  * Opens a disk image (raw, qcow2, vmdk, ...)
1259  *
1260  * options is a QDict of options to pass to the block drivers, or NULL for an
1261  * empty set of options. The reference to the QDict belongs to the block layer
1262  * after the call (even on failure), so if the caller intends to reuse the
1263  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1264  *
1265  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1266  * If it is not NULL, the referenced BDS will be reused.
1267  *
1268  * The reference parameter may be used to specify an existing block device which
1269  * should be opened. If specified, neither options nor a filename may be given,
1270  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1271  */
1272 int bdrv_open(BlockDriverState **pbs, const char *filename,
1273               const char *reference, QDict *options, int flags,
1274               BlockDriver *drv, Error **errp)
1275 {
1276     int ret;
1277     BlockDriverState *file = NULL, *bs;
1278     const char *drvname;
1279     Error *local_err = NULL;
1280     int snapshot_flags = 0;
1281 
1282     assert(pbs);
1283 
1284     if (reference) {
1285         bool options_non_empty = options ? qdict_size(options) : false;
1286         QDECREF(options);
1287 
1288         if (*pbs) {
1289             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1290                        "another block device");
1291             return -EINVAL;
1292         }
1293 
1294         if (filename || options_non_empty) {
1295             error_setg(errp, "Cannot reference an existing block device with "
1296                        "additional options or a new filename");
1297             return -EINVAL;
1298         }
1299 
1300         bs = bdrv_lookup_bs(reference, reference, errp);
1301         if (!bs) {
1302             return -ENODEV;
1303         }
1304         bdrv_ref(bs);
1305         *pbs = bs;
1306         return 0;
1307     }
1308 
1309     if (*pbs) {
1310         bs = *pbs;
1311     } else {
1312         bs = bdrv_new();
1313     }
1314 
1315     /* NULL means an empty set of options */
1316     if (options == NULL) {
1317         options = qdict_new();
1318     }
1319 
1320     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1321     if (local_err) {
1322         goto fail;
1323     }
1324 
1325     /* Find the right image format driver */
1326     drv = NULL;
1327     drvname = qdict_get_try_str(options, "driver");
1328     if (drvname) {
1329         drv = bdrv_find_format(drvname);
1330         qdict_del(options, "driver");
1331         if (!drv) {
1332             error_setg(errp, "Unknown driver: '%s'", drvname);
1333             ret = -EINVAL;
1334             goto fail;
1335         }
1336     }
1337 
1338     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1339     if (drv && !drv->bdrv_file_open) {
1340         /* If the user explicitly wants a format driver here, we'll need to add
1341          * another layer for the protocol in bs->file */
1342         flags &= ~BDRV_O_PROTOCOL;
1343     }
1344 
1345     bs->options = options;
1346     options = qdict_clone_shallow(options);
1347 
1348     /* Open image file without format layer */
1349     if ((flags & BDRV_O_PROTOCOL) == 0) {
1350         if (flags & BDRV_O_RDWR) {
1351             flags |= BDRV_O_ALLOW_RDWR;
1352         }
1353         if (flags & BDRV_O_SNAPSHOT) {
1354             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1355             flags = bdrv_backing_flags(flags);
1356         }
1357 
1358         assert(file == NULL);
1359         ret = bdrv_open_image(&file, filename, options, "file",
1360                               bdrv_inherited_flags(flags),
1361                               true, &local_err);
1362         if (ret < 0) {
1363             goto fail;
1364         }
1365     }
1366 
1367     /* Image format probing */
1368     bs->probed = !drv;
1369     if (!drv && file) {
1370         ret = find_image_format(file, filename, &drv, &local_err);
1371         if (ret < 0) {
1372             goto fail;
1373         }
1374     } else if (!drv) {
1375         error_setg(errp, "Must specify either driver or file");
1376         ret = -EINVAL;
1377         goto fail;
1378     }
1379 
1380     /* Open the image */
1381     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1382     if (ret < 0) {
1383         goto fail;
1384     }
1385 
1386     if (file && (bs->file != file)) {
1387         bdrv_unref(file);
1388         file = NULL;
1389     }
1390 
1391     /* If there is a backing file, use it */
1392     if ((flags & BDRV_O_NO_BACKING) == 0) {
1393         QDict *backing_options;
1394 
1395         qdict_extract_subqdict(options, &backing_options, "backing.");
1396         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1397         if (ret < 0) {
1398             goto close_and_fail;
1399         }
1400     }
1401 
1402     bdrv_refresh_filename(bs);
1403 
1404     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1405      * temporary snapshot afterwards. */
1406     if (snapshot_flags) {
1407         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1408         if (local_err) {
1409             goto close_and_fail;
1410         }
1411     }
1412 
1413     /* Check if any unknown options were used */
1414     if (options && (qdict_size(options) != 0)) {
1415         const QDictEntry *entry = qdict_first(options);
1416         if (flags & BDRV_O_PROTOCOL) {
1417             error_setg(errp, "Block protocol '%s' doesn't support the option "
1418                        "'%s'", drv->format_name, entry->key);
1419         } else {
1420             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1421                        "support the option '%s'", drv->format_name,
1422                        bdrv_get_device_name(bs), entry->key);
1423         }
1424 
1425         ret = -EINVAL;
1426         goto close_and_fail;
1427     }
1428 
1429     if (!bdrv_key_required(bs)) {
1430         if (bs->blk) {
1431             blk_dev_change_media_cb(bs->blk, true);
1432         }
1433     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1434                && !runstate_check(RUN_STATE_INMIGRATE)
1435                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1436         error_setg(errp,
1437                    "Guest must be stopped for opening of encrypted image");
1438         ret = -EBUSY;
1439         goto close_and_fail;
1440     }
1441 
1442     QDECREF(options);
1443     *pbs = bs;
1444     return 0;
1445 
1446 fail:
1447     if (file != NULL) {
1448         bdrv_unref(file);
1449     }
1450     QDECREF(bs->options);
1451     QDECREF(options);
1452     bs->options = NULL;
1453     if (!*pbs) {
1454         /* If *pbs is NULL, a new BDS has been created in this function and
1455            needs to be freed now. Otherwise, it does not need to be closed,
1456            since it has not really been opened yet. */
1457         bdrv_unref(bs);
1458     }
1459     if (local_err) {
1460         error_propagate(errp, local_err);
1461     }
1462     return ret;
1463 
1464 close_and_fail:
1465     /* See fail path, but now the BDS has to be always closed */
1466     if (*pbs) {
1467         bdrv_close(bs);
1468     } else {
1469         bdrv_unref(bs);
1470     }
1471     QDECREF(options);
1472     if (local_err) {
1473         error_propagate(errp, local_err);
1474     }
1475     return ret;
1476 }
1477 
1478 typedef struct BlockReopenQueueEntry {
1479      bool prepared;
1480      BDRVReopenState state;
1481      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1482 } BlockReopenQueueEntry;
1483 
1484 /*
1485  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1486  * reopen of multiple devices.
1487  *
1488  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1489  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1490  * be created and initialized. This newly created BlockReopenQueue should be
1491  * passed back in for subsequent calls that are intended to be of the same
1492  * atomic 'set'.
1493  *
1494  * bs is the BlockDriverState to add to the reopen queue.
1495  *
1496  * flags contains the open flags for the associated bs
1497  *
1498  * returns a pointer to bs_queue, which is either the newly allocated
1499  * bs_queue, or the existing bs_queue being used.
1500  *
1501  */
1502 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1503                                     BlockDriverState *bs, int flags)
1504 {
1505     assert(bs != NULL);
1506 
1507     BlockReopenQueueEntry *bs_entry;
1508     if (bs_queue == NULL) {
1509         bs_queue = g_new0(BlockReopenQueue, 1);
1510         QSIMPLEQ_INIT(bs_queue);
1511     }
1512 
1513     /* bdrv_open() masks this flag out */
1514     flags &= ~BDRV_O_PROTOCOL;
1515 
1516     if (bs->file) {
1517         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1518     }
1519 
1520     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1521     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1522 
1523     bs_entry->state.bs = bs;
1524     bs_entry->state.flags = flags;
1525 
1526     return bs_queue;
1527 }
1528 
1529 /*
1530  * Reopen multiple BlockDriverStates atomically & transactionally.
1531  *
1532  * The queue passed in (bs_queue) must have been built up previous
1533  * via bdrv_reopen_queue().
1534  *
1535  * Reopens all BDS specified in the queue, with the appropriate
1536  * flags.  All devices are prepared for reopen, and failure of any
1537  * device will cause all device changes to be abandonded, and intermediate
1538  * data cleaned up.
1539  *
1540  * If all devices prepare successfully, then the changes are committed
1541  * to all devices.
1542  *
1543  */
1544 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1545 {
1546     int ret = -1;
1547     BlockReopenQueueEntry *bs_entry, *next;
1548     Error *local_err = NULL;
1549 
1550     assert(bs_queue != NULL);
1551 
1552     bdrv_drain_all();
1553 
1554     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1555         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1556             error_propagate(errp, local_err);
1557             goto cleanup;
1558         }
1559         bs_entry->prepared = true;
1560     }
1561 
1562     /* If we reach this point, we have success and just need to apply the
1563      * changes
1564      */
1565     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1566         bdrv_reopen_commit(&bs_entry->state);
1567     }
1568 
1569     ret = 0;
1570 
1571 cleanup:
1572     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1573         if (ret && bs_entry->prepared) {
1574             bdrv_reopen_abort(&bs_entry->state);
1575         }
1576         g_free(bs_entry);
1577     }
1578     g_free(bs_queue);
1579     return ret;
1580 }
1581 
1582 
1583 /* Reopen a single BlockDriverState with the specified flags. */
1584 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1585 {
1586     int ret = -1;
1587     Error *local_err = NULL;
1588     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1589 
1590     ret = bdrv_reopen_multiple(queue, &local_err);
1591     if (local_err != NULL) {
1592         error_propagate(errp, local_err);
1593     }
1594     return ret;
1595 }
1596 
1597 
1598 /*
1599  * Prepares a BlockDriverState for reopen. All changes are staged in the
1600  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1601  * the block driver layer .bdrv_reopen_prepare()
1602  *
1603  * bs is the BlockDriverState to reopen
1604  * flags are the new open flags
1605  * queue is the reopen queue
1606  *
1607  * Returns 0 on success, non-zero on error.  On error errp will be set
1608  * as well.
1609  *
1610  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1611  * It is the responsibility of the caller to then call the abort() or
1612  * commit() for any other BDS that have been left in a prepare() state
1613  *
1614  */
1615 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1616                         Error **errp)
1617 {
1618     int ret = -1;
1619     Error *local_err = NULL;
1620     BlockDriver *drv;
1621 
1622     assert(reopen_state != NULL);
1623     assert(reopen_state->bs->drv != NULL);
1624     drv = reopen_state->bs->drv;
1625 
1626     /* if we are to stay read-only, do not allow permission change
1627      * to r/w */
1628     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1629         reopen_state->flags & BDRV_O_RDWR) {
1630         error_setg(errp, "Node '%s' is read only",
1631                    bdrv_get_device_or_node_name(reopen_state->bs));
1632         goto error;
1633     }
1634 
1635 
1636     ret = bdrv_flush(reopen_state->bs);
1637     if (ret) {
1638         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1639                   strerror(-ret));
1640         goto error;
1641     }
1642 
1643     if (drv->bdrv_reopen_prepare) {
1644         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1645         if (ret) {
1646             if (local_err != NULL) {
1647                 error_propagate(errp, local_err);
1648             } else {
1649                 error_setg(errp, "failed while preparing to reopen image '%s'",
1650                            reopen_state->bs->filename);
1651             }
1652             goto error;
1653         }
1654     } else {
1655         /* It is currently mandatory to have a bdrv_reopen_prepare()
1656          * handler for each supported drv. */
1657         error_setg(errp, "Block format '%s' used by node '%s' "
1658                    "does not support reopening files", drv->format_name,
1659                    bdrv_get_device_or_node_name(reopen_state->bs));
1660         ret = -1;
1661         goto error;
1662     }
1663 
1664     ret = 0;
1665 
1666 error:
1667     return ret;
1668 }
1669 
1670 /*
1671  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1672  * makes them final by swapping the staging BlockDriverState contents into
1673  * the active BlockDriverState contents.
1674  */
1675 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1676 {
1677     BlockDriver *drv;
1678 
1679     assert(reopen_state != NULL);
1680     drv = reopen_state->bs->drv;
1681     assert(drv != NULL);
1682 
1683     /* If there are any driver level actions to take */
1684     if (drv->bdrv_reopen_commit) {
1685         drv->bdrv_reopen_commit(reopen_state);
1686     }
1687 
1688     /* set BDS specific flags now */
1689     reopen_state->bs->open_flags         = reopen_state->flags;
1690     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1691                                               BDRV_O_CACHE_WB);
1692     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1693 
1694     bdrv_refresh_limits(reopen_state->bs, NULL);
1695 }
1696 
1697 /*
1698  * Abort the reopen, and delete and free the staged changes in
1699  * reopen_state
1700  */
1701 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1702 {
1703     BlockDriver *drv;
1704 
1705     assert(reopen_state != NULL);
1706     drv = reopen_state->bs->drv;
1707     assert(drv != NULL);
1708 
1709     if (drv->bdrv_reopen_abort) {
1710         drv->bdrv_reopen_abort(reopen_state);
1711     }
1712 }
1713 
1714 
1715 void bdrv_close(BlockDriverState *bs)
1716 {
1717     BdrvAioNotifier *ban, *ban_next;
1718 
1719     if (bs->job) {
1720         block_job_cancel_sync(bs->job);
1721     }
1722     bdrv_drain_all(); /* complete I/O */
1723     bdrv_flush(bs);
1724     bdrv_drain_all(); /* in case flush left pending I/O */
1725     notifier_list_notify(&bs->close_notifiers, bs);
1726 
1727     if (bs->drv) {
1728         if (bs->backing_hd) {
1729             BlockDriverState *backing_hd = bs->backing_hd;
1730             bdrv_set_backing_hd(bs, NULL);
1731             bdrv_unref(backing_hd);
1732         }
1733         bs->drv->bdrv_close(bs);
1734         g_free(bs->opaque);
1735         bs->opaque = NULL;
1736         bs->drv = NULL;
1737         bs->copy_on_read = 0;
1738         bs->backing_file[0] = '\0';
1739         bs->backing_format[0] = '\0';
1740         bs->total_sectors = 0;
1741         bs->encrypted = 0;
1742         bs->valid_key = 0;
1743         bs->sg = 0;
1744         bs->zero_beyond_eof = false;
1745         QDECREF(bs->options);
1746         bs->options = NULL;
1747         QDECREF(bs->full_open_options);
1748         bs->full_open_options = NULL;
1749 
1750         if (bs->file != NULL) {
1751             bdrv_unref(bs->file);
1752             bs->file = NULL;
1753         }
1754     }
1755 
1756     if (bs->blk) {
1757         blk_dev_change_media_cb(bs->blk, false);
1758     }
1759 
1760     /*throttling disk I/O limits*/
1761     if (bs->io_limits_enabled) {
1762         bdrv_io_limits_disable(bs);
1763     }
1764 
1765     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1766         g_free(ban);
1767     }
1768     QLIST_INIT(&bs->aio_notifiers);
1769 }
1770 
1771 void bdrv_close_all(void)
1772 {
1773     BlockDriverState *bs;
1774 
1775     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1776         AioContext *aio_context = bdrv_get_aio_context(bs);
1777 
1778         aio_context_acquire(aio_context);
1779         bdrv_close(bs);
1780         aio_context_release(aio_context);
1781     }
1782 }
1783 
1784 /* make a BlockDriverState anonymous by removing from bdrv_state and
1785  * graph_bdrv_state list.
1786    Also, NULL terminate the device_name to prevent double remove */
1787 void bdrv_make_anon(BlockDriverState *bs)
1788 {
1789     /*
1790      * Take care to remove bs from bdrv_states only when it's actually
1791      * in it.  Note that bs->device_list.tqe_prev is initially null,
1792      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1793      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1794      * resetting it to null on remove.
1795      */
1796     if (bs->device_list.tqe_prev) {
1797         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1798         bs->device_list.tqe_prev = NULL;
1799     }
1800     if (bs->node_name[0] != '\0') {
1801         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1802     }
1803     bs->node_name[0] = '\0';
1804 }
1805 
1806 static void bdrv_rebind(BlockDriverState *bs)
1807 {
1808     if (bs->drv && bs->drv->bdrv_rebind) {
1809         bs->drv->bdrv_rebind(bs);
1810     }
1811 }
1812 
1813 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1814                                      BlockDriverState *bs_src)
1815 {
1816     /* move some fields that need to stay attached to the device */
1817 
1818     /* dev info */
1819     bs_dest->guest_block_size   = bs_src->guest_block_size;
1820     bs_dest->copy_on_read       = bs_src->copy_on_read;
1821 
1822     bs_dest->enable_write_cache = bs_src->enable_write_cache;
1823 
1824     /* i/o throttled req */
1825     bs_dest->throttle_state     = bs_src->throttle_state,
1826     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
1827     bs_dest->pending_reqs[0]    = bs_src->pending_reqs[0];
1828     bs_dest->pending_reqs[1]    = bs_src->pending_reqs[1];
1829     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1830     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
1831     memcpy(&bs_dest->round_robin,
1832            &bs_src->round_robin,
1833            sizeof(bs_dest->round_robin));
1834     memcpy(&bs_dest->throttle_timers,
1835            &bs_src->throttle_timers,
1836            sizeof(ThrottleTimers));
1837 
1838     /* r/w error */
1839     bs_dest->on_read_error      = bs_src->on_read_error;
1840     bs_dest->on_write_error     = bs_src->on_write_error;
1841 
1842     /* i/o status */
1843     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
1844     bs_dest->iostatus           = bs_src->iostatus;
1845 
1846     /* dirty bitmap */
1847     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
1848 
1849     /* reference count */
1850     bs_dest->refcnt             = bs_src->refcnt;
1851 
1852     /* job */
1853     bs_dest->job                = bs_src->job;
1854 
1855     /* keep the same entry in bdrv_states */
1856     bs_dest->device_list = bs_src->device_list;
1857     bs_dest->blk = bs_src->blk;
1858 
1859     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
1860            sizeof(bs_dest->op_blockers));
1861 }
1862 
1863 /*
1864  * Swap bs contents for two image chains while they are live,
1865  * while keeping required fields on the BlockDriverState that is
1866  * actually attached to a device.
1867  *
1868  * This will modify the BlockDriverState fields, and swap contents
1869  * between bs_new and bs_old. Both bs_new and bs_old are modified.
1870  *
1871  * bs_new must not be attached to a BlockBackend.
1872  *
1873  * This function does not create any image files.
1874  */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    /* Scratch copy used for the by-value exchange of the two structs below */
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));

    /* Exchange the complete struct contents of the two nodes by value */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back.
     * NOTE(review): bdrv_move_feature_fields() is defined outside this view;
     * presumably its arguments are (dest, src), which would make these three
     * calls rotate the affected fields back through tmp -- confirm against
     * its definition. */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));

    /* insert the nodes back into the graph node list if needed; the test is
     * repeated here because it is evaluated on the post-swap node_name */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* bdrv_rebind() is defined outside this view; it is invoked on both
     * nodes once the swap is complete */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
1925 
1926 /*
1927  * Add new bs contents at the top of an image chain while the chain is
1928  * live, while keeping required fields on the top layer.
1929  *
1930  * This will modify the BlockDriverState fields, and swap contents
1931  * between bs_new and bs_top. Both bs_new and bs_top are modified.
1932  *
1933  * bs_new must not be attached to a BlockBackend.
1934  *
1935  * This function does not create any image files.
1936  */
1937 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1938 {
1939     bdrv_swap(bs_new, bs_top);
1940 
1941     /* The contents of 'tmp' will become bs_top, as we are
1942      * swapping bs_new and bs_top contents. */
1943     bdrv_set_backing_hd(bs_top, bs_new);
1944 }
1945 
1946 static void bdrv_delete(BlockDriverState *bs)
1947 {
1948     assert(!bs->job);
1949     assert(bdrv_op_blocker_is_empty(bs));
1950     assert(!bs->refcnt);
1951     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1952 
1953     bdrv_close(bs);
1954 
1955     /* remove from list, if necessary */
1956     bdrv_make_anon(bs);
1957 
1958     g_free(bs);
1959 }
1960 
1961 /*
1962  * Run consistency checks on an image
1963  *
1964  * Returns 0 if the check could be completed (it doesn't mean that the image is
1965  * free of errors) or -errno when an internal error occurred. The results of the
1966  * check are stored in res.
1967  */
1968 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1969 {
1970     if (bs->drv == NULL) {
1971         return -ENOMEDIUM;
1972     }
1973     if (bs->drv->bdrv_check == NULL) {
1974         return -ENOTSUP;
1975     }
1976 
1977     memset(res, 0, sizeof(*res));
1978     return bs->drv->bdrv_check(bs, res, fix);
1979 }
1980 
/* Size, in sectors, of the bounce buffer used by bdrv_commit(); also the
 * maximum number of sectors queried/copied per loop iteration there. */
#define COMMIT_BUF_SECTORS 2048

/*
 * commit COW file into the raw image
 *
 * Copies every range of @bs that bdrv_is_allocated() reports as allocated
 * into bs->backing_hd, growing the backing file first if @bs is longer.
 * A read-only backing file is reopened read-write for the duration of the
 * operation and switched back afterwards.
 *
 * Returns 0 on success, or a negative errno value:
 *   -ENOMEDIUM  @bs has no driver
 *   -ENOTSUP    @bs has no backing file
 *   -EBUSY      the commit is blocked on @bs or on its backing file
 *   -EACCES     the backing file could not be reopened read-write
 *   -ENOMEM     the bounce buffer could not be allocated
 * or whatever error the failing length/truncate/read/write/make_empty
 * helper returned.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's original mode so that it can be restored
     * at ro_cleanup */
    ro = bs->backing_hd->read_only;
    open_flags =  bs->backing_hd->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing_hd as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* Walk the image; bdrv_is_allocated() stores in n the length of the
     * range it reported on, which is used as the stride of the loop.
     * NOTE(review): presumably n is always > 0 while sector < total_sectors,
     * otherwise this loop would not advance -- confirm. */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* non-zero: the range is allocated in bs, copy it down */
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    /* Drivers that implement bdrv_make_empty get to empty the top image
     * now that its data has been copied; the flush result is not checked */
    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    /* buf is still NULL when we get here before the allocation */
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2090 
2091 int bdrv_commit_all(void)
2092 {
2093     BlockDriverState *bs;
2094 
2095     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2096         AioContext *aio_context = bdrv_get_aio_context(bs);
2097 
2098         aio_context_acquire(aio_context);
2099         if (bs->drv && bs->backing_hd) {
2100             int ret = bdrv_commit(bs);
2101             if (ret < 0) {
2102                 aio_context_release(aio_context);
2103                 return ret;
2104             }
2105         }
2106         aio_context_release(aio_context);
2107     }
2108     return 0;
2109 }
2110 
2111 /*
2112  * Return values:
2113  * 0        - success
2114  * -EINVAL  - backing format specified, but no file
2115  * -ENOSPC  - can't update the backing file because no space is left in the
2116  *            image file header
2117  * -ENOTSUP - format driver doesn't support changing the backing file
2118  */
2119 int bdrv_change_backing_file(BlockDriverState *bs,
2120     const char *backing_file, const char *backing_fmt)
2121 {
2122     BlockDriver *drv = bs->drv;
2123     int ret;
2124 
2125     /* Backing file format doesn't make sense without a backing file */
2126     if (backing_fmt && !backing_file) {
2127         return -EINVAL;
2128     }
2129 
2130     if (drv->bdrv_change_backing_file != NULL) {
2131         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2132     } else {
2133         ret = -ENOTSUP;
2134     }
2135 
2136     if (ret == 0) {
2137         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2138         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2139     }
2140     return ret;
2141 }
2142 
2143 /*
2144  * Finds the image layer in the chain that has 'bs' as its backing file.
2145  *
2146  * active is the current topmost image.
2147  *
2148  * Returns NULL if bs is not found in active's image chain,
2149  * or if active == bs.
2150  *
2151  * Returns the bottommost base image if bs == NULL.
2152  */
2153 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2154                                     BlockDriverState *bs)
2155 {
2156     while (active && bs != active->backing_hd) {
2157         active = active->backing_hd;
2158     }
2159 
2160     return active;
2161 }
2162 
2163 /* Given a BDS, searches for the base layer. */
2164 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2165 {
2166     return bdrv_find_overlay(bs, NULL);
2167 }
2168 
/* Element of a simple queue of BlockDriverState pointers; used by
 * bdrv_drop_intermediate() to remember the nodes it is about to drop. */
typedef struct BlkIntermediateStates {
    /* the node this element refers to */
    BlockDriverState *bs;
    /* linkage within the queue */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2173 
2174 
2175 /*
2176  * Drops images above 'base' up to and including 'top', and sets the image
2177  * above 'top' to have base as its backing file.
2178  *
2179  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2180  * information in 'bs' can be properly updated.
2181  *
2182  * E.g., this will convert the following chain:
2183  * bottom <- base <- intermediate <- top <- active
2184  *
2185  * to
2186  *
2187  * bottom <- base <- active
2188  *
2189  * It is allowed for bottom==base, in which case it converts:
2190  *
2191  * base <- intermediate <- top <- active
2192  *
2193  * to
2194  *
2195  * base <- active
2196  *
2197  * If backing_file_str is non-NULL, it will be used when modifying top's
2198  * overlay image metadata.
2199  *
2200  * Error conditions:
2201  *  if active == top, that is considered an error
2202  *
2203  */
2204 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2205                            BlockDriverState *base, const char *backing_file_str)
2206 {
2207     BlockDriverState *intermediate;
2208     BlockDriverState *base_bs = NULL;
2209     BlockDriverState *new_top_bs = NULL;
2210     BlkIntermediateStates *intermediate_state, *next;
2211     int ret = -EIO;
2212 
2213     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2214     QSIMPLEQ_INIT(&states_to_delete);
2215 
2216     if (!top->drv || !base->drv) {
2217         goto exit;
2218     }
2219 
2220     new_top_bs = bdrv_find_overlay(active, top);
2221 
2222     if (new_top_bs == NULL) {
2223         /* we could not find the image above 'top', this is an error */
2224         goto exit;
2225     }
2226 
2227     /* special case of new_top_bs->backing_hd already pointing to base - nothing
2228      * to do, no intermediate images */
2229     if (new_top_bs->backing_hd == base) {
2230         ret = 0;
2231         goto exit;
2232     }
2233 
2234     intermediate = top;
2235 
2236     /* now we will go down through the list, and add each BDS we find
2237      * into our deletion queue, until we hit the 'base'
2238      */
2239     while (intermediate) {
2240         intermediate_state = g_new0(BlkIntermediateStates, 1);
2241         intermediate_state->bs = intermediate;
2242         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2243 
2244         if (intermediate->backing_hd == base) {
2245             base_bs = intermediate->backing_hd;
2246             break;
2247         }
2248         intermediate = intermediate->backing_hd;
2249     }
2250     if (base_bs == NULL) {
2251         /* something went wrong, we did not end at the base. safely
2252          * unravel everything, and exit with error */
2253         goto exit;
2254     }
2255 
2256     /* success - we can delete the intermediate states, and link top->base */
2257     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2258     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2259                                    base_bs->drv ? base_bs->drv->format_name : "");
2260     if (ret) {
2261         goto exit;
2262     }
2263     bdrv_set_backing_hd(new_top_bs, base_bs);
2264 
2265     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2266         /* so that bdrv_close() does not recursively close the chain */
2267         bdrv_set_backing_hd(intermediate_state->bs, NULL);
2268         bdrv_unref(intermediate_state->bs);
2269     }
2270     ret = 0;
2271 
2272 exit:
2273     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2274         g_free(intermediate_state);
2275     }
2276     return ret;
2277 }
2278 
2279 /**
2280  * Truncate file to 'offset' bytes (needed only for file protocols)
2281  */
2282 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2283 {
2284     BlockDriver *drv = bs->drv;
2285     int ret;
2286     if (!drv)
2287         return -ENOMEDIUM;
2288     if (!drv->bdrv_truncate)
2289         return -ENOTSUP;
2290     if (bs->read_only)
2291         return -EACCES;
2292 
2293     ret = drv->bdrv_truncate(bs, offset);
2294     if (ret == 0) {
2295         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2296         bdrv_dirty_bitmap_truncate(bs);
2297         if (bs->blk) {
2298             blk_dev_resize_cb(bs->blk);
2299         }
2300     }
2301     return ret;
2302 }
2303 
2304 /**
2305  * Length of a allocated file in bytes. Sparse files are counted by actual
2306  * allocated space. Return < 0 if error or unknown.
2307  */
2308 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2309 {
2310     BlockDriver *drv = bs->drv;
2311     if (!drv) {
2312         return -ENOMEDIUM;
2313     }
2314     if (drv->bdrv_get_allocated_file_size) {
2315         return drv->bdrv_get_allocated_file_size(bs);
2316     }
2317     if (bs->file) {
2318         return bdrv_get_allocated_file_size(bs->file);
2319     }
2320     return -ENOTSUP;
2321 }
2322 
2323 /**
2324  * Return number of sectors on success, -errno on error.
2325  */
2326 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2327 {
2328     BlockDriver *drv = bs->drv;
2329 
2330     if (!drv)
2331         return -ENOMEDIUM;
2332 
2333     if (drv->has_variable_length) {
2334         int ret = refresh_total_sectors(bs, bs->total_sectors);
2335         if (ret < 0) {
2336             return ret;
2337         }
2338     }
2339     return bs->total_sectors;
2340 }
2341 
2342 /**
2343  * Return length in bytes on success, -errno on error.
2344  * The length is always a multiple of BDRV_SECTOR_SIZE.
2345  */
2346 int64_t bdrv_getlength(BlockDriverState *bs)
2347 {
2348     int64_t ret = bdrv_nb_sectors(bs);
2349 
2350     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2351     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2352 }
2353 
2354 /* return 0 as number of sectors if no device present or error */
2355 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2356 {
2357     int64_t nb_sectors = bdrv_nb_sectors(bs);
2358 
2359     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2360 }
2361 
2362 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2363                        BlockdevOnError on_write_error)
2364 {
2365     bs->on_read_error = on_read_error;
2366     bs->on_write_error = on_write_error;
2367 }
2368 
2369 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2370 {
2371     return is_read ? bs->on_read_error : bs->on_write_error;
2372 }
2373 
2374 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2375 {
2376     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2377 
2378     switch (on_err) {
2379     case BLOCKDEV_ON_ERROR_ENOSPC:
2380         return (error == ENOSPC) ?
2381                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2382     case BLOCKDEV_ON_ERROR_STOP:
2383         return BLOCK_ERROR_ACTION_STOP;
2384     case BLOCKDEV_ON_ERROR_REPORT:
2385         return BLOCK_ERROR_ACTION_REPORT;
2386     case BLOCKDEV_ON_ERROR_IGNORE:
2387         return BLOCK_ERROR_ACTION_IGNORE;
2388     default:
2389         abort();
2390     }
2391 }
2392 
2393 static void send_qmp_error_event(BlockDriverState *bs,
2394                                  BlockErrorAction action,
2395                                  bool is_read, int error)
2396 {
2397     IoOperationType optype;
2398 
2399     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2400     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2401                                    bdrv_iostatus_is_enabled(bs),
2402                                    error == ENOSPC, strerror(error),
2403                                    &error_abort);
2404 }
2405 
2406 /* This is done by device models because, while the block layer knows
2407  * about the error, it does not know whether an operation comes from
2408  * the device or the block layer (from a job, for example).
2409  */
2410 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2411                        bool is_read, int error)
2412 {
2413     assert(error >= 0);
2414 
2415     if (action == BLOCK_ERROR_ACTION_STOP) {
2416         /* First set the iostatus, so that "info block" returns an iostatus
2417          * that matches the events raised so far (an additional error iostatus
2418          * is fine, but not a lost one).
2419          */
2420         bdrv_iostatus_set_err(bs, error);
2421 
2422         /* Then raise the request to stop the VM and the event.
2423          * qemu_system_vmstop_request_prepare has two effects.  First,
2424          * it ensures that the STOP event always comes after the
2425          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
2426          * can observe the STOP event and do a "cont" before the STOP
2427          * event is issued, the VM will not stop.  In this case, vm_start()
2428          * also ensures that the STOP/RESUME pair of events is emitted.
2429          */
2430         qemu_system_vmstop_request_prepare();
2431         send_qmp_error_event(bs, action, is_read, error);
2432         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2433     } else {
2434         send_qmp_error_event(bs, action, is_read, error);
2435     }
2436 }
2437 
2438 int bdrv_is_read_only(BlockDriverState *bs)
2439 {
2440     return bs->read_only;
2441 }
2442 
2443 int bdrv_is_sg(BlockDriverState *bs)
2444 {
2445     return bs->sg;
2446 }
2447 
2448 int bdrv_enable_write_cache(BlockDriverState *bs)
2449 {
2450     return bs->enable_write_cache;
2451 }
2452 
2453 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2454 {
2455     bs->enable_write_cache = wce;
2456 
2457     /* so a reopen() will preserve wce */
2458     if (wce) {
2459         bs->open_flags |= BDRV_O_CACHE_WB;
2460     } else {
2461         bs->open_flags &= ~BDRV_O_CACHE_WB;
2462     }
2463 }
2464 
2465 int bdrv_is_encrypted(BlockDriverState *bs)
2466 {
2467     if (bs->backing_hd && bs->backing_hd->encrypted)
2468         return 1;
2469     return bs->encrypted;
2470 }
2471 
2472 int bdrv_key_required(BlockDriverState *bs)
2473 {
2474     BlockDriverState *backing_hd = bs->backing_hd;
2475 
2476     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2477         return 1;
2478     return (bs->encrypted && !bs->valid_key);
2479 }
2480 
2481 int bdrv_set_key(BlockDriverState *bs, const char *key)
2482 {
2483     int ret;
2484     if (bs->backing_hd && bs->backing_hd->encrypted) {
2485         ret = bdrv_set_key(bs->backing_hd, key);
2486         if (ret < 0)
2487             return ret;
2488         if (!bs->encrypted)
2489             return 0;
2490     }
2491     if (!bs->encrypted) {
2492         return -EINVAL;
2493     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2494         return -ENOMEDIUM;
2495     }
2496     ret = bs->drv->bdrv_set_key(bs, key);
2497     if (ret < 0) {
2498         bs->valid_key = 0;
2499     } else if (!bs->valid_key) {
2500         bs->valid_key = 1;
2501         if (bs->blk) {
2502             /* call the change callback now, we skipped it on open */
2503             blk_dev_change_media_cb(bs->blk, true);
2504         }
2505     }
2506     return ret;
2507 }
2508 
2509 /*
2510  * Provide an encryption key for @bs.
2511  * If @key is non-null:
2512  *     If @bs is not encrypted, fail.
2513  *     Else if the key is invalid, fail.
2514  *     Else set @bs's key to @key, replacing the existing key, if any.
2515  * If @key is null:
2516  *     If @bs is encrypted and still lacks a key, fail.
2517  *     Else do nothing.
2518  * On failure, store an error object through @errp if non-null.
2519  */
2520 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2521 {
2522     if (key) {
2523         if (!bdrv_is_encrypted(bs)) {
2524             error_setg(errp, "Node '%s' is not encrypted",
2525                       bdrv_get_device_or_node_name(bs));
2526         } else if (bdrv_set_key(bs, key) < 0) {
2527             error_set(errp, QERR_INVALID_PASSWORD);
2528         }
2529     } else {
2530         if (bdrv_key_required(bs)) {
2531             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2532                       "'%s' (%s) is encrypted",
2533                       bdrv_get_device_or_node_name(bs),
2534                       bdrv_get_encrypted_filename(bs));
2535         }
2536     }
2537 }
2538 
2539 const char *bdrv_get_format_name(BlockDriverState *bs)
2540 {
2541     return bs->drv ? bs->drv->format_name : NULL;
2542 }
2543 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() passes pointers to the array elements, so @a and @b are really
 * 'const char *const *'; they must be dereferenced once before the strings
 * can be compared. Returns the strcmp() ordering of the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2548 
2549 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2550                          void *opaque)
2551 {
2552     BlockDriver *drv;
2553     int count = 0;
2554     int i;
2555     const char **formats = NULL;
2556 
2557     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2558         if (drv->format_name) {
2559             bool found = false;
2560             int i = count;
2561             while (formats && i && !found) {
2562                 found = !strcmp(formats[--i], drv->format_name);
2563             }
2564 
2565             if (!found) {
2566                 formats = g_renew(const char *, formats, count + 1);
2567                 formats[count++] = drv->format_name;
2568             }
2569         }
2570     }
2571 
2572     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2573 
2574     for (i = 0; i < count; i++) {
2575         it(opaque, formats[i]);
2576     }
2577 
2578     g_free(formats);
2579 }
2580 
2581 /* This function is to find a node in the bs graph */
2582 BlockDriverState *bdrv_find_node(const char *node_name)
2583 {
2584     BlockDriverState *bs;
2585 
2586     assert(node_name);
2587 
2588     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2589         if (!strcmp(node_name, bs->node_name)) {
2590             return bs;
2591         }
2592     }
2593     return NULL;
2594 }
2595 
2596 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2597 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2598 {
2599     BlockDeviceInfoList *list, *entry;
2600     BlockDriverState *bs;
2601 
2602     list = NULL;
2603     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2604         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2605         if (!info) {
2606             qapi_free_BlockDeviceInfoList(list);
2607             return NULL;
2608         }
2609         entry = g_malloc0(sizeof(*entry));
2610         entry->value = info;
2611         entry->next = list;
2612         list = entry;
2613     }
2614 
2615     return list;
2616 }
2617 
2618 BlockDriverState *bdrv_lookup_bs(const char *device,
2619                                  const char *node_name,
2620                                  Error **errp)
2621 {
2622     BlockBackend *blk;
2623     BlockDriverState *bs;
2624 
2625     if (device) {
2626         blk = blk_by_name(device);
2627 
2628         if (blk) {
2629             return blk_bs(blk);
2630         }
2631     }
2632 
2633     if (node_name) {
2634         bs = bdrv_find_node(node_name);
2635 
2636         if (bs) {
2637             return bs;
2638         }
2639     }
2640 
2641     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2642                      device ? device : "",
2643                      node_name ? node_name : "");
2644     return NULL;
2645 }
2646 
2647 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2648  * return false.  If either argument is NULL, return false. */
2649 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2650 {
2651     while (top && top != base) {
2652         top = top->backing_hd;
2653     }
2654 
2655     return top != NULL;
2656 }
2657 
2658 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2659 {
2660     if (!bs) {
2661         return QTAILQ_FIRST(&graph_bdrv_states);
2662     }
2663     return QTAILQ_NEXT(bs, node_list);
2664 }
2665 
2666 BlockDriverState *bdrv_next(BlockDriverState *bs)
2667 {
2668     if (!bs) {
2669         return QTAILQ_FIRST(&bdrv_states);
2670     }
2671     return QTAILQ_NEXT(bs, device_list);
2672 }
2673 
2674 const char *bdrv_get_node_name(const BlockDriverState *bs)
2675 {
2676     return bs->node_name;
2677 }
2678 
2679 /* TODO check what callers really want: bs->node_name or blk_name() */
2680 const char *bdrv_get_device_name(const BlockDriverState *bs)
2681 {
2682     return bs->blk ? blk_name(bs->blk) : "";
2683 }
2684 
2685 /* This can be used to identify nodes that might not have a device
2686  * name associated. Since node and device names live in the same
2687  * namespace, the result is unambiguous. The exception is if both are
2688  * absent, then this returns an empty (non-null) string. */
2689 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2690 {
2691     return bs->blk ? blk_name(bs->blk) : bs->node_name;
2692 }
2693 
2694 int bdrv_get_flags(BlockDriverState *bs)
2695 {
2696     return bs->open_flags;
2697 }
2698 
2699 int bdrv_has_zero_init_1(BlockDriverState *bs)
2700 {
2701     return 1;
2702 }
2703 
2704 int bdrv_has_zero_init(BlockDriverState *bs)
2705 {
2706     assert(bs->drv);
2707 
2708     /* If BS is a copy on write image, it is initialized to
2709        the contents of the base image, which may not be zeroes.  */
2710     if (bs->backing_hd) {
2711         return 0;
2712     }
2713     if (bs->drv->bdrv_has_zero_init) {
2714         return bs->drv->bdrv_has_zero_init(bs);
2715     }
2716 
2717     /* safe default */
2718     return 0;
2719 }
2720 
2721 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2722 {
2723     BlockDriverInfo bdi;
2724 
2725     if (bs->backing_hd) {
2726         return false;
2727     }
2728 
2729     if (bdrv_get_info(bs, &bdi) == 0) {
2730         return bdi.unallocated_blocks_are_zero;
2731     }
2732 
2733     return false;
2734 }
2735 
2736 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2737 {
2738     BlockDriverInfo bdi;
2739 
2740     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2741         return false;
2742     }
2743 
2744     if (bdrv_get_info(bs, &bdi) == 0) {
2745         return bdi.can_write_zeroes_with_unmap;
2746     }
2747 
2748     return false;
2749 }
2750 
2751 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2752 {
2753     if (bs->backing_hd && bs->backing_hd->encrypted)
2754         return bs->backing_file;
2755     else if (bs->encrypted)
2756         return bs->filename;
2757     else
2758         return NULL;
2759 }
2760 
2761 void bdrv_get_backing_filename(BlockDriverState *bs,
2762                                char *filename, int filename_size)
2763 {
2764     pstrcpy(filename, filename_size, bs->backing_file);
2765 }
2766 
2767 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2768 {
2769     BlockDriver *drv = bs->drv;
2770     if (!drv)
2771         return -ENOMEDIUM;
2772     if (!drv->bdrv_get_info)
2773         return -ENOTSUP;
2774     memset(bdi, 0, sizeof(*bdi));
2775     return drv->bdrv_get_info(bs, bdi);
2776 }
2777 
2778 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2779 {
2780     BlockDriver *drv = bs->drv;
2781     if (drv && drv->bdrv_get_specific_info) {
2782         return drv->bdrv_get_specific_info(bs);
2783     }
2784     return NULL;
2785 }
2786 
2787 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2788 {
2789     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2790         return;
2791     }
2792 
2793     bs->drv->bdrv_debug_event(bs, event);
2794 }
2795 
2796 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2797                           const char *tag)
2798 {
2799     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2800         bs = bs->file;
2801     }
2802 
2803     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2804         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2805     }
2806 
2807     return -ENOTSUP;
2808 }
2809 
2810 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2811 {
2812     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2813         bs = bs->file;
2814     }
2815 
2816     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2817         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2818     }
2819 
2820     return -ENOTSUP;
2821 }
2822 
2823 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2824 {
2825     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2826         bs = bs->file;
2827     }
2828 
2829     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2830         return bs->drv->bdrv_debug_resume(bs, tag);
2831     }
2832 
2833     return -ENOTSUP;
2834 }
2835 
2836 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2837 {
2838     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2839         bs = bs->file;
2840     }
2841 
2842     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2843         return bs->drv->bdrv_debug_is_suspended(bs, tag);
2844     }
2845 
2846     return false;
2847 }
2848 
2849 int bdrv_is_snapshot(BlockDriverState *bs)
2850 {
2851     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2852 }
2853 
2854 /* backing_file can either be relative, or absolute, or a protocol.  If it is
2855  * relative, it must be relative to the chain.  So, passing in bs->filename
2856  * from a BDS as backing_file should not be done, as that may be relative to
2857  * the CWD rather than the chain. */
2858 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2859         const char *backing_file)
2860 {
2861     char *filename_full = NULL;
2862     char *backing_file_full = NULL;
2863     char *filename_tmp = NULL;
2864     int is_protocol = 0;
2865     BlockDriverState *curr_bs = NULL;
2866     BlockDriverState *retval = NULL;
2867 
2868     if (!bs || !bs->drv || !backing_file) {
2869         return NULL;
2870     }
2871 
2872     filename_full     = g_malloc(PATH_MAX);
2873     backing_file_full = g_malloc(PATH_MAX);
2874     filename_tmp      = g_malloc(PATH_MAX);
2875 
2876     is_protocol = path_has_protocol(backing_file);
2877 
2878     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
2879 
2880         /* If either of the filename paths is actually a protocol, then
2881          * compare unmodified paths; otherwise make paths relative */
2882         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2883             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2884                 retval = curr_bs->backing_hd;
2885                 break;
2886             }
2887         } else {
2888             /* If not an absolute filename path, make it relative to the current
2889              * image's filename path */
2890             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2891                          backing_file);
2892 
2893             /* We are going to compare absolute pathnames */
2894             if (!realpath(filename_tmp, filename_full)) {
2895                 continue;
2896             }
2897 
2898             /* We need to make sure the backing filename we are comparing against
2899              * is relative to the current image filename (or absolute) */
2900             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2901                          curr_bs->backing_file);
2902 
2903             if (!realpath(filename_tmp, backing_file_full)) {
2904                 continue;
2905             }
2906 
2907             if (strcmp(backing_file_full, filename_full) == 0) {
2908                 retval = curr_bs->backing_hd;
2909                 break;
2910             }
2911         }
2912     }
2913 
2914     g_free(filename_full);
2915     g_free(backing_file_full);
2916     g_free(filename_tmp);
2917     return retval;
2918 }
2919 
2920 int bdrv_get_backing_file_depth(BlockDriverState *bs)
2921 {
2922     if (!bs->drv) {
2923         return 0;
2924     }
2925 
2926     if (!bs->backing_hd) {
2927         return 0;
2928     }
2929 
2930     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
2931 }
2932 
2933 void bdrv_init(void)
2934 {
2935     module_call_init(MODULE_INIT_BLOCK);
2936 }
2937 
2938 void bdrv_init_with_whitelist(void)
2939 {
2940     use_bdrv_whitelist = 1;
2941     bdrv_init();
2942 }
2943 
2944 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
2945 {
2946     Error *local_err = NULL;
2947     int ret;
2948 
2949     if (!bs->drv)  {
2950         return;
2951     }
2952 
2953     if (!(bs->open_flags & BDRV_O_INCOMING)) {
2954         return;
2955     }
2956     bs->open_flags &= ~BDRV_O_INCOMING;
2957 
2958     if (bs->drv->bdrv_invalidate_cache) {
2959         bs->drv->bdrv_invalidate_cache(bs, &local_err);
2960     } else if (bs->file) {
2961         bdrv_invalidate_cache(bs->file, &local_err);
2962     }
2963     if (local_err) {
2964         error_propagate(errp, local_err);
2965         return;
2966     }
2967 
2968     ret = refresh_total_sectors(bs, bs->total_sectors);
2969     if (ret < 0) {
2970         error_setg_errno(errp, -ret, "Could not refresh total sector count");
2971         return;
2972     }
2973 }
2974 
2975 void bdrv_invalidate_cache_all(Error **errp)
2976 {
2977     BlockDriverState *bs;
2978     Error *local_err = NULL;
2979 
2980     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2981         AioContext *aio_context = bdrv_get_aio_context(bs);
2982 
2983         aio_context_acquire(aio_context);
2984         bdrv_invalidate_cache(bs, &local_err);
2985         aio_context_release(aio_context);
2986         if (local_err) {
2987             error_propagate(errp, local_err);
2988             return;
2989         }
2990     }
2991 }
2992 
2993 /**************************************************************/
2994 /* removable device support */
2995 
2996 /**
2997  * Return TRUE if the media is present
2998  */
2999 int bdrv_is_inserted(BlockDriverState *bs)
3000 {
3001     BlockDriver *drv = bs->drv;
3002 
3003     if (!drv)
3004         return 0;
3005     if (!drv->bdrv_is_inserted)
3006         return 1;
3007     return drv->bdrv_is_inserted(bs);
3008 }
3009 
3010 /**
3011  * Return whether the media changed since the last call to this
3012  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3013  */
3014 int bdrv_media_changed(BlockDriverState *bs)
3015 {
3016     BlockDriver *drv = bs->drv;
3017 
3018     if (drv && drv->bdrv_media_changed) {
3019         return drv->bdrv_media_changed(bs);
3020     }
3021     return -ENOTSUP;
3022 }
3023 
3024 /**
3025  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3026  */
3027 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3028 {
3029     BlockDriver *drv = bs->drv;
3030     const char *device_name;
3031 
3032     if (drv && drv->bdrv_eject) {
3033         drv->bdrv_eject(bs, eject_flag);
3034     }
3035 
3036     device_name = bdrv_get_device_name(bs);
3037     if (device_name[0] != '\0') {
3038         qapi_event_send_device_tray_moved(device_name,
3039                                           eject_flag, &error_abort);
3040     }
3041 }
3042 
3043 /**
3044  * Lock or unlock the media (if it is locked, the user won't be able
3045  * to eject it manually).
3046  */
3047 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3048 {
3049     BlockDriver *drv = bs->drv;
3050 
3051     trace_bdrv_lock_medium(bs, locked);
3052 
3053     if (drv && drv->bdrv_lock_medium) {
3054         drv->bdrv_lock_medium(bs, locked);
3055     }
3056 }
3057 
3058 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3059 {
3060     bs->guest_block_size = align;
3061 }
3062 
3063 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3064 {
3065     BdrvDirtyBitmap *bm;
3066 
3067     assert(name);
3068     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3069         if (bm->name && !strcmp(name, bm->name)) {
3070             return bm;
3071         }
3072     }
3073     return NULL;
3074 }
3075 
3076 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3077 {
3078     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3079     g_free(bitmap->name);
3080     bitmap->name = NULL;
3081 }
3082 
3083 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3084                                           uint32_t granularity,
3085                                           const char *name,
3086                                           Error **errp)
3087 {
3088     int64_t bitmap_size;
3089     BdrvDirtyBitmap *bitmap;
3090     uint32_t sector_granularity;
3091 
3092     assert((granularity & (granularity - 1)) == 0);
3093 
3094     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3095         error_setg(errp, "Bitmap already exists: %s", name);
3096         return NULL;
3097     }
3098     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3099     assert(sector_granularity);
3100     bitmap_size = bdrv_nb_sectors(bs);
3101     if (bitmap_size < 0) {
3102         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3103         errno = -bitmap_size;
3104         return NULL;
3105     }
3106     bitmap = g_new0(BdrvDirtyBitmap, 1);
3107     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3108     bitmap->size = bitmap_size;
3109     bitmap->name = g_strdup(name);
3110     bitmap->disabled = false;
3111     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3112     return bitmap;
3113 }
3114 
3115 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3116 {
3117     return bitmap->successor;
3118 }
3119 
3120 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3121 {
3122     return !(bitmap->disabled || bitmap->successor);
3123 }
3124 
3125 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3126 {
3127     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3128         return DIRTY_BITMAP_STATUS_FROZEN;
3129     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3130         return DIRTY_BITMAP_STATUS_DISABLED;
3131     } else {
3132         return DIRTY_BITMAP_STATUS_ACTIVE;
3133     }
3134 }
3135 
3136 /**
3137  * Create a successor bitmap destined to replace this bitmap after an operation.
3138  * Requires that the bitmap is not frozen and has no successor.
3139  */
3140 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3141                                        BdrvDirtyBitmap *bitmap, Error **errp)
3142 {
3143     uint64_t granularity;
3144     BdrvDirtyBitmap *child;
3145 
3146     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3147         error_setg(errp, "Cannot create a successor for a bitmap that is "
3148                    "currently frozen");
3149         return -1;
3150     }
3151     assert(!bitmap->successor);
3152 
3153     /* Create an anonymous successor */
3154     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3155     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3156     if (!child) {
3157         return -1;
3158     }
3159 
3160     /* Successor will be on or off based on our current state. */
3161     child->disabled = bitmap->disabled;
3162 
3163     /* Install the successor and freeze the parent */
3164     bitmap->successor = child;
3165     return 0;
3166 }
3167 
3168 /**
3169  * For a bitmap with a successor, yield our name to the successor,
3170  * delete the old bitmap, and return a handle to the new bitmap.
3171  */
3172 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3173                                             BdrvDirtyBitmap *bitmap,
3174                                             Error **errp)
3175 {
3176     char *name;
3177     BdrvDirtyBitmap *successor = bitmap->successor;
3178 
3179     if (successor == NULL) {
3180         error_setg(errp, "Cannot relinquish control if "
3181                    "there's no successor present");
3182         return NULL;
3183     }
3184 
3185     name = bitmap->name;
3186     bitmap->name = NULL;
3187     successor->name = name;
3188     bitmap->successor = NULL;
3189     bdrv_release_dirty_bitmap(bs, bitmap);
3190 
3191     return successor;
3192 }
3193 
3194 /**
3195  * In cases of failure where we can no longer safely delete the parent,
3196  * we may wish to re-join the parent and child/successor.
3197  * The merged parent will be un-frozen, but not explicitly re-enabled.
3198  */
3199 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3200                                            BdrvDirtyBitmap *parent,
3201                                            Error **errp)
3202 {
3203     BdrvDirtyBitmap *successor = parent->successor;
3204 
3205     if (!successor) {
3206         error_setg(errp, "Cannot reclaim a successor when none is present");
3207         return NULL;
3208     }
3209 
3210     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3211         error_setg(errp, "Merging of parent and successor bitmap failed");
3212         return NULL;
3213     }
3214     bdrv_release_dirty_bitmap(bs, successor);
3215     parent->successor = NULL;
3216 
3217     return parent;
3218 }
3219 
3220 /**
3221  * Truncates _all_ bitmaps attached to a BDS.
3222  */
3223 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3224 {
3225     BdrvDirtyBitmap *bitmap;
3226     uint64_t size = bdrv_nb_sectors(bs);
3227 
3228     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3229         if (bdrv_dirty_bitmap_frozen(bitmap)) {
3230             continue;
3231         }
3232         hbitmap_truncate(bitmap->bitmap, size);
3233     }
3234 }
3235 
3236 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3237 {
3238     BdrvDirtyBitmap *bm, *next;
3239     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3240         if (bm == bitmap) {
3241             assert(!bdrv_dirty_bitmap_frozen(bm));
3242             QLIST_REMOVE(bitmap, list);
3243             hbitmap_free(bitmap->bitmap);
3244             g_free(bitmap->name);
3245             g_free(bitmap);
3246             return;
3247         }
3248     }
3249 }
3250 
3251 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3252 {
3253     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3254     bitmap->disabled = true;
3255 }
3256 
3257 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3258 {
3259     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3260     bitmap->disabled = false;
3261 }
3262 
3263 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3264 {
3265     BdrvDirtyBitmap *bm;
3266     BlockDirtyInfoList *list = NULL;
3267     BlockDirtyInfoList **plist = &list;
3268 
3269     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3270         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3271         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3272         info->count = bdrv_get_dirty_count(bm);
3273         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3274         info->has_name = !!bm->name;
3275         info->name = g_strdup(bm->name);
3276         info->status = bdrv_dirty_bitmap_status(bm);
3277         entry->value = info;
3278         *plist = entry;
3279         plist = &entry->next;
3280     }
3281 
3282     return list;
3283 }
3284 
3285 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3286 {
3287     if (bitmap) {
3288         return hbitmap_get(bitmap->bitmap, sector);
3289     } else {
3290         return 0;
3291     }
3292 }
3293 
3294 /**
3295  * Chooses a default granularity based on the existing cluster size,
3296  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3297  * is no cluster size information available.
3298  */
3299 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3300 {
3301     BlockDriverInfo bdi;
3302     uint32_t granularity;
3303 
3304     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3305         granularity = MAX(4096, bdi.cluster_size);
3306         granularity = MIN(65536, granularity);
3307     } else {
3308         granularity = 65536;
3309     }
3310 
3311     return granularity;
3312 }
3313 
3314 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3315 {
3316     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3317 }
3318 
3319 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3320 {
3321     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3322 }
3323 
3324 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3325                            int64_t cur_sector, int nr_sectors)
3326 {
3327     assert(bdrv_dirty_bitmap_enabled(bitmap));
3328     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3329 }
3330 
3331 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3332                              int64_t cur_sector, int nr_sectors)
3333 {
3334     assert(bdrv_dirty_bitmap_enabled(bitmap));
3335     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3336 }
3337 
3338 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3339 {
3340     assert(bdrv_dirty_bitmap_enabled(bitmap));
3341     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
3342 }
3343 
3344 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3345                     int nr_sectors)
3346 {
3347     BdrvDirtyBitmap *bitmap;
3348     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3349         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3350             continue;
3351         }
3352         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3353     }
3354 }
3355 
3356 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3357                       int nr_sectors)
3358 {
3359     BdrvDirtyBitmap *bitmap;
3360     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3361         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3362             continue;
3363         }
3364         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3365     }
3366 }
3367 
3368 /**
3369  * Advance an HBitmapIter to an arbitrary offset.
3370  */
3371 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3372 {
3373     assert(hbi->hb);
3374     hbitmap_iter_init(hbi, hbi->hb, offset);
3375 }
3376 
3377 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3378 {
3379     return hbitmap_count(bitmap->bitmap);
3380 }
3381 
3382 /* Get a reference to bs */
3383 void bdrv_ref(BlockDriverState *bs)
3384 {
3385     bs->refcnt++;
3386 }
3387 
3388 /* Release a previously grabbed reference to bs.
3389  * If after releasing, reference count is zero, the BlockDriverState is
3390  * deleted. */
3391 void bdrv_unref(BlockDriverState *bs)
3392 {
3393     if (!bs) {
3394         return;
3395     }
3396     assert(bs->refcnt > 0);
3397     if (--bs->refcnt == 0) {
3398         bdrv_delete(bs);
3399     }
3400 }
3401 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Reason for the block; bdrv_op_unblock() matches entries by this
     * pointer and bdrv_op_is_blocked() prints it in its error message */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3406 
3407 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3408 {
3409     BdrvOpBlocker *blocker;
3410     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3411     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3412         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3413         if (errp) {
3414             error_setg(errp, "Node '%s' is busy: %s",
3415                        bdrv_get_device_or_node_name(bs),
3416                        error_get_pretty(blocker->reason));
3417         }
3418         return true;
3419     }
3420     return false;
3421 }
3422 
3423 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3424 {
3425     BdrvOpBlocker *blocker;
3426     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3427 
3428     blocker = g_new0(BdrvOpBlocker, 1);
3429     blocker->reason = reason;
3430     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3431 }
3432 
3433 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3434 {
3435     BdrvOpBlocker *blocker, *next;
3436     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3437     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3438         if (blocker->reason == reason) {
3439             QLIST_REMOVE(blocker, list);
3440             g_free(blocker);
3441         }
3442     }
3443 }
3444 
3445 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3446 {
3447     int i;
3448     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3449         bdrv_op_block(bs, i, reason);
3450     }
3451 }
3452 
3453 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3454 {
3455     int i;
3456     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3457         bdrv_op_unblock(bs, i, reason);
3458     }
3459 }
3460 
3461 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3462 {
3463     int i;
3464 
3465     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3466         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3467             return false;
3468         }
3469     }
3470     return true;
3471 }
3472 
3473 void bdrv_iostatus_enable(BlockDriverState *bs)
3474 {
3475     bs->iostatus_enabled = true;
3476     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3477 }
3478 
3479 /* The I/O status is only enabled if the drive explicitly
3480  * enables it _and_ the VM is configured to stop on errors */
3481 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3482 {
3483     return (bs->iostatus_enabled &&
3484            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3485             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
3486             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3487 }
3488 
3489 void bdrv_iostatus_disable(BlockDriverState *bs)
3490 {
3491     bs->iostatus_enabled = false;
3492 }
3493 
3494 void bdrv_iostatus_reset(BlockDriverState *bs)
3495 {
3496     if (bdrv_iostatus_is_enabled(bs)) {
3497         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3498         if (bs->job) {
3499             block_job_iostatus_reset(bs->job);
3500         }
3501     }
3502 }
3503 
3504 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3505 {
3506     assert(bdrv_iostatus_is_enabled(bs));
3507     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3508         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3509                                          BLOCK_DEVICE_IO_STATUS_FAILED;
3510     }
3511 }
3512 
/*
 * Create a new image file.
 *
 * @filename:      name of the image to create; also used to look up the
 *                 protocol driver
 * @fmt:           name of the format driver
 * @base_filename: backing file name, or NULL
 * @base_fmt:      backing file format name, or NULL
 * @options:       option string parsed into the creation options, or NULL
 * @img_size:      value stored as BLOCK_OPT_SIZE before @options is parsed
 * @flags:         open flags; used (minus BDRV_O_RDWR, BDRV_O_SNAPSHOT and
 *                 BDRV_O_NO_BACKING) when the backing file has to be opened
 *                 to determine the image size
 * @errp:          set on failure
 * @quiet:         when false, a "Formatting ..." line is printed to stdout
 *
 * Both the format driver and the protocol driver must provide creation
 * options.  If the resulting size option is -1 and a backing file is
 * configured, the size is taken from the backing file.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Combine the format and protocol creation options into one list */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* Report the parse error right away and hand the caller a
             * generic message instead */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    /* Explicit backing file / format arguments are set after the option
     * string has been parsed */
    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Refuse an image that would be its own backing file (plain string
     * comparison of the two names) */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE(review): this declaration shadows the outer 'size' */
            int64_t size;
            int back_flags;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* Use the backing file's length as the new image's size */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; a still-pending local error is handed to
     * error_propagate() */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3681 
3682 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3683 {
3684     return bs->aio_context;
3685 }
3686 
3687 void bdrv_detach_aio_context(BlockDriverState *bs)
3688 {
3689     BdrvAioNotifier *baf;
3690 
3691     if (!bs->drv) {
3692         return;
3693     }
3694 
3695     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3696         baf->detach_aio_context(baf->opaque);
3697     }
3698 
3699     if (bs->io_limits_enabled) {
3700         throttle_timers_detach_aio_context(&bs->throttle_timers);
3701     }
3702     if (bs->drv->bdrv_detach_aio_context) {
3703         bs->drv->bdrv_detach_aio_context(bs);
3704     }
3705     if (bs->file) {
3706         bdrv_detach_aio_context(bs->file);
3707     }
3708     if (bs->backing_hd) {
3709         bdrv_detach_aio_context(bs->backing_hd);
3710     }
3711 
3712     bs->aio_context = NULL;
3713 }
3714 
3715 void bdrv_attach_aio_context(BlockDriverState *bs,
3716                              AioContext *new_context)
3717 {
3718     BdrvAioNotifier *ban;
3719 
3720     if (!bs->drv) {
3721         return;
3722     }
3723 
3724     bs->aio_context = new_context;
3725 
3726     if (bs->backing_hd) {
3727         bdrv_attach_aio_context(bs->backing_hd, new_context);
3728     }
3729     if (bs->file) {
3730         bdrv_attach_aio_context(bs->file, new_context);
3731     }
3732     if (bs->drv->bdrv_attach_aio_context) {
3733         bs->drv->bdrv_attach_aio_context(bs, new_context);
3734     }
3735     if (bs->io_limits_enabled) {
3736         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3737     }
3738 
3739     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3740         ban->attached_aio_context(new_context, ban->opaque);
3741     }
3742 }
3743 
3744 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3745 {
3746     bdrv_drain_all(); /* ensure there are no in-flight requests */
3747 
3748     bdrv_detach_aio_context(bs);
3749 
3750     /* This function executes in the old AioContext so acquire the new one in
3751      * case it runs in a different thread.
3752      */
3753     aio_context_acquire(new_context);
3754     bdrv_attach_aio_context(bs, new_context);
3755     aio_context_release(new_context);
3756 }
3757 
3758 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3759         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3760         void (*detach_aio_context)(void *opaque), void *opaque)
3761 {
3762     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3763     *ban = (BdrvAioNotifier){
3764         .attached_aio_context = attached_aio_context,
3765         .detach_aio_context   = detach_aio_context,
3766         .opaque               = opaque
3767     };
3768 
3769     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3770 }
3771 
3772 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3773                                       void (*attached_aio_context)(AioContext *,
3774                                                                    void *),
3775                                       void (*detach_aio_context)(void *),
3776                                       void *opaque)
3777 {
3778     BdrvAioNotifier *ban, *ban_next;
3779 
3780     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3781         if (ban->attached_aio_context == attached_aio_context &&
3782             ban->detach_aio_context   == detach_aio_context   &&
3783             ban->opaque               == opaque)
3784         {
3785             QLIST_REMOVE(ban, list);
3786             g_free(ban);
3787 
3788             return;
3789         }
3790     }
3791 
3792     abort();
3793 }
3794 
3795 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3796                        BlockDriverAmendStatusCB *status_cb)
3797 {
3798     if (!bs->drv->bdrv_amend_options) {
3799         return -ENOTSUP;
3800     }
3801     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3802 }
3803 
3804 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3805  * of block filter and by bdrv_is_first_non_filter.
3806  * It is used to test if the given bs is the candidate or recurse more in the
3807  * node graph.
3808  */
3809 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3810                                       BlockDriverState *candidate)
3811 {
3812     /* return false if basic checks fails */
3813     if (!bs || !bs->drv) {
3814         return false;
3815     }
3816 
3817     /* the code reached a non block filter driver -> check if the bs is
3818      * the same as the candidate. It's the recursion termination condition.
3819      */
3820     if (!bs->drv->is_filter) {
3821         return bs == candidate;
3822     }
3823     /* Down this path the driver is a block filter driver */
3824 
3825     /* If the block filter recursion method is defined use it to recurse down
3826      * the node graph.
3827      */
3828     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3829         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3830     }
3831 
3832     /* the driver is a block filter but don't allow to recurse -> return false
3833      */
3834     return false;
3835 }
3836 
3837 /* This function checks if the candidate is the first non filter bs down it's
3838  * bs chain. Since we don't have pointers to parents it explore all bs chains
3839  * from the top. Some filters can choose not to pass down the recursion.
3840  */
3841 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3842 {
3843     BlockDriverState *bs;
3844 
3845     /* walk down the bs forest recursively */
3846     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3847         bool perm;
3848 
3849         /* try to recurse in this top level bs */
3850         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3851 
3852         /* candidate is the first non filter */
3853         if (perm) {
3854             return true;
3855         }
3856     }
3857 
3858     return false;
3859 }
3860 
3861 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
3862 {
3863     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3864     AioContext *aio_context;
3865 
3866     if (!to_replace_bs) {
3867         error_setg(errp, "Node name '%s' not found", node_name);
3868         return NULL;
3869     }
3870 
3871     aio_context = bdrv_get_aio_context(to_replace_bs);
3872     aio_context_acquire(aio_context);
3873 
3874     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3875         to_replace_bs = NULL;
3876         goto out;
3877     }
3878 
3879     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3880      * most non filter in order to prevent data corruption.
3881      * Another benefit is that this tests exclude backing files which are
3882      * blocked by the backing blockers.
3883      */
3884     if (!bdrv_is_first_non_filter(to_replace_bs)) {
3885         error_setg(errp, "Only top most non filter can be replaced");
3886         to_replace_bs = NULL;
3887         goto out;
3888     }
3889 
3890 out:
3891     aio_context_release(aio_context);
3892     return to_replace_bs;
3893 }
3894 
3895 static bool append_open_options(QDict *d, BlockDriverState *bs)
3896 {
3897     const QDictEntry *entry;
3898     bool found_any = false;
3899 
3900     for (entry = qdict_first(bs->options); entry;
3901          entry = qdict_next(bs->options, entry))
3902     {
3903         /* Only take options for this level and exclude all non-driver-specific
3904          * options */
3905         if (!strchr(qdict_entry_key(entry), '.') &&
3906             strcmp(qdict_entry_key(entry), "node-name"))
3907         {
3908             qobject_incref(qdict_entry_value(entry));
3909             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3910             found_any = true;
3911         }
3912     }
3913 
3914     return found_any;
3915 }
3916 
3917 /* Updates the following BDS fields:
3918  *  - exact_filename: A filename which may be used for opening a block device
3919  *                    which (mostly) equals the given BDS (even without any
3920  *                    other options; so reading and writing must return the same
3921  *                    results, but caching etc. may be different)
3922  *  - full_open_options: Options which, when given when opening a block device
3923  *                       (without a filename), result in a BDS (mostly)
3924  *                       equalling the given one
3925  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3926  *              full_open_options is converted to a JSON object, prefixed with
3927  *              "json:" (for use through the JSON pseudo protocol) and put here.
3928  */
3929 void bdrv_refresh_filename(BlockDriverState *bs)
3930 {
3931     BlockDriver *drv = bs->drv;
3932     QDict *opts;
3933 
3934     if (!drv) {
3935         return;
3936     }
3937 
3938     /* This BDS's file name will most probably depend on its file's name, so
3939      * refresh that first */
3940     if (bs->file) {
3941         bdrv_refresh_filename(bs->file);
3942     }
3943 
3944     if (drv->bdrv_refresh_filename) {
3945         /* Obsolete information is of no use here, so drop the old file name
3946          * information before refreshing it */
3947         bs->exact_filename[0] = '\0';
3948         if (bs->full_open_options) {
3949             QDECREF(bs->full_open_options);
3950             bs->full_open_options = NULL;
3951         }
3952 
3953         drv->bdrv_refresh_filename(bs);
3954     } else if (bs->file) {
3955         /* Try to reconstruct valid information from the underlying file */
3956         bool has_open_options;
3957 
3958         bs->exact_filename[0] = '\0';
3959         if (bs->full_open_options) {
3960             QDECREF(bs->full_open_options);
3961             bs->full_open_options = NULL;
3962         }
3963 
3964         opts = qdict_new();
3965         has_open_options = append_open_options(opts, bs);
3966 
3967         /* If no specific options have been given for this BDS, the filename of
3968          * the underlying file should suffice for this one as well */
3969         if (bs->file->exact_filename[0] && !has_open_options) {
3970             strcpy(bs->exact_filename, bs->file->exact_filename);
3971         }
3972         /* Reconstructing the full options QDict is simple for most format block
3973          * drivers, as long as the full options are known for the underlying
3974          * file BDS. The full options QDict of that file BDS should somehow
3975          * contain a representation of the filename, therefore the following
3976          * suffices without querying the (exact_)filename of this BDS. */
3977         if (bs->file->full_open_options) {
3978             qdict_put_obj(opts, "driver",
3979                           QOBJECT(qstring_from_str(drv->format_name)));
3980             QINCREF(bs->file->full_open_options);
3981             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
3982 
3983             bs->full_open_options = opts;
3984         } else {
3985             QDECREF(opts);
3986         }
3987     } else if (!bs->full_open_options && qdict_size(bs->options)) {
3988         /* There is no underlying file BDS (at least referenced by BDS.file),
3989          * so the full options QDict should be equal to the options given
3990          * specifically for this block device when it was opened (plus the
3991          * driver specification).
3992          * Because those options don't change, there is no need to update
3993          * full_open_options when it's already set. */
3994 
3995         opts = qdict_new();
3996         append_open_options(opts, bs);
3997         qdict_put_obj(opts, "driver",
3998                       QOBJECT(qstring_from_str(drv->format_name)));
3999 
4000         if (bs->exact_filename[0]) {
4001             /* This may not work for all block protocol drivers (some may
4002              * require this filename to be parsed), but we have to find some
4003              * default solution here, so just include it. If some block driver
4004              * does not support pure options without any filename at all or
4005              * needs some special format of the options QDict, it needs to
4006              * implement the driver-specific bdrv_refresh_filename() function.
4007              */
4008             qdict_put_obj(opts, "filename",
4009                           QOBJECT(qstring_from_str(bs->exact_filename)));
4010         }
4011 
4012         bs->full_open_options = opts;
4013     }
4014 
4015     if (bs->exact_filename[0]) {
4016         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4017     } else if (bs->full_open_options) {
4018         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4019         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4020                  qstring_get_str(json));
4021         QDECREF(json);
4022     }
4023 }
4024 
4025 /* This accessor function purpose is to allow the device models to access the
4026  * BlockAcctStats structure embedded inside a BlockDriverState without being
4027  * aware of the BlockDriverState structure layout.
4028  * It will go away when the BlockAcctStats structure will be moved inside
4029  * the device models.
4030  */
4031 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4032 {
4033     return &bs->stats;
4034 }
4035