xref: /openbmc/qemu/block.c (revision f348b6d1a53e5271cf1c9f9acc4646b4b98c1771)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "qemu/error-report.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "qemu/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
42 #include "qemu/cutils.h"
43 #include "qemu/id.h"
44 
45 #ifdef CONFIG_BSD
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
48 #ifndef __DragonFly__
49 #include <sys/disk.h>
50 #endif
51 #endif
52 
53 #ifdef _WIN32
54 #include <windows.h>
55 #endif
56 
/*
 * Sentinel return value: bdrv_create() initializes CreateCo.ret with it and
 * polls until the creation coroutine has replaced it with the real result.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Nodes that were given a node name; filled by bdrv_assign_node_name() */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState allocated by bdrv_new() is appended to this list */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is further down in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

/* Forward declaration; the definition is further down in this file */
static void bdrv_close(BlockDriverState *bs);
77 
78 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    int is_letter = (drive >= 'a' && drive <= 'z') ||
                    (drive >= 'A' && drive <= 'Z');

    /* filename[1] is only inspected when the first character is a letter */
    return is_letter && filename[1] == ':';
}
85 
86 int is_windows_drive(const char *filename)
87 {
88     if (is_windows_drive_prefix(filename) &&
89         filename[2] == '\0')
90         return 1;
91     if (strstart(filename, "\\\\.\\", NULL) ||
92         strstart(filename, "//./", NULL))
93         return 1;
94     return 0;
95 }
96 #endif
97 
98 size_t bdrv_opt_mem_align(BlockDriverState *bs)
99 {
100     if (!bs || !bs->drv) {
101         /* page size or 4k (hdd sector size) should be on the safe side */
102         return MAX(4096, getpagesize());
103     }
104 
105     return bs->bl.opt_mem_alignment;
106 }
107 
108 size_t bdrv_min_mem_align(BlockDriverState *bs)
109 {
110     if (!bs || !bs->drv) {
111         /* page size or 4k (hdd sector size) should be on the safe side */
112         return MAX(4096, getpagesize());
113     }
114 
115     return bs->bl.min_mem_alignment;
116 }
117 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * The answer is 1 when a ':' appears in @path before any path separator
 * ('/', and on Windows also '\'), 0 otherwise.  On Windows, drive
 * specifications such as "c:" are never treated as a protocol.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* Index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, separators)] == ':';
}
135 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\'
 * and anything accepted by is_windows_drive() or
 * is_windows_drive_prefix() count as absolute as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (first == '/' || first == '\\');
#else
    return (first == '/');
#endif
}
148 
/*
 * Build the path of @filename in @dest (a buffer of @dest_size bytes).
 *
 * An absolute @filename is copied as is.  Otherwise @filename is
 * interpreted relative to @base_path: the leading part of @base_path up to
 * and including its last path separator (or, if that comes later, its
 * first ':') is copied to @dest and @filename is appended.  URL are
 * supported.
 *
 * Nothing is written if @dest_size is not positive; the result is
 * truncated to fit into @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position right after the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position right after the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix of base_path is longer */
    prefix_end = after_sep > after_colon ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192 
193 void bdrv_get_full_backing_filename_from_filename(const char *backed,
194                                                   const char *backing,
195                                                   char *dest, size_t sz,
196                                                   Error **errp)
197 {
198     if (backing[0] == '\0' || path_has_protocol(backing) ||
199         path_is_absolute(backing))
200     {
201         pstrcpy(dest, sz, backing);
202     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203         error_setg(errp, "Cannot use relative backing file names for '%s'",
204                    backed);
205     } else {
206         path_combine(dest, sz, backed, backing);
207     }
208 }
209 
210 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211                                     Error **errp)
212 {
213     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214 
215     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216                                                  dest, sz, errp);
217 }
218 
/*
 * Register the block driver @bdrv: bdrv_setup_io_funcs() is run on it first,
 * then it is inserted at the head of the bdrv_drivers list, which the
 * lookup and probing functions in this file iterate over.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
225 
/*
 * Allocate a new BlockDriverState.  At this revision this is a plain
 * wrapper around bdrv_new().
 */
BlockDriverState *bdrv_new_root(void)
{
    return bdrv_new();
}
230 
231 BlockDriverState *bdrv_new(void)
232 {
233     BlockDriverState *bs;
234     int i;
235 
236     bs = g_new0(BlockDriverState, 1);
237     QLIST_INIT(&bs->dirty_bitmaps);
238     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
239         QLIST_INIT(&bs->op_blockers[i]);
240     }
241     notifier_with_return_list_init(&bs->before_write_notifiers);
242     qemu_co_queue_init(&bs->throttled_reqs[0]);
243     qemu_co_queue_init(&bs->throttled_reqs[1]);
244     bs->refcnt = 1;
245     bs->aio_context = qemu_get_aio_context();
246 
247     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
248 
249     return bs;
250 }
251 
252 BlockDriver *bdrv_find_format(const char *format_name)
253 {
254     BlockDriver *drv1;
255     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
256         if (!strcmp(drv1->format_name, format_name)) {
257             return drv1;
258         }
259     }
260     return NULL;
261 }
262 
263 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
264 {
265     static const char *whitelist_rw[] = {
266         CONFIG_BDRV_RW_WHITELIST
267     };
268     static const char *whitelist_ro[] = {
269         CONFIG_BDRV_RO_WHITELIST
270     };
271     const char **p;
272 
273     if (!whitelist_rw[0] && !whitelist_ro[0]) {
274         return 1;               /* no whitelist, anything goes */
275     }
276 
277     for (p = whitelist_rw; *p; p++) {
278         if (!strcmp(drv->format_name, *p)) {
279             return 1;
280         }
281     }
282     if (read_only) {
283         for (p = whitelist_ro; *p; p++) {
284             if (!strcmp(drv->format_name, *p)) {
285                 return 1;
286             }
287         }
288     }
289     return 0;
290 }
291 
/*
 * Arguments and results exchanged between bdrv_create() and the coroutine
 * entry point bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* passed to the callback; bdrv_create() owns the copy */
    QemuOpts *opts;     /* creation options passed to the callback */
    int ret;            /* NOT_DONE until the callback's result is stored */
    Error *err;         /* error reported by the callback, if any */
} CreateCo;
299 
300 static void coroutine_fn bdrv_create_co_entry(void *opaque)
301 {
302     Error *local_err = NULL;
303     int ret;
304 
305     CreateCo *cco = opaque;
306     assert(cco->drv);
307 
308     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
309     if (local_err) {
310         error_propagate(&cco->err, local_err);
311     }
312     cco->ret = ret;
313 }
314 
315 int bdrv_create(BlockDriver *drv, const char* filename,
316                 QemuOpts *opts, Error **errp)
317 {
318     int ret;
319 
320     Coroutine *co;
321     CreateCo cco = {
322         .drv = drv,
323         .filename = g_strdup(filename),
324         .opts = opts,
325         .ret = NOT_DONE,
326         .err = NULL,
327     };
328 
329     if (!drv->bdrv_create) {
330         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
331         ret = -ENOTSUP;
332         goto out;
333     }
334 
335     if (qemu_in_coroutine()) {
336         /* Fast-path if already in coroutine context */
337         bdrv_create_co_entry(&cco);
338     } else {
339         co = qemu_coroutine_create(bdrv_create_co_entry);
340         qemu_coroutine_enter(co, &cco);
341         while (cco.ret == NOT_DONE) {
342             aio_poll(qemu_get_aio_context(), true);
343         }
344     }
345 
346     ret = cco.ret;
347     if (ret < 0) {
348         if (cco.err) {
349             error_propagate(errp, cco.err);
350         } else {
351             error_setg_errno(errp, -ret, "Could not create image");
352         }
353     }
354 
355 out:
356     g_free(cco.filename);
357     return ret;
358 }
359 
360 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
361 {
362     BlockDriver *drv;
363     Error *local_err = NULL;
364     int ret;
365 
366     drv = bdrv_find_protocol(filename, true, errp);
367     if (drv == NULL) {
368         return -ENOENT;
369     }
370 
371     ret = bdrv_create(drv, filename, opts, &local_err);
372     if (local_err) {
373         error_propagate(errp, local_err);
374     }
375     return ret;
376 }
377 
378 /**
379  * Try to get @bs's logical and physical block size.
380  * On success, store them in @bsz struct and return 0.
381  * On failure return -errno.
382  * @bs must not be empty.
383  */
384 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
385 {
386     BlockDriver *drv = bs->drv;
387 
388     if (drv && drv->bdrv_probe_blocksizes) {
389         return drv->bdrv_probe_blocksizes(bs, bsz);
390     }
391 
392     return -ENOTSUP;
393 }
394 
395 /**
396  * Try to get @bs's geometry (cyls, heads, sectors).
397  * On success, store them in @geo struct and return 0.
398  * On failure return -errno.
399  * @bs must not be empty.
400  */
401 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
402 {
403     BlockDriver *drv = bs->drv;
404 
405     if (drv && drv->bdrv_probe_geometry) {
406         return drv->bdrv_probe_geometry(bs, geo);
407     }
408 
409     return -ENOTSUP;
410 }
411 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.  On Windows @size must be at least MAX_PATH.  Elsewhere
 * the file is created in $TMPDIR, or in /var/tmp if TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    /*
     * A negative size would be converted to a huge size_t by snprintf()
     * and defeat the bounds check below.
     */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
447 
448 /*
449  * Detect host devices. By convention, /dev/cdrom[N] is always
450  * recognized as a host CDROM.
451  */
452 static BlockDriver *find_hdev_driver(const char *filename)
453 {
454     int score_max = 0, score;
455     BlockDriver *drv = NULL, *d;
456 
457     QLIST_FOREACH(d, &bdrv_drivers, list) {
458         if (d->bdrv_probe_device) {
459             score = d->bdrv_probe_device(filename);
460             if (score > score_max) {
461                 score_max = score;
462                 drv = d;
463             }
464         }
465     }
466 
467     return drv;
468 }
469 
470 BlockDriver *bdrv_find_protocol(const char *filename,
471                                 bool allow_protocol_prefix,
472                                 Error **errp)
473 {
474     BlockDriver *drv1;
475     char protocol[128];
476     int len;
477     const char *p;
478 
479     /* TODO Drivers without bdrv_file_open must be specified explicitly */
480 
481     /*
482      * XXX(hch): we really should not let host device detection
483      * override an explicit protocol specification, but moving this
484      * later breaks access to device names with colons in them.
485      * Thanks to the brain-dead persistent naming schemes on udev-
486      * based Linux systems those actually are quite common.
487      */
488     drv1 = find_hdev_driver(filename);
489     if (drv1) {
490         return drv1;
491     }
492 
493     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
494         return &bdrv_file;
495     }
496 
497     p = strchr(filename, ':');
498     assert(p != NULL);
499     len = p - filename;
500     if (len > sizeof(protocol) - 1)
501         len = sizeof(protocol) - 1;
502     memcpy(protocol, filename, len);
503     protocol[len] = '\0';
504     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
505         if (drv1->protocol_name &&
506             !strcmp(drv1->protocol_name, protocol)) {
507             return drv1;
508         }
509     }
510 
511     error_setg(errp, "Unknown protocol '%s'", protocol);
512     return NULL;
513 }
514 
515 /*
516  * Guess image format by probing its contents.
517  * This is not a good idea when your image is raw (CVE-2008-2004), but
518  * we do it anyway for backward compatibility.
519  *
520  * @buf         contains the image's first @buf_size bytes.
521  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
522  *              but can be smaller if the image file is smaller)
523  * @filename    is its filename.
524  *
525  * For all block drivers, call the bdrv_probe() method to get its
526  * probing score.
527  * Return the first block driver with the highest probing score.
528  */
529 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
530                             const char *filename)
531 {
532     int score_max = 0, score;
533     BlockDriver *drv = NULL, *d;
534 
535     QLIST_FOREACH(d, &bdrv_drivers, list) {
536         if (d->bdrv_probe) {
537             score = d->bdrv_probe(buf, buf_size, filename);
538             if (score > score_max) {
539                 score_max = score;
540                 drv = d;
541             }
542         }
543     }
544 
545     return drv;
546 }
547 
548 static int find_image_format(BlockDriverState *bs, const char *filename,
549                              BlockDriver **pdrv, Error **errp)
550 {
551     BlockDriver *drv;
552     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
553     int ret = 0;
554 
555     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
556     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
557         *pdrv = &bdrv_raw;
558         return ret;
559     }
560 
561     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
562     if (ret < 0) {
563         error_setg_errno(errp, -ret, "Could not read image for determining its "
564                          "format");
565         *pdrv = NULL;
566         return ret;
567     }
568 
569     drv = bdrv_probe_all(buf, ret, filename);
570     if (!drv) {
571         error_setg(errp, "Could not determine image format: No compatible "
572                    "driver found");
573         ret = -ENOENT;
574     }
575     *pdrv = drv;
576     return ret;
577 }
578 
579 /**
580  * Set the current 'total_sectors' value
581  * Return 0 on success, -errno on error.
582  */
583 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
584 {
585     BlockDriver *drv = bs->drv;
586 
587     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
588     if (bdrv_is_sg(bs))
589         return 0;
590 
591     /* query actual device if possible, otherwise just trust the hint */
592     if (drv->bdrv_getlength) {
593         int64_t length = drv->bdrv_getlength(bs);
594         if (length < 0) {
595             return length;
596         }
597         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
598     }
599 
600     bs->total_sectors = hint;
601     return 0;
602 }
603 
604 /**
605  * Combines a QDict of new block driver @options with any missing options taken
606  * from @old_options, so that leaving out an option defaults to its old value.
607  */
608 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
609                               QDict *old_options)
610 {
611     if (bs->drv && bs->drv->bdrv_join_options) {
612         bs->drv->bdrv_join_options(options, old_options);
613     } else {
614         qdict_join(options, old_options, false);
615     }
616 }
617 
618 /**
619  * Set open flags for a given discard mode
620  *
621  * Return 0 on success, -1 if the discard mode was invalid.
622  */
623 int bdrv_parse_discard_flags(const char *mode, int *flags)
624 {
625     *flags &= ~BDRV_O_UNMAP;
626 
627     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
628         /* do nothing */
629     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
630         *flags |= BDRV_O_UNMAP;
631     } else {
632         return -1;
633     }
634 
635     return 0;
636 }
637 
638 /**
639  * Set open flags for a given cache mode
640  *
641  * Return 0 on success, -1 if the cache mode was invalid.
642  */
643 int bdrv_parse_cache_flags(const char *mode, int *flags)
644 {
645     *flags &= ~BDRV_O_CACHE_MASK;
646 
647     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
648         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
649     } else if (!strcmp(mode, "directsync")) {
650         *flags |= BDRV_O_NOCACHE;
651     } else if (!strcmp(mode, "writeback")) {
652         *flags |= BDRV_O_CACHE_WB;
653     } else if (!strcmp(mode, "unsafe")) {
654         *flags |= BDRV_O_CACHE_WB;
655         *flags |= BDRV_O_NO_FLUSH;
656     } else if (!strcmp(mode, "writethrough")) {
657         /* this is the default */
658     } else {
659         return -1;
660     }
661 
662     return 0;
663 }
664 
665 /*
666  * Returns the options and flags that a temporary snapshot should get, based on
667  * the originally requested flags (the originally requested image will have
668  * flags like a backing file)
669  */
670 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
671                                        int parent_flags, QDict *parent_options)
672 {
673     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
674 
675     /* For temporary files, unconditional cache=unsafe is fine */
676     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
677     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
678     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
679 }
680 
681 /*
682  * Returns the options and flags that bs->file should get if a protocol driver
683  * is expected, based on the given options and flags for the parent BDS
684  */
685 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
686                                    int parent_flags, QDict *parent_options)
687 {
688     int flags = parent_flags;
689 
690     /* Enable protocol handling, disable format probing for bs->file */
691     flags |= BDRV_O_PROTOCOL;
692 
693     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
694      * the parent. */
695     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
696     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
697 
698     /* Our block drivers take care to send flushes and respect unmap policy,
699      * so we can default to enable both on lower layers regardless of the
700      * corresponding parent options. */
701     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
702     flags |= BDRV_O_UNMAP;
703 
704     /* Clear flags that only apply to the top layer */
705     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
706 
707     *child_flags = flags;
708 }
709 
/* Child role whose options and flags are derived by bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
713 
714 /*
715  * Returns the options and flags that bs->file should get if the use of formats
716  * (and not only protocols) is permitted for it, based on the given options and
717  * flags for the parent BDS
718  */
719 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
720                                        int parent_flags, QDict *parent_options)
721 {
722     child_file.inherit_options(child_flags, child_options,
723                                parent_flags, parent_options);
724 
725     *child_flags &= ~BDRV_O_PROTOCOL;
726 }
727 
/*
 * Child role whose options and flags are derived by
 * bdrv_inherited_fmt_options()
 */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
731 
732 /*
733  * Returns the options and flags that bs->backing should get, based on the
734  * given options and flags for the parent BDS
735  */
736 static void bdrv_backing_options(int *child_flags, QDict *child_options,
737                                  int parent_flags, QDict *parent_options)
738 {
739     int flags = parent_flags;
740 
741     /* The cache mode is inherited unmodified for backing files */
742     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
743     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
744     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
745 
746     /* backing files always opened read-only */
747     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
748 
749     /* snapshot=on is handled on the top layer */
750     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
751 
752     *child_flags = flags;
753 }
754 
/* Child role whose options and flags are derived by bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
758 
759 static int bdrv_open_flags(BlockDriverState *bs, int flags)
760 {
761     int open_flags = flags | BDRV_O_CACHE_WB;
762 
763     /*
764      * Clear flags that are internal to the block layer before opening the
765      * image.
766      */
767     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
768 
769     /*
770      * Snapshots should be writable.
771      */
772     if (flags & BDRV_O_TEMPORARY) {
773         open_flags |= BDRV_O_RDWR;
774     }
775 
776     return open_flags;
777 }
778 
779 static void update_flags_from_options(int *flags, QemuOpts *opts)
780 {
781     *flags &= ~BDRV_O_CACHE_MASK;
782 
783     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
784     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
785         *flags |= BDRV_O_CACHE_WB;
786     }
787 
788     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
789     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
790         *flags |= BDRV_O_NO_FLUSH;
791     }
792 
793     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
794     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
795         *flags |= BDRV_O_NOCACHE;
796     }
797 }
798 
799 static void update_options_from_flags(QDict *options, int flags)
800 {
801     if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
802         qdict_put(options, BDRV_OPT_CACHE_WB,
803                   qbool_from_bool(flags & BDRV_O_CACHE_WB));
804     }
805     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
806         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
807                   qbool_from_bool(flags & BDRV_O_NOCACHE));
808     }
809     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
810         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
811                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
812     }
813 }
814 
815 static void bdrv_assign_node_name(BlockDriverState *bs,
816                                   const char *node_name,
817                                   Error **errp)
818 {
819     char *gen_node_name = NULL;
820 
821     if (!node_name) {
822         node_name = gen_node_name = id_generate(ID_BLOCK);
823     } else if (!id_wellformed(node_name)) {
824         /*
825          * Check for empty string or invalid characters, but not if it is
826          * generated (generated names use characters not available to the user)
827          */
828         error_setg(errp, "Invalid node name");
829         return;
830     }
831 
832     /* takes care of avoiding namespaces collisions */
833     if (blk_by_name(node_name)) {
834         error_setg(errp, "node-name=%s is conflicting with a device id",
835                    node_name);
836         goto out;
837     }
838 
839     /* takes care of avoiding duplicates node names */
840     if (bdrv_find_node(node_name)) {
841         error_setg(errp, "Duplicate node name");
842         goto out;
843     }
844 
845     /* copy node name into the bs and insert it into the graph list */
846     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
847     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
848 out:
849     g_free(gen_node_name);
850 }
851 
/*
 * Options that bdrv_open_common() absorbs from the options QDict:
 * the node name, the driver name and the three cache mode switches.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
884 
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node to open; bs->file must still be NULL and bs->open_flags
 *           must already hold the requested flags.
 * @file:    child that becomes bs->file when the driver is opened through
 *           its bdrv_open callback; must be NULL for drivers that provide
 *           bdrv_file_open.
 * @options: options for this node; must not be NULL, must not be
 *           bs->options, and must name a registered driver in "driver".
 * @errp:    receives the error on failure.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Pull the options listed in bdrv_runtime_opts out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* The caller is responsible for passing the name of an existing driver */
    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    /* With a file child its filename is used, else the "filename" option */
    if (file != NULL) {
        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    /* A NULL node name makes bdrv_assign_node_name() generate one */
    node_name = qemu_opt_get(opts, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    /* Initial values, set before the driver's open callback is invoked */
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);

    /*
     * When the whitelist is enforced, the error message distinguishes a
     * driver that is only whitelisted for read-only use from one that is
     * not whitelisted at all.
     */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        ret = -ENOTSUP;
        goto fail_opts;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    /* Record the filename; exact_filename starts out as a copy of it */
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must go through free_and_fail to undo this */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    /* Apply cache mode options */
    update_flags_from_options(&bs->open_flags, opts);
    bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        /* Drivers without bdrv_file_open need a file child to work on */
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    /* Prefer the driver's own error; otherwise build one from the errno */
    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    /* Only a warning: the open still succeeds for encrypted images */
    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));

    qemu_opts_del(opts);
    return 0;

    /* Undo the bs->file, bs->opaque and bs->drv assignments made above */
free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    /* Failures before the driver was attached only need to release opts */
fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1043 
1044 static QDict *parse_json_filename(const char *filename, Error **errp)
1045 {
1046     QObject *options_obj;
1047     QDict *options;
1048     int ret;
1049 
1050     ret = strstart(filename, "json:", &filename);
1051     assert(ret);
1052 
1053     options_obj = qobject_from_json(filename);
1054     if (!options_obj) {
1055         error_setg(errp, "Could not parse the JSON options");
1056         return NULL;
1057     }
1058 
1059     if (qobject_type(options_obj) != QTYPE_QDICT) {
1060         qobject_decref(options_obj);
1061         error_setg(errp, "Invalid JSON object given");
1062         return NULL;
1063     }
1064 
1065     options = qobject_to_qdict(options_obj);
1066     qdict_flatten(options);
1067 
1068     return options;
1069 }
1070 
1071 static void parse_json_protocol(QDict *options, const char **pfilename,
1072                                 Error **errp)
1073 {
1074     QDict *json_options;
1075     Error *local_err = NULL;
1076 
1077     /* Parse json: pseudo-protocol */
1078     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1079         return;
1080     }
1081 
1082     json_options = parse_json_filename(*pfilename, &local_err);
1083     if (local_err) {
1084         error_propagate(errp, local_err);
1085         return;
1086     }
1087 
1088     /* Options given in the filename have lower priority than options
1089      * specified directly */
1090     qdict_join(options, json_options, false);
1091     QDECREF(json_options);
1092     *pfilename = NULL;
1093 }
1094 
1095 /*
1096  * Fills in default options for opening images and converts the legacy
1097  * filename/flags pair to option QDict entries.
1098  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1099  * block driver has been specified explicitly.
1100  */
1101 static int bdrv_fill_options(QDict **options, const char *filename,
1102                              int *flags, Error **errp)
1103 {
1104     const char *drvname;
1105     bool protocol = *flags & BDRV_O_PROTOCOL;
1106     bool parse_filename = false;
1107     BlockDriver *drv = NULL;
1108     Error *local_err = NULL;
1109 
1110     drvname = qdict_get_try_str(*options, "driver");
1111     if (drvname) {
1112         drv = bdrv_find_format(drvname);
1113         if (!drv) {
1114             error_setg(errp, "Unknown driver '%s'", drvname);
1115             return -ENOENT;
1116         }
1117         /* If the user has explicitly specified the driver, this choice should
1118          * override the BDRV_O_PROTOCOL flag */
1119         protocol = drv->bdrv_file_open;
1120     }
1121 
1122     if (protocol) {
1123         *flags |= BDRV_O_PROTOCOL;
1124     } else {
1125         *flags &= ~BDRV_O_PROTOCOL;
1126     }
1127 
1128     /* Translate cache options from flags into options */
1129     update_options_from_flags(*options, *flags);
1130 
1131     /* Fetch the file name from the options QDict if necessary */
1132     if (protocol && filename) {
1133         if (!qdict_haskey(*options, "filename")) {
1134             qdict_put(*options, "filename", qstring_from_str(filename));
1135             parse_filename = true;
1136         } else {
1137             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1138                              "the same time");
1139             return -EINVAL;
1140         }
1141     }
1142 
1143     /* Find the right block driver */
1144     filename = qdict_get_try_str(*options, "filename");
1145 
1146     if (!drvname && protocol) {
1147         if (filename) {
1148             drv = bdrv_find_protocol(filename, parse_filename, errp);
1149             if (!drv) {
1150                 return -EINVAL;
1151             }
1152 
1153             drvname = drv->format_name;
1154             qdict_put(*options, "driver", qstring_from_str(drvname));
1155         } else {
1156             error_setg(errp, "Must specify either driver or file");
1157             return -EINVAL;
1158         }
1159     }
1160 
1161     assert(drv || !protocol);
1162 
1163     /* Driver-specific filename parsing */
1164     if (drv && drv->bdrv_parse_filename && parse_filename) {
1165         drv->bdrv_parse_filename(filename, *options, &local_err);
1166         if (local_err) {
1167             error_propagate(errp, local_err);
1168             return -EINVAL;
1169         }
1170 
1171         if (!drv->bdrv_needs_filename) {
1172             qdict_del(*options, "filename");
1173         }
1174     }
1175 
1176     return 0;
1177 }
1178 
1179 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1180                                   const char *child_name,
1181                                   const BdrvChildRole *child_role)
1182 {
1183     BdrvChild *child = g_new(BdrvChild, 1);
1184     *child = (BdrvChild) {
1185         .bs     = child_bs,
1186         .name   = g_strdup(child_name),
1187         .role   = child_role,
1188     };
1189 
1190     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1191 
1192     return child;
1193 }
1194 
1195 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1196                                     BlockDriverState *child_bs,
1197                                     const char *child_name,
1198                                     const BdrvChildRole *child_role)
1199 {
1200     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1201     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1202     return child;
1203 }
1204 
1205 static void bdrv_detach_child(BdrvChild *child)
1206 {
1207     if (child->next.le_prev) {
1208         QLIST_REMOVE(child, next);
1209         child->next.le_prev = NULL;
1210     }
1211     QLIST_REMOVE(child, next_parent);
1212     g_free(child->name);
1213     g_free(child);
1214 }
1215 
1216 void bdrv_root_unref_child(BdrvChild *child)
1217 {
1218     BlockDriverState *child_bs;
1219 
1220     child_bs = child->bs;
1221     bdrv_detach_child(child);
1222     bdrv_unref(child_bs);
1223 }
1224 
1225 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1226 {
1227     if (child == NULL) {
1228         return;
1229     }
1230 
1231     if (child->bs->inherits_from == parent) {
1232         child->bs->inherits_from = NULL;
1233     }
1234 
1235     bdrv_root_unref_child(child);
1236 }
1237 
1238 /*
1239  * Sets the backing file link of a BDS. A new reference is created; callers
1240  * which don't need their own reference any more must call bdrv_unref().
1241  */
1242 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1243 {
1244     if (backing_hd) {
1245         bdrv_ref(backing_hd);
1246     }
1247 
1248     if (bs->backing) {
1249         assert(bs->backing_blocker);
1250         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1251         bdrv_unref_child(bs, bs->backing);
1252     } else if (backing_hd) {
1253         error_setg(&bs->backing_blocker,
1254                    "node is used as backing hd of '%s'",
1255                    bdrv_get_device_or_node_name(bs));
1256     }
1257 
1258     if (!backing_hd) {
1259         error_free(bs->backing_blocker);
1260         bs->backing_blocker = NULL;
1261         bs->backing = NULL;
1262         goto out;
1263     }
1264     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1265     bs->open_flags &= ~BDRV_O_NO_BACKING;
1266     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1267     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1268             backing_hd->drv ? backing_hd->drv->format_name : "");
1269 
1270     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1271     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1272     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1273                     bs->backing_blocker);
1274 out:
1275     bdrv_refresh_limits(bs, NULL);
1276 }
1277 
1278 /*
1279  * Opens the backing file for a BlockDriverState if not yet open
1280  *
1281  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1282  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1283  * itself, all options starting with "${bdref_key}." are considered part of the
1284  * BlockdevRef.
1285  *
1286  * TODO Can this be unified with bdrv_open_image()?
1287  */
1288 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1289                            const char *bdref_key, Error **errp)
1290 {
1291     char *backing_filename = g_malloc0(PATH_MAX);
1292     char *bdref_key_dot;
1293     const char *reference = NULL;
1294     int ret = 0;
1295     BlockDriverState *backing_hd;
1296     QDict *options;
1297     QDict *tmp_parent_options = NULL;
1298     Error *local_err = NULL;
1299 
1300     if (bs->backing != NULL) {
1301         goto free_exit;
1302     }
1303 
1304     /* NULL means an empty set of options */
1305     if (parent_options == NULL) {
1306         tmp_parent_options = qdict_new();
1307         parent_options = tmp_parent_options;
1308     }
1309 
1310     bs->open_flags &= ~BDRV_O_NO_BACKING;
1311 
1312     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1313     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1314     g_free(bdref_key_dot);
1315 
1316     reference = qdict_get_try_str(parent_options, bdref_key);
1317     if (reference || qdict_haskey(options, "file.filename")) {
1318         backing_filename[0] = '\0';
1319     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1320         QDECREF(options);
1321         goto free_exit;
1322     } else {
1323         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1324                                        &local_err);
1325         if (local_err) {
1326             ret = -EINVAL;
1327             error_propagate(errp, local_err);
1328             QDECREF(options);
1329             goto free_exit;
1330         }
1331     }
1332 
1333     if (!bs->drv || !bs->drv->supports_backing) {
1334         ret = -EINVAL;
1335         error_setg(errp, "Driver doesn't support backing files");
1336         QDECREF(options);
1337         goto free_exit;
1338     }
1339 
1340     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1341         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1342     }
1343 
1344     backing_hd = NULL;
1345     ret = bdrv_open_inherit(&backing_hd,
1346                             *backing_filename ? backing_filename : NULL,
1347                             reference, options, 0, bs, &child_backing,
1348                             errp);
1349     if (ret < 0) {
1350         bs->open_flags |= BDRV_O_NO_BACKING;
1351         error_prepend(errp, "Could not open backing file: ");
1352         goto free_exit;
1353     }
1354 
1355     /* Hook up the backing file link; drop our reference, bs owns the
1356      * backing_hd reference now */
1357     bdrv_set_backing_hd(bs, backing_hd);
1358     bdrv_unref(backing_hd);
1359 
1360     qdict_del(parent_options, bdref_key);
1361 
1362 free_exit:
1363     g_free(backing_filename);
1364     QDECREF(tmp_parent_options);
1365     return ret;
1366 }
1367 
1368 /*
1369  * Opens a disk image whose options are given as BlockdevRef in another block
1370  * device's options.
1371  *
1372  * If allow_none is true, no image will be opened if filename is false and no
1373  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1374  *
1375  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1376  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1377  * itself, all options starting with "${bdref_key}." are considered part of the
1378  * BlockdevRef.
1379  *
1380  * The BlockdevRef will be removed from the options QDict.
1381  */
1382 BdrvChild *bdrv_open_child(const char *filename,
1383                            QDict *options, const char *bdref_key,
1384                            BlockDriverState* parent,
1385                            const BdrvChildRole *child_role,
1386                            bool allow_none, Error **errp)
1387 {
1388     BdrvChild *c = NULL;
1389     BlockDriverState *bs;
1390     QDict *image_options;
1391     int ret;
1392     char *bdref_key_dot;
1393     const char *reference;
1394 
1395     assert(child_role != NULL);
1396 
1397     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1398     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1399     g_free(bdref_key_dot);
1400 
1401     reference = qdict_get_try_str(options, bdref_key);
1402     if (!filename && !reference && !qdict_size(image_options)) {
1403         if (!allow_none) {
1404             error_setg(errp, "A block device must be specified for \"%s\"",
1405                        bdref_key);
1406         }
1407         QDECREF(image_options);
1408         goto done;
1409     }
1410 
1411     bs = NULL;
1412     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1413                             parent, child_role, errp);
1414     if (ret < 0) {
1415         goto done;
1416     }
1417 
1418     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1419 
1420 done:
1421     qdict_del(options, bdref_key);
1422     return c;
1423 }
1424 
1425 static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1426                                      QDict *snapshot_options, Error **errp)
1427 {
1428     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1429     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1430     int64_t total_size;
1431     QemuOpts *opts = NULL;
1432     BlockDriverState *bs_snapshot;
1433     Error *local_err = NULL;
1434     int ret;
1435 
1436     /* if snapshot, we create a temporary backing file and open it
1437        instead of opening 'filename' directly */
1438 
1439     /* Get the required size from the image */
1440     total_size = bdrv_getlength(bs);
1441     if (total_size < 0) {
1442         ret = total_size;
1443         error_setg_errno(errp, -total_size, "Could not get image size");
1444         goto out;
1445     }
1446 
1447     /* Create the temporary image */
1448     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1449     if (ret < 0) {
1450         error_setg_errno(errp, -ret, "Could not get temporary filename");
1451         goto out;
1452     }
1453 
1454     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1455                             &error_abort);
1456     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1457     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1458     qemu_opts_del(opts);
1459     if (ret < 0) {
1460         error_prepend(errp, "Could not create temporary overlay '%s': ",
1461                       tmp_filename);
1462         goto out;
1463     }
1464 
1465     /* Prepare options QDict for the temporary file */
1466     qdict_put(snapshot_options, "file.driver",
1467               qstring_from_str("file"));
1468     qdict_put(snapshot_options, "file.filename",
1469               qstring_from_str(tmp_filename));
1470     qdict_put(snapshot_options, "driver",
1471               qstring_from_str("qcow2"));
1472 
1473     bs_snapshot = bdrv_new();
1474 
1475     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1476                     flags, &local_err);
1477     snapshot_options = NULL;
1478     if (ret < 0) {
1479         error_propagate(errp, local_err);
1480         goto out;
1481     }
1482 
1483     bdrv_append(bs_snapshot, bs);
1484 
1485 out:
1486     QDECREF(snapshot_options);
1487     g_free(tmp_filename);
1488     return ret;
1489 }
1490 
1491 /*
1492  * Opens a disk image (raw, qcow2, vmdk, ...)
1493  *
1494  * options is a QDict of options to pass to the block drivers, or NULL for an
1495  * empty set of options. The reference to the QDict belongs to the block layer
1496  * after the call (even on failure), so if the caller intends to reuse the
1497  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1498  *
1499  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1500  * If it is not NULL, the referenced BDS will be reused.
1501  *
1502  * The reference parameter may be used to specify an existing block device which
1503  * should be opened. If specified, neither options nor a filename may be given,
1504  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1505  */
1506 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1507                              const char *reference, QDict *options, int flags,
1508                              BlockDriverState *parent,
1509                              const BdrvChildRole *child_role, Error **errp)
1510 {
1511     int ret;
1512     BdrvChild *file = NULL;
1513     BlockDriverState *bs;
1514     BlockDriver *drv = NULL;
1515     const char *drvname;
1516     const char *backing;
1517     Error *local_err = NULL;
1518     QDict *snapshot_options = NULL;
1519     int snapshot_flags = 0;
1520 
1521     assert(pbs);
1522     assert(!child_role || !flags);
1523     assert(!child_role == !parent);
1524 
1525     if (reference) {
1526         bool options_non_empty = options ? qdict_size(options) : false;
1527         QDECREF(options);
1528 
1529         if (*pbs) {
1530             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1531                        "another block device");
1532             return -EINVAL;
1533         }
1534 
1535         if (filename || options_non_empty) {
1536             error_setg(errp, "Cannot reference an existing block device with "
1537                        "additional options or a new filename");
1538             return -EINVAL;
1539         }
1540 
1541         bs = bdrv_lookup_bs(reference, reference, errp);
1542         if (!bs) {
1543             return -ENODEV;
1544         }
1545         bdrv_ref(bs);
1546         *pbs = bs;
1547         return 0;
1548     }
1549 
1550     if (*pbs) {
1551         bs = *pbs;
1552     } else {
1553         bs = bdrv_new();
1554     }
1555 
1556     /* NULL means an empty set of options */
1557     if (options == NULL) {
1558         options = qdict_new();
1559     }
1560 
1561     /* json: syntax counts as explicit options, as if in the QDict */
1562     parse_json_protocol(options, &filename, &local_err);
1563     if (local_err) {
1564         ret = -EINVAL;
1565         goto fail;
1566     }
1567 
1568     bs->explicit_options = qdict_clone_shallow(options);
1569 
1570     if (child_role) {
1571         bs->inherits_from = parent;
1572         child_role->inherit_options(&flags, options,
1573                                     parent->open_flags, parent->options);
1574     }
1575 
1576     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1577     if (local_err) {
1578         goto fail;
1579     }
1580 
1581     bs->open_flags = flags;
1582     bs->options = options;
1583     options = qdict_clone_shallow(options);
1584 
1585     /* Find the right image format driver */
1586     drvname = qdict_get_try_str(options, "driver");
1587     if (drvname) {
1588         drv = bdrv_find_format(drvname);
1589         if (!drv) {
1590             error_setg(errp, "Unknown driver: '%s'", drvname);
1591             ret = -EINVAL;
1592             goto fail;
1593         }
1594     }
1595 
1596     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1597 
1598     backing = qdict_get_try_str(options, "backing");
1599     if (backing && *backing == '\0') {
1600         flags |= BDRV_O_NO_BACKING;
1601         qdict_del(options, "backing");
1602     }
1603 
1604     /* Open image file without format layer */
1605     if ((flags & BDRV_O_PROTOCOL) == 0) {
1606         if (flags & BDRV_O_RDWR) {
1607             flags |= BDRV_O_ALLOW_RDWR;
1608         }
1609         if (flags & BDRV_O_SNAPSHOT) {
1610             snapshot_options = qdict_new();
1611             bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1612                                        flags, options);
1613             bdrv_backing_options(&flags, options, flags, options);
1614         }
1615 
1616         bs->open_flags = flags;
1617 
1618         file = bdrv_open_child(filename, options, "file", bs,
1619                                &child_file, true, &local_err);
1620         if (local_err) {
1621             ret = -EINVAL;
1622             goto fail;
1623         }
1624     }
1625 
1626     /* Image format probing */
1627     bs->probed = !drv;
1628     if (!drv && file) {
1629         ret = find_image_format(file->bs, filename, &drv, &local_err);
1630         if (ret < 0) {
1631             goto fail;
1632         }
1633         /*
1634          * This option update would logically belong in bdrv_fill_options(),
1635          * but we first need to open bs->file for the probing to work, while
1636          * opening bs->file already requires the (mostly) final set of options
1637          * so that cache mode etc. can be inherited.
1638          *
1639          * Adding the driver later is somewhat ugly, but it's not an option
1640          * that would ever be inherited, so it's correct. We just need to make
1641          * sure to update both bs->options (which has the full effective
1642          * options for bs) and options (which has file.* already removed).
1643          */
1644         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1645         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1646     } else if (!drv) {
1647         error_setg(errp, "Must specify either driver or file");
1648         ret = -EINVAL;
1649         goto fail;
1650     }
1651 
1652     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1653     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1654     /* file must be NULL if a protocol BDS is about to be created
1655      * (the inverse results in an error message from bdrv_open_common()) */
1656     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1657 
1658     /* Open the image */
1659     ret = bdrv_open_common(bs, file, options, &local_err);
1660     if (ret < 0) {
1661         goto fail;
1662     }
1663 
1664     if (file && (bs->file != file)) {
1665         bdrv_unref_child(bs, file);
1666         file = NULL;
1667     }
1668 
1669     /* If there is a backing file, use it */
1670     if ((flags & BDRV_O_NO_BACKING) == 0) {
1671         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1672         if (ret < 0) {
1673             goto close_and_fail;
1674         }
1675     }
1676 
1677     bdrv_refresh_filename(bs);
1678 
1679     /* Check if any unknown options were used */
1680     if (options && (qdict_size(options) != 0)) {
1681         const QDictEntry *entry = qdict_first(options);
1682         if (flags & BDRV_O_PROTOCOL) {
1683             error_setg(errp, "Block protocol '%s' doesn't support the option "
1684                        "'%s'", drv->format_name, entry->key);
1685         } else {
1686             error_setg(errp,
1687                        "Block format '%s' does not support the option '%s'",
1688                        drv->format_name, entry->key);
1689         }
1690 
1691         ret = -EINVAL;
1692         goto close_and_fail;
1693     }
1694 
1695     if (!bdrv_key_required(bs)) {
1696         if (bs->blk) {
1697             blk_dev_change_media_cb(bs->blk, true);
1698         }
1699     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1700                && !runstate_check(RUN_STATE_INMIGRATE)
1701                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1702         error_setg(errp,
1703                    "Guest must be stopped for opening of encrypted image");
1704         ret = -EBUSY;
1705         goto close_and_fail;
1706     }
1707 
1708     QDECREF(options);
1709     *pbs = bs;
1710 
1711     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1712      * temporary snapshot afterwards. */
1713     if (snapshot_flags) {
1714         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1715                                         &local_err);
1716         snapshot_options = NULL;
1717         if (local_err) {
1718             goto close_and_fail;
1719         }
1720     }
1721 
1722     return 0;
1723 
1724 fail:
1725     if (file != NULL) {
1726         bdrv_unref_child(bs, file);
1727     }
1728     QDECREF(snapshot_options);
1729     QDECREF(bs->explicit_options);
1730     QDECREF(bs->options);
1731     QDECREF(options);
1732     bs->options = NULL;
1733     if (!*pbs) {
1734         /* If *pbs is NULL, a new BDS has been created in this function and
1735            needs to be freed now. Otherwise, it does not need to be closed,
1736            since it has not really been opened yet. */
1737         bdrv_unref(bs);
1738     }
1739     if (local_err) {
1740         error_propagate(errp, local_err);
1741     }
1742     return ret;
1743 
1744 close_and_fail:
1745     /* See fail path, but now the BDS has to be always closed */
1746     if (*pbs) {
1747         bdrv_close(bs);
1748     } else {
1749         bdrv_unref(bs);
1750     }
1751     QDECREF(snapshot_options);
1752     QDECREF(options);
1753     if (local_err) {
1754         error_propagate(errp, local_err);
1755     }
1756     return ret;
1757 }
1758 
1759 int bdrv_open(BlockDriverState **pbs, const char *filename,
1760               const char *reference, QDict *options, int flags, Error **errp)
1761 {
1762     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1763                              NULL, errp);
1764 }
1765 
1766 typedef struct BlockReopenQueueEntry {
1767      bool prepared;
1768      BDRVReopenState state;
1769      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1770 } BlockReopenQueueEntry;
1771 
1772 /*
1773  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1774  * reopen of multiple devices.
1775  *
1776  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1777  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1778  * be created and initialized. This newly created BlockReopenQueue should be
1779  * passed back in for subsequent calls that are intended to be of the same
1780  * atomic 'set'.
1781  *
1782  * bs is the BlockDriverState to add to the reopen queue.
1783  *
1784  * options contains the changed options for the associated bs
1785  * (the BlockReopenQueue takes ownership)
1786  *
1787  * flags contains the open flags for the associated bs
1788  *
1789  * returns a pointer to bs_queue, which is either the newly allocated
1790  * bs_queue, or the existing bs_queue being used.
1791  *
1792  */
1793 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1794                                                  BlockDriverState *bs,
1795                                                  QDict *options,
1796                                                  int flags,
1797                                                  const BdrvChildRole *role,
1798                                                  QDict *parent_options,
1799                                                  int parent_flags)
1800 {
1801     assert(bs != NULL);
1802 
1803     BlockReopenQueueEntry *bs_entry;
1804     BdrvChild *child;
1805     QDict *old_options, *explicit_options;
1806 
1807     if (bs_queue == NULL) {
1808         bs_queue = g_new0(BlockReopenQueue, 1);
1809         QSIMPLEQ_INIT(bs_queue);
1810     }
1811 
1812     if (!options) {
1813         options = qdict_new();
1814     }
1815 
1816     /*
1817      * Precedence of options:
1818      * 1. Explicitly passed in options (highest)
1819      * 2. Set in flags (only for top level)
1820      * 3. Retained from explicitly set options of bs
1821      * 4. Inherited from parent node
1822      * 5. Retained from effective options of bs
1823      */
1824 
1825     if (!parent_options) {
1826         /*
1827          * Any setting represented by flags is always updated. If the
1828          * corresponding QDict option is set, it takes precedence. Otherwise
1829          * the flag is translated into a QDict option. The old setting of bs is
1830          * not considered.
1831          */
1832         update_options_from_flags(options, flags);
1833     }
1834 
1835     /* Old explicitly set values (don't overwrite by inherited value) */
1836     old_options = qdict_clone_shallow(bs->explicit_options);
1837     bdrv_join_options(bs, options, old_options);
1838     QDECREF(old_options);
1839 
1840     explicit_options = qdict_clone_shallow(options);
1841 
1842     /* Inherit from parent node */
1843     if (parent_options) {
1844         assert(!flags);
1845         role->inherit_options(&flags, options, parent_flags, parent_options);
1846     }
1847 
1848     /* Old values are used for options that aren't set yet */
1849     old_options = qdict_clone_shallow(bs->options);
1850     bdrv_join_options(bs, options, old_options);
1851     QDECREF(old_options);
1852 
1853     /* bdrv_open() masks this flag out */
1854     flags &= ~BDRV_O_PROTOCOL;
1855 
1856     QLIST_FOREACH(child, &bs->children, next) {
1857         QDict *new_child_options;
1858         char *child_key_dot;
1859 
1860         /* reopen can only change the options of block devices that were
1861          * implicitly created and inherited options. For other (referenced)
1862          * block devices, a syntax like "backing.foo" results in an error. */
1863         if (child->bs->inherits_from != bs) {
1864             continue;
1865         }
1866 
1867         child_key_dot = g_strdup_printf("%s.", child->name);
1868         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1869         g_free(child_key_dot);
1870 
1871         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1872                                 child->role, options, flags);
1873     }
1874 
1875     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1876     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1877 
1878     bs_entry->state.bs = bs;
1879     bs_entry->state.options = options;
1880     bs_entry->state.explicit_options = explicit_options;
1881     bs_entry->state.flags = flags;
1882 
1883     return bs_queue;
1884 }
1885 
1886 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1887                                     BlockDriverState *bs,
1888                                     QDict *options, int flags)
1889 {
1890     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1891                                    NULL, NULL, 0);
1892 }
1893 
1894 /*
1895  * Reopen multiple BlockDriverStates atomically & transactionally.
1896  *
1897  * The queue passed in (bs_queue) must have been built up previous
1898  * via bdrv_reopen_queue().
1899  *
1900  * Reopens all BDS specified in the queue, with the appropriate
1901  * flags.  All devices are prepared for reopen, and failure of any
1902  * device will cause all device changes to be abandonded, and intermediate
1903  * data cleaned up.
1904  *
1905  * If all devices prepare successfully, then the changes are committed
1906  * to all devices.
1907  *
1908  */
1909 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1910 {
1911     int ret = -1;
1912     BlockReopenQueueEntry *bs_entry, *next;
1913     Error *local_err = NULL;
1914 
1915     assert(bs_queue != NULL);
1916 
1917     bdrv_drain_all();
1918 
1919     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1920         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1921             error_propagate(errp, local_err);
1922             goto cleanup;
1923         }
1924         bs_entry->prepared = true;
1925     }
1926 
1927     /* If we reach this point, we have success and just need to apply the
1928      * changes
1929      */
1930     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1931         bdrv_reopen_commit(&bs_entry->state);
1932     }
1933 
1934     ret = 0;
1935 
1936 cleanup:
1937     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1938         if (ret && bs_entry->prepared) {
1939             bdrv_reopen_abort(&bs_entry->state);
1940         } else if (ret) {
1941             QDECREF(bs_entry->state.explicit_options);
1942         }
1943         QDECREF(bs_entry->state.options);
1944         g_free(bs_entry);
1945     }
1946     g_free(bs_queue);
1947     return ret;
1948 }
1949 
1950 
1951 /* Reopen a single BlockDriverState with the specified flags. */
1952 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1953 {
1954     int ret = -1;
1955     Error *local_err = NULL;
1956     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1957 
1958     ret = bdrv_reopen_multiple(queue, &local_err);
1959     if (local_err != NULL) {
1960         error_propagate(errp, local_err);
1961     }
1962     return ret;
1963 }
1964 
1965 
1966 /*
1967  * Prepares a BlockDriverState for reopen. All changes are staged in the
1968  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1969  * the block driver layer .bdrv_reopen_prepare()
1970  *
1971  * bs is the BlockDriverState to reopen
1972  * flags are the new open flags
1973  * queue is the reopen queue
1974  *
1975  * Returns 0 on success, non-zero on error.  On error errp will be set
1976  * as well.
1977  *
1978  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1979  * It is the responsibility of the caller to then call the abort() or
1980  * commit() for any other BDS that have been left in a prepare() state
1981  *
1982  */
1983 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1984                         Error **errp)
1985 {
1986     int ret = -1;
1987     Error *local_err = NULL;
1988     BlockDriver *drv;
1989     QemuOpts *opts;
1990     const char *value;
1991 
1992     assert(reopen_state != NULL);
1993     assert(reopen_state->bs->drv != NULL);
1994     drv = reopen_state->bs->drv;
1995 
1996     /* Process generic block layer options */
1997     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1998     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1999     if (local_err) {
2000         error_propagate(errp, local_err);
2001         ret = -EINVAL;
2002         goto error;
2003     }
2004 
2005     update_flags_from_options(&reopen_state->flags, opts);
2006 
2007     /* If a guest device is attached, it owns WCE */
2008     if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2009         bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2010         bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2011         if (old_wce != new_wce) {
2012             error_setg(errp, "Cannot change cache.writeback: Device attached");
2013             ret = -EINVAL;
2014             goto error;
2015         }
2016     }
2017 
2018     /* node-name and driver must be unchanged. Put them back into the QDict, so
2019      * that they are checked at the end of this function. */
2020     value = qemu_opt_get(opts, "node-name");
2021     if (value) {
2022         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2023     }
2024 
2025     value = qemu_opt_get(opts, "driver");
2026     if (value) {
2027         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2028     }
2029 
2030     /* if we are to stay read-only, do not allow permission change
2031      * to r/w */
2032     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2033         reopen_state->flags & BDRV_O_RDWR) {
2034         error_setg(errp, "Node '%s' is read only",
2035                    bdrv_get_device_or_node_name(reopen_state->bs));
2036         goto error;
2037     }
2038 
2039 
2040     ret = bdrv_flush(reopen_state->bs);
2041     if (ret) {
2042         error_setg_errno(errp, -ret, "Error flushing drive");
2043         goto error;
2044     }
2045 
2046     if (drv->bdrv_reopen_prepare) {
2047         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2048         if (ret) {
2049             if (local_err != NULL) {
2050                 error_propagate(errp, local_err);
2051             } else {
2052                 error_setg(errp, "failed while preparing to reopen image '%s'",
2053                            reopen_state->bs->filename);
2054             }
2055             goto error;
2056         }
2057     } else {
2058         /* It is currently mandatory to have a bdrv_reopen_prepare()
2059          * handler for each supported drv. */
2060         error_setg(errp, "Block format '%s' used by node '%s' "
2061                    "does not support reopening files", drv->format_name,
2062                    bdrv_get_device_or_node_name(reopen_state->bs));
2063         ret = -1;
2064         goto error;
2065     }
2066 
2067     /* Options that are not handled are only okay if they are unchanged
2068      * compared to the old state. It is expected that some options are only
2069      * used for the initial open, but not reopen (e.g. filename) */
2070     if (qdict_size(reopen_state->options)) {
2071         const QDictEntry *entry = qdict_first(reopen_state->options);
2072 
2073         do {
2074             QString *new_obj = qobject_to_qstring(entry->value);
2075             const char *new = qstring_get_str(new_obj);
2076             const char *old = qdict_get_try_str(reopen_state->bs->options,
2077                                                 entry->key);
2078 
2079             if (!old || strcmp(new, old)) {
2080                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2081                 ret = -EINVAL;
2082                 goto error;
2083             }
2084         } while ((entry = qdict_next(reopen_state->options, entry)));
2085     }
2086 
2087     ret = 0;
2088 
2089 error:
2090     qemu_opts_del(opts);
2091     return ret;
2092 }
2093 
2094 /*
2095  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2096  * makes them final by swapping the staging BlockDriverState contents into
2097  * the active BlockDriverState contents.
2098  */
2099 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2100 {
2101     BlockDriver *drv;
2102 
2103     assert(reopen_state != NULL);
2104     drv = reopen_state->bs->drv;
2105     assert(drv != NULL);
2106 
2107     /* If there are any driver level actions to take */
2108     if (drv->bdrv_reopen_commit) {
2109         drv->bdrv_reopen_commit(reopen_state);
2110     }
2111 
2112     /* set BDS specific flags now */
2113     QDECREF(reopen_state->bs->explicit_options);
2114 
2115     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2116     reopen_state->bs->open_flags         = reopen_state->flags;
2117     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2118                                               BDRV_O_CACHE_WB);
2119     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2120 
2121     bdrv_refresh_limits(reopen_state->bs, NULL);
2122 }
2123 
2124 /*
2125  * Abort the reopen, and delete and free the staged changes in
2126  * reopen_state
2127  */
2128 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2129 {
2130     BlockDriver *drv;
2131 
2132     assert(reopen_state != NULL);
2133     drv = reopen_state->bs->drv;
2134     assert(drv != NULL);
2135 
2136     if (drv->bdrv_reopen_abort) {
2137         drv->bdrv_reopen_abort(reopen_state);
2138     }
2139 
2140     QDECREF(reopen_state->explicit_options);
2141 }
2142 
2143 
2144 static void bdrv_close(BlockDriverState *bs)
2145 {
2146     BdrvAioNotifier *ban, *ban_next;
2147 
2148     assert(!bs->job);
2149 
2150     /* Disable I/O limits and drain all pending throttled requests */
2151     if (bs->throttle_state) {
2152         bdrv_io_limits_disable(bs);
2153     }
2154 
2155     bdrv_drained_begin(bs); /* complete I/O */
2156     bdrv_flush(bs);
2157     bdrv_drain(bs); /* in case flush left pending I/O */
2158 
2159     bdrv_release_named_dirty_bitmaps(bs);
2160     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2161 
2162     if (bs->blk) {
2163         blk_dev_change_media_cb(bs->blk, false);
2164     }
2165 
2166     if (bs->drv) {
2167         BdrvChild *child, *next;
2168 
2169         bs->drv->bdrv_close(bs);
2170         bs->drv = NULL;
2171 
2172         bdrv_set_backing_hd(bs, NULL);
2173 
2174         if (bs->file != NULL) {
2175             bdrv_unref_child(bs, bs->file);
2176             bs->file = NULL;
2177         }
2178 
2179         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2180             /* TODO Remove bdrv_unref() from drivers' close function and use
2181              * bdrv_unref_child() here */
2182             if (child->bs->inherits_from == bs) {
2183                 child->bs->inherits_from = NULL;
2184             }
2185             bdrv_detach_child(child);
2186         }
2187 
2188         g_free(bs->opaque);
2189         bs->opaque = NULL;
2190         bs->copy_on_read = 0;
2191         bs->backing_file[0] = '\0';
2192         bs->backing_format[0] = '\0';
2193         bs->total_sectors = 0;
2194         bs->encrypted = 0;
2195         bs->valid_key = 0;
2196         bs->sg = 0;
2197         bs->zero_beyond_eof = false;
2198         QDECREF(bs->options);
2199         QDECREF(bs->explicit_options);
2200         bs->options = NULL;
2201         QDECREF(bs->full_open_options);
2202         bs->full_open_options = NULL;
2203     }
2204 
2205     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2206         g_free(ban);
2207     }
2208     QLIST_INIT(&bs->aio_notifiers);
2209     bdrv_drained_end(bs);
2210 }
2211 
2212 void bdrv_close_all(void)
2213 {
2214     BlockDriverState *bs;
2215     AioContext *aio_context;
2216 
2217     /* Drop references from requests still in flight, such as canceled block
2218      * jobs whose AIO context has not been polled yet */
2219     bdrv_drain_all();
2220 
2221     blk_remove_all_bs();
2222     blockdev_close_all_bdrv_states();
2223 
2224     /* Cancel all block jobs */
2225     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2226         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2227             aio_context = bdrv_get_aio_context(bs);
2228 
2229             aio_context_acquire(aio_context);
2230             if (bs->job) {
2231                 block_job_cancel_sync(bs->job);
2232                 aio_context_release(aio_context);
2233                 break;
2234             }
2235             aio_context_release(aio_context);
2236         }
2237 
2238         /* All the remaining BlockDriverStates are referenced directly or
2239          * indirectly from block jobs, so there needs to be at least one BDS
2240          * directly used by a block job */
2241         assert(bs);
2242     }
2243 }
2244 
2245 /* make a BlockDriverState anonymous by removing from graph_bdrv_state list.
2246  * Also, NULL terminate the device_name to prevent double remove */
2247 void bdrv_make_anon(BlockDriverState *bs)
2248 {
2249     if (bs->node_name[0] != '\0') {
2250         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2251     }
2252     bs->node_name[0] = '\0';
2253 }
2254 
2255 /* Fields that need to stay with the top-level BDS */
2256 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2257                                      BlockDriverState *bs_src)
2258 {
2259     /* move some fields that need to stay attached to the device */
2260 
2261     /* dev info */
2262     bs_dest->copy_on_read       = bs_src->copy_on_read;
2263 
2264     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2265 
2266     /* dirty bitmap */
2267     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2268 }
2269 
2270 static void change_parent_backing_link(BlockDriverState *from,
2271                                        BlockDriverState *to)
2272 {
2273     BdrvChild *c, *next;
2274 
2275     if (from->blk) {
2276         /* FIXME We bypass blk_set_bs(), so we need to make these updates
2277          * manually. The root problem is not in this change function, but the
2278          * existence of BlockDriverState.blk. */
2279         to->blk = from->blk;
2280         from->blk = NULL;
2281     }
2282 
2283     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2284         assert(c->role != &child_backing);
2285         c->bs = to;
2286         QLIST_REMOVE(c, next_parent);
2287         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2288         bdrv_ref(to);
2289         bdrv_unref(from);
2290     }
2291 }
2292 
2293 static void swap_feature_fields(BlockDriverState *bs_top,
2294                                 BlockDriverState *bs_new)
2295 {
2296     BlockDriverState tmp;
2297 
2298     bdrv_move_feature_fields(&tmp, bs_top);
2299     bdrv_move_feature_fields(bs_top, bs_new);
2300     bdrv_move_feature_fields(bs_new, &tmp);
2301 
2302     assert(!bs_new->throttle_state);
2303     if (bs_top->throttle_state) {
2304         assert(bs_top->io_limits_enabled);
2305         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2306         bdrv_io_limits_disable(bs_top);
2307     }
2308 }
2309 
2310 /*
2311  * Add new bs contents at the top of an image chain while the chain is
2312  * live, while keeping required fields on the top layer.
2313  *
2314  * This will modify the BlockDriverState fields, and swap contents
2315  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2316  *
2317  * bs_new must not be attached to a BlockBackend.
2318  *
2319  * This function does not create any image files.
2320  *
2321  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2322  * that's what the callers commonly need. bs_new will be referenced by the old
2323  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2324  * reference of its own, it must call bdrv_ref().
2325  */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    /* Neither node may have requests pending while the graph is changed */
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold a reference of our own: change_parent_backing_link() drops one
     * reference to bs_top for every parent link it moves over to bs_new. */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    /* bs_top becomes the backing file of bs_new; only afterwards is the
     * temporary reference taken above released again. */
    bdrv_set_backing_hd(bs_new, bs_top);
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2344 
2345 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2346 {
2347     assert(!bdrv_requests_pending(old));
2348     assert(!bdrv_requests_pending(new));
2349 
2350     bdrv_ref(old);
2351 
2352     if (old->blk) {
2353         /* As long as these fields aren't in BlockBackend, but in the top-level
2354          * BlockDriverState, it's not possible for a BDS to have two BBs.
2355          *
2356          * We really want to copy the fields from old to new, but we go for a
2357          * swap instead so that pointers aren't duplicated and cause trouble.
2358          * (Also, bdrv_swap() used to do the same.) */
2359         assert(!new->blk);
2360         swap_feature_fields(old, new);
2361     }
2362     change_parent_backing_link(old, new);
2363 
2364     /* Change backing files if a previously independent node is added to the
2365      * chain. For active commit, we replace top by its own (indirect) backing
2366      * file and don't do anything here so we don't build a loop. */
2367     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2368         bdrv_set_backing_hd(new, backing_bs(old));
2369         bdrv_set_backing_hd(old, NULL);
2370     }
2371 
2372     bdrv_unref(old);
2373 }
2374 
/*
 * Closes @bs, removes it from the node lists and frees it.
 *
 * @bs must have no block job, no op blockers and a reference count of zero.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    /* Unlike the named-node list above, every BDS is on all_bdrv_states */
    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);

    g_free(bs);
}
2390 
2391 /*
2392  * Run consistency checks on an image
2393  *
2394  * Returns 0 if the check could be completed (it doesn't mean that the image is
2395  * free of errors) or -errno when an internal error occurred. The results of the
2396  * check are stored in res.
2397  */
2398 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2399 {
2400     if (bs->drv == NULL) {
2401         return -ENOMEDIUM;
2402     }
2403     if (bs->drv->bdrv_check == NULL) {
2404         return -ENOTSUP;
2405     }
2406 
2407     memset(res, 0, sizeof(*res));
2408     return bs->drv->bdrv_check(bs, res, fix);
2409 }
2410 
2411 #define COMMIT_BUF_SECTORS 2048
2412 
2413 /* commit COW file into the raw image */
2414 int bdrv_commit(BlockDriverState *bs)
2415 {
2416     BlockDriver *drv = bs->drv;
2417     int64_t sector, total_sectors, length, backing_length;
2418     int n, ro, open_flags;
2419     int ret = 0;
2420     uint8_t *buf = NULL;
2421 
2422     if (!drv)
2423         return -ENOMEDIUM;
2424 
2425     if (!bs->backing) {
2426         return -ENOTSUP;
2427     }
2428 
2429     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2430         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2431         return -EBUSY;
2432     }
2433 
2434     ro = bs->backing->bs->read_only;
2435     open_flags =  bs->backing->bs->open_flags;
2436 
2437     if (ro) {
2438         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2439             return -EACCES;
2440         }
2441     }
2442 
2443     length = bdrv_getlength(bs);
2444     if (length < 0) {
2445         ret = length;
2446         goto ro_cleanup;
2447     }
2448 
2449     backing_length = bdrv_getlength(bs->backing->bs);
2450     if (backing_length < 0) {
2451         ret = backing_length;
2452         goto ro_cleanup;
2453     }
2454 
2455     /* If our top snapshot is larger than the backing file image,
2456      * grow the backing file image if possible.  If not possible,
2457      * we must return an error */
2458     if (length > backing_length) {
2459         ret = bdrv_truncate(bs->backing->bs, length);
2460         if (ret < 0) {
2461             goto ro_cleanup;
2462         }
2463     }
2464 
2465     total_sectors = length >> BDRV_SECTOR_BITS;
2466 
2467     /* qemu_try_blockalign() for bs will choose an alignment that works for
2468      * bs->backing->bs as well, so no need to compare the alignment manually. */
2469     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2470     if (buf == NULL) {
2471         ret = -ENOMEM;
2472         goto ro_cleanup;
2473     }
2474 
2475     for (sector = 0; sector < total_sectors; sector += n) {
2476         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2477         if (ret < 0) {
2478             goto ro_cleanup;
2479         }
2480         if (ret) {
2481             ret = bdrv_read(bs, sector, buf, n);
2482             if (ret < 0) {
2483                 goto ro_cleanup;
2484             }
2485 
2486             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2487             if (ret < 0) {
2488                 goto ro_cleanup;
2489             }
2490         }
2491     }
2492 
2493     if (drv->bdrv_make_empty) {
2494         ret = drv->bdrv_make_empty(bs);
2495         if (ret < 0) {
2496             goto ro_cleanup;
2497         }
2498         bdrv_flush(bs);
2499     }
2500 
2501     /*
2502      * Make sure all data we wrote to the backing device is actually
2503      * stable on disk.
2504      */
2505     if (bs->backing) {
2506         bdrv_flush(bs->backing->bs);
2507     }
2508 
2509     ret = 0;
2510 ro_cleanup:
2511     qemu_vfree(buf);
2512 
2513     if (ro) {
2514         /* ignoring error return here */
2515         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2516     }
2517 
2518     return ret;
2519 }
2520 
2521 /*
2522  * Return values:
2523  * 0        - success
2524  * -EINVAL  - backing format specified, but no file
2525  * -ENOSPC  - can't update the backing file because no space is left in the
2526  *            image file header
2527  * -ENOTSUP - format driver doesn't support changing the backing file
2528  */
2529 int bdrv_change_backing_file(BlockDriverState *bs,
2530     const char *backing_file, const char *backing_fmt)
2531 {
2532     BlockDriver *drv = bs->drv;
2533     int ret;
2534 
2535     /* Backing file format doesn't make sense without a backing file */
2536     if (backing_fmt && !backing_file) {
2537         return -EINVAL;
2538     }
2539 
2540     if (drv->bdrv_change_backing_file != NULL) {
2541         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2542     } else {
2543         ret = -ENOTSUP;
2544     }
2545 
2546     if (ret == 0) {
2547         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2548         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2549     }
2550     return ret;
2551 }
2552 
2553 /*
2554  * Finds the image layer in the chain that has 'bs' as its backing file.
2555  *
2556  * active is the current topmost image.
2557  *
2558  * Returns NULL if bs is not found in active's image chain,
2559  * or if active == bs.
2560  *
2561  * Returns the bottommost base image if bs == NULL.
2562  */
2563 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2564                                     BlockDriverState *bs)
2565 {
2566     while (active && bs != backing_bs(active)) {
2567         active = backing_bs(active);
2568     }
2569 
2570     return active;
2571 }
2572 
2573 /* Given a BDS, searches for the base layer. */
2574 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2575 {
2576     return bdrv_find_overlay(bs, NULL);
2577 }
2578 
2579 /*
2580  * Drops images above 'base' up to and including 'top', and sets the image
2581  * above 'top' to have base as its backing file.
2582  *
2583  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2584  * information in 'bs' can be properly updated.
2585  *
2586  * E.g., this will convert the following chain:
2587  * bottom <- base <- intermediate <- top <- active
2588  *
2589  * to
2590  *
2591  * bottom <- base <- active
2592  *
2593  * It is allowed for bottom==base, in which case it converts:
2594  *
2595  * base <- intermediate <- top <- active
2596  *
2597  * to
2598  *
2599  * base <- active
2600  *
2601  * If backing_file_str is non-NULL, it will be used when modifying top's
2602  * overlay image metadata.
2603  *
2604  * Error conditions:
2605  *  if active == top, that is considered an error
2606  *
2607  */
2608 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2609                            BlockDriverState *base, const char *backing_file_str)
2610 {
2611     BlockDriverState *new_top_bs = NULL;
2612     int ret = -EIO;
2613 
2614     if (!top->drv || !base->drv) {
2615         goto exit;
2616     }
2617 
2618     new_top_bs = bdrv_find_overlay(active, top);
2619 
2620     if (new_top_bs == NULL) {
2621         /* we could not find the image above 'top', this is an error */
2622         goto exit;
2623     }
2624 
2625     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2626      * to do, no intermediate images */
2627     if (backing_bs(new_top_bs) == base) {
2628         ret = 0;
2629         goto exit;
2630     }
2631 
2632     /* Make sure that base is in the backing chain of top */
2633     if (!bdrv_chain_contains(top, base)) {
2634         goto exit;
2635     }
2636 
2637     /* success - we can delete the intermediate states, and link top->base */
2638     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2639     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2640                                    base->drv ? base->drv->format_name : "");
2641     if (ret) {
2642         goto exit;
2643     }
2644     bdrv_set_backing_hd(new_top_bs, base);
2645 
2646     ret = 0;
2647 exit:
2648     return ret;
2649 }
2650 
2651 /**
2652  * Truncate file to 'offset' bytes (needed only for file protocols)
2653  */
2654 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2655 {
2656     BlockDriver *drv = bs->drv;
2657     int ret;
2658     if (!drv)
2659         return -ENOMEDIUM;
2660     if (!drv->bdrv_truncate)
2661         return -ENOTSUP;
2662     if (bs->read_only)
2663         return -EACCES;
2664 
2665     ret = drv->bdrv_truncate(bs, offset);
2666     if (ret == 0) {
2667         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2668         bdrv_dirty_bitmap_truncate(bs);
2669         if (bs->blk) {
2670             blk_dev_resize_cb(bs->blk);
2671         }
2672     }
2673     return ret;
2674 }
2675 
2676 /**
2677  * Length of a allocated file in bytes. Sparse files are counted by actual
2678  * allocated space. Return < 0 if error or unknown.
2679  */
2680 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2681 {
2682     BlockDriver *drv = bs->drv;
2683     if (!drv) {
2684         return -ENOMEDIUM;
2685     }
2686     if (drv->bdrv_get_allocated_file_size) {
2687         return drv->bdrv_get_allocated_file_size(bs);
2688     }
2689     if (bs->file) {
2690         return bdrv_get_allocated_file_size(bs->file->bs);
2691     }
2692     return -ENOTSUP;
2693 }
2694 
2695 /**
2696  * Return number of sectors on success, -errno on error.
2697  */
2698 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2699 {
2700     BlockDriver *drv = bs->drv;
2701 
2702     if (!drv)
2703         return -ENOMEDIUM;
2704 
2705     if (drv->has_variable_length) {
2706         int ret = refresh_total_sectors(bs, bs->total_sectors);
2707         if (ret < 0) {
2708             return ret;
2709         }
2710     }
2711     return bs->total_sectors;
2712 }
2713 
2714 /**
2715  * Return length in bytes on success, -errno on error.
2716  * The length is always a multiple of BDRV_SECTOR_SIZE.
2717  */
2718 int64_t bdrv_getlength(BlockDriverState *bs)
2719 {
2720     int64_t ret = bdrv_nb_sectors(bs);
2721 
2722     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2723     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2724 }
2725 
2726 /* return 0 as number of sectors if no device present or error */
2727 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2728 {
2729     int64_t nb_sectors = bdrv_nb_sectors(bs);
2730 
2731     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2732 }
2733 
2734 int bdrv_is_read_only(BlockDriverState *bs)
2735 {
2736     return bs->read_only;
2737 }
2738 
2739 int bdrv_is_sg(BlockDriverState *bs)
2740 {
2741     return bs->sg;
2742 }
2743 
2744 int bdrv_enable_write_cache(BlockDriverState *bs)
2745 {
2746     return bs->enable_write_cache;
2747 }
2748 
2749 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2750 {
2751     bs->enable_write_cache = wce;
2752 
2753     /* so a reopen() will preserve wce */
2754     if (wce) {
2755         bs->open_flags |= BDRV_O_CACHE_WB;
2756     } else {
2757         bs->open_flags &= ~BDRV_O_CACHE_WB;
2758     }
2759 }
2760 
2761 int bdrv_is_encrypted(BlockDriverState *bs)
2762 {
2763     if (bs->backing && bs->backing->bs->encrypted) {
2764         return 1;
2765     }
2766     return bs->encrypted;
2767 }
2768 
2769 int bdrv_key_required(BlockDriverState *bs)
2770 {
2771     BdrvChild *backing = bs->backing;
2772 
2773     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2774         return 1;
2775     }
2776     return (bs->encrypted && !bs->valid_key);
2777 }
2778 
2779 int bdrv_set_key(BlockDriverState *bs, const char *key)
2780 {
2781     int ret;
2782     if (bs->backing && bs->backing->bs->encrypted) {
2783         ret = bdrv_set_key(bs->backing->bs, key);
2784         if (ret < 0)
2785             return ret;
2786         if (!bs->encrypted)
2787             return 0;
2788     }
2789     if (!bs->encrypted) {
2790         return -EINVAL;
2791     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2792         return -ENOMEDIUM;
2793     }
2794     ret = bs->drv->bdrv_set_key(bs, key);
2795     if (ret < 0) {
2796         bs->valid_key = 0;
2797     } else if (!bs->valid_key) {
2798         bs->valid_key = 1;
2799         if (bs->blk) {
2800             /* call the change callback now, we skipped it on open */
2801             blk_dev_change_media_cb(bs->blk, true);
2802         }
2803     }
2804     return ret;
2805 }
2806 
2807 /*
2808  * Provide an encryption key for @bs.
2809  * If @key is non-null:
2810  *     If @bs is not encrypted, fail.
2811  *     Else if the key is invalid, fail.
2812  *     Else set @bs's key to @key, replacing the existing key, if any.
2813  * If @key is null:
2814  *     If @bs is encrypted and still lacks a key, fail.
2815  *     Else do nothing.
2816  * On failure, store an error object through @errp if non-null.
2817  */
2818 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2819 {
2820     if (key) {
2821         if (!bdrv_is_encrypted(bs)) {
2822             error_setg(errp, "Node '%s' is not encrypted",
2823                       bdrv_get_device_or_node_name(bs));
2824         } else if (bdrv_set_key(bs, key) < 0) {
2825             error_setg(errp, QERR_INVALID_PASSWORD);
2826         }
2827     } else {
2828         if (bdrv_key_required(bs)) {
2829             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2830                       "'%s' (%s) is encrypted",
2831                       bdrv_get_device_or_node_name(bs),
2832                       bdrv_get_encrypted_filename(bs));
2833         }
2834     }
2835 }
2836 
2837 const char *bdrv_get_format_name(BlockDriverState *bs)
2838 {
2839     return bs->drv ? bs->drv->format_name : NULL;
2840 }
2841 
/*
 * qsort() comparator for an array of C strings (elements of type
 * 'const char *').
 *
 * qsort() passes the comparator pointers to the array elements, not the
 * elements themselves, so each argument is really a 'const char *const *'
 * and must be dereferenced once before being handed to strcmp().
 *
 * Returns <0, 0 or >0 as the first string sorts before, equal to or after
 * the second one.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2846 
2847 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2848                          void *opaque)
2849 {
2850     BlockDriver *drv;
2851     int count = 0;
2852     int i;
2853     const char **formats = NULL;
2854 
2855     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2856         if (drv->format_name) {
2857             bool found = false;
2858             int i = count;
2859             while (formats && i && !found) {
2860                 found = !strcmp(formats[--i], drv->format_name);
2861             }
2862 
2863             if (!found) {
2864                 formats = g_renew(const char *, formats, count + 1);
2865                 formats[count++] = drv->format_name;
2866             }
2867         }
2868     }
2869 
2870     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2871 
2872     for (i = 0; i < count; i++) {
2873         it(opaque, formats[i]);
2874     }
2875 
2876     g_free(formats);
2877 }
2878 
2879 /* This function is to find a node in the bs graph */
2880 BlockDriverState *bdrv_find_node(const char *node_name)
2881 {
2882     BlockDriverState *bs;
2883 
2884     assert(node_name);
2885 
2886     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2887         if (!strcmp(node_name, bs->node_name)) {
2888             return bs;
2889         }
2890     }
2891     return NULL;
2892 }
2893 
2894 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2895 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2896 {
2897     BlockDeviceInfoList *list, *entry;
2898     BlockDriverState *bs;
2899 
2900     list = NULL;
2901     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2902         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2903         if (!info) {
2904             qapi_free_BlockDeviceInfoList(list);
2905             return NULL;
2906         }
2907         entry = g_malloc0(sizeof(*entry));
2908         entry->value = info;
2909         entry->next = list;
2910         list = entry;
2911     }
2912 
2913     return list;
2914 }
2915 
2916 BlockDriverState *bdrv_lookup_bs(const char *device,
2917                                  const char *node_name,
2918                                  Error **errp)
2919 {
2920     BlockBackend *blk;
2921     BlockDriverState *bs;
2922 
2923     if (device) {
2924         blk = blk_by_name(device);
2925 
2926         if (blk) {
2927             bs = blk_bs(blk);
2928             if (!bs) {
2929                 error_setg(errp, "Device '%s' has no medium", device);
2930             }
2931 
2932             return bs;
2933         }
2934     }
2935 
2936     if (node_name) {
2937         bs = bdrv_find_node(node_name);
2938 
2939         if (bs) {
2940             return bs;
2941         }
2942     }
2943 
2944     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2945                      device ? device : "",
2946                      node_name ? node_name : "");
2947     return NULL;
2948 }
2949 
2950 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2951  * return false.  If either argument is NULL, return false. */
2952 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2953 {
2954     while (top && top != base) {
2955         top = backing_bs(top);
2956     }
2957 
2958     return top != NULL;
2959 }
2960 
2961 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2962 {
2963     if (!bs) {
2964         return QTAILQ_FIRST(&graph_bdrv_states);
2965     }
2966     return QTAILQ_NEXT(bs, node_list);
2967 }
2968 
2969 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2970  * the monitor or attached to a BlockBackend */
2971 BlockDriverState *bdrv_next(BlockDriverState *bs)
2972 {
2973     if (!bs || bs->blk) {
2974         bs = blk_next_root_bs(bs);
2975         if (bs) {
2976             return bs;
2977         }
2978     }
2979 
2980     /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2981      * handled by the above block already */
2982     do {
2983         bs = bdrv_next_monitor_owned(bs);
2984     } while (bs && bs->blk);
2985     return bs;
2986 }
2987 
2988 const char *bdrv_get_node_name(const BlockDriverState *bs)
2989 {
2990     return bs->node_name;
2991 }
2992 
2993 /* TODO check what callers really want: bs->node_name or blk_name() */
2994 const char *bdrv_get_device_name(const BlockDriverState *bs)
2995 {
2996     return bs->blk ? blk_name(bs->blk) : "";
2997 }
2998 
2999 /* This can be used to identify nodes that might not have a device
3000  * name associated. Since node and device names live in the same
3001  * namespace, the result is unambiguous. The exception is if both are
3002  * absent, then this returns an empty (non-null) string. */
3003 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3004 {
3005     return bs->blk ? blk_name(bs->blk) : bs->node_name;
3006 }
3007 
3008 int bdrv_get_flags(BlockDriverState *bs)
3009 {
3010     return bs->open_flags;
3011 }
3012 
3013 int bdrv_has_zero_init_1(BlockDriverState *bs)
3014 {
3015     return 1;
3016 }
3017 
3018 int bdrv_has_zero_init(BlockDriverState *bs)
3019 {
3020     assert(bs->drv);
3021 
3022     /* If BS is a copy on write image, it is initialized to
3023        the contents of the base image, which may not be zeroes.  */
3024     if (bs->backing) {
3025         return 0;
3026     }
3027     if (bs->drv->bdrv_has_zero_init) {
3028         return bs->drv->bdrv_has_zero_init(bs);
3029     }
3030 
3031     /* safe default */
3032     return 0;
3033 }
3034 
3035 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3036 {
3037     BlockDriverInfo bdi;
3038 
3039     if (bs->backing) {
3040         return false;
3041     }
3042 
3043     if (bdrv_get_info(bs, &bdi) == 0) {
3044         return bdi.unallocated_blocks_are_zero;
3045     }
3046 
3047     return false;
3048 }
3049 
3050 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3051 {
3052     BlockDriverInfo bdi;
3053 
3054     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3055         return false;
3056     }
3057 
3058     if (bdrv_get_info(bs, &bdi) == 0) {
3059         return bdi.can_write_zeroes_with_unmap;
3060     }
3061 
3062     return false;
3063 }
3064 
3065 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3066 {
3067     if (bs->backing && bs->backing->bs->encrypted)
3068         return bs->backing_file;
3069     else if (bs->encrypted)
3070         return bs->filename;
3071     else
3072         return NULL;
3073 }
3074 
3075 void bdrv_get_backing_filename(BlockDriverState *bs,
3076                                char *filename, int filename_size)
3077 {
3078     pstrcpy(filename, filename_size, bs->backing_file);
3079 }
3080 
3081 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3082 {
3083     BlockDriver *drv = bs->drv;
3084     if (!drv)
3085         return -ENOMEDIUM;
3086     if (!drv->bdrv_get_info)
3087         return -ENOTSUP;
3088     memset(bdi, 0, sizeof(*bdi));
3089     return drv->bdrv_get_info(bs, bdi);
3090 }
3091 
3092 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3093 {
3094     BlockDriver *drv = bs->drv;
3095     if (drv && drv->bdrv_get_specific_info) {
3096         return drv->bdrv_get_specific_info(bs);
3097     }
3098     return NULL;
3099 }
3100 
3101 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3102 {
3103     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3104         return;
3105     }
3106 
3107     bs->drv->bdrv_debug_event(bs, event);
3108 }
3109 
3110 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3111                           const char *tag)
3112 {
3113     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3114         bs = bs->file ? bs->file->bs : NULL;
3115     }
3116 
3117     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3118         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3119     }
3120 
3121     return -ENOTSUP;
3122 }
3123 
3124 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3125 {
3126     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3127         bs = bs->file ? bs->file->bs : NULL;
3128     }
3129 
3130     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3131         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3132     }
3133 
3134     return -ENOTSUP;
3135 }
3136 
3137 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3138 {
3139     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3140         bs = bs->file ? bs->file->bs : NULL;
3141     }
3142 
3143     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3144         return bs->drv->bdrv_debug_resume(bs, tag);
3145     }
3146 
3147     return -ENOTSUP;
3148 }
3149 
3150 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3151 {
3152     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3153         bs = bs->file ? bs->file->bs : NULL;
3154     }
3155 
3156     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3157         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3158     }
3159 
3160     return false;
3161 }
3162 
3163 int bdrv_is_snapshot(BlockDriverState *bs)
3164 {
3165     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3166 }
3167 
3168 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3169  * relative, it must be relative to the chain.  So, passing in bs->filename
3170  * from a BDS as backing_file should not be done, as that may be relative to
3171  * the CWD rather than the chain. */
3172 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3173         const char *backing_file)
3174 {
3175     char *filename_full = NULL;
3176     char *backing_file_full = NULL;
3177     char *filename_tmp = NULL;
3178     int is_protocol = 0;
3179     BlockDriverState *curr_bs = NULL;
3180     BlockDriverState *retval = NULL;
3181 
3182     if (!bs || !bs->drv || !backing_file) {
3183         return NULL;
3184     }
3185 
3186     filename_full     = g_malloc(PATH_MAX);
3187     backing_file_full = g_malloc(PATH_MAX);
3188     filename_tmp      = g_malloc(PATH_MAX);
3189 
3190     is_protocol = path_has_protocol(backing_file);
3191 
3192     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3193 
3194         /* If either of the filename paths is actually a protocol, then
3195          * compare unmodified paths; otherwise make paths relative */
3196         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3197             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3198                 retval = curr_bs->backing->bs;
3199                 break;
3200             }
3201         } else {
3202             /* If not an absolute filename path, make it relative to the current
3203              * image's filename path */
3204             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3205                          backing_file);
3206 
3207             /* We are going to compare absolute pathnames */
3208             if (!realpath(filename_tmp, filename_full)) {
3209                 continue;
3210             }
3211 
3212             /* We need to make sure the backing filename we are comparing against
3213              * is relative to the current image filename (or absolute) */
3214             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3215                          curr_bs->backing_file);
3216 
3217             if (!realpath(filename_tmp, backing_file_full)) {
3218                 continue;
3219             }
3220 
3221             if (strcmp(backing_file_full, filename_full) == 0) {
3222                 retval = curr_bs->backing->bs;
3223                 break;
3224             }
3225         }
3226     }
3227 
3228     g_free(filename_full);
3229     g_free(backing_file_full);
3230     g_free(filename_tmp);
3231     return retval;
3232 }
3233 
3234 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3235 {
3236     if (!bs->drv) {
3237         return 0;
3238     }
3239 
3240     if (!bs->backing) {
3241         return 0;
3242     }
3243 
3244     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3245 }
3246 
3247 void bdrv_init(void)
3248 {
3249     module_call_init(MODULE_INIT_BLOCK);
3250 }
3251 
3252 void bdrv_init_with_whitelist(void)
3253 {
3254     use_bdrv_whitelist = 1;
3255     bdrv_init();
3256 }
3257 
3258 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3259 {
3260     Error *local_err = NULL;
3261     int ret;
3262 
3263     if (!bs->drv)  {
3264         return;
3265     }
3266 
3267     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3268         return;
3269     }
3270     bs->open_flags &= ~BDRV_O_INACTIVE;
3271 
3272     if (bs->drv->bdrv_invalidate_cache) {
3273         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3274     } else if (bs->file) {
3275         bdrv_invalidate_cache(bs->file->bs, &local_err);
3276     }
3277     if (local_err) {
3278         bs->open_flags |= BDRV_O_INACTIVE;
3279         error_propagate(errp, local_err);
3280         return;
3281     }
3282 
3283     ret = refresh_total_sectors(bs, bs->total_sectors);
3284     if (ret < 0) {
3285         bs->open_flags |= BDRV_O_INACTIVE;
3286         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3287         return;
3288     }
3289 }
3290 
3291 void bdrv_invalidate_cache_all(Error **errp)
3292 {
3293     BlockDriverState *bs = NULL;
3294     Error *local_err = NULL;
3295 
3296     while ((bs = bdrv_next(bs)) != NULL) {
3297         AioContext *aio_context = bdrv_get_aio_context(bs);
3298 
3299         aio_context_acquire(aio_context);
3300         bdrv_invalidate_cache(bs, &local_err);
3301         aio_context_release(aio_context);
3302         if (local_err) {
3303             error_propagate(errp, local_err);
3304             return;
3305         }
3306     }
3307 }
3308 
3309 static int bdrv_inactivate(BlockDriverState *bs)
3310 {
3311     int ret;
3312 
3313     if (bs->drv->bdrv_inactivate) {
3314         ret = bs->drv->bdrv_inactivate(bs);
3315         if (ret < 0) {
3316             return ret;
3317         }
3318     }
3319 
3320     bs->open_flags |= BDRV_O_INACTIVE;
3321     return 0;
3322 }
3323 
3324 int bdrv_inactivate_all(void)
3325 {
3326     BlockDriverState *bs = NULL;
3327     int ret;
3328 
3329     while ((bs = bdrv_next(bs)) != NULL) {
3330         AioContext *aio_context = bdrv_get_aio_context(bs);
3331 
3332         aio_context_acquire(aio_context);
3333         ret = bdrv_inactivate(bs);
3334         aio_context_release(aio_context);
3335         if (ret < 0) {
3336             return ret;
3337         }
3338     }
3339 
3340     return 0;
3341 }
3342 
3343 /**************************************************************/
3344 /* removable device support */
3345 
3346 /**
3347  * Return TRUE if the media is present
3348  */
3349 bool bdrv_is_inserted(BlockDriverState *bs)
3350 {
3351     BlockDriver *drv = bs->drv;
3352     BdrvChild *child;
3353 
3354     if (!drv) {
3355         return false;
3356     }
3357     if (drv->bdrv_is_inserted) {
3358         return drv->bdrv_is_inserted(bs);
3359     }
3360     QLIST_FOREACH(child, &bs->children, next) {
3361         if (!bdrv_is_inserted(child->bs)) {
3362             return false;
3363         }
3364     }
3365     return true;
3366 }
3367 
3368 /**
3369  * Return whether the media changed since the last call to this
3370  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3371  */
3372 int bdrv_media_changed(BlockDriverState *bs)
3373 {
3374     BlockDriver *drv = bs->drv;
3375 
3376     if (drv && drv->bdrv_media_changed) {
3377         return drv->bdrv_media_changed(bs);
3378     }
3379     return -ENOTSUP;
3380 }
3381 
3382 /**
3383  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3384  */
3385 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3386 {
3387     BlockDriver *drv = bs->drv;
3388     const char *device_name;
3389 
3390     if (drv && drv->bdrv_eject) {
3391         drv->bdrv_eject(bs, eject_flag);
3392     }
3393 
3394     device_name = bdrv_get_device_name(bs);
3395     if (device_name[0] != '\0') {
3396         qapi_event_send_device_tray_moved(device_name,
3397                                           eject_flag, &error_abort);
3398     }
3399 }
3400 
3401 /**
3402  * Lock or unlock the media (if it is locked, the user won't be able
3403  * to eject it manually).
3404  */
3405 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3406 {
3407     BlockDriver *drv = bs->drv;
3408 
3409     trace_bdrv_lock_medium(bs, locked);
3410 
3411     if (drv && drv->bdrv_lock_medium) {
3412         drv->bdrv_lock_medium(bs, locked);
3413     }
3414 }
3415 
3416 /* Get a reference to bs */
3417 void bdrv_ref(BlockDriverState *bs)
3418 {
3419     bs->refcnt++;
3420 }
3421 
3422 /* Release a previously grabbed reference to bs.
3423  * If after releasing, reference count is zero, the BlockDriverState is
3424  * deleted. */
3425 void bdrv_unref(BlockDriverState *bs)
3426 {
3427     if (!bs) {
3428         return;
3429     }
3430     assert(bs->refcnt > 0);
3431     if (--bs->refcnt == 0) {
3432         bdrv_delete(bs);
3433     }
3434 }
3435 
/* One entry in a BDS's per-operation blocker list (bs->op_blockers[op]) */
struct BdrvOpBlocker {
    /* Why the operation is blocked; also the key used to remove the entry */
    Error *reason;
    /* Linkage within the op_blockers list */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3440 
3441 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3442 {
3443     BdrvOpBlocker *blocker;
3444     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3445     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3446         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3447         if (errp) {
3448             *errp = error_copy(blocker->reason);
3449             error_prepend(errp, "Node '%s' is busy: ",
3450                           bdrv_get_device_or_node_name(bs));
3451         }
3452         return true;
3453     }
3454     return false;
3455 }
3456 
3457 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3458 {
3459     BdrvOpBlocker *blocker;
3460     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3461 
3462     blocker = g_new0(BdrvOpBlocker, 1);
3463     blocker->reason = reason;
3464     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3465 }
3466 
3467 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3468 {
3469     BdrvOpBlocker *blocker, *next;
3470     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3471     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3472         if (blocker->reason == reason) {
3473             QLIST_REMOVE(blocker, list);
3474             g_free(blocker);
3475         }
3476     }
3477 }
3478 
3479 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3480 {
3481     int i;
3482     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3483         bdrv_op_block(bs, i, reason);
3484     }
3485 }
3486 
3487 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3488 {
3489     int i;
3490     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3491         bdrv_op_unblock(bs, i, reason);
3492     }
3493 }
3494 
3495 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3496 {
3497     int i;
3498 
3499     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3500         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3501             return false;
3502         }
3503     }
3504     return true;
3505 }
3506 
/*
 * Create a new image file.
 *
 * @filename:      name of the image to create; also used to pick the
 *                 protocol driver
 * @fmt:           name of the format driver to use
 * @base_filename: backing file to set, or NULL
 * @base_fmt:      backing file format to set, or NULL
 * @options:       "-o" style option string parsed on top of the defaults,
 *                 or NULL
 * @img_size:      default for the size option; a resulting size of -1 is
 *                 treated as "no size given"
 * @flags:         open flags used (minus BDRV_O_RDWR, BDRV_O_SNAPSHOT and
 *                 BDRV_O_NO_BACKING) when the backing file has to be opened
 *                 to find out its size
 * @errp:          where to store an error on failure
 * @quiet:         if false, a "Formatting ..." summary is printed to stdout
 *                 before the image is created
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    /* Both the format and the protocol driver must support creation */
    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* The accepted options are the union of both drivers' create options */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* Report the detailed parse error, return a generic one */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    /* Explicit backing file arguments are applied after the -o string */
    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        /* Only an exact string match of the two names is detected here */
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE: shadows the function-level 'size' within this block */
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            /* Request the given backing format as the driver, if any */
            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* The new image takes over the size of its backing file */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error is replaced by the message above */
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; a still pending local error is handed to the caller */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
3673 
3674 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3675 {
3676     return bs->aio_context;
3677 }
3678 
3679 void bdrv_detach_aio_context(BlockDriverState *bs)
3680 {
3681     BdrvAioNotifier *baf;
3682 
3683     if (!bs->drv) {
3684         return;
3685     }
3686 
3687     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3688         baf->detach_aio_context(baf->opaque);
3689     }
3690 
3691     if (bs->throttle_state) {
3692         throttle_timers_detach_aio_context(&bs->throttle_timers);
3693     }
3694     if (bs->drv->bdrv_detach_aio_context) {
3695         bs->drv->bdrv_detach_aio_context(bs);
3696     }
3697     if (bs->file) {
3698         bdrv_detach_aio_context(bs->file->bs);
3699     }
3700     if (bs->backing) {
3701         bdrv_detach_aio_context(bs->backing->bs);
3702     }
3703 
3704     bs->aio_context = NULL;
3705 }
3706 
3707 void bdrv_attach_aio_context(BlockDriverState *bs,
3708                              AioContext *new_context)
3709 {
3710     BdrvAioNotifier *ban;
3711 
3712     if (!bs->drv) {
3713         return;
3714     }
3715 
3716     bs->aio_context = new_context;
3717 
3718     if (bs->backing) {
3719         bdrv_attach_aio_context(bs->backing->bs, new_context);
3720     }
3721     if (bs->file) {
3722         bdrv_attach_aio_context(bs->file->bs, new_context);
3723     }
3724     if (bs->drv->bdrv_attach_aio_context) {
3725         bs->drv->bdrv_attach_aio_context(bs, new_context);
3726     }
3727     if (bs->throttle_state) {
3728         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3729     }
3730 
3731     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3732         ban->attached_aio_context(new_context, ban->opaque);
3733     }
3734 }
3735 
3736 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3737 {
3738     bdrv_drain(bs); /* ensure there are no in-flight requests */
3739 
3740     bdrv_detach_aio_context(bs);
3741 
3742     /* This function executes in the old AioContext so acquire the new one in
3743      * case it runs in a different thread.
3744      */
3745     aio_context_acquire(new_context);
3746     bdrv_attach_aio_context(bs, new_context);
3747     aio_context_release(new_context);
3748 }
3749 
3750 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3751         void (*attached_aio_context)(AioContext *new_context, void *opaque),
3752         void (*detach_aio_context)(void *opaque), void *opaque)
3753 {
3754     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3755     *ban = (BdrvAioNotifier){
3756         .attached_aio_context = attached_aio_context,
3757         .detach_aio_context   = detach_aio_context,
3758         .opaque               = opaque
3759     };
3760 
3761     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3762 }
3763 
3764 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3765                                       void (*attached_aio_context)(AioContext *,
3766                                                                    void *),
3767                                       void (*detach_aio_context)(void *),
3768                                       void *opaque)
3769 {
3770     BdrvAioNotifier *ban, *ban_next;
3771 
3772     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3773         if (ban->attached_aio_context == attached_aio_context &&
3774             ban->detach_aio_context   == detach_aio_context   &&
3775             ban->opaque               == opaque)
3776         {
3777             QLIST_REMOVE(ban, list);
3778             g_free(ban);
3779 
3780             return;
3781         }
3782     }
3783 
3784     abort();
3785 }
3786 
3787 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3788                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3789 {
3790     if (!bs->drv->bdrv_amend_options) {
3791         return -ENOTSUP;
3792     }
3793     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3794 }
3795 
3796 /* This function will be called by the bdrv_recurse_is_first_non_filter method
3797  * of block filter and by bdrv_is_first_non_filter.
3798  * It is used to test if the given bs is the candidate or recurse more in the
3799  * node graph.
3800  */
3801 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3802                                       BlockDriverState *candidate)
3803 {
3804     /* return false if basic checks fails */
3805     if (!bs || !bs->drv) {
3806         return false;
3807     }
3808 
3809     /* the code reached a non block filter driver -> check if the bs is
3810      * the same as the candidate. It's the recursion termination condition.
3811      */
3812     if (!bs->drv->is_filter) {
3813         return bs == candidate;
3814     }
3815     /* Down this path the driver is a block filter driver */
3816 
3817     /* If the block filter recursion method is defined use it to recurse down
3818      * the node graph.
3819      */
3820     if (bs->drv->bdrv_recurse_is_first_non_filter) {
3821         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3822     }
3823 
3824     /* the driver is a block filter but don't allow to recurse -> return false
3825      */
3826     return false;
3827 }
3828 
3829 /* This function checks if the candidate is the first non filter bs down it's
3830  * bs chain. Since we don't have pointers to parents it explore all bs chains
3831  * from the top. Some filters can choose not to pass down the recursion.
3832  */
3833 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3834 {
3835     BlockDriverState *bs = NULL;
3836 
3837     /* walk down the bs forest recursively */
3838     while ((bs = bdrv_next(bs)) != NULL) {
3839         bool perm;
3840 
3841         /* try to recurse in this top level bs */
3842         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3843 
3844         /* candidate is the first non filter */
3845         if (perm) {
3846             return true;
3847         }
3848     }
3849 
3850     return false;
3851 }
3852 
3853 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3854                                         const char *node_name, Error **errp)
3855 {
3856     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3857     AioContext *aio_context;
3858 
3859     if (!to_replace_bs) {
3860         error_setg(errp, "Node name '%s' not found", node_name);
3861         return NULL;
3862     }
3863 
3864     aio_context = bdrv_get_aio_context(to_replace_bs);
3865     aio_context_acquire(aio_context);
3866 
3867     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3868         to_replace_bs = NULL;
3869         goto out;
3870     }
3871 
3872     /* We don't want arbitrary node of the BDS chain to be replaced only the top
3873      * most non filter in order to prevent data corruption.
3874      * Another benefit is that this tests exclude backing files which are
3875      * blocked by the backing blockers.
3876      */
3877     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3878         error_setg(errp, "Only top most non filter can be replaced");
3879         to_replace_bs = NULL;
3880         goto out;
3881     }
3882 
3883 out:
3884     aio_context_release(aio_context);
3885     return to_replace_bs;
3886 }
3887 
3888 static bool append_open_options(QDict *d, BlockDriverState *bs)
3889 {
3890     const QDictEntry *entry;
3891     QemuOptDesc *desc;
3892     BdrvChild *child;
3893     bool found_any = false;
3894     const char *p;
3895 
3896     for (entry = qdict_first(bs->options); entry;
3897          entry = qdict_next(bs->options, entry))
3898     {
3899         /* Exclude options for children */
3900         QLIST_FOREACH(child, &bs->children, next) {
3901             if (strstart(qdict_entry_key(entry), child->name, &p)
3902                 && (!*p || *p == '.'))
3903             {
3904                 break;
3905             }
3906         }
3907         if (child) {
3908             continue;
3909         }
3910 
3911         /* And exclude all non-driver-specific options */
3912         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3913             if (!strcmp(qdict_entry_key(entry), desc->name)) {
3914                 break;
3915             }
3916         }
3917         if (desc->name) {
3918             continue;
3919         }
3920 
3921         qobject_incref(qdict_entry_value(entry));
3922         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3923         found_any = true;
3924     }
3925 
3926     return found_any;
3927 }
3928 
3929 /* Updates the following BDS fields:
3930  *  - exact_filename: A filename which may be used for opening a block device
3931  *                    which (mostly) equals the given BDS (even without any
3932  *                    other options; so reading and writing must return the same
3933  *                    results, but caching etc. may be different)
3934  *  - full_open_options: Options which, when given when opening a block device
3935  *                       (without a filename), result in a BDS (mostly)
3936  *                       equalling the given one
3937  *  - filename: If exact_filename is set, it is copied here. Otherwise,
3938  *              full_open_options is converted to a JSON object, prefixed with
3939  *              "json:" (for use through the JSON pseudo protocol) and put here.
3940  */
3941 void bdrv_refresh_filename(BlockDriverState *bs)
3942 {
3943     BlockDriver *drv = bs->drv;
3944     QDict *opts;
3945 
3946     if (!drv) {
3947         return;
3948     }
3949 
3950     /* This BDS's file name will most probably depend on its file's name, so
3951      * refresh that first */
3952     if (bs->file) {
3953         bdrv_refresh_filename(bs->file->bs);
3954     }
3955 
3956     if (drv->bdrv_refresh_filename) {
3957         /* Obsolete information is of no use here, so drop the old file name
3958          * information before refreshing it */
3959         bs->exact_filename[0] = '\0';
3960         if (bs->full_open_options) {
3961             QDECREF(bs->full_open_options);
3962             bs->full_open_options = NULL;
3963         }
3964 
3965         opts = qdict_new();
3966         append_open_options(opts, bs);
3967         drv->bdrv_refresh_filename(bs, opts);
3968         QDECREF(opts);
3969     } else if (bs->file) {
3970         /* Try to reconstruct valid information from the underlying file */
3971         bool has_open_options;
3972 
3973         bs->exact_filename[0] = '\0';
3974         if (bs->full_open_options) {
3975             QDECREF(bs->full_open_options);
3976             bs->full_open_options = NULL;
3977         }
3978 
3979         opts = qdict_new();
3980         has_open_options = append_open_options(opts, bs);
3981 
3982         /* If no specific options have been given for this BDS, the filename of
3983          * the underlying file should suffice for this one as well */
3984         if (bs->file->bs->exact_filename[0] && !has_open_options) {
3985             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3986         }
3987         /* Reconstructing the full options QDict is simple for most format block
3988          * drivers, as long as the full options are known for the underlying
3989          * file BDS. The full options QDict of that file BDS should somehow
3990          * contain a representation of the filename, therefore the following
3991          * suffices without querying the (exact_)filename of this BDS. */
3992         if (bs->file->bs->full_open_options) {
3993             qdict_put_obj(opts, "driver",
3994                           QOBJECT(qstring_from_str(drv->format_name)));
3995             QINCREF(bs->file->bs->full_open_options);
3996             qdict_put_obj(opts, "file",
3997                           QOBJECT(bs->file->bs->full_open_options));
3998 
3999             bs->full_open_options = opts;
4000         } else {
4001             QDECREF(opts);
4002         }
4003     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4004         /* There is no underlying file BDS (at least referenced by BDS.file),
4005          * so the full options QDict should be equal to the options given
4006          * specifically for this block device when it was opened (plus the
4007          * driver specification).
4008          * Because those options don't change, there is no need to update
4009          * full_open_options when it's already set. */
4010 
4011         opts = qdict_new();
4012         append_open_options(opts, bs);
4013         qdict_put_obj(opts, "driver",
4014                       QOBJECT(qstring_from_str(drv->format_name)));
4015 
4016         if (bs->exact_filename[0]) {
4017             /* This may not work for all block protocol drivers (some may
4018              * require this filename to be parsed), but we have to find some
4019              * default solution here, so just include it. If some block driver
4020              * does not support pure options without any filename at all or
4021              * needs some special format of the options QDict, it needs to
4022              * implement the driver-specific bdrv_refresh_filename() function.
4023              */
4024             qdict_put_obj(opts, "filename",
4025                           QOBJECT(qstring_from_str(bs->exact_filename)));
4026         }
4027 
4028         bs->full_open_options = opts;
4029     }
4030 
4031     if (bs->exact_filename[0]) {
4032         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4033     } else if (bs->full_open_options) {
4034         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4035         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4036                  qstring_get_str(json));
4037         QDECREF(json);
4038     }
4039 }
4040