xref: /openbmc/qemu/block.c (revision 0d54a6fed3ebaf0e17656a712e5d6575c712459b)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "block/trace.h"
26 #include "block/block_int.h"
27 #include "block/blockjob.h"
28 #include "block/nbd.h"
29 #include "qemu/error-report.h"
30 #include "module_block.h"
31 #include "qemu/module.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qapi/qmp/qbool.h"
34 #include "qapi/qmp/qjson.h"
35 #include "sysemu/block-backend.h"
36 #include "sysemu/sysemu.h"
37 #include "qemu/notify.h"
38 #include "qemu/coroutine.h"
39 #include "block/qapi.h"
40 #include "qmp-commands.h"
41 #include "qemu/timer.h"
42 #include "qapi-event.h"
43 #include "qemu/cutils.h"
44 #include "qemu/id.h"
45 #include "qapi/util.h"
46 
47 #ifdef CONFIG_BSD
48 #include <sys/ioctl.h>
49 #include <sys/queue.h>
50 #ifndef __DragonFly__
51 #include <sys/disk.h>
52 #endif
53 #endif
54 
55 #ifdef _WIN32
56 #include <windows.h>
57 #endif
58 
59 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60 
61 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
62     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
63 
64 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
65     QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
66 
67 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
68     QLIST_HEAD_INITIALIZER(bdrv_drivers);
69 
70 static BlockDriverState *bdrv_open_inherit(const char *filename,
71                                            const char *reference,
72                                            QDict *options, int flags,
73                                            BlockDriverState *parent,
74                                            const BdrvChildRole *child_role,
75                                            Error **errp);
76 
77 /* If non-zero, use only whitelisted block drivers */
78 static int use_bdrv_whitelist;
79 
80 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed by
 * ':' (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
87 
/*
 * Return 1 if @filename is exactly a drive prefix ("<letter>:" and nothing
 * else) or starts with "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
98 #endif
99 
100 size_t bdrv_opt_mem_align(BlockDriverState *bs)
101 {
102     if (!bs || !bs->drv) {
103         /* page size or 4k (hdd sector size) should be on the safe side */
104         return MAX(4096, getpagesize());
105     }
106 
107     return bs->bl.opt_mem_alignment;
108 }
109 
110 size_t bdrv_min_mem_align(BlockDriverState *bs)
111 {
112     if (!bs || !bs->drv) {
113         /* page size or 4k (hdd sector size) should be on the safe side */
114         return MAX(4096, getpagesize());
115     }
116 
117     return bs->bl.min_mem_alignment;
118 }
119 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' appears
 * before the first directory separator.  On Windows, drive specifications
 * are never treated as a protocol prefix.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* Index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, separators)] == ':';
}
137 
/*
 * Return non-zero if @path is absolute.  Everywhere this means a leading
 * '/'; on Windows a leading '\\' and drive specifications count as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
150 
/*
 * Build the path of @filename relative to @base_path into @dest (at most
 * @dest_size bytes including the terminating NUL).
 *
 * An absolute @filename is copied as is.  Otherwise the leading part of
 * @base_path is kept, up to and including its last directory separator or,
 * failing that, its "<protocol>:" prefix, and @filename is appended to it.
 * URLs are supported.  Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *keep_end;   /* one past the part of base_path to keep */
    const char *after_sep;  /* one past the last directory separator */
    int keep_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Never cut into the "<protocol>:" prefix, if there is one */
    keep_end = base_path;
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            keep_end = colon + 1;
        }
    }

    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    if (after_sep > keep_end) {
        keep_end = after_sep;
    }

    /* Copy the directory part, truncated to what fits into dest */
    keep_len = keep_end - base_path;
    if (keep_len > dest_size - 1) {
        keep_len = dest_size - 1;
    }
    memcpy(dest, base_path, keep_len);
    dest[keep_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
199 
200 /* Returns whether the image file is opened as read-only. Note that this can
201  * return false and writing to the image file is still not possible because the
202  * image is inactivated. */
203 bool bdrv_is_read_only(BlockDriverState *bs)
204 {
205     return bs->read_only;
206 }
207 
208 /* Returns whether the image file can be written to right now */
209 bool bdrv_is_writable(BlockDriverState *bs)
210 {
211     return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE);
212 }
213 
214 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
215 {
216     /* Do not set read_only if copy_on_read is enabled */
217     if (bs->copy_on_read && read_only) {
218         error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
219                    bdrv_get_device_or_node_name(bs));
220         return -EINVAL;
221     }
222 
223     /* Do not clear read_only if it is prohibited */
224     if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
225         error_setg(errp, "Node '%s' is read only",
226                    bdrv_get_device_or_node_name(bs));
227         return -EPERM;
228     }
229 
230     return 0;
231 }
232 
233 int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
234 {
235     int ret = 0;
236 
237     ret = bdrv_can_set_read_only(bs, read_only, errp);
238     if (ret < 0) {
239         return ret;
240     }
241 
242     bs->read_only = read_only;
243     return 0;
244 }
245 
246 void bdrv_get_full_backing_filename_from_filename(const char *backed,
247                                                   const char *backing,
248                                                   char *dest, size_t sz,
249                                                   Error **errp)
250 {
251     if (backing[0] == '\0' || path_has_protocol(backing) ||
252         path_is_absolute(backing))
253     {
254         pstrcpy(dest, sz, backing);
255     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
256         error_setg(errp, "Cannot use relative backing file names for '%s'",
257                    backed);
258     } else {
259         path_combine(dest, sz, backed, backing);
260     }
261 }
262 
263 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
264                                     Error **errp)
265 {
266     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
267 
268     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
269                                                  dest, sz, errp);
270 }
271 
272 void bdrv_register(BlockDriver *bdrv)
273 {
274     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
275 }
276 
277 BlockDriverState *bdrv_new(void)
278 {
279     BlockDriverState *bs;
280     int i;
281 
282     bs = g_new0(BlockDriverState, 1);
283     QLIST_INIT(&bs->dirty_bitmaps);
284     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
285         QLIST_INIT(&bs->op_blockers[i]);
286     }
287     notifier_with_return_list_init(&bs->before_write_notifiers);
288     bs->refcnt = 1;
289     bs->aio_context = qemu_get_aio_context();
290 
291     qemu_co_queue_init(&bs->flush_queue);
292 
293     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
294 
295     return bs;
296 }
297 
298 static BlockDriver *bdrv_do_find_format(const char *format_name)
299 {
300     BlockDriver *drv1;
301 
302     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
303         if (!strcmp(drv1->format_name, format_name)) {
304             return drv1;
305         }
306     }
307 
308     return NULL;
309 }
310 
311 BlockDriver *bdrv_find_format(const char *format_name)
312 {
313     BlockDriver *drv1;
314     int i;
315 
316     drv1 = bdrv_do_find_format(format_name);
317     if (drv1) {
318         return drv1;
319     }
320 
321     /* The driver isn't registered, maybe we need to load a module */
322     for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
323         if (!strcmp(block_driver_modules[i].format_name, format_name)) {
324             block_module_load_one(block_driver_modules[i].library_name);
325             break;
326         }
327     }
328 
329     return bdrv_do_find_format(format_name);
330 }
331 
332 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
333 {
334     static const char *whitelist_rw[] = {
335         CONFIG_BDRV_RW_WHITELIST
336     };
337     static const char *whitelist_ro[] = {
338         CONFIG_BDRV_RO_WHITELIST
339     };
340     const char **p;
341 
342     if (!whitelist_rw[0] && !whitelist_ro[0]) {
343         return 1;               /* no whitelist, anything goes */
344     }
345 
346     for (p = whitelist_rw; *p; p++) {
347         if (!strcmp(drv->format_name, *p)) {
348             return 1;
349         }
350     }
351     if (read_only) {
352         for (p = whitelist_ro; *p; p++) {
353             if (!strcmp(drv->format_name, *p)) {
354                 return 1;
355             }
356         }
357     }
358     return 0;
359 }
360 
361 bool bdrv_uses_whitelist(void)
362 {
363     return use_bdrv_whitelist;
364 }
365 
366 typedef struct CreateCo {
367     BlockDriver *drv;
368     char *filename;
369     QemuOpts *opts;
370     int ret;
371     Error *err;
372 } CreateCo;
373 
374 static void coroutine_fn bdrv_create_co_entry(void *opaque)
375 {
376     Error *local_err = NULL;
377     int ret;
378 
379     CreateCo *cco = opaque;
380     assert(cco->drv);
381 
382     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
383     error_propagate(&cco->err, local_err);
384     cco->ret = ret;
385 }
386 
387 int bdrv_create(BlockDriver *drv, const char* filename,
388                 QemuOpts *opts, Error **errp)
389 {
390     int ret;
391 
392     Coroutine *co;
393     CreateCo cco = {
394         .drv = drv,
395         .filename = g_strdup(filename),
396         .opts = opts,
397         .ret = NOT_DONE,
398         .err = NULL,
399     };
400 
401     if (!drv->bdrv_create) {
402         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
403         ret = -ENOTSUP;
404         goto out;
405     }
406 
407     if (qemu_in_coroutine()) {
408         /* Fast-path if already in coroutine context */
409         bdrv_create_co_entry(&cco);
410     } else {
411         co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
412         qemu_coroutine_enter(co);
413         while (cco.ret == NOT_DONE) {
414             aio_poll(qemu_get_aio_context(), true);
415         }
416     }
417 
418     ret = cco.ret;
419     if (ret < 0) {
420         if (cco.err) {
421             error_propagate(errp, cco.err);
422         } else {
423             error_setg_errno(errp, -ret, "Could not create image");
424         }
425     }
426 
427 out:
428     g_free(cco.filename);
429     return ret;
430 }
431 
432 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
433 {
434     BlockDriver *drv;
435     Error *local_err = NULL;
436     int ret;
437 
438     drv = bdrv_find_protocol(filename, true, errp);
439     if (drv == NULL) {
440         return -ENOENT;
441     }
442 
443     ret = bdrv_create(drv, filename, opts, &local_err);
444     error_propagate(errp, local_err);
445     return ret;
446 }
447 
448 /**
449  * Try to get @bs's logical and physical block size.
450  * On success, store them in @bsz struct and return 0.
451  * On failure return -errno.
452  * @bs must not be empty.
453  */
454 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
455 {
456     BlockDriver *drv = bs->drv;
457 
458     if (drv && drv->bdrv_probe_blocksizes) {
459         return drv->bdrv_probe_blocksizes(bs, bsz);
460     }
461 
462     return -ENOTSUP;
463 }
464 
465 /**
466  * Try to get @bs's geometry (cyls, heads, sectors).
467  * On success, store them in @geo struct and return 0.
468  * On failure return -errno.
469  * @bs must not be empty.
470  */
471 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
472 {
473     BlockDriver *drv = bs->drv;
474 
475     if (drv && drv->bdrv_probe_geometry) {
476         return drv->bdrv_probe_geometry(bs, geo);
477     }
478 
479     return -ENOTSUP;
480 }
481 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.  On Windows the buffer must be at least MAX_PATH bytes.
 * Elsewhere the file is created as "vl.XXXXXX" in $TMPDIR, or in /var/tmp
 * if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
517 
518 /*
519  * Detect host devices. By convention, /dev/cdrom[N] is always
520  * recognized as a host CDROM.
521  */
522 static BlockDriver *find_hdev_driver(const char *filename)
523 {
524     int score_max = 0, score;
525     BlockDriver *drv = NULL, *d;
526 
527     QLIST_FOREACH(d, &bdrv_drivers, list) {
528         if (d->bdrv_probe_device) {
529             score = d->bdrv_probe_device(filename);
530             if (score > score_max) {
531                 score_max = score;
532                 drv = d;
533             }
534         }
535     }
536 
537     return drv;
538 }
539 
540 static BlockDriver *bdrv_do_find_protocol(const char *protocol)
541 {
542     BlockDriver *drv1;
543 
544     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
545         if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
546             return drv1;
547         }
548     }
549 
550     return NULL;
551 }
552 
553 BlockDriver *bdrv_find_protocol(const char *filename,
554                                 bool allow_protocol_prefix,
555                                 Error **errp)
556 {
557     BlockDriver *drv1;
558     char protocol[128];
559     int len;
560     const char *p;
561     int i;
562 
563     /* TODO Drivers without bdrv_file_open must be specified explicitly */
564 
565     /*
566      * XXX(hch): we really should not let host device detection
567      * override an explicit protocol specification, but moving this
568      * later breaks access to device names with colons in them.
569      * Thanks to the brain-dead persistent naming schemes on udev-
570      * based Linux systems those actually are quite common.
571      */
572     drv1 = find_hdev_driver(filename);
573     if (drv1) {
574         return drv1;
575     }
576 
577     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
578         return &bdrv_file;
579     }
580 
581     p = strchr(filename, ':');
582     assert(p != NULL);
583     len = p - filename;
584     if (len > sizeof(protocol) - 1)
585         len = sizeof(protocol) - 1;
586     memcpy(protocol, filename, len);
587     protocol[len] = '\0';
588 
589     drv1 = bdrv_do_find_protocol(protocol);
590     if (drv1) {
591         return drv1;
592     }
593 
594     for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
595         if (block_driver_modules[i].protocol_name &&
596             !strcmp(block_driver_modules[i].protocol_name, protocol)) {
597             block_module_load_one(block_driver_modules[i].library_name);
598             break;
599         }
600     }
601 
602     drv1 = bdrv_do_find_protocol(protocol);
603     if (!drv1) {
604         error_setg(errp, "Unknown protocol '%s'", protocol);
605     }
606     return drv1;
607 }
608 
609 /*
610  * Guess image format by probing its contents.
611  * This is not a good idea when your image is raw (CVE-2008-2004), but
612  * we do it anyway for backward compatibility.
613  *
614  * @buf         contains the image's first @buf_size bytes.
615  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
616  *              but can be smaller if the image file is smaller)
617  * @filename    is its filename.
618  *
619  * For all block drivers, call the bdrv_probe() method to get its
620  * probing score.
621  * Return the first block driver with the highest probing score.
622  */
623 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
624                             const char *filename)
625 {
626     int score_max = 0, score;
627     BlockDriver *drv = NULL, *d;
628 
629     QLIST_FOREACH(d, &bdrv_drivers, list) {
630         if (d->bdrv_probe) {
631             score = d->bdrv_probe(buf, buf_size, filename);
632             if (score > score_max) {
633                 score_max = score;
634                 drv = d;
635             }
636         }
637     }
638 
639     return drv;
640 }
641 
642 static int find_image_format(BlockBackend *file, const char *filename,
643                              BlockDriver **pdrv, Error **errp)
644 {
645     BlockDriver *drv;
646     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
647     int ret = 0;
648 
649     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
650     if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
651         *pdrv = &bdrv_raw;
652         return ret;
653     }
654 
655     ret = blk_pread(file, 0, buf, sizeof(buf));
656     if (ret < 0) {
657         error_setg_errno(errp, -ret, "Could not read image for determining its "
658                          "format");
659         *pdrv = NULL;
660         return ret;
661     }
662 
663     drv = bdrv_probe_all(buf, ret, filename);
664     if (!drv) {
665         error_setg(errp, "Could not determine image format: No compatible "
666                    "driver found");
667         ret = -ENOENT;
668     }
669     *pdrv = drv;
670     return ret;
671 }
672 
673 /**
674  * Set the current 'total_sectors' value
675  * Return 0 on success, -errno on error.
676  */
677 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
678 {
679     BlockDriver *drv = bs->drv;
680 
681     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
682     if (bdrv_is_sg(bs))
683         return 0;
684 
685     /* query actual device if possible, otherwise just trust the hint */
686     if (drv->bdrv_getlength) {
687         int64_t length = drv->bdrv_getlength(bs);
688         if (length < 0) {
689             return length;
690         }
691         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
692     }
693 
694     bs->total_sectors = hint;
695     return 0;
696 }
697 
698 /**
699  * Combines a QDict of new block driver @options with any missing options taken
700  * from @old_options, so that leaving out an option defaults to its old value.
701  */
702 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
703                               QDict *old_options)
704 {
705     if (bs->drv && bs->drv->bdrv_join_options) {
706         bs->drv->bdrv_join_options(options, old_options);
707     } else {
708         qdict_join(options, old_options, false);
709     }
710 }
711 
712 /**
713  * Set open flags for a given discard mode
714  *
715  * Return 0 on success, -1 if the discard mode was invalid.
716  */
717 int bdrv_parse_discard_flags(const char *mode, int *flags)
718 {
719     *flags &= ~BDRV_O_UNMAP;
720 
721     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
722         /* do nothing */
723     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
724         *flags |= BDRV_O_UNMAP;
725     } else {
726         return -1;
727     }
728 
729     return 0;
730 }
731 
732 /**
733  * Set open flags for a given cache mode
734  *
735  * Return 0 on success, -1 if the cache mode was invalid.
736  */
737 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
738 {
739     *flags &= ~BDRV_O_CACHE_MASK;
740 
741     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
742         *writethrough = false;
743         *flags |= BDRV_O_NOCACHE;
744     } else if (!strcmp(mode, "directsync")) {
745         *writethrough = true;
746         *flags |= BDRV_O_NOCACHE;
747     } else if (!strcmp(mode, "writeback")) {
748         *writethrough = false;
749     } else if (!strcmp(mode, "unsafe")) {
750         *writethrough = false;
751         *flags |= BDRV_O_NO_FLUSH;
752     } else if (!strcmp(mode, "writethrough")) {
753         *writethrough = true;
754     } else {
755         return -1;
756     }
757 
758     return 0;
759 }
760 
761 static char *bdrv_child_get_parent_desc(BdrvChild *c)
762 {
763     BlockDriverState *parent = c->opaque;
764     return g_strdup(bdrv_get_device_or_node_name(parent));
765 }
766 
767 static void bdrv_child_cb_drained_begin(BdrvChild *child)
768 {
769     BlockDriverState *bs = child->opaque;
770     bdrv_drained_begin(bs);
771 }
772 
773 static void bdrv_child_cb_drained_end(BdrvChild *child)
774 {
775     BlockDriverState *bs = child->opaque;
776     bdrv_drained_end(bs);
777 }
778 
779 static int bdrv_child_cb_inactivate(BdrvChild *child)
780 {
781     BlockDriverState *bs = child->opaque;
782     assert(bs->open_flags & BDRV_O_INACTIVE);
783     return 0;
784 }
785 
786 /*
787  * Returns the options and flags that a temporary snapshot should get, based on
788  * the originally requested flags (the originally requested image will have
789  * flags like a backing file)
790  */
791 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
792                                        int parent_flags, QDict *parent_options)
793 {
794     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
795 
796     /* For temporary files, unconditional cache=unsafe is fine */
797     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
798     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
799 
800     /* Copy the read-only option from the parent */
801     qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
802 
803     /* aio=native doesn't work for cache.direct=off, so disable it for the
804      * temporary snapshot */
805     *child_flags &= ~BDRV_O_NATIVE_AIO;
806 }
807 
808 /*
809  * Returns the options and flags that bs->file should get if a protocol driver
810  * is expected, based on the given options and flags for the parent BDS
811  */
812 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
813                                    int parent_flags, QDict *parent_options)
814 {
815     int flags = parent_flags;
816 
817     /* Enable protocol handling, disable format probing for bs->file */
818     flags |= BDRV_O_PROTOCOL;
819 
820     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
821      * the parent. */
822     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
823     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
824     qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
825 
826     /* Inherit the read-only option from the parent if it's not set */
827     qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
828 
829     /* Our block drivers take care to send flushes and respect unmap policy,
830      * so we can default to enable both on lower layers regardless of the
831      * corresponding parent options. */
832     qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
833 
834     /* Clear flags that only apply to the top layer */
835     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
836                BDRV_O_NO_IO);
837 
838     *child_flags = flags;
839 }
840 
841 const BdrvChildRole child_file = {
842     .get_parent_desc = bdrv_child_get_parent_desc,
843     .inherit_options = bdrv_inherited_options,
844     .drained_begin   = bdrv_child_cb_drained_begin,
845     .drained_end     = bdrv_child_cb_drained_end,
846     .inactivate      = bdrv_child_cb_inactivate,
847 };
848 
849 /*
850  * Returns the options and flags that bs->file should get if the use of formats
851  * (and not only protocols) is permitted for it, based on the given options and
852  * flags for the parent BDS
853  */
854 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
855                                        int parent_flags, QDict *parent_options)
856 {
857     child_file.inherit_options(child_flags, child_options,
858                                parent_flags, parent_options);
859 
860     *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
861 }
862 
863 const BdrvChildRole child_format = {
864     .get_parent_desc = bdrv_child_get_parent_desc,
865     .inherit_options = bdrv_inherited_fmt_options,
866     .drained_begin   = bdrv_child_cb_drained_begin,
867     .drained_end     = bdrv_child_cb_drained_end,
868     .inactivate      = bdrv_child_cb_inactivate,
869 };
870 
871 static void bdrv_backing_attach(BdrvChild *c)
872 {
873     BlockDriverState *parent = c->opaque;
874     BlockDriverState *backing_hd = c->bs;
875 
876     assert(!parent->backing_blocker);
877     error_setg(&parent->backing_blocker,
878                "node is used as backing hd of '%s'",
879                bdrv_get_device_or_node_name(parent));
880 
881     parent->open_flags &= ~BDRV_O_NO_BACKING;
882     pstrcpy(parent->backing_file, sizeof(parent->backing_file),
883             backing_hd->filename);
884     pstrcpy(parent->backing_format, sizeof(parent->backing_format),
885             backing_hd->drv ? backing_hd->drv->format_name : "");
886 
887     bdrv_op_block_all(backing_hd, parent->backing_blocker);
888     /* Otherwise we won't be able to commit or stream */
889     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
890                     parent->backing_blocker);
891     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
892                     parent->backing_blocker);
893     /*
894      * We do backup in 3 ways:
895      * 1. drive backup
896      *    The target bs is new opened, and the source is top BDS
897      * 2. blockdev backup
898      *    Both the source and the target are top BDSes.
899      * 3. internal backup(used for block replication)
900      *    Both the source and the target are backing file
901      *
902      * In case 1 and 2, neither the source nor the target is the backing file.
903      * In case 3, we will block the top BDS, so there is only one block job
904      * for the top BDS and its backing chain.
905      */
906     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
907                     parent->backing_blocker);
908     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
909                     parent->backing_blocker);
910 }
911 
912 static void bdrv_backing_detach(BdrvChild *c)
913 {
914     BlockDriverState *parent = c->opaque;
915 
916     assert(parent->backing_blocker);
917     bdrv_op_unblock_all(c->bs, parent->backing_blocker);
918     error_free(parent->backing_blocker);
919     parent->backing_blocker = NULL;
920 }
921 
922 /*
923  * Returns the options and flags that bs->backing should get, based on the
924  * given options and flags for the parent BDS
925  */
926 static void bdrv_backing_options(int *child_flags, QDict *child_options,
927                                  int parent_flags, QDict *parent_options)
928 {
929     int flags = parent_flags;
930 
931     /* The cache mode is inherited unmodified for backing files; except WCE,
932      * which is only applied on the top level (BlockBackend) */
933     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
934     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
935     qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
936 
937     /* backing files always opened read-only */
938     qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
939     flags &= ~BDRV_O_COPY_ON_READ;
940 
941     /* snapshot=on is handled on the top layer */
942     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
943 
944     *child_flags = flags;
945 }
946 
947 const BdrvChildRole child_backing = {
948     .get_parent_desc = bdrv_child_get_parent_desc,
949     .attach          = bdrv_backing_attach,
950     .detach          = bdrv_backing_detach,
951     .inherit_options = bdrv_backing_options,
952     .drained_begin   = bdrv_child_cb_drained_begin,
953     .drained_end     = bdrv_child_cb_drained_end,
954     .inactivate      = bdrv_child_cb_inactivate,
955 };
956 
957 static int bdrv_open_flags(BlockDriverState *bs, int flags)
958 {
959     int open_flags = flags;
960 
961     /*
962      * Clear flags that are internal to the block layer before opening the
963      * image.
964      */
965     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
966 
967     /*
968      * Snapshots should be writable.
969      */
970     if (flags & BDRV_O_TEMPORARY) {
971         open_flags |= BDRV_O_RDWR;
972     }
973 
974     return open_flags;
975 }
976 
977 static void update_flags_from_options(int *flags, QemuOpts *opts)
978 {
979     *flags &= ~BDRV_O_CACHE_MASK;
980 
981     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
982     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
983         *flags |= BDRV_O_NO_FLUSH;
984     }
985 
986     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
987     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
988         *flags |= BDRV_O_NOCACHE;
989     }
990 
991     *flags &= ~BDRV_O_RDWR;
992 
993     assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY));
994     if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) {
995         *flags |= BDRV_O_RDWR;
996     }
997 
998 }
999 
1000 static void update_options_from_flags(QDict *options, int flags)
1001 {
1002     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1003         qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1004     }
1005     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1006         qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1007                        flags & BDRV_O_NO_FLUSH);
1008     }
1009     if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1010         qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1011     }
1012 }
1013 
1014 static void bdrv_assign_node_name(BlockDriverState *bs,
1015                                   const char *node_name,
1016                                   Error **errp)
1017 {
1018     char *gen_node_name = NULL;
1019 
1020     if (!node_name) {
1021         node_name = gen_node_name = id_generate(ID_BLOCK);
1022     } else if (!id_wellformed(node_name)) {
1023         /*
1024          * Check for empty string or invalid characters, but not if it is
1025          * generated (generated names use characters not available to the user)
1026          */
1027         error_setg(errp, "Invalid node name");
1028         return;
1029     }
1030 
1031     /* takes care of avoiding namespaces collisions */
1032     if (blk_by_name(node_name)) {
1033         error_setg(errp, "node-name=%s is conflicting with a device id",
1034                    node_name);
1035         goto out;
1036     }
1037 
1038     /* takes care of avoiding duplicates node names */
1039     if (bdrv_find_node(node_name)) {
1040         error_setg(errp, "Duplicate node name");
1041         goto out;
1042     }
1043 
1044     /* copy node name into the bs and insert it into the graph list */
1045     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1046     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1047 out:
1048     g_free(gen_node_name);
1049 }
1050 
1051 static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1052                             const char *node_name, QDict *options,
1053                             int open_flags, Error **errp)
1054 {
1055     Error *local_err = NULL;
1056     int ret;
1057 
1058     bdrv_assign_node_name(bs, node_name, &local_err);
1059     if (local_err) {
1060         error_propagate(errp, local_err);
1061         return -EINVAL;
1062     }
1063 
1064     bs->drv = drv;
1065     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1066     bs->opaque = g_malloc0(drv->instance_size);
1067 
1068     if (drv->bdrv_file_open) {
1069         assert(!drv->bdrv_needs_filename || bs->filename[0]);
1070         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1071     } else if (drv->bdrv_open) {
1072         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1073     } else {
1074         ret = 0;
1075     }
1076 
1077     if (ret < 0) {
1078         if (local_err) {
1079             error_propagate(errp, local_err);
1080         } else if (bs->filename[0]) {
1081             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1082         } else {
1083             error_setg_errno(errp, -ret, "Could not open image");
1084         }
1085         goto free_and_fail;
1086     }
1087 
1088     ret = refresh_total_sectors(bs, bs->total_sectors);
1089     if (ret < 0) {
1090         error_setg_errno(errp, -ret, "Could not refresh total sector count");
1091         goto free_and_fail;
1092     }
1093 
1094     bdrv_refresh_limits(bs, &local_err);
1095     if (local_err) {
1096         error_propagate(errp, local_err);
1097         ret = -EINVAL;
1098         goto free_and_fail;
1099     }
1100 
1101     assert(bdrv_opt_mem_align(bs) != 0);
1102     assert(bdrv_min_mem_align(bs) != 0);
1103     assert(is_power_of_2(bs->bl.request_alignment));
1104 
1105     return 0;
1106 
1107 free_and_fail:
1108     /* FIXME Close bs first if already opened*/
1109     g_free(bs->opaque);
1110     bs->opaque = NULL;
1111     bs->drv = NULL;
1112     return ret;
1113 }
1114 
1115 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1116                                        int flags, Error **errp)
1117 {
1118     BlockDriverState *bs;
1119     int ret;
1120 
1121     bs = bdrv_new();
1122     bs->open_flags = flags;
1123     bs->explicit_options = qdict_new();
1124     bs->options = qdict_new();
1125     bs->opaque = NULL;
1126 
1127     update_options_from_flags(bs->options, flags);
1128 
1129     ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1130     if (ret < 0) {
1131         QDECREF(bs->explicit_options);
1132         QDECREF(bs->options);
1133         bdrv_unref(bs);
1134         return NULL;
1135     }
1136 
1137     return bs;
1138 }
1139 
1140 QemuOptsList bdrv_runtime_opts = {
1141     .name = "bdrv_common",
1142     .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1143     .desc = {
1144         {
1145             .name = "node-name",
1146             .type = QEMU_OPT_STRING,
1147             .help = "Node name of the block device node",
1148         },
1149         {
1150             .name = "driver",
1151             .type = QEMU_OPT_STRING,
1152             .help = "Block driver to use for the node",
1153         },
1154         {
1155             .name = BDRV_OPT_CACHE_DIRECT,
1156             .type = QEMU_OPT_BOOL,
1157             .help = "Bypass software writeback cache on the host",
1158         },
1159         {
1160             .name = BDRV_OPT_CACHE_NO_FLUSH,
1161             .type = QEMU_OPT_BOOL,
1162             .help = "Ignore flush requests",
1163         },
1164         {
1165             .name = BDRV_OPT_READ_ONLY,
1166             .type = QEMU_OPT_BOOL,
1167             .help = "Node is opened in read-only mode",
1168         },
1169         {
1170             .name = "detect-zeroes",
1171             .type = QEMU_OPT_STRING,
1172             .help = "try to optimize zero writes (off, on, unmap)",
1173         },
1174         {
1175             .name = "discard",
1176             .type = QEMU_OPT_STRING,
1177             .help = "discard operation (ignore/off, unmap/on)",
1178         },
1179         {
1180             .name = BDRV_OPT_FORCE_SHARE,
1181             .type = QEMU_OPT_BOOL,
1182             .help = "always accept other writers (default: off)",
1183         },
1184         { /* end of list */ }
1185     },
1186 };
1187 
1188 /*
1189  * Common part for opening disk images and files
1190  *
1191  * Removes all processed options from *options.
1192  */
1193 static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1194                             QDict *options, Error **errp)
1195 {
1196     int ret, open_flags;
1197     const char *filename;
1198     const char *driver_name = NULL;
1199     const char *node_name = NULL;
1200     const char *discard;
1201     const char *detect_zeroes;
1202     QemuOpts *opts;
1203     BlockDriver *drv;
1204     Error *local_err = NULL;
1205 
1206     assert(bs->file == NULL);
1207     assert(options != NULL && bs->options != options);
1208 
1209     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1210     qemu_opts_absorb_qdict(opts, options, &local_err);
1211     if (local_err) {
1212         error_propagate(errp, local_err);
1213         ret = -EINVAL;
1214         goto fail_opts;
1215     }
1216 
1217     update_flags_from_options(&bs->open_flags, opts);
1218 
1219     driver_name = qemu_opt_get(opts, "driver");
1220     drv = bdrv_find_format(driver_name);
1221     assert(drv != NULL);
1222 
1223     bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1224 
1225     if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1226         error_setg(errp,
1227                    BDRV_OPT_FORCE_SHARE
1228                    "=on can only be used with read-only images");
1229         ret = -EINVAL;
1230         goto fail_opts;
1231     }
1232 
1233     if (file != NULL) {
1234         filename = blk_bs(file)->filename;
1235     } else {
1236         /*
1237          * Caution: while qdict_get_try_str() is fine, getting
1238          * non-string types would require more care.  When @options
1239          * come from -blockdev or blockdev_add, its members are typed
1240          * according to the QAPI schema, but when they come from
1241          * -drive, they're all QString.
1242          */
1243         filename = qdict_get_try_str(options, "filename");
1244     }
1245 
1246     if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1247         error_setg(errp, "The '%s' block driver requires a file name",
1248                    drv->format_name);
1249         ret = -EINVAL;
1250         goto fail_opts;
1251     }
1252 
1253     trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1254                            drv->format_name);
1255 
1256     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1257 
1258     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
1259         error_setg(errp,
1260                    !bs->read_only && bdrv_is_whitelisted(drv, true)
1261                         ? "Driver '%s' can only be used for read-only devices"
1262                         : "Driver '%s' is not whitelisted",
1263                    drv->format_name);
1264         ret = -ENOTSUP;
1265         goto fail_opts;
1266     }
1267 
1268     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
1269     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1270         if (!bs->read_only) {
1271             bdrv_enable_copy_on_read(bs);
1272         } else {
1273             error_setg(errp, "Can't use copy-on-read on read-only device");
1274             ret = -EINVAL;
1275             goto fail_opts;
1276         }
1277     }
1278 
1279     discard = qemu_opt_get(opts, "discard");
1280     if (discard != NULL) {
1281         if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1282             error_setg(errp, "Invalid discard option");
1283             ret = -EINVAL;
1284             goto fail_opts;
1285         }
1286     }
1287 
1288     detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
1289     if (detect_zeroes) {
1290         BlockdevDetectZeroesOptions value =
1291             qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
1292                             detect_zeroes,
1293                             BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
1294                             BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
1295                             &local_err);
1296         if (local_err) {
1297             error_propagate(errp, local_err);
1298             ret = -EINVAL;
1299             goto fail_opts;
1300         }
1301 
1302         if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1303             !(bs->open_flags & BDRV_O_UNMAP))
1304         {
1305             error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1306                              "without setting discard operation to unmap");
1307             ret = -EINVAL;
1308             goto fail_opts;
1309         }
1310 
1311         bs->detect_zeroes = value;
1312     }
1313 
1314     if (filename != NULL) {
1315         pstrcpy(bs->filename, sizeof(bs->filename), filename);
1316     } else {
1317         bs->filename[0] = '\0';
1318     }
1319     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1320 
1321     /* Open the image, either directly or using a protocol */
1322     open_flags = bdrv_open_flags(bs, bs->open_flags);
1323     node_name = qemu_opt_get(opts, "node-name");
1324 
1325     assert(!drv->bdrv_file_open || file == NULL);
1326     ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1327     if (ret < 0) {
1328         goto fail_opts;
1329     }
1330 
1331     qemu_opts_del(opts);
1332     return 0;
1333 
1334 fail_opts:
1335     qemu_opts_del(opts);
1336     return ret;
1337 }
1338 
1339 static QDict *parse_json_filename(const char *filename, Error **errp)
1340 {
1341     QObject *options_obj;
1342     QDict *options;
1343     int ret;
1344 
1345     ret = strstart(filename, "json:", &filename);
1346     assert(ret);
1347 
1348     options_obj = qobject_from_json(filename, errp);
1349     if (!options_obj) {
1350         /* Work around qobject_from_json() lossage TODO fix that */
1351         if (errp && !*errp) {
1352             error_setg(errp, "Could not parse the JSON options");
1353             return NULL;
1354         }
1355         error_prepend(errp, "Could not parse the JSON options: ");
1356         return NULL;
1357     }
1358 
1359     options = qobject_to_qdict(options_obj);
1360     if (!options) {
1361         qobject_decref(options_obj);
1362         error_setg(errp, "Invalid JSON object given");
1363         return NULL;
1364     }
1365 
1366     qdict_flatten(options);
1367 
1368     return options;
1369 }
1370 
1371 static void parse_json_protocol(QDict *options, const char **pfilename,
1372                                 Error **errp)
1373 {
1374     QDict *json_options;
1375     Error *local_err = NULL;
1376 
1377     /* Parse json: pseudo-protocol */
1378     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1379         return;
1380     }
1381 
1382     json_options = parse_json_filename(*pfilename, &local_err);
1383     if (local_err) {
1384         error_propagate(errp, local_err);
1385         return;
1386     }
1387 
1388     /* Options given in the filename have lower priority than options
1389      * specified directly */
1390     qdict_join(options, json_options, false);
1391     QDECREF(json_options);
1392     *pfilename = NULL;
1393 }
1394 
1395 /*
1396  * Fills in default options for opening images and converts the legacy
1397  * filename/flags pair to option QDict entries.
1398  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1399  * block driver has been specified explicitly.
1400  */
1401 static int bdrv_fill_options(QDict **options, const char *filename,
1402                              int *flags, Error **errp)
1403 {
1404     const char *drvname;
1405     bool protocol = *flags & BDRV_O_PROTOCOL;
1406     bool parse_filename = false;
1407     BlockDriver *drv = NULL;
1408     Error *local_err = NULL;
1409 
1410     /*
1411      * Caution: while qdict_get_try_str() is fine, getting non-string
1412      * types would require more care.  When @options come from
1413      * -blockdev or blockdev_add, its members are typed according to
1414      * the QAPI schema, but when they come from -drive, they're all
1415      * QString.
1416      */
1417     drvname = qdict_get_try_str(*options, "driver");
1418     if (drvname) {
1419         drv = bdrv_find_format(drvname);
1420         if (!drv) {
1421             error_setg(errp, "Unknown driver '%s'", drvname);
1422             return -ENOENT;
1423         }
1424         /* If the user has explicitly specified the driver, this choice should
1425          * override the BDRV_O_PROTOCOL flag */
1426         protocol = drv->bdrv_file_open;
1427     }
1428 
1429     if (protocol) {
1430         *flags |= BDRV_O_PROTOCOL;
1431     } else {
1432         *flags &= ~BDRV_O_PROTOCOL;
1433     }
1434 
1435     /* Translate cache options from flags into options */
1436     update_options_from_flags(*options, *flags);
1437 
1438     /* Fetch the file name from the options QDict if necessary */
1439     if (protocol && filename) {
1440         if (!qdict_haskey(*options, "filename")) {
1441             qdict_put_str(*options, "filename", filename);
1442             parse_filename = true;
1443         } else {
1444             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1445                              "the same time");
1446             return -EINVAL;
1447         }
1448     }
1449 
1450     /* Find the right block driver */
1451     /* See cautionary note on accessing @options above */
1452     filename = qdict_get_try_str(*options, "filename");
1453 
1454     if (!drvname && protocol) {
1455         if (filename) {
1456             drv = bdrv_find_protocol(filename, parse_filename, errp);
1457             if (!drv) {
1458                 return -EINVAL;
1459             }
1460 
1461             drvname = drv->format_name;
1462             qdict_put_str(*options, "driver", drvname);
1463         } else {
1464             error_setg(errp, "Must specify either driver or file");
1465             return -EINVAL;
1466         }
1467     }
1468 
1469     assert(drv || !protocol);
1470 
1471     /* Driver-specific filename parsing */
1472     if (drv && drv->bdrv_parse_filename && parse_filename) {
1473         drv->bdrv_parse_filename(filename, *options, &local_err);
1474         if (local_err) {
1475             error_propagate(errp, local_err);
1476             return -EINVAL;
1477         }
1478 
1479         if (!drv->bdrv_needs_filename) {
1480             qdict_del(*options, "filename");
1481         }
1482     }
1483 
1484     return 0;
1485 }
1486 
1487 static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
1488                                  GSList *ignore_children, Error **errp);
1489 static void bdrv_child_abort_perm_update(BdrvChild *c);
1490 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
1491 
1492 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
1493                             BdrvChild *c,
1494                             const BdrvChildRole *role,
1495                             uint64_t parent_perm, uint64_t parent_shared,
1496                             uint64_t *nperm, uint64_t *nshared)
1497 {
1498     if (bs->drv && bs->drv->bdrv_child_perm) {
1499         bs->drv->bdrv_child_perm(bs, c, role,
1500                                  parent_perm, parent_shared,
1501                                  nperm, nshared);
1502     }
1503     if (child_bs && child_bs->force_share) {
1504         *nshared = BLK_PERM_ALL;
1505     }
1506 }
1507 
1508 /*
1509  * Check whether permissions on this node can be changed in a way that
1510  * @cumulative_perms and @cumulative_shared_perms are the new cumulative
1511  * permissions of all its parents. This involves checking whether all necessary
1512  * permission changes to child nodes can be performed.
1513  *
1514  * A call to this function must always be followed by a call to bdrv_set_perm()
1515  * or bdrv_abort_perm_update().
1516  */
1517 static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
1518                            uint64_t cumulative_shared_perms,
1519                            GSList *ignore_children, Error **errp)
1520 {
1521     BlockDriver *drv = bs->drv;
1522     BdrvChild *c;
1523     int ret;
1524 
1525     /* Write permissions never work with read-only images */
1526     if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
1527         !bdrv_is_writable(bs))
1528     {
1529         error_setg(errp, "Block node is read-only");
1530         return -EPERM;
1531     }
1532 
1533     /* Check this node */
1534     if (!drv) {
1535         return 0;
1536     }
1537 
1538     if (drv->bdrv_check_perm) {
1539         return drv->bdrv_check_perm(bs, cumulative_perms,
1540                                     cumulative_shared_perms, errp);
1541     }
1542 
1543     /* Drivers that never have children can omit .bdrv_child_perm() */
1544     if (!drv->bdrv_child_perm) {
1545         assert(QLIST_EMPTY(&bs->children));
1546         return 0;
1547     }
1548 
1549     /* Check all children */
1550     QLIST_FOREACH(c, &bs->children, next) {
1551         uint64_t cur_perm, cur_shared;
1552         bdrv_child_perm(bs, c->bs, c, c->role,
1553                         cumulative_perms, cumulative_shared_perms,
1554                         &cur_perm, &cur_shared);
1555         ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children,
1556                                     errp);
1557         if (ret < 0) {
1558             return ret;
1559         }
1560     }
1561 
1562     return 0;
1563 }
1564 
1565 /*
1566  * Notifies drivers that after a previous bdrv_check_perm() call, the
1567  * permission update is not performed and any preparations made for it (e.g.
1568  * taken file locks) need to be undone.
1569  *
1570  * This function recursively notifies all child nodes.
1571  */
1572 static void bdrv_abort_perm_update(BlockDriverState *bs)
1573 {
1574     BlockDriver *drv = bs->drv;
1575     BdrvChild *c;
1576 
1577     if (!drv) {
1578         return;
1579     }
1580 
1581     if (drv->bdrv_abort_perm_update) {
1582         drv->bdrv_abort_perm_update(bs);
1583     }
1584 
1585     QLIST_FOREACH(c, &bs->children, next) {
1586         bdrv_child_abort_perm_update(c);
1587     }
1588 }
1589 
1590 static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
1591                           uint64_t cumulative_shared_perms)
1592 {
1593     BlockDriver *drv = bs->drv;
1594     BdrvChild *c;
1595 
1596     if (!drv) {
1597         return;
1598     }
1599 
1600     /* Update this node */
1601     if (drv->bdrv_set_perm) {
1602         drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
1603     }
1604 
1605     /* Drivers that never have children can omit .bdrv_child_perm() */
1606     if (!drv->bdrv_child_perm) {
1607         assert(QLIST_EMPTY(&bs->children));
1608         return;
1609     }
1610 
1611     /* Update all children */
1612     QLIST_FOREACH(c, &bs->children, next) {
1613         uint64_t cur_perm, cur_shared;
1614         bdrv_child_perm(bs, c->bs, c, c->role,
1615                         cumulative_perms, cumulative_shared_perms,
1616                         &cur_perm, &cur_shared);
1617         bdrv_child_set_perm(c, cur_perm, cur_shared);
1618     }
1619 }
1620 
1621 static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
1622                                      uint64_t *shared_perm)
1623 {
1624     BdrvChild *c;
1625     uint64_t cumulative_perms = 0;
1626     uint64_t cumulative_shared_perms = BLK_PERM_ALL;
1627 
1628     QLIST_FOREACH(c, &bs->parents, next_parent) {
1629         cumulative_perms |= c->perm;
1630         cumulative_shared_perms &= c->shared_perm;
1631     }
1632 
1633     *perm = cumulative_perms;
1634     *shared_perm = cumulative_shared_perms;
1635 }
1636 
1637 static char *bdrv_child_user_desc(BdrvChild *c)
1638 {
1639     if (c->role->get_parent_desc) {
1640         return c->role->get_parent_desc(c);
1641     }
1642 
1643     return g_strdup("another user");
1644 }
1645 
1646 char *bdrv_perm_names(uint64_t perm)
1647 {
1648     struct perm_name {
1649         uint64_t perm;
1650         const char *name;
1651     } permissions[] = {
1652         { BLK_PERM_CONSISTENT_READ, "consistent read" },
1653         { BLK_PERM_WRITE,           "write" },
1654         { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
1655         { BLK_PERM_RESIZE,          "resize" },
1656         { BLK_PERM_GRAPH_MOD,       "change children" },
1657         { 0, NULL }
1658     };
1659 
1660     char *result = g_strdup("");
1661     struct perm_name *p;
1662 
1663     for (p = permissions; p->name; p++) {
1664         if (perm & p->perm) {
1665             char *old = result;
1666             result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
1667             g_free(old);
1668         }
1669     }
1670 
1671     return result;
1672 }
1673 
1674 /*
1675  * Checks whether a new reference to @bs can be added if the new user requires
1676  * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
1677  * set, the BdrvChild objects in this list are ignored in the calculations;
1678  * this allows checking permission updates for an existing reference.
1679  *
1680  * Needs to be followed by a call to either bdrv_set_perm() or
1681  * bdrv_abort_perm_update(). */
1682 static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
1683                                   uint64_t new_shared_perm,
1684                                   GSList *ignore_children, Error **errp)
1685 {
1686     BdrvChild *c;
1687     uint64_t cumulative_perms = new_used_perm;
1688     uint64_t cumulative_shared_perms = new_shared_perm;
1689 
1690     /* There is no reason why anyone couldn't tolerate write_unchanged */
1691     assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
1692 
1693     QLIST_FOREACH(c, &bs->parents, next_parent) {
1694         if (g_slist_find(ignore_children, c)) {
1695             continue;
1696         }
1697 
1698         if ((new_used_perm & c->shared_perm) != new_used_perm) {
1699             char *user = bdrv_child_user_desc(c);
1700             char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
1701             error_setg(errp, "Conflicts with use by %s as '%s', which does not "
1702                              "allow '%s' on %s",
1703                        user, c->name, perm_names, bdrv_get_node_name(c->bs));
1704             g_free(user);
1705             g_free(perm_names);
1706             return -EPERM;
1707         }
1708 
1709         if ((c->perm & new_shared_perm) != c->perm) {
1710             char *user = bdrv_child_user_desc(c);
1711             char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
1712             error_setg(errp, "Conflicts with use by %s as '%s', which uses "
1713                              "'%s' on %s",
1714                        user, c->name, perm_names, bdrv_get_node_name(c->bs));
1715             g_free(user);
1716             g_free(perm_names);
1717             return -EPERM;
1718         }
1719 
1720         cumulative_perms |= c->perm;
1721         cumulative_shared_perms &= c->shared_perm;
1722     }
1723 
1724     return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms,
1725                            ignore_children, errp);
1726 }
1727 
1728 /* Needs to be followed by a call to either bdrv_child_set_perm() or
1729  * bdrv_child_abort_perm_update(). */
1730 static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
1731                                  GSList *ignore_children, Error **errp)
1732 {
1733     int ret;
1734 
1735     ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
1736     ret = bdrv_check_update_perm(c->bs, perm, shared, ignore_children, errp);
1737     g_slist_free(ignore_children);
1738 
1739     return ret;
1740 }
1741 
1742 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
1743 {
1744     uint64_t cumulative_perms, cumulative_shared_perms;
1745 
1746     c->perm = perm;
1747     c->shared_perm = shared;
1748 
1749     bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
1750                              &cumulative_shared_perms);
1751     bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
1752 }
1753 
1754 static void bdrv_child_abort_perm_update(BdrvChild *c)
1755 {
1756     bdrv_abort_perm_update(c->bs);
1757 }
1758 
1759 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
1760                             Error **errp)
1761 {
1762     int ret;
1763 
1764     ret = bdrv_child_check_perm(c, perm, shared, NULL, errp);
1765     if (ret < 0) {
1766         bdrv_child_abort_perm_update(c);
1767         return ret;
1768     }
1769 
1770     bdrv_child_set_perm(c, perm, shared);
1771 
1772     return 0;
1773 }
1774 
/*
 * Used by the default permission callbacks below: PASSTHROUGH permissions
 * are taken from what the parents require/share, UNCHANGED ones (all the
 * remaining bits) keep the value the child already has.
 */
#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ | \
                                  BLK_PERM_WRITE | \
                                  BLK_PERM_WRITE_UNCHANGED | \
                                  BLK_PERM_RESIZE)
#define DEFAULT_PERM_UNCHANGED   (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
1780 
1781 void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
1782                                const BdrvChildRole *role,
1783                                uint64_t perm, uint64_t shared,
1784                                uint64_t *nperm, uint64_t *nshared)
1785 {
1786     if (c == NULL) {
1787         *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
1788         *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
1789         return;
1790     }
1791 
1792     *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
1793              (c->perm & DEFAULT_PERM_UNCHANGED);
1794     *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
1795                (c->shared_perm & DEFAULT_PERM_UNCHANGED);
1796 }
1797 
1798 void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
1799                                const BdrvChildRole *role,
1800                                uint64_t perm, uint64_t shared,
1801                                uint64_t *nperm, uint64_t *nshared)
1802 {
1803     bool backing = (role == &child_backing);
1804     assert(role == &child_backing || role == &child_file);
1805 
1806     if (!backing) {
1807         /* Apart from the modifications below, the same permissions are
1808          * forwarded and left alone as for filters */
1809         bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
1810 
1811         /* Format drivers may touch metadata even if the guest doesn't write */
1812         if (bdrv_is_writable(bs)) {
1813             perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
1814         }
1815 
1816         /* bs->file always needs to be consistent because of the metadata. We
1817          * can never allow other users to resize or write to it. */
1818         perm |= BLK_PERM_CONSISTENT_READ;
1819         shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
1820     } else {
1821         /* We want consistent read from backing files if the parent needs it.
1822          * No other operations are performed on backing files. */
1823         perm &= BLK_PERM_CONSISTENT_READ;
1824 
1825         /* If the parent can deal with changing data, we're okay with a
1826          * writable and resizable backing file. */
1827         /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
1828         if (shared & BLK_PERM_WRITE) {
1829             shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
1830         } else {
1831             shared = 0;
1832         }
1833 
1834         shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
1835                   BLK_PERM_WRITE_UNCHANGED;
1836     }
1837 
1838     if (bs->open_flags & BDRV_O_INACTIVE) {
1839         shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
1840     }
1841 
1842     *nperm = perm;
1843     *nshared = shared;
1844 }
1845 
1846 static void bdrv_replace_child_noperm(BdrvChild *child,
1847                                       BlockDriverState *new_bs)
1848 {
1849     BlockDriverState *old_bs = child->bs;
1850 
1851     if (old_bs && new_bs) {
1852         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
1853     }
1854     if (old_bs) {
1855         if (old_bs->quiesce_counter && child->role->drained_end) {
1856             child->role->drained_end(child);
1857         }
1858         if (child->role->detach) {
1859             child->role->detach(child);
1860         }
1861         QLIST_REMOVE(child, next_parent);
1862     }
1863 
1864     child->bs = new_bs;
1865 
1866     if (new_bs) {
1867         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
1868         if (new_bs->quiesce_counter && child->role->drained_begin) {
1869             child->role->drained_begin(child);
1870         }
1871 
1872         if (child->role->attach) {
1873             child->role->attach(child);
1874         }
1875     }
1876 }
1877 
1878 /*
1879  * Updates @child to change its reference to point to @new_bs, including
1880  * checking and applying the necessary permisson updates both to the old node
1881  * and to @new_bs.
1882  *
1883  * NULL is passed as @new_bs for removing the reference before freeing @child.
1884  *
1885  * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
1886  * function uses bdrv_set_perm() to update the permissions according to the new
1887  * reference that @new_bs gets.
1888  */
1889 static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
1890 {
1891     BlockDriverState *old_bs = child->bs;
1892     uint64_t perm, shared_perm;
1893 
1894     if (old_bs) {
1895         /* Update permissions for old node. This is guaranteed to succeed
1896          * because we're just taking a parent away, so we're loosening
1897          * restrictions. */
1898         bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
1899         bdrv_check_perm(old_bs, perm, shared_perm, NULL, &error_abort);
1900         bdrv_set_perm(old_bs, perm, shared_perm);
1901     }
1902 
1903     bdrv_replace_child_noperm(child, new_bs);
1904 
1905     if (new_bs) {
1906         bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
1907         bdrv_set_perm(new_bs, perm, shared_perm);
1908     }
1909 }
1910 
1911 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1912                                   const char *child_name,
1913                                   const BdrvChildRole *child_role,
1914                                   uint64_t perm, uint64_t shared_perm,
1915                                   void *opaque, Error **errp)
1916 {
1917     BdrvChild *child;
1918     int ret;
1919 
1920     ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
1921     if (ret < 0) {
1922         bdrv_abort_perm_update(child_bs);
1923         return NULL;
1924     }
1925 
1926     child = g_new(BdrvChild, 1);
1927     *child = (BdrvChild) {
1928         .bs             = NULL,
1929         .name           = g_strdup(child_name),
1930         .role           = child_role,
1931         .perm           = perm,
1932         .shared_perm    = shared_perm,
1933         .opaque         = opaque,
1934     };
1935 
1936     /* This performs the matching bdrv_set_perm() for the above check. */
1937     bdrv_replace_child(child, child_bs);
1938 
1939     return child;
1940 }
1941 
1942 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1943                              BlockDriverState *child_bs,
1944                              const char *child_name,
1945                              const BdrvChildRole *child_role,
1946                              Error **errp)
1947 {
1948     BdrvChild *child;
1949     uint64_t perm, shared_perm;
1950 
1951     bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
1952 
1953     assert(parent_bs->drv);
1954     assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
1955     bdrv_child_perm(parent_bs, child_bs, NULL, child_role,
1956                     perm, shared_perm, &perm, &shared_perm);
1957 
1958     child = bdrv_root_attach_child(child_bs, child_name, child_role,
1959                                    perm, shared_perm, parent_bs, errp);
1960     if (child == NULL) {
1961         return NULL;
1962     }
1963 
1964     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1965     return child;
1966 }
1967 
1968 static void bdrv_detach_child(BdrvChild *child)
1969 {
1970     if (child->next.le_prev) {
1971         QLIST_REMOVE(child, next);
1972         child->next.le_prev = NULL;
1973     }
1974 
1975     bdrv_replace_child(child, NULL);
1976 
1977     g_free(child->name);
1978     g_free(child);
1979 }
1980 
1981 void bdrv_root_unref_child(BdrvChild *child)
1982 {
1983     BlockDriverState *child_bs;
1984 
1985     child_bs = child->bs;
1986     bdrv_detach_child(child);
1987     bdrv_unref(child_bs);
1988 }
1989 
1990 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1991 {
1992     if (child == NULL) {
1993         return;
1994     }
1995 
1996     if (child->bs->inherits_from == parent) {
1997         BdrvChild *c;
1998 
1999         /* Remove inherits_from only when the last reference between parent and
2000          * child->bs goes away. */
2001         QLIST_FOREACH(c, &parent->children, next) {
2002             if (c != child && c->bs == child->bs) {
2003                 break;
2004             }
2005         }
2006         if (c == NULL) {
2007             child->bs->inherits_from = NULL;
2008         }
2009     }
2010 
2011     bdrv_root_unref_child(child);
2012 }
2013 
2014 
2015 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
2016 {
2017     BdrvChild *c;
2018     QLIST_FOREACH(c, &bs->parents, next_parent) {
2019         if (c->role->change_media) {
2020             c->role->change_media(c, load);
2021         }
2022     }
2023 }
2024 
2025 static void bdrv_parent_cb_resize(BlockDriverState *bs)
2026 {
2027     BdrvChild *c;
2028     QLIST_FOREACH(c, &bs->parents, next_parent) {
2029         if (c->role->resize) {
2030             c->role->resize(c);
2031         }
2032     }
2033 }
2034 
2035 /*
2036  * Sets the backing file link of a BDS. A new reference is created; callers
2037  * which don't need their own reference any more must call bdrv_unref().
2038  */
2039 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
2040                          Error **errp)
2041 {
2042     if (backing_hd) {
2043         bdrv_ref(backing_hd);
2044     }
2045 
2046     if (bs->backing) {
2047         bdrv_unref_child(bs, bs->backing);
2048     }
2049 
2050     if (!backing_hd) {
2051         bs->backing = NULL;
2052         goto out;
2053     }
2054 
2055     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
2056                                     errp);
2057     if (!bs->backing) {
2058         bdrv_unref(backing_hd);
2059     }
2060 
2061     bdrv_refresh_filename(bs);
2062 
2063 out:
2064     bdrv_refresh_limits(bs, NULL);
2065 }
2066 
2067 /*
2068  * Opens the backing file for a BlockDriverState if not yet open
2069  *
2070  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
2071  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
2072  * itself, all options starting with "${bdref_key}." are considered part of the
2073  * BlockdevRef.
2074  *
2075  * TODO Can this be unified with bdrv_open_image()?
2076  */
2077 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
2078                            const char *bdref_key, Error **errp)
2079 {
2080     char *backing_filename = g_malloc0(PATH_MAX);
2081     char *bdref_key_dot;
2082     const char *reference = NULL;
2083     int ret = 0;
2084     BlockDriverState *backing_hd;
2085     QDict *options;
2086     QDict *tmp_parent_options = NULL;
2087     Error *local_err = NULL;
2088 
2089     if (bs->backing != NULL) {
2090         goto free_exit;
2091     }
2092 
2093     /* NULL means an empty set of options */
2094     if (parent_options == NULL) {
2095         tmp_parent_options = qdict_new();
2096         parent_options = tmp_parent_options;
2097     }
2098 
2099     bs->open_flags &= ~BDRV_O_NO_BACKING;
2100 
2101     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
2102     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
2103     g_free(bdref_key_dot);
2104 
2105     /*
2106      * Caution: while qdict_get_try_str() is fine, getting non-string
2107      * types would require more care.  When @parent_options come from
2108      * -blockdev or blockdev_add, its members are typed according to
2109      * the QAPI schema, but when they come from -drive, they're all
2110      * QString.
2111      */
2112     reference = qdict_get_try_str(parent_options, bdref_key);
2113     if (reference || qdict_haskey(options, "file.filename")) {
2114         backing_filename[0] = '\0';
2115     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
2116         QDECREF(options);
2117         goto free_exit;
2118     } else {
2119         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
2120                                        &local_err);
2121         if (local_err) {
2122             ret = -EINVAL;
2123             error_propagate(errp, local_err);
2124             QDECREF(options);
2125             goto free_exit;
2126         }
2127     }
2128 
2129     if (!bs->drv || !bs->drv->supports_backing) {
2130         ret = -EINVAL;
2131         error_setg(errp, "Driver doesn't support backing files");
2132         QDECREF(options);
2133         goto free_exit;
2134     }
2135 
2136     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
2137         qdict_put_str(options, "driver", bs->backing_format);
2138     }
2139 
2140     backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
2141                                    reference, options, 0, bs, &child_backing,
2142                                    errp);
2143     if (!backing_hd) {
2144         bs->open_flags |= BDRV_O_NO_BACKING;
2145         error_prepend(errp, "Could not open backing file: ");
2146         ret = -EINVAL;
2147         goto free_exit;
2148     }
2149 
2150     /* Hook up the backing file link; drop our reference, bs owns the
2151      * backing_hd reference now */
2152     bdrv_set_backing_hd(bs, backing_hd, &local_err);
2153     bdrv_unref(backing_hd);
2154     if (local_err) {
2155         error_propagate(errp, local_err);
2156         ret = -EINVAL;
2157         goto free_exit;
2158     }
2159 
2160     qdict_del(parent_options, bdref_key);
2161 
2162 free_exit:
2163     g_free(backing_filename);
2164     QDECREF(tmp_parent_options);
2165     return ret;
2166 }
2167 
2168 static BlockDriverState *
2169 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
2170                    BlockDriverState *parent, const BdrvChildRole *child_role,
2171                    bool allow_none, Error **errp)
2172 {
2173     BlockDriverState *bs = NULL;
2174     QDict *image_options;
2175     char *bdref_key_dot;
2176     const char *reference;
2177 
2178     assert(child_role != NULL);
2179 
2180     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
2181     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
2182     g_free(bdref_key_dot);
2183 
2184     /*
2185      * Caution: while qdict_get_try_str() is fine, getting non-string
2186      * types would require more care.  When @options come from
2187      * -blockdev or blockdev_add, its members are typed according to
2188      * the QAPI schema, but when they come from -drive, they're all
2189      * QString.
2190      */
2191     reference = qdict_get_try_str(options, bdref_key);
2192     if (!filename && !reference && !qdict_size(image_options)) {
2193         if (!allow_none) {
2194             error_setg(errp, "A block device must be specified for \"%s\"",
2195                        bdref_key);
2196         }
2197         QDECREF(image_options);
2198         goto done;
2199     }
2200 
2201     bs = bdrv_open_inherit(filename, reference, image_options, 0,
2202                            parent, child_role, errp);
2203     if (!bs) {
2204         goto done;
2205     }
2206 
2207 done:
2208     qdict_del(options, bdref_key);
2209     return bs;
2210 }
2211 
2212 /*
2213  * Opens a disk image whose options are given as BlockdevRef in another block
2214  * device's options.
2215  *
2216  * If allow_none is true, no image will be opened if filename is false and no
2217  * BlockdevRef is given. NULL will be returned, but errp remains unset.
2218  *
2219  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
2220  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
2221  * itself, all options starting with "${bdref_key}." are considered part of the
2222  * BlockdevRef.
2223  *
2224  * The BlockdevRef will be removed from the options QDict.
2225  */
2226 BdrvChild *bdrv_open_child(const char *filename,
2227                            QDict *options, const char *bdref_key,
2228                            BlockDriverState *parent,
2229                            const BdrvChildRole *child_role,
2230                            bool allow_none, Error **errp)
2231 {
2232     BdrvChild *c;
2233     BlockDriverState *bs;
2234 
2235     bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
2236                             allow_none, errp);
2237     if (bs == NULL) {
2238         return NULL;
2239     }
2240 
2241     c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
2242     if (!c) {
2243         bdrv_unref(bs);
2244         return NULL;
2245     }
2246 
2247     return c;
2248 }
2249 
2250 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
2251                                                    int flags,
2252                                                    QDict *snapshot_options,
2253                                                    Error **errp)
2254 {
2255     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
2256     char *tmp_filename = g_malloc0(PATH_MAX + 1);
2257     int64_t total_size;
2258     QemuOpts *opts = NULL;
2259     BlockDriverState *bs_snapshot = NULL;
2260     Error *local_err = NULL;
2261     int ret;
2262 
2263     /* if snapshot, we create a temporary backing file and open it
2264        instead of opening 'filename' directly */
2265 
2266     /* Get the required size from the image */
2267     total_size = bdrv_getlength(bs);
2268     if (total_size < 0) {
2269         error_setg_errno(errp, -total_size, "Could not get image size");
2270         goto out;
2271     }
2272 
2273     /* Create the temporary image */
2274     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
2275     if (ret < 0) {
2276         error_setg_errno(errp, -ret, "Could not get temporary filename");
2277         goto out;
2278     }
2279 
2280     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
2281                             &error_abort);
2282     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
2283     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
2284     qemu_opts_del(opts);
2285     if (ret < 0) {
2286         error_prepend(errp, "Could not create temporary overlay '%s': ",
2287                       tmp_filename);
2288         goto out;
2289     }
2290 
2291     /* Prepare options QDict for the temporary file */
2292     qdict_put_str(snapshot_options, "file.driver", "file");
2293     qdict_put_str(snapshot_options, "file.filename", tmp_filename);
2294     qdict_put_str(snapshot_options, "driver", "qcow2");
2295 
2296     bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
2297     snapshot_options = NULL;
2298     if (!bs_snapshot) {
2299         goto out;
2300     }
2301 
2302     /* bdrv_append() consumes a strong reference to bs_snapshot
2303      * (i.e. it will call bdrv_unref() on it) even on error, so in
2304      * order to be able to return one, we have to increase
2305      * bs_snapshot's refcount here */
2306     bdrv_ref(bs_snapshot);
2307     bdrv_append(bs_snapshot, bs, &local_err);
2308     if (local_err) {
2309         error_propagate(errp, local_err);
2310         bs_snapshot = NULL;
2311         goto out;
2312     }
2313 
2314 out:
2315     QDECREF(snapshot_options);
2316     g_free(tmp_filename);
2317     return bs_snapshot;
2318 }
2319 
2320 /*
2321  * Opens a disk image (raw, qcow2, vmdk, ...)
2322  *
2323  * options is a QDict of options to pass to the block drivers, or NULL for an
2324  * empty set of options. The reference to the QDict belongs to the block layer
2325  * after the call (even on failure), so if the caller intends to reuse the
2326  * dictionary, it needs to use QINCREF() before calling bdrv_open.
2327  *
2328  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
2329  * If it is not NULL, the referenced BDS will be reused.
2330  *
2331  * The reference parameter may be used to specify an existing block device which
2332  * should be opened. If specified, neither options nor a filename may be given,
2333  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
2334  */
2335 static BlockDriverState *bdrv_open_inherit(const char *filename,
2336                                            const char *reference,
2337                                            QDict *options, int flags,
2338                                            BlockDriverState *parent,
2339                                            const BdrvChildRole *child_role,
2340                                            Error **errp)
2341 {
2342     int ret;
2343     BlockBackend *file = NULL;
2344     BlockDriverState *bs;
2345     BlockDriver *drv = NULL;
2346     const char *drvname;
2347     const char *backing;
2348     Error *local_err = NULL;
2349     QDict *snapshot_options = NULL;
2350     int snapshot_flags = 0;
2351 
2352     assert(!child_role || !flags);
2353     assert(!child_role == !parent);
2354 
2355     if (reference) {
2356         bool options_non_empty = options ? qdict_size(options) : false;
2357         QDECREF(options);
2358 
2359         if (filename || options_non_empty) {
2360             error_setg(errp, "Cannot reference an existing block device with "
2361                        "additional options or a new filename");
2362             return NULL;
2363         }
2364 
2365         bs = bdrv_lookup_bs(reference, reference, errp);
2366         if (!bs) {
2367             return NULL;
2368         }
2369 
2370         bdrv_ref(bs);
2371         return bs;
2372     }
2373 
2374     bs = bdrv_new();
2375 
2376     /* NULL means an empty set of options */
2377     if (options == NULL) {
2378         options = qdict_new();
2379     }
2380 
2381     /* json: syntax counts as explicit options, as if in the QDict */
2382     parse_json_protocol(options, &filename, &local_err);
2383     if (local_err) {
2384         goto fail;
2385     }
2386 
2387     bs->explicit_options = qdict_clone_shallow(options);
2388 
2389     if (child_role) {
2390         bs->inherits_from = parent;
2391         child_role->inherit_options(&flags, options,
2392                                     parent->open_flags, parent->options);
2393     }
2394 
2395     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
2396     if (local_err) {
2397         goto fail;
2398     }
2399 
2400     /*
2401      * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
2402      * Caution: getting a boolean member of @options requires care.
2403      * When @options come from -blockdev or blockdev_add, members are
2404      * typed according to the QAPI schema, but when they come from
2405      * -drive, they're all QString.
2406      */
2407     if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
2408         !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
2409         flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
2410     } else {
2411         flags &= ~BDRV_O_RDWR;
2412     }
2413 
2414     if (flags & BDRV_O_SNAPSHOT) {
2415         snapshot_options = qdict_new();
2416         bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
2417                                    flags, options);
2418         /* Let bdrv_backing_options() override "read-only" */
2419         qdict_del(options, BDRV_OPT_READ_ONLY);
2420         bdrv_backing_options(&flags, options, flags, options);
2421     }
2422 
2423     bs->open_flags = flags;
2424     bs->options = options;
2425     options = qdict_clone_shallow(options);
2426 
2427     /* Find the right image format driver */
2428     /* See cautionary note on accessing @options above */
2429     drvname = qdict_get_try_str(options, "driver");
2430     if (drvname) {
2431         drv = bdrv_find_format(drvname);
2432         if (!drv) {
2433             error_setg(errp, "Unknown driver: '%s'", drvname);
2434             goto fail;
2435         }
2436     }
2437 
2438     assert(drvname || !(flags & BDRV_O_PROTOCOL));
2439 
2440     /* See cautionary note on accessing @options above */
2441     backing = qdict_get_try_str(options, "backing");
2442     if (backing && *backing == '\0') {
2443         flags |= BDRV_O_NO_BACKING;
2444         qdict_del(options, "backing");
2445     }
2446 
2447     /* Open image file without format layer. This BlockBackend is only used for
2448      * probing, the block drivers will do their own bdrv_open_child() for the
2449      * same BDS, which is why we put the node name back into options. */
2450     if ((flags & BDRV_O_PROTOCOL) == 0) {
2451         BlockDriverState *file_bs;
2452 
2453         file_bs = bdrv_open_child_bs(filename, options, "file", bs,
2454                                      &child_file, true, &local_err);
2455         if (local_err) {
2456             goto fail;
2457         }
2458         if (file_bs != NULL) {
2459             file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
2460             blk_insert_bs(file, file_bs, &local_err);
2461             bdrv_unref(file_bs);
2462             if (local_err) {
2463                 goto fail;
2464             }
2465 
2466             qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
2467         }
2468     }
2469 
2470     /* Image format probing */
2471     bs->probed = !drv;
2472     if (!drv && file) {
2473         ret = find_image_format(file, filename, &drv, &local_err);
2474         if (ret < 0) {
2475             goto fail;
2476         }
2477         /*
2478          * This option update would logically belong in bdrv_fill_options(),
2479          * but we first need to open bs->file for the probing to work, while
2480          * opening bs->file already requires the (mostly) final set of options
2481          * so that cache mode etc. can be inherited.
2482          *
2483          * Adding the driver later is somewhat ugly, but it's not an option
2484          * that would ever be inherited, so it's correct. We just need to make
2485          * sure to update both bs->options (which has the full effective
2486          * options for bs) and options (which has file.* already removed).
2487          */
2488         qdict_put_str(bs->options, "driver", drv->format_name);
2489         qdict_put_str(options, "driver", drv->format_name);
2490     } else if (!drv) {
2491         error_setg(errp, "Must specify either driver or file");
2492         goto fail;
2493     }
2494 
2495     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
2496     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
2497     /* file must be NULL if a protocol BDS is about to be created
2498      * (the inverse results in an error message from bdrv_open_common()) */
2499     assert(!(flags & BDRV_O_PROTOCOL) || !file);
2500 
2501     /* Open the image */
2502     ret = bdrv_open_common(bs, file, options, &local_err);
2503     if (ret < 0) {
2504         goto fail;
2505     }
2506 
2507     if (file) {
2508         blk_unref(file);
2509         file = NULL;
2510     }
2511 
2512     /* If there is a backing file, use it */
2513     if ((flags & BDRV_O_NO_BACKING) == 0) {
2514         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
2515         if (ret < 0) {
2516             goto close_and_fail;
2517         }
2518     }
2519 
2520     bdrv_refresh_filename(bs);
2521 
2522     /* Check if any unknown options were used */
2523     if (qdict_size(options) != 0) {
2524         const QDictEntry *entry = qdict_first(options);
2525         if (flags & BDRV_O_PROTOCOL) {
2526             error_setg(errp, "Block protocol '%s' doesn't support the option "
2527                        "'%s'", drv->format_name, entry->key);
2528         } else {
2529             error_setg(errp,
2530                        "Block format '%s' does not support the option '%s'",
2531                        drv->format_name, entry->key);
2532         }
2533 
2534         goto close_and_fail;
2535     }
2536 
2537     if (!bdrv_key_required(bs)) {
2538         bdrv_parent_cb_change_media(bs, true);
2539     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
2540                && !runstate_check(RUN_STATE_INMIGRATE)
2541                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
2542         error_setg(errp,
2543                    "Guest must be stopped for opening of encrypted image");
2544         goto close_and_fail;
2545     }
2546 
2547     QDECREF(options);
2548 
2549     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
2550      * temporary snapshot afterwards. */
2551     if (snapshot_flags) {
2552         BlockDriverState *snapshot_bs;
2553         snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
2554                                                 snapshot_options, &local_err);
2555         snapshot_options = NULL;
2556         if (local_err) {
2557             goto close_and_fail;
2558         }
2559         /* We are not going to return bs but the overlay on top of it
2560          * (snapshot_bs); thus, we have to drop the strong reference to bs
2561          * (which we obtained by calling bdrv_new()). bs will not be deleted,
2562          * though, because the overlay still has a reference to it. */
2563         bdrv_unref(bs);
2564         bs = snapshot_bs;
2565     }
2566 
2567     return bs;
2568 
2569 fail:
2570     blk_unref(file);
2571     if (bs->file != NULL) {
2572         bdrv_unref_child(bs, bs->file);
2573     }
2574     QDECREF(snapshot_options);
2575     QDECREF(bs->explicit_options);
2576     QDECREF(bs->options);
2577     QDECREF(options);
2578     bs->options = NULL;
2579     bdrv_unref(bs);
2580     error_propagate(errp, local_err);
2581     return NULL;
2582 
2583 close_and_fail:
2584     bdrv_unref(bs);
2585     QDECREF(snapshot_options);
2586     QDECREF(options);
2587     error_propagate(errp, local_err);
2588     return NULL;
2589 }
2590 
2591 BlockDriverState *bdrv_open(const char *filename, const char *reference,
2592                             QDict *options, int flags, Error **errp)
2593 {
2594     return bdrv_open_inherit(filename, reference, options, flags, NULL,
2595                              NULL, errp);
2596 }
2597 
2598 typedef struct BlockReopenQueueEntry {
2599      bool prepared;
2600      BDRVReopenState state;
2601      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
2602 } BlockReopenQueueEntry;
2603 
2604 /*
2605  * Adds a BlockDriverState to a simple queue for an atomic, transactional
2606  * reopen of multiple devices.
2607  *
2608  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
2609  * already performed, or alternatively may be NULL a new BlockReopenQueue will
2610  * be created and initialized. This newly created BlockReopenQueue should be
2611  * passed back in for subsequent calls that are intended to be of the same
2612  * atomic 'set'.
2613  *
2614  * bs is the BlockDriverState to add to the reopen queue.
2615  *
2616  * options contains the changed options for the associated bs
2617  * (the BlockReopenQueue takes ownership)
2618  *
2619  * flags contains the open flags for the associated bs
2620  *
2621  * returns a pointer to bs_queue, which is either the newly allocated
2622  * bs_queue, or the existing bs_queue being used.
2623  *
2624  */
2625 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
2626                                                  BlockDriverState *bs,
2627                                                  QDict *options,
2628                                                  int flags,
2629                                                  const BdrvChildRole *role,
2630                                                  QDict *parent_options,
2631                                                  int parent_flags)
2632 {
2633     assert(bs != NULL);
2634 
2635     BlockReopenQueueEntry *bs_entry;
2636     BdrvChild *child;
2637     QDict *old_options, *explicit_options;
2638 
2639     if (bs_queue == NULL) {
2640         bs_queue = g_new0(BlockReopenQueue, 1);
2641         QSIMPLEQ_INIT(bs_queue);
2642     }
2643 
2644     if (!options) {
2645         options = qdict_new();
2646     }
2647 
2648     /* Check if this BlockDriverState is already in the queue */
2649     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
2650         if (bs == bs_entry->state.bs) {
2651             break;
2652         }
2653     }
2654 
2655     /*
2656      * Precedence of options:
2657      * 1. Explicitly passed in options (highest)
2658      * 2. Set in flags (only for top level)
2659      * 3. Retained from explicitly set options of bs
2660      * 4. Inherited from parent node
2661      * 5. Retained from effective options of bs
2662      */
2663 
2664     if (!parent_options) {
2665         /*
2666          * Any setting represented by flags is always updated. If the
2667          * corresponding QDict option is set, it takes precedence. Otherwise
2668          * the flag is translated into a QDict option. The old setting of bs is
2669          * not considered.
2670          */
2671         update_options_from_flags(options, flags);
2672     }
2673 
2674     /* Old explicitly set values (don't overwrite by inherited value) */
2675     if (bs_entry) {
2676         old_options = qdict_clone_shallow(bs_entry->state.explicit_options);
2677     } else {
2678         old_options = qdict_clone_shallow(bs->explicit_options);
2679     }
2680     bdrv_join_options(bs, options, old_options);
2681     QDECREF(old_options);
2682 
2683     explicit_options = qdict_clone_shallow(options);
2684 
2685     /* Inherit from parent node */
2686     if (parent_options) {
2687         assert(!flags);
2688         role->inherit_options(&flags, options, parent_flags, parent_options);
2689     }
2690 
2691     /* Old values are used for options that aren't set yet */
2692     old_options = qdict_clone_shallow(bs->options);
2693     bdrv_join_options(bs, options, old_options);
2694     QDECREF(old_options);
2695 
2696     /* bdrv_open() masks this flag out */
2697     flags &= ~BDRV_O_PROTOCOL;
2698 
2699     QLIST_FOREACH(child, &bs->children, next) {
2700         QDict *new_child_options;
2701         char *child_key_dot;
2702 
2703         /* reopen can only change the options of block devices that were
2704          * implicitly created and inherited options. For other (referenced)
2705          * block devices, a syntax like "backing.foo" results in an error. */
2706         if (child->bs->inherits_from != bs) {
2707             continue;
2708         }
2709 
2710         child_key_dot = g_strdup_printf("%s.", child->name);
2711         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
2712         g_free(child_key_dot);
2713 
2714         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
2715                                 child->role, options, flags);
2716     }
2717 
2718     if (!bs_entry) {
2719         bs_entry = g_new0(BlockReopenQueueEntry, 1);
2720         QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
2721     } else {
2722         QDECREF(bs_entry->state.options);
2723         QDECREF(bs_entry->state.explicit_options);
2724     }
2725 
2726     bs_entry->state.bs = bs;
2727     bs_entry->state.options = options;
2728     bs_entry->state.explicit_options = explicit_options;
2729     bs_entry->state.flags = flags;
2730 
2731     return bs_queue;
2732 }
2733 
2734 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
2735                                     BlockDriverState *bs,
2736                                     QDict *options, int flags)
2737 {
2738     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
2739                                    NULL, NULL, 0);
2740 }
2741 
2742 /*
2743  * Reopen multiple BlockDriverStates atomically & transactionally.
2744  *
2745  * The queue passed in (bs_queue) must have been built up previous
2746  * via bdrv_reopen_queue().
2747  *
2748  * Reopens all BDS specified in the queue, with the appropriate
2749  * flags.  All devices are prepared for reopen, and failure of any
2750  * device will cause all device changes to be abandonded, and intermediate
2751  * data cleaned up.
2752  *
2753  * If all devices prepare successfully, then the changes are committed
2754  * to all devices.
2755  *
2756  */
2757 int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
2758 {
2759     int ret = -1;
2760     BlockReopenQueueEntry *bs_entry, *next;
2761     Error *local_err = NULL;
2762 
2763     assert(bs_queue != NULL);
2764 
2765     aio_context_release(ctx);
2766     bdrv_drain_all_begin();
2767     aio_context_acquire(ctx);
2768 
2769     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
2770         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
2771             error_propagate(errp, local_err);
2772             goto cleanup;
2773         }
2774         bs_entry->prepared = true;
2775     }
2776 
2777     /* If we reach this point, we have success and just need to apply the
2778      * changes
2779      */
2780     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
2781         bdrv_reopen_commit(&bs_entry->state);
2782     }
2783 
2784     ret = 0;
2785 
2786 cleanup:
2787     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
2788         if (ret && bs_entry->prepared) {
2789             bdrv_reopen_abort(&bs_entry->state);
2790         } else if (ret) {
2791             QDECREF(bs_entry->state.explicit_options);
2792         }
2793         QDECREF(bs_entry->state.options);
2794         g_free(bs_entry);
2795     }
2796     g_free(bs_queue);
2797 
2798     bdrv_drain_all_end();
2799 
2800     return ret;
2801 }
2802 
2803 
2804 /* Reopen a single BlockDriverState with the specified flags. */
2805 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
2806 {
2807     int ret = -1;
2808     Error *local_err = NULL;
2809     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
2810 
2811     ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
2812     if (local_err != NULL) {
2813         error_propagate(errp, local_err);
2814     }
2815     return ret;
2816 }
2817 
2818 
2819 /*
2820  * Prepares a BlockDriverState for reopen. All changes are staged in the
2821  * 'opaque' field of the BDRVReopenState, which is used and allocated by
2822  * the block driver layer .bdrv_reopen_prepare()
2823  *
2824  * bs is the BlockDriverState to reopen
2825  * flags are the new open flags
2826  * queue is the reopen queue
2827  *
2828  * Returns 0 on success, non-zero on error.  On error errp will be set
2829  * as well.
2830  *
2831  * On failure, bdrv_reopen_abort() will be called to clean up any data.
2832  * It is the responsibility of the caller to then call the abort() or
2833  * commit() for any other BDS that have been left in a prepare() state
2834  *
2835  */
2836 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
2837                         Error **errp)
2838 {
2839     int ret = -1;
2840     Error *local_err = NULL;
2841     BlockDriver *drv;
2842     QemuOpts *opts;
2843     const char *value;
2844     bool read_only;
2845 
2846     assert(reopen_state != NULL);
2847     assert(reopen_state->bs->drv != NULL);
2848     drv = reopen_state->bs->drv;
2849 
2850     /* Process generic block layer options */
2851     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
2852     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
2853     if (local_err) {
2854         error_propagate(errp, local_err);
2855         ret = -EINVAL;
2856         goto error;
2857     }
2858 
2859     update_flags_from_options(&reopen_state->flags, opts);
2860 
2861     /* node-name and driver must be unchanged. Put them back into the QDict, so
2862      * that they are checked at the end of this function. */
2863     value = qemu_opt_get(opts, "node-name");
2864     if (value) {
2865         qdict_put_str(reopen_state->options, "node-name", value);
2866     }
2867 
2868     value = qemu_opt_get(opts, "driver");
2869     if (value) {
2870         qdict_put_str(reopen_state->options, "driver", value);
2871     }
2872 
2873     /* If we are to stay read-only, do not allow permission change
2874      * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
2875      * not set, or if the BDS still has copy_on_read enabled */
2876     read_only = !(reopen_state->flags & BDRV_O_RDWR);
2877     ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err);
2878     if (local_err) {
2879         error_propagate(errp, local_err);
2880         goto error;
2881     }
2882 
2883 
2884     ret = bdrv_flush(reopen_state->bs);
2885     if (ret) {
2886         error_setg_errno(errp, -ret, "Error flushing drive");
2887         goto error;
2888     }
2889 
2890     if (drv->bdrv_reopen_prepare) {
2891         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2892         if (ret) {
2893             if (local_err != NULL) {
2894                 error_propagate(errp, local_err);
2895             } else {
2896                 error_setg(errp, "failed while preparing to reopen image '%s'",
2897                            reopen_state->bs->filename);
2898             }
2899             goto error;
2900         }
2901     } else {
2902         /* It is currently mandatory to have a bdrv_reopen_prepare()
2903          * handler for each supported drv. */
2904         error_setg(errp, "Block format '%s' used by node '%s' "
2905                    "does not support reopening files", drv->format_name,
2906                    bdrv_get_device_or_node_name(reopen_state->bs));
2907         ret = -1;
2908         goto error;
2909     }
2910 
2911     /* Options that are not handled are only okay if they are unchanged
2912      * compared to the old state. It is expected that some options are only
2913      * used for the initial open, but not reopen (e.g. filename) */
2914     if (qdict_size(reopen_state->options)) {
2915         const QDictEntry *entry = qdict_first(reopen_state->options);
2916 
2917         do {
2918             QString *new_obj = qobject_to_qstring(entry->value);
2919             const char *new = qstring_get_str(new_obj);
2920             /*
2921              * Caution: while qdict_get_try_str() is fine, getting
2922              * non-string types would require more care.  When
2923              * bs->options come from -blockdev or blockdev_add, its
2924              * members are typed according to the QAPI schema, but
2925              * when they come from -drive, they're all QString.
2926              */
2927             const char *old = qdict_get_try_str(reopen_state->bs->options,
2928                                                 entry->key);
2929 
2930             if (!old || strcmp(new, old)) {
2931                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2932                 ret = -EINVAL;
2933                 goto error;
2934             }
2935         } while ((entry = qdict_next(reopen_state->options, entry)));
2936     }
2937 
2938     ret = 0;
2939 
2940 error:
2941     qemu_opts_del(opts);
2942     return ret;
2943 }
2944 
2945 /*
2946  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2947  * makes them final by swapping the staging BlockDriverState contents into
2948  * the active BlockDriverState contents.
2949  */
2950 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2951 {
2952     BlockDriver *drv;
2953 
2954     assert(reopen_state != NULL);
2955     drv = reopen_state->bs->drv;
2956     assert(drv != NULL);
2957 
2958     /* If there are any driver level actions to take */
2959     if (drv->bdrv_reopen_commit) {
2960         drv->bdrv_reopen_commit(reopen_state);
2961     }
2962 
2963     /* set BDS specific flags now */
2964     QDECREF(reopen_state->bs->explicit_options);
2965 
2966     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2967     reopen_state->bs->open_flags         = reopen_state->flags;
2968     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2969 
2970     bdrv_refresh_limits(reopen_state->bs, NULL);
2971 }
2972 
2973 /*
2974  * Abort the reopen, and delete and free the staged changes in
2975  * reopen_state
2976  */
2977 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2978 {
2979     BlockDriver *drv;
2980 
2981     assert(reopen_state != NULL);
2982     drv = reopen_state->bs->drv;
2983     assert(drv != NULL);
2984 
2985     if (drv->bdrv_reopen_abort) {
2986         drv->bdrv_reopen_abort(reopen_state);
2987     }
2988 
2989     QDECREF(reopen_state->explicit_options);
2990 }
2991 
2992 
2993 static void bdrv_close(BlockDriverState *bs)
2994 {
2995     BdrvAioNotifier *ban, *ban_next;
2996 
2997     assert(!bs->job);
2998     assert(!bs->refcnt);
2999 
3000     bdrv_drained_begin(bs); /* complete I/O */
3001     bdrv_flush(bs);
3002     bdrv_drain(bs); /* in case flush left pending I/O */
3003 
3004     bdrv_release_named_dirty_bitmaps(bs);
3005     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
3006 
3007     if (bs->drv) {
3008         BdrvChild *child, *next;
3009 
3010         bs->drv->bdrv_close(bs);
3011         bs->drv = NULL;
3012 
3013         bdrv_set_backing_hd(bs, NULL, &error_abort);
3014 
3015         if (bs->file != NULL) {
3016             bdrv_unref_child(bs, bs->file);
3017             bs->file = NULL;
3018         }
3019 
3020         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
3021             /* TODO Remove bdrv_unref() from drivers' close function and use
3022              * bdrv_unref_child() here */
3023             if (child->bs->inherits_from == bs) {
3024                 child->bs->inherits_from = NULL;
3025             }
3026             bdrv_detach_child(child);
3027         }
3028 
3029         g_free(bs->opaque);
3030         bs->opaque = NULL;
3031         bs->copy_on_read = 0;
3032         bs->backing_file[0] = '\0';
3033         bs->backing_format[0] = '\0';
3034         bs->total_sectors = 0;
3035         bs->encrypted = false;
3036         bs->valid_key = false;
3037         bs->sg = false;
3038         QDECREF(bs->options);
3039         QDECREF(bs->explicit_options);
3040         bs->options = NULL;
3041         QDECREF(bs->full_open_options);
3042         bs->full_open_options = NULL;
3043     }
3044 
3045     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3046         g_free(ban);
3047     }
3048     QLIST_INIT(&bs->aio_notifiers);
3049     bdrv_drained_end(bs);
3050 }
3051 
/*
 * Closes down the whole block layer state.
 *
 * Block jobs are cancelled synchronously and NBD exports are closed first,
 * then all in-flight requests are drained, the nodes are removed from all
 * BlockBackends and the monitor-owned node states are closed. By then no
 * BlockDriverState may be left in all_bdrv_states (asserted).
 */
void bdrv_close_all(void)
{
    block_job_cancel_sync_all();
    nbd_export_close_all();

    /* Drop references from requests still in flight, such as canceled block
     * jobs whose AIO context has not been polled yet */
    bdrv_drain_all();

    blk_remove_all_bs();
    blockdev_close_all_bdrv_states();

    /* Every node must have been deleted at this point */
    assert(QTAILQ_EMPTY(&all_bdrv_states));
}
3066 
3067 static bool should_update_child(BdrvChild *c, BlockDriverState *to)
3068 {
3069     BdrvChild *to_c;
3070 
3071     if (c->role->stay_at_node) {
3072         return false;
3073     }
3074 
3075     if (c->role == &child_backing) {
3076         /* If @from is a backing file of @to, ignore the child to avoid
3077          * creating a loop. We only want to change the pointer of other
3078          * parents. */
3079         QLIST_FOREACH(to_c, &to->children, next) {
3080             if (to_c == c) {
3081                 break;
3082             }
3083         }
3084         if (to_c) {
3085             return false;
3086         }
3087     }
3088 
3089     return true;
3090 }
3091 
/*
 * Makes the parents of @from point to @to instead.
 *
 * Only those parent links for which should_update_child() returns true are
 * moved. Before anything is changed, the combined permissions of these
 * parents are checked against @to; if that check fails, the error is stored
 * in @errp and no parent link is changed. Neither node may have requests in
 * flight (asserted).
 */
void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                       Error **errp)
{
    BdrvChild *c, *next;
    GSList *list = NULL, *p;
    uint64_t old_perm, old_shared;
    /* Start from "needs nothing, shares everything" and narrow it down */
    uint64_t perm = 0, shared = BLK_PERM_ALL;
    int ret;

    assert(!atomic_read(&from->in_flight));
    assert(!atomic_read(&to->in_flight));

    /* Make sure that @from doesn't go away until we have successfully attached
     * all of its parents to @to. */
    bdrv_ref(from);

    /* Put all parents into @list and calculate their cumulative permissions */
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        if (!should_update_child(c, to)) {
            continue;
        }
        list = g_slist_prepend(list, c);
        perm |= c->perm;
        shared &= c->shared_perm;
    }

    /* Check whether the required permissions can be granted on @to, ignoring
     * all BdrvChild in @list so that they can't block themselves. */
    ret = bdrv_check_update_perm(to, perm, shared, list, errp);
    if (ret < 0) {
        bdrv_abort_perm_update(to);
        goto out;
    }

    /* Now actually perform the change. We performed the permission check for
     * all elements of @list at once, so set the permissions all at once at the
     * very end. */
    for (p = list; p != NULL; p = p->next) {
        c = p->data;

        /* The moved link now holds a reference to @to instead of @from */
        bdrv_ref(to);
        bdrv_replace_child_noperm(c, to);
        bdrv_unref(from);
    }

    bdrv_get_cumulative_perm(to, &old_perm, &old_shared);
    bdrv_set_perm(to, old_perm | perm, old_shared | shared);

out:
    g_slist_free(list);
    /* Drop the temporary reference taken at the top */
    bdrv_unref(from);
}
3144 
3145 /*
3146  * Add new bs contents at the top of an image chain while the chain is
3147  * live, while keeping required fields on the top layer.
3148  *
3149  * This will modify the BlockDriverState fields, and swap contents
3150  * between bs_new and bs_top. Both bs_new and bs_top are modified.
3151  *
3152  * bs_new must not be attached to a BlockBackend.
3153  *
3154  * This function does not create any image files.
3155  *
3156  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
3157  * that's what the callers commonly need. bs_new will be referenced by the old
3158  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
3159  * reference of its own, it must call bdrv_ref().
3160  */
3161 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
3162                  Error **errp)
3163 {
3164     Error *local_err = NULL;
3165 
3166     bdrv_set_backing_hd(bs_new, bs_top, &local_err);
3167     if (local_err) {
3168         error_propagate(errp, local_err);
3169         goto out;
3170     }
3171 
3172     bdrv_replace_node(bs_top, bs_new, &local_err);
3173     if (local_err) {
3174         error_propagate(errp, local_err);
3175         bdrv_set_backing_hd(bs_new, NULL, &error_abort);
3176         goto out;
3177     }
3178 
3179     /* bs_new is now referenced by its new parents, we don't need the
3180      * additional reference any more. */
3181 out:
3182     bdrv_unref(bs_new);
3183 }
3184 
/*
 * Closes @bs, unlinks it from the global node lists and frees it.
 *
 * The node must have no block job, no op blockers and no references left
 * (all three are asserted).
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);

    bdrv_close(bs);

    /* remove from list, if necessary */
    /* Only nodes with a non-empty node name are taken off graph_bdrv_states */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);

    g_free(bs);
}
3201 
3202 /*
3203  * Run consistency checks on an image
3204  *
3205  * Returns 0 if the check could be completed (it doesn't mean that the image is
3206  * free of errors) or -errno when an internal error occurred. The results of the
3207  * check are stored in res.
3208  */
3209 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
3210 {
3211     if (bs->drv == NULL) {
3212         return -ENOMEDIUM;
3213     }
3214     if (bs->drv->bdrv_check == NULL) {
3215         return -ENOTSUP;
3216     }
3217 
3218     memset(res, 0, sizeof(*res));
3219     return bs->drv->bdrv_check(bs, res, fix);
3220 }
3221 
3222 /*
3223  * Return values:
3224  * 0        - success
3225  * -EINVAL  - backing format specified, but no file
3226  * -ENOSPC  - can't update the backing file because no space is left in the
3227  *            image file header
3228  * -ENOTSUP - format driver doesn't support changing the backing file
3229  */
3230 int bdrv_change_backing_file(BlockDriverState *bs,
3231     const char *backing_file, const char *backing_fmt)
3232 {
3233     BlockDriver *drv = bs->drv;
3234     int ret;
3235 
3236     /* Backing file format doesn't make sense without a backing file */
3237     if (backing_fmt && !backing_file) {
3238         return -EINVAL;
3239     }
3240 
3241     if (drv->bdrv_change_backing_file != NULL) {
3242         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
3243     } else {
3244         ret = -ENOTSUP;
3245     }
3246 
3247     if (ret == 0) {
3248         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
3249         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
3250     }
3251     return ret;
3252 }
3253 
3254 /*
3255  * Finds the image layer in the chain that has 'bs' as its backing file.
3256  *
3257  * active is the current topmost image.
3258  *
3259  * Returns NULL if bs is not found in active's image chain,
3260  * or if active == bs.
3261  *
3262  * Returns the bottommost base image if bs == NULL.
3263  */
3264 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
3265                                     BlockDriverState *bs)
3266 {
3267     while (active && bs != backing_bs(active)) {
3268         active = backing_bs(active);
3269     }
3270 
3271     return active;
3272 }
3273 
3274 /* Given a BDS, searches for the base layer. */
3275 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
3276 {
3277     return bdrv_find_overlay(bs, NULL);
3278 }
3279 
3280 /*
3281  * Drops images above 'base' up to and including 'top', and sets the image
3282  * above 'top' to have base as its backing file.
3283  *
3284  * Requires that the overlay to 'top' is opened r/w, so that the backing file
3285  * information in 'bs' can be properly updated.
3286  *
3287  * E.g., this will convert the following chain:
3288  * bottom <- base <- intermediate <- top <- active
3289  *
3290  * to
3291  *
3292  * bottom <- base <- active
3293  *
3294  * It is allowed for bottom==base, in which case it converts:
3295  *
3296  * base <- intermediate <- top <- active
3297  *
3298  * to
3299  *
3300  * base <- active
3301  *
3302  * If backing_file_str is non-NULL, it will be used when modifying top's
3303  * overlay image metadata.
3304  *
3305  * Error conditions:
3306  *  if active == top, that is considered an error
3307  *
3308  */
3309 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
3310                            BlockDriverState *base, const char *backing_file_str)
3311 {
3312     BlockDriverState *new_top_bs = NULL;
3313     Error *local_err = NULL;
3314     int ret = -EIO;
3315 
3316     if (!top->drv || !base->drv) {
3317         goto exit;
3318     }
3319 
3320     new_top_bs = bdrv_find_overlay(active, top);
3321 
3322     if (new_top_bs == NULL) {
3323         /* we could not find the image above 'top', this is an error */
3324         goto exit;
3325     }
3326 
3327     /* special case of new_top_bs->backing->bs already pointing to base - nothing
3328      * to do, no intermediate images */
3329     if (backing_bs(new_top_bs) == base) {
3330         ret = 0;
3331         goto exit;
3332     }
3333 
3334     /* Make sure that base is in the backing chain of top */
3335     if (!bdrv_chain_contains(top, base)) {
3336         goto exit;
3337     }
3338 
3339     /* success - we can delete the intermediate states, and link top->base */
3340     backing_file_str = backing_file_str ? backing_file_str : base->filename;
3341     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
3342                                    base->drv ? base->drv->format_name : "");
3343     if (ret) {
3344         goto exit;
3345     }
3346 
3347     bdrv_set_backing_hd(new_top_bs, base, &local_err);
3348     if (local_err) {
3349         ret = -EPERM;
3350         error_report_err(local_err);
3351         goto exit;
3352     }
3353 
3354     ret = 0;
3355 exit:
3356     return ret;
3357 }
3358 
3359 /**
3360  * Truncate file to 'offset' bytes (needed only for file protocols)
3361  */
3362 int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
3363 {
3364     BlockDriverState *bs = child->bs;
3365     BlockDriver *drv = bs->drv;
3366     int ret;
3367 
3368     assert(child->perm & BLK_PERM_RESIZE);
3369 
3370     if (!drv) {
3371         error_setg(errp, "No medium inserted");
3372         return -ENOMEDIUM;
3373     }
3374     if (!drv->bdrv_truncate) {
3375         error_setg(errp, "Image format driver does not support resize");
3376         return -ENOTSUP;
3377     }
3378     if (bs->read_only) {
3379         error_setg(errp, "Image is read-only");
3380         return -EACCES;
3381     }
3382 
3383     assert(!(bs->open_flags & BDRV_O_INACTIVE));
3384 
3385     ret = drv->bdrv_truncate(bs, offset, errp);
3386     if (ret == 0) {
3387         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3388         bdrv_dirty_bitmap_truncate(bs);
3389         bdrv_parent_cb_resize(bs);
3390         ++bs->write_gen;
3391     }
3392     return ret;
3393 }
3394 
3395 /**
3396  * Length of a allocated file in bytes. Sparse files are counted by actual
3397  * allocated space. Return < 0 if error or unknown.
3398  */
3399 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3400 {
3401     BlockDriver *drv = bs->drv;
3402     if (!drv) {
3403         return -ENOMEDIUM;
3404     }
3405     if (drv->bdrv_get_allocated_file_size) {
3406         return drv->bdrv_get_allocated_file_size(bs);
3407     }
3408     if (bs->file) {
3409         return bdrv_get_allocated_file_size(bs->file->bs);
3410     }
3411     return -ENOTSUP;
3412 }
3413 
3414 /**
3415  * Return number of sectors on success, -errno on error.
3416  */
3417 int64_t bdrv_nb_sectors(BlockDriverState *bs)
3418 {
3419     BlockDriver *drv = bs->drv;
3420 
3421     if (!drv)
3422         return -ENOMEDIUM;
3423 
3424     if (drv->has_variable_length) {
3425         int ret = refresh_total_sectors(bs, bs->total_sectors);
3426         if (ret < 0) {
3427             return ret;
3428         }
3429     }
3430     return bs->total_sectors;
3431 }
3432 
3433 /**
3434  * Return length in bytes on success, -errno on error.
3435  * The length is always a multiple of BDRV_SECTOR_SIZE.
3436  */
3437 int64_t bdrv_getlength(BlockDriverState *bs)
3438 {
3439     int64_t ret = bdrv_nb_sectors(bs);
3440 
3441     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
3442     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
3443 }
3444 
3445 /* return 0 as number of sectors if no device present or error */
3446 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3447 {
3448     int64_t nb_sectors = bdrv_nb_sectors(bs);
3449 
3450     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3451 }
3452 
/* Returns the value of the node's 'sg' flag. */
bool bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3457 
3458 bool bdrv_is_encrypted(BlockDriverState *bs)
3459 {
3460     if (bs->backing && bs->backing->bs->encrypted) {
3461         return true;
3462     }
3463     return bs->encrypted;
3464 }
3465 
3466 bool bdrv_key_required(BlockDriverState *bs)
3467 {
3468     BdrvChild *backing = bs->backing;
3469 
3470     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
3471         return true;
3472     }
3473     return (bs->encrypted && !bs->valid_key);
3474 }
3475 
3476 int bdrv_set_key(BlockDriverState *bs, const char *key)
3477 {
3478     int ret;
3479     if (bs->backing && bs->backing->bs->encrypted) {
3480         ret = bdrv_set_key(bs->backing->bs, key);
3481         if (ret < 0)
3482             return ret;
3483         if (!bs->encrypted)
3484             return 0;
3485     }
3486     if (!bs->encrypted) {
3487         return -EINVAL;
3488     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3489         return -ENOMEDIUM;
3490     }
3491     ret = bs->drv->bdrv_set_key(bs, key);
3492     if (ret < 0) {
3493         bs->valid_key = false;
3494     } else if (!bs->valid_key) {
3495         /* call the change callback now, we skipped it on open */
3496         bs->valid_key = true;
3497         bdrv_parent_cb_change_media(bs, true);
3498     }
3499     return ret;
3500 }
3501 
3502 /*
3503  * Provide an encryption key for @bs.
3504  * If @key is non-null:
3505  *     If @bs is not encrypted, fail.
3506  *     Else if the key is invalid, fail.
3507  *     Else set @bs's key to @key, replacing the existing key, if any.
3508  * If @key is null:
3509  *     If @bs is encrypted and still lacks a key, fail.
3510  *     Else do nothing.
3511  * On failure, store an error object through @errp if non-null.
3512  */
3513 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3514 {
3515     if (key) {
3516         if (!bdrv_is_encrypted(bs)) {
3517             error_setg(errp, "Node '%s' is not encrypted",
3518                       bdrv_get_device_or_node_name(bs));
3519         } else if (bdrv_set_key(bs, key) < 0) {
3520             error_setg(errp, QERR_INVALID_PASSWORD);
3521         }
3522     } else {
3523         if (bdrv_key_required(bs)) {
3524             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3525                       "'%s' (%s) is encrypted",
3526                       bdrv_get_device_or_node_name(bs),
3527                       bdrv_get_encrypted_filename(bs));
3528         }
3529     }
3530 }
3531 
3532 const char *bdrv_get_format_name(BlockDriverState *bs)
3533 {
3534     return bs->drv ? bs->drv->format_name : NULL;
3535 }
3536 
/* qsort() comparison callback for arrays of C strings: @a and @b point to
 * the array elements (string pointers), which are compared with strcmp(). */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3541 
3542 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3543                          void *opaque)
3544 {
3545     BlockDriver *drv;
3546     int count = 0;
3547     int i;
3548     const char **formats = NULL;
3549 
3550     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3551         if (drv->format_name) {
3552             bool found = false;
3553             int i = count;
3554             while (formats && i && !found) {
3555                 found = !strcmp(formats[--i], drv->format_name);
3556             }
3557 
3558             if (!found) {
3559                 formats = g_renew(const char *, formats, count + 1);
3560                 formats[count++] = drv->format_name;
3561             }
3562         }
3563     }
3564 
3565     for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
3566         const char *format_name = block_driver_modules[i].format_name;
3567 
3568         if (format_name) {
3569             bool found = false;
3570             int j = count;
3571 
3572             while (formats && j && !found) {
3573                 found = !strcmp(formats[--j], format_name);
3574             }
3575 
3576             if (!found) {
3577                 formats = g_renew(const char *, formats, count + 1);
3578                 formats[count++] = format_name;
3579             }
3580         }
3581     }
3582 
3583     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3584 
3585     for (i = 0; i < count; i++) {
3586         it(opaque, formats[i]);
3587     }
3588 
3589     g_free(formats);
3590 }
3591 
3592 /* This function is to find a node in the bs graph */
3593 BlockDriverState *bdrv_find_node(const char *node_name)
3594 {
3595     BlockDriverState *bs;
3596 
3597     assert(node_name);
3598 
3599     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3600         if (!strcmp(node_name, bs->node_name)) {
3601             return bs;
3602         }
3603     }
3604     return NULL;
3605 }
3606 
3607 /* Put this QMP function here so it can access the static graph_bdrv_states. */
3608 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3609 {
3610     BlockDeviceInfoList *list, *entry;
3611     BlockDriverState *bs;
3612 
3613     list = NULL;
3614     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3615         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
3616         if (!info) {
3617             qapi_free_BlockDeviceInfoList(list);
3618             return NULL;
3619         }
3620         entry = g_malloc0(sizeof(*entry));
3621         entry->value = info;
3622         entry->next = list;
3623         list = entry;
3624     }
3625 
3626     return list;
3627 }
3628 
3629 BlockDriverState *bdrv_lookup_bs(const char *device,
3630                                  const char *node_name,
3631                                  Error **errp)
3632 {
3633     BlockBackend *blk;
3634     BlockDriverState *bs;
3635 
3636     if (device) {
3637         blk = blk_by_name(device);
3638 
3639         if (blk) {
3640             bs = blk_bs(blk);
3641             if (!bs) {
3642                 error_setg(errp, "Device '%s' has no medium", device);
3643             }
3644 
3645             return bs;
3646         }
3647     }
3648 
3649     if (node_name) {
3650         bs = bdrv_find_node(node_name);
3651 
3652         if (bs) {
3653             return bs;
3654         }
3655     }
3656 
3657     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3658                      device ? device : "",
3659                      node_name ? node_name : "");
3660     return NULL;
3661 }
3662 
3663 /* If 'base' is in the same chain as 'top', return true. Otherwise,
3664  * return false.  If either argument is NULL, return false. */
3665 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3666 {
3667     while (top && top != base) {
3668         top = backing_bs(top);
3669     }
3670 
3671     return top != NULL;
3672 }
3673 
3674 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3675 {
3676     if (!bs) {
3677         return QTAILQ_FIRST(&graph_bdrv_states);
3678     }
3679     return QTAILQ_NEXT(bs, node_list);
3680 }
3681 
/* Returns the node name stored in @bs. The returned pointer refers to the
 * node_name member of @bs itself. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
3686 
3687 const char *bdrv_get_parent_name(const BlockDriverState *bs)
3688 {
3689     BdrvChild *c;
3690     const char *name;
3691 
3692     /* If multiple parents have a name, just pick the first one. */
3693     QLIST_FOREACH(c, &bs->parents, next_parent) {
3694         if (c->role->get_name) {
3695             name = c->role->get_name(c);
3696             if (name && *name) {
3697                 return name;
3698             }
3699         }
3700     }
3701 
3702     return NULL;
3703 }
3704 
3705 /* TODO check what callers really want: bs->node_name or blk_name() */
3706 const char *bdrv_get_device_name(const BlockDriverState *bs)
3707 {
3708     return bdrv_get_parent_name(bs) ?: "";
3709 }
3710 
3711 /* This can be used to identify nodes that might not have a device
3712  * name associated. Since node and device names live in the same
3713  * namespace, the result is unambiguous. The exception is if both are
3714  * absent, then this returns an empty (non-null) string. */
3715 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3716 {
3717     return bdrv_get_parent_name(bs) ?: bs->node_name;
3718 }
3719 
3720 int bdrv_get_flags(BlockDriverState *bs)
3721 {
3722     return bs->open_flags;
3723 }
3724 
3725 int bdrv_has_zero_init_1(BlockDriverState *bs)
3726 {
3727     return 1;
3728 }
3729 
3730 int bdrv_has_zero_init(BlockDriverState *bs)
3731 {
3732     assert(bs->drv);
3733 
3734     /* If BS is a copy on write image, it is initialized to
3735        the contents of the base image, which may not be zeroes.  */
3736     if (bs->backing) {
3737         return 0;
3738     }
3739     if (bs->drv->bdrv_has_zero_init) {
3740         return bs->drv->bdrv_has_zero_init(bs);
3741     }
3742 
3743     /* safe default */
3744     return 0;
3745 }
3746 
3747 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3748 {
3749     BlockDriverInfo bdi;
3750 
3751     if (bs->backing) {
3752         return false;
3753     }
3754 
3755     if (bdrv_get_info(bs, &bdi) == 0) {
3756         return bdi.unallocated_blocks_are_zero;
3757     }
3758 
3759     return false;
3760 }
3761 
3762 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3763 {
3764     BlockDriverInfo bdi;
3765 
3766     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3767         return false;
3768     }
3769 
3770     if (bdrv_get_info(bs, &bdi) == 0) {
3771         return bdi.can_write_zeroes_with_unmap;
3772     }
3773 
3774     return false;
3775 }
3776 
3777 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3778 {
3779     if (bs->backing && bs->backing->bs->encrypted)
3780         return bs->backing_file;
3781     else if (bs->encrypted)
3782         return bs->filename;
3783     else
3784         return NULL;
3785 }
3786 
3787 void bdrv_get_backing_filename(BlockDriverState *bs,
3788                                char *filename, int filename_size)
3789 {
3790     pstrcpy(filename, filename_size, bs->backing_file);
3791 }
3792 
3793 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3794 {
3795     BlockDriver *drv = bs->drv;
3796     if (!drv)
3797         return -ENOMEDIUM;
3798     if (!drv->bdrv_get_info)
3799         return -ENOTSUP;
3800     memset(bdi, 0, sizeof(*bdi));
3801     return drv->bdrv_get_info(bs, bdi);
3802 }
3803 
3804 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3805 {
3806     BlockDriver *drv = bs->drv;
3807     if (drv && drv->bdrv_get_specific_info) {
3808         return drv->bdrv_get_specific_info(bs);
3809     }
3810     return NULL;
3811 }
3812 
3813 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3814 {
3815     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3816         return;
3817     }
3818 
3819     bs->drv->bdrv_debug_event(bs, event);
3820 }
3821 
3822 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3823                           const char *tag)
3824 {
3825     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3826         bs = bs->file ? bs->file->bs : NULL;
3827     }
3828 
3829     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3830         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3831     }
3832 
3833     return -ENOTSUP;
3834 }
3835 
3836 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3837 {
3838     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3839         bs = bs->file ? bs->file->bs : NULL;
3840     }
3841 
3842     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3843         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3844     }
3845 
3846     return -ENOTSUP;
3847 }
3848 
3849 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3850 {
3851     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3852         bs = bs->file ? bs->file->bs : NULL;
3853     }
3854 
3855     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3856         return bs->drv->bdrv_debug_resume(bs, tag);
3857     }
3858 
3859     return -ENOTSUP;
3860 }
3861 
3862 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3863 {
3864     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3865         bs = bs->file ? bs->file->bs : NULL;
3866     }
3867 
3868     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3869         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3870     }
3871 
3872     return false;
3873 }
3874 
/* Search the backing chain of 'bs' for the image whose backing file name
 * matches 'backing_file' and return that backing BlockDriverState.
 *
 * backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns NULL if 'bs' is NULL, has no driver, 'backing_file' is NULL, or no
 * image in the chain matches. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;
    Error *local_error = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers, reused on every iteration and freed at the end */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /* Visit every image in the chain that itself has a backing image */
    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing->bs;
                break;
            }
            /* Also check against the full backing filename for the image */
            bdrv_get_full_backing_filename(curr_bs, backing_file_full, PATH_MAX,
                                           &local_error);
            if (local_error == NULL) {
                if (strcmp(backing_file, backing_file_full) == 0) {
                    retval = curr_bs->backing->bs;
                    break;
                }
            } else {
                /* Failure to build the full name just means "no match" here;
                 * drop the error and reset it for the next iteration */
                error_free(local_error);
                local_error = NULL;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* Cannot resolve the requested name here; try the next image */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                /* Cannot resolve this image's backing name; try the next one */
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing->bs;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
3953 
3954 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3955 {
3956     if (!bs->drv) {
3957         return 0;
3958     }
3959 
3960     if (!bs->backing) {
3961         return 0;
3962     }
3963 
3964     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3965 }
3966 
/* Run the module initializers registered for MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3971 
/* Same as bdrv_init(), but sets use_bdrv_whitelist to 1 beforehand. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3977 
3978 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3979 {
3980     BdrvChild *child, *parent;
3981     uint64_t perm, shared_perm;
3982     Error *local_err = NULL;
3983     int ret;
3984 
3985     if (!bs->drv)  {
3986         return;
3987     }
3988 
3989     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3990         return;
3991     }
3992 
3993     QLIST_FOREACH(child, &bs->children, next) {
3994         bdrv_invalidate_cache(child->bs, &local_err);
3995         if (local_err) {
3996             error_propagate(errp, local_err);
3997             return;
3998         }
3999     }
4000 
4001     bs->open_flags &= ~BDRV_O_INACTIVE;
4002     if (bs->drv->bdrv_invalidate_cache) {
4003         bs->drv->bdrv_invalidate_cache(bs, &local_err);
4004         if (local_err) {
4005             bs->open_flags |= BDRV_O_INACTIVE;
4006             error_propagate(errp, local_err);
4007             return;
4008         }
4009     }
4010 
4011     ret = refresh_total_sectors(bs, bs->total_sectors);
4012     if (ret < 0) {
4013         bs->open_flags |= BDRV_O_INACTIVE;
4014         error_setg_errno(errp, -ret, "Could not refresh total sector count");
4015         return;
4016     }
4017 
4018     /* Update permissions, they may differ for inactive nodes */
4019     bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
4020     ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err);
4021     if (ret < 0) {
4022         bs->open_flags |= BDRV_O_INACTIVE;
4023         error_propagate(errp, local_err);
4024         return;
4025     }
4026     bdrv_set_perm(bs, perm, shared_perm);
4027 
4028     QLIST_FOREACH(parent, &bs->parents, next_parent) {
4029         if (parent->role->activate) {
4030             parent->role->activate(parent, &local_err);
4031             if (local_err) {
4032                 error_propagate(errp, local_err);
4033                 return;
4034             }
4035         }
4036     }
4037 }
4038 
4039 void bdrv_invalidate_cache_all(Error **errp)
4040 {
4041     BlockDriverState *bs;
4042     Error *local_err = NULL;
4043     BdrvNextIterator it;
4044 
4045     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4046         AioContext *aio_context = bdrv_get_aio_context(bs);
4047 
4048         aio_context_acquire(aio_context);
4049         bdrv_invalidate_cache(bs, &local_err);
4050         aio_context_release(aio_context);
4051         if (local_err) {
4052             error_propagate(errp, local_err);
4053             return;
4054         }
4055     }
4056 }
4057 
4058 static int bdrv_inactivate_recurse(BlockDriverState *bs,
4059                                    bool setting_flag)
4060 {
4061     BdrvChild *child, *parent;
4062     int ret;
4063 
4064     if (!setting_flag && bs->drv->bdrv_inactivate) {
4065         ret = bs->drv->bdrv_inactivate(bs);
4066         if (ret < 0) {
4067             return ret;
4068         }
4069     }
4070 
4071     if (setting_flag) {
4072         uint64_t perm, shared_perm;
4073 
4074         bs->open_flags |= BDRV_O_INACTIVE;
4075 
4076         QLIST_FOREACH(parent, &bs->parents, next_parent) {
4077             if (parent->role->inactivate) {
4078                 ret = parent->role->inactivate(parent);
4079                 if (ret < 0) {
4080                     bs->open_flags &= ~BDRV_O_INACTIVE;
4081                     return ret;
4082                 }
4083             }
4084         }
4085 
4086         /* Update permissions, they may differ for inactive nodes */
4087         bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
4088         bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort);
4089         bdrv_set_perm(bs, perm, shared_perm);
4090     }
4091 
4092     QLIST_FOREACH(child, &bs->children, next) {
4093         ret = bdrv_inactivate_recurse(child->bs, setting_flag);
4094         if (ret < 0) {
4095             return ret;
4096         }
4097     }
4098 
4099     return 0;
4100 }
4101 
4102 int bdrv_inactivate_all(void)
4103 {
4104     BlockDriverState *bs = NULL;
4105     BdrvNextIterator it;
4106     int ret = 0;
4107     int pass;
4108 
4109     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4110         aio_context_acquire(bdrv_get_aio_context(bs));
4111     }
4112 
4113     /* We do two passes of inactivation. The first pass calls to drivers'
4114      * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
4115      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
4116      * is allowed. */
4117     for (pass = 0; pass < 2; pass++) {
4118         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4119             ret = bdrv_inactivate_recurse(bs, pass);
4120             if (ret < 0) {
4121                 goto out;
4122             }
4123         }
4124     }
4125 
4126 out:
4127     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4128         aio_context_release(bdrv_get_aio_context(bs));
4129     }
4130 
4131     return ret;
4132 }
4133 
4134 /**************************************************************/
4135 /* removable device support */
4136 
4137 /**
4138  * Return TRUE if the media is present
4139  */
4140 bool bdrv_is_inserted(BlockDriverState *bs)
4141 {
4142     BlockDriver *drv = bs->drv;
4143     BdrvChild *child;
4144 
4145     if (!drv) {
4146         return false;
4147     }
4148     if (drv->bdrv_is_inserted) {
4149         return drv->bdrv_is_inserted(bs);
4150     }
4151     QLIST_FOREACH(child, &bs->children, next) {
4152         if (!bdrv_is_inserted(child->bs)) {
4153             return false;
4154         }
4155     }
4156     return true;
4157 }
4158 
4159 /**
4160  * Return whether the media changed since the last call to this
4161  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
4162  */
4163 int bdrv_media_changed(BlockDriverState *bs)
4164 {
4165     BlockDriver *drv = bs->drv;
4166 
4167     if (drv && drv->bdrv_media_changed) {
4168         return drv->bdrv_media_changed(bs);
4169     }
4170     return -ENOTSUP;
4171 }
4172 
4173 /**
4174  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4175  */
4176 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
4177 {
4178     BlockDriver *drv = bs->drv;
4179 
4180     if (drv && drv->bdrv_eject) {
4181         drv->bdrv_eject(bs, eject_flag);
4182     }
4183 }
4184 
4185 /**
4186  * Lock or unlock the media (if it is locked, the user won't be able
4187  * to eject it manually).
4188  */
4189 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
4190 {
4191     BlockDriver *drv = bs->drv;
4192 
4193     trace_bdrv_lock_medium(bs, locked);
4194 
4195     if (drv && drv->bdrv_lock_medium) {
4196         drv->bdrv_lock_medium(bs, locked);
4197     }
4198 }
4199 
4200 /* Get a reference to bs */
4201 void bdrv_ref(BlockDriverState *bs)
4202 {
4203     bs->refcnt++;
4204 }
4205 
4206 /* Release a previously grabbed reference to bs.
4207  * If after releasing, reference count is zero, the BlockDriverState is
4208  * deleted. */
4209 void bdrv_unref(BlockDriverState *bs)
4210 {
4211     if (!bs) {
4212         return;
4213     }
4214     assert(bs->refcnt > 0);
4215     if (--bs->refcnt == 0) {
4216         bdrv_delete(bs);
4217     }
4218 }
4219 
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Error copied to the caller by bdrv_op_is_blocked(); bdrv_op_unblock()
     * identifies the entry by comparing this pointer */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
4224 
4225 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
4226 {
4227     BdrvOpBlocker *blocker;
4228     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4229     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
4230         blocker = QLIST_FIRST(&bs->op_blockers[op]);
4231         if (errp) {
4232             *errp = error_copy(blocker->reason);
4233             error_prepend(errp, "Node '%s' is busy: ",
4234                           bdrv_get_device_or_node_name(bs));
4235         }
4236         return true;
4237     }
4238     return false;
4239 }
4240 
4241 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
4242 {
4243     BdrvOpBlocker *blocker;
4244     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4245 
4246     blocker = g_new0(BdrvOpBlocker, 1);
4247     blocker->reason = reason;
4248     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
4249 }
4250 
4251 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
4252 {
4253     BdrvOpBlocker *blocker, *next;
4254     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4255     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
4256         if (blocker->reason == reason) {
4257             QLIST_REMOVE(blocker, list);
4258             g_free(blocker);
4259         }
4260     }
4261 }
4262 
4263 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
4264 {
4265     int i;
4266     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4267         bdrv_op_block(bs, i, reason);
4268     }
4269 }
4270 
4271 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
4272 {
4273     int i;
4274     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4275         bdrv_op_unblock(bs, i, reason);
4276     }
4277 }
4278 
4279 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
4280 {
4281     int i;
4282 
4283     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4284         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
4285             return false;
4286         }
4287     }
4288     return true;
4289 }
4290 
/*
 * Create a new image 'filename' in format 'fmt'.
 *
 * The creation options are the union of the format driver's and the protocol
 * driver's create_opts.  They are seeded with 'img_size', then overridden by
 * the "-o" style string 'options' (may be NULL), and finally by
 * 'base_filename' / 'base_fmt' (each may be NULL) for the backing file and
 * its format.
 *
 * If the resulting size option reads back as -1 and a backing file is
 * configured, the backing file is opened (with BDRV_O_RDWR, BDRV_O_SNAPSHOT
 * and BDRV_O_NO_BACKING cleared from 'flags') and its length is used as the
 * image size.  Unless 'quiet' is set, a "Formatting ..." line with the final
 * options is printed to stdout before the image is created.
 *
 * All failures are reported through 'errp'; the function returns nothing.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags, bool quiet,
                     Error **errp)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Combine the format and protocol option lists into one */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* Report the detailed parse error, hand a generic one to errp */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags = flags;
            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            /* Open the backing file with the requested driver, if given */
            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put_str(backing_options, "driver", backing_fmt);
            }

            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                           &local_err);
            g_free(full_backing);
            if (!bs) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* The new image gets the same size as its backing file */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common exit: release the options and hand over any pending error */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    error_propagate(errp, local_err);
}
4453 
4454 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4455 {
4456     return bs->aio_context;
4457 }
4458 
4459 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
4460 {
4461     aio_co_enter(bdrv_get_aio_context(bs), co);
4462 }
4463 
/* Unlink 'ban' from the notifier list it is on and free it. */
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
    QLIST_REMOVE(ban, list);
    g_free(ban);
}
4469 
/*
 * Detach 'bs' and, recursively, its children from their AioContext.
 *
 * The registered notifiers' detach_aio_context callbacks run first, then the
 * driver's bdrv_detach_aio_context callback, then the children are detached,
 * and finally bs->aio_context is cleared.  Does nothing if 'bs' has no
 * driver.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf, *baf_tmp;
    BdrvChild *child;

    if (!bs->drv) {
        return;
    }

    /* While walking_aio_notifiers is set, bdrv_remove_aio_context_notifier()
     * only marks entries as deleted instead of freeing them */
    assert(!bs->walking_aio_notifiers);
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
        if (baf->deleted) {
            bdrv_do_remove_aio_context_notifier(baf);
        } else {
            baf->detach_aio_context(baf->opaque);
        }
    }
    /* Never mind iterating again to check for ->deleted.  bdrv_close() will
     * remove remaining aio notifiers if we aren't called again.
     */
    bs->walking_aio_notifiers = false;

    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_detach_aio_context(child->bs);
    }

    bs->aio_context = NULL;
}
4502 
/*
 * Attach 'bs' and, recursively, its children to 'new_context'.
 *
 * bs->aio_context is set first, then the children are attached, then the
 * driver's bdrv_attach_aio_context callback runs, and finally the registered
 * notifiers' attached_aio_context callbacks are invoked.  Does nothing if
 * 'bs' has no driver.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban, *ban_tmp;
    BdrvChild *child;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    QLIST_FOREACH(child, &bs->children, next) {
        bdrv_attach_aio_context(child->bs, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }

    /* While walking_aio_notifiers is set, bdrv_remove_aio_context_notifier()
     * only marks entries as deleted; such entries are freed here instead of
     * being called */
    assert(!bs->walking_aio_notifiers);
    bs->walking_aio_notifiers = true;
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
        if (ban->deleted) {
            bdrv_do_remove_aio_context_notifier(ban);
        } else {
            ban->attached_aio_context(new_context, ban->opaque);
        }
    }
    bs->walking_aio_notifiers = false;
}
4533 
/*
 * Move 'bs' from its current AioContext to 'new_context'.
 *
 * External events on the old context are disabled and the node is drained
 * before it is detached; it is then attached to 'new_context' while that
 * context is held, after which the parents' drained section is ended and
 * external events on the old context are re-enabled.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_disable_external(ctx);
    bdrv_parent_drained_begin(bs);
    bdrv_drain(bs); /* ensure there are no in-flight requests */

    while (aio_poll(ctx, false)) {
        /* wait for all bottom halves to execute */
    }

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    bdrv_parent_drained_end(bs);
    aio_enable_external(ctx);
    aio_context_release(new_context);
}
4557 
4558 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4559         void (*attached_aio_context)(AioContext *new_context, void *opaque),
4560         void (*detach_aio_context)(void *opaque), void *opaque)
4561 {
4562     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4563     *ban = (BdrvAioNotifier){
4564         .attached_aio_context = attached_aio_context,
4565         .detach_aio_context   = detach_aio_context,
4566         .opaque               = opaque
4567     };
4568 
4569     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4570 }
4571 
4572 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4573                                       void (*attached_aio_context)(AioContext *,
4574                                                                    void *),
4575                                       void (*detach_aio_context)(void *),
4576                                       void *opaque)
4577 {
4578     BdrvAioNotifier *ban, *ban_next;
4579 
4580     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4581         if (ban->attached_aio_context == attached_aio_context &&
4582             ban->detach_aio_context   == detach_aio_context   &&
4583             ban->opaque               == opaque               &&
4584             ban->deleted              == false)
4585         {
4586             if (bs->walking_aio_notifiers) {
4587                 ban->deleted = true;
4588             } else {
4589                 bdrv_do_remove_aio_context_notifier(ban);
4590             }
4591             return;
4592         }
4593     }
4594 
4595     abort();
4596 }
4597 
4598 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4599                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4600 {
4601     if (!bs->drv->bdrv_amend_options) {
4602         return -ENOTSUP;
4603     }
4604     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4605 }
4606 
4607 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4608  * of block filter and by bdrv_is_first_non_filter.
4609  * It is used to test if the given bs is the candidate or recurse more in the
4610  * node graph.
4611  */
4612 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4613                                       BlockDriverState *candidate)
4614 {
4615     /* return false if basic checks fails */
4616     if (!bs || !bs->drv) {
4617         return false;
4618     }
4619 
4620     /* the code reached a non block filter driver -> check if the bs is
4621      * the same as the candidate. It's the recursion termination condition.
4622      */
4623     if (!bs->drv->is_filter) {
4624         return bs == candidate;
4625     }
4626     /* Down this path the driver is a block filter driver */
4627 
4628     /* If the block filter recursion method is defined use it to recurse down
4629      * the node graph.
4630      */
4631     if (bs->drv->bdrv_recurse_is_first_non_filter) {
4632         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4633     }
4634 
4635     /* the driver is a block filter but don't allow to recurse -> return false
4636      */
4637     return false;
4638 }
4639 
4640 /* This function checks if the candidate is the first non filter bs down it's
4641  * bs chain. Since we don't have pointers to parents it explore all bs chains
4642  * from the top. Some filters can choose not to pass down the recursion.
4643  */
4644 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4645 {
4646     BlockDriverState *bs;
4647     BdrvNextIterator it;
4648 
4649     /* walk down the bs forest recursively */
4650     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4651         bool perm;
4652 
4653         /* try to recurse in this top level bs */
4654         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4655 
4656         /* candidate is the first non filter */
4657         if (perm) {
4658             return true;
4659         }
4660     }
4661 
4662     return false;
4663 }
4664 
4665 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4666                                         const char *node_name, Error **errp)
4667 {
4668     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4669     AioContext *aio_context;
4670 
4671     if (!to_replace_bs) {
4672         error_setg(errp, "Node name '%s' not found", node_name);
4673         return NULL;
4674     }
4675 
4676     aio_context = bdrv_get_aio_context(to_replace_bs);
4677     aio_context_acquire(aio_context);
4678 
4679     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4680         to_replace_bs = NULL;
4681         goto out;
4682     }
4683 
4684     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4685      * most non filter in order to prevent data corruption.
4686      * Another benefit is that this tests exclude backing files which are
4687      * blocked by the backing blockers.
4688      */
4689     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4690         error_setg(errp, "Only top most non filter can be replaced");
4691         to_replace_bs = NULL;
4692         goto out;
4693     }
4694 
4695 out:
4696     aio_context_release(aio_context);
4697     return to_replace_bs;
4698 }
4699 
4700 static bool append_open_options(QDict *d, BlockDriverState *bs)
4701 {
4702     const QDictEntry *entry;
4703     QemuOptDesc *desc;
4704     BdrvChild *child;
4705     bool found_any = false;
4706     const char *p;
4707 
4708     for (entry = qdict_first(bs->options); entry;
4709          entry = qdict_next(bs->options, entry))
4710     {
4711         /* Exclude options for children */
4712         QLIST_FOREACH(child, &bs->children, next) {
4713             if (strstart(qdict_entry_key(entry), child->name, &p)
4714                 && (!*p || *p == '.'))
4715             {
4716                 break;
4717             }
4718         }
4719         if (child) {
4720             continue;
4721         }
4722 
4723         /* And exclude all non-driver-specific options */
4724         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4725             if (!strcmp(qdict_entry_key(entry), desc->name)) {
4726                 break;
4727             }
4728         }
4729         if (desc->name) {
4730             continue;
4731         }
4732 
4733         qobject_incref(qdict_entry_value(entry));
4734         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4735         found_any = true;
4736     }
4737 
4738     return found_any;
4739 }
4740 
4741 /* Updates the following BDS fields:
4742  *  - exact_filename: A filename which may be used for opening a block device
4743  *                    which (mostly) equals the given BDS (even without any
4744  *                    other options; so reading and writing must return the same
4745  *                    results, but caching etc. may be different)
4746  *  - full_open_options: Options which, when given when opening a block device
4747  *                       (without a filename), result in a BDS (mostly)
4748  *                       equalling the given one
4749  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4750  *              full_open_options is converted to a JSON object, prefixed with
4751  *              "json:" (for use through the JSON pseudo protocol) and put here.
4752  */
4753 void bdrv_refresh_filename(BlockDriverState *bs)
4754 {
4755     BlockDriver *drv = bs->drv;
4756     QDict *opts;
4757 
4758     if (!drv) {
4759         return;
4760     }
4761 
4762     /* This BDS's file name will most probably depend on its file's name, so
4763      * refresh that first */
4764     if (bs->file) {
4765         bdrv_refresh_filename(bs->file->bs);
4766     }
4767 
4768     if (drv->bdrv_refresh_filename) {
4769         /* Obsolete information is of no use here, so drop the old file name
4770          * information before refreshing it */
4771         bs->exact_filename[0] = '\0';
4772         if (bs->full_open_options) {
4773             QDECREF(bs->full_open_options);
4774             bs->full_open_options = NULL;
4775         }
4776 
4777         opts = qdict_new();
4778         append_open_options(opts, bs);
4779         drv->bdrv_refresh_filename(bs, opts);
4780         QDECREF(opts);
4781     } else if (bs->file) {
4782         /* Try to reconstruct valid information from the underlying file */
4783         bool has_open_options;
4784 
4785         bs->exact_filename[0] = '\0';
4786         if (bs->full_open_options) {
4787             QDECREF(bs->full_open_options);
4788             bs->full_open_options = NULL;
4789         }
4790 
4791         opts = qdict_new();
4792         has_open_options = append_open_options(opts, bs);
4793 
4794         /* If no specific options have been given for this BDS, the filename of
4795          * the underlying file should suffice for this one as well */
4796         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4797             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4798         }
4799         /* Reconstructing the full options QDict is simple for most format block
4800          * drivers, as long as the full options are known for the underlying
4801          * file BDS. The full options QDict of that file BDS should somehow
4802          * contain a representation of the filename, therefore the following
4803          * suffices without querying the (exact_)filename of this BDS. */
4804         if (bs->file->bs->full_open_options) {
4805             qdict_put_str(opts, "driver", drv->format_name);
4806             QINCREF(bs->file->bs->full_open_options);
4807             qdict_put(opts, "file", bs->file->bs->full_open_options);
4808 
4809             bs->full_open_options = opts;
4810         } else {
4811             QDECREF(opts);
4812         }
4813     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4814         /* There is no underlying file BDS (at least referenced by BDS.file),
4815          * so the full options QDict should be equal to the options given
4816          * specifically for this block device when it was opened (plus the
4817          * driver specification).
4818          * Because those options don't change, there is no need to update
4819          * full_open_options when it's already set. */
4820 
4821         opts = qdict_new();
4822         append_open_options(opts, bs);
4823         qdict_put_str(opts, "driver", drv->format_name);
4824 
4825         if (bs->exact_filename[0]) {
4826             /* This may not work for all block protocol drivers (some may
4827              * require this filename to be parsed), but we have to find some
4828              * default solution here, so just include it. If some block driver
4829              * does not support pure options without any filename at all or
4830              * needs some special format of the options QDict, it needs to
4831              * implement the driver-specific bdrv_refresh_filename() function.
4832              */
4833             qdict_put_str(opts, "filename", bs->exact_filename);
4834         }
4835 
4836         bs->full_open_options = opts;
4837     }
4838 
4839     if (bs->exact_filename[0]) {
4840         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4841     } else if (bs->full_open_options) {
4842         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4843         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4844                  qstring_get_str(json));
4845         QDECREF(json);
4846     }
4847 }
4848 
4849 /*
4850  * Hot add/remove a BDS's child. So the user can take a child offline when
4851  * it is broken and take a new child online
4852  */
4853 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
4854                     Error **errp)
4855 {
4856 
4857     if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
4858         error_setg(errp, "The node %s does not support adding a child",
4859                    bdrv_get_device_or_node_name(parent_bs));
4860         return;
4861     }
4862 
4863     if (!QLIST_EMPTY(&child_bs->parents)) {
4864         error_setg(errp, "The node %s already has a parent",
4865                    child_bs->node_name);
4866         return;
4867     }
4868 
4869     parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
4870 }
4871 
4872 void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
4873 {
4874     BdrvChild *tmp;
4875 
4876     if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
4877         error_setg(errp, "The node %s does not support removing a child",
4878                    bdrv_get_device_or_node_name(parent_bs));
4879         return;
4880     }
4881 
4882     QLIST_FOREACH(tmp, &parent_bs->children, next) {
4883         if (tmp == child) {
4884             break;
4885         }
4886     }
4887 
4888     if (!tmp) {
4889         error_setg(errp, "The node %s does not have a child named %s",
4890                    bdrv_get_device_or_node_name(parent_bs),
4891                    bdrv_get_device_or_node_name(child->bs));
4892         return;
4893     }
4894 
4895     parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
4896 }
4897