xref: /openbmc/qemu/blockdev.c (revision f7bbb156)
1 /*
2  * QEMU host block devices
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or
7  * later.  See the COPYING file in the top-level directory.
8  *
9  * This file incorporates work covered by the following copyright and
10  * permission notice:
11  *
12  * Copyright (c) 2003-2008 Fabrice Bellard
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this software and associated documentation files (the "Software"), to deal
16  * in the Software without restriction, including without limitation the rights
17  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18  * copies of the Software, and to permit persons to whom the Software is
19  * furnished to do so, subject to the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30  * THE SOFTWARE.
31  */
32 
33 #include "qemu/osdep.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/blockdev.h"
36 #include "hw/block/block.h"
37 #include "block/blockjob.h"
38 #include "block/qdict.h"
39 #include "block/throttle-groups.h"
40 #include "monitor/monitor.h"
41 #include "qemu/error-report.h"
42 #include "qemu/option.h"
43 #include "qemu/qemu-print.h"
44 #include "qemu/config-file.h"
45 #include "qapi/qapi-commands-block.h"
46 #include "qapi/qapi-commands-transaction.h"
47 #include "qapi/qapi-visit-block-core.h"
48 #include "qapi/qmp/qdict.h"
49 #include "qapi/qmp/qnum.h"
50 #include "qapi/qmp/qstring.h"
51 #include "qapi/error.h"
52 #include "qapi/qmp/qerror.h"
53 #include "qapi/qmp/qlist.h"
54 #include "qapi/qobject-output-visitor.h"
55 #include "sysemu/sysemu.h"
56 #include "sysemu/iothread.h"
57 #include "block/block_int.h"
58 #include "block/trace.h"
59 #include "sysemu/runstate.h"
60 #include "sysemu/replay.h"
61 #include "qemu/cutils.h"
62 #include "qemu/help_option.h"
63 #include "qemu/main-loop.h"
64 #include "qemu/throttle-options.h"
65 
66 /* Protected by BQL */
67 QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
68     QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
69 
70 void bdrv_set_monitor_owned(BlockDriverState *bs)
71 {
72     GLOBAL_STATE_CODE();
73     QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
74 }
75 
76 static const char *const if_name[IF_COUNT] = {
77     [IF_NONE] = "none",
78     [IF_IDE] = "ide",
79     [IF_SCSI] = "scsi",
80     [IF_FLOPPY] = "floppy",
81     [IF_PFLASH] = "pflash",
82     [IF_MTD] = "mtd",
83     [IF_SD] = "sd",
84     [IF_VIRTIO] = "virtio",
85     [IF_XEN] = "xen",
86 };
87 
88 static int if_max_devs[IF_COUNT] = {
89     /*
90      * Do not change these numbers!  They govern how drive option
91      * index maps to unit and bus.  That mapping is ABI.
92      *
93      * All controllers used to implement if=T drives need to support
94      * if_max_devs[T] units, for any T with if_max_devs[T] != 0.
95      * Otherwise, some index values map to "impossible" bus, unit
96      * values.
97      *
98      * For instance, if you change [IF_SCSI] to 255, -drive
99      * if=scsi,index=12 no longer means bus=1,unit=5, but
100      * bus=0,unit=12.  With an lsi53c895a controller (7 units max),
101      * the drive can't be set up.  Regression.
102      */
103     [IF_IDE] = 2,
104     [IF_SCSI] = 7,
105 };
106 
107 /**
108  * Boards may call this to offer board-by-board overrides
109  * of the default, global values.
110  */
111 void override_max_devs(BlockInterfaceType type, int max_devs)
112 {
113     BlockBackend *blk;
114     DriveInfo *dinfo;
115 
116     GLOBAL_STATE_CODE();
117 
118     if (max_devs <= 0) {
119         return;
120     }
121 
122     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
123         dinfo = blk_legacy_dinfo(blk);
124         if (dinfo->type == type) {
125             fprintf(stderr, "Cannot override units-per-bus property of"
126                     " the %s interface, because a drive of that type has"
127                     " already been added.\n", if_name[type]);
128             g_assert_not_reached();
129         }
130     }
131 
132     if_max_devs[type] = max_devs;
133 }
134 
135 /*
136  * We automatically delete the drive when a device using it gets
137  * unplugged.  Questionable feature, but we can't just drop it.
138  * Device models call blockdev_mark_auto_del() to schedule the
139  * automatic deletion, and generic qdev code calls blockdev_auto_del()
140  * when deletion is actually safe.
141  */
142 void blockdev_mark_auto_del(BlockBackend *blk)
143 {
144     DriveInfo *dinfo = blk_legacy_dinfo(blk);
145     BlockJob *job;
146 
147     GLOBAL_STATE_CODE();
148 
149     if (!dinfo) {
150         return;
151     }
152 
153     for (job = block_job_next(NULL); job; job = block_job_next(job)) {
154         if (block_job_has_bdrv(job, blk_bs(blk))) {
155             AioContext *aio_context = job->job.aio_context;
156             aio_context_acquire(aio_context);
157 
158             job_cancel(&job->job, false);
159 
160             aio_context_release(aio_context);
161         }
162     }
163 
164     dinfo->auto_del = 1;
165 }
166 
167 void blockdev_auto_del(BlockBackend *blk)
168 {
169     DriveInfo *dinfo = blk_legacy_dinfo(blk);
170     GLOBAL_STATE_CODE();
171 
172     if (dinfo && dinfo->auto_del) {
173         monitor_remove_blk(blk);
174         blk_unref(blk);
175     }
176 }
177 
178 static int drive_index_to_bus_id(BlockInterfaceType type, int index)
179 {
180     int max_devs = if_max_devs[type];
181     return max_devs ? index / max_devs : 0;
182 }
183 
184 static int drive_index_to_unit_id(BlockInterfaceType type, int index)
185 {
186     int max_devs = if_max_devs[type];
187     return max_devs ? index % max_devs : index;
188 }
189 
190 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
191                     const char *optstr)
192 {
193     QemuOpts *opts;
194 
195     GLOBAL_STATE_CODE();
196 
197     opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
198     if (!opts) {
199         return NULL;
200     }
201     if (type != IF_DEFAULT) {
202         qemu_opt_set(opts, "if", if_name[type], &error_abort);
203     }
204     if (index >= 0) {
205         qemu_opt_set_number(opts, "index", index, &error_abort);
206     }
207     if (file)
208         qemu_opt_set(opts, "file", file, &error_abort);
209     return opts;
210 }
211 
212 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
213 {
214     BlockBackend *blk;
215     DriveInfo *dinfo;
216 
217     GLOBAL_STATE_CODE();
218 
219     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
220         dinfo = blk_legacy_dinfo(blk);
221         if (dinfo && dinfo->type == type
222             && dinfo->bus == bus && dinfo->unit == unit) {
223             return dinfo;
224         }
225     }
226 
227     return NULL;
228 }
229 
230 /*
231  * Check board claimed all -drive that are meant to be claimed.
232  * Fatal error if any remain unclaimed.
233  */
234 void drive_check_orphaned(void)
235 {
236     BlockBackend *blk;
237     DriveInfo *dinfo;
238     Location loc;
239     bool orphans = false;
240 
241     GLOBAL_STATE_CODE();
242 
243     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
244         dinfo = blk_legacy_dinfo(blk);
245         /*
246          * Ignore default drives, because we create certain default
247          * drives unconditionally, then leave them unclaimed.  Not the
248          * users fault.
249          * Ignore IF_VIRTIO, because it gets desugared into -device,
250          * so we can leave failing to -device.
251          * Ignore IF_NONE, because leaving unclaimed IF_NONE remains
252          * available for device_add is a feature.
253          */
254         if (dinfo->is_default || dinfo->type == IF_VIRTIO
255             || dinfo->type == IF_NONE) {
256             continue;
257         }
258         if (!blk_get_attached_dev(blk)) {
259             loc_push_none(&loc);
260             qemu_opts_loc_restore(dinfo->opts);
261             error_report("machine type does not support"
262                          " if=%s,bus=%d,unit=%d",
263                          if_name[dinfo->type], dinfo->bus, dinfo->unit);
264             loc_pop(&loc);
265             orphans = true;
266         }
267     }
268 
269     if (orphans) {
270         exit(1);
271     }
272 }
273 
274 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
275 {
276     GLOBAL_STATE_CODE();
277     return drive_get(type,
278                      drive_index_to_bus_id(type, index),
279                      drive_index_to_unit_id(type, index));
280 }
281 
282 int drive_get_max_bus(BlockInterfaceType type)
283 {
284     int max_bus;
285     BlockBackend *blk;
286     DriveInfo *dinfo;
287 
288     GLOBAL_STATE_CODE();
289 
290     max_bus = -1;
291     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
292         dinfo = blk_legacy_dinfo(blk);
293         if (dinfo && dinfo->type == type && dinfo->bus > max_bus) {
294             max_bus = dinfo->bus;
295         }
296     }
297     return max_bus;
298 }
299 
/*
 * Callback for bdrv_iterate_format(): print one format @name preceded
 * by a space.  @opaque is unused.
 */
static void bdrv_format_print(void *opaque, const char *name)
{
    qemu_printf(" %s", name);
}
304 
305 typedef struct {
306     QEMUBH *bh;
307     BlockDriverState *bs;
308 } BDRVPutRefBH;
309 
310 static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
311 {
312     if (!strcmp(buf, "ignore")) {
313         return BLOCKDEV_ON_ERROR_IGNORE;
314     } else if (!is_read && !strcmp(buf, "enospc")) {
315         return BLOCKDEV_ON_ERROR_ENOSPC;
316     } else if (!strcmp(buf, "stop")) {
317         return BLOCKDEV_ON_ERROR_STOP;
318     } else if (!strcmp(buf, "report")) {
319         return BLOCKDEV_ON_ERROR_REPORT;
320     } else {
321         error_setg(errp, "'%s' invalid %s error action",
322                    buf, is_read ? "read" : "write");
323         return -1;
324     }
325 }
326 
327 static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
328                                   Error **errp)
329 {
330     const QListEntry *entry;
331     for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) {
332         switch (qobject_type(entry->value)) {
333 
334         case QTYPE_QSTRING: {
335             unsigned long long length;
336             const char *str = qstring_get_str(qobject_to(QString,
337                                                          entry->value));
338             if (parse_uint_full(str, &length, 10) == 0 &&
339                 length > 0 && length <= UINT_MAX) {
340                 block_acct_add_interval(stats, (unsigned) length);
341             } else {
342                 error_setg(errp, "Invalid interval length: %s", str);
343                 return false;
344             }
345             break;
346         }
347 
348         case QTYPE_QNUM: {
349             int64_t length = qnum_get_int(qobject_to(QNum, entry->value));
350 
351             if (length > 0 && length <= UINT_MAX) {
352                 block_acct_add_interval(stats, (unsigned) length);
353             } else {
354                 error_setg(errp, "Invalid interval length: %" PRId64, length);
355                 return false;
356             }
357             break;
358         }
359 
360         default:
361             error_setg(errp, "The specification of stats-intervals is invalid");
362             return false;
363         }
364     }
365     return true;
366 }
367 
/* Media type selected by the "media" drive option. */
typedef enum {
    MEDIA_DISK,
    MEDIA_CDROM,
} DriveMediaType;
369 
370 /* All parameters but @opts are optional and may be set to NULL. */
371 static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
372     const char **throttling_group, ThrottleConfig *throttle_cfg,
373     BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
374 {
375     Error *local_error = NULL;
376     const char *aio;
377 
378     if (bdrv_flags) {
379         if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
380             *bdrv_flags |= BDRV_O_COPY_ON_READ;
381         }
382 
383         if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
384             if (bdrv_parse_aio(aio, bdrv_flags) < 0) {
385                 error_setg(errp, "invalid aio option");
386                 return;
387             }
388         }
389     }
390 
391     /* disk I/O throttling */
392     if (throttling_group) {
393         *throttling_group = qemu_opt_get(opts, "throttling.group");
394     }
395 
396     if (throttle_cfg) {
397         throttle_config_init(throttle_cfg);
398         throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
399             qemu_opt_get_number(opts, "throttling.bps-total", 0);
400         throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
401             qemu_opt_get_number(opts, "throttling.bps-read", 0);
402         throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
403             qemu_opt_get_number(opts, "throttling.bps-write", 0);
404         throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
405             qemu_opt_get_number(opts, "throttling.iops-total", 0);
406         throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
407             qemu_opt_get_number(opts, "throttling.iops-read", 0);
408         throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
409             qemu_opt_get_number(opts, "throttling.iops-write", 0);
410 
411         throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
412             qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
413         throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
414             qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
415         throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
416             qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
417         throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
418             qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
419         throttle_cfg->buckets[THROTTLE_OPS_READ].max =
420             qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
421         throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
422             qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
423 
424         throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
425             qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
426         throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length  =
427             qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
428         throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
429             qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
430         throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
431             qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
432         throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length =
433             qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
434         throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
435             qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
436 
437         throttle_cfg->op_size =
438             qemu_opt_get_number(opts, "throttling.iops-size", 0);
439 
440         if (!throttle_is_valid(throttle_cfg, errp)) {
441             return;
442         }
443     }
444 
445     if (detect_zeroes) {
446         *detect_zeroes =
447             qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
448                             qemu_opt_get(opts, "detect-zeroes"),
449                             BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
450                             &local_error);
451         if (local_error) {
452             error_propagate(errp, local_error);
453             return;
454         }
455     }
456 }
457 
458 static OnOffAuto account_get_opt(QemuOpts *opts, const char *name)
459 {
460     if (!qemu_opt_find(opts, name)) {
461         return ON_OFF_AUTO_AUTO;
462     }
463     if (qemu_opt_get_bool(opts, name, true)) {
464         return ON_OFF_AUTO_ON;
465     }
466     return ON_OFF_AUTO_OFF;
467 }
468 
469 /* Takes the ownership of bs_opts */
470 static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
471                                    Error **errp)
472 {
473     const char *buf;
474     int bdrv_flags = 0;
475     int on_read_error, on_write_error;
476     OnOffAuto account_invalid, account_failed;
477     bool writethrough, read_only;
478     BlockBackend *blk;
479     BlockDriverState *bs;
480     ThrottleConfig cfg;
481     int snapshot = 0;
482     Error *error = NULL;
483     QemuOpts *opts;
484     QDict *interval_dict = NULL;
485     QList *interval_list = NULL;
486     const char *id;
487     BlockdevDetectZeroesOptions detect_zeroes =
488         BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
489     const char *throttling_group = NULL;
490 
491     /* Check common options by copying from bs_opts to opts, all other options
492      * stay in bs_opts for processing by bdrv_open(). */
493     id = qdict_get_try_str(bs_opts, "id");
494     opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, errp);
495     if (!opts) {
496         goto err_no_opts;
497     }
498 
499     if (!qemu_opts_absorb_qdict(opts, bs_opts, errp)) {
500         goto early_err;
501     }
502 
503     if (id) {
504         qdict_del(bs_opts, "id");
505     }
506 
507     /* extract parameters */
508     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
509 
510     account_invalid = account_get_opt(opts, "stats-account-invalid");
511     account_failed = account_get_opt(opts, "stats-account-failed");
512 
513     writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);
514 
515     id = qemu_opts_id(opts);
516 
517     qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals.");
518     qdict_array_split(interval_dict, &interval_list);
519 
520     if (qdict_size(interval_dict) != 0) {
521         error_setg(errp, "Invalid option stats-intervals.%s",
522                    qdict_first(interval_dict)->key);
523         goto early_err;
524     }
525 
526     extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
527                                     &detect_zeroes, &error);
528     if (error) {
529         error_propagate(errp, error);
530         goto early_err;
531     }
532 
533     if ((buf = qemu_opt_get(opts, "format")) != NULL) {
534         if (is_help_option(buf)) {
535             qemu_printf("Supported formats:");
536             bdrv_iterate_format(bdrv_format_print, NULL, false);
537             qemu_printf("\nSupported formats (read-only):");
538             bdrv_iterate_format(bdrv_format_print, NULL, true);
539             qemu_printf("\n");
540             goto early_err;
541         }
542 
543         if (qdict_haskey(bs_opts, "driver")) {
544             error_setg(errp, "Cannot specify both 'driver' and 'format'");
545             goto early_err;
546         }
547         qdict_put_str(bs_opts, "driver", buf);
548     }
549 
550     on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
551     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
552         on_write_error = parse_block_error_action(buf, 0, &error);
553         if (error) {
554             error_propagate(errp, error);
555             goto early_err;
556         }
557     }
558 
559     on_read_error = BLOCKDEV_ON_ERROR_REPORT;
560     if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
561         on_read_error = parse_block_error_action(buf, 1, &error);
562         if (error) {
563             error_propagate(errp, error);
564             goto early_err;
565         }
566     }
567 
568     if (snapshot) {
569         bdrv_flags |= BDRV_O_SNAPSHOT;
570     }
571 
572     read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false);
573 
574     /* init */
575     if ((!file || !*file) && !qdict_size(bs_opts)) {
576         BlockBackendRootState *blk_rs;
577 
578         blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
579         blk_rs = blk_get_root_state(blk);
580         blk_rs->open_flags    = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR);
581         blk_rs->detect_zeroes = detect_zeroes;
582 
583         qobject_unref(bs_opts);
584     } else {
585         if (file && !*file) {
586             file = NULL;
587         }
588 
589         /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
590          * with other callers) rather than what we want as the real defaults.
591          * Apply the defaults here instead. */
592         qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
593         qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
594         qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY,
595                               read_only ? "on" : "off");
596         qdict_set_default_str(bs_opts, BDRV_OPT_AUTO_READ_ONLY, "on");
597         assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0);
598 
599         if (runstate_check(RUN_STATE_INMIGRATE)) {
600             bdrv_flags |= BDRV_O_INACTIVE;
601         }
602 
603         blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp);
604         if (!blk) {
605             goto err_no_bs_opts;
606         }
607         bs = blk_bs(blk);
608 
609         bs->detect_zeroes = detect_zeroes;
610 
611         block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);
612 
613         if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
614             blk_unref(blk);
615             blk = NULL;
616             goto err_no_bs_opts;
617         }
618     }
619 
620     /* disk I/O throttling */
621     if (throttle_enabled(&cfg)) {
622         if (!throttling_group) {
623             throttling_group = id;
624         }
625         blk_io_limits_enable(blk, throttling_group);
626         blk_set_io_limits(blk, &cfg);
627     }
628 
629     blk_set_enable_write_cache(blk, !writethrough);
630     blk_set_on_error(blk, on_read_error, on_write_error);
631 
632     if (!monitor_add_blk(blk, id, errp)) {
633         blk_unref(blk);
634         blk = NULL;
635         goto err_no_bs_opts;
636     }
637 
638 err_no_bs_opts:
639     qemu_opts_del(opts);
640     qobject_unref(interval_dict);
641     qobject_unref(interval_list);
642     return blk;
643 
644 early_err:
645     qemu_opts_del(opts);
646     qobject_unref(interval_dict);
647     qobject_unref(interval_list);
648 err_no_opts:
649     qobject_unref(bs_opts);
650     return NULL;
651 }
652 
653 /* Takes the ownership of bs_opts */
654 BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
655 {
656     int bdrv_flags = 0;
657 
658     GLOBAL_STATE_CODE();
659     /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
660      * with other callers) rather than what we want as the real defaults.
661      * Apply the defaults here instead. */
662     qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
663     qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
664     qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, "off");
665 
666     if (runstate_check(RUN_STATE_INMIGRATE)) {
667         bdrv_flags |= BDRV_O_INACTIVE;
668     }
669 
670     return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
671 }
672 
673 void blockdev_close_all_bdrv_states(void)
674 {
675     BlockDriverState *bs, *next_bs;
676 
677     GLOBAL_STATE_CODE();
678     QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
679         AioContext *ctx = bdrv_get_aio_context(bs);
680 
681         aio_context_acquire(ctx);
682         bdrv_unref(bs);
683         aio_context_release(ctx);
684     }
685 }
686 
687 /* Iterates over the list of monitor-owned BlockDriverStates */
688 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
689 {
690     GLOBAL_STATE_CODE();
691     return bs ? QTAILQ_NEXT(bs, monitor_list)
692               : QTAILQ_FIRST(&monitor_bdrv_states);
693 }
694 
695 static bool qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
696                             Error **errp)
697 {
698     const char *value;
699 
700     value = qemu_opt_get(opts, from);
701     if (value) {
702         if (qemu_opt_find(opts, to)) {
703             error_setg(errp, "'%s' and its alias '%s' can't be used at the "
704                        "same time", to, from);
705             return false;
706         }
707     }
708 
709     /* rename all items in opts */
710     while ((value = qemu_opt_get(opts, from))) {
711         qemu_opt_set(opts, to, value, &error_abort);
712         qemu_opt_unset(opts, from);
713     }
714     return true;
715 }
716 
717 QemuOptsList qemu_legacy_drive_opts = {
718     .name = "drive",
719     .head = QTAILQ_HEAD_INITIALIZER(qemu_legacy_drive_opts.head),
720     .desc = {
721         {
722             .name = "bus",
723             .type = QEMU_OPT_NUMBER,
724             .help = "bus number",
725         },{
726             .name = "unit",
727             .type = QEMU_OPT_NUMBER,
728             .help = "unit number (i.e. lun for scsi)",
729         },{
730             .name = "index",
731             .type = QEMU_OPT_NUMBER,
732             .help = "index number",
733         },{
734             .name = "media",
735             .type = QEMU_OPT_STRING,
736             .help = "media type (disk, cdrom)",
737         },{
738             .name = "if",
739             .type = QEMU_OPT_STRING,
740             .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
741         },{
742             .name = "file",
743             .type = QEMU_OPT_STRING,
744             .help = "file name",
745         },
746 
747         /* Options that are passed on, but have special semantics with -drive */
748         {
749             .name = BDRV_OPT_READ_ONLY,
750             .type = QEMU_OPT_BOOL,
751             .help = "open drive file as read-only",
752         },{
753             .name = "rerror",
754             .type = QEMU_OPT_STRING,
755             .help = "read error action",
756         },{
757             .name = "werror",
758             .type = QEMU_OPT_STRING,
759             .help = "write error action",
760         },{
761             .name = "copy-on-read",
762             .type = QEMU_OPT_BOOL,
763             .help = "copy read data from backing file into image file",
764         },
765 
766         { /* end of list */ }
767     },
768 };
769 
770 DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
771                      Error **errp)
772 {
773     const char *value;
774     BlockBackend *blk;
775     DriveInfo *dinfo = NULL;
776     QDict *bs_opts;
777     QemuOpts *legacy_opts;
778     DriveMediaType media = MEDIA_DISK;
779     BlockInterfaceType type;
780     int max_devs, bus_id, unit_id, index;
781     const char *werror, *rerror;
782     bool read_only = false;
783     bool copy_on_read;
784     const char *filename;
785     int i;
786 
787     GLOBAL_STATE_CODE();
788 
789     /* Change legacy command line options into QMP ones */
790     static const struct {
791         const char *from;
792         const char *to;
793     } opt_renames[] = {
794         { "iops",           "throttling.iops-total" },
795         { "iops_rd",        "throttling.iops-read" },
796         { "iops_wr",        "throttling.iops-write" },
797 
798         { "bps",            "throttling.bps-total" },
799         { "bps_rd",         "throttling.bps-read" },
800         { "bps_wr",         "throttling.bps-write" },
801 
802         { "iops_max",       "throttling.iops-total-max" },
803         { "iops_rd_max",    "throttling.iops-read-max" },
804         { "iops_wr_max",    "throttling.iops-write-max" },
805 
806         { "bps_max",        "throttling.bps-total-max" },
807         { "bps_rd_max",     "throttling.bps-read-max" },
808         { "bps_wr_max",     "throttling.bps-write-max" },
809 
810         { "iops_size",      "throttling.iops-size" },
811 
812         { "group",          "throttling.group" },
813 
814         { "readonly",       BDRV_OPT_READ_ONLY },
815     };
816 
817     for (i = 0; i < ARRAY_SIZE(opt_renames); i++) {
818         if (!qemu_opt_rename(all_opts, opt_renames[i].from,
819                              opt_renames[i].to, errp)) {
820             return NULL;
821         }
822     }
823 
824     value = qemu_opt_get(all_opts, "cache");
825     if (value) {
826         int flags = 0;
827         bool writethrough;
828 
829         if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
830             error_setg(errp, "invalid cache option");
831             return NULL;
832         }
833 
834         /* Specific options take precedence */
835         if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
836             qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
837                               !writethrough, &error_abort);
838         }
839         if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
840             qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
841                               !!(flags & BDRV_O_NOCACHE), &error_abort);
842         }
843         if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
844             qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
845                               !!(flags & BDRV_O_NO_FLUSH), &error_abort);
846         }
847         qemu_opt_unset(all_opts, "cache");
848     }
849 
850     /* Get a QDict for processing the options */
851     bs_opts = qdict_new();
852     qemu_opts_to_qdict(all_opts, bs_opts);
853 
854     legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
855                                    &error_abort);
856     if (!qemu_opts_absorb_qdict(legacy_opts, bs_opts, errp)) {
857         goto fail;
858     }
859 
860     /* Media type */
861     value = qemu_opt_get(legacy_opts, "media");
862     if (value) {
863         if (!strcmp(value, "disk")) {
864             media = MEDIA_DISK;
865         } else if (!strcmp(value, "cdrom")) {
866             media = MEDIA_CDROM;
867             read_only = true;
868         } else {
869             error_setg(errp, "'%s' invalid media", value);
870             goto fail;
871         }
872     }
873 
874     /* copy-on-read is disabled with a warning for read-only devices */
875     read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false);
876     copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);
877 
878     if (read_only && copy_on_read) {
879         warn_report("disabling copy-on-read on read-only drive");
880         copy_on_read = false;
881     }
882 
883     qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
884     qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");
885 
886     /* Controller type */
887     value = qemu_opt_get(legacy_opts, "if");
888     if (value) {
889         for (type = 0;
890              type < IF_COUNT && strcmp(value, if_name[type]);
891              type++) {
892         }
893         if (type == IF_COUNT) {
894             error_setg(errp, "unsupported bus type '%s'", value);
895             goto fail;
896         }
897     } else {
898         type = block_default_type;
899     }
900 
901     /* Device address specified by bus/unit or index.
902      * If none was specified, try to find the first free one. */
903     bus_id  = qemu_opt_get_number(legacy_opts, "bus", 0);
904     unit_id = qemu_opt_get_number(legacy_opts, "unit", -1);
905     index   = qemu_opt_get_number(legacy_opts, "index", -1);
906 
907     max_devs = if_max_devs[type];
908 
909     if (index != -1) {
910         if (bus_id != 0 || unit_id != -1) {
911             error_setg(errp, "index cannot be used with bus and unit");
912             goto fail;
913         }
914         bus_id = drive_index_to_bus_id(type, index);
915         unit_id = drive_index_to_unit_id(type, index);
916     }
917 
918     if (unit_id == -1) {
919        unit_id = 0;
920        while (drive_get(type, bus_id, unit_id) != NULL) {
921            unit_id++;
922            if (max_devs && unit_id >= max_devs) {
923                unit_id -= max_devs;
924                bus_id++;
925            }
926        }
927     }
928 
929     if (max_devs && unit_id >= max_devs) {
930         error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1);
931         goto fail;
932     }
933 
934     if (drive_get(type, bus_id, unit_id) != NULL) {
935         error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists",
936                    bus_id, unit_id, index);
937         goto fail;
938     }
939 
940     /* no id supplied -> create one */
941     if (qemu_opts_id(all_opts) == NULL) {
942         char *new_id;
943         const char *mediastr = "";
944         if (type == IF_IDE || type == IF_SCSI) {
945             mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
946         }
947         if (max_devs) {
948             new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id,
949                                      mediastr, unit_id);
950         } else {
951             new_id = g_strdup_printf("%s%s%i", if_name[type],
952                                      mediastr, unit_id);
953         }
954         qdict_put_str(bs_opts, "id", new_id);
955         g_free(new_id);
956     }
957 
958     /* Add virtio block device */
959     if (type == IF_VIRTIO) {
960         QemuOpts *devopts;
961         devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
962                                    &error_abort);
963         qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
964         qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
965                      &error_abort);
966     }
967 
968     filename = qemu_opt_get(legacy_opts, "file");
969 
970     /* Check werror/rerror compatibility with if=... */
971     werror = qemu_opt_get(legacy_opts, "werror");
972     if (werror != NULL) {
973         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
974             type != IF_NONE) {
975             error_setg(errp, "werror is not supported by this bus type");
976             goto fail;
977         }
978         qdict_put_str(bs_opts, "werror", werror);
979     }
980 
981     rerror = qemu_opt_get(legacy_opts, "rerror");
982     if (rerror != NULL) {
983         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
984             type != IF_NONE) {
985             error_setg(errp, "rerror is not supported by this bus type");
986             goto fail;
987         }
988         qdict_put_str(bs_opts, "rerror", rerror);
989     }
990 
991     /* Actual block device init: Functionality shared with blockdev-add */
992     blk = blockdev_init(filename, bs_opts, errp);
993     bs_opts = NULL;
994     if (!blk) {
995         goto fail;
996     }
997 
998     /* Create legacy DriveInfo */
999     dinfo = g_malloc0(sizeof(*dinfo));
1000     dinfo->opts = all_opts;
1001 
1002     dinfo->type = type;
1003     dinfo->bus = bus_id;
1004     dinfo->unit = unit_id;
1005 
1006     blk_set_legacy_dinfo(blk, dinfo);
1007 
1008     switch(type) {
1009     case IF_IDE:
1010     case IF_SCSI:
1011     case IF_XEN:
1012     case IF_NONE:
1013         dinfo->media_cd = media == MEDIA_CDROM;
1014         break;
1015     default:
1016         break;
1017     }
1018 
1019 fail:
1020     qemu_opts_del(legacy_opts);
1021     qobject_unref(bs_opts);
1022     return dinfo;
1023 }
1024 
1025 static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp)
1026 {
1027     BlockDriverState *bs;
1028 
1029     bs = bdrv_lookup_bs(name, name, errp);
1030     if (bs == NULL) {
1031         return NULL;
1032     }
1033 
1034     if (!bdrv_is_root_node(bs)) {
1035         error_setg(errp, "Need a root block node");
1036         return NULL;
1037     }
1038 
1039     if (!bdrv_is_inserted(bs)) {
1040         error_setg(errp, "Device has no medium");
1041         return NULL;
1042     }
1043 
1044     return bs;
1045 }
1046 
1047 static void blockdev_do_action(TransactionAction *action, Error **errp)
1048 {
1049     TransactionActionList list;
1050 
1051     list.value = action;
1052     list.next = NULL;
1053     qmp_transaction(&list, false, NULL, errp);
1054 }
1055 
1056 void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
1057                                 bool has_node_name, const char *node_name,
1058                                 const char *snapshot_file,
1059                                 bool has_snapshot_node_name,
1060                                 const char *snapshot_node_name,
1061                                 bool has_format, const char *format,
1062                                 bool has_mode, NewImageMode mode, Error **errp)
1063 {
1064     BlockdevSnapshotSync snapshot = {
1065         .has_device = has_device,
1066         .device = (char *) device,
1067         .has_node_name = has_node_name,
1068         .node_name = (char *) node_name,
1069         .snapshot_file = (char *) snapshot_file,
1070         .has_snapshot_node_name = has_snapshot_node_name,
1071         .snapshot_node_name = (char *) snapshot_node_name,
1072         .has_format = has_format,
1073         .format = (char *) format,
1074         .has_mode = has_mode,
1075         .mode = mode,
1076     };
1077     TransactionAction action = {
1078         .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
1079         .u.blockdev_snapshot_sync.data = &snapshot,
1080     };
1081     blockdev_do_action(&action, errp);
1082 }
1083 
1084 void qmp_blockdev_snapshot(const char *node, const char *overlay,
1085                            Error **errp)
1086 {
1087     BlockdevSnapshot snapshot_data = {
1088         .node = (char *) node,
1089         .overlay = (char *) overlay
1090     };
1091     TransactionAction action = {
1092         .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
1093         .u.blockdev_snapshot.data = &snapshot_data,
1094     };
1095     blockdev_do_action(&action, errp);
1096 }
1097 
1098 void qmp_blockdev_snapshot_internal_sync(const char *device,
1099                                          const char *name,
1100                                          Error **errp)
1101 {
1102     BlockdevSnapshotInternal snapshot = {
1103         .device = (char *) device,
1104         .name = (char *) name
1105     };
1106     TransactionAction action = {
1107         .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
1108         .u.blockdev_snapshot_internal_sync.data = &snapshot,
1109     };
1110     blockdev_do_action(&action, errp);
1111 }
1112 
1113 SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
1114                                                          bool has_id,
1115                                                          const char *id,
1116                                                          bool has_name,
1117                                                          const char *name,
1118                                                          Error **errp)
1119 {
1120     BlockDriverState *bs;
1121     AioContext *aio_context;
1122     QEMUSnapshotInfo sn;
1123     Error *local_err = NULL;
1124     SnapshotInfo *info = NULL;
1125     int ret;
1126 
1127     bs = qmp_get_root_bs(device, errp);
1128     if (!bs) {
1129         return NULL;
1130     }
1131     aio_context = bdrv_get_aio_context(bs);
1132     aio_context_acquire(aio_context);
1133 
1134     if (!has_id) {
1135         id = NULL;
1136     }
1137 
1138     if (!has_name) {
1139         name = NULL;
1140     }
1141 
1142     if (!id && !name) {
1143         error_setg(errp, "Name or id must be provided");
1144         goto out_aio_context;
1145     }
1146 
1147     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
1148         goto out_aio_context;
1149     }
1150 
1151     ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
1152     if (local_err) {
1153         error_propagate(errp, local_err);
1154         goto out_aio_context;
1155     }
1156     if (!ret) {
1157         error_setg(errp,
1158                    "Snapshot with id '%s' and name '%s' does not exist on "
1159                    "device '%s'",
1160                    STR_OR_NULL(id), STR_OR_NULL(name), device);
1161         goto out_aio_context;
1162     }
1163 
1164     bdrv_snapshot_delete(bs, id, name, &local_err);
1165     if (local_err) {
1166         error_propagate(errp, local_err);
1167         goto out_aio_context;
1168     }
1169 
1170     aio_context_release(aio_context);
1171 
1172     info = g_new0(SnapshotInfo, 1);
1173     info->id = g_strdup(sn.id_str);
1174     info->name = g_strdup(sn.name);
1175     info->date_nsec = sn.date_nsec;
1176     info->date_sec = sn.date_sec;
1177     info->vm_state_size = sn.vm_state_size;
1178     info->vm_clock_nsec = sn.vm_clock_nsec % 1000000000;
1179     info->vm_clock_sec = sn.vm_clock_nsec / 1000000000;
1180     if (sn.icount != -1ULL) {
1181         info->icount = sn.icount;
1182         info->has_icount = true;
1183     }
1184 
1185     return info;
1186 
1187 out_aio_context:
1188     aio_context_release(aio_context);
1189     return NULL;
1190 }
1191 
/* State and operation tables for the actions of atomic group operations */

/* Forward declaration: BlkActionOps refers to BlkActionState by pointer */
typedef struct BlkActionState BlkActionState;
1195 
1196 /**
1197  * BlkActionOps:
1198  * Table of operations that define an Action.
1199  *
1200  * @instance_size: Size of state struct, in bytes.
1201  * @prepare: Prepare the work, must NOT be NULL.
1202  * @commit: Commit the changes, can be NULL.
1203  * @abort: Abort the changes on fail, can be NULL.
1204  * @clean: Clean up resources after all transaction actions have called
1205  *         commit() or abort(). Can be NULL.
1206  *
1207  * Only prepare() may fail. In a single transaction, only one of commit() or
1208  * abort() will be called. clean() will always be called if it is present.
1209  *
1210  * Always run under BQL.
1211  */
1212 typedef struct BlkActionOps {
1213     size_t instance_size;
1214     void (*prepare)(BlkActionState *common, Error **errp);
1215     void (*commit)(BlkActionState *common);
1216     void (*abort)(BlkActionState *common);
1217     void (*clean)(BlkActionState *common);
1218 } BlkActionOps;
1219 
1220 /**
1221  * BlkActionState:
1222  * Describes one Action's state within a Transaction.
1223  *
1224  * @action: QAPI-defined enum identifying which Action to perform.
1225  * @ops: Table of ActionOps this Action can perform.
1226  * @block_job_txn: Transaction which this action belongs to.
1227  * @entry: List membership for all Actions in this Transaction.
1228  *
1229  * This structure must be arranged as first member in a subclassed type,
1230  * assuming that the compiler will also arrange it to the same offsets as the
1231  * base class.
1232  */
1233 struct BlkActionState {
1234     TransactionAction *action;
1235     const BlkActionOps *ops;
1236     JobTxn *block_job_txn;
1237     TransactionProperties *txn_props;
1238     QTAILQ_ENTRY(BlkActionState) entry;
1239 };
1240 
1241 /* internal snapshot private data */
1242 typedef struct InternalSnapshotState {
1243     BlkActionState common;
1244     BlockDriverState *bs;
1245     QEMUSnapshotInfo sn;
1246     bool created;
1247 } InternalSnapshotState;
1248 
1249 
1250 static int action_check_completion_mode(BlkActionState *s, Error **errp)
1251 {
1252     if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
1253         error_setg(errp,
1254                    "Action '%s' does not support Transaction property "
1255                    "completion-mode = %s",
1256                    TransactionActionKind_str(s->action->type),
1257                    ActionCompletionMode_str(s->txn_props->completion_mode));
1258         return -1;
1259     }
1260     return 0;
1261 }
1262 
1263 static void internal_snapshot_prepare(BlkActionState *common,
1264                                       Error **errp)
1265 {
1266     Error *local_err = NULL;
1267     const char *device;
1268     const char *name;
1269     BlockDriverState *bs;
1270     QEMUSnapshotInfo old_sn, *sn;
1271     bool ret;
1272     int64_t rt;
1273     BlockdevSnapshotInternal *internal;
1274     InternalSnapshotState *state;
1275     AioContext *aio_context;
1276     int ret1;
1277 
1278     g_assert(common->action->type ==
1279              TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
1280     internal = common->action->u.blockdev_snapshot_internal_sync.data;
1281     state = DO_UPCAST(InternalSnapshotState, common, common);
1282 
1283     /* 1. parse input */
1284     device = internal->device;
1285     name = internal->name;
1286 
1287     /* 2. check for validation */
1288     if (action_check_completion_mode(common, errp) < 0) {
1289         return;
1290     }
1291 
1292     bs = qmp_get_root_bs(device, errp);
1293     if (!bs) {
1294         return;
1295     }
1296 
1297     aio_context = bdrv_get_aio_context(bs);
1298     aio_context_acquire(aio_context);
1299 
1300     state->bs = bs;
1301 
1302     /* Paired with .clean() */
1303     bdrv_drained_begin(bs);
1304 
1305     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
1306         goto out;
1307     }
1308 
1309     if (bdrv_is_read_only(bs)) {
1310         error_setg(errp, "Device '%s' is read only", device);
1311         goto out;
1312     }
1313 
1314     if (!bdrv_can_snapshot(bs)) {
1315         error_setg(errp, "Block format '%s' used by device '%s' "
1316                    "does not support internal snapshots",
1317                    bs->drv->format_name, device);
1318         goto out;
1319     }
1320 
1321     if (!strlen(name)) {
1322         error_setg(errp, "Name is empty");
1323         goto out;
1324     }
1325 
1326     /* check whether a snapshot with name exist */
1327     ret = bdrv_snapshot_find_by_id_and_name(bs, NULL, name, &old_sn,
1328                                             &local_err);
1329     if (local_err) {
1330         error_propagate(errp, local_err);
1331         goto out;
1332     } else if (ret) {
1333         error_setg(errp,
1334                    "Snapshot with name '%s' already exists on device '%s'",
1335                    name, device);
1336         goto out;
1337     }
1338 
1339     /* 3. take the snapshot */
1340     sn = &state->sn;
1341     pstrcpy(sn->name, sizeof(sn->name), name);
1342     rt = g_get_real_time();
1343     sn->date_sec = rt / G_USEC_PER_SEC;
1344     sn->date_nsec = (rt % G_USEC_PER_SEC) * 1000;
1345     sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1346     if (replay_mode != REPLAY_MODE_NONE) {
1347         sn->icount = replay_get_current_icount();
1348     } else {
1349         sn->icount = -1ULL;
1350     }
1351 
1352     ret1 = bdrv_snapshot_create(bs, sn);
1353     if (ret1 < 0) {
1354         error_setg_errno(errp, -ret1,
1355                          "Failed to create snapshot '%s' on device '%s'",
1356                          name, device);
1357         goto out;
1358     }
1359 
1360     /* 4. succeed, mark a snapshot is created */
1361     state->created = true;
1362 
1363 out:
1364     aio_context_release(aio_context);
1365 }
1366 
1367 static void internal_snapshot_abort(BlkActionState *common)
1368 {
1369     InternalSnapshotState *state =
1370                              DO_UPCAST(InternalSnapshotState, common, common);
1371     BlockDriverState *bs = state->bs;
1372     QEMUSnapshotInfo *sn = &state->sn;
1373     AioContext *aio_context;
1374     Error *local_error = NULL;
1375 
1376     if (!state->created) {
1377         return;
1378     }
1379 
1380     aio_context = bdrv_get_aio_context(state->bs);
1381     aio_context_acquire(aio_context);
1382 
1383     if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
1384         error_reportf_err(local_error,
1385                           "Failed to delete snapshot with id '%s' and "
1386                           "name '%s' on device '%s' in abort: ",
1387                           sn->id_str, sn->name,
1388                           bdrv_get_device_name(bs));
1389     }
1390 
1391     aio_context_release(aio_context);
1392 }
1393 
1394 static void internal_snapshot_clean(BlkActionState *common)
1395 {
1396     InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
1397                                              common, common);
1398     AioContext *aio_context;
1399 
1400     if (!state->bs) {
1401         return;
1402     }
1403 
1404     aio_context = bdrv_get_aio_context(state->bs);
1405     aio_context_acquire(aio_context);
1406 
1407     bdrv_drained_end(state->bs);
1408 
1409     aio_context_release(aio_context);
1410 }
1411 
1412 /* external snapshot private data */
1413 typedef struct ExternalSnapshotState {
1414     BlkActionState common;
1415     BlockDriverState *old_bs;
1416     BlockDriverState *new_bs;
1417     bool overlay_appended;
1418 } ExternalSnapshotState;
1419 
1420 static void external_snapshot_prepare(BlkActionState *common,
1421                                       Error **errp)
1422 {
1423     int ret;
1424     int flags = 0;
1425     QDict *options = NULL;
1426     Error *local_err = NULL;
1427     /* Device and node name of the image to generate the snapshot from */
1428     const char *device;
1429     const char *node_name;
1430     /* Reference to the new image (for 'blockdev-snapshot') */
1431     const char *snapshot_ref;
1432     /* File name of the new image (for 'blockdev-snapshot-sync') */
1433     const char *new_image_file;
1434     ExternalSnapshotState *state =
1435                              DO_UPCAST(ExternalSnapshotState, common, common);
1436     TransactionAction *action = common->action;
1437     AioContext *aio_context;
1438     uint64_t perm, shared;
1439 
1440     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
1441      * purpose but a different set of parameters */
1442     switch (action->type) {
1443     case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
1444         {
1445             BlockdevSnapshot *s = action->u.blockdev_snapshot.data;
1446             device = s->node;
1447             node_name = s->node;
1448             new_image_file = NULL;
1449             snapshot_ref = s->overlay;
1450         }
1451         break;
1452     case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
1453         {
1454             BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
1455             device = s->has_device ? s->device : NULL;
1456             node_name = s->has_node_name ? s->node_name : NULL;
1457             new_image_file = s->snapshot_file;
1458             snapshot_ref = NULL;
1459         }
1460         break;
1461     default:
1462         g_assert_not_reached();
1463     }
1464 
1465     /* start processing */
1466     if (action_check_completion_mode(common, errp) < 0) {
1467         return;
1468     }
1469 
1470     state->old_bs = bdrv_lookup_bs(device, node_name, errp);
1471     if (!state->old_bs) {
1472         return;
1473     }
1474 
1475     aio_context = bdrv_get_aio_context(state->old_bs);
1476     aio_context_acquire(aio_context);
1477 
1478     /* Paired with .clean() */
1479     bdrv_drained_begin(state->old_bs);
1480 
1481     if (!bdrv_is_inserted(state->old_bs)) {
1482         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
1483         goto out;
1484     }
1485 
1486     if (bdrv_op_is_blocked(state->old_bs,
1487                            BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) {
1488         goto out;
1489     }
1490 
1491     if (!bdrv_is_read_only(state->old_bs)) {
1492         if (bdrv_flush(state->old_bs)) {
1493             error_setg(errp, QERR_IO_ERROR);
1494             goto out;
1495         }
1496     }
1497 
1498     if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
1499         BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
1500         const char *format = s->has_format ? s->format : "qcow2";
1501         enum NewImageMode mode;
1502         const char *snapshot_node_name =
1503             s->has_snapshot_node_name ? s->snapshot_node_name : NULL;
1504 
1505         if (node_name && !snapshot_node_name) {
1506             error_setg(errp, "New overlay node-name missing");
1507             goto out;
1508         }
1509 
1510         if (snapshot_node_name &&
1511             bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
1512             error_setg(errp, "New overlay node-name already in use");
1513             goto out;
1514         }
1515 
1516         flags = state->old_bs->open_flags;
1517         flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ);
1518         flags |= BDRV_O_NO_BACKING;
1519 
1520         /* create new image w/backing file */
1521         mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
1522         if (mode != NEW_IMAGE_MODE_EXISTING) {
1523             int64_t size = bdrv_getlength(state->old_bs);
1524             if (size < 0) {
1525                 error_setg_errno(errp, -size, "bdrv_getlength failed");
1526                 goto out;
1527             }
1528             bdrv_refresh_filename(state->old_bs);
1529             bdrv_img_create(new_image_file, format,
1530                             state->old_bs->filename,
1531                             state->old_bs->drv->format_name,
1532                             NULL, size, flags, false, &local_err);
1533             if (local_err) {
1534                 error_propagate(errp, local_err);
1535                 goto out;
1536             }
1537         }
1538 
1539         options = qdict_new();
1540         if (snapshot_node_name) {
1541             qdict_put_str(options, "node-name", snapshot_node_name);
1542         }
1543         qdict_put_str(options, "driver", format);
1544     }
1545 
1546     state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
1547                               errp);
1548     /* We will manually add the backing_hd field to the bs later */
1549     if (!state->new_bs) {
1550         goto out;
1551     }
1552 
1553     /*
1554      * Allow attaching a backing file to an overlay that's already in use only
1555      * if the parents don't assume that they are already seeing a valid image.
1556      * (Specifically, allow it as a mirror target, which is write-only access.)
1557      */
1558     bdrv_get_cumulative_perm(state->new_bs, &perm, &shared);
1559     if (perm & BLK_PERM_CONSISTENT_READ) {
1560         error_setg(errp, "The overlay is already in use");
1561         goto out;
1562     }
1563 
1564     if (state->new_bs->drv->is_filter) {
1565         error_setg(errp, "Filters cannot be used as overlays");
1566         goto out;
1567     }
1568 
1569     if (bdrv_cow_child(state->new_bs)) {
1570         error_setg(errp, "The overlay already has a backing image");
1571         goto out;
1572     }
1573 
1574     if (!state->new_bs->drv->supports_backing) {
1575         error_setg(errp, "The overlay does not support backing images");
1576         goto out;
1577     }
1578 
1579     ret = bdrv_append(state->new_bs, state->old_bs, errp);
1580     if (ret < 0) {
1581         goto out;
1582     }
1583     state->overlay_appended = true;
1584 
1585 out:
1586     aio_context_release(aio_context);
1587 }
1588 
1589 static void external_snapshot_commit(BlkActionState *common)
1590 {
1591     ExternalSnapshotState *state =
1592                              DO_UPCAST(ExternalSnapshotState, common, common);
1593     AioContext *aio_context;
1594 
1595     aio_context = bdrv_get_aio_context(state->old_bs);
1596     aio_context_acquire(aio_context);
1597 
1598     /* We don't need (or want) to use the transactional
1599      * bdrv_reopen_multiple() across all the entries at once, because we
1600      * don't want to abort all of them if one of them fails the reopen */
1601     if (!qatomic_read(&state->old_bs->copy_on_read)) {
1602         bdrv_reopen_set_read_only(state->old_bs, true, NULL);
1603     }
1604 
1605     aio_context_release(aio_context);
1606 }
1607 
1608 static void external_snapshot_abort(BlkActionState *common)
1609 {
1610     ExternalSnapshotState *state =
1611                              DO_UPCAST(ExternalSnapshotState, common, common);
1612     if (state->new_bs) {
1613         if (state->overlay_appended) {
1614             AioContext *aio_context;
1615             AioContext *tmp_context;
1616             int ret;
1617 
1618             aio_context = bdrv_get_aio_context(state->old_bs);
1619             aio_context_acquire(aio_context);
1620 
1621             bdrv_ref(state->old_bs);   /* we can't let bdrv_set_backind_hd()
1622                                           close state->old_bs; we need it */
1623             bdrv_set_backing_hd(state->new_bs, NULL, &error_abort);
1624 
1625             /*
1626              * The call to bdrv_set_backing_hd() above returns state->old_bs to
1627              * the main AioContext. As we're still going to be using it, return
1628              * it to the AioContext it was before.
1629              */
1630             tmp_context = bdrv_get_aio_context(state->old_bs);
1631             if (aio_context != tmp_context) {
1632                 aio_context_release(aio_context);
1633                 aio_context_acquire(tmp_context);
1634 
1635                 ret = bdrv_try_set_aio_context(state->old_bs,
1636                                                aio_context, NULL);
1637                 assert(ret == 0);
1638 
1639                 aio_context_release(tmp_context);
1640                 aio_context_acquire(aio_context);
1641             }
1642 
1643             bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
1644             bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
1645 
1646             aio_context_release(aio_context);
1647         }
1648     }
1649 }
1650 
1651 static void external_snapshot_clean(BlkActionState *common)
1652 {
1653     ExternalSnapshotState *state =
1654                              DO_UPCAST(ExternalSnapshotState, common, common);
1655     AioContext *aio_context;
1656 
1657     if (!state->old_bs) {
1658         return;
1659     }
1660 
1661     aio_context = bdrv_get_aio_context(state->old_bs);
1662     aio_context_acquire(aio_context);
1663 
1664     bdrv_drained_end(state->old_bs);
1665     bdrv_unref(state->new_bs);
1666 
1667     aio_context_release(aio_context);
1668 }
1669 
1670 typedef struct DriveBackupState {
1671     BlkActionState common;
1672     BlockDriverState *bs;
1673     BlockJob *job;
1674 } DriveBackupState;
1675 
1676 static BlockJob *do_backup_common(BackupCommon *backup,
1677                                   BlockDriverState *bs,
1678                                   BlockDriverState *target_bs,
1679                                   AioContext *aio_context,
1680                                   JobTxn *txn, Error **errp);
1681 
1682 static void drive_backup_prepare(BlkActionState *common, Error **errp)
1683 {
1684     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1685     DriveBackup *backup;
1686     BlockDriverState *bs;
1687     BlockDriverState *target_bs;
1688     BlockDriverState *source = NULL;
1689     AioContext *aio_context;
1690     AioContext *old_context;
1691     QDict *options;
1692     Error *local_err = NULL;
1693     int flags;
1694     int64_t size;
1695     bool set_backing_hd = false;
1696     int ret;
1697 
1698     assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
1699     backup = common->action->u.drive_backup.data;
1700 
1701     if (!backup->has_mode) {
1702         backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
1703     }
1704 
1705     bs = bdrv_lookup_bs(backup->device, backup->device, errp);
1706     if (!bs) {
1707         return;
1708     }
1709 
1710     if (!bs->drv) {
1711         error_setg(errp, "Device has no medium");
1712         return;
1713     }
1714 
1715     aio_context = bdrv_get_aio_context(bs);
1716     aio_context_acquire(aio_context);
1717 
1718     state->bs = bs;
1719     /* Paired with .clean() */
1720     bdrv_drained_begin(bs);
1721 
1722     if (!backup->has_format) {
1723         backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ?
1724                          NULL : (char *) bs->drv->format_name;
1725     }
1726 
1727     /* Early check to avoid creating target */
1728     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
1729         goto out;
1730     }
1731 
1732     flags = bs->open_flags | BDRV_O_RDWR;
1733 
1734     /*
1735      * See if we have a backing HD we can use to create our new image
1736      * on top of.
1737      */
1738     if (backup->sync == MIRROR_SYNC_MODE_TOP) {
1739         /*
1740          * Backup will not replace the source by the target, so none
1741          * of the filters skipped here will be removed (in contrast to
1742          * mirror).  Therefore, we can skip all of them when looking
1743          * for the first COW relationship.
1744          */
1745         source = bdrv_cow_bs(bdrv_skip_filters(bs));
1746         if (!source) {
1747             backup->sync = MIRROR_SYNC_MODE_FULL;
1748         }
1749     }
1750     if (backup->sync == MIRROR_SYNC_MODE_NONE) {
1751         source = bs;
1752         flags |= BDRV_O_NO_BACKING;
1753         set_backing_hd = true;
1754     }
1755 
1756     size = bdrv_getlength(bs);
1757     if (size < 0) {
1758         error_setg_errno(errp, -size, "bdrv_getlength failed");
1759         goto out;
1760     }
1761 
1762     if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
1763         assert(backup->format);
1764         if (source) {
1765             /* Implicit filters should not appear in the filename */
1766             BlockDriverState *explicit_backing =
1767                 bdrv_skip_implicit_filters(source);
1768 
1769             bdrv_refresh_filename(explicit_backing);
1770             bdrv_img_create(backup->target, backup->format,
1771                             explicit_backing->filename,
1772                             explicit_backing->drv->format_name, NULL,
1773                             size, flags, false, &local_err);
1774         } else {
1775             bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
1776                             size, flags, false, &local_err);
1777         }
1778     }
1779 
1780     if (local_err) {
1781         error_propagate(errp, local_err);
1782         goto out;
1783     }
1784 
1785     options = qdict_new();
1786     qdict_put_str(options, "discard", "unmap");
1787     qdict_put_str(options, "detect-zeroes", "unmap");
1788     if (backup->format) {
1789         qdict_put_str(options, "driver", backup->format);
1790     }
1791 
1792     target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
1793     if (!target_bs) {
1794         goto out;
1795     }
1796 
1797     /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
1798     old_context = bdrv_get_aio_context(target_bs);
1799     aio_context_release(aio_context);
1800     aio_context_acquire(old_context);
1801 
1802     ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
1803     if (ret < 0) {
1804         bdrv_unref(target_bs);
1805         aio_context_release(old_context);
1806         return;
1807     }
1808 
1809     aio_context_release(old_context);
1810     aio_context_acquire(aio_context);
1811 
1812     if (set_backing_hd) {
1813         if (bdrv_set_backing_hd(target_bs, source, errp) < 0) {
1814             goto unref;
1815         }
1816     }
1817 
1818     state->job = do_backup_common(qapi_DriveBackup_base(backup),
1819                                   bs, target_bs, aio_context,
1820                                   common->block_job_txn, errp);
1821 
1822 unref:
1823     bdrv_unref(target_bs);
1824 out:
1825     aio_context_release(aio_context);
1826 }
1827 
1828 static void drive_backup_commit(BlkActionState *common)
1829 {
1830     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1831     AioContext *aio_context;
1832 
1833     aio_context = bdrv_get_aio_context(state->bs);
1834     aio_context_acquire(aio_context);
1835 
1836     assert(state->job);
1837     job_start(&state->job->job);
1838 
1839     aio_context_release(aio_context);
1840 }
1841 
1842 static void drive_backup_abort(BlkActionState *common)
1843 {
1844     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1845 
1846     if (state->job) {
1847         AioContext *aio_context;
1848 
1849         aio_context = bdrv_get_aio_context(state->bs);
1850         aio_context_acquire(aio_context);
1851 
1852         job_cancel_sync(&state->job->job, true);
1853 
1854         aio_context_release(aio_context);
1855     }
1856 }
1857 
1858 static void drive_backup_clean(BlkActionState *common)
1859 {
1860     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1861     AioContext *aio_context;
1862 
1863     if (!state->bs) {
1864         return;
1865     }
1866 
1867     aio_context = bdrv_get_aio_context(state->bs);
1868     aio_context_acquire(aio_context);
1869 
1870     bdrv_drained_end(state->bs);
1871 
1872     aio_context_release(aio_context);
1873 }
1874 
1875 typedef struct BlockdevBackupState {
1876     BlkActionState common;
1877     BlockDriverState *bs;
1878     BlockJob *job;
1879 } BlockdevBackupState;
1880 
1881 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
1882 {
1883     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1884     BlockdevBackup *backup;
1885     BlockDriverState *bs;
1886     BlockDriverState *target_bs;
1887     AioContext *aio_context;
1888     AioContext *old_context;
1889     int ret;
1890 
1891     assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
1892     backup = common->action->u.blockdev_backup.data;
1893 
1894     bs = bdrv_lookup_bs(backup->device, backup->device, errp);
1895     if (!bs) {
1896         return;
1897     }
1898 
1899     target_bs = bdrv_lookup_bs(backup->target, backup->target, errp);
1900     if (!target_bs) {
1901         return;
1902     }
1903 
1904     /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
1905     aio_context = bdrv_get_aio_context(bs);
1906     old_context = bdrv_get_aio_context(target_bs);
1907     aio_context_acquire(old_context);
1908 
1909     ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
1910     if (ret < 0) {
1911         aio_context_release(old_context);
1912         return;
1913     }
1914 
1915     aio_context_release(old_context);
1916     aio_context_acquire(aio_context);
1917     state->bs = bs;
1918 
1919     /* Paired with .clean() */
1920     bdrv_drained_begin(state->bs);
1921 
1922     state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
1923                                   bs, target_bs, aio_context,
1924                                   common->block_job_txn, errp);
1925 
1926     aio_context_release(aio_context);
1927 }
1928 
1929 static void blockdev_backup_commit(BlkActionState *common)
1930 {
1931     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1932     AioContext *aio_context;
1933 
1934     aio_context = bdrv_get_aio_context(state->bs);
1935     aio_context_acquire(aio_context);
1936 
1937     assert(state->job);
1938     job_start(&state->job->job);
1939 
1940     aio_context_release(aio_context);
1941 }
1942 
1943 static void blockdev_backup_abort(BlkActionState *common)
1944 {
1945     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1946 
1947     if (state->job) {
1948         AioContext *aio_context;
1949 
1950         aio_context = bdrv_get_aio_context(state->bs);
1951         aio_context_acquire(aio_context);
1952 
1953         job_cancel_sync(&state->job->job, true);
1954 
1955         aio_context_release(aio_context);
1956     }
1957 }
1958 
1959 static void blockdev_backup_clean(BlkActionState *common)
1960 {
1961     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1962     AioContext *aio_context;
1963 
1964     if (!state->bs) {
1965         return;
1966     }
1967 
1968     aio_context = bdrv_get_aio_context(state->bs);
1969     aio_context_acquire(aio_context);
1970 
1971     bdrv_drained_end(state->bs);
1972 
1973     aio_context_release(aio_context);
1974 }
1975 
1976 typedef struct BlockDirtyBitmapState {
1977     BlkActionState common;
1978     BdrvDirtyBitmap *bitmap;
1979     BlockDriverState *bs;
1980     HBitmap *backup;
1981     bool prepared;
1982     bool was_enabled;
1983 } BlockDirtyBitmapState;
1984 
1985 static void block_dirty_bitmap_add_prepare(BlkActionState *common,
1986                                            Error **errp)
1987 {
1988     Error *local_err = NULL;
1989     BlockDirtyBitmapAdd *action;
1990     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
1991                                              common, common);
1992 
1993     if (action_check_completion_mode(common, errp) < 0) {
1994         return;
1995     }
1996 
1997     action = common->action->u.block_dirty_bitmap_add.data;
1998     /* AIO context taken and released within qmp_block_dirty_bitmap_add */
1999     qmp_block_dirty_bitmap_add(action->node, action->name,
2000                                action->has_granularity, action->granularity,
2001                                action->has_persistent, action->persistent,
2002                                action->has_disabled, action->disabled,
2003                                &local_err);
2004 
2005     if (!local_err) {
2006         state->prepared = true;
2007     } else {
2008         error_propagate(errp, local_err);
2009     }
2010 }
2011 
2012 static void block_dirty_bitmap_add_abort(BlkActionState *common)
2013 {
2014     BlockDirtyBitmapAdd *action;
2015     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2016                                              common, common);
2017 
2018     action = common->action->u.block_dirty_bitmap_add.data;
2019     /* Should not be able to fail: IF the bitmap was added via .prepare(),
2020      * then the node reference and bitmap name must have been valid.
2021      */
2022     if (state->prepared) {
2023         qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
2024     }
2025 }
2026 
2027 static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
2028                                              Error **errp)
2029 {
2030     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2031                                              common, common);
2032     BlockDirtyBitmap *action;
2033 
2034     if (action_check_completion_mode(common, errp) < 0) {
2035         return;
2036     }
2037 
2038     action = common->action->u.block_dirty_bitmap_clear.data;
2039     state->bitmap = block_dirty_bitmap_lookup(action->node,
2040                                               action->name,
2041                                               &state->bs,
2042                                               errp);
2043     if (!state->bitmap) {
2044         return;
2045     }
2046 
2047     if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_DEFAULT, errp)) {
2048         return;
2049     }
2050 
2051     bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
2052 }
2053 
2054 static void block_dirty_bitmap_restore(BlkActionState *common)
2055 {
2056     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2057                                              common, common);
2058 
2059     if (state->backup) {
2060         bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
2061     }
2062 }
2063 
2064 static void block_dirty_bitmap_free_backup(BlkActionState *common)
2065 {
2066     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2067                                              common, common);
2068 
2069     hbitmap_free(state->backup);
2070 }
2071 
2072 static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
2073                                               Error **errp)
2074 {
2075     BlockDirtyBitmap *action;
2076     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2077                                              common, common);
2078 
2079     if (action_check_completion_mode(common, errp) < 0) {
2080         return;
2081     }
2082 
2083     action = common->action->u.block_dirty_bitmap_enable.data;
2084     state->bitmap = block_dirty_bitmap_lookup(action->node,
2085                                               action->name,
2086                                               NULL,
2087                                               errp);
2088     if (!state->bitmap) {
2089         return;
2090     }
2091 
2092     if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2093         return;
2094     }
2095 
2096     state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
2097     bdrv_enable_dirty_bitmap(state->bitmap);
2098 }
2099 
2100 static void block_dirty_bitmap_enable_abort(BlkActionState *common)
2101 {
2102     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2103                                              common, common);
2104 
2105     if (!state->was_enabled) {
2106         bdrv_disable_dirty_bitmap(state->bitmap);
2107     }
2108 }
2109 
2110 static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
2111                                                Error **errp)
2112 {
2113     BlockDirtyBitmap *action;
2114     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2115                                              common, common);
2116 
2117     if (action_check_completion_mode(common, errp) < 0) {
2118         return;
2119     }
2120 
2121     action = common->action->u.block_dirty_bitmap_disable.data;
2122     state->bitmap = block_dirty_bitmap_lookup(action->node,
2123                                               action->name,
2124                                               NULL,
2125                                               errp);
2126     if (!state->bitmap) {
2127         return;
2128     }
2129 
2130     if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2131         return;
2132     }
2133 
2134     state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
2135     bdrv_disable_dirty_bitmap(state->bitmap);
2136 }
2137 
2138 static void block_dirty_bitmap_disable_abort(BlkActionState *common)
2139 {
2140     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2141                                              common, common);
2142 
2143     if (state->was_enabled) {
2144         bdrv_enable_dirty_bitmap(state->bitmap);
2145     }
2146 }
2147 
2148 static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
2149                                              Error **errp)
2150 {
2151     BlockDirtyBitmapMerge *action;
2152     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2153                                              common, common);
2154 
2155     if (action_check_completion_mode(common, errp) < 0) {
2156         return;
2157     }
2158 
2159     action = common->action->u.block_dirty_bitmap_merge.data;
2160 
2161     state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
2162                                              action->bitmaps, &state->backup,
2163                                              errp);
2164 }
2165 
2166 static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
2167                                               Error **errp)
2168 {
2169     BlockDirtyBitmap *action;
2170     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2171                                              common, common);
2172 
2173     if (action_check_completion_mode(common, errp) < 0) {
2174         return;
2175     }
2176 
2177     action = common->action->u.block_dirty_bitmap_remove.data;
2178 
2179     state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
2180                                               false, &state->bs, errp);
2181     if (state->bitmap) {
2182         bdrv_dirty_bitmap_skip_store(state->bitmap, true);
2183         bdrv_dirty_bitmap_set_busy(state->bitmap, true);
2184     }
2185 }
2186 
2187 static void block_dirty_bitmap_remove_abort(BlkActionState *common)
2188 {
2189     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2190                                              common, common);
2191 
2192     if (state->bitmap) {
2193         bdrv_dirty_bitmap_skip_store(state->bitmap, false);
2194         bdrv_dirty_bitmap_set_busy(state->bitmap, false);
2195     }
2196 }
2197 
2198 static void block_dirty_bitmap_remove_commit(BlkActionState *common)
2199 {
2200     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2201                                              common, common);
2202 
2203     bdrv_dirty_bitmap_set_busy(state->bitmap, false);
2204     bdrv_release_dirty_bitmap(state->bitmap);
2205 }
2206 
2207 static void abort_prepare(BlkActionState *common, Error **errp)
2208 {
2209     error_setg(errp, "Transaction aborted using Abort action");
2210 }
2211 
2212 static void abort_commit(BlkActionState *common)
2213 {
2214     g_assert_not_reached(); /* this action never succeeds */
2215 }
2216 
2217 static const BlkActionOps actions[] = {
2218     [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
2219         .instance_size = sizeof(ExternalSnapshotState),
2220         .prepare  = external_snapshot_prepare,
2221         .commit   = external_snapshot_commit,
2222         .abort = external_snapshot_abort,
2223         .clean = external_snapshot_clean,
2224     },
2225     [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
2226         .instance_size = sizeof(ExternalSnapshotState),
2227         .prepare  = external_snapshot_prepare,
2228         .commit   = external_snapshot_commit,
2229         .abort = external_snapshot_abort,
2230         .clean = external_snapshot_clean,
2231     },
2232     [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
2233         .instance_size = sizeof(DriveBackupState),
2234         .prepare = drive_backup_prepare,
2235         .commit = drive_backup_commit,
2236         .abort = drive_backup_abort,
2237         .clean = drive_backup_clean,
2238     },
2239     [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
2240         .instance_size = sizeof(BlockdevBackupState),
2241         .prepare = blockdev_backup_prepare,
2242         .commit = blockdev_backup_commit,
2243         .abort = blockdev_backup_abort,
2244         .clean = blockdev_backup_clean,
2245     },
2246     [TRANSACTION_ACTION_KIND_ABORT] = {
2247         .instance_size = sizeof(BlkActionState),
2248         .prepare = abort_prepare,
2249         .commit = abort_commit,
2250     },
2251     [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
2252         .instance_size = sizeof(InternalSnapshotState),
2253         .prepare  = internal_snapshot_prepare,
2254         .abort = internal_snapshot_abort,
2255         .clean = internal_snapshot_clean,
2256     },
2257     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
2258         .instance_size = sizeof(BlockDirtyBitmapState),
2259         .prepare = block_dirty_bitmap_add_prepare,
2260         .abort = block_dirty_bitmap_add_abort,
2261     },
2262     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
2263         .instance_size = sizeof(BlockDirtyBitmapState),
2264         .prepare = block_dirty_bitmap_clear_prepare,
2265         .commit = block_dirty_bitmap_free_backup,
2266         .abort = block_dirty_bitmap_restore,
2267     },
2268     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
2269         .instance_size = sizeof(BlockDirtyBitmapState),
2270         .prepare = block_dirty_bitmap_enable_prepare,
2271         .abort = block_dirty_bitmap_enable_abort,
2272     },
2273     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
2274         .instance_size = sizeof(BlockDirtyBitmapState),
2275         .prepare = block_dirty_bitmap_disable_prepare,
2276         .abort = block_dirty_bitmap_disable_abort,
2277     },
2278     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
2279         .instance_size = sizeof(BlockDirtyBitmapState),
2280         .prepare = block_dirty_bitmap_merge_prepare,
2281         .commit = block_dirty_bitmap_free_backup,
2282         .abort = block_dirty_bitmap_restore,
2283     },
2284     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
2285         .instance_size = sizeof(BlockDirtyBitmapState),
2286         .prepare = block_dirty_bitmap_remove_prepare,
2287         .commit = block_dirty_bitmap_remove_commit,
2288         .abort = block_dirty_bitmap_remove_abort,
2289     },
2290     /* Where are transactions for MIRROR, COMMIT and STREAM?
2291      * Although these blockjobs use transaction callbacks like the backup job,
2292      * these jobs do not necessarily adhere to transaction semantics.
2293      * These jobs may not fully undo all of their actions on abort, nor do they
2294      * necessarily work in transactions with more than one job in them.
2295      */
2296 };
2297 
2298 /**
2299  * Allocate a TransactionProperties structure if necessary, and fill
2300  * that structure with desired defaults if they are unset.
2301  */
2302 static TransactionProperties *get_transaction_properties(
2303     TransactionProperties *props)
2304 {
2305     if (!props) {
2306         props = g_new0(TransactionProperties, 1);
2307     }
2308 
2309     if (!props->has_completion_mode) {
2310         props->has_completion_mode = true;
2311         props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
2312     }
2313 
2314     return props;
2315 }
2316 
2317 /*
2318  * 'Atomic' group operations.  The operations are performed as a set, and if
2319  * any fail then we roll back all operations in the group.
2320  *
2321  * Always run under BQL.
2322  */
2323 void qmp_transaction(TransactionActionList *dev_list,
2324                      bool has_props,
2325                      struct TransactionProperties *props,
2326                      Error **errp)
2327 {
2328     TransactionActionList *dev_entry = dev_list;
2329     JobTxn *block_job_txn = NULL;
2330     BlkActionState *state, *next;
2331     Error *local_err = NULL;
2332 
2333     GLOBAL_STATE_CODE();
2334 
2335     QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
2336     QTAILQ_INIT(&snap_bdrv_states);
2337 
2338     /* Does this transaction get canceled as a group on failure?
2339      * If not, we don't really need to make a JobTxn.
2340      */
2341     props = get_transaction_properties(props);
2342     if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
2343         block_job_txn = job_txn_new();
2344     }
2345 
2346     /* drain all i/o before any operations */
2347     bdrv_drain_all();
2348 
2349     /* We don't do anything in this loop that commits us to the operations */
2350     while (NULL != dev_entry) {
2351         TransactionAction *dev_info = NULL;
2352         const BlkActionOps *ops;
2353 
2354         dev_info = dev_entry->value;
2355         dev_entry = dev_entry->next;
2356 
2357         assert(dev_info->type < ARRAY_SIZE(actions));
2358 
2359         ops = &actions[dev_info->type];
2360         assert(ops->instance_size > 0);
2361 
2362         state = g_malloc0(ops->instance_size);
2363         state->ops = ops;
2364         state->action = dev_info;
2365         state->block_job_txn = block_job_txn;
2366         state->txn_props = props;
2367         QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
2368 
2369         state->ops->prepare(state, &local_err);
2370         if (local_err) {
2371             error_propagate(errp, local_err);
2372             goto delete_and_fail;
2373         }
2374     }
2375 
2376     QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
2377         if (state->ops->commit) {
2378             state->ops->commit(state);
2379         }
2380     }
2381 
2382     /* success */
2383     goto exit;
2384 
2385 delete_and_fail:
2386     /* failure, and it is all-or-none; roll back all operations */
2387     QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
2388         if (state->ops->abort) {
2389             state->ops->abort(state);
2390         }
2391     }
2392 exit:
2393     QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
2394         if (state->ops->clean) {
2395             state->ops->clean(state);
2396         }
2397         g_free(state);
2398     }
2399     if (!has_props) {
2400         qapi_free_TransactionProperties(props);
2401     }
2402     job_txn_unref(block_job_txn);
2403 }
2404 
2405 BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
2406                                                               const char *name,
2407                                                               Error **errp)
2408 {
2409     BdrvDirtyBitmap *bitmap;
2410     BlockDriverState *bs;
2411     BlockDirtyBitmapSha256 *ret = NULL;
2412     char *sha256;
2413 
2414     bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
2415     if (!bitmap || !bs) {
2416         return NULL;
2417     }
2418 
2419     sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
2420     if (sha256 == NULL) {
2421         return NULL;
2422     }
2423 
2424     ret = g_new(BlockDirtyBitmapSha256, 1);
2425     ret->sha256 = sha256;
2426 
2427     return ret;
2428 }
2429 
2430 void coroutine_fn qmp_block_resize(bool has_device, const char *device,
2431                                    bool has_node_name, const char *node_name,
2432                                    int64_t size, Error **errp)
2433 {
2434     Error *local_err = NULL;
2435     BlockBackend *blk;
2436     BlockDriverState *bs;
2437     AioContext *old_ctx;
2438 
2439     bs = bdrv_lookup_bs(has_device ? device : NULL,
2440                         has_node_name ? node_name : NULL,
2441                         &local_err);
2442     if (local_err) {
2443         error_propagate(errp, local_err);
2444         return;
2445     }
2446 
2447     if (size < 0) {
2448         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
2449         return;
2450     }
2451 
2452     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
2453         error_setg(errp, QERR_DEVICE_IN_USE, device);
2454         return;
2455     }
2456 
2457     blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
2458     if (!blk) {
2459         return;
2460     }
2461 
2462     bdrv_co_lock(bs);
2463     bdrv_drained_begin(bs);
2464     bdrv_co_unlock(bs);
2465 
2466     old_ctx = bdrv_co_enter(bs);
2467     blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
2468     bdrv_co_leave(bs, old_ctx);
2469 
2470     bdrv_co_lock(bs);
2471     bdrv_drained_end(bs);
2472     blk_unref(blk);
2473     bdrv_co_unlock(bs);
2474 }
2475 
2476 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
2477                       bool has_base, const char *base,
2478                       bool has_base_node, const char *base_node,
2479                       bool has_backing_file, const char *backing_file,
2480                       bool has_bottom, const char *bottom,
2481                       bool has_speed, int64_t speed,
2482                       bool has_on_error, BlockdevOnError on_error,
2483                       bool has_filter_node_name, const char *filter_node_name,
2484                       bool has_auto_finalize, bool auto_finalize,
2485                       bool has_auto_dismiss, bool auto_dismiss,
2486                       Error **errp)
2487 {
2488     BlockDriverState *bs, *iter, *iter_end;
2489     BlockDriverState *base_bs = NULL;
2490     BlockDriverState *bottom_bs = NULL;
2491     AioContext *aio_context;
2492     Error *local_err = NULL;
2493     int job_flags = JOB_DEFAULT;
2494 
2495     if (has_base && has_base_node) {
2496         error_setg(errp, "'base' and 'base-node' cannot be specified "
2497                    "at the same time");
2498         return;
2499     }
2500 
2501     if (has_base && has_bottom) {
2502         error_setg(errp, "'base' and 'bottom' cannot be specified "
2503                    "at the same time");
2504         return;
2505     }
2506 
2507     if (has_bottom && has_base_node) {
2508         error_setg(errp, "'bottom' and 'base-node' cannot be specified "
2509                    "at the same time");
2510         return;
2511     }
2512 
2513     if (!has_on_error) {
2514         on_error = BLOCKDEV_ON_ERROR_REPORT;
2515     }
2516 
2517     bs = bdrv_lookup_bs(device, device, errp);
2518     if (!bs) {
2519         return;
2520     }
2521 
2522     aio_context = bdrv_get_aio_context(bs);
2523     aio_context_acquire(aio_context);
2524 
2525     if (has_base) {
2526         base_bs = bdrv_find_backing_image(bs, base);
2527         if (base_bs == NULL) {
2528             error_setg(errp, "Can't find '%s' in the backing chain", base);
2529             goto out;
2530         }
2531         assert(bdrv_get_aio_context(base_bs) == aio_context);
2532     }
2533 
2534     if (has_base_node) {
2535         base_bs = bdrv_lookup_bs(NULL, base_node, errp);
2536         if (!base_bs) {
2537             goto out;
2538         }
2539         if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
2540             error_setg(errp, "Node '%s' is not a backing image of '%s'",
2541                        base_node, device);
2542             goto out;
2543         }
2544         assert(bdrv_get_aio_context(base_bs) == aio_context);
2545         bdrv_refresh_filename(base_bs);
2546     }
2547 
2548     if (has_bottom) {
2549         bottom_bs = bdrv_lookup_bs(NULL, bottom, errp);
2550         if (!bottom_bs) {
2551             goto out;
2552         }
2553         if (!bottom_bs->drv) {
2554             error_setg(errp, "Node '%s' is not open", bottom);
2555             goto out;
2556         }
2557         if (bottom_bs->drv->is_filter) {
2558             error_setg(errp, "Node '%s' is a filter, use a non-filter node "
2559                        "as 'bottom'", bottom);
2560             goto out;
2561         }
2562         if (!bdrv_chain_contains(bs, bottom_bs)) {
2563             error_setg(errp, "Node '%s' is not in a chain starting from '%s'",
2564                        bottom, device);
2565             goto out;
2566         }
2567         assert(bdrv_get_aio_context(bottom_bs) == aio_context);
2568     }
2569 
2570     /*
2571      * Check for op blockers in the whole chain between bs and base (or bottom)
2572      */
2573     iter_end = has_bottom ? bdrv_filter_or_cow_bs(bottom_bs) : base_bs;
2574     for (iter = bs; iter && iter != iter_end;
2575          iter = bdrv_filter_or_cow_bs(iter))
2576     {
2577         if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
2578             goto out;
2579         }
2580     }
2581 
2582     /* if we are streaming the entire chain, the result will have no backing
2583      * file, and specifying one is therefore an error */
2584     if (base_bs == NULL && has_backing_file) {
2585         error_setg(errp, "backing file specified, but streaming the "
2586                          "entire chain");
2587         goto out;
2588     }
2589 
2590     if (has_auto_finalize && !auto_finalize) {
2591         job_flags |= JOB_MANUAL_FINALIZE;
2592     }
2593     if (has_auto_dismiss && !auto_dismiss) {
2594         job_flags |= JOB_MANUAL_DISMISS;
2595     }
2596 
2597     stream_start(has_job_id ? job_id : NULL, bs, base_bs, backing_file,
2598                  bottom_bs, job_flags, has_speed ? speed : 0, on_error,
2599                  filter_node_name, &local_err);
2600     if (local_err) {
2601         error_propagate(errp, local_err);
2602         goto out;
2603     }
2604 
2605     trace_qmp_block_stream(bs);
2606 
2607 out:
2608     aio_context_release(aio_context);
2609 }
2610 
2611 void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
2612                       bool has_base_node, const char *base_node,
2613                       bool has_base, const char *base,
2614                       bool has_top_node, const char *top_node,
2615                       bool has_top, const char *top,
2616                       bool has_backing_file, const char *backing_file,
2617                       bool has_speed, int64_t speed,
2618                       bool has_on_error, BlockdevOnError on_error,
2619                       bool has_filter_node_name, const char *filter_node_name,
2620                       bool has_auto_finalize, bool auto_finalize,
2621                       bool has_auto_dismiss, bool auto_dismiss,
2622                       Error **errp)
2623 {
2624     BlockDriverState *bs;
2625     BlockDriverState *iter;
2626     BlockDriverState *base_bs, *top_bs;
2627     AioContext *aio_context;
2628     Error *local_err = NULL;
2629     int job_flags = JOB_DEFAULT;
2630     uint64_t top_perm, top_shared;
2631 
2632     if (!has_speed) {
2633         speed = 0;
2634     }
2635     if (!has_on_error) {
2636         on_error = BLOCKDEV_ON_ERROR_REPORT;
2637     }
2638     if (!has_filter_node_name) {
2639         filter_node_name = NULL;
2640     }
2641     if (has_auto_finalize && !auto_finalize) {
2642         job_flags |= JOB_MANUAL_FINALIZE;
2643     }
2644     if (has_auto_dismiss && !auto_dismiss) {
2645         job_flags |= JOB_MANUAL_DISMISS;
2646     }
2647 
2648     /* Important Note:
2649      *  libvirt relies on the DeviceNotFound error class in order to probe for
2650      *  live commit feature versions; for this to work, we must make sure to
2651      *  perform the device lookup before any generic errors that may occur in a
2652      *  scenario in which all optional arguments are omitted. */
2653     bs = qmp_get_root_bs(device, &local_err);
2654     if (!bs) {
2655         bs = bdrv_lookup_bs(device, device, NULL);
2656         if (!bs) {
2657             error_free(local_err);
2658             error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
2659                       "Device '%s' not found", device);
2660         } else {
2661             error_propagate(errp, local_err);
2662         }
2663         return;
2664     }
2665 
2666     aio_context = bdrv_get_aio_context(bs);
2667     aio_context_acquire(aio_context);
2668 
2669     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
2670         goto out;
2671     }
2672 
2673     /* default top_bs is the active layer */
2674     top_bs = bs;
2675 
2676     if (has_top_node && has_top) {
2677         error_setg(errp, "'top-node' and 'top' are mutually exclusive");
2678         goto out;
2679     } else if (has_top_node) {
2680         top_bs = bdrv_lookup_bs(NULL, top_node, errp);
2681         if (top_bs == NULL) {
2682             goto out;
2683         }
2684         if (!bdrv_chain_contains(bs, top_bs)) {
2685             error_setg(errp, "'%s' is not in this backing file chain",
2686                        top_node);
2687             goto out;
2688         }
2689     } else if (has_top && top) {
2690         /* This strcmp() is just a shortcut, there is no need to
2691          * refresh @bs's filename.  If it mismatches,
2692          * bdrv_find_backing_image() will do the refresh and may still
2693          * return @bs. */
2694         if (strcmp(bs->filename, top) != 0) {
2695             top_bs = bdrv_find_backing_image(bs, top);
2696         }
2697     }
2698 
2699     if (top_bs == NULL) {
2700         error_setg(errp, "Top image file %s not found", top ? top : "NULL");
2701         goto out;
2702     }
2703 
2704     assert(bdrv_get_aio_context(top_bs) == aio_context);
2705 
2706     if (has_base_node && has_base) {
2707         error_setg(errp, "'base-node' and 'base' are mutually exclusive");
2708         goto out;
2709     } else if (has_base_node) {
2710         base_bs = bdrv_lookup_bs(NULL, base_node, errp);
2711         if (base_bs == NULL) {
2712             goto out;
2713         }
2714         if (!bdrv_chain_contains(top_bs, base_bs)) {
2715             error_setg(errp, "'%s' is not in this backing file chain",
2716                        base_node);
2717             goto out;
2718         }
2719     } else if (has_base && base) {
2720         base_bs = bdrv_find_backing_image(top_bs, base);
2721         if (base_bs == NULL) {
2722             error_setg(errp, "Can't find '%s' in the backing chain", base);
2723             goto out;
2724         }
2725     } else {
2726         base_bs = bdrv_find_base(top_bs);
2727         if (base_bs == NULL) {
2728             error_setg(errp, "There is no backimg image");
2729             goto out;
2730         }
2731     }
2732 
2733     assert(bdrv_get_aio_context(base_bs) == aio_context);
2734 
2735     for (iter = top_bs; iter != bdrv_filter_or_cow_bs(base_bs);
2736          iter = bdrv_filter_or_cow_bs(iter))
2737     {
2738         if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
2739             goto out;
2740         }
2741     }
2742 
2743     /* Do not allow attempts to commit an image into itself */
2744     if (top_bs == base_bs) {
2745         error_setg(errp, "cannot commit an image into itself");
2746         goto out;
2747     }
2748 
2749     /*
2750      * Active commit is required if and only if someone has taken a
2751      * WRITE permission on the top node.  Historically, we have always
2752      * used active commit for top nodes, so continue that practice
2753      * lest we possibly break clients that rely on this behavior, e.g.
2754      * to later attach this node to a writing parent.
2755      * (Active commit is never really wrong.)
2756      */
2757     bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared);
2758     if (top_perm & BLK_PERM_WRITE ||
2759         bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs))
2760     {
2761         if (has_backing_file) {
2762             if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) {
2763                 error_setg(errp, "'backing-file' specified,"
2764                                  " but 'top' is the active layer");
2765             } else {
2766                 error_setg(errp, "'backing-file' specified, but 'top' has a "
2767                                  "writer on it");
2768             }
2769             goto out;
2770         }
2771         if (!has_job_id) {
2772             /*
2773              * Emulate here what block_job_create() does, because it
2774              * is possible that @bs != @top_bs (the block job should
2775              * be named after @bs, even if @top_bs is the actual
2776              * source)
2777              */
2778             job_id = bdrv_get_device_name(bs);
2779         }
2780         commit_active_start(job_id, top_bs, base_bs, job_flags, speed, on_error,
2781                             filter_node_name, NULL, NULL, false, &local_err);
2782     } else {
2783         BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
2784         if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
2785             goto out;
2786         }
2787         commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags,
2788                      speed, on_error, has_backing_file ? backing_file : NULL,
2789                      filter_node_name, &local_err);
2790     }
2791     if (local_err != NULL) {
2792         error_propagate(errp, local_err);
2793         goto out;
2794     }
2795 
2796 out:
2797     aio_context_release(aio_context);
2798 }
2799 
2800 /* Common QMP interface for drive-backup and blockdev-backup */
2801 static BlockJob *do_backup_common(BackupCommon *backup,
2802                                   BlockDriverState *bs,
2803                                   BlockDriverState *target_bs,
2804                                   AioContext *aio_context,
2805                                   JobTxn *txn, Error **errp)
2806 {
2807     BlockJob *job = NULL;
2808     BdrvDirtyBitmap *bmap = NULL;
2809     BackupPerf perf = { .max_workers = 64 };
2810     int job_flags = JOB_DEFAULT;
2811 
2812     if (!backup->has_speed) {
2813         backup->speed = 0;
2814     }
2815     if (!backup->has_on_source_error) {
2816         backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
2817     }
2818     if (!backup->has_on_target_error) {
2819         backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
2820     }
2821     if (!backup->has_job_id) {
2822         backup->job_id = NULL;
2823     }
2824     if (!backup->has_auto_finalize) {
2825         backup->auto_finalize = true;
2826     }
2827     if (!backup->has_auto_dismiss) {
2828         backup->auto_dismiss = true;
2829     }
2830     if (!backup->has_compress) {
2831         backup->compress = false;
2832     }
2833 
2834     if (backup->x_perf) {
2835         if (backup->x_perf->has_use_copy_range) {
2836             perf.use_copy_range = backup->x_perf->use_copy_range;
2837         }
2838         if (backup->x_perf->has_max_workers) {
2839             perf.max_workers = backup->x_perf->max_workers;
2840         }
2841         if (backup->x_perf->has_max_chunk) {
2842             perf.max_chunk = backup->x_perf->max_chunk;
2843         }
2844     }
2845 
2846     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
2847         (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
2848         /* done before desugaring 'incremental' to print the right message */
2849         if (!backup->has_bitmap) {
2850             error_setg(errp, "must provide a valid bitmap name for "
2851                        "'%s' sync mode", MirrorSyncMode_str(backup->sync));
2852             return NULL;
2853         }
2854     }
2855 
2856     if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) {
2857         if (backup->has_bitmap_mode &&
2858             backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) {
2859             error_setg(errp, "Bitmap sync mode must be '%s' "
2860                        "when using sync mode '%s'",
2861                        BitmapSyncMode_str(BITMAP_SYNC_MODE_ON_SUCCESS),
2862                        MirrorSyncMode_str(backup->sync));
2863             return NULL;
2864         }
2865         backup->has_bitmap_mode = true;
2866         backup->sync = MIRROR_SYNC_MODE_BITMAP;
2867         backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
2868     }
2869 
2870     if (backup->has_bitmap) {
2871         bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
2872         if (!bmap) {
2873             error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
2874             return NULL;
2875         }
2876         if (!backup->has_bitmap_mode) {
2877             error_setg(errp, "Bitmap sync mode must be given "
2878                        "when providing a bitmap");
2879             return NULL;
2880         }
2881         if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2882             return NULL;
2883         }
2884 
2885         /* This does not produce a useful bitmap artifact: */
2886         if (backup->sync == MIRROR_SYNC_MODE_NONE) {
2887             error_setg(errp, "sync mode '%s' does not produce meaningful bitmap"
2888                        " outputs", MirrorSyncMode_str(backup->sync));
2889             return NULL;
2890         }
2891 
2892         /* If the bitmap isn't used for input or output, this is useless: */
2893         if (backup->bitmap_mode == BITMAP_SYNC_MODE_NEVER &&
2894             backup->sync != MIRROR_SYNC_MODE_BITMAP) {
2895             error_setg(errp, "Bitmap sync mode '%s' has no meaningful effect"
2896                        " when combined with sync mode '%s'",
2897                        BitmapSyncMode_str(backup->bitmap_mode),
2898                        MirrorSyncMode_str(backup->sync));
2899             return NULL;
2900         }
2901     }
2902 
2903     if (!backup->has_bitmap && backup->has_bitmap_mode) {
2904         error_setg(errp, "Cannot specify bitmap sync mode without a bitmap");
2905         return NULL;
2906     }
2907 
2908     if (!backup->auto_finalize) {
2909         job_flags |= JOB_MANUAL_FINALIZE;
2910     }
2911     if (!backup->auto_dismiss) {
2912         job_flags |= JOB_MANUAL_DISMISS;
2913     }
2914 
2915     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
2916                             backup->sync, bmap, backup->bitmap_mode,
2917                             backup->compress,
2918                             backup->filter_node_name,
2919                             &perf,
2920                             backup->on_source_error,
2921                             backup->on_target_error,
2922                             job_flags, NULL, NULL, txn, errp);
2923     return job;
2924 }
2925 
2926 void qmp_drive_backup(DriveBackup *backup, Error **errp)
2927 {
2928     TransactionAction action = {
2929         .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP,
2930         .u.drive_backup.data = backup,
2931     };
2932     blockdev_do_action(&action, errp);
2933 }
2934 
2935 BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat,
2936                                                  bool flat,
2937                                                  Error **errp)
2938 {
2939     bool return_flat = has_flat && flat;
2940 
2941     return bdrv_named_nodes_list(return_flat, errp);
2942 }
2943 
2944 XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp)
2945 {
2946     return bdrv_get_xdbg_block_graph(errp);
2947 }
2948 
2949 void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp)
2950 {
2951     TransactionAction action = {
2952         .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP,
2953         .u.blockdev_backup.data = backup,
2954     };
2955     blockdev_do_action(&action, errp);
2956 }
2957 
2958 /* Parameter check and block job starting for drive mirroring.
2959  * Caller should hold @device and @target's aio context (must be the same).
2960  **/
2961 static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
2962                                    BlockDriverState *target,
2963                                    bool has_replaces, const char *replaces,
2964                                    enum MirrorSyncMode sync,
2965                                    BlockMirrorBackingMode backing_mode,
2966                                    bool zero_target,
2967                                    bool has_speed, int64_t speed,
2968                                    bool has_granularity, uint32_t granularity,
2969                                    bool has_buf_size, int64_t buf_size,
2970                                    bool has_on_source_error,
2971                                    BlockdevOnError on_source_error,
2972                                    bool has_on_target_error,
2973                                    BlockdevOnError on_target_error,
2974                                    bool has_unmap, bool unmap,
2975                                    bool has_filter_node_name,
2976                                    const char *filter_node_name,
2977                                    bool has_copy_mode, MirrorCopyMode copy_mode,
2978                                    bool has_auto_finalize, bool auto_finalize,
2979                                    bool has_auto_dismiss, bool auto_dismiss,
2980                                    Error **errp)
2981 {
2982     BlockDriverState *unfiltered_bs;
2983     int job_flags = JOB_DEFAULT;
2984 
2985     if (!has_speed) {
2986         speed = 0;
2987     }
2988     if (!has_on_source_error) {
2989         on_source_error = BLOCKDEV_ON_ERROR_REPORT;
2990     }
2991     if (!has_on_target_error) {
2992         on_target_error = BLOCKDEV_ON_ERROR_REPORT;
2993     }
2994     if (!has_granularity) {
2995         granularity = 0;
2996     }
2997     if (!has_buf_size) {
2998         buf_size = 0;
2999     }
3000     if (!has_unmap) {
3001         unmap = true;
3002     }
3003     if (!has_filter_node_name) {
3004         filter_node_name = NULL;
3005     }
3006     if (!has_copy_mode) {
3007         copy_mode = MIRROR_COPY_MODE_BACKGROUND;
3008     }
3009     if (has_auto_finalize && !auto_finalize) {
3010         job_flags |= JOB_MANUAL_FINALIZE;
3011     }
3012     if (has_auto_dismiss && !auto_dismiss) {
3013         job_flags |= JOB_MANUAL_DISMISS;
3014     }
3015 
3016     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
3017         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
3018                    "a value in range [512B, 64MB]");
3019         return;
3020     }
3021     if (granularity & (granularity - 1)) {
3022         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
3023                    "a power of 2");
3024         return;
3025     }
3026 
3027     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
3028         return;
3029     }
3030     if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
3031         return;
3032     }
3033 
3034     if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
3035         sync = MIRROR_SYNC_MODE_FULL;
3036     }
3037 
3038     if (!has_replaces) {
3039         /* We want to mirror from @bs, but keep implicit filters on top */
3040         unfiltered_bs = bdrv_skip_implicit_filters(bs);
3041         if (unfiltered_bs != bs) {
3042             replaces = unfiltered_bs->node_name;
3043             has_replaces = true;
3044         }
3045     }
3046 
3047     if (has_replaces) {
3048         BlockDriverState *to_replace_bs;
3049         AioContext *replace_aio_context;
3050         int64_t bs_size, replace_size;
3051 
3052         bs_size = bdrv_getlength(bs);
3053         if (bs_size < 0) {
3054             error_setg_errno(errp, -bs_size, "Failed to query device's size");
3055             return;
3056         }
3057 
3058         to_replace_bs = check_to_replace_node(bs, replaces, errp);
3059         if (!to_replace_bs) {
3060             return;
3061         }
3062 
3063         replace_aio_context = bdrv_get_aio_context(to_replace_bs);
3064         aio_context_acquire(replace_aio_context);
3065         replace_size = bdrv_getlength(to_replace_bs);
3066         aio_context_release(replace_aio_context);
3067 
3068         if (replace_size < 0) {
3069             error_setg_errno(errp, -replace_size,
3070                              "Failed to query the replacement node's size");
3071             return;
3072         }
3073         if (bs_size != replace_size) {
3074             error_setg(errp, "cannot replace image with a mirror image of "
3075                              "different size");
3076             return;
3077         }
3078     }
3079 
3080     /* pass the node name to replace to mirror start since it's loose coupling
3081      * and will allow to check whether the node still exist at mirror completion
3082      */
3083     mirror_start(job_id, bs, target,
3084                  has_replaces ? replaces : NULL, job_flags,
3085                  speed, granularity, buf_size, sync, backing_mode, zero_target,
3086                  on_source_error, on_target_error, unmap, filter_node_name,
3087                  copy_mode, errp);
3088 }
3089 
3090 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
3091 {
3092     BlockDriverState *bs;
3093     BlockDriverState *target_backing_bs, *target_bs;
3094     AioContext *aio_context;
3095     AioContext *old_context;
3096     BlockMirrorBackingMode backing_mode;
3097     Error *local_err = NULL;
3098     QDict *options = NULL;
3099     int flags;
3100     int64_t size;
3101     const char *format = arg->format;
3102     bool zero_target;
3103     int ret;
3104 
3105     bs = qmp_get_root_bs(arg->device, errp);
3106     if (!bs) {
3107         return;
3108     }
3109 
3110     /* Early check to avoid creating target */
3111     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
3112         return;
3113     }
3114 
3115     aio_context = bdrv_get_aio_context(bs);
3116     aio_context_acquire(aio_context);
3117 
3118     if (!arg->has_mode) {
3119         arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
3120     }
3121 
3122     if (!arg->has_format) {
3123         format = (arg->mode == NEW_IMAGE_MODE_EXISTING
3124                   ? NULL : bs->drv->format_name);
3125     }
3126 
3127     flags = bs->open_flags | BDRV_O_RDWR;
3128     target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs));
3129     if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) {
3130         arg->sync = MIRROR_SYNC_MODE_FULL;
3131     }
3132     if (arg->sync == MIRROR_SYNC_MODE_NONE) {
3133         target_backing_bs = bs;
3134     }
3135 
3136     size = bdrv_getlength(bs);
3137     if (size < 0) {
3138         error_setg_errno(errp, -size, "bdrv_getlength failed");
3139         goto out;
3140     }
3141 
3142     if (arg->has_replaces) {
3143         if (!arg->has_node_name) {
3144             error_setg(errp, "a node-name must be provided when replacing a"
3145                              " named node of the graph");
3146             goto out;
3147         }
3148     }
3149 
3150     if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
3151         backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
3152     } else {
3153         backing_mode = MIRROR_OPEN_BACKING_CHAIN;
3154     }
3155 
3156     /* Don't open backing image in create() */
3157     flags |= BDRV_O_NO_BACKING;
3158 
3159     if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs)
3160         && arg->mode != NEW_IMAGE_MODE_EXISTING)
3161     {
3162         /* create new image w/o backing file */
3163         assert(format);
3164         bdrv_img_create(arg->target, format,
3165                         NULL, NULL, NULL, size, flags, false, &local_err);
3166     } else {
3167         /* Implicit filters should not appear in the filename */
3168         BlockDriverState *explicit_backing =
3169             bdrv_skip_implicit_filters(target_backing_bs);
3170 
3171         switch (arg->mode) {
3172         case NEW_IMAGE_MODE_EXISTING:
3173             break;
3174         case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
3175             /* create new image with backing file */
3176             bdrv_refresh_filename(explicit_backing);
3177             bdrv_img_create(arg->target, format,
3178                             explicit_backing->filename,
3179                             explicit_backing->drv->format_name,
3180                             NULL, size, flags, false, &local_err);
3181             break;
3182         default:
3183             abort();
3184         }
3185     }
3186 
3187     if (local_err) {
3188         error_propagate(errp, local_err);
3189         goto out;
3190     }
3191 
3192     options = qdict_new();
3193     if (arg->has_node_name) {
3194         qdict_put_str(options, "node-name", arg->node_name);
3195     }
3196     if (format) {
3197         qdict_put_str(options, "driver", format);
3198     }
3199 
3200     /* Mirroring takes care of copy-on-write using the source's backing
3201      * file.
3202      */
3203     target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
3204     if (!target_bs) {
3205         goto out;
3206     }
3207 
3208     zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL &&
3209                    (arg->mode == NEW_IMAGE_MODE_EXISTING ||
3210                     !bdrv_has_zero_init(target_bs)));
3211 
3212 
3213     /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
3214     old_context = bdrv_get_aio_context(target_bs);
3215     aio_context_release(aio_context);
3216     aio_context_acquire(old_context);
3217 
3218     ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
3219     if (ret < 0) {
3220         bdrv_unref(target_bs);
3221         aio_context_release(old_context);
3222         return;
3223     }
3224 
3225     aio_context_release(old_context);
3226     aio_context_acquire(aio_context);
3227 
3228     blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
3229                            arg->has_replaces, arg->replaces, arg->sync,
3230                            backing_mode, zero_target,
3231                            arg->has_speed, arg->speed,
3232                            arg->has_granularity, arg->granularity,
3233                            arg->has_buf_size, arg->buf_size,
3234                            arg->has_on_source_error, arg->on_source_error,
3235                            arg->has_on_target_error, arg->on_target_error,
3236                            arg->has_unmap, arg->unmap,
3237                            false, NULL,
3238                            arg->has_copy_mode, arg->copy_mode,
3239                            arg->has_auto_finalize, arg->auto_finalize,
3240                            arg->has_auto_dismiss, arg->auto_dismiss,
3241                            errp);
3242     bdrv_unref(target_bs);
3243 out:
3244     aio_context_release(aio_context);
3245 }
3246 
3247 void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
3248                          const char *device, const char *target,
3249                          bool has_replaces, const char *replaces,
3250                          MirrorSyncMode sync,
3251                          bool has_speed, int64_t speed,
3252                          bool has_granularity, uint32_t granularity,
3253                          bool has_buf_size, int64_t buf_size,
3254                          bool has_on_source_error,
3255                          BlockdevOnError on_source_error,
3256                          bool has_on_target_error,
3257                          BlockdevOnError on_target_error,
3258                          bool has_filter_node_name,
3259                          const char *filter_node_name,
3260                          bool has_copy_mode, MirrorCopyMode copy_mode,
3261                          bool has_auto_finalize, bool auto_finalize,
3262                          bool has_auto_dismiss, bool auto_dismiss,
3263                          Error **errp)
3264 {
3265     BlockDriverState *bs;
3266     BlockDriverState *target_bs;
3267     AioContext *aio_context;
3268     AioContext *old_context;
3269     BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
3270     bool zero_target;
3271     int ret;
3272 
3273     bs = qmp_get_root_bs(device, errp);
3274     if (!bs) {
3275         return;
3276     }
3277 
3278     target_bs = bdrv_lookup_bs(target, target, errp);
3279     if (!target_bs) {
3280         return;
3281     }
3282 
3283     zero_target = (sync == MIRROR_SYNC_MODE_FULL);
3284 
3285     /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
3286     old_context = bdrv_get_aio_context(target_bs);
3287     aio_context = bdrv_get_aio_context(bs);
3288     aio_context_acquire(old_context);
3289 
3290     ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
3291 
3292     aio_context_release(old_context);
3293     aio_context_acquire(aio_context);
3294 
3295     if (ret < 0) {
3296         goto out;
3297     }
3298 
3299     blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
3300                            has_replaces, replaces, sync, backing_mode,
3301                            zero_target, has_speed, speed,
3302                            has_granularity, granularity,
3303                            has_buf_size, buf_size,
3304                            has_on_source_error, on_source_error,
3305                            has_on_target_error, on_target_error,
3306                            true, true,
3307                            has_filter_node_name, filter_node_name,
3308                            has_copy_mode, copy_mode,
3309                            has_auto_finalize, auto_finalize,
3310                            has_auto_dismiss, auto_dismiss,
3311                            errp);
3312 out:
3313     aio_context_release(aio_context);
3314 }
3315 
3316 /* Get a block job using its ID and acquire its AioContext */
3317 static BlockJob *find_block_job(const char *id, AioContext **aio_context,
3318                                 Error **errp)
3319 {
3320     BlockJob *job;
3321 
3322     assert(id != NULL);
3323 
3324     *aio_context = NULL;
3325 
3326     job = block_job_get(id);
3327 
3328     if (!job) {
3329         error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
3330                   "Block job '%s' not found", id);
3331         return NULL;
3332     }
3333 
3334     *aio_context = block_job_get_aio_context(job);
3335     aio_context_acquire(*aio_context);
3336 
3337     return job;
3338 }
3339 
3340 void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
3341 {
3342     AioContext *aio_context;
3343     BlockJob *job = find_block_job(device, &aio_context, errp);
3344 
3345     if (!job) {
3346         return;
3347     }
3348 
3349     block_job_set_speed(job, speed, errp);
3350     aio_context_release(aio_context);
3351 }
3352 
3353 void qmp_block_job_cancel(const char *device,
3354                           bool has_force, bool force, Error **errp)
3355 {
3356     AioContext *aio_context;
3357     BlockJob *job = find_block_job(device, &aio_context, errp);
3358 
3359     if (!job) {
3360         return;
3361     }
3362 
3363     if (!has_force) {
3364         force = false;
3365     }
3366 
3367     if (job_user_paused(&job->job) && !force) {
3368         error_setg(errp, "The block job for device '%s' is currently paused",
3369                    device);
3370         goto out;
3371     }
3372 
3373     trace_qmp_block_job_cancel(job);
3374     job_user_cancel(&job->job, force, errp);
3375 out:
3376     aio_context_release(aio_context);
3377 }
3378 
3379 void qmp_block_job_pause(const char *device, Error **errp)
3380 {
3381     AioContext *aio_context;
3382     BlockJob *job = find_block_job(device, &aio_context, errp);
3383 
3384     if (!job) {
3385         return;
3386     }
3387 
3388     trace_qmp_block_job_pause(job);
3389     job_user_pause(&job->job, errp);
3390     aio_context_release(aio_context);
3391 }
3392 
3393 void qmp_block_job_resume(const char *device, Error **errp)
3394 {
3395     AioContext *aio_context;
3396     BlockJob *job = find_block_job(device, &aio_context, errp);
3397 
3398     if (!job) {
3399         return;
3400     }
3401 
3402     trace_qmp_block_job_resume(job);
3403     job_user_resume(&job->job, errp);
3404     aio_context_release(aio_context);
3405 }
3406 
3407 void qmp_block_job_complete(const char *device, Error **errp)
3408 {
3409     AioContext *aio_context;
3410     BlockJob *job = find_block_job(device, &aio_context, errp);
3411 
3412     if (!job) {
3413         return;
3414     }
3415 
3416     trace_qmp_block_job_complete(job);
3417     job_complete(&job->job, errp);
3418     aio_context_release(aio_context);
3419 }
3420 
3421 void qmp_block_job_finalize(const char *id, Error **errp)
3422 {
3423     AioContext *aio_context;
3424     BlockJob *job = find_block_job(id, &aio_context, errp);
3425 
3426     if (!job) {
3427         return;
3428     }
3429 
3430     trace_qmp_block_job_finalize(job);
3431     job_ref(&job->job);
3432     job_finalize(&job->job, errp);
3433 
3434     /*
3435      * Job's context might have changed via job_finalize (and job_txn_apply
3436      * automatically acquires the new one), so make sure we release the correct
3437      * one.
3438      */
3439     aio_context = block_job_get_aio_context(job);
3440     job_unref(&job->job);
3441     aio_context_release(aio_context);
3442 }
3443 
3444 void qmp_block_job_dismiss(const char *id, Error **errp)
3445 {
3446     AioContext *aio_context;
3447     BlockJob *bjob = find_block_job(id, &aio_context, errp);
3448     Job *job;
3449 
3450     if (!bjob) {
3451         return;
3452     }
3453 
3454     trace_qmp_block_job_dismiss(bjob);
3455     job = &bjob->job;
3456     job_dismiss(&job, errp);
3457     aio_context_release(aio_context);
3458 }
3459 
3460 void qmp_change_backing_file(const char *device,
3461                              const char *image_node_name,
3462                              const char *backing_file,
3463                              Error **errp)
3464 {
3465     BlockDriverState *bs = NULL;
3466     AioContext *aio_context;
3467     BlockDriverState *image_bs = NULL;
3468     Error *local_err = NULL;
3469     bool ro;
3470     int ret;
3471 
3472     bs = qmp_get_root_bs(device, errp);
3473     if (!bs) {
3474         return;
3475     }
3476 
3477     aio_context = bdrv_get_aio_context(bs);
3478     aio_context_acquire(aio_context);
3479 
3480     image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
3481     if (local_err) {
3482         error_propagate(errp, local_err);
3483         goto out;
3484     }
3485 
3486     if (!image_bs) {
3487         error_setg(errp, "image file not found");
3488         goto out;
3489     }
3490 
3491     if (bdrv_find_base(image_bs) == image_bs) {
3492         error_setg(errp, "not allowing backing file change on an image "
3493                          "without a backing file");
3494         goto out;
3495     }
3496 
3497     /* even though we are not necessarily operating on bs, we need it to
3498      * determine if block ops are currently prohibited on the chain */
3499     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
3500         goto out;
3501     }
3502 
3503     /* final sanity check */
3504     if (!bdrv_chain_contains(bs, image_bs)) {
3505         error_setg(errp, "'%s' and image file are not in the same chain",
3506                    device);
3507         goto out;
3508     }
3509 
3510     /* if not r/w, reopen to make r/w */
3511     ro = bdrv_is_read_only(image_bs);
3512 
3513     if (ro) {
3514         if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) {
3515             goto out;
3516         }
3517     }
3518 
3519     ret = bdrv_change_backing_file(image_bs, backing_file,
3520                                    image_bs->drv ? image_bs->drv->format_name : "",
3521                                    false);
3522 
3523     if (ret < 0) {
3524         error_setg_errno(errp, -ret, "Could not change backing file to '%s'",
3525                          backing_file);
3526         /* don't exit here, so we can try to restore open flags if
3527          * appropriate */
3528     }
3529 
3530     if (ro) {
3531         bdrv_reopen_set_read_only(image_bs, true, errp);
3532     }
3533 
3534 out:
3535     aio_context_release(aio_context);
3536 }
3537 
3538 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
3539 {
3540     BlockDriverState *bs;
3541     QObject *obj;
3542     Visitor *v = qobject_output_visitor_new(&obj);
3543     QDict *qdict;
3544 
3545     visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3546     visit_complete(v, &obj);
3547     qdict = qobject_to(QDict, obj);
3548 
3549     qdict_flatten(qdict);
3550 
3551     if (!qdict_get_try_str(qdict, "node-name")) {
3552         error_setg(errp, "'node-name' must be specified for the root node");
3553         goto fail;
3554     }
3555 
3556     bs = bds_tree_init(qdict, errp);
3557     if (!bs) {
3558         goto fail;
3559     }
3560 
3561     bdrv_set_monitor_owned(bs);
3562 
3563 fail:
3564     visit_free(v);
3565 }
3566 
3567 void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
3568 {
3569     BlockReopenQueue *queue = NULL;
3570     GSList *drained = NULL;
3571     GSList *p;
3572 
3573     /* Add each one of the BDS that we want to reopen to the queue */
3574     for (; reopen_list != NULL; reopen_list = reopen_list->next) {
3575         BlockdevOptions *options = reopen_list->value;
3576         BlockDriverState *bs;
3577         AioContext *ctx;
3578         QObject *obj;
3579         Visitor *v;
3580         QDict *qdict;
3581 
3582         /* Check for the selected node name */
3583         if (!options->has_node_name) {
3584             error_setg(errp, "node-name not specified");
3585             goto fail;
3586         }
3587 
3588         bs = bdrv_find_node(options->node_name);
3589         if (!bs) {
3590             error_setg(errp, "Failed to find node with node-name='%s'",
3591                        options->node_name);
3592             goto fail;
3593         }
3594 
3595         /* Put all options in a QDict and flatten it */
3596         v = qobject_output_visitor_new(&obj);
3597         visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3598         visit_complete(v, &obj);
3599         visit_free(v);
3600 
3601         qdict = qobject_to(QDict, obj);
3602 
3603         qdict_flatten(qdict);
3604 
3605         ctx = bdrv_get_aio_context(bs);
3606         aio_context_acquire(ctx);
3607 
3608         bdrv_subtree_drained_begin(bs);
3609         queue = bdrv_reopen_queue(queue, bs, qdict, false);
3610         drained = g_slist_prepend(drained, bs);
3611 
3612         aio_context_release(ctx);
3613     }
3614 
3615     /* Perform the reopen operation */
3616     bdrv_reopen_multiple(queue, errp);
3617     queue = NULL;
3618 
3619 fail:
3620     bdrv_reopen_queue_free(queue);
3621     for (p = drained; p; p = p->next) {
3622         BlockDriverState *bs = p->data;
3623         AioContext *ctx = bdrv_get_aio_context(bs);
3624 
3625         aio_context_acquire(ctx);
3626         bdrv_subtree_drained_end(bs);
3627         aio_context_release(ctx);
3628     }
3629     g_slist_free(drained);
3630 }
3631 
3632 void qmp_blockdev_del(const char *node_name, Error **errp)
3633 {
3634     AioContext *aio_context;
3635     BlockDriverState *bs;
3636 
3637     GLOBAL_STATE_CODE();
3638 
3639     bs = bdrv_find_node(node_name);
3640     if (!bs) {
3641         error_setg(errp, "Failed to find node with node-name='%s'", node_name);
3642         return;
3643     }
3644     if (bdrv_has_blk(bs)) {
3645         error_setg(errp, "Node %s is in use", node_name);
3646         return;
3647     }
3648     aio_context = bdrv_get_aio_context(bs);
3649     aio_context_acquire(aio_context);
3650 
3651     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) {
3652         goto out;
3653     }
3654 
3655     if (!QTAILQ_IN_USE(bs, monitor_list)) {
3656         error_setg(errp, "Node %s is not owned by the monitor",
3657                    bs->node_name);
3658         goto out;
3659     }
3660 
3661     if (bs->refcnt > 1) {
3662         error_setg(errp, "Block device %s is in use",
3663                    bdrv_get_device_or_node_name(bs));
3664         goto out;
3665     }
3666 
3667     QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
3668     bdrv_unref(bs);
3669 
3670 out:
3671     aio_context_release(aio_context);
3672 }
3673 
3674 static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
3675                                   const char *child_name)
3676 {
3677     BdrvChild *child;
3678 
3679     QLIST_FOREACH(child, &parent_bs->children, next) {
3680         if (strcmp(child->name, child_name) == 0) {
3681             return child;
3682         }
3683     }
3684 
3685     return NULL;
3686 }
3687 
3688 void qmp_x_blockdev_change(const char *parent, bool has_child,
3689                            const char *child, bool has_node,
3690                            const char *node, Error **errp)
3691 {
3692     BlockDriverState *parent_bs, *new_bs = NULL;
3693     BdrvChild *p_child;
3694 
3695     parent_bs = bdrv_lookup_bs(parent, parent, errp);
3696     if (!parent_bs) {
3697         return;
3698     }
3699 
3700     if (has_child == has_node) {
3701         if (has_child) {
3702             error_setg(errp, "The parameters child and node are in conflict");
3703         } else {
3704             error_setg(errp, "Either child or node must be specified");
3705         }
3706         return;
3707     }
3708 
3709     if (has_child) {
3710         p_child = bdrv_find_child(parent_bs, child);
3711         if (!p_child) {
3712             error_setg(errp, "Node '%s' does not have child '%s'",
3713                        parent, child);
3714             return;
3715         }
3716         bdrv_del_child(parent_bs, p_child, errp);
3717     }
3718 
3719     if (has_node) {
3720         new_bs = bdrv_find_node(node);
3721         if (!new_bs) {
3722             error_setg(errp, "Node '%s' not found", node);
3723             return;
3724         }
3725         bdrv_add_child(parent_bs, new_bs, errp);
3726     }
3727 }
3728 
3729 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
3730 {
3731     BlockJobInfoList *head = NULL, **tail = &head;
3732     BlockJob *job;
3733 
3734     for (job = block_job_next(NULL); job; job = block_job_next(job)) {
3735         BlockJobInfo *value;
3736         AioContext *aio_context;
3737 
3738         if (block_job_is_internal(job)) {
3739             continue;
3740         }
3741         aio_context = block_job_get_aio_context(job);
3742         aio_context_acquire(aio_context);
3743         value = block_job_query(job, errp);
3744         aio_context_release(aio_context);
3745         if (!value) {
3746             qapi_free_BlockJobInfoList(head);
3747             return NULL;
3748         }
3749         QAPI_LIST_APPEND(tail, value);
3750     }
3751 
3752     return head;
3753 }
3754 
3755 void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
3756                                  bool has_force, bool force, Error **errp)
3757 {
3758     AioContext *old_context;
3759     AioContext *new_context;
3760     BlockDriverState *bs;
3761 
3762     bs = bdrv_find_node(node_name);
3763     if (!bs) {
3764         error_setg(errp, "Failed to find node with node-name='%s'", node_name);
3765         return;
3766     }
3767 
3768     /* Protects against accidents. */
3769     if (!(has_force && force) && bdrv_has_blk(bs)) {
3770         error_setg(errp, "Node %s is associated with a BlockBackend and could "
3771                          "be in use (use force=true to override this check)",
3772                          node_name);
3773         return;
3774     }
3775 
3776     if (iothread->type == QTYPE_QSTRING) {
3777         IOThread *obj = iothread_by_id(iothread->u.s);
3778         if (!obj) {
3779             error_setg(errp, "Cannot find iothread %s", iothread->u.s);
3780             return;
3781         }
3782 
3783         new_context = iothread_get_aio_context(obj);
3784     } else {
3785         new_context = qemu_get_aio_context();
3786     }
3787 
3788     old_context = bdrv_get_aio_context(bs);
3789     aio_context_acquire(old_context);
3790 
3791     bdrv_try_set_aio_context(bs, new_context, errp);
3792 
3793     aio_context_release(old_context);
3794 }
3795 
3796 QemuOptsList qemu_common_drive_opts = {
3797     .name = "drive",
3798     .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
3799     .desc = {
3800         {
3801             .name = "snapshot",
3802             .type = QEMU_OPT_BOOL,
3803             .help = "enable/disable snapshot mode",
3804         },{
3805             .name = "aio",
3806             .type = QEMU_OPT_STRING,
3807             .help = "host AIO implementation (threads, native, io_uring)",
3808         },{
3809             .name = BDRV_OPT_CACHE_WB,
3810             .type = QEMU_OPT_BOOL,
3811             .help = "Enable writeback mode",
3812         },{
3813             .name = "format",
3814             .type = QEMU_OPT_STRING,
3815             .help = "disk format (raw, qcow2, ...)",
3816         },{
3817             .name = "rerror",
3818             .type = QEMU_OPT_STRING,
3819             .help = "read error action",
3820         },{
3821             .name = "werror",
3822             .type = QEMU_OPT_STRING,
3823             .help = "write error action",
3824         },{
3825             .name = BDRV_OPT_READ_ONLY,
3826             .type = QEMU_OPT_BOOL,
3827             .help = "open drive file as read-only",
3828         },
3829 
3830         THROTTLE_OPTS,
3831 
3832         {
3833             .name = "throttling.group",
3834             .type = QEMU_OPT_STRING,
3835             .help = "name of the block throttling group",
3836         },{
3837             .name = "copy-on-read",
3838             .type = QEMU_OPT_BOOL,
3839             .help = "copy read data from backing file into image file",
3840         },{
3841             .name = "detect-zeroes",
3842             .type = QEMU_OPT_STRING,
3843             .help = "try to optimize zero writes (off, on, unmap)",
3844         },{
3845             .name = "stats-account-invalid",
3846             .type = QEMU_OPT_BOOL,
3847             .help = "whether to account for invalid I/O operations "
3848                     "in the statistics",
3849         },{
3850             .name = "stats-account-failed",
3851             .type = QEMU_OPT_BOOL,
3852             .help = "whether to account for failed I/O operations "
3853                     "in the statistics",
3854         },
3855         { /* end of list */ }
3856     },
3857 };
3858 
3859 QemuOptsList qemu_drive_opts = {
3860     .name = "drive",
3861     .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
3862     .desc = {
3863         /*
3864          * no elements => accept any params
3865          * validation will happen later
3866          */
3867         { /* end of list */ }
3868     },
3869 };
3870