xref: /openbmc/qemu/qemu-img.c (revision 52292ba8b65723546478bc2c146542a601382e82)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qobject/qjson.h"
36 #include "qobject/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "system/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/*
 * Version and copyright banner.  It forms the first part of the text
 * printed by help().
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
60 
/*
 * Descriptor of one qemu-img subcommand.
 * @name is the subcommand name (printed by cmd_help() in the usage line).
 * @handler implements the subcommand; it receives its own descriptor
 * together with an argc/argv pair and returns an int status.
 */
typedef struct img_cmd_t {
    const char *name;
    int (*handler)(const struct img_cmd_t *ccmd, int argc, char **argv);
} img_cmd_t;
65 
/*
 * Codes returned by getopt_long() for long options that have no
 * single-character equivalent.  Numbering starts at 256, which keeps
 * these codes distinct from the character codes used for short options.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_SIZE = 264,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
    OPTION_TARGET_IS_ZERO = 268,
    OPTION_ADD = 269,
    OPTION_REMOVE = 270,
    OPTION_CLEAR = 271,
    OPTION_ENABLE = 272,
    OPTION_DISABLE = 273,
    OPTION_MERGE = 274,
    OPTION_BITMAPS = 275,
    OPTION_FORCE = 276,
    OPTION_SKIP_BROKEN = 277,
};
90 
/*
 * Output style of a command, selected with '--output':
 * "json" maps to OFORMAT_JSON and "human" to OFORMAT_HUMAN.
 */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
95 
96 /* Default to cache=writeback as data integrity is not important for qemu-img */
97 #define BDRV_DEFAULT_CACHE "writeback"
98 
/*
 * Callback passed to bdrv_iterate_format() by help(): print one format
 * name on stdout, preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    printf(" %s", name);
}
103 
/*
 * Print a hint pointing the user at '@argv0 --help' and terminate the
 * process with EXIT_FAILURE.  Never returns.
 */
static G_NORETURN
void tryhelp(const char *argv0)
{
    error_printf("Try '%s --help' for more information\n", argv0);
    exit(EXIT_FAILURE);
}
110 
111 static G_NORETURN G_GNUC_PRINTF(2, 3)
112 void error_exit(const char *argv0, const char *fmt, ...)
113 {
114     va_list ap;
115 
116     va_start(ap, fmt);
117     error_vreport(fmt, ap);
118     va_end(ap);
119 
120     tryhelp(argv0);
121 }
122 
/*
 * Report that @option was given without its required argument and exit
 * through error_exit().  The program name in the help hint is fixed to
 * "qemu-img".  Never returns.
 */
static G_NORETURN
void missing_argument(const char *option)
{
    error_exit("qemu-img", "missing argument for option '%s'", option);
}
128 
/*
 * Report that @option is not a known option and exit through
 * error_exit().  The program name in the help hint is fixed to
 * "qemu-img".  Never returns.
 */
static G_NORETURN
void unrecognized_option(const char *option)
{
    error_exit("qemu-img", "unrecognized option '%s'", option);
}
134 
135 /*
136  * Print --help output for a command and exit.
137  * @syntax and @description are multi-line with trailing EOL
138  * (to allow easy extending of the text)
139  * @syntax has each subsequent line indented by 8 chars.
140  * @description is indented by 2 chars for argument on each own line,
141  * and with 5 chars for argument description (like -h arg below).
142  */
143 static G_NORETURN
144 void cmd_help(const img_cmd_t *ccmd,
145               const char *syntax, const char *arguments)
146 {
147     printf(
148 "Usage:\n"
149 "\n"
150 "  %s %s %s"
151 "\n"
152 "Arguments:\n"
153 "  -h, --help\n"
154 "     print this help and exit\n"
155 "%s\n",
156            "qemu-img", ccmd->name,
157            syntax, arguments);
158     exit(EXIT_SUCCESS);
159 }
160 
/*
 * Print the general qemu-img help on stdout and exit with EXIT_SUCCESS.
 * Never returns.
 *
 * The text consists of the version banner, the global options, one syntax
 * line per subcommand (generated by including qemu-img-cmds.h with DEF()
 * temporarily defined to emit just the command's argument string), the
 * parameter descriptions, the list of supported formats obtained from
 * bdrv_iterate_format(), and QEMU_HELP_BOTTOM.
 *
 * Please keep in sync with docs/tools/qemu-img.rst
 */
static G_NORETURN
void help(void)
{
    const char *help_msg =
           QEMU_IMG_VERSION
           "usage: qemu-img [standard options] command [command options]\n"
           "QEMU disk image utility\n"
           "\n"
           "    '-h', '--help'       display this help and exit\n"
           "    '-V', '--version'    output version information and exit\n"
           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
           "                         specify tracing options\n"
           "\n"
           "Command syntax:\n"
#define DEF(option, callback, arg_string)        \
           "  " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
           "\n"
           "Command parameters:\n"
           "  'filename' is a disk image filename\n"
           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
           "    manual page for a description of the object properties. The most common\n"
           "    object type is a 'secret', which is used to supply passwords and/or\n"
           "    encryption keys.\n"
           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
           "  'cache' is the cache mode used to write the output disk image, the valid\n"
           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
           "    'directsync' and 'unsafe' (default for convert)\n"
           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
           "    options are the same as for the 'cache' option\n"
           "  'size' is the disk image size in bytes. Optional suffixes\n"
           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
           "    supported. 'b' is ignored.\n"
           "  'output_filename' is the destination disk image filename\n"
           "  'output_fmt' is the destination format\n"
           "  'options' is a comma separated list of format specific options in a\n"
           "    name=value format. Use -o help for an overview of the options supported by\n"
           "    the used format\n"
           "  'snapshot_param' is param used for internal snapshot, format\n"
           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
           "    '[ID_OR_NAME]'\n"
           "  '-c' indicates that target image must be compressed (qcow format only)\n"
           "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
           "       new backing file match exactly. The image doesn't need a working\n"
           "       backing file before rebasing in this case (useful for renaming the\n"
           "       backing file). For image creation, allow creating without attempting\n"
           "       to open the backing file.\n"
           "  '-h' with or without a command shows this help and lists the supported formats\n"
           "  '-p' show progress of command (only certain commands)\n"
           "  '-q' use Quiet mode - do not print any output (except errors)\n"
           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
           "       contain only zeros for qemu-img to create a sparse image during\n"
           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
           "       unallocated or zero sectors, and the destination image will always be\n"
           "       fully allocated\n"
           "  '--output' takes the format in which the output must be done (human or json)\n"
           "  '-n' skips the target volume creation (useful if the volume is created\n"
           "       prior to running qemu-img)\n"
           "\n"
           "Parameters to bitmap subcommand:\n"
           "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
           "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
           "       or '--merge source'\n"
           "  '-g granularity' sets the granularity for '--add' actions\n"
           "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
           "       bitmaps from an alternative file\n"
           "\n"
           "Parameters to check subcommand:\n"
           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
           "       hiding corruption that has already occurred.\n"
           "\n"
           "Parameters to convert subcommand:\n"
           "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
           "  '-m' specifies how many coroutines work in parallel during the convert\n"
           "       process (defaults to 8)\n"
           "  '-W' allow to write to the target out of order rather than sequential\n"
           "\n"
           "Parameters to snapshot subcommand:\n"
           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
           "  '-a' applies a snapshot (revert disk to saved state)\n"
           "  '-c' creates a snapshot\n"
           "  '-d' deletes a snapshot\n"
           "  '-l' lists all snapshots in the given image\n"
           "\n"
           "Parameters to compare subcommand:\n"
           "  '-f' first image format\n"
           "  '-F' second image format\n"
           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
           "\n"
           "Parameters to dd subcommand:\n"
           "  'bs=BYTES' read and write up to BYTES bytes at a time "
           "(default: 512)\n"
           "  'count=N' copy only N input blocks\n"
           "  'if=FILE' read from FILE\n"
           "  'of=FILE' write to FILE\n"
           "  'skip=N' skip N bs-sized blocks at the start of input\n";

    /* The format names are appended to the "Supported formats:" line */
    printf("%s\nSupported formats:", help_msg);
    bdrv_iterate_format(format_print, NULL, false);
    printf("\n\n" QEMU_HELP_BOTTOM "\n");
    exit(EXIT_SUCCESS);
}
268 
/*
 * Can @list be handed to accumulate_options()?
 *
 * accumulate_options() glues several lists together with ',' as the
 * separator.  For that to be unambiguous, @list
 *  - must not be empty (otherwise the separator in front of it could
 *    escape the separator behind it),
 *  - must not begin with ',' (otherwise the separator in front of it
 *    would be escaped), and
 *  - must not end in an odd number of ',' (otherwise the separator
 *    behind it would be escaped).
 *
 * Returns true when @list satisfies all three conditions.
 */
static bool is_valid_option_list(const char *list)
{
    size_t len = strlen(list);
    size_t trailing_commas = 0;

    if (len == 0 || list[0] == ',') {
        return false;
    }

    /* Count the run of ',' at the very end of the string */
    while (trailing_commas < len &&
           list[len - 1 - trailing_commas] == ',') {
        trailing_commas++;
    }

    return trailing_commas % 2 == 0;
}
296 
/*
 * Append @list to the comma-separated option string in *@options.
 *
 * *@options may be NULL, in which case it becomes a copy of @list;
 * otherwise the old string is freed and replaced by "<old>,<list>".
 * The resulting string is allocated with GLib and must be released with
 * g_free().
 *
 * Returns 0 on success.  If @list fails is_valid_option_list(), an error
 * is reported, *@options is left untouched and -1 is returned.
 */
static int accumulate_options(char **options, char *list)
{
    char *joined;

    if (!is_valid_option_list(list)) {
        error_report("Invalid option list: %s", list);
        return -1;
    }

    joined = *options ? g_strdup_printf("%s,%s", *options, list)
                      : g_strdup(list);

    /* g_free(NULL) is a no-op, so this also covers the first call */
    g_free(*options);
    *options = joined;
    return 0;
}
315 
/*
 * Option list named "source".  img_open() looks up a list of that name
 * with qemu_find_opts("source") to parse --image-opts strings; presumably
 * this list is registered elsewhere in the file -- TODO confirm.
 * A value given without a key is taken as "file" (implied_opt_name).
 * The descriptor table is empty; NOTE(review): this looks like it makes
 * the list accept arbitrary keys -- confirm against the QemuOpts API.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
324 
325 static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
326 {
327     int ret = 0;
328     if (!quiet) {
329         va_list args;
330         va_start(args, fmt);
331         ret = vprintf(fmt, args);
332         va_end(args);
333     }
334     return ret;
335 }
336 
337 
338 static int print_block_option_help(const char *filename, const char *fmt)
339 {
340     BlockDriver *drv, *proto_drv;
341     QemuOptsList *create_opts = NULL;
342     Error *local_err = NULL;
343 
344     /* Find driver and parse its options */
345     drv = bdrv_find_format(fmt);
346     if (!drv) {
347         error_report("Unknown file format '%s'", fmt);
348         return 1;
349     }
350 
351     if (!drv->create_opts) {
352         error_report("Format driver '%s' does not support image creation", fmt);
353         return 1;
354     }
355 
356     create_opts = qemu_opts_append(create_opts, drv->create_opts);
357     if (filename) {
358         proto_drv = bdrv_find_protocol(filename, true, &local_err);
359         if (!proto_drv) {
360             error_report_err(local_err);
361             qemu_opts_free(create_opts);
362             return 1;
363         }
364         if (!proto_drv->create_opts) {
365             error_report("Protocol driver '%s' does not support image creation",
366                          proto_drv->format_name);
367             qemu_opts_free(create_opts);
368             return 1;
369         }
370         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
371     }
372 
373     if (filename) {
374         printf("Supported options:\n");
375     } else {
376         printf("Supported %s options:\n", fmt);
377     }
378     qemu_opts_print_help(create_opts, false);
379     qemu_opts_free(create_opts);
380 
381     if (!filename) {
382         printf("\n"
383                "The protocol level may support further options.\n"
384                "Specify the target filename to include those options.\n");
385     }
386 
387     return 0;
388 }
389 
390 
391 static BlockBackend *img_open_opts(const char *optstr,
392                                    QemuOpts *opts, int flags, bool writethrough,
393                                    bool quiet, bool force_share)
394 {
395     QDict *options;
396     Error *local_err = NULL;
397     BlockBackend *blk;
398     options = qemu_opts_to_qdict(opts, NULL);
399     if (force_share) {
400         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
401             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
402             error_report("--force-share/-U conflicts with image options");
403             qobject_unref(options);
404             return NULL;
405         }
406         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
407     }
408     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
409     if (!blk) {
410         error_reportf_err(local_err, "Could not open '%s': ", optstr);
411         return NULL;
412     }
413     blk_set_enable_write_cache(blk, !writethrough);
414 
415     return blk;
416 }
417 
418 static BlockBackend *img_open_file(const char *filename,
419                                    QDict *options,
420                                    const char *fmt, int flags,
421                                    bool writethrough, bool quiet,
422                                    bool force_share)
423 {
424     BlockBackend *blk;
425     Error *local_err = NULL;
426 
427     if (!options) {
428         options = qdict_new();
429     }
430     if (fmt) {
431         qdict_put_str(options, "driver", fmt);
432     }
433 
434     if (force_share) {
435         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
436     }
437     blk = blk_new_open(filename, NULL, options, flags, &local_err);
438     if (!blk) {
439         error_reportf_err(local_err, "Could not open '%s': ", filename);
440         return NULL;
441     }
442     blk_set_enable_write_cache(blk, !writethrough);
443 
444     return blk;
445 }
446 
447 
448 static int img_add_key_secrets(void *opaque,
449                                const char *name, const char *value,
450                                Error **errp)
451 {
452     QDict *options = opaque;
453 
454     if (g_str_has_suffix(name, "key-secret")) {
455         qdict_put_str(options, name, value);
456     }
457 
458     return 0;
459 }
460 
461 
462 static BlockBackend *img_open(bool image_opts,
463                               const char *filename,
464                               const char *fmt, int flags, bool writethrough,
465                               bool quiet, bool force_share)
466 {
467     BlockBackend *blk;
468     if (image_opts) {
469         QemuOpts *opts;
470         if (fmt) {
471             error_report("--image-opts and --format are mutually exclusive");
472             return NULL;
473         }
474         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
475                                        filename, true);
476         if (!opts) {
477             return NULL;
478         }
479         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
480                             force_share);
481     } else {
482         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
483                             force_share);
484     }
485 
486     if (blk) {
487         blk_set_force_allow_inactivate(blk);
488     }
489 
490     return blk;
491 }
492 
493 
494 static int add_old_style_options(const char *fmt, QemuOpts *opts,
495                                  const char *base_filename,
496                                  const char *base_fmt)
497 {
498     if (base_filename) {
499         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
500                           NULL)) {
501             error_report("Backing file not supported for file format '%s'",
502                          fmt);
503             return -1;
504         }
505     }
506     if (base_fmt) {
507         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
508             error_report("Backing file format not supported for file "
509                          "format '%s'", fmt);
510             return -1;
511         }
512     }
513     return 0;
514 }
515 
516 static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
517                            int64_t max)
518 {
519     int err;
520     uint64_t res;
521 
522     err = qemu_strtosz(value, NULL, &res);
523     if (err < 0 && err != -ERANGE) {
524         error_report("Invalid %s specified. You may use "
525                      "k, M, G, T, P or E suffixes for", name);
526         error_report("kilobytes, megabytes, gigabytes, terabytes, "
527                      "petabytes and exabytes.");
528         return err;
529     }
530     if (err == -ERANGE || res > max || res < min) {
531         error_report("Invalid %s specified. Must be between %" PRId64
532                      " and %" PRId64 ".", name, min, max);
533         return -ERANGE;
534     }
535     return res;
536 }
537 
/*
 * Convert the size string @value, accepting any value from 0 up to
 * INT64_MAX.  @name describes the value in error messages.
 * Returns the size, or a negative value after an error was reported
 * (see cvtnum_full()).
 */
static int64_t cvtnum(const char *name, const char *value)
{
    return cvtnum_full(name, value, 0, INT64_MAX);
}
542 
543 static int img_create(const img_cmd_t *ccmd, int argc, char **argv)
544 {
545     int c;
546     int64_t img_size = -1;
547     const char *fmt = "raw";
548     const char *base_fmt = NULL;
549     const char *filename;
550     const char *base_filename = NULL;
551     char *options = NULL;
552     Error *local_err = NULL;
553     bool quiet = false;
554     int flags = 0;
555 
556     for(;;) {
557         static const struct option long_options[] = {
558             {"help", no_argument, 0, 'h'},
559             {"format", required_argument, 0, 'f'},
560             {"options", required_argument, 0, 'o'},
561             {"backing", required_argument, 0, 'b'},
562             {"backing-format", required_argument, 0, 'B'}, /* was -F in 10.0 */
563             {"backing-unsafe", no_argument, 0, 'u'},
564             {"quiet", no_argument, 0, 'q'},
565             {"object", required_argument, 0, OPTION_OBJECT},
566             {0, 0, 0, 0}
567         };
568         c = getopt_long(argc, argv, "hf:o:b:F:B:uq",
569                         long_options, NULL);
570         if (c == -1) {
571             break;
572         }
573         switch(c) {
574         case 'h':
575             cmd_help(ccmd, "[-f FMT] [-o FMT_OPTS]\n"
576 "        [-b BACKING_FILE [-B BACKING_FMT]] [-u]\n"
577 "        [-q] [--object OBJDEF] FILE [SIZE]\n"
578 ,
579 "  -f, --format FMT\n"
580 "     specifies the format of the new image (default: raw)\n"
581 "  -o, --options FMT_OPTS\n"
582 "     format-specific options (specify '-o help' for help)\n"
583 "  -b, --backing BACKING_FILE\n"
584 "     create target image to be a CoW on top of BACKING_FILE\n"
585 "  -B, --backing-format BACKING_FMT (was -F in <= 10.0)\n"
586 "     specifies the format of BACKING_FILE (default: probing is used)\n"
587 "  -u, --backing-unsafe\n"
588 "     do not fail if BACKING_FILE can not be read\n"
589 "  -q, --quiet\n"
590 "     quiet mode (produce only error messages if any)\n"
591 "  --object OBJDEF\n"
592 "     defines QEMU user-creatable object\n"
593 "  FILE\n"
594 "     name of the image file to create (will be overritten if already exists)\n"
595 "  SIZE[bKMGTPE]\n"
596 "     image size with optional multiplier suffix (powers of 1024)\n"
597 "     (required unless BACKING_FILE is specified)\n"
598 );
599             break;
600         case 'f':
601             fmt = optarg;
602             break;
603         case 'o':
604             if (accumulate_options(&options, optarg) < 0) {
605                 goto fail;
606             }
607             break;
608         case 'b':
609             base_filename = optarg;
610             break;
611         case 'F': /* <=10.0 */
612         case 'B':
613             base_fmt = optarg;
614             break;
615         case 'u':
616             flags |= BDRV_O_NO_BACKING;
617             break;
618         case 'q':
619             quiet = true;
620             break;
621         case OPTION_OBJECT:
622             user_creatable_process_cmdline(optarg);
623             break;
624         default:
625             tryhelp(argv[0]);
626         }
627     }
628 
629     /* Get the filename */
630     filename = (optind < argc) ? argv[optind] : NULL;
631     if (options && has_help_option(options)) {
632         g_free(options);
633         return print_block_option_help(filename, fmt);
634     }
635 
636     if (optind >= argc) {
637         error_exit(argv[0], "Expecting image file name");
638     }
639     optind++;
640 
641     /* Get image size, if specified */
642     if (optind < argc) {
643         img_size = cvtnum("image size", argv[optind++]);
644         if (img_size < 0) {
645             goto fail;
646         }
647     }
648     if (optind != argc) {
649         error_exit(argv[0], "Unexpected argument: %s", argv[optind]);
650     }
651 
652     bdrv_img_create(filename, fmt, base_filename, base_fmt,
653                     options, img_size, flags, quiet, &local_err);
654     if (local_err) {
655         error_reportf_err(local_err, "%s: ", filename);
656         goto fail;
657     }
658 
659     g_free(options);
660     return 0;
661 
662 fail:
663     g_free(options);
664     return 1;
665 }
666 
667 static void dump_json_image_check(ImageCheck *check, bool quiet)
668 {
669     GString *str;
670     QObject *obj;
671     Visitor *v = qobject_output_visitor_new(&obj);
672 
673     visit_type_ImageCheck(v, NULL, &check, &error_abort);
674     visit_complete(v, &obj);
675     str = qobject_to_json_pretty(obj, true);
676     assert(str != NULL);
677     qprintf(quiet, "%s\n", str->str);
678     qobject_unref(obj);
679     visit_free(v);
680     g_string_free(str, true);
681 }
682 
683 static void dump_human_image_check(ImageCheck *check, bool quiet)
684 {
685     if (!(check->corruptions || check->leaks || check->check_errors)) {
686         qprintf(quiet, "No errors were found on the image.\n");
687     } else {
688         if (check->corruptions) {
689             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
690                     "Data may be corrupted, or further writes to the image "
691                     "may corrupt it.\n",
692                     check->corruptions);
693         }
694 
695         if (check->leaks) {
696             qprintf(quiet,
697                     "\n%" PRId64 " leaked clusters were found on the image.\n"
698                     "This means waste of disk space, but no harm to data.\n",
699                     check->leaks);
700         }
701 
702         if (check->check_errors) {
703             qprintf(quiet,
704                     "\n%" PRId64
705                     " internal errors have occurred during the check.\n",
706                     check->check_errors);
707         }
708     }
709 
710     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
711         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
712                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
713                 check->allocated_clusters, check->total_clusters,
714                 check->allocated_clusters * 100.0 / check->total_clusters,
715                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
716                 check->compressed_clusters * 100.0 /
717                 check->allocated_clusters);
718     }
719 
720     if (check->image_end_offset) {
721         qprintf(quiet,
722                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
723     }
724 }
725 
726 static int collect_image_check(BlockDriverState *bs,
727                    ImageCheck *check,
728                    const char *filename,
729                    const char *fmt,
730                    int fix)
731 {
732     int ret;
733     BdrvCheckResult result;
734 
735     ret = bdrv_check(bs, &result, fix);
736     if (ret < 0) {
737         return ret;
738     }
739 
740     check->filename                 = g_strdup(filename);
741     check->format                   = g_strdup(bdrv_get_format_name(bs));
742     check->check_errors             = result.check_errors;
743     check->corruptions              = result.corruptions;
744     check->has_corruptions          = result.corruptions != 0;
745     check->leaks                    = result.leaks;
746     check->has_leaks                = result.leaks != 0;
747     check->corruptions_fixed        = result.corruptions_fixed;
748     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
749     check->leaks_fixed              = result.leaks_fixed;
750     check->has_leaks_fixed          = result.leaks_fixed != 0;
751     check->image_end_offset         = result.image_end_offset;
752     check->has_image_end_offset     = result.image_end_offset != 0;
753     check->total_clusters           = result.bfi.total_clusters;
754     check->has_total_clusters       = result.bfi.total_clusters != 0;
755     check->allocated_clusters       = result.bfi.allocated_clusters;
756     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
757     check->fragmented_clusters      = result.bfi.fragmented_clusters;
758     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
759     check->compressed_clusters      = result.bfi.compressed_clusters;
760     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
761 
762     return 0;
763 }
764 
765 /*
766  * Checks an image for consistency. Exit codes:
767  *
768  *  0 - Check completed, image is good
769  *  1 - Check not completed because of internal errors
770  *  2 - Check completed, image is corrupted
771  *  3 - Check completed, image has leaked clusters, but is good otherwise
772  * 63 - Checks are not supported by the image format
773  */
774 static int img_check(const img_cmd_t *ccmd, int argc, char **argv)
775 {
776     int c, ret;
777     OutputFormat output_format = OFORMAT_HUMAN;
778     const char *filename, *fmt, *output, *cache;
779     BlockBackend *blk;
780     BlockDriverState *bs;
781     int fix = 0;
782     int flags = BDRV_O_CHECK;
783     bool writethrough;
784     ImageCheck *check;
785     bool quiet = false;
786     bool image_opts = false;
787     bool force_share = false;
788 
789     fmt = NULL;
790     output = NULL;
791     cache = BDRV_DEFAULT_CACHE;
792 
793     for(;;) {
794         int option_index = 0;
795         static const struct option long_options[] = {
796             {"help", no_argument, 0, 'h'},
797             {"format", required_argument, 0, 'f'},
798             {"repair", required_argument, 0, 'r'},
799             {"output", required_argument, 0, OPTION_OUTPUT},
800             {"object", required_argument, 0, OPTION_OBJECT},
801             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
802             {"force-share", no_argument, 0, 'U'},
803             {0, 0, 0, 0}
804         };
805         c = getopt_long(argc, argv, ":hf:r:T:qU",
806                         long_options, &option_index);
807         if (c == -1) {
808             break;
809         }
810         switch(c) {
811         case ':':
812             missing_argument(argv[optind - 1]);
813             break;
814         case '?':
815             unrecognized_option(argv[optind - 1]);
816             break;
817         case 'h':
818             help();
819             break;
820         case 'f':
821             fmt = optarg;
822             break;
823         case 'r':
824             flags |= BDRV_O_RDWR;
825 
826             if (!strcmp(optarg, "leaks")) {
827                 fix = BDRV_FIX_LEAKS;
828             } else if (!strcmp(optarg, "all")) {
829                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
830             } else {
831                 error_exit(argv[0], "Unknown option value for -r "
832                            "(expecting 'leaks' or 'all'): %s", optarg);
833             }
834             break;
835         case OPTION_OUTPUT:
836             output = optarg;
837             break;
838         case 'T':
839             cache = optarg;
840             break;
841         case 'q':
842             quiet = true;
843             break;
844         case 'U':
845             force_share = true;
846             break;
847         case OPTION_OBJECT:
848             user_creatable_process_cmdline(optarg);
849             break;
850         case OPTION_IMAGE_OPTS:
851             image_opts = true;
852             break;
853         }
854     }
855     if (optind != argc - 1) {
856         error_exit(argv[0], "Expecting one image file name");
857     }
858     filename = argv[optind++];
859 
860     if (output && !strcmp(output, "json")) {
861         output_format = OFORMAT_JSON;
862     } else if (output && !strcmp(output, "human")) {
863         output_format = OFORMAT_HUMAN;
864     } else if (output) {
865         error_report("--output must be used with human or json as argument.");
866         return 1;
867     }
868 
869     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
870     if (ret < 0) {
871         error_report("Invalid source cache option: %s", cache);
872         return 1;
873     }
874 
875     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
876                    force_share);
877     if (!blk) {
878         return 1;
879     }
880     bs = blk_bs(blk);
881 
882     check = g_new0(ImageCheck, 1);
883     ret = collect_image_check(bs, check, filename, fmt, fix);
884 
885     if (ret == -ENOTSUP) {
886         error_report("This image format does not support checks");
887         ret = 63;
888         goto fail;
889     }
890 
891     if (check->corruptions_fixed || check->leaks_fixed) {
892         int corruptions_fixed, leaks_fixed;
893         bool has_leaks_fixed, has_corruptions_fixed;
894 
895         leaks_fixed         = check->leaks_fixed;
896         has_leaks_fixed     = check->has_leaks_fixed;
897         corruptions_fixed   = check->corruptions_fixed;
898         has_corruptions_fixed = check->has_corruptions_fixed;
899 
900         if (output_format == OFORMAT_HUMAN) {
901             qprintf(quiet,
902                     "The following inconsistencies were found and repaired:\n\n"
903                     "    %" PRId64 " leaked clusters\n"
904                     "    %" PRId64 " corruptions\n\n"
905                     "Double checking the fixed image now...\n",
906                     check->leaks_fixed,
907                     check->corruptions_fixed);
908         }
909 
910         qapi_free_ImageCheck(check);
911         check = g_new0(ImageCheck, 1);
912         ret = collect_image_check(bs, check, filename, fmt, 0);
913 
914         check->leaks_fixed          = leaks_fixed;
915         check->has_leaks_fixed      = has_leaks_fixed;
916         check->corruptions_fixed    = corruptions_fixed;
917         check->has_corruptions_fixed = has_corruptions_fixed;
918     }
919 
920     if (!ret) {
921         switch (output_format) {
922         case OFORMAT_HUMAN:
923             dump_human_image_check(check, quiet);
924             break;
925         case OFORMAT_JSON:
926             dump_json_image_check(check, quiet);
927             break;
928         }
929     }
930 
931     if (ret || check->check_errors) {
932         if (ret) {
933             error_report("Check failed: %s", strerror(-ret));
934         } else {
935             error_report("Check failed");
936         }
937         ret = 1;
938         goto fail;
939     }
940 
941     if (check->corruptions) {
942         ret = 2;
943     } else if (check->leaks) {
944         ret = 3;
945     } else {
946         ret = 0;
947     }
948 
949 fail:
950     qapi_free_ImageCheck(check);
951     blk_unref(blk);
952     return ret;
953 }
954 
/*
 * Opaque data passed to common_block_job_cb() as the block job's
 * completion callback argument.
 */
955 typedef struct CommonBlockJobCBInfo {
    /* Node the job was started on; set by img_commit(), not read by the callback */
956     BlockDriverState *bs;
    /* Destination for the error the callback raises when the job fails */
957     Error **errp;
958 } CommonBlockJobCBInfo;
959 
960 static void common_block_job_cb(void *opaque, int ret)
961 {
962     CommonBlockJobCBInfo *cbi = opaque;
963 
964     if (ret < 0) {
965         error_setg_errno(cbi->errp, -ret, "Block job failed");
966     }
967 }
968 
969 static void run_block_job(BlockJob *job, Error **errp)
970 {
971     uint64_t progress_current, progress_total;
972     AioContext *aio_context = block_job_get_aio_context(job);
973     int ret = 0;
974 
975     job_lock();
976     job_ref_locked(&job->job);
977     do {
978         float progress = 0.0f;
979         job_unlock();
980         aio_poll(aio_context, true);
981 
982         progress_get_snapshot(&job->job.progress, &progress_current,
983                               &progress_total);
984         if (progress_total) {
985             progress = (float)progress_current / progress_total * 100.f;
986         }
987         qemu_progress_print(progress, 0);
988         job_lock();
989     } while (!job_is_ready_locked(&job->job) &&
990              !job_is_completed_locked(&job->job));
991 
992     if (!job_is_completed_locked(&job->job)) {
993         ret = job_complete_sync_locked(&job->job, errp);
994     } else {
995         ret = job->job.ret;
996     }
997     job_unref_locked(&job->job);
998     job_unlock();
999 
1000     /* publish completion progress only when success */
1001     if (!ret) {
1002         qemu_progress_print(100.f, 0);
1003     }
1004 }
1005 
/*
 * Implementation of the 'commit' subcommand.
 *
 * Opens the single image named on the command line read-write, picks a
 * target node in its backing chain (the one named by -b, otherwise the
 * immediate backing node), starts an active commit job called "commit"
 * towards that node and runs it to completion.  Unless -d (or -b, which
 * implies -d) was given, the top image is emptied afterwards; -ENOTSUP
 * from blk_make_empty() is not treated as an error.
 *
 * Options handled: -f FMT, -t CACHE, -b BASE, -d, -p, -q, -r RATE,
 * --object, --image-opts, -h/--help.
 *
 * Returns 0 on success, 1 on failure.
 * NOTE(review): missing_argument(), unrecognized_option(), help() and
 * error_exit() are presumably noreturn -- confirm at their definitions.
 */
1006 static int img_commit(const img_cmd_t *ccmd, int argc, char **argv)
1007 {
1008     int c, ret, flags;
1009     const char *filename, *fmt, *cache, *base;
1010     BlockBackend *blk;
1011     BlockDriverState *bs, *base_bs;
1012     BlockJob *job;
1013     bool progress = false, quiet = false, drop = false;
1014     bool writethrough;
1015     Error *local_err = NULL;
1016     CommonBlockJobCBInfo cbi;
1017     bool image_opts = false;
1018     int64_t rate_limit = 0;
1019 
1020     fmt = NULL;
1021     cache = BDRV_DEFAULT_CACHE;
1022     base = NULL;
1023     for(;;) {
1024         static const struct option long_options[] = {
1025             {"help", no_argument, 0, 'h'},
1026             {"object", required_argument, 0, OPTION_OBJECT},
1027             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1028             {0, 0, 0, 0}
1029         };
        /* Leading ':' makes getopt_long() return ':' for a missing argument */
1030         c = getopt_long(argc, argv, ":f:ht:b:dpqr:",
1031                         long_options, NULL);
1032         if (c == -1) {
1033             break;
1034         }
1035         switch(c) {
1036         case ':':
1037             missing_argument(argv[optind - 1]);
1038             break;
1039         case '?':
1040             unrecognized_option(argv[optind - 1]);
1041             break;
1042         case 'h':
1043             help();
1044             break;
1045         case 'f':
1046             fmt = optarg;
1047             break;
1048         case 't':
1049             cache = optarg;
1050             break;
1051         case 'b':
1052             base = optarg;
1053             /* -b implies -d */
1054             drop = true;
1055             break;
1056         case 'd':
1057             drop = true;
1058             break;
1059         case 'p':
1060             progress = true;
1061             break;
1062         case 'q':
1063             quiet = true;
1064             break;
1065         case 'r':
1066             rate_limit = cvtnum("rate limit", optarg);
            /* A negative result is treated as a parse failure */
1067             if (rate_limit < 0) {
1068                 return 1;
1069             }
1070             break;
1071         case OPTION_OBJECT:
1072             user_creatable_process_cmdline(optarg);
1073             break;
1074         case OPTION_IMAGE_OPTS:
1075             image_opts = true;
1076             break;
1077         }
1078     }
1079 
1080     /* Progress is not shown in Quiet mode */
1081     if (quiet) {
1082         progress = false;
1083     }
1084 
1085     if (optind != argc - 1) {
1086         error_exit(argv[0], "Expecting one image file name");
1087     }
1088     filename = argv[optind++];
1089 
1090     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1091     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1092     if (ret < 0) {
1093         error_report("Invalid cache option: %s", cache);
1094         return 1;
1095     }
1096 
1097     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1098                    false);
1099     if (!blk) {
1100         return 1;
1101     }
1102     bs = blk_bs(blk);
1103 
    /* From here on every exit path goes through 'done', which ends progress */
1104     qemu_progress_init(progress, 1.f);
1105     qemu_progress_print(0.f, 100);
1106 
    /* The backing chain is only inspected while holding the graph read lock */
1107     bdrv_graph_rdlock_main_loop();
1108     if (base) {
1109         base_bs = bdrv_find_backing_image(bs, base);
1110         if (!base_bs) {
1111             error_setg(&local_err,
1112                        "Did not find '%s' in the backing chain of '%s'",
1113                        base, filename);
1114             bdrv_graph_rdunlock_main_loop();
1115             goto done;
1116         }
1117     } else {
1118         /* This is different from QMP, which by default uses the deepest file in
1119          * the backing chain (i.e., the very base); however, the traditional
1120          * behavior of qemu-img commit is using the immediate backing file. */
1121         base_bs = bdrv_backing_chain_next(bs);
1122         if (!base_bs) {
1123             error_setg(&local_err, "Image does not have a backing file");
1124             bdrv_graph_rdunlock_main_loop();
1125             goto done;
1126         }
1127     }
1128     bdrv_graph_rdunlock_main_loop();
1129 
    /* Job failures are delivered into local_err by common_block_job_cb() */
1130     cbi = (CommonBlockJobCBInfo){
1131         .errp = &local_err,
1132         .bs   = bs,
1133     };
1134 
1135     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1136                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1137                         &cbi, false, &local_err);
1138     if (local_err) {
1139         goto done;
1140     }
1141 
1142     /* When the block job completes, the BlockBackend reference will point to
1143      * the old backing file. In order to avoid that the top image is already
1144      * deleted, so we can still empty it afterwards, increment the reference
1145      * counter here preemptively. */
1146     if (!drop) {
1147         bdrv_ref(bs);
1148     }
1149 
    /* Look the job up by the ID passed to commit_active_start() above */
1150     job = block_job_get("commit");
1151     assert(job);
1152     run_block_job(job, &local_err);
1153     if (local_err) {
1154         goto unref_backing;
1155     }
1156 
1157     if (!drop) {
1158         BlockBackend *old_backing_blk;
1159 
        /* Temporary BlockBackend on the former top node, used to empty it */
1160         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1161                                           &local_err);
1162         if (!old_backing_blk) {
1163             goto unref_backing;
1164         }
1165         ret = blk_make_empty(old_backing_blk, &local_err);
1166         blk_unref(old_backing_blk);
        /* Not being able to empty the image is not a failure of the commit */
1167         if (ret == -ENOTSUP) {
1168             error_free(local_err);
1169             local_err = NULL;
1170         } else if (ret < 0) {
1171             goto unref_backing;
1172         }
1173     }
1174 
1175 unref_backing:
    /* Drops the extra reference taken before the job was run */
1176     if (!drop) {
1177         bdrv_unref(bs);
1178     }
1179 
1180 done:
1181     qemu_progress_end();
1182 
1183     /*
1184      * Manually inactivate the image first because this way we can know whether
1185      * an error occurred. blk_unref() doesn't tell us about failures.
1186      */
1187     ret = bdrv_inactivate_all();
    /* An earlier error takes precedence over a failure to inactivate */
1188     if (ret < 0 && !local_err) {
1189         error_setg_errno(&local_err, -ret, "Error while closing the image");
1190     }
1191     blk_unref(blk);
1192 
1193     if (local_err) {
1194         error_report_err(local_err);
1195         return 1;
1196     }
1197 
1198     qprintf(quiet, "Image committed.\n");
1199     return 0;
1200 }
1201 
1202 /*
1203  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1204  * of the first sector boundary within buf where the sector contains a
1205  * non-zero byte.  This function is robust to a buffer that is not
1206  * sector-aligned.
1207  */
1208 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1209 {
1210     int64_t i;
1211     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1212 
1213     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1214         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1215             return i;
1216         }
1217     }
1218     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1219         return i;
1220     }
1221     return -1;
1222 }
1223 
1224 /*
1225  * Returns true iff the first sector pointed to by 'buf' contains at least
1226  * a non-NUL byte.
1227  *
1228  * 'pnum' is set to the number of sectors (including and immediately following
1229  * the first one) that are known to be in the same allocated/unallocated state.
1230  * The function will try to align the end offset to alignment boundaries so
1231  * that the request will at least end aligned and consecutive requests will
1232  * also start at an aligned offset.
1233  */
1234 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1235                                 int64_t sector_num, int alignment)
1236 {
1237     bool is_zero;
1238     int i, tail;
1239 
1240     if (n <= 0) {
1241         *pnum = 0;
1242         return 0;
1243     }
1244     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1245     for(i = 1; i < n; i++) {
1246         buf += BDRV_SECTOR_SIZE;
1247         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1248             break;
1249         }
1250     }
1251 
1252     if (i == n) {
1253         /*
1254          * The whole buf is the same.
1255          * No reason to split it into chunks, so return now.
1256          */
1257         *pnum = i;
1258         return !is_zero;
1259     }
1260 
1261     tail = (sector_num + i) & (alignment - 1);
1262     if (tail) {
1263         if (is_zero && i <= tail) {
1264             /*
1265              * For sure next sector after i is data, and it will rewrite this
1266              * tail anyway due to RMW. So, let's just write data now.
1267              */
1268             is_zero = false;
1269         }
1270         if (!is_zero) {
1271             /* If possible, align up end offset of allocated areas. */
1272             i += alignment - tail;
1273             i = MIN(i, n);
1274         } else {
1275             /*
1276              * For sure next sector after i is data, and it will rewrite this
1277              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1278              * to aligned bound.
1279              */
1280             i -= tail;
1281         }
1282     }
1283     *pnum = i;
1284     return !is_zero;
1285 }
1286 
1287 /*
1288  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1289  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1290  * breaking up write requests for only small sparse areas.
1291  */
1292 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1293     int min, int64_t sector_num, int alignment)
1294 {
1295     int ret;
1296     int num_checked, num_used;
1297 
1298     if (n < min) {
1299         min = n;
1300     }
1301 
1302     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1303     if (!ret) {
1304         return ret;
1305     }
1306 
1307     num_used = *pnum;
1308     buf += BDRV_SECTOR_SIZE * *pnum;
1309     n -= *pnum;
1310     sector_num += *pnum;
1311     num_checked = num_used;
1312 
1313     while (n > 0) {
1314         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1315 
1316         buf += BDRV_SECTOR_SIZE * *pnum;
1317         n -= *pnum;
1318         sector_num += *pnum;
1319         num_checked += *pnum;
1320         if (ret) {
1321             num_used = num_checked;
1322         } else if (*pnum >= min) {
1323             break;
1324         }
1325     }
1326 
1327     *pnum = num_used;
1328     return 1;
1329 }
1330 
1331 /*
1332  * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1333  * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1334  * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1335  *
1336  * @pnum is set to the size of the buffer prefix aligned to @chsize that
1337  * has the same matching status as the first chunk.
1338  */
1339 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1340                            int64_t bytes, uint64_t chsize, int64_t *pnum)
1341 {
1342     bool res;
1343     int64_t i;
1344 
1345     assert(bytes > 0);
1346 
1347     if (!chsize) {
1348         chsize = BDRV_SECTOR_SIZE;
1349     }
1350     i = MIN(bytes, chsize);
1351 
1352     res = !!memcmp(buf1, buf2, i);
1353     while (i < bytes) {
1354         int64_t len = MIN(bytes - i, chsize);
1355 
1356         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1357             break;
1358         }
1359         i += len;
1360     }
1361 
1362     *pnum = i;
1363     return res;
1364 }
1365 
/* Size of the I/O buffers allocated by img_compare(); also caps each read there */
1366 #define IO_BUF_SIZE (2 * MiB)
1367 
1368 /*
1369  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1370  *
1371  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1372  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1373  * failure), and 4 on error (the exit status for read errors), after emitting
1374  * an error message.
1375  *
1376  * @param blk:  BlockBackend for the image
1377  * @param offset: Starting offset to check
1378  * @param bytes: Number of bytes to check
1379  * @param filename: Name of disk file we are checking (logging purpose)
1380  * @param buffer: Allocated buffer for storing read data
1381  * @param quiet: Flag for quiet mode
1382  */
1383 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1384                                int64_t bytes, const char *filename,
1385                                uint8_t *buffer, bool quiet)
1386 {
1387     int ret = 0;
1388     int64_t idx;
1389 
1390     ret = blk_pread(blk, offset, bytes, buffer, 0);
1391     if (ret < 0) {
1392         error_report("Error while reading offset %" PRId64 " of %s: %s",
1393                      offset, filename, strerror(-ret));
1394         return 4;
1395     }
1396     idx = find_nonzero(buffer, bytes);
1397     if (idx >= 0) {
1398         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1399                 offset + idx);
1400         return 1;
1401     }
1402 
1403     return 0;
1404 }
1405 
1406 /*
1407  * Compares two images. Exit codes:
1408  *
1409  * 0 - Images are identical or the requested help was printed
1410  * 1 - Images differ
1411  * >1 - Error occurred
 *
 * As returned by the code below: 2 for an invalid cache mode, a failure to
 * open either image or a failed --object; 3 when a block status query
 * fails; 4 when an image size cannot be determined or a read fails.
 *
 * Options handled: -f FMT (first image), -F FMT (second image), -T CACHE,
 * -p, -q, -s (strict), -U/--force-share, --object, --image-opts, -h/--help.
 *
 * Without -s, images of different size still compare equal as long as the
 * part of the larger image beyond the end of the smaller one reads as
 * zeroes.  With -s, a size mismatch or any differing block status is
 * reported as a difference.
1412  */
1413 static int img_compare(const img_cmd_t *ccmd, int argc, char **argv)
1414 {
1415     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1416     BlockBackend *blk1, *blk2;
1417     BlockDriverState *bs1, *bs2;
1418     int64_t total_size1, total_size2;
1419     uint8_t *buf1 = NULL, *buf2 = NULL;
1420     int64_t pnum1, pnum2;
1421     int allocated1, allocated2;
1422     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1423     bool progress = false, quiet = false, strict = false;
1424     int flags;
1425     bool writethrough;
1426     int64_t total_size;
1427     int64_t offset = 0;
1428     int64_t chunk;
1429     int c;
1430     uint64_t progress_base;
1431     bool image_opts = false;
1432     bool force_share = false;
1433 
1434     cache = BDRV_DEFAULT_CACHE;
1435     for (;;) {
1436         static const struct option long_options[] = {
1437             {"help", no_argument, 0, 'h'},
1438             {"object", required_argument, 0, OPTION_OBJECT},
1439             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1440             {"force-share", no_argument, 0, 'U'},
1441             {0, 0, 0, 0}
1442         };
        /* Leading ':' makes getopt_long() return ':' for a missing argument */
1443         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1444                         long_options, NULL);
1445         if (c == -1) {
1446             break;
1447         }
1448         switch (c) {
1449         case ':':
1450             missing_argument(argv[optind - 1]);
1451             break;
1452         case '?':
1453             unrecognized_option(argv[optind - 1]);
1454             break;
1455         case 'h':
1456             help();
1457             break;
1458         case 'f':
1459             fmt1 = optarg;
1460             break;
1461         case 'F':
1462             fmt2 = optarg;
1463             break;
1464         case 'T':
1465             cache = optarg;
1466             break;
1467         case 'p':
1468             progress = true;
1469             break;
1470         case 'q':
1471             quiet = true;
1472             break;
1473         case 's':
1474             strict = true;
1475             break;
1476         case 'U':
1477             force_share = true;
1478             break;
1479         case OPTION_OBJECT:
1480             {
1481                 Error *local_err = NULL;
1482 
                /*
                 * Handled inline so that a failure exits with status 2:
                 * status 1 means "images differ" for this subcommand.
                 */
1483                 if (!user_creatable_add_from_str(optarg, &local_err)) {
1484                     if (local_err) {
1485                         error_report_err(local_err);
1486                         exit(2);
1487                     } else {
1488                         /* Help was printed */
1489                         exit(EXIT_SUCCESS);
1490                     }
1491                 }
1492                 break;
1493             }
1494         case OPTION_IMAGE_OPTS:
1495             image_opts = true;
1496             break;
1497         }
1498     }
1499 
1500     /* Progress is not shown in Quiet mode */
1501     if (quiet) {
1502         progress = false;
1503     }
1504 
1505 
1506     if (optind != argc - 2) {
1507         error_exit(argv[0], "Expecting two image file names");
1508     }
1509     filename1 = argv[optind++];
1510     filename2 = argv[optind++];
1511 
1512     /* Initialize before goto out */
1513     qemu_progress_init(progress, 2.0);
1514 
1515     flags = 0;
1516     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1517     if (ret < 0) {
1518         error_report("Invalid source cache option: %s", cache);
1519         ret = 2;
1520         goto out3;
1521     }
1522 
1523     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1524                     force_share);
1525     if (!blk1) {
1526         ret = 2;
1527         goto out3;
1528     }
1529 
1530     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1531                     force_share);
1532     if (!blk2) {
1533         ret = 2;
1534         goto out2;
1535     }
1536     bs1 = blk_bs(blk1);
1537     bs2 = blk_bs(blk2);
1538 
1539     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1540     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1541     total_size1 = blk_getlength(blk1);
1542     if (total_size1 < 0) {
1543         error_report("Can't get size of %s: %s",
1544                      filename1, strerror(-total_size1));
1545         ret = 4;
1546         goto out;
1547     }
1548     total_size2 = blk_getlength(blk2);
1549     if (total_size2 < 0) {
1550         error_report("Can't get size of %s: %s",
1551                      filename2, strerror(-total_size2));
1552         ret = 4;
1553         goto out;
1554     }
    /*
     * total_size is the range present in both images; progress_base is the
     * size of the larger image and the denominator for progress reporting.
     */
1555     total_size = MIN(total_size1, total_size2);
1556     progress_base = MAX(total_size1, total_size2);
1557 
1558     qemu_progress_print(0, 100);
1559 
1560     if (strict && total_size1 != total_size2) {
1561         ret = 1;
1562         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1563         goto out;
1564     }
1565 
    /* Phase 1: walk the range that exists in both images */
1566     while (offset < total_size) {
1567         int status1, status2;
1568 
1569         status1 = bdrv_block_status_above(bs1, NULL, offset,
1570                                           total_size1 - offset, &pnum1, NULL,
1571                                           NULL);
1572         if (status1 < 0) {
1573             ret = 3;
1574             error_report("Sector allocation test failed for %s", filename1);
1575             goto out;
1576         }
1577         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1578 
1579         status2 = bdrv_block_status_above(bs2, NULL, offset,
1580                                           total_size2 - offset, &pnum2, NULL,
1581                                           NULL);
1582         if (status2 < 0) {
1583             ret = 3;
1584             error_report("Sector allocation test failed for %s", filename2);
1585             goto out;
1586         }
1587         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1588 
        /* Only handle the part over which both status results hold */
1589         assert(pnum1 && pnum2);
1590         chunk = MIN(pnum1, pnum2);
1591 
1592         if (strict) {
1593             if (status1 != status2) {
1594                 ret = 1;
1595                 qprintf(quiet, "Strict mode: Offset %" PRId64
1596                         " block status mismatch!\n", offset);
1597                 goto out;
1598             }
1599         }
1600         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1601             /* nothing to do */
1602         } else if (allocated1 == allocated2) {
            /* Both unallocated needs no check; both allocated is compared */
1603             if (allocated1) {
1604                 int64_t pnum;
1605 
1606                 chunk = MIN(chunk, IO_BUF_SIZE);
1607                 ret = blk_pread(blk1, offset, chunk, buf1, 0);
1608                 if (ret < 0) {
1609                     error_report("Error while reading offset %" PRId64
1610                                  " of %s: %s",
1611                                  offset, filename1, strerror(-ret));
1612                     ret = 4;
1613                     goto out;
1614                 }
1615                 ret = blk_pread(blk2, offset, chunk, buf2, 0);
1616                 if (ret < 0) {
1617                     error_report("Error while reading offset %" PRId64
1618                                  " of %s: %s",
1619                                  offset, filename2, strerror(-ret));
1620                     ret = 4;
1621                     goto out;
1622                 }
1623                 ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                /*
                 * ret != 0: the very first chunk differs; otherwise pnum is
                 * the length of the equal prefix and the mismatch starts
                 * right after it.
                 */
1624                 if (ret || pnum != chunk) {
1625                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1626                             offset + (ret ? 0 : pnum));
1627                     ret = 1;
1628                     goto out;
1629                 }
1630             }
1631         } else {
            /*
             * Only one side is allocated: that side has to read as zeroes.
             * buf1 serves as the read buffer in both branches.
             */
1632             chunk = MIN(chunk, IO_BUF_SIZE);
1633             if (allocated1) {
1634                 ret = check_empty_sectors(blk1, offset, chunk,
1635                                           filename1, buf1, quiet);
1636             } else {
1637                 ret = check_empty_sectors(blk2, offset, chunk,
1638                                           filename2, buf1, quiet);
1639             }
1640             if (ret) {
1641                 goto out;
1642             }
1643         }
1644         offset += chunk;
1645         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1646     }
1647 
    /* Phase 2: the part of the larger image past the end of the smaller one */
1648     if (total_size1 != total_size2) {
1649         BlockBackend *blk_over;
1650         const char *filename_over;
1651 
1652         qprintf(quiet, "Warning: Image size mismatch!\n");
1653         if (total_size1 > total_size2) {
1654             blk_over = blk1;
1655             filename_over = filename1;
1656         } else {
1657             blk_over = blk2;
1658             filename_over = filename2;
1659         }
1660 
1661         while (offset < progress_base) {
1662             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1663                                           progress_base - offset, &chunk,
1664                                           NULL, NULL);
1665             if (ret < 0) {
1666                 ret = 3;
1667                 error_report("Sector allocation test failed for %s",
1668                              filename_over);
1669                 goto out;
1670 
1671             }
            /* Data is only read where the status does not already say zero */
1672             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1673                 chunk = MIN(chunk, IO_BUF_SIZE);
1674                 ret = check_empty_sectors(blk_over, offset, chunk,
1675                                           filename_over, buf1, quiet);
1676                 if (ret) {
1677                     goto out;
1678                 }
1679             }
1680             offset += chunk;
1681             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1682         }
1683     }
1684 
1685     qprintf(quiet, "Images are identical.\n");
1686     ret = 0;
1687 
/* Cleanup labels: each one releases what was acquired before its gotos */
1688 out:
1689     qemu_vfree(buf1);
1690     qemu_vfree(buf2);
1691     blk_unref(blk2);
1692 out2:
1693     blk_unref(blk1);
1694 out3:
1695     qemu_progress_end();
1696     return ret;
1697 }
1698 
1699 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1700 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1701                                   const char *src_node, const char *src_name,
1702                                   Error **errp)
1703 {
1704     BlockDirtyBitmapOrStr *merge_src;
1705     BlockDirtyBitmapOrStrList *list = NULL;
1706 
1707     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1708     merge_src->type = QTYPE_QDICT;
1709     merge_src->u.external.node = g_strdup(src_node);
1710     merge_src->u.external.name = g_strdup(src_name);
1711     QAPI_LIST_PREPEND(list, merge_src);
1712     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1713     qapi_free_BlockDirtyBitmapOrStrList(list);
1714 }
1715 
/* How a range of the source is classified; stored in ImgConvertState.status */
1716 enum ImgConvertBlockStatus {
    /* NOTE(review): presumably a range whose data must be read and copied -- confirm */
1717     BLK_DATA,
    /* Selected by convert_iteration_sectors() for ranges reported as BDRV_BLOCK_ZERO */
1718     BLK_ZERO,
    /*
     * Selected instead of BLK_ZERO for zero ranges starting at or beyond
     * target_backing_sectors; other uses may exist outside this excerpt.
     */
1719     BLK_BACKING_FILE,
1720 };
1721 
/* Dimension of the per-coroutine arrays (co[], wait_sector_num[]) in ImgConvertState */
1722 #define MAX_COROUTINES 16
/* NOTE(review): presumably the throttle group name used when rate-limiting convert -- confirm */
1723 #define CONVERT_THROTTLE_GROUP "img_convert"
1724 
/*
 * State shared by the helpers of the convert operation.
 *
 * Only fields whose use is visible near this definition are described;
 * the remaining ones are left undocumented rather than guessed at.
 */
1725 typedef struct ImgConvertState {
    /* Source images, treated as one concatenated range of sectors */
1726     BlockBackend **src;
    /* Length in sectors of each entry of src[] */
1727     int64_t *src_sectors;
    /* Per-source alignment in sectors, used to keep status boundaries aligned */
1728     int *src_alignment;
    /* Number of entries in src[], src_sectors[] and src_alignment[] */
1729     int src_num;
    /* Upper bound for sector numbers passed to convert_iteration_sectors() */
1730     int64_t total_sectors;
1731     int64_t allocated_sectors;
1732     int64_t allocated_done;
1733     int64_t sector_num;
1734     int64_t wr_offs;
    /* Classification of the range most recently queried */
1735     enum ImgConvertBlockStatus status;
    /* Block status is queried again once the current sector reaches this value */
1736     int64_t sector_next_status;
1737     BlockBackend *target;
1738     bool has_zero_init;
1739     bool compressed;
1740     bool target_is_new;
    /* When set, block status is queried relative to the source's COW backing node */
1741     bool target_has_backing;
1742     int64_t target_backing_sectors; /* negative if unknown */
1743     bool wr_in_order;
1744     bool copy_range;
    /* Keep going on block status errors: retry on shorter ranges, then assume data */
1745     bool salvage;
    /* Suppresses the warning printed in salvage mode */
1746     bool quiet;
1747     int min_sparse;
1748     int alignment;
1749     size_t cluster_sectors;
1750     size_t buf_sectors;
1751     long num_coroutines;
1752     int running_coroutines;
1753     Coroutine *co[MAX_COROUTINES];
1754     int64_t wait_sector_num[MAX_COROUTINES];
1755     CoMutex lock;
1756     int ret;
1757 } ImgConvertState;
1758 
1759 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1760                                 int *src_cur, int64_t *src_cur_offset)
1761 {
1762     *src_cur = 0;
1763     *src_cur_offset = 0;
1764     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1765         *src_cur_offset += s->src_sectors[*src_cur];
1766         (*src_cur)++;
1767         assert(*src_cur < s->src_num);
1768     }
1769 }
1770 
1771 static int coroutine_mixed_fn GRAPH_RDLOCK
1772 convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1773 {
1774     int64_t src_cur_offset;
1775     int ret, n, src_cur;
1776     bool post_backing_zero = false;
1777 
1778     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1779 
1780     assert(s->total_sectors > sector_num);
1781     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1782 
1783     if (s->target_backing_sectors >= 0) {
1784         if (sector_num >= s->target_backing_sectors) {
1785             post_backing_zero = true;
1786         } else if (sector_num + n > s->target_backing_sectors) {
1787             /* Split requests around target_backing_sectors (because
1788              * starting from there, zeros are handled differently) */
1789             n = s->target_backing_sectors - sector_num;
1790         }
1791     }
1792 
1793     if (s->sector_next_status <= sector_num) {
1794         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1795         int64_t count;
1796         int tail;
1797         BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
1798         BlockDriverState *base;
1799 
1800         if (s->target_has_backing) {
1801             base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
1802         } else {
1803             base = NULL;
1804         }
1805 
1806         do {
1807             count = n * BDRV_SECTOR_SIZE;
1808 
1809             ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
1810                                           NULL, NULL);
1811 
1812             if (ret < 0) {
1813                 if (s->salvage) {
1814                     if (n == 1) {
1815                         if (!s->quiet) {
1816                             warn_report("error while reading block status at "
1817                                         "offset %" PRIu64 ": %s", offset,
1818                                         strerror(-ret));
1819                         }
1820                         /* Just try to read the data, then */
1821                         ret = BDRV_BLOCK_DATA;
1822                         count = BDRV_SECTOR_SIZE;
1823                     } else {
1824                         /* Retry on a shorter range */
1825                         n = DIV_ROUND_UP(n, 4);
1826                     }
1827                 } else {
1828                     error_report("error while reading block status at offset "
1829                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1830                     return ret;
1831                 }
1832             }
1833         } while (ret < 0);
1834 
1835         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1836 
1837         /*
1838          * Avoid that s->sector_next_status becomes unaligned to the source
1839          * request alignment and/or cluster size to avoid unnecessary read
1840          * cycles.
1841          */
1842         tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
1843         if (n > tail) {
1844             n -= tail;
1845         }
1846 
1847         if (ret & BDRV_BLOCK_ZERO) {
1848             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1849         } else if (ret & BDRV_BLOCK_DATA) {
1850             s->status = BLK_DATA;
1851         } else {
1852             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1853         }
1854 
1855         s->sector_next_status = sector_num + n;
1856     }
1857 
1858     n = MIN(n, s->sector_next_status - sector_num);
1859     if (s->status == BLK_DATA) {
1860         n = MIN(n, s->buf_sectors);
1861     }
1862 
1863     /* We need to write complete clusters for compressed images, so if an
1864      * unallocated area is shorter than that, we must consider the whole
1865      * cluster allocated. */
1866     if (s->compressed) {
1867         if (n < s->cluster_sectors) {
1868             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1869             s->status = BLK_DATA;
1870         } else {
1871             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1872         }
1873     }
1874 
1875     return n;
1876 }
1877 
1878 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1879                                         int nb_sectors, uint8_t *buf)
1880 {
1881     uint64_t single_read_until = 0;
1882     int n, ret;
1883 
1884     assert(nb_sectors <= s->buf_sectors);
1885     while (nb_sectors > 0) {
1886         BlockBackend *blk;
1887         int src_cur;
1888         int64_t bs_sectors, src_cur_offset;
1889         uint64_t offset;
1890 
1891         /* In the case of compression with multiple source files, we can get a
1892          * nb_sectors that spreads into the next part. So we must be able to
1893          * read across multiple BDSes for one convert_read() call. */
1894         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1895         blk = s->src[src_cur];
1896         bs_sectors = s->src_sectors[src_cur];
1897 
1898         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1899 
1900         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1901         if (single_read_until > offset) {
1902             n = 1;
1903         }
1904 
1905         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1906         if (ret < 0) {
1907             if (s->salvage) {
1908                 if (n > 1) {
1909                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1910                     continue;
1911                 } else {
1912                     if (!s->quiet) {
1913                         warn_report("error while reading offset %" PRIu64
1914                                     ": %s", offset, strerror(-ret));
1915                     }
1916                     memset(buf, 0, BDRV_SECTOR_SIZE);
1917                 }
1918             } else {
1919                 return ret;
1920             }
1921         }
1922 
1923         sector_num += n;
1924         nb_sectors -= n;
1925         buf += n * BDRV_SECTOR_SIZE;
1926     }
1927 
1928     return 0;
1929 }
1930 
1931 
1932 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1933                                          int nb_sectors, uint8_t *buf,
1934                                          enum ImgConvertBlockStatus status)
1935 {
1936     int ret;
1937 
1938     while (nb_sectors > 0) {
1939         int n = nb_sectors;
1940         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1941 
1942         switch (status) {
1943         case BLK_BACKING_FILE:
1944             /* If we have a backing file, leave clusters unallocated that are
1945              * unallocated in the source image, so that the backing file is
1946              * visible at the respective offset. */
1947             assert(s->target_has_backing);
1948             break;
1949 
1950         case BLK_DATA:
1951             /* If we're told to keep the target fully allocated (-S 0) or there
1952              * is real non-zero data, we must write it. Otherwise we can treat
1953              * it as zero sectors.
1954              * Compressed clusters need to be written as a whole, so in that
1955              * case we can only save the write if the buffer is completely
1956              * zeroed. */
1957             if (!s->min_sparse ||
1958                 (!s->compressed &&
1959                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1960                                           sector_num, s->alignment)) ||
1961                 (s->compressed &&
1962                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1963             {
1964                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1965                                     n << BDRV_SECTOR_BITS, buf, flags);
1966                 if (ret < 0) {
1967                     return ret;
1968                 }
1969                 break;
1970             }
1971             /* fall-through */
1972 
1973         case BLK_ZERO:
1974             if (s->has_zero_init) {
1975                 assert(!s->target_has_backing);
1976                 break;
1977             }
1978             ret = blk_co_pwrite_zeroes(s->target,
1979                                        sector_num << BDRV_SECTOR_BITS,
1980                                        n << BDRV_SECTOR_BITS,
1981                                        BDRV_REQ_MAY_UNMAP);
1982             if (ret < 0) {
1983                 return ret;
1984             }
1985             break;
1986         }
1987 
1988         sector_num += n;
1989         nb_sectors -= n;
1990         buf += n * BDRV_SECTOR_SIZE;
1991     }
1992 
1993     return 0;
1994 }
1995 
1996 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1997                                               int nb_sectors)
1998 {
1999     int n, ret;
2000 
2001     while (nb_sectors > 0) {
2002         BlockBackend *blk;
2003         int src_cur;
2004         int64_t bs_sectors, src_cur_offset;
2005         int64_t offset;
2006 
2007         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
2008         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
2009         blk = s->src[src_cur];
2010         bs_sectors = s->src_sectors[src_cur];
2011 
2012         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
2013 
2014         ret = blk_co_copy_range(blk, offset, s->target,
2015                                 sector_num << BDRV_SECTOR_BITS,
2016                                 n << BDRV_SECTOR_BITS, 0, 0);
2017         if (ret < 0) {
2018             return ret;
2019         }
2020 
2021         sector_num += n;
2022         nb_sectors -= n;
2023     }
2024     return 0;
2025 }
2026 
2027 static void coroutine_fn convert_co_do_copy(void *opaque)
2028 {
2029     ImgConvertState *s = opaque;
2030     uint8_t *buf = NULL;
2031     int ret, i;
2032     int index = -1;
2033 
2034     for (i = 0; i < s->num_coroutines; i++) {
2035         if (s->co[i] == qemu_coroutine_self()) {
2036             index = i;
2037             break;
2038         }
2039     }
2040     assert(index >= 0);
2041 
2042     s->running_coroutines++;
2043     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
2044 
2045     while (1) {
2046         int n;
2047         int64_t sector_num;
2048         enum ImgConvertBlockStatus status;
2049         bool copy_range;
2050 
2051         qemu_co_mutex_lock(&s->lock);
2052         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
2053             qemu_co_mutex_unlock(&s->lock);
2054             break;
2055         }
2056         WITH_GRAPH_RDLOCK_GUARD() {
2057             n = convert_iteration_sectors(s, s->sector_num);
2058         }
2059         if (n < 0) {
2060             qemu_co_mutex_unlock(&s->lock);
2061             s->ret = n;
2062             break;
2063         }
2064         /* save current sector and allocation status to local variables */
2065         sector_num = s->sector_num;
2066         status = s->status;
2067         if (!s->min_sparse && s->status == BLK_ZERO) {
2068             n = MIN(n, s->buf_sectors);
2069         }
2070         /* increment global sector counter so that other coroutines can
2071          * already continue reading beyond this request */
2072         s->sector_num += n;
2073         qemu_co_mutex_unlock(&s->lock);
2074 
2075         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2076             s->allocated_done += n;
2077             qemu_progress_print(100.0 * s->allocated_done /
2078                                         s->allocated_sectors, 0);
2079         }
2080 
2081 retry:
2082         copy_range = s->copy_range && s->status == BLK_DATA;
2083         if (status == BLK_DATA && !copy_range) {
2084             ret = convert_co_read(s, sector_num, n, buf);
2085             if (ret < 0) {
2086                 error_report("error while reading at byte %lld: %s",
2087                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2088                 s->ret = ret;
2089             }
2090         } else if (!s->min_sparse && status == BLK_ZERO) {
2091             status = BLK_DATA;
2092             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2093         }
2094 
2095         if (s->wr_in_order) {
2096             /* keep writes in order */
2097             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2098                 s->wait_sector_num[index] = sector_num;
2099                 qemu_coroutine_yield();
2100             }
2101             s->wait_sector_num[index] = -1;
2102         }
2103 
2104         if (s->ret == -EINPROGRESS) {
2105             if (copy_range) {
2106                 WITH_GRAPH_RDLOCK_GUARD() {
2107                     ret = convert_co_copy_range(s, sector_num, n);
2108                 }
2109                 if (ret) {
2110                     s->copy_range = false;
2111                     goto retry;
2112                 }
2113             } else {
2114                 ret = convert_co_write(s, sector_num, n, buf, status);
2115             }
2116             if (ret < 0) {
2117                 error_report("error while writing at byte %lld: %s",
2118                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2119                 s->ret = ret;
2120             }
2121         }
2122 
2123         if (s->wr_in_order) {
2124             /* reenter the coroutine that might have waited
2125              * for this write to complete */
2126             s->wr_offs = sector_num + n;
2127             for (i = 0; i < s->num_coroutines; i++) {
2128                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2129                     /*
2130                      * A -> B -> A cannot occur because A has
2131                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2132                      * B will never enter A during this time window.
2133                      */
2134                     qemu_coroutine_enter(s->co[i]);
2135                     break;
2136                 }
2137             }
2138         }
2139     }
2140 
2141     qemu_vfree(buf);
2142     s->co[index] = NULL;
2143     s->running_coroutines--;
2144     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2145         /* the convert job finished successfully */
2146         s->ret = 0;
2147     }
2148 }
2149 
2150 static int convert_do_copy(ImgConvertState *s)
2151 {
2152     int ret, i, n;
2153     int64_t sector_num = 0;
2154 
2155     /* Check whether we have zero initialisation or can get it efficiently */
2156     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2157         !s->target_has_backing) {
2158         bdrv_graph_rdlock_main_loop();
2159         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2160         bdrv_graph_rdunlock_main_loop();
2161     }
2162 
2163     /* Allocate buffer for copied data. For compressed images, only one cluster
2164      * can be copied at a time. */
2165     if (s->compressed) {
2166         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2167             error_report("invalid cluster size");
2168             return -EINVAL;
2169         }
2170         s->buf_sectors = s->cluster_sectors;
2171     }
2172 
2173     while (sector_num < s->total_sectors) {
2174         bdrv_graph_rdlock_main_loop();
2175         n = convert_iteration_sectors(s, sector_num);
2176         bdrv_graph_rdunlock_main_loop();
2177         if (n < 0) {
2178             return n;
2179         }
2180         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2181         {
2182             s->allocated_sectors += n;
2183         }
2184         sector_num += n;
2185     }
2186 
2187     /* Do the copy */
2188     s->sector_next_status = 0;
2189     s->ret = -EINPROGRESS;
2190 
2191     qemu_co_mutex_init(&s->lock);
2192     for (i = 0; i < s->num_coroutines; i++) {
2193         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2194         s->wait_sector_num[i] = -1;
2195         qemu_coroutine_enter(s->co[i]);
2196     }
2197 
2198     while (s->running_coroutines) {
2199         main_loop_wait(false);
2200     }
2201 
2202     if (s->compressed && !s->ret) {
2203         /* signal EOF to align */
2204         ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2205         if (ret < 0) {
2206             return ret;
2207         }
2208     }
2209 
2210     return s->ret;
2211 }
2212 
2213 /* Check that bitmaps can be copied, or output an error */
2214 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2215 {
2216     BdrvDirtyBitmap *bm;
2217 
2218     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2219         error_report("Source lacks bitmap support");
2220         return -1;
2221     }
2222     FOR_EACH_DIRTY_BITMAP(src, bm) {
2223         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2224             continue;
2225         }
2226         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2227             error_report("Cannot copy inconsistent bitmap '%s'",
2228                          bdrv_dirty_bitmap_name(bm));
2229             error_printf("Try --skip-broken-bitmaps, or "
2230                          "use 'qemu-img bitmap --remove' to delete it\n");
2231             return -1;
2232         }
2233     }
2234     return 0;
2235 }
2236 
2237 static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2238                                 bool skip_broken)
2239 {
2240     BdrvDirtyBitmap *bm;
2241     Error *err = NULL;
2242 
2243     FOR_EACH_DIRTY_BITMAP(src, bm) {
2244         const char *name;
2245 
2246         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2247             continue;
2248         }
2249         name = bdrv_dirty_bitmap_name(bm);
2250         if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2251             warn_report("Skipping inconsistent bitmap '%s'", name);
2252             continue;
2253         }
2254         qmp_block_dirty_bitmap_add(dst->node_name, name,
2255                                    true, bdrv_dirty_bitmap_granularity(bm),
2256                                    true, true,
2257                                    true, !bdrv_dirty_bitmap_enabled(bm),
2258                                    &err);
2259         if (err) {
2260             error_reportf_err(err, "Failed to create bitmap %s: ", name);
2261             return -1;
2262         }
2263 
2264         do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2265                               &err);
2266         if (err) {
2267             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2268             qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2269             return -1;
2270         }
2271     }
2272 
2273     return 0;
2274 }
2275 
/*
 * Largest value, in sectors, that img_convert() accepts for the -S
 * (buffer size for sparse output) option.
 */
#define MAX_BUF_SECTORS 32768
2277 
2278 static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2279 {
2280     ThrottleConfig cfg;
2281 
2282     throttle_config_init(&cfg);
2283     cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2284 
2285     blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2286     blk_set_io_limits(blk, &cfg);
2287 }
2288 
2289 static int img_convert(const img_cmd_t *ccmd, int argc, char **argv)
2290 {
2291     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2292     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2293                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2294                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2295                *backing_fmt = NULL;
2296     BlockDriver *drv = NULL, *proto_drv = NULL;
2297     BlockDriverInfo bdi;
2298     BlockDriverState *out_bs;
2299     QemuOpts *opts = NULL, *sn_opts = NULL;
2300     QemuOptsList *create_opts = NULL;
2301     QDict *open_opts = NULL;
2302     char *options = NULL;
2303     Error *local_err = NULL;
2304     bool writethrough, src_writethrough, image_opts = false,
2305          skip_create = false, progress = false, tgt_image_opts = false;
2306     int64_t ret = -EINVAL;
2307     bool force_share = false;
2308     bool explict_min_sparse = false;
2309     bool bitmaps = false;
2310     bool skip_broken = false;
2311     int64_t rate_limit = 0;
2312 
2313     ImgConvertState s = (ImgConvertState) {
2314         /* Need at least 4k of zeros for sparse detection */
2315         .min_sparse         = 8,
2316         .copy_range         = false,
2317         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2318         .wr_in_order        = true,
2319         .num_coroutines     = 8,
2320     };
2321 
2322     for(;;) {
2323         static const struct option long_options[] = {
2324             {"help", no_argument, 0, 'h'},
2325             {"object", required_argument, 0, OPTION_OBJECT},
2326             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2327             {"force-share", no_argument, 0, 'U'},
2328             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2329             {"salvage", no_argument, 0, OPTION_SALVAGE},
2330             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2331             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2332             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2333             {0, 0, 0, 0}
2334         };
2335         c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
2336                         long_options, NULL);
2337         if (c == -1) {
2338             break;
2339         }
2340         switch(c) {
2341         case ':':
2342             missing_argument(argv[optind - 1]);
2343             break;
2344         case '?':
2345             unrecognized_option(argv[optind - 1]);
2346             break;
2347         case 'h':
2348             help();
2349             break;
2350         case 'f':
2351             fmt = optarg;
2352             break;
2353         case 'O':
2354             out_fmt = optarg;
2355             break;
2356         case 'B':
2357             out_baseimg = optarg;
2358             break;
2359         case 'C':
2360             s.copy_range = true;
2361             break;
2362         case 'c':
2363             s.compressed = true;
2364             break;
2365         case 'F':
2366             backing_fmt = optarg;
2367             break;
2368         case 'o':
2369             if (accumulate_options(&options, optarg) < 0) {
2370                 goto fail_getopt;
2371             }
2372             break;
2373         case 'l':
2374             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2375                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2376                                                   optarg, false);
2377                 if (!sn_opts) {
2378                     error_report("Failed in parsing snapshot param '%s'",
2379                                  optarg);
2380                     goto fail_getopt;
2381                 }
2382             } else {
2383                 snapshot_name = optarg;
2384             }
2385             break;
2386         case 'S':
2387         {
2388             int64_t sval;
2389 
2390             sval = cvtnum("buffer size for sparse output", optarg);
2391             if (sval < 0) {
2392                 goto fail_getopt;
2393             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2394                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2395                 error_report("Invalid buffer size for sparse output specified. "
2396                     "Valid sizes are multiples of %llu up to %llu. Select "
2397                     "0 to disable sparse detection (fully allocates output).",
2398                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2399                 goto fail_getopt;
2400             }
2401 
2402             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2403             explict_min_sparse = true;
2404             break;
2405         }
2406         case 'p':
2407             progress = true;
2408             break;
2409         case 't':
2410             cache = optarg;
2411             break;
2412         case 'T':
2413             src_cache = optarg;
2414             break;
2415         case 'q':
2416             s.quiet = true;
2417             break;
2418         case 'n':
2419             skip_create = true;
2420             break;
2421         case 'm':
2422             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2423                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2424                 error_report("Invalid number of coroutines. Allowed number of"
2425                              " coroutines is between 1 and %d", MAX_COROUTINES);
2426                 goto fail_getopt;
2427             }
2428             break;
2429         case 'W':
2430             s.wr_in_order = false;
2431             break;
2432         case 'U':
2433             force_share = true;
2434             break;
2435         case 'r':
2436             rate_limit = cvtnum("rate limit", optarg);
2437             if (rate_limit < 0) {
2438                 goto fail_getopt;
2439             }
2440             break;
2441         case OPTION_OBJECT:
2442             user_creatable_process_cmdline(optarg);
2443             break;
2444         case OPTION_IMAGE_OPTS:
2445             image_opts = true;
2446             break;
2447         case OPTION_SALVAGE:
2448             s.salvage = true;
2449             break;
2450         case OPTION_TARGET_IMAGE_OPTS:
2451             tgt_image_opts = true;
2452             break;
2453         case OPTION_TARGET_IS_ZERO:
2454             /*
2455              * The user asserting that the target is blank has the
2456              * same effect as the target driver supporting zero
2457              * initialisation.
2458              */
2459             s.has_zero_init = true;
2460             break;
2461         case OPTION_BITMAPS:
2462             bitmaps = true;
2463             break;
2464         case OPTION_SKIP_BROKEN:
2465             skip_broken = true;
2466             break;
2467         }
2468     }
2469 
2470     if (!out_fmt && !tgt_image_opts) {
2471         out_fmt = "raw";
2472     }
2473 
2474     if (skip_broken && !bitmaps) {
2475         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2476         goto fail_getopt;
2477     }
2478 
2479     if (s.compressed && s.copy_range) {
2480         error_report("Cannot enable copy offloading when -c is used");
2481         goto fail_getopt;
2482     }
2483 
2484     if (explict_min_sparse && s.copy_range) {
2485         error_report("Cannot enable copy offloading when -S is used");
2486         goto fail_getopt;
2487     }
2488 
2489     if (s.copy_range && s.salvage) {
2490         error_report("Cannot use copy offloading in salvaging mode");
2491         goto fail_getopt;
2492     }
2493 
2494     if (tgt_image_opts && !skip_create) {
2495         error_report("--target-image-opts requires use of -n flag");
2496         goto fail_getopt;
2497     }
2498 
2499     if (skip_create && options) {
2500         error_report("-o has no effect when skipping image creation");
2501         goto fail_getopt;
2502     }
2503 
2504     if (s.has_zero_init && !skip_create) {
2505         error_report("--target-is-zero requires use of -n flag");
2506         goto fail_getopt;
2507     }
2508 
2509     s.src_num = argc - optind - 1;
2510     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2511 
2512     if (options && has_help_option(options)) {
2513         if (out_fmt) {
2514             ret = print_block_option_help(out_filename, out_fmt);
2515             goto fail_getopt;
2516         } else {
2517             error_report("Option help requires a format be specified");
2518             goto fail_getopt;
2519         }
2520     }
2521 
2522     if (s.src_num < 1) {
2523         error_report("Must specify image file name");
2524         goto fail_getopt;
2525     }
2526 
2527     /* ret is still -EINVAL until here */
2528     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2529     if (ret < 0) {
2530         error_report("Invalid source cache option: %s", src_cache);
2531         goto fail_getopt;
2532     }
2533 
2534     /* Initialize before goto out */
2535     if (s.quiet) {
2536         progress = false;
2537     }
2538     qemu_progress_init(progress, 1.0);
2539     qemu_progress_print(0, 100);
2540 
2541     s.src = g_new0(BlockBackend *, s.src_num);
2542     s.src_sectors = g_new(int64_t, s.src_num);
2543     s.src_alignment = g_new(int, s.src_num);
2544 
2545     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2546         BlockDriverState *src_bs;
2547         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2548                                fmt, src_flags, src_writethrough, s.quiet,
2549                                force_share);
2550         if (!s.src[bs_i]) {
2551             ret = -1;
2552             goto out;
2553         }
2554         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2555         if (s.src_sectors[bs_i] < 0) {
2556             error_report("Could not get size of %s: %s",
2557                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2558             ret = -1;
2559             goto out;
2560         }
2561         src_bs = blk_bs(s.src[bs_i]);
2562         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2563                                              BDRV_SECTOR_SIZE);
2564         if (!bdrv_get_info(src_bs, &bdi)) {
2565             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2566                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2567         }
2568         s.total_sectors += s.src_sectors[bs_i];
2569     }
2570 
2571     if (sn_opts) {
2572         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2573                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2574                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2575                                &local_err);
2576     } else if (snapshot_name != NULL) {
2577         if (s.src_num > 1) {
2578             error_report("No support for concatenating multiple snapshot");
2579             ret = -1;
2580             goto out;
2581         }
2582 
2583         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2584                                              &local_err);
2585     }
2586     if (local_err) {
2587         error_reportf_err(local_err, "Failed to load snapshot: ");
2588         ret = -1;
2589         goto out;
2590     }
2591 
2592     if (!skip_create) {
2593         /* Find driver and parse its options */
2594         drv = bdrv_find_format(out_fmt);
2595         if (!drv) {
2596             error_report("Unknown file format '%s'", out_fmt);
2597             ret = -1;
2598             goto out;
2599         }
2600 
2601         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2602         if (!proto_drv) {
2603             error_report_err(local_err);
2604             ret = -1;
2605             goto out;
2606         }
2607 
2608         if (!drv->create_opts) {
2609             error_report("Format driver '%s' does not support image creation",
2610                          drv->format_name);
2611             ret = -1;
2612             goto out;
2613         }
2614 
2615         if (!proto_drv->create_opts) {
2616             error_report("Protocol driver '%s' does not support image creation",
2617                          proto_drv->format_name);
2618             ret = -1;
2619             goto out;
2620         }
2621 
2622         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2623         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2624 
2625         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2626         if (options) {
2627             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2628                 error_report_err(local_err);
2629                 ret = -1;
2630                 goto out;
2631             }
2632         }
2633 
2634         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2635                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2636         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2637         if (ret < 0) {
2638             goto out;
2639         }
2640     }
2641 
2642     /* Get backing file name if -o backing_file was used */
2643     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2644     if (out_baseimg_param) {
2645         out_baseimg = out_baseimg_param;
2646     }
2647     s.target_has_backing = (bool) out_baseimg;
2648 
2649     if (s.has_zero_init && s.target_has_backing) {
2650         error_report("Cannot use --target-is-zero when the destination "
2651                      "image has a backing file");
2652         goto out;
2653     }
2654 
2655     if (s.src_num > 1 && out_baseimg) {
2656         error_report("Having a backing file for the target makes no sense when "
2657                      "concatenating multiple input images");
2658         ret = -1;
2659         goto out;
2660     }
2661 
2662     if (out_baseimg_param) {
2663         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2664             error_report("Use of backing file requires explicit "
2665                          "backing format");
2666             ret = -1;
2667             goto out;
2668         }
2669     }
2670 
2671     /* Check if compression is supported */
2672     if (s.compressed) {
2673         bool encryption =
2674             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2675         const char *encryptfmt =
2676             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2677         const char *preallocation =
2678             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2679 
2680         if (drv && !block_driver_can_compress(drv)) {
2681             error_report("Compression not supported for this file format");
2682             ret = -1;
2683             goto out;
2684         }
2685 
2686         if (encryption || encryptfmt) {
2687             error_report("Compression and encryption not supported at "
2688                          "the same time");
2689             ret = -1;
2690             goto out;
2691         }
2692 
2693         if (preallocation
2694             && strcmp(preallocation, "off"))
2695         {
2696             error_report("Compression and preallocation not supported at "
2697                          "the same time");
2698             ret = -1;
2699             goto out;
2700         }
2701     }
2702 
2703     /* Determine if bitmaps need copying */
2704     if (bitmaps) {
2705         if (s.src_num > 1) {
2706             error_report("Copying bitmaps only possible with single source");
2707             ret = -1;
2708             goto out;
2709         }
2710         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2711         if (ret < 0) {
2712             goto out;
2713         }
2714     }
2715 
2716     /*
2717      * The later open call will need any decryption secrets, and
2718      * bdrv_create() will purge "opts", so extract them now before
2719      * they are lost.
2720      */
2721     if (!skip_create) {
2722         open_opts = qdict_new();
2723         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2724 
2725         /* Create the new image */
2726         ret = bdrv_create(drv, out_filename, opts, &local_err);
2727         if (ret < 0) {
2728             error_reportf_err(local_err, "%s: error while converting %s: ",
2729                               out_filename, out_fmt);
2730             goto out;
2731         }
2732     }
2733 
2734     s.target_is_new = !skip_create;
2735 
2736     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2737     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2738     if (ret < 0) {
2739         error_report("Invalid cache option: %s", cache);
2740         goto out;
2741     }
2742 
2743     if (flags & BDRV_O_NOCACHE) {
2744         /*
2745          * If we open the target with O_DIRECT, it may be necessary to
2746          * extend its size to align to the physical sector size.
2747          */
2748         flags |= BDRV_O_RESIZE;
2749     }
2750 
2751     if (skip_create) {
2752         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2753                             flags, writethrough, s.quiet, false);
2754     } else {
2755         /* TODO ultimately we should allow --target-image-opts
2756          * to be used even when -n is not given.
2757          * That has to wait for bdrv_create to be improved
2758          * to allow filenames in option syntax
2759          */
2760         s.target = img_open_file(out_filename, open_opts, out_fmt,
2761                                  flags, writethrough, s.quiet, false);
2762         open_opts = NULL; /* blk_new_open will have freed it */
2763     }
2764     if (!s.target) {
2765         ret = -1;
2766         goto out;
2767     }
2768     out_bs = blk_bs(s.target);
2769 
2770     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2771         error_report("Format driver '%s' does not support bitmaps",
2772                      out_bs->drv->format_name);
2773         ret = -1;
2774         goto out;
2775     }
2776 
2777     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2778         error_report("Compression not supported for this file format");
2779         ret = -1;
2780         goto out;
2781     }
2782 
2783     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2784      * or discard_alignment of the out_bs is greater. Limit to
2785      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2786     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2787                         MAX(s.buf_sectors,
2788                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2789                                 out_bs->bl.pdiscard_alignment >>
2790                                 BDRV_SECTOR_BITS)));
2791 
2792     /* try to align the write requests to the destination to avoid unnecessary
2793      * RMW cycles. */
2794     s.alignment = MAX(pow2floor(s.min_sparse),
2795                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2796                                    BDRV_SECTOR_SIZE));
2797     assert(is_power_of_2(s.alignment));
2798 
2799     if (skip_create) {
2800         int64_t output_sectors = blk_nb_sectors(s.target);
2801         if (output_sectors < 0) {
2802             error_report("unable to get output image length: %s",
2803                          strerror(-output_sectors));
2804             ret = -1;
2805             goto out;
2806         } else if (output_sectors < s.total_sectors) {
2807             error_report("output file is smaller than input file");
2808             ret = -1;
2809             goto out;
2810         }
2811     }
2812 
2813     if (s.target_has_backing && s.target_is_new) {
2814         /* Errors are treated as "backing length unknown" (which means
2815          * s.target_backing_sectors has to be negative, which it will
2816          * be automatically).  The backing file length is used only
2817          * for optimizations, so such a case is not fatal. */
2818         bdrv_graph_rdlock_main_loop();
2819         s.target_backing_sectors =
2820             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2821         bdrv_graph_rdunlock_main_loop();
2822     } else {
2823         s.target_backing_sectors = -1;
2824     }
2825 
2826     ret = bdrv_get_info(out_bs, &bdi);
2827     if (ret < 0) {
2828         if (s.compressed) {
2829             error_report("could not get block driver info");
2830             goto out;
2831         }
2832     } else {
2833         s.compressed = s.compressed || bdi.needs_compressed_writes;
2834         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2835     }
2836 
2837     if (rate_limit) {
2838         set_rate_limit(s.target, rate_limit);
2839     }
2840 
2841     ret = convert_do_copy(&s);
2842 
2843     /* Now copy the bitmaps */
2844     if (bitmaps && ret == 0) {
2845         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2846     }
2847 
2848 out:
2849     if (!ret) {
2850         qemu_progress_print(100, 0);
2851     }
2852     qemu_progress_end();
2853     qemu_opts_del(opts);
2854     qemu_opts_free(create_opts);
2855     qobject_unref(open_opts);
2856     blk_unref(s.target);
2857     if (s.src) {
2858         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2859             blk_unref(s.src[bs_i]);
2860         }
2861         g_free(s.src);
2862     }
2863     g_free(s.src_sectors);
2864     g_free(s.src_alignment);
2865 fail_getopt:
2866     qemu_opts_del(sn_opts);
2867     g_free(options);
2868 
2869     return !!ret;
2870 }
2871 
2872 
2873 static void dump_snapshots(BlockDriverState *bs)
2874 {
2875     QEMUSnapshotInfo *sn_tab, *sn;
2876     int nb_sns, i;
2877 
2878     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2879     if (nb_sns <= 0)
2880         return;
2881     printf("Snapshot list:\n");
2882     bdrv_snapshot_dump(NULL);
2883     printf("\n");
2884     for(i = 0; i < nb_sns; i++) {
2885         sn = &sn_tab[i];
2886         bdrv_snapshot_dump(sn);
2887         printf("\n");
2888     }
2889     g_free(sn_tab);
2890 }
2891 
2892 static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2893 {
2894     GString *str;
2895     QObject *obj;
2896     Visitor *v = qobject_output_visitor_new(&obj);
2897 
2898     visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2899     visit_complete(v, &obj);
2900     str = qobject_to_json_pretty(obj, true);
2901     assert(str != NULL);
2902     printf("%s\n", str->str);
2903     qobject_unref(obj);
2904     visit_free(v);
2905     g_string_free(str, true);
2906 }
2907 
2908 static void dump_json_block_graph_info(BlockGraphInfo *info)
2909 {
2910     GString *str;
2911     QObject *obj;
2912     Visitor *v = qobject_output_visitor_new(&obj);
2913 
2914     visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2915     visit_complete(v, &obj);
2916     str = qobject_to_json_pretty(obj, true);
2917     assert(str != NULL);
2918     printf("%s\n", str->str);
2919     qobject_unref(obj);
2920     visit_free(v);
2921     g_string_free(str, true);
2922 }
2923 
2924 static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2925                                   const char *path)
2926 {
2927     BlockChildInfoList *children_list;
2928 
2929     bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2930                         info->children == NULL);
2931 
2932     for (children_list = info->children; children_list;
2933          children_list = children_list->next)
2934     {
2935         BlockChildInfo *child = children_list->value;
2936         g_autofree char *child_path = NULL;
2937 
2938         printf("%*sChild node '%s%s':\n",
2939                indentation * 4, "", path, child->name);
2940         child_path = g_strdup_printf("%s%s/", path, child->name);
2941         dump_human_image_info(child->info, indentation + 1, child_path);
2942     }
2943 }
2944 
2945 static void dump_human_image_info_list(BlockGraphInfoList *list)
2946 {
2947     BlockGraphInfoList *elem;
2948     bool delim = false;
2949 
2950     for (elem = list; elem; elem = elem->next) {
2951         if (delim) {
2952             printf("\n");
2953         }
2954         delim = true;
2955 
2956         dump_human_image_info(elem->value, 0, "/");
2957     }
2958 }
2959 
2960 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2961 {
2962     return strcmp(a, b) == 0;
2963 }
2964 
2965 /**
2966  * Open an image file chain and return an BlockGraphInfoList
2967  *
2968  * @filename: topmost image filename
2969  * @fmt: topmost image format (may be NULL to autodetect)
2970  * @chain: true  - enumerate entire backing file chain
2971  *         false - only topmost image file
2972  *
2973  * Returns a list of BlockNodeInfo objects or NULL if there was an error
2974  * opening an image file.  If there was an error a message will have been
2975  * printed to stderr.
2976  */
2977 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
2978                                                    const char *filename,
2979                                                    const char *fmt,
2980                                                    bool chain, bool force_share)
2981 {
2982     BlockGraphInfoList *head = NULL;
2983     BlockGraphInfoList **tail = &head;
2984     GHashTable *filenames;
2985     Error *err = NULL;
2986 
2987     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2988 
2989     while (filename) {
2990         BlockBackend *blk;
2991         BlockDriverState *bs;
2992         BlockGraphInfo *info;
2993 
2994         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2995             error_report("Backing file '%s' creates an infinite loop.",
2996                          filename);
2997             goto err;
2998         }
2999         g_hash_table_insert(filenames, (gpointer)filename, NULL);
3000 
3001         blk = img_open(image_opts, filename, fmt,
3002                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
3003                        force_share);
3004         if (!blk) {
3005             goto err;
3006         }
3007         bs = blk_bs(blk);
3008 
3009         /*
3010          * Note that the returned BlockGraphInfo object will not have
3011          * information about this image's backing node, because we have opened
3012          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
3013          * duplicate the backing chain information that we obtain by walking
3014          * the chain manually here.
3015          */
3016         bdrv_graph_rdlock_main_loop();
3017         bdrv_query_block_graph_info(bs, &info, &err);
3018         bdrv_graph_rdunlock_main_loop();
3019 
3020         if (err) {
3021             error_report_err(err);
3022             blk_unref(blk);
3023             goto err;
3024         }
3025 
3026         QAPI_LIST_APPEND(tail, info);
3027 
3028         blk_unref(blk);
3029 
3030         /* Clear parameters that only apply to the topmost image */
3031         filename = fmt = NULL;
3032         image_opts = false;
3033 
3034         if (chain) {
3035             if (info->full_backing_filename) {
3036                 filename = info->full_backing_filename;
3037             } else if (info->backing_filename) {
3038                 error_report("Could not determine absolute backing filename,"
3039                              " but backing filename '%s' present",
3040                              info->backing_filename);
3041                 goto err;
3042             }
3043             if (info->backing_filename_format) {
3044                 fmt = info->backing_filename_format;
3045             }
3046         }
3047     }
3048     g_hash_table_destroy(filenames);
3049     return head;
3050 
3051 err:
3052     qapi_free_BlockGraphInfoList(head);
3053     g_hash_table_destroy(filenames);
3054     return NULL;
3055 }
3056 
3057 static int img_info(const img_cmd_t *ccmd, int argc, char **argv)
3058 {
3059     int c;
3060     OutputFormat output_format = OFORMAT_HUMAN;
3061     bool chain = false;
3062     const char *filename, *fmt, *output;
3063     BlockGraphInfoList *list;
3064     bool image_opts = false;
3065     bool force_share = false;
3066 
3067     fmt = NULL;
3068     output = NULL;
3069     for(;;) {
3070         int option_index = 0;
3071         static const struct option long_options[] = {
3072             {"help", no_argument, 0, 'h'},
3073             {"format", required_argument, 0, 'f'},
3074             {"output", required_argument, 0, OPTION_OUTPUT},
3075             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3076             {"object", required_argument, 0, OPTION_OBJECT},
3077             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3078             {"force-share", no_argument, 0, 'U'},
3079             {0, 0, 0, 0}
3080         };
3081         c = getopt_long(argc, argv, ":f:hU",
3082                         long_options, &option_index);
3083         if (c == -1) {
3084             break;
3085         }
3086         switch(c) {
3087         case ':':
3088             missing_argument(argv[optind - 1]);
3089             break;
3090         case '?':
3091             unrecognized_option(argv[optind - 1]);
3092             break;
3093         case 'h':
3094             help();
3095             break;
3096         case 'f':
3097             fmt = optarg;
3098             break;
3099         case 'U':
3100             force_share = true;
3101             break;
3102         case OPTION_OUTPUT:
3103             output = optarg;
3104             break;
3105         case OPTION_BACKING_CHAIN:
3106             chain = true;
3107             break;
3108         case OPTION_OBJECT:
3109             user_creatable_process_cmdline(optarg);
3110             break;
3111         case OPTION_IMAGE_OPTS:
3112             image_opts = true;
3113             break;
3114         }
3115     }
3116     if (optind != argc - 1) {
3117         error_exit(argv[0], "Expecting one image file name");
3118     }
3119     filename = argv[optind++];
3120 
3121     if (output && !strcmp(output, "json")) {
3122         output_format = OFORMAT_JSON;
3123     } else if (output && !strcmp(output, "human")) {
3124         output_format = OFORMAT_HUMAN;
3125     } else if (output) {
3126         error_report("--output must be used with human or json as argument.");
3127         return 1;
3128     }
3129 
3130     list = collect_image_info_list(image_opts, filename, fmt, chain,
3131                                    force_share);
3132     if (!list) {
3133         return 1;
3134     }
3135 
3136     switch (output_format) {
3137     case OFORMAT_HUMAN:
3138         dump_human_image_info_list(list);
3139         break;
3140     case OFORMAT_JSON:
3141         if (chain) {
3142             dump_json_block_graph_info_list(list);
3143         } else {
3144             dump_json_block_graph_info(list->value);
3145         }
3146         break;
3147     }
3148 
3149     qapi_free_BlockGraphInfoList(list);
3150     return 0;
3151 }
3152 
3153 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3154                           MapEntry *next)
3155 {
3156     switch (output_format) {
3157     case OFORMAT_HUMAN:
3158         if (e->data && !e->has_offset) {
3159             error_report("File contains external, encrypted or compressed clusters.");
3160             return -1;
3161         }
3162         if (e->data && !e->zero) {
3163             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3164                    e->start, e->length,
3165                    e->has_offset ? e->offset : 0,
3166                    e->filename ?: "");
3167         }
3168         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3169          * Modify the flags here to allow more coalescing.
3170          */
3171         if (next && (!next->data || next->zero)) {
3172             next->data = false;
3173             next->zero = true;
3174         }
3175         break;
3176     case OFORMAT_JSON:
3177         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3178                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3179                " \"data\": %s, \"compressed\": %s",
3180                e->start, e->length, e->depth,
3181                e->present ? "true" : "false",
3182                e->zero ? "true" : "false",
3183                e->data ? "true" : "false",
3184                e->compressed ? "true" : "false");
3185         if (e->has_offset) {
3186             printf(", \"offset\": %"PRId64"", e->offset);
3187         }
3188         putchar('}');
3189 
3190         if (next) {
3191             puts(",");
3192         }
3193         break;
3194     }
3195     return 0;
3196 }
3197 
/*
 * Query the block status of the range starting at @offset and fill in @e.
 *
 * Starting at @bs, each layer is asked via bdrv_block_status(); as long as a
 * layer reports neither BDRV_BLOCK_ZERO nor BDRV_BLOCK_DATA the search
 * continues in the node returned by bdrv_cow_bs().  The number of such steps
 * is stored in e->depth.
 *
 * @bytes is the maximum length to examine.  e->length receives the length
 * reported by the last bdrv_block_status() call, which may be shorter.
 *
 * Returns 0 on success (with *e filled in) or the negative value returned
 * by bdrv_block_status() on failure (in which case *e is left untouched).
 */
static int get_block_status(BlockDriverState *bs, int64_t offset,
                            int64_t bytes, MapEntry *e)
{
    int ret;
    int depth;
    BlockDriverState *file;
    bool has_offset;
    int64_t map;
    char *filename = NULL;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    /* As an optimization, we could cache the current range of unallocated
     * clusters in each file of the chain, and avoid querying the same
     * range repeatedly.
     */

    depth = 0;
    for (;;) {
        bs = bdrv_skip_filters(bs);
        /*
         * bytes is both input and output here: the length reported for one
         * layer becomes the limit for the query on the next layer.
         */
        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
        if (ret < 0) {
            return ret;
        }
        assert(bytes);
        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
            break;
        }
        bs = bdrv_cow_bs(bs);
        if (bs == NULL) {
            /*
             * No further layer to look at: clear all status flags so the
             * entry below is reported without data, zero, present or offset.
             */
            ret = 0;
            break;
        }

        depth++;
    }

    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);

    if (file && has_offset) {
        bdrv_refresh_filename(file);
        /* Borrowed pointer into the node; the string is not copied */
        filename = file->filename;
    }

    *e = (MapEntry) {
        .start = offset,
        .length = bytes,
        .data = !!(ret & BDRV_BLOCK_DATA),
        .zero = !!(ret & BDRV_BLOCK_ZERO),
        .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
        .offset = map,
        .has_offset = has_offset,
        .depth = depth,
        .present = !!(ret & BDRV_BLOCK_ALLOCATED),
        .filename = filename,
    };

    return 0;
}
3258 
3259 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3260 {
3261     if (curr->length == 0) {
3262         return false;
3263     }
3264     if (curr->zero != next->zero ||
3265         curr->data != next->data ||
3266         curr->compressed != next->compressed ||
3267         curr->depth != next->depth ||
3268         curr->present != next->present ||
3269         !curr->filename != !next->filename ||
3270         curr->has_offset != next->has_offset) {
3271         return false;
3272     }
3273     if (curr->filename && strcmp(curr->filename, next->filename)) {
3274         return false;
3275     }
3276     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3277         return false;
3278     }
3279     return true;
3280 }
3281 
3282 static int img_map(const img_cmd_t *ccmd, int argc, char **argv)
3283 {
3284     int c;
3285     OutputFormat output_format = OFORMAT_HUMAN;
3286     BlockBackend *blk;
3287     BlockDriverState *bs;
3288     const char *filename, *fmt, *output;
3289     int64_t length;
3290     MapEntry curr = { .length = 0 }, next;
3291     int ret = 0;
3292     bool image_opts = false;
3293     bool force_share = false;
3294     int64_t start_offset = 0;
3295     int64_t max_length = -1;
3296 
3297     fmt = NULL;
3298     output = NULL;
3299     for (;;) {
3300         int option_index = 0;
3301         static const struct option long_options[] = {
3302             {"help", no_argument, 0, 'h'},
3303             {"format", required_argument, 0, 'f'},
3304             {"output", required_argument, 0, OPTION_OUTPUT},
3305             {"object", required_argument, 0, OPTION_OBJECT},
3306             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3307             {"force-share", no_argument, 0, 'U'},
3308             {"start-offset", required_argument, 0, 's'},
3309             {"max-length", required_argument, 0, 'l'},
3310             {0, 0, 0, 0}
3311         };
3312         c = getopt_long(argc, argv, ":f:s:l:hU",
3313                         long_options, &option_index);
3314         if (c == -1) {
3315             break;
3316         }
3317         switch (c) {
3318         case ':':
3319             missing_argument(argv[optind - 1]);
3320             break;
3321         case '?':
3322             unrecognized_option(argv[optind - 1]);
3323             break;
3324         case 'h':
3325             help();
3326             break;
3327         case 'f':
3328             fmt = optarg;
3329             break;
3330         case 'U':
3331             force_share = true;
3332             break;
3333         case OPTION_OUTPUT:
3334             output = optarg;
3335             break;
3336         case 's':
3337             start_offset = cvtnum("start offset", optarg);
3338             if (start_offset < 0) {
3339                 return 1;
3340             }
3341             break;
3342         case 'l':
3343             max_length = cvtnum("max length", optarg);
3344             if (max_length < 0) {
3345                 return 1;
3346             }
3347             break;
3348         case OPTION_OBJECT:
3349             user_creatable_process_cmdline(optarg);
3350             break;
3351         case OPTION_IMAGE_OPTS:
3352             image_opts = true;
3353             break;
3354         }
3355     }
3356     if (optind != argc - 1) {
3357         error_exit(argv[0], "Expecting one image file name");
3358     }
3359     filename = argv[optind];
3360 
3361     if (output && !strcmp(output, "json")) {
3362         output_format = OFORMAT_JSON;
3363     } else if (output && !strcmp(output, "human")) {
3364         output_format = OFORMAT_HUMAN;
3365     } else if (output) {
3366         error_report("--output must be used with human or json as argument.");
3367         return 1;
3368     }
3369 
3370     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3371     if (!blk) {
3372         return 1;
3373     }
3374     bs = blk_bs(blk);
3375 
3376     if (output_format == OFORMAT_HUMAN) {
3377         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3378     } else if (output_format == OFORMAT_JSON) {
3379         putchar('[');
3380     }
3381 
3382     length = blk_getlength(blk);
3383     if (length < 0) {
3384         error_report("Failed to get size for '%s'", filename);
3385         return 1;
3386     }
3387     if (max_length != -1) {
3388         length = MIN(start_offset + max_length, length);
3389     }
3390 
3391     curr.start = start_offset;
3392     while (curr.start + curr.length < length) {
3393         int64_t offset = curr.start + curr.length;
3394         int64_t n = length - offset;
3395 
3396         ret = get_block_status(bs, offset, n, &next);
3397         if (ret < 0) {
3398             error_report("Could not read file metadata: %s", strerror(-ret));
3399             goto out;
3400         }
3401 
3402         if (entry_mergeable(&curr, &next)) {
3403             curr.length += next.length;
3404             continue;
3405         }
3406 
3407         if (curr.length > 0) {
3408             ret = dump_map_entry(output_format, &curr, &next);
3409             if (ret < 0) {
3410                 goto out;
3411             }
3412         }
3413         curr = next;
3414     }
3415 
3416     ret = dump_map_entry(output_format, &curr, NULL);
3417     if (output_format == OFORMAT_JSON) {
3418         puts("]");
3419     }
3420 
3421 out:
3422     blk_unref(blk);
3423     return ret < 0;
3424 }
3425 
/*
 * Action codes used by img_snapshot(), selected with -l, -c, -a and -d
 * respectively.  All are non-zero because img_snapshot() uses 0 to mean
 * "no action chosen yet".
 */
#define SNAPSHOT_LIST   1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY  3
#define SNAPSHOT_DELETE 4
3430 
/*
 * Handler for the "snapshot" subcommand.
 *
 * Recognized options: -l (list), -a NAME (apply), -c NAME (create),
 * -d NAME (delete), -q (quiet), -U/--force-share, --object DEF,
 * --image-opts and -h/--help.  At most one of -l/-a/-c/-d may be given and
 * exactly one image file name must follow the options.
 *
 * If none of -l/-a/-c/-d is given, the image is opened and closed again
 * without any further action.
 *
 * Returns 0 on success and 1 on failure.  @ccmd is not used.
 */
static int img_snapshot(const img_cmd_t *ccmd, int argc, char **argv)
{
    BlockBackend *blk;
    BlockDriverState *bs;
    QEMUSnapshotInfo sn;
    char *filename, *snapshot_name = NULL;
    int c, ret = 0, bdrv_oflags;
    int action = 0;
    bool quiet = false;
    Error *err = NULL;
    bool image_opts = false;
    bool force_share = false;
    int64_t rt;

    /* Default to read-write; only -l downgrades this to read-only */
    bdrv_oflags = BDRV_O_RDWR;
    /* Parse commandline parameters */
    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":la:c:d:hqU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            return 0;
        case 'l':
            /* A non-zero action means another of -l/-a/-c/-d came first */
            if (action) {
                error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
                /* NOTE(review): only reached if error_exit() returns */
                return 0;
            }
            action = SNAPSHOT_LIST;
            bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
            break;
        case 'a':
            if (action) {
                error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
                return 0;
            }
            action = SNAPSHOT_APPLY;
            snapshot_name = optarg;
            break;
        case 'c':
            if (action) {
                error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
                return 0;
            }
            action = SNAPSHOT_CREATE;
            snapshot_name = optarg;
            break;
        case 'd':
            if (action) {
                error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
                return 0;
            }
            action = SNAPSHOT_DELETE;
            snapshot_name = optarg;
            break;
        case 'q':
            quiet = true;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    if (optind != argc - 1) {
        error_exit(argv[0], "Expecting one image file name");
    }
    filename = argv[optind++];

    /* Open the image */
    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
                   force_share);
    if (!blk) {
        return 1;
    }
    bs = blk_bs(blk);

    /* Perform the requested action */
    switch(action) {
    case SNAPSHOT_LIST:
        dump_snapshots(bs);
        break;

    case SNAPSHOT_CREATE:
        memset(&sn, 0, sizeof(sn));
        pstrcpy(sn.name, sizeof(sn.name), snapshot_name);

        /* g_get_real_time() is in microseconds; split into sec + nsec */
        rt = g_get_real_time();
        sn.date_sec = rt / G_USEC_PER_SEC;
        sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;

        bdrv_graph_rdlock_main_loop();
        ret = bdrv_snapshot_create(bs, &sn);
        bdrv_graph_rdunlock_main_loop();

        if (ret) {
            error_report("Could not create snapshot '%s': %s",
                snapshot_name, strerror(-ret));
        }
        break;

    case SNAPSHOT_APPLY:
        ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
        if (ret) {
            error_reportf_err(err, "Could not apply snapshot '%s': ",
                              snapshot_name);
        }
        break;

    case SNAPSHOT_DELETE:
        /* Lookup and deletion both run inside the drained, locked section */
        bdrv_drain_all_begin();
        bdrv_graph_rdlock_main_loop();
        ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
        if (ret < 0) {
            error_report("Could not delete snapshot '%s': snapshot not "
                         "found", snapshot_name);
            ret = 1;
        } else {
            /* Delete by the id and name that the lookup stored in sn */
            ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
            if (ret < 0) {
                error_reportf_err(err, "Could not delete snapshot '%s': ",
                                  snapshot_name);
                ret = 1;
            }
        }
        bdrv_graph_rdunlock_main_loop();
        bdrv_drain_all_end();
        break;
    }

    /* Cleanup */
    blk_unref(blk);
    /* Any non-zero ret, positive or negative, becomes exit status 1 */
    if (ret) {
        return 1;
    }
    return 0;
}
3590 
3591 static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
3592 {
3593     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3594     uint8_t *buf_old = NULL;
3595     uint8_t *buf_new = NULL;
3596     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3597     BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3598     BlockDriverInfo bdi = {0};
3599     char *filename;
3600     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3601     int c, flags, src_flags, ret;
3602     BdrvRequestFlags write_flags = 0;
3603     bool writethrough, src_writethrough;
3604     int unsafe = 0;
3605     bool force_share = false;
3606     int progress = 0;
3607     bool quiet = false;
3608     bool compress = false;
3609     Error *local_err = NULL;
3610     bool image_opts = false;
3611     int64_t write_align;
3612 
3613     /* Parse commandline parameters */
3614     fmt = NULL;
3615     cache = BDRV_DEFAULT_CACHE;
3616     src_cache = BDRV_DEFAULT_CACHE;
3617     out_baseimg = NULL;
3618     out_basefmt = NULL;
3619     for(;;) {
3620         static const struct option long_options[] = {
3621             {"help", no_argument, 0, 'h'},
3622             {"object", required_argument, 0, OPTION_OBJECT},
3623             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3624             {"force-share", no_argument, 0, 'U'},
3625             {"compress", no_argument, 0, 'c'},
3626             {0, 0, 0, 0}
3627         };
3628         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qUc",
3629                         long_options, NULL);
3630         if (c == -1) {
3631             break;
3632         }
3633         switch(c) {
3634         case ':':
3635             missing_argument(argv[optind - 1]);
3636             break;
3637         case '?':
3638             unrecognized_option(argv[optind - 1]);
3639             break;
3640         case 'h':
3641             help();
3642             return 0;
3643         case 'f':
3644             fmt = optarg;
3645             break;
3646         case 'F':
3647             out_basefmt = optarg;
3648             break;
3649         case 'b':
3650             out_baseimg = optarg;
3651             break;
3652         case 'u':
3653             unsafe = 1;
3654             break;
3655         case 'p':
3656             progress = 1;
3657             break;
3658         case 't':
3659             cache = optarg;
3660             break;
3661         case 'T':
3662             src_cache = optarg;
3663             break;
3664         case 'q':
3665             quiet = true;
3666             break;
3667         case OPTION_OBJECT:
3668             user_creatable_process_cmdline(optarg);
3669             break;
3670         case OPTION_IMAGE_OPTS:
3671             image_opts = true;
3672             break;
3673         case 'U':
3674             force_share = true;
3675             break;
3676         case 'c':
3677             compress = true;
3678             break;
3679         }
3680     }
3681 
3682     if (quiet) {
3683         progress = 0;
3684     }
3685 
3686     if (optind != argc - 1) {
3687         error_exit(argv[0], "Expecting one image file name");
3688     }
3689     if (!unsafe && !out_baseimg) {
3690         error_exit(argv[0],
3691                    "Must specify backing file (-b) or use unsafe mode (-u)");
3692     }
3693     filename = argv[optind++];
3694 
3695     qemu_progress_init(progress, 2.0);
3696     qemu_progress_print(0, 100);
3697 
3698     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3699     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3700     if (ret < 0) {
3701         error_report("Invalid cache option: %s", cache);
3702         goto out;
3703     }
3704 
3705     src_flags = 0;
3706     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3707     if (ret < 0) {
3708         error_report("Invalid source cache option: %s", src_cache);
3709         goto out;
3710     }
3711 
3712     /* The source files are opened read-only, don't care about WCE */
3713     assert((src_flags & BDRV_O_RDWR) == 0);
3714     (void) src_writethrough;
3715 
3716     /*
3717      * Open the images.
3718      *
3719      * Ignore the old backing file for unsafe rebase in case we want to correct
3720      * the reference to a renamed or moved backing file.
3721      */
3722     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3723                    false);
3724     if (!blk) {
3725         ret = -1;
3726         goto out;
3727     }
3728     bs = blk_bs(blk);
3729 
3730     bdrv_graph_rdlock_main_loop();
3731     unfiltered_bs = bdrv_skip_filters(bs);
3732     unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3733     bdrv_graph_rdunlock_main_loop();
3734 
3735     if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3736         error_report("Compression not supported for this file format");
3737         ret = -1;
3738         goto out;
3739     } else if (compress) {
3740         write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3741     }
3742 
3743     if (out_basefmt != NULL) {
3744         if (bdrv_find_format(out_basefmt) == NULL) {
3745             error_report("Invalid format name: '%s'", out_basefmt);
3746             ret = -1;
3747             goto out;
3748         }
3749     }
3750 
3751     /*
3752      * We need overlay subcluster size (or cluster size in case writes are
3753      * compressed) to make sure write requests are aligned.
3754      */
3755     ret = bdrv_get_info(unfiltered_bs, &bdi);
3756     if (ret < 0) {
3757         error_report("could not get block driver info");
3758         goto out;
3759     } else if (bdi.subcluster_size == 0) {
3760         bdi.cluster_size = bdi.subcluster_size = 1;
3761     }
3762 
3763     write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3764 
3765     /* For safe rebasing we need to compare old and new backing file */
3766     if (!unsafe) {
3767         QDict *options = NULL;
3768         BlockDriverState *base_bs;
3769 
3770         bdrv_graph_rdlock_main_loop();
3771         base_bs = bdrv_cow_bs(unfiltered_bs);
3772         bdrv_graph_rdunlock_main_loop();
3773 
3774         if (base_bs) {
3775             blk_old_backing = blk_new(qemu_get_aio_context(),
3776                                       BLK_PERM_CONSISTENT_READ,
3777                                       BLK_PERM_ALL);
3778             ret = blk_insert_bs(blk_old_backing, base_bs,
3779                                 &local_err);
3780             if (ret < 0) {
3781                 error_reportf_err(local_err,
3782                                   "Could not reuse old backing file '%s': ",
3783                                   base_bs->filename);
3784                 goto out;
3785             }
3786         } else {
3787             blk_old_backing = NULL;
3788         }
3789 
3790         if (out_baseimg[0]) {
3791             const char *overlay_filename;
3792             char *out_real_path;
3793 
3794             options = qdict_new();
3795             if (out_basefmt) {
3796                 qdict_put_str(options, "driver", out_basefmt);
3797             }
3798             if (force_share) {
3799                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3800             }
3801 
3802             bdrv_graph_rdlock_main_loop();
3803             bdrv_refresh_filename(bs);
3804             bdrv_graph_rdunlock_main_loop();
3805             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3806                                                      : bs->filename;
3807             out_real_path =
3808                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3809                                                              out_baseimg,
3810                                                              &local_err);
3811             if (local_err) {
3812                 qobject_unref(options);
3813                 error_reportf_err(local_err,
3814                                   "Could not resolve backing filename: ");
3815                 ret = -1;
3816                 goto out;
3817             }
3818 
3819             /*
3820              * Find out whether we rebase an image on top of a previous image
3821              * in its chain.
3822              */
3823             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3824             if (prefix_chain_bs) {
3825                 qobject_unref(options);
3826                 g_free(out_real_path);
3827 
3828                 blk_new_backing = blk_new(qemu_get_aio_context(),
3829                                           BLK_PERM_CONSISTENT_READ,
3830                                           BLK_PERM_ALL);
3831                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3832                                     &local_err);
3833                 if (ret < 0) {
3834                     error_reportf_err(local_err,
3835                                       "Could not reuse backing file '%s': ",
3836                                       out_baseimg);
3837                     goto out;
3838                 }
3839             } else {
3840                 blk_new_backing = blk_new_open(out_real_path, NULL,
3841                                                options, src_flags, &local_err);
3842                 g_free(out_real_path);
3843                 if (!blk_new_backing) {
3844                     error_reportf_err(local_err,
3845                                       "Could not open new backing file '%s': ",
3846                                       out_baseimg);
3847                     ret = -1;
3848                     goto out;
3849                 }
3850             }
3851         }
3852     }
3853 
3854     /*
3855      * Check each unallocated cluster in the COW file. If it is unallocated,
3856      * accesses go to the backing file. We must therefore compare this cluster
3857      * in the old and new backing file, and if they differ we need to copy it
3858      * from the old backing file into the COW file.
3859      *
3860      * If qemu-img crashes during this step, no harm is done. The content of
3861      * the image is the same as the original one at any time.
3862      */
3863     if (!unsafe) {
3864         int64_t size;
3865         int64_t old_backing_size = 0;
3866         int64_t new_backing_size = 0;
3867         uint64_t offset;
3868         int64_t n, n_old = 0, n_new = 0;
3869         float local_progress = 0;
3870 
3871         if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
3872             bdrv_opt_mem_align(blk_bs(blk))) {
3873             buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
3874         } else {
3875             buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3876         }
3877         buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
3878 
3879         size = blk_getlength(blk);
3880         if (size < 0) {
3881             error_report("Could not get size of '%s': %s",
3882                          filename, strerror(-size));
3883             ret = -1;
3884             goto out;
3885         }
3886         if (blk_old_backing) {
3887             old_backing_size = blk_getlength(blk_old_backing);
3888             if (old_backing_size < 0) {
3889                 char backing_name[PATH_MAX];
3890 
3891                 bdrv_get_backing_filename(bs, backing_name,
3892                                           sizeof(backing_name));
3893                 error_report("Could not get size of '%s': %s",
3894                              backing_name, strerror(-old_backing_size));
3895                 ret = -1;
3896                 goto out;
3897             }
3898         }
3899         if (blk_new_backing) {
3900             new_backing_size = blk_getlength(blk_new_backing);
3901             if (new_backing_size < 0) {
3902                 error_report("Could not get size of '%s': %s",
3903                              out_baseimg, strerror(-new_backing_size));
3904                 ret = -1;
3905                 goto out;
3906             }
3907         }
3908 
3909         if (size != 0) {
3910             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3911         }
3912 
3913         for (offset = 0; offset < size; offset += n) {
3914             bool old_backing_eof = false;
3915             int64_t n_alloc;
3916 
3917             /* How many bytes can we handle with the next read? */
3918             n = MIN(IO_BUF_SIZE, size - offset);
3919 
3920             /* If the cluster is allocated, we don't need to take action */
3921             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
3922             if (ret < 0) {
3923                 error_report("error while reading image metadata: %s",
3924                              strerror(-ret));
3925                 goto out;
3926             }
3927             if (ret) {
3928                 continue;
3929             }
3930 
3931             if (prefix_chain_bs) {
3932                 uint64_t bytes = n;
3933 
3934                 /*
3935                  * If cluster wasn't changed since prefix_chain, we don't need
3936                  * to take action
3937                  */
3938                 ret = bdrv_is_allocated_above(unfiltered_bs_cow,
3939                                               prefix_chain_bs, false,
3940                                               offset, n, &n);
3941                 if (ret < 0) {
3942                     error_report("error while reading image metadata: %s",
3943                                  strerror(-ret));
3944                     goto out;
3945                 }
3946                 if (!ret && n) {
3947                     continue;
3948                 }
3949                 if (!n) {
3950                     /*
3951                      * If we've reached EOF of the old backing, it means that
3952                      * offsets beyond the old backing size were read as zeroes.
3953                      * Now we will need to explicitly zero the cluster in
3954                      * order to preserve that state after the rebase.
3955                      */
3956                     n = bytes;
3957                 }
3958             }
3959 
3960             /*
3961              * At this point we know that the region [offset; offset + n)
3962              * is unallocated within the target image.  This region might be
3963              * unaligned to the target image's (sub)cluster boundaries, as
3964              * old backing may have smaller clusters (or have subclusters).
3965              * We extend it to the aligned boundaries to avoid CoW on
3966              * partial writes in blk_pwrite(),
3967              */
3968             n += offset - QEMU_ALIGN_DOWN(offset, write_align);
3969             offset = QEMU_ALIGN_DOWN(offset, write_align);
3970             n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
3971             n = MIN(n, size - offset);
3972             assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
3973                    n_alloc == n);
3974 
3975             /*
3976              * Much like with the target image, we'll try to read as much
3977              * of the old and new backings as we can.
3978              */
3979             n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
3980             n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
3981 
3982             /*
3983              * Read old and new backing file and take into consideration that
3984              * backing files may be smaller than the COW image.
3985              */
3986             memset(buf_old + n_old, 0, n - n_old);
3987             if (!n_old) {
3988                 old_backing_eof = true;
3989             } else {
3990                 ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
3991                 if (ret < 0) {
3992                     error_report("error while reading from old backing file");
3993                     goto out;
3994                 }
3995             }
3996 
3997             memset(buf_new + n_new, 0, n - n_new);
3998             if (n_new) {
3999                 ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
4000                 if (ret < 0) {
4001                     error_report("error while reading from new backing file");
4002                     goto out;
4003                 }
4004             }
4005 
4006             /* If they differ, we need to write to the COW file */
4007             uint64_t written = 0;
4008 
4009             while (written < n) {
4010                 int64_t pnum;
4011 
4012                 if (compare_buffers(buf_old + written, buf_new + written,
4013                                     n - written, write_align, &pnum))
4014                 {
4015                     if (old_backing_eof) {
4016                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
4017                     } else {
4018                         assert(written + pnum <= IO_BUF_SIZE);
4019                         ret = blk_pwrite(blk, offset + written, pnum,
4020                                          buf_old + written, write_flags);
4021                     }
4022                     if (ret < 0) {
4023                         error_report("Error while writing to COW image: %s",
4024                             strerror(-ret));
4025                         goto out;
4026                     }
4027                 }
4028 
4029                 written += pnum;
4030                 if (offset + written >= old_backing_size) {
4031                     old_backing_eof = true;
4032                 }
4033             }
4034             qemu_progress_print(local_progress, 100);
4035         }
4036     }
4037 
4038     /*
4039      * Change the backing file. All clusters that are different from the old
4040      * backing file are overwritten in the COW file now, so the visible content
4041      * doesn't change when we switch the backing file.
4042      */
4043     if (out_baseimg && *out_baseimg) {
4044         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
4045                                        true);
4046     } else {
4047         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
4048     }
4049 
4050     if (ret == -ENOSPC) {
4051         error_report("Could not change the backing file to '%s': No "
4052                      "space left in the file header", out_baseimg);
4053     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4054         error_report("Could not change the backing file to '%s': backing "
4055                      "format must be specified", out_baseimg);
4056     } else if (ret < 0) {
4057         error_report("Could not change the backing file to '%s': %s",
4058             out_baseimg, strerror(-ret));
4059     }
4060 
4061     qemu_progress_print(100, 0);
4062     /*
4063      * TODO At this point it is possible to check if any clusters that are
4064      * allocated in the COW file are the same in the backing file. If so, they
4065      * could be dropped from the COW file. Don't do this before switching the
4066      * backing file, in case of a crash this would lead to corruption.
4067      */
4068 out:
4069     qemu_progress_end();
4070     /* Cleanup */
4071     if (!unsafe) {
4072         blk_unref(blk_old_backing);
4073         blk_unref(blk_new_backing);
4074     }
4075     qemu_vfree(buf_old);
4076     qemu_vfree(buf_new);
4077 
4078     blk_unref(blk);
4079     if (ret) {
4080         return 1;
4081     }
4082     return 0;
4083 }
4084 
4085 static int img_resize(const img_cmd_t *ccmd, int argc, char **argv)
4086 {
4087     Error *err = NULL;
4088     int c, ret, relative;
4089     const char *filename, *fmt, *size;
4090     int64_t n, total_size, current_size;
4091     bool quiet = false;
4092     BlockBackend *blk = NULL;
4093     PreallocMode prealloc = PREALLOC_MODE_OFF;
4094     QemuOpts *param;
4095 
4096     static QemuOptsList resize_options = {
4097         .name = "resize_options",
4098         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4099         .desc = {
4100             {
4101                 .name = BLOCK_OPT_SIZE,
4102                 .type = QEMU_OPT_SIZE,
4103                 .help = "Virtual disk size"
4104             }, {
4105                 /* end of list */
4106             }
4107         },
4108     };
4109     bool image_opts = false;
4110     bool shrink = false;
4111 
4112     /* Remove size from argv manually so that negative numbers are not treated
4113      * as options by getopt. */
4114     if (argc < 3) {
4115         error_exit(argv[0], "Not enough arguments");
4116         return 1;
4117     }
4118 
4119     size = argv[--argc];
4120 
4121     /* Parse getopt arguments */
4122     fmt = NULL;
4123     for(;;) {
4124         static const struct option long_options[] = {
4125             {"help", no_argument, 0, 'h'},
4126             {"object", required_argument, 0, OPTION_OBJECT},
4127             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4128             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4129             {"shrink", no_argument, 0, OPTION_SHRINK},
4130             {0, 0, 0, 0}
4131         };
4132         c = getopt_long(argc, argv, ":f:hq",
4133                         long_options, NULL);
4134         if (c == -1) {
4135             break;
4136         }
4137         switch(c) {
4138         case ':':
4139             missing_argument(argv[optind - 1]);
4140             break;
4141         case '?':
4142             unrecognized_option(argv[optind - 1]);
4143             break;
4144         case 'h':
4145             help();
4146             break;
4147         case 'f':
4148             fmt = optarg;
4149             break;
4150         case 'q':
4151             quiet = true;
4152             break;
4153         case OPTION_OBJECT:
4154             user_creatable_process_cmdline(optarg);
4155             break;
4156         case OPTION_IMAGE_OPTS:
4157             image_opts = true;
4158             break;
4159         case OPTION_PREALLOCATION:
4160             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4161                                        PREALLOC_MODE__MAX, NULL);
4162             if (prealloc == PREALLOC_MODE__MAX) {
4163                 error_report("Invalid preallocation mode '%s'", optarg);
4164                 return 1;
4165             }
4166             break;
4167         case OPTION_SHRINK:
4168             shrink = true;
4169             break;
4170         }
4171     }
4172     if (optind != argc - 1) {
4173         error_exit(argv[0], "Expecting image file name and size");
4174     }
4175     filename = argv[optind++];
4176 
4177     /* Choose grow, shrink, or absolute resize mode */
4178     switch (size[0]) {
4179     case '+':
4180         relative = 1;
4181         size++;
4182         break;
4183     case '-':
4184         relative = -1;
4185         size++;
4186         break;
4187     default:
4188         relative = 0;
4189         break;
4190     }
4191 
4192     /* Parse size */
4193     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4194     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4195         error_report_err(err);
4196         ret = -1;
4197         qemu_opts_del(param);
4198         goto out;
4199     }
4200     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4201     qemu_opts_del(param);
4202 
4203     blk = img_open(image_opts, filename, fmt,
4204                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4205                    false);
4206     if (!blk) {
4207         ret = -1;
4208         goto out;
4209     }
4210 
4211     current_size = blk_getlength(blk);
4212     if (current_size < 0) {
4213         error_report("Failed to inquire current image length: %s",
4214                      strerror(-current_size));
4215         ret = -1;
4216         goto out;
4217     }
4218 
4219     if (relative) {
4220         total_size = current_size + n * relative;
4221     } else {
4222         total_size = n;
4223     }
4224     if (total_size <= 0) {
4225         error_report("New image size must be positive");
4226         ret = -1;
4227         goto out;
4228     }
4229 
4230     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4231         error_report("Preallocation can only be used for growing images");
4232         ret = -1;
4233         goto out;
4234     }
4235 
4236     if (total_size < current_size && !shrink) {
4237         error_report("Use the --shrink option to perform a shrink operation.");
4238         warn_report("Shrinking an image will delete all data beyond the "
4239                     "shrunken image's end. Before performing such an "
4240                     "operation, make sure there is no important data there.");
4241         ret = -1;
4242         goto out;
4243     }
4244 
4245     /*
4246      * The user expects the image to have the desired size after
4247      * resizing, so pass @exact=true.  It is of no use to report
4248      * success when the image has not actually been resized.
4249      */
4250     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4251     if (!ret) {
4252         qprintf(quiet, "Image resized.\n");
4253     } else {
4254         error_report_err(err);
4255     }
4256 out:
4257     blk_unref(blk);
4258     if (ret) {
4259         return 1;
4260     }
4261     return 0;
4262 }
4263 
4264 static void amend_status_cb(BlockDriverState *bs,
4265                             int64_t offset, int64_t total_work_size,
4266                             void *opaque)
4267 {
4268     qemu_progress_print(100.f * offset / total_work_size, 0);
4269 }
4270 
4271 static int print_amend_option_help(const char *format)
4272 {
4273     BlockDriver *drv;
4274 
4275     GRAPH_RDLOCK_GUARD_MAINLOOP();
4276 
4277     /* Find driver and parse its options */
4278     drv = bdrv_find_format(format);
4279     if (!drv) {
4280         error_report("Unknown file format '%s'", format);
4281         return 1;
4282     }
4283 
4284     if (!drv->bdrv_amend_options) {
4285         error_report("Format driver '%s' does not support option amendment",
4286                      format);
4287         return 1;
4288     }
4289 
4290     /* Every driver supporting amendment must have amend_opts */
4291     assert(drv->amend_opts);
4292 
4293     printf("Amend options for '%s':\n", format);
4294     qemu_opts_print_help(drv->amend_opts, false);
4295     return 0;
4296 }
4297 
4298 static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
4299 {
4300     Error *err = NULL;
4301     int c, ret = 0;
4302     char *options = NULL;
4303     QemuOptsList *amend_opts = NULL;
4304     QemuOpts *opts = NULL;
4305     const char *fmt = NULL, *filename, *cache;
4306     int flags;
4307     bool writethrough;
4308     bool quiet = false, progress = false;
4309     BlockBackend *blk = NULL;
4310     BlockDriverState *bs = NULL;
4311     bool image_opts = false;
4312     bool force = false;
4313 
4314     cache = BDRV_DEFAULT_CACHE;
4315     for (;;) {
4316         static const struct option long_options[] = {
4317             {"help", no_argument, 0, 'h'},
4318             {"object", required_argument, 0, OPTION_OBJECT},
4319             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4320             {"force", no_argument, 0, OPTION_FORCE},
4321             {0, 0, 0, 0}
4322         };
4323         c = getopt_long(argc, argv, ":ho:f:t:pq",
4324                         long_options, NULL);
4325         if (c == -1) {
4326             break;
4327         }
4328 
4329         switch (c) {
4330         case ':':
4331             missing_argument(argv[optind - 1]);
4332             break;
4333         case '?':
4334             unrecognized_option(argv[optind - 1]);
4335             break;
4336         case 'h':
4337             help();
4338             break;
4339         case 'o':
4340             if (accumulate_options(&options, optarg) < 0) {
4341                 ret = -1;
4342                 goto out_no_progress;
4343             }
4344             break;
4345         case 'f':
4346             fmt = optarg;
4347             break;
4348         case 't':
4349             cache = optarg;
4350             break;
4351         case 'p':
4352             progress = true;
4353             break;
4354         case 'q':
4355             quiet = true;
4356             break;
4357         case OPTION_OBJECT:
4358             user_creatable_process_cmdline(optarg);
4359             break;
4360         case OPTION_IMAGE_OPTS:
4361             image_opts = true;
4362             break;
4363         case OPTION_FORCE:
4364             force = true;
4365             break;
4366         }
4367     }
4368 
4369     if (!options) {
4370         error_exit(argv[0], "Must specify options (-o)");
4371     }
4372 
4373     if (quiet) {
4374         progress = false;
4375     }
4376     qemu_progress_init(progress, 1.0);
4377 
4378     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4379     if (fmt && has_help_option(options)) {
4380         /* If a format is explicitly specified (and possibly no filename is
4381          * given), print option help here */
4382         ret = print_amend_option_help(fmt);
4383         goto out;
4384     }
4385 
4386     if (optind != argc - 1) {
4387         error_report("Expecting one image file name");
4388         ret = -1;
4389         goto out;
4390     }
4391 
4392     flags = BDRV_O_RDWR;
4393     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4394     if (ret < 0) {
4395         error_report("Invalid cache option: %s", cache);
4396         goto out;
4397     }
4398 
4399     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4400                    false);
4401     if (!blk) {
4402         ret = -1;
4403         goto out;
4404     }
4405     bs = blk_bs(blk);
4406 
4407     fmt = bs->drv->format_name;
4408 
4409     if (has_help_option(options)) {
4410         /* If the format was auto-detected, print option help here */
4411         ret = print_amend_option_help(fmt);
4412         goto out;
4413     }
4414 
4415     bdrv_graph_rdlock_main_loop();
4416     if (!bs->drv->bdrv_amend_options) {
4417         error_report("Format driver '%s' does not support option amendment",
4418                      fmt);
4419         bdrv_graph_rdunlock_main_loop();
4420         ret = -1;
4421         goto out;
4422     }
4423 
4424     /* Every driver supporting amendment must have amend_opts */
4425     assert(bs->drv->amend_opts);
4426 
4427     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4428     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4429     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4430         /* Try to parse options using the create options */
4431         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4432         qemu_opts_del(opts);
4433         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4434         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4435             error_append_hint(&err,
4436                               "This option is only supported for image creation\n");
4437         }
4438 
4439         bdrv_graph_rdunlock_main_loop();
4440         error_report_err(err);
4441         ret = -1;
4442         goto out;
4443     }
4444 
4445     /* In case the driver does not call amend_status_cb() */
4446     qemu_progress_print(0.f, 0);
4447     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4448     qemu_progress_print(100.f, 0);
4449     bdrv_graph_rdunlock_main_loop();
4450 
4451     if (ret < 0) {
4452         error_report_err(err);
4453         goto out;
4454     }
4455 
4456 out:
4457     qemu_progress_end();
4458 
4459 out_no_progress:
4460     blk_unref(blk);
4461     qemu_opts_del(opts);
4462     qemu_opts_free(amend_opts);
4463     g_free(options);
4464 
4465     if (ret) {
4466         return 1;
4467     }
4468     return 0;
4469 }
4470 
4471 typedef struct BenchData {
4472     BlockBackend *blk;
4473     uint64_t image_size;
4474     bool write;
4475     int bufsize;
4476     int step;
4477     int nrreq;
4478     int n;
4479     int flush_interval;
4480     bool drain_on_flush;
4481     uint8_t *buf;
4482     QEMUIOVector *qiov;
4483 
4484     int in_flight;
4485     bool in_flush;
4486     uint64_t offset;
4487 } BenchData;
4488 
/*
 * Completion callback for flushes that were issued without draining the
 * request queue first.  A successful flush needs no bookkeeping; a failed
 * one terminates the program.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4496 
4497 static void bench_cb(void *opaque, int ret)
4498 {
4499     BenchData *b = opaque;
4500     BlockAIOCB *acb;
4501 
4502     if (ret < 0) {
4503         error_report("Failed request: %s", strerror(-ret));
4504         exit(EXIT_FAILURE);
4505     }
4506 
4507     if (b->in_flush) {
4508         /* Just finished a flush with drained queue: Start next requests */
4509         assert(b->in_flight == 0);
4510         b->in_flush = false;
4511     } else if (b->in_flight > 0) {
4512         int remaining = b->n - b->in_flight;
4513 
4514         b->n--;
4515         b->in_flight--;
4516 
4517         /* Time for flush? Drain queue if requested, then flush */
4518         if (b->flush_interval && remaining % b->flush_interval == 0) {
4519             if (!b->in_flight || !b->drain_on_flush) {
4520                 BlockCompletionFunc *cb;
4521 
4522                 if (b->drain_on_flush) {
4523                     b->in_flush = true;
4524                     cb = bench_cb;
4525                 } else {
4526                     cb = bench_undrained_flush_cb;
4527                 }
4528 
4529                 acb = blk_aio_flush(b->blk, cb, b);
4530                 if (!acb) {
4531                     error_report("Failed to issue flush request");
4532                     exit(EXIT_FAILURE);
4533                 }
4534             }
4535             if (b->drain_on_flush) {
4536                 return;
4537             }
4538         }
4539     }
4540 
4541     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4542         int64_t offset = b->offset;
4543         /* blk_aio_* might look for completed I/Os and kick bench_cb
4544          * again, so make sure this operation is counted by in_flight
4545          * and b->offset is ready for the next submission.
4546          */
4547         b->in_flight++;
4548         b->offset += b->step;
4549         if (b->image_size <= b->bufsize) {
4550             b->offset = 0;
4551         } else {
4552             b->offset %= b->image_size - b->bufsize;
4553         }
4554         if (b->write) {
4555             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4556         } else {
4557             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4558         }
4559         if (!acb) {
4560             error_report("Failed to issue request");
4561             exit(EXIT_FAILURE);
4562         }
4563     }
4564 }
4565 
4566 static int img_bench(const img_cmd_t *ccmd, int argc, char **argv)
4567 {
4568     int c, ret = 0;
4569     const char *fmt = NULL, *filename;
4570     bool quiet = false;
4571     bool image_opts = false;
4572     bool is_write = false;
4573     int count = 75000;
4574     int depth = 64;
4575     int64_t offset = 0;
4576     size_t bufsize = 4096;
4577     int pattern = 0;
4578     size_t step = 0;
4579     int flush_interval = 0;
4580     bool drain_on_flush = true;
4581     int64_t image_size;
4582     BlockBackend *blk = NULL;
4583     BenchData data = {};
4584     int flags = 0;
4585     bool writethrough = false;
4586     struct timeval t1, t2;
4587     int i;
4588     bool force_share = false;
4589     size_t buf_size = 0;
4590 
4591     for (;;) {
4592         static const struct option long_options[] = {
4593             {"help", no_argument, 0, 'h'},
4594             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4595             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4596             {"pattern", required_argument, 0, OPTION_PATTERN},
4597             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4598             {"force-share", no_argument, 0, 'U'},
4599             {0, 0, 0, 0}
4600         };
4601         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4602                         NULL);
4603         if (c == -1) {
4604             break;
4605         }
4606 
4607         switch (c) {
4608         case ':':
4609             missing_argument(argv[optind - 1]);
4610             break;
4611         case '?':
4612             unrecognized_option(argv[optind - 1]);
4613             break;
4614         case 'h':
4615             help();
4616             break;
4617         case 'c':
4618         {
4619             unsigned long res;
4620 
4621             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4622                 error_report("Invalid request count specified");
4623                 return 1;
4624             }
4625             count = res;
4626             break;
4627         }
4628         case 'd':
4629         {
4630             unsigned long res;
4631 
4632             if (qemu_strtoul(optarg, NULL, 0, &res) <= 0 || res > INT_MAX) {
4633                 error_report("Invalid queue depth specified");
4634                 return 1;
4635             }
4636             depth = res;
4637             break;
4638         }
4639         case 'f':
4640             fmt = optarg;
4641             break;
4642         case 'n':
4643             flags |= BDRV_O_NATIVE_AIO;
4644             break;
4645         case 'i':
4646             ret = bdrv_parse_aio(optarg, &flags);
4647             if (ret < 0) {
4648                 error_report("Invalid aio option: %s", optarg);
4649                 ret = -1;
4650                 goto out;
4651             }
4652             break;
4653         case 'o':
4654         {
4655             offset = cvtnum("offset", optarg);
4656             if (offset < 0) {
4657                 return 1;
4658             }
4659             break;
4660         }
4661             break;
4662         case 'q':
4663             quiet = true;
4664             break;
4665         case 's':
4666         {
4667             int64_t sval;
4668 
4669             sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
4670             if (sval < 0) {
4671                 return 1;
4672             }
4673 
4674             bufsize = sval;
4675             break;
4676         }
4677         case 'S':
4678         {
4679             int64_t sval;
4680 
4681             sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
4682             if (sval < 0) {
4683                 return 1;
4684             }
4685 
4686             step = sval;
4687             break;
4688         }
4689         case 't':
4690             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4691             if (ret < 0) {
4692                 error_report("Invalid cache mode");
4693                 ret = -1;
4694                 goto out;
4695             }
4696             break;
4697         case 'w':
4698             flags |= BDRV_O_RDWR;
4699             is_write = true;
4700             break;
4701         case 'U':
4702             force_share = true;
4703             break;
4704         case OPTION_PATTERN:
4705         {
4706             unsigned long res;
4707 
4708             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4709                 error_report("Invalid pattern byte specified");
4710                 return 1;
4711             }
4712             pattern = res;
4713             break;
4714         }
4715         case OPTION_FLUSH_INTERVAL:
4716         {
4717             unsigned long res;
4718 
4719             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4720                 error_report("Invalid flush interval specified");
4721                 return 1;
4722             }
4723             flush_interval = res;
4724             break;
4725         }
4726         case OPTION_NO_DRAIN:
4727             drain_on_flush = false;
4728             break;
4729         case OPTION_IMAGE_OPTS:
4730             image_opts = true;
4731             break;
4732         }
4733     }
4734 
4735     if (optind != argc - 1) {
4736         error_exit(argv[0], "Expecting one image file name");
4737     }
4738     filename = argv[argc - 1];
4739 
4740     if (!is_write && flush_interval) {
4741         error_report("--flush-interval is only available in write tests");
4742         ret = -1;
4743         goto out;
4744     }
4745     if (flush_interval && flush_interval < depth) {
4746         error_report("Flush interval can't be smaller than depth");
4747         ret = -1;
4748         goto out;
4749     }
4750 
4751     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4752                    force_share);
4753     if (!blk) {
4754         ret = -1;
4755         goto out;
4756     }
4757 
4758     image_size = blk_getlength(blk);
4759     if (image_size < 0) {
4760         ret = image_size;
4761         goto out;
4762     }
4763 
4764     data = (BenchData) {
4765         .blk            = blk,
4766         .image_size     = image_size,
4767         .bufsize        = bufsize,
4768         .step           = step ?: bufsize,
4769         .nrreq          = depth,
4770         .n              = count,
4771         .offset         = offset,
4772         .write          = is_write,
4773         .flush_interval = flush_interval,
4774         .drain_on_flush = drain_on_flush,
4775     };
4776     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4777            "(starting at offset %" PRId64 ", step size %d)\n",
4778            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4779            data.offset, data.step);
4780     if (flush_interval) {
4781         printf("Sending flush every %d requests\n", flush_interval);
4782     }
4783 
4784     buf_size = data.nrreq * data.bufsize;
4785     data.buf = blk_blockalign(blk, buf_size);
4786     memset(data.buf, pattern, data.nrreq * data.bufsize);
4787 
4788     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4789 
4790     data.qiov = g_new(QEMUIOVector, data.nrreq);
4791     for (i = 0; i < data.nrreq; i++) {
4792         qemu_iovec_init(&data.qiov[i], 1);
4793         qemu_iovec_add(&data.qiov[i],
4794                        data.buf + i * data.bufsize, data.bufsize);
4795     }
4796 
4797     gettimeofday(&t1, NULL);
4798     bench_cb(&data, 0);
4799 
4800     while (data.n > 0) {
4801         main_loop_wait(false);
4802     }
4803     gettimeofday(&t2, NULL);
4804 
4805     printf("Run completed in %3.3f seconds.\n",
4806            (t2.tv_sec - t1.tv_sec)
4807            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4808 
4809 out:
4810     if (data.buf) {
4811         blk_unregister_buf(blk, data.buf, buf_size);
4812     }
4813     qemu_vfree(data.buf);
4814     blk_unref(blk);
4815 
4816     if (ret) {
4817         return 1;
4818     }
4819     return 0;
4820 }
4821 
/* Operations 'qemu-img bitmap' can perform, one per command line switch */
enum ImgBitmapAct {
    BITMAP_ADD,         /* --add */
    BITMAP_REMOVE,      /* --remove */
    BITMAP_CLEAR,       /* --clear */
    BITMAP_ENABLE,      /* --enable */
    BITMAP_DISABLE,     /* --disable */
    BITMAP_MERGE,       /* --merge */
};
4830 typedef struct ImgBitmapAction {
4831     enum ImgBitmapAct act;
4832     const char *src; /* only used for merge */
4833     QSIMPLEQ_ENTRY(ImgBitmapAction) next;
4834 } ImgBitmapAction;
4835 
4836 static int img_bitmap(const img_cmd_t *ccmd, int argc, char **argv)
4837 {
4838     Error *err = NULL;
4839     int c, ret = 1;
4840     QemuOpts *opts = NULL;
4841     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
4842     const char *filename, *bitmap;
4843     BlockBackend *blk = NULL, *src = NULL;
4844     BlockDriverState *bs = NULL, *src_bs = NULL;
4845     bool image_opts = false;
4846     int64_t granularity = 0;
4847     bool add = false, merge = false;
4848     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
4849     ImgBitmapAction *act, *act_next;
4850     const char *op;
4851     int inactivate_ret;
4852 
4853     QSIMPLEQ_INIT(&actions);
4854 
4855     for (;;) {
4856         static const struct option long_options[] = {
4857             {"help", no_argument, 0, 'h'},
4858             {"object", required_argument, 0, OPTION_OBJECT},
4859             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4860             {"add", no_argument, 0, OPTION_ADD},
4861             {"remove", no_argument, 0, OPTION_REMOVE},
4862             {"clear", no_argument, 0, OPTION_CLEAR},
4863             {"enable", no_argument, 0, OPTION_ENABLE},
4864             {"disable", no_argument, 0, OPTION_DISABLE},
4865             {"merge", required_argument, 0, OPTION_MERGE},
4866             {"granularity", required_argument, 0, 'g'},
4867             {"source-file", required_argument, 0, 'b'},
4868             {"source-format", required_argument, 0, 'F'},
4869             {0, 0, 0, 0}
4870         };
4871         c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
4872         if (c == -1) {
4873             break;
4874         }
4875 
4876         switch (c) {
4877         case ':':
4878             missing_argument(argv[optind - 1]);
4879             break;
4880         case '?':
4881             unrecognized_option(argv[optind - 1]);
4882             break;
4883         case 'h':
4884             help();
4885             break;
4886         case 'b':
4887             src_filename = optarg;
4888             break;
4889         case 'f':
4890             fmt = optarg;
4891             break;
4892         case 'F':
4893             src_fmt = optarg;
4894             break;
4895         case 'g':
4896             granularity = cvtnum("granularity", optarg);
4897             if (granularity < 0) {
4898                 return 1;
4899             }
4900             break;
4901         case OPTION_ADD:
4902             act = g_new0(ImgBitmapAction, 1);
4903             act->act = BITMAP_ADD;
4904             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4905             add = true;
4906             break;
4907         case OPTION_REMOVE:
4908             act = g_new0(ImgBitmapAction, 1);
4909             act->act = BITMAP_REMOVE;
4910             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4911             break;
4912         case OPTION_CLEAR:
4913             act = g_new0(ImgBitmapAction, 1);
4914             act->act = BITMAP_CLEAR;
4915             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4916             break;
4917         case OPTION_ENABLE:
4918             act = g_new0(ImgBitmapAction, 1);
4919             act->act = BITMAP_ENABLE;
4920             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4921             break;
4922         case OPTION_DISABLE:
4923             act = g_new0(ImgBitmapAction, 1);
4924             act->act = BITMAP_DISABLE;
4925             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4926             break;
4927         case OPTION_MERGE:
4928             act = g_new0(ImgBitmapAction, 1);
4929             act->act = BITMAP_MERGE;
4930             act->src = optarg;
4931             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4932             merge = true;
4933             break;
4934         case OPTION_OBJECT:
4935             user_creatable_process_cmdline(optarg);
4936             break;
4937         case OPTION_IMAGE_OPTS:
4938             image_opts = true;
4939             break;
4940         }
4941     }
4942 
4943     if (QSIMPLEQ_EMPTY(&actions)) {
4944         error_report("Need at least one of --add, --remove, --clear, "
4945                      "--enable, --disable, or --merge");
4946         goto out;
4947     }
4948 
4949     if (granularity && !add) {
4950         error_report("granularity only supported with --add");
4951         goto out;
4952     }
4953     if (src_fmt && !src_filename) {
4954         error_report("-F only supported with -b");
4955         goto out;
4956     }
4957     if (src_filename && !merge) {
4958         error_report("Merge bitmap source file only supported with "
4959                      "--merge");
4960         goto out;
4961     }
4962 
4963     if (optind != argc - 2) {
4964         error_report("Expecting filename and bitmap name");
4965         goto out;
4966     }
4967 
4968     filename = argv[optind];
4969     bitmap = argv[optind + 1];
4970 
4971     /*
4972      * No need to open backing chains; we will be manipulating bitmaps
4973      * directly in this image without reference to image contents.
4974      */
4975     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
4976                    false, false, false);
4977     if (!blk) {
4978         goto out;
4979     }
4980     bs = blk_bs(blk);
4981     if (src_filename) {
4982         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
4983                        false, false, false);
4984         if (!src) {
4985             goto out;
4986         }
4987         src_bs = blk_bs(src);
4988     } else {
4989         src_bs = bs;
4990     }
4991 
4992     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
4993         switch (act->act) {
4994         case BITMAP_ADD:
4995             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
4996                                        !!granularity, granularity, true, true,
4997                                        false, false, &err);
4998             op = "add";
4999             break;
5000         case BITMAP_REMOVE:
5001             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
5002             op = "remove";
5003             break;
5004         case BITMAP_CLEAR:
5005             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
5006             op = "clear";
5007             break;
5008         case BITMAP_ENABLE:
5009             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
5010             op = "enable";
5011             break;
5012         case BITMAP_DISABLE:
5013             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
5014             op = "disable";
5015             break;
5016         case BITMAP_MERGE:
5017             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
5018                                   act->src, &err);
5019             op = "merge";
5020             break;
5021         default:
5022             g_assert_not_reached();
5023         }
5024 
5025         if (err) {
5026             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
5027                               op, bitmap);
5028             goto out;
5029         }
5030         g_free(act);
5031     }
5032 
5033     ret = 0;
5034 
5035  out:
5036     /*
5037      * Manually inactivate the images first because this way we can know whether
5038      * an error occurred. blk_unref() doesn't tell us about failures.
5039      */
5040     inactivate_ret = bdrv_inactivate_all();
5041     if (inactivate_ret < 0) {
5042         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
5043         ret = 1;
5044     }
5045 
5046     blk_unref(src);
5047     blk_unref(blk);
5048     qemu_opts_del(opts);
5049     return ret;
5050 }
5051 
/* Bits set in DdInfo.flags for each dd operand seen on the command line */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
5057 
/* Settings of a 'qemu-img dd' run that are not tied to one side of the copy */
struct DdInfo {
    unsigned int flags;     /* C_* bits of the operands that were given */
    int64_t count;          /* value of the count= operand */
};
5062 
/* One side (input or output) of a 'qemu-img dd' copy */
struct DdIo {
    int bsz;            /* block size */
    char *filename;     /* heap-allocated file name, freed by img_dd() */
    uint8_t *buf;       /* transfer buffer, freed by img_dd() */
    int64_t offset;     /* value of skip=, counted in blocks of bsz bytes */
};
5069 
/*
 * Table entry describing one "name=value" operand of 'qemu-img dd'.
 * The member order matters: img_dd() initialises its table positionally.
 */
struct DdOpts {
    /* operand name, i.e. the text in front of '=' */
    const char *name;
    /* parser for the value; returns 0 on success, non-zero on error */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags once the operand was parsed */
    unsigned int flag;
};
5075 
5076 static int img_dd_bs(const char *arg,
5077                      struct DdIo *in, struct DdIo *out,
5078                      struct DdInfo *dd)
5079 {
5080     int64_t res;
5081 
5082     res = cvtnum_full("bs", arg, 1, INT_MAX);
5083 
5084     if (res < 0) {
5085         return 1;
5086     }
5087     in->bsz = out->bsz = res;
5088 
5089     return 0;
5090 }
5091 
5092 static int img_dd_count(const char *arg,
5093                         struct DdIo *in, struct DdIo *out,
5094                         struct DdInfo *dd)
5095 {
5096     dd->count = cvtnum("count", arg);
5097 
5098     if (dd->count < 0) {
5099         return 1;
5100     }
5101 
5102     return 0;
5103 }
5104 
5105 static int img_dd_if(const char *arg,
5106                      struct DdIo *in, struct DdIo *out,
5107                      struct DdInfo *dd)
5108 {
5109     in->filename = g_strdup(arg);
5110 
5111     return 0;
5112 }
5113 
5114 static int img_dd_of(const char *arg,
5115                      struct DdIo *in, struct DdIo *out,
5116                      struct DdInfo *dd)
5117 {
5118     out->filename = g_strdup(arg);
5119 
5120     return 0;
5121 }
5122 
5123 static int img_dd_skip(const char *arg,
5124                        struct DdIo *in, struct DdIo *out,
5125                        struct DdInfo *dd)
5126 {
5127     in->offset = cvtnum("skip", arg);
5128 
5129     if (in->offset < 0) {
5130         return 1;
5131     }
5132 
5133     return 0;
5134 }
5135 
5136 static int img_dd(const img_cmd_t *ccmd, int argc, char **argv)
5137 {
5138     int ret = 0;
5139     char *arg = NULL;
5140     char *tmp;
5141     BlockDriver *drv = NULL, *proto_drv = NULL;
5142     BlockBackend *blk1 = NULL, *blk2 = NULL;
5143     QemuOpts *opts = NULL;
5144     QemuOptsList *create_opts = NULL;
5145     Error *local_err = NULL;
5146     bool image_opts = false;
5147     int c, i;
5148     const char *out_fmt = "raw";
5149     const char *fmt = NULL;
5150     int64_t size = 0;
5151     int64_t out_pos, in_pos;
5152     bool force_share = false;
5153     struct DdInfo dd = {
5154         .flags = 0,
5155         .count = 0,
5156     };
5157     struct DdIo in = {
5158         .bsz = 512, /* Block size is by default 512 bytes */
5159         .filename = NULL,
5160         .buf = NULL,
5161         .offset = 0
5162     };
5163     struct DdIo out = {
5164         .bsz = 512,
5165         .filename = NULL,
5166         .buf = NULL,
5167         .offset = 0
5168     };
5169 
5170     const struct DdOpts options[] = {
5171         { "bs", img_dd_bs, C_BS },
5172         { "count", img_dd_count, C_COUNT },
5173         { "if", img_dd_if, C_IF },
5174         { "of", img_dd_of, C_OF },
5175         { "skip", img_dd_skip, C_SKIP },
5176         { NULL, NULL, 0 }
5177     };
5178     const struct option long_options[] = {
5179         { "help", no_argument, 0, 'h'},
5180         { "object", required_argument, 0, OPTION_OBJECT},
5181         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5182         { "force-share", no_argument, 0, 'U'},
5183         { 0, 0, 0, 0 }
5184     };
5185 
5186     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
5187         if (c == EOF) {
5188             break;
5189         }
5190         switch (c) {
5191         case 'O':
5192             out_fmt = optarg;
5193             break;
5194         case 'f':
5195             fmt = optarg;
5196             break;
5197         case ':':
5198             missing_argument(argv[optind - 1]);
5199             break;
5200         case '?':
5201             unrecognized_option(argv[optind - 1]);
5202             break;
5203         case 'h':
5204             help();
5205             break;
5206         case 'U':
5207             force_share = true;
5208             break;
5209         case OPTION_OBJECT:
5210             user_creatable_process_cmdline(optarg);
5211             break;
5212         case OPTION_IMAGE_OPTS:
5213             image_opts = true;
5214             break;
5215         }
5216     }
5217 
5218     for (i = optind; i < argc; i++) {
5219         int j;
5220         arg = g_strdup(argv[i]);
5221 
5222         tmp = strchr(arg, '=');
5223         if (tmp == NULL) {
5224             error_report("unrecognized operand %s", arg);
5225             ret = -1;
5226             goto out;
5227         }
5228 
5229         *tmp++ = '\0';
5230 
5231         for (j = 0; options[j].name != NULL; j++) {
5232             if (!strcmp(arg, options[j].name)) {
5233                 break;
5234             }
5235         }
5236         if (options[j].name == NULL) {
5237             error_report("unrecognized operand %s", arg);
5238             ret = -1;
5239             goto out;
5240         }
5241 
5242         if (options[j].f(tmp, &in, &out, &dd) != 0) {
5243             ret = -1;
5244             goto out;
5245         }
5246         dd.flags |= options[j].flag;
5247         g_free(arg);
5248         arg = NULL;
5249     }
5250 
5251     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5252         error_report("Must specify both input and output files");
5253         ret = -1;
5254         goto out;
5255     }
5256 
5257     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5258                     force_share);
5259 
5260     if (!blk1) {
5261         ret = -1;
5262         goto out;
5263     }
5264 
5265     drv = bdrv_find_format(out_fmt);
5266     if (!drv) {
5267         error_report("Unknown file format");
5268         ret = -1;
5269         goto out;
5270     }
5271     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5272 
5273     if (!proto_drv) {
5274         error_report_err(local_err);
5275         ret = -1;
5276         goto out;
5277     }
5278     if (!drv->create_opts) {
5279         error_report("Format driver '%s' does not support image creation",
5280                      drv->format_name);
5281         ret = -1;
5282         goto out;
5283     }
5284     if (!proto_drv->create_opts) {
5285         error_report("Protocol driver '%s' does not support image creation",
5286                      proto_drv->format_name);
5287         ret = -1;
5288         goto out;
5289     }
5290     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5291     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5292 
5293     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5294 
5295     size = blk_getlength(blk1);
5296     if (size < 0) {
5297         error_report("Failed to get size for '%s'", in.filename);
5298         ret = -1;
5299         goto out;
5300     }
5301 
5302     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5303         dd.count * in.bsz < size) {
5304         size = dd.count * in.bsz;
5305     }
5306 
5307     /* Overflow means the specified offset is beyond input image's size */
5308     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5309                               size < in.bsz * in.offset)) {
5310         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5311     } else {
5312         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5313                             size - in.bsz * in.offset, &error_abort);
5314     }
5315 
5316     ret = bdrv_create(drv, out.filename, opts, &local_err);
5317     if (ret < 0) {
5318         error_reportf_err(local_err,
5319                           "%s: error while creating output image: ",
5320                           out.filename);
5321         ret = -1;
5322         goto out;
5323     }
5324 
5325     /* TODO, we can't honour --image-opts for the target,
5326      * since it needs to be given in a format compatible
5327      * with the bdrv_create() call above which does not
5328      * support image-opts style.
5329      */
5330     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5331                          false, false, false);
5332 
5333     if (!blk2) {
5334         ret = -1;
5335         goto out;
5336     }
5337 
5338     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5339                               size < in.offset * in.bsz)) {
5340         /* We give a warning if the skip option is bigger than the input
5341          * size and create an empty output disk image (i.e. like dd(1)).
5342          */
5343         error_report("%s: cannot skip to specified offset", in.filename);
5344         in_pos = size;
5345     } else {
5346         in_pos = in.offset * in.bsz;
5347     }
5348 
5349     in.buf = g_new(uint8_t, in.bsz);
5350 
5351     for (out_pos = 0; in_pos < size; ) {
5352         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5353 
5354         ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5355         if (ret < 0) {
5356             error_report("error while reading from input image file: %s",
5357                          strerror(-ret));
5358             goto out;
5359         }
5360         in_pos += bytes;
5361 
5362         ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5363         if (ret < 0) {
5364             error_report("error while writing to output image file: %s",
5365                          strerror(-ret));
5366             goto out;
5367         }
5368         out_pos += bytes;
5369     }
5370 
5371 out:
5372     g_free(arg);
5373     qemu_opts_del(opts);
5374     qemu_opts_free(create_opts);
5375     blk_unref(blk1);
5376     blk_unref(blk2);
5377     g_free(in.filename);
5378     g_free(out.filename);
5379     g_free(in.buf);
5380     g_free(out.buf);
5381 
5382     if (ret) {
5383         return 1;
5384     }
5385     return 0;
5386 }
5387 
5388 static void dump_json_block_measure_info(BlockMeasureInfo *info)
5389 {
5390     GString *str;
5391     QObject *obj;
5392     Visitor *v = qobject_output_visitor_new(&obj);
5393 
5394     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5395     visit_complete(v, &obj);
5396     str = qobject_to_json_pretty(obj, true);
5397     assert(str != NULL);
5398     printf("%s\n", str->str);
5399     qobject_unref(obj);
5400     visit_free(v);
5401     g_string_free(str, true);
5402 }
5403 
5404 static int img_measure(const img_cmd_t *ccmd, int argc, char **argv)
5405 {
5406     static const struct option long_options[] = {
5407         {"help", no_argument, 0, 'h'},
5408         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5409         {"object", required_argument, 0, OPTION_OBJECT},
5410         {"output", required_argument, 0, OPTION_OUTPUT},
5411         {"size", required_argument, 0, OPTION_SIZE},
5412         {"force-share", no_argument, 0, 'U'},
5413         {0, 0, 0, 0}
5414     };
5415     OutputFormat output_format = OFORMAT_HUMAN;
5416     BlockBackend *in_blk = NULL;
5417     BlockDriver *drv;
5418     const char *filename = NULL;
5419     const char *fmt = NULL;
5420     const char *out_fmt = "raw";
5421     char *options = NULL;
5422     char *snapshot_name = NULL;
5423     bool force_share = false;
5424     QemuOpts *opts = NULL;
5425     QemuOpts *object_opts = NULL;
5426     QemuOpts *sn_opts = NULL;
5427     QemuOptsList *create_opts = NULL;
5428     bool image_opts = false;
5429     int64_t img_size = -1;
5430     BlockMeasureInfo *info = NULL;
5431     Error *local_err = NULL;
5432     int ret = 1;
5433     int c;
5434 
5435     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
5436                             long_options, NULL)) != -1) {
5437         switch (c) {
5438         case '?':
5439         case 'h':
5440             help();
5441             break;
5442         case 'f':
5443             fmt = optarg;
5444             break;
5445         case 'O':
5446             out_fmt = optarg;
5447             break;
5448         case 'o':
5449             if (accumulate_options(&options, optarg) < 0) {
5450                 goto out;
5451             }
5452             break;
5453         case 'l':
5454             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5455                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5456                                                   optarg, false);
5457                 if (!sn_opts) {
5458                     error_report("Failed in parsing snapshot param '%s'",
5459                                  optarg);
5460                     goto out;
5461                 }
5462             } else {
5463                 snapshot_name = optarg;
5464             }
5465             break;
5466         case 'U':
5467             force_share = true;
5468             break;
5469         case OPTION_OBJECT:
5470             user_creatable_process_cmdline(optarg);
5471             break;
5472         case OPTION_IMAGE_OPTS:
5473             image_opts = true;
5474             break;
5475         case OPTION_OUTPUT:
5476             if (!strcmp(optarg, "json")) {
5477                 output_format = OFORMAT_JSON;
5478             } else if (!strcmp(optarg, "human")) {
5479                 output_format = OFORMAT_HUMAN;
5480             } else {
5481                 error_report("--output must be used with human or json "
5482                              "as argument.");
5483                 goto out;
5484             }
5485             break;
5486         case OPTION_SIZE:
5487             img_size = cvtnum("image size", optarg);
5488             if (img_size < 0) {
5489                 goto out;
5490             }
5491             break;
5492         }
5493     }
5494 
5495     if (argc - optind > 1) {
5496         error_report("At most one filename argument is allowed.");
5497         goto out;
5498     } else if (argc - optind == 1) {
5499         filename = argv[optind];
5500     }
5501 
5502     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5503         error_report("--image-opts, -f, and -l require a filename argument.");
5504         goto out;
5505     }
5506     if (filename && img_size != -1) {
5507         error_report("--size N cannot be used together with a filename.");
5508         goto out;
5509     }
5510     if (!filename && img_size == -1) {
5511         error_report("Either --size N or one filename must be specified.");
5512         goto out;
5513     }
5514 
5515     if (filename) {
5516         in_blk = img_open(image_opts, filename, fmt, 0,
5517                           false, false, force_share);
5518         if (!in_blk) {
5519             goto out;
5520         }
5521 
5522         if (sn_opts) {
5523             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5524                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5525                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5526                     &local_err);
5527         } else if (snapshot_name != NULL) {
5528             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5529                     snapshot_name, &local_err);
5530         }
5531         if (local_err) {
5532             error_reportf_err(local_err, "Failed to load snapshot: ");
5533             goto out;
5534         }
5535     }
5536 
5537     drv = bdrv_find_format(out_fmt);
5538     if (!drv) {
5539         error_report("Unknown file format '%s'", out_fmt);
5540         goto out;
5541     }
5542     if (!drv->create_opts) {
5543         error_report("Format driver '%s' does not support image creation",
5544                      drv->format_name);
5545         goto out;
5546     }
5547 
5548     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5549     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5550     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5551     if (options) {
5552         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5553             error_report_err(local_err);
5554             error_report("Invalid options for file format '%s'", out_fmt);
5555             goto out;
5556         }
5557     }
5558     if (img_size != -1) {
5559         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5560     }
5561 
5562     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5563     if (local_err) {
5564         error_report_err(local_err);
5565         goto out;
5566     }
5567 
5568     if (output_format == OFORMAT_HUMAN) {
5569         printf("required size: %" PRIu64 "\n", info->required);
5570         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5571         if (info->has_bitmaps) {
5572             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5573         }
5574     } else {
5575         dump_json_block_measure_info(info);
5576     }
5577 
5578     ret = 0;
5579 
5580 out:
5581     qapi_free_BlockMeasureInfo(info);
5582     qemu_opts_del(object_opts);
5583     qemu_opts_del(opts);
5584     qemu_opts_del(sn_opts);
5585     qemu_opts_free(create_opts);
5586     g_free(options);
5587     blk_unref(in_blk);
5588     return ret;
5589 }
5590 
/*
 * Table of subcommands, searched by name in main().
 *
 * The entries are generated by including "qemu-img-cmds.h" with DEF()
 * defined to emit one { option, callback } initializer per command; the
 * third DEF() argument (arg_string) is dropped here.  The final
 * { NULL, NULL } entry is the sentinel that ends the lookup loop.
 */
static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string)        \
    { option, callback },
#include "qemu-img-cmds.h"
#undef DEF
    { NULL, NULL, },
};
5598 
5599 int main(int argc, char **argv)
5600 {
5601     const img_cmd_t *cmd;
5602     const char *cmdname;
5603     int c;
5604     static const struct option long_options[] = {
5605         {"help", no_argument, 0, 'h'},
5606         {"version", no_argument, 0, 'V'},
5607         {"trace", required_argument, NULL, 'T'},
5608         {0, 0, 0, 0}
5609     };
5610 
5611 #ifdef CONFIG_POSIX
5612     signal(SIGPIPE, SIG_IGN);
5613 #endif
5614 
5615     socket_init();
5616     error_init(argv[0]);
5617     module_call_init(MODULE_INIT_TRACE);
5618     qemu_init_exec_dir(argv[0]);
5619 
5620     qemu_init_main_loop(&error_fatal);
5621 
5622     qcrypto_init(&error_fatal);
5623 
5624     module_call_init(MODULE_INIT_QOM);
5625     bdrv_init();
5626 
5627     qemu_add_opts(&qemu_source_opts);
5628     qemu_add_opts(&qemu_trace_opts);
5629 
5630     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5631         switch (c) {
5632         case ':':
5633             missing_argument(argv[optind - 1]);
5634             return 0;
5635         case '?':
5636             unrecognized_option(argv[optind - 1]);
5637             return 0;
5638         case 'h':
5639             help();
5640             return 0;
5641         case 'V':
5642             printf(QEMU_IMG_VERSION);
5643             return 0;
5644         case 'T':
5645             trace_opt_parse(optarg);
5646             break;
5647         }
5648     }
5649 
5650     if (optind >= argc) {
5651         error_exit(argv[0], "Not enough arguments");
5652     }
5653 
5654     cmdname = argv[optind];
5655 
5656     if (!trace_init_backends()) {
5657         exit(1);
5658     }
5659     trace_init_file();
5660     qemu_set_log(LOG_TRACE, &error_fatal);
5661 
5662     /* find the command */
5663     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5664         if (!strcmp(cmdname, cmd->name)) {
5665             g_autofree char *argv0 = g_strdup_printf("%s %s", argv[0], cmdname);
5666             /* reset options and getopt processing (incl return order) */
5667             argv += optind;
5668             argc -= optind;
5669             qemu_reset_optind();
5670             argv[0] = argv0;
5671             return cmd->handler(cmd, argc, argv);
5672         }
5673     }
5674 
5675     /* not found */
5676     error_exit(argv[0], "Command not found: %s", cmdname);
5677 }
5678